package A2B; use A2B::Table; use A2B::Tools; use A2B::Types; use A2B::Index; use File::MMagic; use File::Path; use File::Basename; use File::Copy; use strict; my $a2b; sub new { my $package = shift; my %args = @_; my $database = $args{database} || "/etc/a2b"; my $types = A2B::Types->new; A2B::Index->new($types, "type"); A2B::Index->new($types, "extension"); my $tools = A2B::Tools->new(file => "$database/tools"); $tools->bind_foreign_key("to", $types, "type"); $tools->bind_foreign_key("from", $types, "type"); A2B::Index->new($tools, "to", "from"); A2B::Index->new($tools, "from"); A2B::Index->new($tools, "tool"); my $self = bless { tools => $tools, types => $types, file_mmagic => File::MMagic->new, quiet => 0, }, $package; $a2b = $self; return $self; } sub a2b { my $this = ref $_[0] || $a2b; return $a2b; } sub quiet { return a2b(@_)->{quiet}; } sub identify { my $this = shift; my $file = shift; my $type = $this->identify_by_extension($file) || $this->identify_by_content($file); return $type; } sub identify_by_content_first { my $this = shift; my $file = shift; my $type = $this->identify_by_content($file) || $this->identify_by_extension($file); return $type; } sub identify_by_content { my $this = shift; my $file = shift; if ($file =~ m{://}) { return undef; } my %already; while (-l $file && !$already{$file}++) { my $file1 = readlink($file); if ($file1 && $file1 !~ m{^/}) { $file1 = dirname($file)."/".$file1; } if ($file1 && -e $file1) { $file = $file1; } else { last; } } if (! -e $file) { warn "file not found: $file\n"; return undef; } if (-d $file) { return 'inode/directory'; } my $mimetype = $this->{file_mmagic}->checktype_filename($file); # cope with foolish File::MMagic! if ($mimetype) { if ($mimetype =~ m|^x-system/x-error; (.*)|) { die "File::MMagic: $1"; } if ($mimetype =~ m|^x-system/x-unix; .*?text.*|) { return "text/plain"; } if ($mimetype eq "application/octet-stream" || $mimetype =~ m{/unknown$}) { undef $mimetype; } } if (!$mimetype) { $mimetype = `file -i -b -L \Q$file\E`; chomp $mimetype; $mimetype =~ s/;.*//; if ($mimetype =~ /ERROR/ || $mimetype eq "") { undef $mimetype; } } if ($mimetype eq "image/x-portable-greymap") { # correction! $mimetype = "image/x-portable-graymap"; } if ($mimetype eq "application/octet-stream") { undef $mimetype; } return $mimetype; } sub identify_by_extension { my $this = shift; my $file = shift; if ($file =~ m{://}) { if ($file =~ m{^(dvd|dvdnav|vcd|tv)://}) { return 'video/x-generic'; } elsif ($file =~ m{^(radio|cdda)://}) { return 'audio/x-generic'; } elsif ($file =~ m{^(http|https|ftp)://}) { if ($file =~ m{/$}) { return 'text/html'; } } } if ($file =~ m{/$}) { return 'inode/directory'; } if ($file =~ m{\.tar\.gz$}) { return "application/x-gtar"; } elsif ($file =~ m{\.tar.bz2}) { return "application/x-bzip2tar"; } elsif ($file =~ m{\.tar.lzma}) { return "application/x-lzmatar"; } my ($extension) = $file =~ /.*\.(.*)/; if (!$extension) { return undef; } my $mimetypes = $this->{types}->query(extension=>lc($extension)); if (@$mimetypes == 0) { warn "file extension $extension not found"; return undef; } my $mimetype = $mimetypes->[0][0]; return $mimetype; } sub type { my ($self, $type) = @_; my $types; if ($type =~ m{/}) { $types = $self->{types}->query(type => $type); } else { $types = $self->{types}->query(extension => $type); } my $n = @$types; if ($n == 0) { die "unknown type `$type': please add it to the database!"; } # elsif ($n != 1) { # die "database error: type `$type' is not unique: $n instances"; #} return $types->[0]; } sub best { my ($self, $alpha, $omega) = @_; my $paths = $self->paths($alpha, $omega, 1); return $paths->[0]; } sub paths { my ($self, $alpha, $omega, $limit, $grep, $nogrep) = @_; $grep ||= $self->{grep}; $nogrep ||= $self->{nogrep}; $omega = [$omega] unless ref $omega eq "ARRAY"; $_ = $self->type($_) || die "unknown type `$_'" for ($alpha, @$omega); my $paths = $self->{tools}->paths($alpha, $omega, $limit, $grep, $nogrep); return $paths; } # TODO: implement this without using system "mv ..."? sub mv { my ($from, $to) = @_; my $ask = ""; my $quiet = ""; if (!A2B->a2b->{force}) { $ask = "-i"; } elsif (A2B->a2b->{quiet} >= 2) { $quiet = "exec >/dev/null 2>&1;" } if (-l $from) { return not system "$quiet cp $ask \Q$from\E \Q$to\E"; } else { return not system "$quiet mv $ask \Q$from\E \Q$to\E"; } } sub convert { my ($self, $path, $in, $out, $dir) = @_; $in = [$in] unless ref $in eq "ARRAY"; my $cwd = `pwd` or die "cannot `pwd'!"; # TODO replace this, use the perl module chomp $cwd; # canonicalise pathnames $out ||= $cwd; for ($out, @$in) { m|^/| or m|://| or $_ = "$cwd/$_"; 1 while s!(/|^)[^./]+/\.\./!$1!; 1 while s!/\./!/!; #/# (keep vim happy) } # make a temp dir if necessary; my $delete_temp_dir; unless (defined $dir) { $dir = POSIX::tmpnam; $delete_temp_dir = 1; } -d $dir or mkdir $dir, 0700 or die "cannot mkdir `$dir': $!"; chdir $dir or die "cannot chdir `$dir': $!"; # link source files into temp dir my @inputs; for my $file (@$in) { my $target; if ($file =~ m{^(http|ftp|https)://}) { my @quiet; if (!$self->quiet) { warn "fetching object at url: $file\n"; } else { @quiet = ("-q"); } if ($file =~ m{/$}) { $target = "index.html"; } else { $target = basename $file; } my $base = $target; my $i = 2; while (-e $target) { $target = "$i-$base"; ++$i; } if ($self->quiet < 2) { print join " ", "wget", @quiet, "-O", $target, $file; print "\n"; } system "wget", @quiet, "-O", $target, $file and die "could not fetch: $file\n"; push @inputs, $target; } elsif ($file =~ m{://}) { push @inputs, $file; } else { if (0 == index $file, "$cwd/") { # we preserve the dir structure $target = substr $file, 1+length $cwd; my $dirname = dirname $target; -d $dirname or mkpath $dirname or die "cannot mkpath `$dirname'"; } else { # forget dir structure if the file was not under the cwd - sorry! $target = basename $file; } $target =~ s{/+$}{}; -e $target and die "`$target' already exists\n"; symlink $file, $target or link $file, $target or copy($file, $target) or !system("cp", "-a", $file, $target) or die "cannot symlink, link, copy or cp `$file' `$target': $!"; push @inputs, $target; } } my $results = eval { $path->convert(@inputs); }; $@ and die "error in conversion: $@\n"; my $out_is_dir = -d $out; if (@$results == 1 && ! $out_is_dir) { mv @$results, $out or die "cannot mv `@$results' -> `$out': $!"; } elsif (@$results == 0) { warn "no results / conversion failed?\n"; } else { $out_is_dir or mkdir $out, 0777 or die "cannot mkdir `$out': $!"; for my $file (@$results) { # NOTE: currently, it won't copy empty directories if ($file =~ m|/|) { my ($dir, $leaf) = $file =~ m|(.*)/(.*)|; my $target = "$out/$dir"; -d $target or mkpath $target or die "cannot mkpath `$target'"; mv $file, "$target" or die "cannot mv `$file' `$target'"; } else { mv $file, $out or die "cannot mv `$file' `$out': $!"; } } } chdir $cwd or die "cannot chdir `$cwd': $!"; $delete_temp_dir and (rmtree $dir or warn "cannot rmtree `$dir': $!"); } 1; =head1 A2B.pm Here's a simple example of how to use A2B from a Perl program - this API may change a bit in future. Note that you should probably put the call to "convert" inside an eval {} block in a real program, because it can die in many and varied ways. use lib "/usr/lib/a2b"; use A2B; my $a2b = A2B->new; my $path = $a2b->best("image/gif", "text/plain") or die "cannot convert from gif to text"; $a2b->convert($path, "fred.gif", "fred.txt"); =cut