3 # COPYRIGHT AND LICENSE
4 # Copyright (C) 2005-2018 H.Merijn Brand
6 # This script is free software; you can redistribute it and/or modify it
7 # under the same terms as Perl and/or Claws Mail itself. (GPL)
12 our $VERSION = "1.01 - 2018-10-08";
13 our $CMD = $0 =~ s{.*/}{}r;
16 my ($err, $str) = (@_, "");
17 $err and select STDERR
;
18 say "usage: $CMD [--html] [--type=<type>] file\n",
19 " --html Generate HTML (if supported)\n",
20 " --type=X X as mimetype (msword => doc)\n",
21 " $CMD --list will show all implemented conversions";
26 use Getopt
::Long
qw(:config bundling nopermute);
30 "help|?" => sub { usage
(0); },
31 "V|version" => sub { say "$CMD [$VERSION]"; exit 0; },
33 "v|verbose:1" => \
$opt_v,
34 "t|type|mimetype=s" => \
my $opt_t,
35 "h|html" => sub { $opt_h = "html" },
36 "l|list!" => \
my $opt_l,
39 $opt_v and say "$0 @ARGV";
41 # An anonymous list holds all candidate commands that can show the content.
42 # A plain string is an alias: it names another type whose entry is used instead.
43 # %f is replaced with the file name. Without %f, the file is added as the last arg.
46 bin
=> [ "strings" ], # fallback for binary files
48 txt
=> [ "cat" ], # Plain text
54 doc
=> [ "catdoc -x -dutf-8",
56 "antiword -w 72" ], # M$ Word
57 "vnd.ms-excel" => "xls",
59 docx
=> [ "unoconv -f text --stdout" ], # MS Word
63 "wvText" ], # M$ Excel
64 # ppt => [ "ppthtml" ], # M$ PowerPoint
65 # ppthtml "$1" | html2text
66 csv
=> "xls", # Comma Separated Values
68 ics
=> [ "ics2txt" ], # ICS calendar request
71 "unrtf -t text" ], # RTF
72 pdf
=> [ "pdftotext %f -" ], # Adobe PDF
74 ods
=> "xls", # OpenOffice spreadsheet
75 sxc
=> "xls", # OpenOffice spreadsheet
76 odt
=> [ "oo2pod %f | pod2text",
77 "ooo2txt" ], # OpenOffice writer
78 rtf
=> [ "rtf2text" ], # RTF
80 pl
=> [ "perltidy -st -se",
84 jsn
=> [ "json_pp" ], # JSON
87 xml
=> [ "xml_pp" ], # XML
89 ( map { $_ => "txt" } qw(
97 bz2
=> [ "bzip2 -d < %f | strings" ],
99 zip
=> [ "unzip -l %f" ], # ZIP
101 test
=> [ \
&test
], # Internal
103 tar
=> [ "tar tvf" ], # Tar uncompressed (plain .tar; tgz is the gzip'ed variant below)
104 tgz
=> [ "tar tzvf" ], # Tar GZ compressed
105 tbz
=> [ "tar tjvf" ], # Tar BZip2 compressed
106 txz
=> [ "tar tJvf" ], # Tar XZ compressed
108 rar
=> [ "unrar l" ], # RAR
112 rtf
=> [ "rtf2html" ],
117 my %tc = %{$fh{text
}};
118 foreach my $ext (sort keys %tc) {
120 ref $exe or $exe = $tc{$exe};
121 printf " .%-12s %s\n", $ext, $_ for @
$exe;
126 my $file = shift or usage
(1, "File argument is missing");
127 -f
$file or usage
(1, "File argument is not a plain file");
128 -r
$file or usage
(1, "File argument is not a readable file");
129 -s
$file or usage
(1, "File argument is an empty file");
131 my $ext = $file =~ m/\.(\w+)$/ ?
lc $1 : "";
132 $opt_t && exists $fh{text
}{lc $opt_t} and $ext = lc$opt_t;
133 unless (exists $fh{text
}{$ext}) {
134 my $ftype = do { # Ask file(1) for the type using a list-form pipe open: no shell is involved,
    # so a file name with spaces or shell metacharacters cannot inject commands.
    # "--" keeps a name starting with "-" from being read as an option.
    # Best effort: if file(1) cannot be started the type is the empty string.
    local $/; # slurp the complete output, as backticks in scalar context would
    open (my $ph, "-|", "file", "--brief", "--", $file) ? (<$ph> // "") : "";
    };
136 $ftype =~ m/^pdf doc/i ?
"pdf" :
137 $ftype =~ m/^ascii( english)? text/i ?
"txt" :
138 $ftype =~ m/^(utf-8 unicode|iso-\d+)( english)? text/i ?
"txt" :
139 $ftype =~ m/^xml doc/i ?
"xml" :
140 $ftype =~ m/^\w+ compress/i ?
"bin" :
147 exists $fh{$opt_h}{$ext} or $opt_h = "text";
148 exists $fh{$opt_h}{$ext} or $ext = "txt";
149 my $ref = $fh{$opt_h}{$ext};
150 ref $ref or $ref = $fh{$opt_h}{$ref};
152 $opt_v and warn "[ @$ref ] $file\n";
155 (my $cmd = shift) =~ s/\s.*//; # Only the command. Discard arguments here
156 foreach my $path (split m/:+/, $ENV{PATH
}) {
157 -x
"$path/$cmd" and return "$path/$cmd";
163 foreach my $c (@
$ref) {
169 my $cp = which
($c) or next;
174 my @cmd = split m/ +/ => $cmd;
175 grep { s/%f\b/$file/ } @cmd or push @cmd, $file;
176 #$cmd =~ s/%f\b/$file/g or $cmd .= " $file";
177 $opt_v and say "@cmd";