updated git and svn scripts
[xrzperl.git] / htm2txt_adult168
blob 0cb1ff1cf8d0ea30194bf3fe6781cd292b34bc67
#!/usr/bin/perl -w
###APPNAME: htm2txt_adult168
###APPAUTHOR: xiaoranzzz
###APPDATE: Tue Mar 11 05:03:49 2008
###APPVER: 0.1
###APPDESC: convert html file download from adult168.com to text
###APPUSAGE: [htmlfile...]
###APPEXAMPLE: htm2txt_adult168 booksrc/1.shtml
###APPOPTION:
use strict;
use HTML::TreeBuilder;
use lib $ENV{XR_PERL_MODULE_DIR};
use MyPlace::Script::Usage qw/help_required/;

# NOTE(review): help_required presumably prints the ###APP... header above
# when help is requested -- confirm against MyPlace::Script::Usage.
exit 0 if(help_required($0,@ARGV));
use MyPlace::Filename qw/get_uniqname/;
use MyPlace::HTML;
use MyPlace::HTML::Convertor;

# Input files come from the command line; with no arguments, read stdin.
my @flist = @ARGV ? @ARGV : ("/dev/stdin");

my $index = 0;
my $count = @flist;

# For each input: decode it through gb2utf, pick a title, then write the
# converted text to a uniquely named "<title>.txt" file.
foreach my $fn (@flist) {
    $index++;

    # Only regular files and the stdin pseudo-file are accepted.  The
    # original test used "=" (assignment) instead of a comparison, which
    # made the guard always true and overwrote $fn with "/dev/stdin".
    unless (-f $fn or $fn eq "/dev/stdin") {
        print STDERR "[$index/$count]$fn: not a regular file, skipped\n";
        next;
    }

    # The file name is embedded in a shell command line, so escape any
    # single quotes it contains before wrapping it in single quotes.
    (my $quoted = $fn) =~ s/'/'\\''/g;

    # NOTE(review): gb2utf is an external filter, presumably GB -> UTF-8;
    # it must be available on PATH.
    open my $in, "-|", "cat '$quoted' | gb2utf" or die("$!\n");

    my @src;
    my $title;
    while (my $line = <$in>) {
        push @src, $line;
        # Keep the first true value get_title yields
        # (get_title presumably comes from MyPlace::HTML).
        $title = get_title($line) unless ($title);
    }
    close $in;

    if ($title) {
        # Keep only the part before the first whitespace or "[".
        $title =~ s/\s+.*$//;
        $title =~ s/\[.*$//g;
    }
    if (!$title) {
        # No usable title (none found, or the trimming above emptied it):
        # fall back to the file's base name without .html/.htm.
        $title = $fn;
        $title =~ s/^.*\///g;
        $title =~ s/\.(html|htm)$//;
    }

    my $dst = get_uniqname($title, ".txt");
    print STDERR "[$index/$count]$fn -> $dst ...";

    my $out;
    if (!open $out, ">", $dst) {
        print STDERR ("\n$!\n");
        next;
    }
    print {$out} $title, "\n", "\n";

    # NOTE(review): the argument list is passed through unchanged; the
    # meaning of ["zoom"] is defined by text_from_string (presumably from
    # MyPlace::HTML::Convertor) -- confirm there.
    my @text = @{text_from_string(\@src, undef, undef, ["zoom"], undef)};

    # Blank lines are never written.  A non-blank line is written without
    # its line ending, except that the first non-blank line after a run of
    # blank lines gets a "\n" appended.
    my $blanks = 0;
    foreach my $line (@text) {
        chomp $line;
        if ($line =~ /^\s*$/) {
            $blanks++;
        }
        else {
            if ($blanks) {
                $line .= "\n";
                $blanks = 0;
            }
            print {$out} $line;
        }
    }

    # Buffered write errors only surface at close time, so report them.
    close $out or print STDERR "\n$dst: $!";
    print STDERR "\n";
}

exit 0;