2 ###APPNAME: xrbook_writeinfo.pl
3 ###APPAUTHOR: xiaoranzzz
4 ###APPDATE: Tue Mar 11 04:58:02 2008
6 ###APPDESC: create bookinfo.xml
7 ###APPUSAGE: indexURL [directory]
8 ###APPEXAMPLE: xrbook_writeinfo http://book.sina.com.cn/nzt/lit/wodemingzijiaohong/index.shtml src
# Command-line handling. With no arguments at all, pretend "-h" was passed.
14 $ARGV[0]="-h" unless(@ARGV);
# When $_ is a help flag, run the external "plhelp" command on this script
# and exit with the value system() returns.
# NOTE(review): $_ is presumably set by a loop over @ARGV that is not
# visible in this excerpt -- confirm.
16 exit(system("plhelp",$0,@ARGV)) if($_ eq "-h" || $_ eq "--help");
# Usage guard.
# NOTE(review): @ARGV was given a default element above, so this looks
# unreachable unless the omitted lines empty @ARGV -- confirm.
19 die("Usage: $0 indexURL [directory]\n") unless(@ARGV);
# Fragment of a text-extraction helper.
# NOTE(review): presumably the body of getTextBetween($text, $begin, $end);
# the "sub" header and argument unpacking are not visible here -- confirm.
# When no end pattern is supplied, stop at the next "<" or newline.
25 $end='[\<\n]' unless($end);
# Case-insensitive match: $begin, optional whitespace, then a captured run
# of characters that are neither "<" nor ">", followed by $end.
26 my @match = $text =~ m{$begin\s*([^\<\>]*)\s*$end}i;
# Hand back the captured text when the pattern matched.
27 return $match[0] if(@match);
# Derive $book{base} and $book{dirname} from the index URL.
# NOTE(review): the if/else skeleton choosing between the two URL forms
# below is not visible in this excerpt -- confirm the branch structure.
# The second command-line argument (may be undef) is the save directory.
33 $book{saveto
}= $ARGV[1];
# Form 1: ".../catalog.php?book=<digits>". Captures are: optional "http://"
# prefix, everything up to and including the slash before catalog.php, and
# the numeric book id.
35 my @match = ($book{URL
} =~ /^(http:\/\
/|)(.*\/)catalog\
.php
\?book
=([0-9]+)/);
# For form 1 the base is the path prefix before catalog.php ...
37 $book{base
}=$match[1];
# ... and the directory name is the numeric book id.
38 $book{dirname
}=$match[2];
# A URL that ends in "/" gets "index.shtml" appended before form 2 is tried.
41 if($book{URL
} =~ /\/$/) {
42 $book{URL
} .= "index.shtml";
# Form 2: captures are the optional "http://" prefix, the host part (text
# up to the first "/"), and the last directory component before the file
# name.
44 @match=($book{URL
} =~ /^(http:\/\
/|)([^\/]+)\
/.*\/([^\
/]+)\/[^\
/]*$/);
# For form 2 the base is the host part ...
46 $book{base
}=$match[1];
# ... and the directory name is the last directory component of the path.
47 $book{dirname
}=$match[2];
# Abort when the last attempted pattern produced no captures.
51 die("Invalid index URL:\n$book{URL}\n") unless(@match);
# Read the index page through a shell pipeline: the external "netcat"
# command is given the URL and its output is converted from GB2312 to UTF-8
# by iconv (-c is passed so unconvertible characters do not abort the run).
# NOTE(review): "netcat" is presumably a URL-fetching wrapper rather than
# the raw TCP tool -- confirm.
# NOTE(review): the URL is interpolated inside single quotes in a shell
# command; a URL containing "'" would break the quoting -- validate input.
53 open(SOURCE
,"-|", "netcat '$book{URL}' | iconv -f gb2312 -t utf8 -c")
54 or die("Cant't fork netcat | iconv\n");
# Scrape book metadata out of the current input text in $_. Every field is
# filled only while it is still unset, so the first hit wins.
# NOTE(review): $_ is presumably one line read from SOURCE by a loop that is
# not visible here, and several closing braces are also missing -- confirm.
# Title: text following the <title> tag, with everything from the first "_"
# onward removed and any prefix ending in "代表作:" or "作品:" removed.
58 unless($book{title
}) {
59 $book{title
} = getTextBetween
($_,'\<\s*title\s*\>');
60 $book{title
} =~ s/_.*$//g;
61 $book{title
} =~ s/.*(代表作|作品)://g;
# Author: first try the span/anchor markup that follows a "作者:" label;
# when that yields nothing, fall back to the plainer "<span>作者:" form.
63 unless($book{author
}) {
64 $book{author
} = getTextBetween
($_,'\<span class=td_m\>作者:\<\/span\>\<a[^\>\<]*\>\<span class=td_m\>');
65 $book{author
} = getTextBetween
($_,"\<span\>作者:") unless($book{author
});
# Description: text after the opening quote of the content attribute of a
# <meta name=description ...> tag.
# NOTE(review): the rest of this call (any end pattern and the closing
# parenthesis) is not visible in this excerpt.
67 unless($book{description
}) {
68 $book{description
} = getTextBetween
($_,
69 '\<meta\s*name=\"*description\"*\s*content=\"',
# About: text that follows a <span class=td_m> tag.
72 unless($book{about
}) {
73 $book{about
} = getTextBetween
($_,'\<span class=td_m\>');
# Cover: value after "src=" that contains the book's directory name and
# ends in ".jpg"; the whole capture is stored.
# NOTE(review): $book{dirname} is interpolated into the regex unescaped and
# ".*" is greedy, so the capture can span more than one attribute -- confirm
# this is acceptable for the pages being scraped.
75 unless($book{cover
}) {
76 my @match = $_ =~ /src\s*=\s*(.*$book{dirname}.*\.jpg)/;
78 $book{cover
}=$match[0];
# Collect one chapter link from the current input text in $_.
# NOTE(review): $TEXT is a regex fragment defined outside this excerpt; its
# exact meaning cannot be confirmed from here.
# First attempt: an <a> tag whose href contains the book's directory name
# and ends in "html". Capture 1 is the href, capture 2 the link text.
81 my @match = $_ =~ /\<a[^\<\>]*href\s*=\s*($TEXT$book{dirname}${TEXT}html)$TEXT\>($TEXT)\<\/a\
>/i
;
# Fallback, tried only when the first attempt found nothing: an href that
# starts with "/longbook/" and ends in ".shtml".
82 @match = $_ =~ /\<a[^\<\>]*href\s*=\s*(\/longbook\
/$TEXT\.shtml)$TEXT\>($TEXT)\<\/a\
>/i
unless(@match);
# Links whose URL contains "index" are skipped.
84 next if($match[0] =~ /index/);
# Clean the link text: drop leading whitespace and digits ...
85 $match[1] =~ s/^\s*[0-9]*//g;
# ... then drop one more leading character and any whitespace after it.
# NOTE(review): presumably this removes a separator after the chapter
# number, but it removes whatever character comes first -- confirm.
86 $match[1] =~ s/^.\s*//;
# Record the chapter as a NAME/URL pair.
# NOTE(review): no guard for "neither pattern matched" is visible before
# this push; it may live in an omitted line -- confirm.
88 push(@pages,{"NAME"=>$match[1],"URL"=>$match[0]});
# Decide where the book is saved and create the directories on demand.
# NOTE(review): the if/else lines that choose between the "author/title"
# and the "title only" layout are not visible here -- confirm which
# condition selects each.
# Without a scraped title, fall back to the directory name from the URL.
93 $book{title
}=$book{dirname
} unless($book{title
});
# Layout A: save under "<author>/<title>".
96 $book{saveto
}=$book{author
} . "/" . $book{title
};
# Create the author directory when it does not exist; die with the OS error
# message when mkdir fails.
97 if(! -d
$book{author
}) {mkdir $book{author
} or die("$!\n");}
# Layout B: save directly under "<title>".
100 $book{saveto
}=$book{title
};
# Create the chosen book directory when it does not exist.
102 if(! -d
$book{saveto
}) {mkdir $book{saveto
} or die("$!\n");}
# Downloaded sources go into a "src" subdirectory of the book directory.
103 $book{saveto
} .= "/src";
# Create that "src" subdirectory when it does not exist.
106 if(! -d
$book{saveto
}) {
107 mkdir $book{saveto
} or die("$!\n");
# Feed the list of URLs to download to the external "batchget" command
# through a pipe opened on the bareword handle fO.
# NOTE(review): no failure check on this open is visible, and the title and
# URL are interpolated inside single quotes in a shell command, so a "'" in
# either value would break the quoting.
111 open fO
,"|-","batchget -m 1 -n '$book{title}' -r '$book{URL}'";
# One line per collected chapter: "http://" + base + the chapter's URL.
112 for(my $i=0;$i<@pages;$i++) {
113 print fO
"http://$book{base}$pages[$i]{URL}\n";
# The cover image URL is sent only when one was found.
115 print fO
$book{cover
},"\n" if($book{cover
});
# Finally the index page itself.
116 print fO
$book{URL
},"\n";
# Serialise the collected %book hash to XML, dying when XMLout returns a
# false value.
# NOTE(review): XMLout is presumably XML::Simple's; the "use" line is not
# visible in this excerpt -- confirm.
119 my $xml = XMLout
(\
%book,SuppressEmpty
=>1,KeyAttr
=>[],NoAttr
=>1,NumericEscape
=>0) or die("$!\n");
# Open "bookinfo.xml" for writing, reusing the fO handle.
# NOTE(review): the path is relative, so the file lands in whatever the
# current directory is at this point. No failure check is visible, and the
# lines that print $xml and close the handle are not in this excerpt.
120 open fO
,">","bookinfo.xml";
# Report the directory the book sources were saved to.
123 print "Done saved in \"$book{saveto}\"\n";