2 ###APPNAME: sinabook_parsesearch
3 ###APPAUTHOR: xiaoranzzz
4 ###APPDATE: Tue Mar 11 16:56:49 2008
6 ###APPDESC: [cmd] default as sinabook_justtext
11 use HTML
::TreeBuilder
;
# With no command-line arguments, behave as if "-h" had been given.
14 $ARGV[0]="-h" unless(@ARGV);
# When $_ is "-h" or "--help", run the external "plhelp" command with this
# script's path and the arguments, then exit with system()'s return value.
# NOTE(review): nothing in the visible lines sets $_ — presumably an enclosing
# loop over @ARGV is missing from this view; confirm.
# NOTE(review): system() returns the raw wait status, not the child's exit
# code, so the value handed to exit() is not plhelp's exit code.
16 exit(system("plhelp",$0,@ARGV)) if($_ eq "-h" || $_ eq "--help");
# Open FI as a read pipe from a shell pipeline: netcat is given $URL and its
# output is converted from gb2312 to utf8 by iconv. Dies if the pipe cannot
# be opened.
# NOTE(review): $URL is interpolated into a single-quoted shell word, so a
# quote character inside it would break out of the quoting. $URL is defined
# outside the visible lines — confirm where it comes from.
21 open FI
,"-|","netcat '$URL' | iconv -f gb2312 -t utf8" or die("Unable to fork netcat | iconv\n");
# Parser object for the first fetch, kept in slot 0 of @tree.
23 $tree[0] = HTML
::TreeBuilder
->new();
# Match a pager link in $_ and capture three pieces: the path up to and
# including "page=", the page-number digits, and the trailing "&..." part up
# to the next quote or space.
# NOTE(review): the pattern is split across two lines here, which looks like
# a line-wrap artifact — confirm against the original file.
27 my @match = $_ =~ /(\/booksearch\
/booksearch\.php\?page=)([0-9]+)(\&[^\"\' ]+)/;
# Loop $i from 2 up to $pages; the tree for page $i is stored at $tree[$i-1].
# NOTE(review): $pages is assigned outside the visible lines, and the loop's
# closing brace is not in this view.
35 for(my $i=2;$i<=$pages;$i++) {
# Fresh parser object for this page.
36 $tree[$i-1] = HTML
::TreeBuilder
->new();
# Read pipe: netcat is given "$URL&page=$i" and iconv converts gb2312 to utf8.
# Re-opening the bareword handle FI implicitly closes the previous pipe.
37 open FI
,"-|","netcat '$URL&page=$i' | iconv -f gb2312 -t utf8" or die("Unable to fork netcat | iconv\n");
# Feed the pipe's output to the parser one line at a time.
38 $tree[$i-1]->parse($_) while(<FI
>);
# Open FO as a write pipe into "sort -u", so everything printed to FO is
# emitted sorted with duplicate lines removed.
# NOTE(review): the result of this open is not checked.
43 open FO
,"|-","sort -u";
# Walk every parsed tree collected in @tree.
# NOTE(review): the closing braces of these nested loops are not in this view.
44 foreach my $tree(@tree) {
# Select each <div> element whose class attribute is "des".
45 foreach my $div($tree->look_down("_tag","div","class","des")) {
# Select each <a> element found beneath that div.
46 foreach my $node($div->look_down("_tag","a")) {
# Print the link's href attribute to the sort pipe, one per line.
47 print FO
$node->attr("href"),"\n";