3 ###APPAUTHOR: xiaoranzzz
4 ###APPDATE: Fri Oct 5 12:37:53 2007
6 ###APPDESC: 将文心阁小说转为UTF8,并整理页面
8 ###APPEXAMPLE: tidywenk2 "在人间"
# With no arguments at all, behave as if "-h" had been given.
11 $ARGV[0]="-h" unless(@ARGV);
# On a help flag, run the external "plhelp" command with this script's path
# and the arguments, then exit with whatever system() returns.
# NOTE(review): $_ is not assigned in the visible lines -- presumably an
# enclosing loop over @ARGV sets it; confirm.
13 exit(system("plhelp",$0,@ARGV)) if($_ eq "-h" || $_ eq "--help");
# Earlier variant of the signature footer, kept commented out.
24 #my $SIGN="<div align=right id=sign>Reproduced by <b>Xiaoranzzz<\/b><br>" . `date` . "<\/div>";
# Signature footer: a right-aligned <div id="sign"> containing a fixed label
# followed by the output of the external `date` command. It is used below as
# the replacement text of one of the rewrite rules.
# NOTE(review): the markup contains a closing </b> without an opening <b>.
25 my $SIGN='<div align="right" id="sign">WenKuV2.0 Geek@MYPLACE</b><br>' . `date` . '</div>';
# Rewrite rules, part 1: general page clean-up. Every buildRule call pairs a
# regex with its replacement text.
# NOTE(review): buildRule is defined outside this excerpt; presumably it
# fills the @PAT/@REP arrays that are applied line by line further down --
# confirm.
#
# Shorten the title: everything from "--" through the closing title tag is
# replaced by just the closing tag.
28 buildRule
qr/--.*<\/title
>/,'</title
>';
# Drop <script src=...>...</script> elements.
29 buildRule qr/<script src=.*<\/script>/,"";
# Replace every occurrence of "gb2312" with "utf-8".
30 buildRule qr/gb2312/,"utf-8";
# Replace the <body ... marginwidth="20"> tag with a body tag that only
# carries class "mainBody", followed by a newline.
31 buildRule qr/<body .* marginwidth="20">/,'<body
class="mainBody">' . "\n";
# Rename the CSS class 'middle' to "mtext" (and switch to double quotes).
32 buildRule qr/class='middle'/,'class="mtext"';
# Replace a whole line that links to http://www.wenku.com or to "/"
# (matched case-insensitively) with the signature footer in $SIGN.
33 buildRule qr/.*<a href='(http\:\/\/www\.wenku\.com|\/)'>.*$/i
,"$SIGN";
# Rewrite rules, part 2: scripts and stylesheets.
#
# Replace the opening tag of an inline JavaScript block with a newline and
# the start of an HTML comment.
34 buildRule
qr/<script language='JavaScript'>/,"\n" . '<!--/';
# Replace the closing script tag with the end of an HTML comment, followed by
# three <script> tags that load wenku.js from the page's own directory, from
# file:///c:/windows/ and from file:///etc/.
35 buildRule
qr/<\/script
>/,"-->\n" .
36 '<script language="JavaScript" src="wenku.js"></script>' . "\n" .
37 '<script language="JavaScript" src="file:///c:/windows/wenku.js"></script>' . "\n" .
38 '<script language="JavaScript" src="file:///etc/wenku.js"></script>' . "\n";
# Replace the <link> tag that mentions new.css with three stylesheet links to
# wenku.css in the same three locations as the scripts above.
39 buildRule
qr/<link[^<>]*new.css[^<>]*>/,
40 '<link rel="stylesheet" href="wenku.css" type="text/css">' . "\n" .
41 '<link rel="stylesheet" href="file:///c:/windows/wenku.css" type="text/css">' . "\n" .
42 '<link rel="stylesheet" href="file:///etc/wenku.css" type="text/css">' . "\n";
# Rewrite rules, part 3: layout markup. Width-based presentational tags are
# replaced by bare tags that carry only an id.
#
# <table ... width=100% ...> becomes <table id="headTable">.
44 buildRule
qr/<table[^<>]*width=[\'\"]*100%[^><]*>/,'<table id="headTable">';
# <table ... width=93% ...> becomes <table id="contentTable">.
45 buildRule
qr/<table[^<>]*width=[\'\"]*93%[^<>]*>/,'<table id="contentTable">';
# <td ... width=80% ...> becomes <td id="contentText">.
46 buildRule
qr/<td[^<>]*width=[\"\']*80%[^<>]*>/,'<td id="contentText">';
# class="title" becomes id="bookTitle".
47 buildRule
qr/class=\"title\"/,'id="bookTitle"';
# Remove empty <center></center> pairs.
48 buildRule
qr/<center><\/center
>/,'';
# Remove <hr> tags, whatever their attributes.
49 buildRule
qr/<hr[^<>]*>/,'';
# Path of a scratch file, taken from the output of the external mktemp
# command.
# NOTE(review): backticks keep the command's trailing newline and no chomp is
# visible in this excerpt -- confirm one exists.
51 my $TEMPFILE=`mktemp`;
# NOTE(review): these lines look like the body of the GB2UTF routine that is
# called further down; its sub header, the assignment of $filename and the
# lines between the statements below are not visible in this excerpt --
# confirm.
#
# Give up with a false value when $filename is not a plain file.
55 return 0 unless(-f
"$filename");
# Run iconv in list form (no shell involved): convert $filename from gb18030
# to utf8, with -c, writing the result to $TEMPFILE. The "--" ends option
# parsing before the file name. system() returning 0 means iconv succeeded.
56 if (system("iconv","-c","-f","gb18030","-t","utf8","-o","$TEMPFILE","--","$filename")==0){
# Conversion failure message on STDERR.
# NOTE(review): the text says gb2312 although iconv is invoked with gb18030.
60 print STDERR
"Error while convert $filename from gb2312 to utf8\n";
# Rewrite $filename in place, but only when GB2UTF($filename) returns true.
# NOTE(review): the enclosing sub header, the loop that reads each $LINE and
# the closing braces are not visible in this excerpt.
66 if(GB2UTF
($filename)) {
# Read from the temporary file and overwrite the original file; either open
# failing aborts the script with the system error text.
67 open(TEMP
,"<",$TEMPFILE) or die("$!\n");
68 open(FH
,">",$filename) or die("$!\n");
# Turn a link that mentions bookroom.php and is followed by the closing
# bracket character into <b id="title"> wrapping the link text.
# NOTE(review): as laid out here the s operator and its '#'-delimited pattern
# are on separate lines; Perl treats '#' after whitespace as the start of a
# comment, so the two must be on one line for the substitution to apply.
71 $LINE =~ s
#<a.*bookroom.php.*>(.*)<\/a>》#<b id="title">$1</b>#;
# Apply every pattern/replacement pair, globally, to the current line.
# NOTE(review): @PAT and @REP are presumably filled by the buildRule calls --
# confirm.
72 for(my $i=0;$i<@PAT;$i++) {
73 $LINE =~ s/$PAT[$i]/$REP[$i]/g;
# Write the rewritten line, followed by a newline, to the output file.
75 print FH
($LINE,"\n");
# Main loop: walk over every command-line argument and report progress on
# STDERR.
# NOTE(review): the statements that do the actual work, and the branch
# structure between these prints, are not visible in this excerpt.
83 foreach my $cur(@ARGV) {
# The progress line ends in a carriage return instead of a newline, so the
# status message printed next overwrites it on a terminal.
85 print STDERR
("$cur ... \r");
87 print STDERR
("$cur [Done] \n");
# Visit every entry matched by "$cur/*".
90 foreach my $sub(glob("$cur/*")) {
92 print STDERR
("$sub ... \r");
96 print STDERR
("$cur [Done] \n");
# Status line for an argument that is skipped (presumably one that is
# neither of the handled kinds -- confirm against the missing lines).
99 print STDERR
("$cur [Ignored]\n");