3 ###APPAUTHOR: xiaoranzzz
4 ###APPDATE: Tue Mar 11 05:03:49 2008
6 ###APPDESC: convert html files downloaded from book.sina.com.cn to txt
7 ###APPUSAGE: [htmlfile] [[-r rootId,[rootId,...]] [-f filter,[filter,...]] [-s regexp -e regexp]]
8 ###APPEXAMPLE: htm2txt booksrc/1.shtml "content,article" "sina.com" \n\tcat booksrc/1.shtml | htm2txt
9 ###APPOPTION: -r:rootIds|-f:filters|-s:starting mark|-e:ending mark
11 use HTML
::TreeBuilder
;
12 use lib
$ENV{XR_PERL_MODULE_DIR
};
13 use MyPlace
::Script
::Usage qw
/help_required/;
14 use MyPlace
::HTML
::Convertor
;
# Exit successfully, before doing any work, when help_required() (imported
# from MyPlace::Script::Usage) returns true for this script path and its
# command-line arguments.
16 exit 0 if(help_required
($0,@ARGV));
# Walk the command-line arguments. An argument that is exactly one of
# -r, -f, -s or -e is recognised as an option flag by the regex below.
# NOTE(review): the statements between the visible lines are not shown in
# this excerpt; presumably $what holds the most recently seen flag so that
# the following argument is treated as its value -- confirm against the
# full file.
26 foreach my $opt (@ARGV) {
27 if($opt =~ /^-[rfse]$/) {
# Comma-separated value: each piece is appended to @rootid.
33 push @rootid,split(",",$opt);
35 elsif($what eq "-f") {
# Comma-separated value: each piece is appended to @filter.
36 push @filter,split(",",$opt);
# The bodies of the -s and -e branches are not visible here; presumably they
# set the start/end marks ($start / $end) passed on below -- TODO confirm.
38 elsif($what eq "-s") {
41 elsif($what eq "-e") {
# Open the input file $fn read-only on the bareword handle FI; on failure die
# with the system error text. The trailing "\n" in the message suppresses
# Perl's "at FILE line N" suffix.
57 open FI
,"<",$fn or die("$!\n");
# Call text_from_string() with a reference to @src, the $start and $end
# marks, and references to @rootid and @filter; the returned value is
# dereferenced as an array into @result.
# NOTE(review): text_from_string is presumably exported by
# MyPlace::HTML::Convertor and @src presumably holds the input read from FI
# -- verify, neither is shown in this excerpt.
61 my @result = @
{text_from_string
(\
@src,$start,$end,\
@rootid,\
@filter)};