###APPAUTHOR: xiaoranzzz
###APPDATE: Thu Mar 27 22:43:15 2008
###APPDESC: strip links from html source
###APPEXAMPLE: cat index.htm | htmllinks www.google.com
use HTML::TreeBuilder;
use URI;
use strict;
use warnings;

# Print every link found in an HTML document, one per line.
#
# Usage (taken from the APPEXAMPLE line in the file header):
#     cat index.htm | htmllinks [BASE_URL]
#
# With BASE_URL, each link is resolved against it and printed as an
# absolute URL.  Without it, links are printed exactly as written in the
# document.  `href` values of <a>/<link> elements come first, followed by
# `src` values of <img> elements.

# Delegate to the external `plhelp` tool when any argument asks for help.
# Scan @ARGV explicitly: $_ is not set to anything useful at this point.
if (grep { $_ eq '-h' || $_ eq '--help' } @ARGV) {
    my $status = system('plhelp', $0, @ARGV);

    # system() returns the raw wait status, not the exit code: the child's
    # exit code sits in the high byte, and -1 means it could not be started.
    # Passing the raw value to exit() would turn most failures into 0.
    exit 0 if $status == 0;
    exit 1 if $status == -1;
    exit( ($status >> 8) || 1 );
}

# Optional base URL.  NOTE(review): the statement that originally populated
# $base is not visible; taking the first argument follows the header's
# usage example -- confirm against the full script.
my $base = shift @ARGV;
if (defined $base && length $base) {
    $base = URI->new($base);

    # A bare host such as "www.google.com" has no scheme; default to http.
    $base = "http://$base" unless $base->scheme;
}
else {
    $base = undef;
}

# NOTE(review): the original parsing step is not visible either; reading
# the document from STDIN matches the header's usage example.
my $tree = HTML::TreeBuilder->new();
$tree->parse_file(\*STDIN);

_print_links($tree, 'href', $base, 'a', 'link');
_print_links($tree, 'src',  $base, 'img');

# HTML::TreeBuilder trees must be deleted explicitly to release memory.
$tree->delete;

# Print the value of attribute $attr for every element of $tree whose tag
# is listed in @tags, one value per line.  When $base is defined the value
# is resolved against it first; otherwise it is printed unchanged.
sub _print_links {
    my ($tree, $attr, $base, @tags) = @_;

    for my $node ($tree->find(@tags)) {
        my $url = $node->attr($attr);

        # Skip missing or empty attributes, but keep a literal "0", which is
        # a valid relative URL that a plain truth test would drop.
        next unless defined $url && length $url;

        my $link = defined $base ? URI->new_abs($url, $base) : $url;
        print "$link\n";
    }

    return;
}