5 #use Time::HiRes qw ( gettimeofday tv_interval );
6 #use Term::ANSIColor qw(:constants);
8 #use HTTP::Request::Common qw(POST);
9 use HTML
::TreeBuilder
::XPath
;
10 #use LWP::Simple qw ( get );
13 #use HTML::TableExtract;
# --- Setup: input/output files, HTTP client, and the gene-search request ---
#
# Opens resLOC.lst for reading and resLOC.anno for writing, creates an LWP
# user agent, and POSTs a single keyword to the gene-search.php endpoint on
# ricexpro.dna.affrc.go.jp.  Leaves $ua, $req, $res, $count and $keyword in
# scope for the code that follows.

# The bareword handles I and O are kept as-is because code outside this
# section may refer to them by name.  The die messages name the file so a
# failure is diagnosable.
open(I, '<', 'resLOC.lst')  or die "Cannot open resLOC.lst for reading: $!";
open(O, '>', 'resLOC.anno') or die "Cannot open resLOC.anno for writing: $!";

my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/5.0");    # value sent as the User-Agent header

my ($req, $res, $count);

$req = HTTP::Request->new(
    POST => 'http://ricexpro.dna.affrc.go.jp/RXP_1006/gene-search.php');
$req->content_type('application/x-www-form-urlencoded');

my $keyword = 'LOC_Os01g01360';
#$keyword = 'LOC_Os01g01040';

# The body is declared as application/x-www-form-urlencoded, so the value has
# to be percent-encoded; otherwise a keyword containing '&', '=', '+', a
# space or a non-ASCII character would corrupt the form.  Only a copy is
# encoded: $keyword itself stays untouched for later reporting.  Identifiers
# made of [A-Za-z0-9_.~-] (such as the one above) are sent unchanged.
{
    my $form_value = $keyword;

    # Character strings are turned into UTF-8 bytes first so that every
    # escaped unit is a single octet; byte strings are left alone.
    utf8::encode($form_value) if utf8::is_utf8($form_value);
    $form_value =~ s/([^A-Za-z0-9_.~-])/sprintf('%%%02X', ord($1))/ge;

    $req->content("keyword=$form_value");
}
#$req = POST 'http://ricexpro.dna.affrc.go.jp/RXP_1006/gene-search.php', [ keyword => 'LOC_Os01g01030' ];
$res = $ua->request($req);
# Handle the search response.  The success branch parses the returned HTML
# and extracts locus / feature / accession / description values from the
# result table cells.
# NOTE(review): this excerpt is incomplete -- the loop that defines $el, the
# closing braces and the else keyword are not visible here.
38 if ($res->is_success) {
39 #print $res->decoded_content;
# Work on the response body as returned by $res->content (decoded_content is
# the commented-out alternative above).
40 my $tmp_html = $res->content;
# NOTE(review): the markup below looks like a pasted sample of the server's
# result table kept for reference; it is not Perl -- confirm it sits inside
# POD or is otherwise excluded from compilation.
44 <table border="1" align="center" width="1000"><tr><th>Locus ID / Links</th><th>Locus<br>Select</th><th>FeatureNum<br>(Link to graph)</th><th>Feature<br>Select</th><th>Accession</th><th>Probe Sequence ID<br>(Link to SeqInfo)</th><th style="width:400px;">Description</th><th>MSU ID</th></tr>
45 <tr><td valign="top" rowspan="1"><span class="locus-link" tos17="1">Os01g0100500</span></td><td valign="top" rowspan="1"><input type="checkbox" class="locus-select" name="Os01g0100500"></td><td><a class="graph-link" barimg="images/barplot/RXP_1006-Os01g0100500-12943_bar.png" lineimg="images/lineplot/RXP_1006-Os01g0100500-12943_line.png" href="graph-view.php?featurenum=12943" target="_blank">12943</a></td>
46 <td><input type="checkbox" class="feature-select Os01g0100500-feature" name="12943"></td>
47 <td>AK067316</td><td><a href="probe-seq-info.php?seqid=S-10941" target="_blank">S-10941</a> (unique)</td><td><span class="desc descinfo">Similar to Pectinesterase-like protein.</span></td><td><a href="http://rice.plantbiology.msu.edu/cgi-bin/ORF_infopage.cgi?orf=LOC_Os01g01040" target="_blank">LOC_Os01g01040</a><br/><a href="http://rice.plantbiology.msu.edu/cgi-bin/ORF_infopage.cgi?orf=LOC_Os01g01030" target="_blank">LOC_Os01g01030</a><br/></td></tr>
# Build an XPath-capable tree from the response body.
51 my $tree= HTML
::TreeBuilder
::XPath
->new;
52 $tree->parse_content($tmp_html);
# Select every <td> cell of the table directly inside <div id="result">.
53 my @toc = $tree->findnodes('//div[@id="result"]/table/tr/td');
# Accumulators for the values extracted from the cells.  %Desc is used below
# as a per-description counter and @DescUniq receives the de-duplicated
# descriptions.
56 my (@Locus,@FeatureNum,@Accession,@Desc,%Desc,@DescUniq);
# Serialise the current cell back to HTML so it can be classified with the
# regexes below.  NOTE(review): $el is presumably the loop variable over
# @toc; the loop header is not visible in this excerpt.
58 my $tmp = $el->as_HTML;
59 #print $el->as_HTML," ---\n";
# NOTE(review): the lines below ending in " ---" look like sample output of
# the commented-out debug print above, kept for reference; they are not Perl.
61 <td rowspan="2" valign="top"><span class="locus-link" tos17="0">Os01g0103100</span></td> ---
62 <td rowspan="2" valign="top"><input class="locus-select" name="Os01g0103100" type="checkbox" /></td> ---
63 <td><a barimg="images/barplot/RXP_1006-Os01g0103100-07015_bar.png" class="graph-link" href="graph-view.php?featurenum=7015" lineimg="images/lineplot/RXP_1006-Os01g0103100-07015_line.png" target="_blank">7015</a></td> ---
64 <td><input class="feature-select Os01g0103100-feature" name="7015" type="checkbox" /></td> ---
66 <td><a href="probe-seq-info.php?seqid=S-5924" target="_blank">S-5924</a> (unique)</td> ---
67 <td><span class="desc descinfo">TGF-beta receptor, type I/II extracellular region family protein.</span></td> ---
68 <td><a href="http://rice.plantbiology.msu.edu/cgi-bin/ORF_infopage.cgi?orf=LOC_Os01g01360" target="_blank">LOC_Os01g01360</a><br /></td> ---
69 <td><a barimg="images/barplot/RXP_1006-Os01g0103100-36463_bar.png" class="graph-link" href="graph-view.php?featurenum=36463" lineimg="images/lineplot/RXP_1006-Os01g0103100-36463_line.png" target="_blank">36463</a></td> ---
70 <td><input class="feature-select Os01g0103100-feature" name="36463" type="checkbox" /></td> ---
72 <td><a href="probe-seq-info.php?seqid=S-29658" target="_blank">S-29658</a> (unique)</td> ---
73 <td><span class="desc descinfo">TGF-beta receptor, type I/II extracellular region family protein.</span></td> ---
74 <td><a href="http://rice.plantbiology.msu.edu/cgi-bin/ORF_infopage.cgi?orf=LOC_Os01g01360" target="_blank">LOC_Os01g01360</a><br /></td> ---
76 #print $el->as_trimmed_text,"\n";
# Classify the cell by its markup and capture the text before the closing
# tag:
#   "locus-link" span          -> word characters (locus id)
#   anchor with a barimg attr  -> digits (feature number)
#   bare <td>word</td>         -> accession
#   "desc descinfo" span       -> description text
# NOTE(review): the statements that store the captures (other than the
# prints) are not visible in this excerpt.
77 if ($tmp =~ /"locus-link".+\>(\w+)\<\//) {
80 } elsif ($tmp =~ / barimg.+\>(\d+)\<\//) {
81 print "FeatureNum=$1\n";
83 } elsif ($tmp =~ /\<td\>(\w+)\<\/td\
>/) {
84 print "Accession=$1\n";
86 } elsif ($tmp =~ / class="desc descinfo".*\>([^<]+)\<\//) {
# Keep a description only while its counter is positive.  Flipping the sign
# of counters above 1 makes later occurrences of the same description fail
# the "> 0" test, which presumably de-duplicates repeated descriptions.
# NOTE(review): $_ is presumably a description from a loop not visible here.
93 push @DescUniq,$_ if $Desc{$_} > 0;
94 $Desc{$_} *= -1 if $Desc{$_} > 1;
# One tab-separated record: the keyword followed by the '|'-joined loci,
# feature numbers, accessions and unique descriptions.
96 my $tmp = join("\t",$keyword,join('|',@Locus),join('|',@FeatureNum),join('|',@Accession),join('|',@DescUniq));
# Print the record prefixed with five dashes.
98 print '-' x
5,"$tmp\n";
# Report the HTTP status line; presumably this is the else branch of the
# is_success test (the else keyword is not visible in this excerpt).
102 print "Error: " . $res->status_line . "\n";
109 #my $tmp_html = $res->content;
113 awk
'{print $1}' crep_all_tsv_new
.txt
.up2
*.txt
|sort|uniq
> resLOC
.lst