5 use Time
::HiRes qw
( gettimeofday tv_interval
);
7 use Term
::ANSIColor
qw(:constants);
9 use LWP
::Simple qw
( get
);
12 use HTML
::TableExtract
;
22 our($opt_p, $opt_o, $opt_n, $opt_s);
24 $Getopt::Std
::STANDARD_HELP_VERSION
=1;
26 #print "$opt_i, $opt_o, $opt_e\n";
29 #my ($scr) = ($0 =~ m,([^/\\]+)$,);
31 \t-p NetAffx Rice Probe Set Information (.psi) file [./Rice.psi]
32 \t-o Output file (crep_anno_tsv.txt)
33 \t-s To Start at . (for Rice 1..57381)
36 \nUsage: \033[0;1m$0\033[0;0m [-OPTIONS [-MORE_OPTIONS]] [--] [PROGRAM_ARG1 ...]
38 The following single-character options are accepted:
39 \033[32;1m$help\033[0;0m
40 Options may be merged together. -- stops processing of options.
41 Space is not required between options and their arguments.
44 sub VERSION_MESSAGE
() {
46 $perlv = sprintf "%vd", $^V
if $] >= 5.006;
47 my $ver = sprintf "%vd", $main::VERSION
;
48 my ($scr) = ($0 =~ m
,([^/\\]+)$,);
50 $scr version \033[0;1m$ver\033[0;0m, running under Perl version $perlv.
54 $opt_p='./Rice.psi' if ! defined $opt_p;
55 $opt_o='crep_anno_tsv.txt' if ! defined $opt_o;
56 $opt_s=1 if ! defined $opt_s;
58 $opt_s=1 if $opt_s < 1;
60 warn "\nInput PSI: \033[32;1m$opt_p\033[0;0m\nOutput: \033[32;1m$opt_o\033[0;0m\n\n";
61 warn "Start at \033[32;1m$opt_s\033[0;0m\n";
62 #warn 'Check fasta id matches: ',GREEN,BOLD,$opt_c?'Enabled':'Disabled',RESET,"\n";
64 my $start_time = [gettimeofday
];
65 ################### MAIN ####################
67 open PSI
,'<',$opt_p or die "Error: $!\n";
70 push @psid,(split /\t/,$_)[1];
71 #print "[$tmp_line]\n";
78 warn GREEN
,BOLD
,$tmp,RESET
," ProbeSets loaded.\n";
80 if ($#psid >= $tmp-1) {
84 warn "Remains: \033[32;1m$tmp\033[0;0m.\n\n";
85 ################### Login ####################
86 my $ua = LWP
::UserAgent
->new;
87 my ($req,$res,$count);
88 $ua->cookie_jar(HTTP
::Cookies
->new(file
=> "lwpcookies2.txt", ignore_discard
=> 1,
91 $req = HTTP
::Request
->new(GET
=> 'http://crep.ncpgr.cn/crep-cgi/login2.pl?User_Name=guest&Password=57510426775c5b0f');
93 $res = $ua->request($req);
95 if (! $res->is_success) {
96 die "Error: Login as Guest failed !\n";
99 ################### Retrieve ####################
100 # http://crep.ncpgr.cn/crep-cgi/elements_details.pl?unique_id=Os.24470.1.A1_at
101 $req = HTTP
::Request
->new(POST
=> 'http://crep.ncpgr.cn/crep-cgi/elements_details.pl');
102 $req->content_type('application/x-www-form-urlencoded');
103 my $ps_count = $#psid + 1;
104 my ($with_head,$to_retrieve,$csv_data_ref,$percent)=(1);
106 open OUTPUT
,'>',$opt_o or die "Error: $!\n";
108 open OUTPUT
,'>>',$opt_o or die "Error: $!\n";
115 my $csv_subject = Text
::CSV_XS
->new;
116 print STDERR
'== 0.00 %';
118 while ($#psid >= 0) {
119 $to_retrieve = shift(@psid);
120 $req = HTTP
::Request
->new(GET
=> 'http://crep.ncpgr.cn/crep-cgi/elements_details.pl?unique_id='.$to_retrieve);
121 #$req->content_type('application/x-www-form-urlencoded');
123 $res = $ua->request($req);
124 ($csv_data_ref,$count)=&crep_retrieve
(\
$res,$with_head,$to_retrieve);
125 die "Error: $count <> 1, Server Overflow !\n" if $count != 1;
127 print OUTPUT
$$csv_data_ref;
128 #print "$$csv_data_ref";
129 $percent=sprintf "%5.2f",($ps_count-$#psid-1)*100/$ps_count;
130 print STDERR
"\b"x10
, ($#psid % 2) ?
('>='):('=>')," $percent %";
138 sub crep_retrieve
() {
139 my ($res_ref,$with_head,$to_retrieve)=@_;
140 my $tmp_html = $$res_ref->content;
142 <tr bgcolor="#FFFFFF"><th align="left" valign="center" width="20%">Associated TIGR Gene Locus <a class="descrp" onclick="help_window('/crep-cgi/help.pl?Element_details','Help','450','400')" href="javascript:void(0);"><img height="23" border="0" src="/images/question_s.gif" width="8" /></a></th><td align="left"><table border="0" cellspacing="1" cellpadding="5" width="95%"><tr><th align="center" valign="center" width="20%">TIGR Locus</th><th align="center" valign="center" width="20%">Matching Probes</th><th align="center" valign="center" width="60%">Description</th></tr><tr><td align="center" valign="center">NONE</td><td align="left" valign="center" width="20%">NONE</td><td align="left" valign="center" width="60%">NONE</td></tr></table></td></tr>
144 <tr bgcolor="#FFFFFF"><th align="left" valign="center" width="20%">Associated TIGR Gene Locus <a class="descrp" onclick="help_window('/crep-cgi/help.pl?Element_details','Help','450','400')" href="javascript:void(0);"><img height="23" border="0" src="/images/question_s.gif" width="8" /></a></th><td align="left"><table border="0" cellspacing="1" cellpadding="5" width="95%"><tr><th align="center" valign="center" width="20%">TIGR Locus</th><th align="center" valign="center" width="20%">Matching Probes</th><th align="center" valign="center" width="60%">Description</th></tr><tr><td align="center" valign="center" width="20%"><a href='http://rice.plantbiology.msu.edu/cgi-bin/ORF_infopage.cgi?db=osa1r5&orf=LOC_Os12g17920' target='_blank'>LOC_Os12g17920</a></td><td align="center" valign="center" width="20%">1/11</td><td align="center" valign="center" width="60%">hypothetical protein</td></tr><tr><td align="center" valign="center" width="20%"><a href='http://rice.plantbiology.msu.edu/cgi-bin/ORF_infopage.cgi?db=osa1r5&orf=LOC_Os11g17770' target='_blank'>LOC_Os11g17770</a></td><td align="center" valign="center" width="20%">1/11</td><td align="center" valign="center" width="60%">transposon protein, putative, Mutator sub-class</td></tr></table></td></tr>
146 $tmp_html = (grep /Associated TIGR Gene Locus/,(split /\n/,$tmp_html))[0];
147 #$tmp_html =~ m#^<tr bgcolor=\"\#FFFFFF\"><th[^>]*>Associated TIGR Gene Locus.+(<table.*?</table>)#s;
148 #print "\n\n[$tmp_html]\n";
149 my $te = HTML
::TableExtract
->new( headers
=> [('TIGR Locus', 'Matching Probes', 'Description')] );
150 $te->parse("$tmp_html");
151 my $ts = $te->first_table_found();
152 my ($to_return,$item_count,@dat) = ('',1);
154 warn "\n[!Table]$tmp_html\n";
158 foreach my $row ($ts->rows) {
159 push @dat,join("|",@
$row);
161 $to_return = join("\t",$to_retrieve,@dat)."\n";
162 #print "$to_return\n";
163 return (\
$to_return,$item_count);
166 my $stop_time = [gettimeofday
];
167 print STDERR
"\n Time Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s)\n";