5 use Time
::HiRes qw
( gettimeofday tv_interval
);
7 use Term
::ANSIColor
qw(:constants);
9 use LWP
::Simple qw
( get
);
21 our($opt_p, $opt_o, $opt_n, $opt_s);
23 $Getopt::Std
::STANDARD_HELP_VERSION
=1;
25 #print "$opt_i, $opt_o, $opt_e\n";
28 #my ($scr) = ($0 =~ m,([^/\\]+)$,);
30 \t-p NetAffx Rice Probe Set Information (.psi) file [./Rice.psi]
31 \t-o Output file (crep_all_tsv.txt)
32 \t-n Number of ProbeSet to retrieve at one time (50 <= 250)
33 \t-s To Start at . (for Rice 1..57381)
36 \nUsage: \033[0;1m$0\033[0;0m [-OPTIONS [-MORE_OPTIONS]] [--] [PROGRAM_ARG1 ...]
38 The following single-character options are accepted:
39 \033[32;1m$help\033[0;0m
40 Options may be merged together. -- stops processing of options.
41 Space is not required between options and their arguments.
44 sub VERSION_MESSAGE
() {
46 $perlv = sprintf "%vd", $^V
if $] >= 5.006;
47 my $ver = sprintf "%vd", $main::VERSION
;
48 my ($scr) = ($0 =~ m
,([^/\\]+)$,);
50 $scr version \033[0;1m$ver\033[0;0m, running under Perl version $perlv.
54 $opt_p='./Rice.psi' if ! defined $opt_p;
55 $opt_o='crep_all_tsv.txt' if ! defined $opt_o;
56 $opt_n=50 if ! defined $opt_n;
58 $opt_n=1 if $opt_n < 1;
59 $opt_n=500 if $opt_n > 500;
60 $opt_s=1 if ! defined $opt_s;
62 $opt_s=1 if $opt_s < 1;
64 warn "\nInput PSI: \033[32;1m$opt_p\033[0;0m\nOutput: \033[32;1m$opt_o\033[0;0m\nRetrieve per Turn: \033[32;1m$opt_n\033[0;0m\n\n";
65 warn "Start at \033[32;1m$opt_s\033[0;0m\n";
66 #warn 'Check fasta id matches: ',GREEN,BOLD,$opt_c?'Enabled':'Disabled',RESET,"\n";
68 my $start_time = [gettimeofday
];
69 ################### MAIN ####################
71 open PSI
,'<',$opt_p or die "Error: $!\n";
74 push @psid,(split /\t/,$_)[1];
75 #print "[$tmp_line]\n";
82 warn GREEN
,BOLD
,$tmp,RESET
," ProbeSets loaded.\n";
84 if ($#psid >= $tmp-1) {
88 warn "Remains: \033[32;1m$tmp\033[0;0m.\n\n";
89 ################### Login ####################
90 my $ua = LWP
::UserAgent
->new;
91 my ($req,$res,$count);
92 $ua->cookie_jar(HTTP
::Cookies
->new(file
=> "lwpcookies.txt", ignore_discard
=> 1,
95 $req = HTTP
::Request
->new(GET
=> 'http://crep.ncpgr.cn/crep-cgi/login2.pl?User_Name=guest&Password=57510426775c5b0f');
97 $res = $ua->request($req);
99 if (! $res->is_success) {
100 die "Error: Login as Guest failed !\n";
103 ################### Retrieve ####################
104 $req = HTTP
::Request
->new(POST
=> 'http://crep.ncpgr.cn/crep-cgi/element_multi_chronologer_view.pl');
105 $req->content_type('application/x-www-form-urlencoded');
106 my $ps_count = $#psid + 1;
107 my ($with_head,$to_retrieve,$csv_data_ref,$percent)=(1);
109 open OUTPUT
,'>',$opt_o or die "Error: $!\n";
111 open OUTPUT
,'>>',$opt_o or die "Error: $!\n";
118 my $csv_subject = Text
::CSV_XS
->new;
119 print STDERR
'== 0.00 %';
121 while ($#psid >= $opt_n-1) {
123 # login everytime to avoid session out.
124 $req = HTTP::Request->new(GET => 'http://crep.ncpgr.cn/crep-cgi/login2.pl?User_Name=guest&Password=57510426775c5b0f');
125 $res = $ua->request($req);
126 die "Error: Login as Guest failed !\n" if ! $res->is_success;
128 $req = HTTP::Request->new(POST => 'http://crep.ncpgr.cn/crep-cgi/element_multi_chronologer_view.pl');
129 $req->content_type('application/x-www-form-urlencoded');
131 $to_retrieve = 'unique_id=' . join ' ',splice(@psid,0,$opt_n);
132 #warn "[$to_retrieve]\n";
133 $req->content($to_retrieve);
135 $res = $ua->request($req);
136 ($csv_data_ref,$count)=&crep_retrieve
(\
$res,$with_head);
137 die "Error: $count <> $opt_n, Server Overflow !\n" if $count != $opt_n;
139 print OUTPUT
$$csv_data_ref;
140 $percent=sprintf "%5.2f",($ps_count-$#psid-1)*100/$ps_count;
141 print STDERR
"\b"x10
, ($#psid % 2) ?
('>='):('=>')," $percent %";
146 # login everytime to avoid session out.
147 $req = HTTP::Request->new(GET => 'http://crep.ncpgr.cn/crep-cgi/login2.pl?User_Name=guest&Password=57510426775c5b0f');
148 $res = $ua->request($req);
149 die "Error: Login as Guest failed !\n" if ! $res->is_success;
151 $req = HTTP
::Request
->new(POST
=> 'http://crep.ncpgr.cn/crep-cgi/element_multi_chronologer_view.pl');
152 $req->content_type('application/x-www-form-urlencoded');
153 $to_retrieve = 'unique_id=' . join ' ', @psid;
154 $req->content($to_retrieve);
156 $res = $ua->request($req);
157 $with_head=0 if $tmp==1;
158 ($csv_data_ref,$count)=&crep_retrieve
(\
$res,$with_head);
160 die "Error: $count <> $tmp, Server Overflow !\n" if $count != $tmp;
161 print OUTPUT
$$csv_data_ref;
162 print STDERR
"\b"x10
,">> 100 % \n";
165 #$req->content('unique_id=Os.54189.1.S1_at Os.42585.1.S1_at Os.12030.1.S1_at');
167 GET or POST is supported.(POST will be parsed over GET)
169 unique_id=Os.54189.1.S1_at Os.42585.1.S1_at Os.12030.1.S1_at
171 checkbox_Os.8271.1.S1_at=&checkbox_Os.8271.2.S1_x_at=&checkbox_Os.18205.1.S1_at=
173 since lc will be used before query, DATA is case-insensitive, still, 'ProbeSet' will be printed in the same case as those in DATA.
175 #$res = $ua->request($req);
176 sub crep_retrieve
() {
177 my ($res_ref,$with_head)=@_;
178 my $tmp_html = $$res_ref->as_string;
179 #</table></center><br /><p /><br /><a href="/runblast/32494.csv">csv file</a><p /><br /></body></html>
180 $tmp_html =~ m
#</table></center><br /><p /><br /><a href="/runblast/(\d+).csv">csv file</a><p /><br /></body></html>$#s;
182 if (! defined $csv_file) {
184 die "Sever HTML wrong !";
186 #print "\nhttp://crep.ncpgr.cn/runblast/$csv_file.csv\n";
187 my $csv = get
"http://crep.ncpgr.cn/runblast/$csv_file.csv";
189 my ($to_return,@row,$csvfh,$status)=('');
190 #my $f = new File::Tabular($csvfh,{fieldSep=>','});
191 #"ProbeSet","TIGR Locus","Minghui 63(endosperm, 14 days after pollination)","Shanyou 63(endosperm, 14 days after pollination)"
192 #my $csv_subject = Text::CSV_XS->new;
193 #my $status = $csv->parse ($line);
194 my @csv=split /\n/,$csv;
196 shift @csv if $with_head == 0;
199 $status = $csv_subject->parse($_);
200 for ($csv_subject->fields) {
203 $to_return .= join "\t",@row;
206 my $item_count=$#csv + 1 - $with_head;
208 return (\
$to_return,$item_count);
211 my $stop_time = [gettimeofday
];
212 print STDERR
"\n Time Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s)\n";