modified: Makefile
[GalaxyCodeBases.git] / perl / etc / crep / fetch_crep_data.pl
blob12db10ee80686dab5d3393181f9694d331ed190e
1 #!/usr/bin/perl -w
2 use strict;
3 use warnings;
4 #use IO::Handle;
5 use Time::HiRes qw ( gettimeofday tv_interval );
6 use Getopt::Std;
7 use Term::ANSIColor qw(:constants);
8 use LWP::UserAgent;
9 use LWP::Simple qw ( get );
10 use HTTP::Cookies;
11 use Text::CSV_XS;
12 #use Data::Dumper;
14 $main::VERSION=1.3.2;
16 if (@ARGV == 0) {
17 &VERSION_MESSAGE();
18 &HELP_MESSAGE();
19 die "\n";
21 our($opt_p, $opt_o, $opt_n, $opt_s);
23 $Getopt::Std::STANDARD_HELP_VERSION=1;
24 getopts('p:o:n:s:');
25 #print "$opt_i, $opt_o, $opt_e\n";
27 sub HELP_MESSAGE() {
28 #my ($scr) = ($0 =~ m,([^/\\]+)$,);
29 my $help=<<EOH;
30 \t-p NetAffx Rice Probe Set Information (.psi) file [./Rice.psi]
31 \t-o Output file (crep_all_tsv.txt)
32 \t-n Number of ProbeSet to retrieve at one time (50 <= 250)
33 \t-s To Start at . (for Rice 1..57381)
34 EOH
35 print STDERR <<EOH;
36 \nUsage: \033[0;1m$0\033[0;0m [-OPTIONS [-MORE_OPTIONS]] [--] [PROGRAM_ARG1 ...]
38 The following single-character options are accepted:
39 \033[32;1m$help\033[0;0m
40 Options may be merged together. -- stops processing of options.
41 Space is not required between options and their arguments.
42 EOH
44 sub VERSION_MESSAGE() {
45 my $perlv = $];
46 $perlv = sprintf "%vd", $^V if $] >= 5.006;
47 my $ver = sprintf "%vd", $main::VERSION;
48 my ($scr) = ($0 =~ m,([^/\\]+)$,);
49 print STDERR <<EOH;
50 $scr version \033[0;1m$ver\033[0;0m, running under Perl version $perlv.
51 EOH
54 $opt_p='./Rice.psi' if ! defined $opt_p;
55 $opt_o='crep_all_tsv.txt' if ! defined $opt_o;
56 $opt_n=50 if ! defined $opt_n;
57 $opt_n=int($opt_n);
58 $opt_n=1 if $opt_n < 1;
59 $opt_n=500 if $opt_n > 500;
60 $opt_s=1 if ! defined $opt_s;
61 $opt_s=int($opt_s);
62 $opt_s=1 if $opt_s < 1;
63 &VERSION_MESSAGE();
64 warn "\nInput PSI: \033[32;1m$opt_p\033[0;0m\nOutput: \033[32;1m$opt_o\033[0;0m\nRetrieve per Turn: \033[32;1m$opt_n\033[0;0m\n\n";
65 warn "Start at \033[32;1m$opt_s\033[0;0m\n";
66 #warn 'Check fasta id matches: ',GREEN,BOLD,$opt_c?'Enabled':'Disabled',RESET,"\n";
68 my $start_time = [gettimeofday];
69 ################### MAIN ####################
70 my (@psid,$tmp)=();
71 open PSI,'<',$opt_p or die "Error: $!\n";
72 while (<PSI>) {
73 next if /^#/;
74 push @psid,(split /\t/,$_)[1];
75 #print "[$tmp_line]\n";
77 close PSI;
78 #for (@psid) {
79 # print "[$_]\t";
81 $tmp = $#psid + 1;
82 warn GREEN,BOLD,$tmp,RESET," ProbeSets loaded.\n";
83 $tmp = $opt_s-1;
84 if ($#psid >= $tmp-1) {
85 splice(@psid,0,$tmp);
87 $tmp = $#psid + 1;
88 warn "Remains: \033[32;1m$tmp\033[0;0m.\n\n";
89 ################### Login ####################
90 my $ua = LWP::UserAgent->new;
91 my ($req,$res,$count);
92 $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt", ignore_discard => 1,
93 autosave => 1));
95 $req = HTTP::Request->new(GET => 'http://crep.ncpgr.cn/crep-cgi/login2.pl?User_Name=guest&amp;Password=57510426775c5b0f');
96 # send request
97 $res = $ua->request($req);
98 # check the outcome
99 if (! $res->is_success) {
100 die "Error: Login as Guest failed !\n";
103 ################### Retrieve ####################
104 $req = HTTP::Request->new(POST => 'http://crep.ncpgr.cn/crep-cgi/element_multi_chronologer_view.pl');
105 $req->content_type('application/x-www-form-urlencoded');
106 my $ps_count = $#psid + 1;
107 my ($with_head,$to_retrieve,$csv_data_ref,$percent)=(1);
108 if ($opt_s==1) {
109 open OUTPUT,'>',$opt_o or die "Error: $!\n";
110 } else {
111 open OUTPUT,'>>',$opt_o or die "Error: $!\n";
112 $tmp=tell OUTPUT;
113 warn "$tmp";
114 if ($tmp > 1) {
115 $with_head=0;
118 my $csv_subject = Text::CSV_XS->new;
119 print STDERR '== 0.00 %';
120 $tmp=-1;
121 while ($#psid >= $opt_n-1) {
122 =pod
123 # login everytime to avoid session out.
124 $req = HTTP::Request->new(GET => 'http://crep.ncpgr.cn/crep-cgi/login2.pl?User_Name=guest&amp;Password=57510426775c5b0f');
125 $res = $ua->request($req);
126 die "Error: Login as Guest failed !\n" if ! $res->is_success;
128 $req = HTTP::Request->new(POST => 'http://crep.ncpgr.cn/crep-cgi/element_multi_chronologer_view.pl');
129 $req->content_type('application/x-www-form-urlencoded');
130 =cut
131 $to_retrieve = 'unique_id=' . join ' ',splice(@psid,0,$opt_n);
132 #warn "[$to_retrieve]\n";
133 $req->content($to_retrieve);
134 $to_retrieve='';
135 $res = $ua->request($req);
136 ($csv_data_ref,$count)=&crep_retrieve(\$res,$with_head);
137 die "Error: $count <> $opt_n, Server Overflow !\n" if $count != $opt_n;
138 $with_head=0;
139 print OUTPUT $$csv_data_ref;
140 $percent=sprintf "%5.2f",($ps_count-$#psid-1)*100/$ps_count;
141 print STDERR "\b"x10, ($#psid % 2) ? ('>='):('=>')," $percent %";
142 $tmp=1;
144 if ($#psid >= 0) {
145 =pod
146 # login everytime to avoid session out.
147 $req = HTTP::Request->new(GET => 'http://crep.ncpgr.cn/crep-cgi/login2.pl?User_Name=guest&amp;Password=57510426775c5b0f');
148 $res = $ua->request($req);
149 die "Error: Login as Guest failed !\n" if ! $res->is_success;
150 =cut
151 $req = HTTP::Request->new(POST => 'http://crep.ncpgr.cn/crep-cgi/element_multi_chronologer_view.pl');
152 $req->content_type('application/x-www-form-urlencoded');
153 $to_retrieve = 'unique_id=' . join ' ', @psid;
154 $req->content($to_retrieve);
155 $to_retrieve='';
156 $res = $ua->request($req);
157 $with_head=0 if $tmp==1;
158 ($csv_data_ref,$count)=&crep_retrieve(\$res,$with_head);
159 $tmp = $#psid + 1;
160 die "Error: $count <> $tmp, Server Overflow !\n" if $count != $tmp;
161 print OUTPUT $$csv_data_ref;
162 print STDERR "\b"x10,">> 100 % \n";
164 close OUTPUT;
165 #$req->content('unique_id=Os.54189.1.S1_at Os.42585.1.S1_at Os.12030.1.S1_at');
166 =pod
167 GET or POST is supported.(POST will be parsed over GET)
168 DATA can be:
169 unique_id=Os.54189.1.S1_at Os.42585.1.S1_at Os.12030.1.S1_at
171 checkbox_Os.8271.1.S1_at=&checkbox_Os.8271.2.S1_x_at=&checkbox_Os.18205.1.S1_at=
173 since lc will be used before query, DATA is case-insensitive, still, 'ProbeSet' will be printed in the same case as those in DATA.
174 =cut
175 #$res = $ua->request($req);
176 sub crep_retrieve () {
177 my ($res_ref,$with_head)=@_;
178 my $tmp_html = $$res_ref->as_string;
179 #</table></center><br /><p /><br /><a href="/runblast/32494.csv">csv file</a><p /><br /></body></html>
180 $tmp_html =~ m#</table></center><br /><p /><br /><a href="/runblast/(\d+).csv">csv file</a><p /><br /></body></html>$#s;
181 my $csv_file=$1;
182 if (! defined $csv_file) {
183 print $tmp_html;
184 die "Sever HTML wrong !";
186 #print "\nhttp://crep.ncpgr.cn/runblast/$csv_file.csv\n";
187 my $csv = get "http://crep.ncpgr.cn/runblast/$csv_file.csv";
188 #print $csv;
189 my ($to_return,@row,$csvfh,$status)=('');
190 #my $f = new File::Tabular($csvfh,{fieldSep=>','});
191 #"ProbeSet","TIGR Locus","Minghui 63(endosperm, 14 days after pollination)","Shanyou 63(endosperm, 14 days after pollination)"
192 #my $csv_subject = Text::CSV_XS->new;
193 #my $status = $csv->parse ($line);
194 my @csv=split /\n/,$csv;
195 $csv='';
196 shift @csv if $with_head == 0;
197 for ( @csv ) {
198 @row=();
199 $status = $csv_subject->parse($_);
200 for ($csv_subject->fields) {
201 push @row,$_;
203 $to_return .= join "\t",@row;
204 $to_return .= "\n";
206 my $item_count=$#csv + 1 - $with_head;
207 @csv=();
208 return (\$to_return,$item_count);
211 my $stop_time = [gettimeofday];
212 print STDERR "\n Time Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s)\n";
214 __END__