1 use CatalystX
::GlobalContext
qw( $c );
3 use warnings; #FATAL => 'all';
9 use File::Temp qw/tempfile/;
11 use Storable qw / nstore /;
12 use Tie::UrlEncoder; our %urlencode;
15 use CXGN::DB::Connection;
16 use CXGN::Page::UserPrefs;
19 use CXGN::Tools::Identifiers;
20 use CXGN::Tools::List qw/distinct evens/;
22 my $page = CXGN::Page->new( "blast search execution", "Rob" );
23 my $prefs = CXGN::Page::UserPrefs->new( CXGN::DB::Connection->new );
32 sub {()}, #< does nothing to blast command
37 my $sequence = $params{sequence};
39 $sequence =~ s/^\s+|\s+$|\n\s*\n//g; #< trim out leading and trailing whitespace and blank lines
40 if ($sequence !~ /^\s*>/) {
41 $sequence = ">WEB-USER-SEQUENCE (Unknown)\n$sequence";
43 $sequence .= "\n"; #< add a final newline
47 #make a tempfile that has our sequence(s) in it
48 my ($seq_fh, $seq_filename) = tempfile( "seqXXXXXX",
49 DIR=> $c->get_conf('cluster_shared_tempdir'),
51 $seq_fh->print($sequence) if $sequence;
53 if(my $file_upload = $page->get_upload) {
54 if ( my $fh = $file_upload->fh ) {
55 print $seq_fh $_ while <$fh>;
59 seek $seq_fh,0,0; #< rewind the filehandle
60 # go over file, checking for empty seqs or other badness
61 # also, count the number of seqs in the file
62 my $i = Bio::SeqIO->new(
67 while ( my $s = $i->next_seq ) {
68 $seq_count++ if $s->length;
69 validate_seq( $s, $params{program} );
70 $s->length or $c->throw(
71 message => 'Sequence '.encode_entities('"'.$s->id.'"').' is empty, this is not allowed by BLAST.',
76 die $_ if ref; #< throw it onward if it's an exception
78 if( /MSG:([^\n]+)/ ) {
81 s/at \/[\-\w \/\.]+ line \d+.+//; # remove anything resembling backtraces
82 $c->throw( message => $_,
84 developer_message => $full_error,
88 $seq_count >= 1 or $c->throw( message => 'no sequence submitted, cannot run BLAST',
90 developer_message => Data::Dumper::Dumper({
91 '$seq_count' => $seq_count,
92 '$seq_filename' => $seq_filename,
96 return -i => $seq_filename
101 my $m = $params{matrix};
102 $m =~ /^(BLOSUM|PAM)\d\d$/
103 or $c->throw( is_error => 0, message => "invalid matrix '$m'" );
110 $params{expect} =~ s/[^\d\.e\-\+]//gi; #can only be these characters
111 return -e => $params{expect} || 1
116 my $h = $params{maxhits} || 100;
117 $h =~ s/\D//g; #only digits allowed
123 return -F => $params{filterq} ? 'T' : 'F'
128 $params{outformat} =~ s/\D//g; #only digits allowed
129 return -m => $params{outformat}
134 #my ($bdb) = CXGN::BlastDB->search( file_base => $params{database} )
135 #or die "could not find bdb with file_base '$params{database}'";
137 # warn "setting pref last_blast_db_fil
138 #database object for specific ID_No
139 my $bdb = CXGN::BlastDB->from_id($params{database});
140 my $basename = $bdb->full_file_basename;
141 #returns '/data/shared/blast/databases/genbank/nr'
142 #remember the ID of the blast db the user just blasted with
143 $prefs->set_pref( last_blast_db_id => $bdb->blast_db_id );
145 return -d => $basename;
150 $params{program} =~ s/[^a-z]//g; #only lower-case letters
151 return -p => $params{program}
155 sub {()}, #< no effect on command line
158 #sub {warn "GOT FILE $params{file}\n"; ()},
161 #get all the params from our request
162 @params{keys %arg_handlers} = $page->get_arguments(keys %arg_handlers);
164 #build our command with our arg handlers
167 map $_->(), values %arg_handlers
173 #check some specific error conditions
174 # multiple sequences in given to simple BLAST
175 if($params{interface_type} eq 'simple' && $seq_count > 1) {
177 $c->throw( is_error => 0,
179 The Simple BLAST interface is limited to one query sequence. Please
180 use the Advanced BLAST for multiple query sequences.
188 my $job = CXGN
::Tools
::Run
->run_cluster(
190 { temp_base
=> $c->get_conf('cluster_shared_tempdir'),
191 queue
=> $c->get_conf('web_cluster_queue'),
192 working_dir
=> $c->get_conf('cluster_shared_tempdir'),
193 # don't block and wait if the cluster looks full
194 max_cluster_jobs
=> 1_000_000_000
,
198 #$job->do_not_cleanup(1);
200 # store the job object in a file
201 my ($job_file,$job_file_uri) = $c->tempfile( TEMPLATE
=> ['blast','object_XXXXXX'] );
202 nstore
( $job, $job_file )
203 or die 'could not serialize job object';
204 my $job_file_base = basename
($job_file);
206 # url encode the destination pass_back page.
207 delete $params{sequence
};
208 my $pass_back = "/tools/blast/view_result.pl?".hash2param
(%params, seq_count
=> $seq_count).'&report_file=';
209 my $redir_url = "/tools/wait.pl?tmp_app_dir=/blast&job_file=$job_file_base&redirect=$urlencode{$pass_back}";
210 $page->client_redirect( $redir_url );
217 no warnings
'uninitialized';
218 return join '&', map "$urlencode{$_}=$urlencode{$args{$_}}", distinct evens
@_;
221 # validate the given sequence as input for the given blast program
223 my ($s,$program) = @_;
226 ( map { $_ => 'protein' } 'tblastn', 'blastp' ),
227 ( map { $_ => 'DNA' } 'blastn', 'blastx', 'tblastx' ),
230 my $alphabet = $alphabets{$program}
231 or $c->throw( message
=> 'invalid program!',
233 developer_message
=> "program was '$program'",
236 return 0 unless $s->validate_seq; #< bioperl must think it's OK
238 my %not_iupac_pats = ( DNA
=> qr/([^ACGTURYKMSWBDHVN]+)/i,
239 protein
=> qr/([^GAVLIPFYCMHKRWSTDENQBZ\.X\*]+)/i,
240 rna
=> qr/([^ACGTURYKMSWBDHVN]+)/i,
243 my $val_pat = $not_iupac_pats{$alphabet}
244 or $c->throw( message
=> 'invalid alphabet!',
246 developer_message
=> "alphabet was '$alphabet'",
250 message
=> encode_entities
('Sequence "'.$s->id.qq|" contains invalid $alphabet characters "$1"| ),