Merge branch 'master' into topic/pedigree_upload_fix
[sgn.git] / cgi-bin / tools / blast / blast_result.pl
blobc6e45596b5fb656ff1158c261ad70430a2b64a61
1 use CatalystX::GlobalContext qw( $c );
2 use strict;
3 use warnings; #FATAL => 'all';
4 use CXGN::Page;
5 use POSIX;
7 use File::Basename;
8 use File::Spec;
9 use File::Temp qw/tempfile/;
10 use HTML::Entities;
11 use Storable qw / nstore /;
12 use Tie::UrlEncoder; our %urlencode;
13 use Try::Tiny;
15 use CXGN::DB::Connection;
16 use CXGN::Page::UserPrefs;
17 use CXGN::BlastDB;
19 use CXGN::Tools::Identifiers;
20 use CXGN::Tools::List qw/distinct evens/;
22 my $page = CXGN::Page->new( "blast search execution", "Rob" );
23 my $prefs = CXGN::Page::UserPrefs->new( CXGN::DB::Connection->new );
25 my %params;
26 my $seq_count = 0;
28 my %arg_handlers =
31 interface_type =>
32 sub {()}, #< does nothing to blast command
35 sequence =>
36 sub {
37 my $sequence = $params{sequence};
38 if( $sequence ) {
39 $sequence =~ s/^\s+|\s+$|\n\s*\n//g; #< trim out leading and trailing whitespace and blank lines
40 if ($sequence !~ /^\s*>/) {
41 $sequence = ">WEB-USER-SEQUENCE (Unknown)\n$sequence";
43 $sequence .= "\n"; #< add a final newline
47 #make a tempfile that has our sequence(s) in it
48 my ($seq_fh, $seq_filename) = tempfile( "seqXXXXXX",
49 DIR=> $c->get_conf('cluster_shared_tempdir'),
51 $seq_fh->print($sequence) if $sequence;
53 if(my $file_upload = $page->get_upload) {
54 if ( my $fh = $file_upload->fh ) {
55 print $seq_fh $_ while <$fh>;
59 seek $seq_fh,0,0; #< rewind the filehandle
60 # go over file, checking for empty seqs or other badness
61 # also, count the number of seqs in the file
62 my $i = Bio::SeqIO->new(
63 -format => 'fasta',
64 -fh => $seq_fh,
66 try {
67 while ( my $s = $i->next_seq ) {
68 $seq_count++ if $s->length;
69 validate_seq( $s, $params{program} );
70 $s->length or $c->throw(
71 message => 'Sequence '.encode_entities('"'.$s->id.'"').' is empty, this is not allowed by BLAST.',
72 is_error => 0,
75 } catch {
76 die $_ if ref; #< throw it onward if it's an exception
77 my $full_error = $_;
78 if( /MSG:([^\n]+)/ ) {
79 $_ = $1;
81 s/at \/[\-\w \/\.]+ line \d+.+//; # remove anything resembling backtraces
82 $c->throw( message => $_,
83 is_error => 0,
84 developer_message => $full_error,
88 $seq_count >= 1 or $c->throw( message => 'no sequence submitted, cannot run BLAST',
89 is_error => 0,
90 developer_message => Data::Dumper::Dumper({
91 '$seq_count' => $seq_count,
92 '$seq_filename' => $seq_filename,
93 }),
96 return -i => $seq_filename
99 matrix =>
100 sub {
101 my $m = $params{matrix};
102 $m =~ /^(BLOSUM|PAM)\d\d$/
103 or $c->throw( is_error => 0, message => "invalid matrix '$m'" );
104 return -M => $m;
108 expect =>
109 sub {
110 $params{expect} =~ s/[^\d\.e\-\+]//gi; #can only be these characters
111 return -e => $params{expect} || 1
114 maxhits =>
115 sub {
116 my $h = $params{maxhits} || 100;
117 $h =~ s/\D//g; #only digits allowed
118 return -b => $h
121 filterq =>
122 sub {
123 return -F => $params{filterq} ? 'T' : 'F'
126 outformat =>
127 sub {
128 $params{outformat} =~ s/\D//g; #only digits allowed
129 return -m => $params{outformat}
132 database =>
133 sub {
134 #my ($bdb) = CXGN::BlastDB->search( file_base => $params{database} )
135 #or die "could not find bdb with file_base '$params{database}'";
137 # warn "setting pref last_blast_db_fil
138 #database object for specific ID_No
139 my $bdb = CXGN::BlastDB->from_id($params{database});
140 my $basename = $bdb->full_file_basename;
141 #returns '/data/shared/blast/databases/genbank/nr'
142 #remember the ID of the blast db the user just blasted with
143 $prefs->set_pref( last_blast_db_id => $bdb->blast_db_id );
145 return -d => $basename;
148 program =>
149 sub {
150 $params{program} =~ s/[^a-z]//g; #only lower-case letters
151 return -p => $params{program}
154 output_graphs =>
155 sub {()}, #< no effect on command line
157 file => sub {},
158 #sub {warn "GOT FILE $params{file}\n"; ()},
161 #get all the params from our request
162 @params{keys %arg_handlers} = $page->get_arguments(keys %arg_handlers);
164 #build our command with our arg handlers
165 my @command =
166 ( 'blastall',
167 map $_->(), values %arg_handlers
170 # save our prefs
171 $prefs->save;
173 #check some specific error conditions
174 # multiple sequences in given to simple BLAST
175 if($params{interface_type} eq 'simple' && $seq_count > 1) {
177 $c->throw( is_error => 0,
178 message => <<EOM,
179 The Simple BLAST interface is limited to one query sequence. Please
180 use the Advanced BLAST for multiple query sequences.
187 #now run the blast
188 my $job = CXGN::Tools::Run->run_cluster(
189 @command,
190 { temp_base => $c->get_conf('cluster_shared_tempdir'),
191 queue => $c->get_conf('web_cluster_queue'),
192 working_dir => $c->get_conf('cluster_shared_tempdir'),
193 # don't block and wait if the cluster looks full
194 max_cluster_jobs => 1_000_000_000,
198 #$job->do_not_cleanup(1);
200 # store the job object in a file
201 my ($job_file,$job_file_uri) = $c->tempfile( TEMPLATE => ['blast','object_XXXXXX'] );
202 nstore( $job, $job_file )
203 or die 'could not serialize job object';
204 my $job_file_base = basename($job_file);
206 # url encode the destination pass_back page.
207 delete $params{sequence};
208 my $pass_back = "/tools/blast/view_result.pl?".hash2param(%params, seq_count => $seq_count).'&report_file=';
209 my $redir_url = "/tools/wait.pl?tmp_app_dir=/blast&job_file=$job_file_base&redirect=$urlencode{$pass_back}";
210 $page->client_redirect( $redir_url );
213 ####### subs #######
215 sub hash2param {
216 my %args = @_;
217 no warnings 'uninitialized';
218 return join '&', map "$urlencode{$_}=$urlencode{$args{$_}}", distinct evens @_;
221 # validate the given sequence as input for the given blast program
222 sub validate_seq {
223 my ($s,$program) = @_;
225 my %alphabets = (
226 ( map { $_ => 'protein' } 'tblastn', 'blastp' ),
227 ( map { $_ => 'DNA' } 'blastn', 'blastx', 'tblastx' ),
230 my $alphabet = $alphabets{$program}
231 or $c->throw( message => 'invalid program!',
232 is_error => 1,
233 developer_message => "program was '$program'",
236 return 0 unless $s->validate_seq; #< bioperl must think it's OK
238 my %not_iupac_pats = ( DNA => qr/([^ACGTURYKMSWBDHVN]+)/i,
239 protein => qr/([^GAVLIPFYCMHKRWSTDENQBZ\.X\*]+)/i,
240 rna => qr/([^ACGTURYKMSWBDHVN]+)/i,
243 my $val_pat = $not_iupac_pats{$alphabet}
244 or $c->throw( message => 'invalid alphabet!',
245 is_error => 1,
246 developer_message => "alphabet was '$alphabet'",
248 $s->seq =~ $val_pat
249 and $c->throw(
250 message => encode_entities('Sequence "'.$s->id.qq|" contains invalid $alphabet characters "$1"| ),
251 is_error => 0,
255 return 1;