2 package SGN
::Controller
::AJAX
::Blast
;
9 use Storable qw
| nstore retrieve
|;
11 use Tie
::UrlEncoder
; our %urlencode;
12 use File
::Temp qw
| tempfile
|;
13 use File
::Basename qw
| basename
|;
14 use File
::Copy qw
| copy
|;
15 use File
::Spec qw
| catfile
|;
16 use File
::Slurp qw
| read_file write_file
|;
17 use File
::NFSLock qw
| uncache
|;
19 use CXGN
::Page
::UserPrefs
;
20 use CXGN
::Tools
::List qw
/distinct evens/;
21 use CXGN
::Blast
::Parse
;
22 use CXGN
::Blast
::SeqQuery
;
24 use Time
::HiRes
qw(gettimeofday tv_interval);
26 BEGIN { extends
'Catalyst::Controller::REST'; };
29 default => 'application/json',
31 map => { 'application/json' => 'JSON', 'text/html' => 'JSON' },
34 sub run
: Path
('/tools/blast/run') Args
(0) {
38 my $params = $c->req->params();
39 my $input_query = CXGN
::Blast
::SeqQuery
->new();
40 my $valid = $input_query->validate($c, $params->{input_options
}, $params->{sequence
});
43 $c->stash->{rest
} = { error
=> "Your input contains illegal characters. Please verify your input." };
47 $params->{sequence
} = $input_query->process($c, $params->{input_options
}, $params->{sequence
});
49 if ($params->{input_options
} eq 'autodetect') {
50 my $detected_type = $input_query->autodetect_seq_type($c, $params->{input_options
}, $params->{sequence
});
52 # print STDERR "SGN BLAST detected your sequence is: $detected_type\n";
54 # create a hash with the valid options =1 and check and if result 0 return error
55 my %blast_seq_db_program = (
75 if (!$blast_seq_db_program{$detected_type}{$params->{db_type
}}{$params->{program
}}) {
76 $c->stash->{rest
} = { error
=> "the program ".$params->{program
}." can not be used with a ".$detected_type." sequence (autodetected) and a ".$params->{db_type
}." database.\n\nPlease, use different options and disable the autodetection of the query type if it is wrong." };
82 my $blast_tmp_output = $c->config->{cluster_shared_tempdir
}."/blast";
83 mkdir $blast_tmp_output if ! -d
$blast_tmp_output;
84 if ($params->{sequence
} =~ /\>/) {
85 $seq_count= $params->{sequence
} =~ tr/\>/\>/;
88 print STDERR
"SEQ COUNT = $seq_count\n";
89 my ($seq_fh, $seqfile) = tempfile
(
91 DIR
=> $blast_tmp_output,
94 my $jobid = basename
($seqfile);
96 print STDERR
"JOB ID CREATED: $jobid\n";
98 my $schema = $c->dbic_schema("SGN::Schema");
105 my $sequence = $params->{sequence
};
107 $sequence =~ s/^\s+|\s+$|\n\s*\n//g; #< trim out leading and trailing whitespace and blank lines
108 # if ($sequence !~ /^\s*>/) {
109 # $sequence = ">WEB-USER-SEQUENCE (Unknown)\n$sequence";
111 # $sequence .= "\n"; #< add a final newline
113 print STDERR
"Opening file for sequence ($seqfile)... ";
114 open(my $FH, ">", $seqfile) || die "Can't open file for query ($seqfile)\n";
115 print $FH $sequence if $sequence;
118 # print STDERR "Done.\n";
120 # if(my $file_upload = $page->get_upload) {
121 # if ( my $fh = $file_upload->fh ) {
122 # print $seq_fh $_ while <$fh>;
126 print STDERR
"Parsing with bioperl... ";
127 my $i = Bio
::SeqIO
->new(
133 while ( my $s = $i->next_seq ) {
134 $s->length or $c->throw(
135 message
=> 'Sequence '.encode_entities
('"'.$s->id.'"').' is empty, this is not allowed by BLAST.',
141 die $_ if ref; #< throw it onward if it's an exception
143 if( /MSG:([^\n]+)/ ) {
146 s/at \/[\-\w \/\
.]+ line \d
+.+//; # remove anything resembling backtraces
147 $c->throw( message
=> $_,
149 developer_message
=> $full_error,
153 $seq_count >= 1 or $c->throw( message
=> 'no sequence submitted, cannot run BLAST',
155 developer_message
=> Data
::Dumper
::Dumper
({
156 '$seq_count' => $seq_count,
157 '$seq_filename' => $seqfile,
161 return -i
=> $seqfile;
167 my $m = $params->{matrix
};
168 $m =~ /^(BLOSUM|PAM)\d+$/
169 or $c->throw( is_error
=> 0, message
=> "invalid matrix '$m'" );
176 $params->{evalue
} =~ s/[^\d\.e\-\+]//gi; #can only be these characters
177 return -e
=> $params->{evalue
} ?
$params->{evalue
} : 1;
182 my $h = $params->{maxhits
} || 20;
183 $h =~ s/\D//g; #only digits allowed
189 my $h = $params->{maxhits
} || 20;
190 $h =~ s/\D//g; #only digits allowed
196 return -F
=> $params->{filterq
} ?
'T' : 'F';
201 # $params->{outformat} =~ s/\D//g; #only digits allowed
202 # return -m => $params->{outformat};
208 my $bdb = $schema->resultset("BlastDb")->find($params->{database
} )
209 or die "could not find bdb with file_base '$params->{database}'";
211 my $basename = File
::Spec
->catfile($c->config->{blast_db_path
},$bdb->file_base());
212 #returns '/data/shared/blast/databases/genbank/nr'
213 #remember the ID of the blast db the user just blasted with
215 return -d
=> $basename;
220 $params->{program
} =~ s/[^a-z]//g; #only lower-case letters
221 return -p
=> $params->{program
};
225 print STDERR
"BUILDING COMMAND...\n";
228 # build our command with our arg handlers
230 my @command = ('blastall');
231 foreach my $k (keys %arg_handlers) {
233 print STDERR
"evaluating $k...";
234 my @x = $arg_handlers{$k}->();
235 print STDERR
"component:
236 ", (join ",", @x)."\n";
237 @command = (@command, @x);
240 # To get the proper format for gi sequences (CitrusGreening.org case)
241 push(@command, '-I');
244 print STDERR
"COMMAND: ".join(" ", @command);
256 $job = CXGN
::Tools
::Run
->run_cluster(
259 temp_base
=> $blast_tmp_output,
260 queue
=> $c->config->{'web_cluster_queue'},
261 working_dir
=> $blast_tmp_output,
263 # temp_base => $c->config->{'cluster_shared_tempdir'},
264 # queue => $c->config->{'web_cluster_queue'},
265 # working_dir => $c->config->{'cluster_shared_tempdir'},
267 # don't block and wait if the cluster looks full
268 max_cluster_jobs
=> 1_000_000_000
,
273 print STDERR
"Saving job state to $seqfile.job for id ".$job->job_id()."\n";
275 $job->do_not_cleanup(1);
277 nstore
( $job, $seqfile.".job" ) or die 'could not serialize job object';
284 print STDERR
"An error occurred! $@\n";
285 $c->stash->{rest
} = { error
=> $@
};
288 # write data in blast.log
289 my $blast_log_path = $c->config->{blast_log
};
291 if (-e
$blast_log_path) {
292 open($blast_log_fh, ">>", $blast_log_path) || print STDERR
"cannot create $blast_log_path\n";
294 open($blast_log_fh, ">", $blast_log_path) || print STDERR
"cannot open $blast_log_path\n";
295 print $blast_log_fh "Seq_num\tDB_id\tProgram\teval\tMaxHits\tMatrix\tDate\n";
297 print $blast_log_fh "$seq_count\t".$params->{database
}."\t".$params->{program
}."\t".$params->{evalue
}."\t".$params->{maxhits
}."\t".$params->{matrix
}."\t".localtime()."\n";
300 print STDERR
"Passing jobid code ".(basename
($jobid))."\n";
301 $c->stash->{rest
} = { jobid
=> basename
($jobid),
302 seq_count
=> $seq_count,
308 sub check
: Path
('/tools/blast/check') Args
(1) {
313 # my $t0 = [gettimeofday]; #-------------------------- TIME CHECK
315 my $blast_tmp_output = $c->get_conf('cluster_shared_tempdir')."/blast";
317 #my $jobid =~ s/\.\.//g; # prevent hacks
318 my $job = retrieve
($blast_tmp_output."/".$jobid.".job");
322 # my $t1 = [gettimeofday]; #-------------------------- TIME CHECK
325 $c->stash->{rest
} = { status
=> 'running', };
327 # my $t2 = [gettimeofday]; #-------------------------- TIME CHECK
329 # my $t1_t2 = tv_interval $t1, $t2;
330 # print STDERR "Job alive: $t1_t2\n";
336 # my $t3 = [gettimeofday]; #-------------------------- TIME CHECK
338 # the job has finished
339 # copy the cluster temp file back into "apache space"
341 my $result_file = $self->jobid_to_file($c, $jobid.".out");
343 my $job_out_file = $job->out_file();
345 uncache
($job_out_file);
346 last if -f
$job_out_file;
348 # my $t4 = [gettimeofday]; #-------------------------- TIME CHECK
349 # my $t3_t4 = tv_interval $t3, $t4;
350 # print STDERR "Job not alive loop: $t3_t4\n";
353 # my $t5 = [gettimeofday]; #-------------------------- TIME CHECK
355 -f
$job_out_file or die "job output file ($job_out_file) doesn't exist";
356 -r
$job_out_file or die "job output file ($job_out_file) not readable";
358 # my $t6 = [gettimeofday]; #-------------------------- TIME CHECK
360 # You may wish to provide a different output file to send back
361 # rather than STDOUT from the job. Use the out_file_override
362 # parameter if this is the case.
363 #my $out_file = $out_file_override || $job->out_file();
364 # system("ls $blast_tmp_output 2>&1 >/dev/null");
366 # my $t7 = [gettimeofday]; #-------------------------- TIME CHECK
368 # system("ls $c->{config}->{cluster_shared_tempdir} 2>&1 >/dev/null");
369 copy
($job_out_file, $result_file) or die "Can't copy result file '$job_out_file' to $result_file ($!)";
371 # my $t8 = [gettimeofday]; #-------------------------- TIME CHECK
373 #clean up the job tempfiles
376 # my $t9 = [gettimeofday]; #-------------------------- TIME CHECK
378 #also delete the job file
380 # my $t10 = [gettimeofday]; #-------------------------- TIME CHECK
382 # my $t5_t6 = tv_interval $t5, $t6;
383 # my $t6_t7 = tv_interval $t6, $t7;
384 # my $t7_t8 = tv_interval $t7, $t8;
385 # my $t8_t9 = tv_interval $t8, $t9;
386 # my $t9_t10 = tv_interval $t9, $t10;
388 # my $t3_t10 = tv_interval $t3, $t10;
389 # my $t0_t10 = tv_interval $t0, $t10;
391 # print STDERR "check 5-6 interval: $t5_t6\n";
392 # print STDERR "check 6-7 interval: $t6_t7\n";
393 # print STDERR "check 7-8 interval: $t7_t8\n";
394 # print STDERR "check 8-9 interval: $t8_t9\n";
395 # print STDERR "check 9-10 interval: $t9_t10\n";
397 # print STDERR "Job not alive (else): $t3_t10\n";
398 # print STDERR "CHECK SUB TIME: $t0_t10\n";
401 $c->stash->{rest
} = { status
=> "complete" };
405 # fetch some html/js required for displaying the parse report
406 # sub get_prereqs : Path('/tools/blast/prereqs') Args(1) {
411 # my $format=$c->req->param('format');
412 # my $parser = CXGN::Blast::Parse->new();
413 # my $prereqs = $parser->prereqs($format);
414 # $c->stash->{rest} = { prereqs => $prereqs };
417 sub get_result
: Path
('/tools/blast/result') Args
(1) {
422 # my $t0 = [gettimeofday]; #-------------------------- TIME CHECK
424 my $format = $c->req->param('format');
425 my $db_id = $c->req->param('db_id');
427 my $result_file = $self->jobid_to_file($c, $jobid.".out");
428 my $blast_tmp_output = $c->get_conf('cluster_shared_tempdir')."/blast";
430 # system("ls $blast_tmp_output 2>&1 >/dev/null");
431 # system("ls ".($c->config->{cluster_shared_tempdir})." 2>&1 >/dev/null");
433 my $schema = $c->dbic_schema("SGN::Schema");
434 my $db = $schema->resultset("BlastDb")->find($db_id);
435 if (!$db) { die "Can't find database with id $db_id"; }
436 my $parser = CXGN
::Blast
::Parse
->new();
437 my $parsed_data = $parser->parse($c, $format, $result_file, $db);
439 # my $t1 = [gettimeofday]; #-------------------------- TIME CHECK
440 # my $t0_t1 = tv_interval $t0, $t1;
441 # print STDERR "GET RESULT SUB TIME: $t0_t1\n";
443 $c->stash->{rest
} = $parsed_data; # { blast_report => '<pre>'.(join("\n", read_file($parsed_file))).'</pre>', };
451 return File
::Spec
->catfile($c->config->{basepath
}, $c->tempfiles_subdir('blast'), "$jobid");
455 sub search_gene_ids
{
456 my $ids_array = shift;
457 my $blastdb_path = shift;
458 my @ids = @
{$ids_array};
461 my $fs = Bio
::BLAST
::Database
->open(full_file_basename
=> "$blastdb_path",);
463 foreach my $input_string (@ids) {
465 if ($fs->get_sequence($input_string)) {
466 my $seq_obj = $fs->get_sequence($input_string);
467 my $seq = $seq_obj->seq();
468 my $id = $seq_obj->id();
469 my $desc = $seq_obj->desc();
472 for (my $i=0; $i<length($seq); $i=$i+60) {
473 $new_seq = $new_seq.substr($seq,$i,60)."<br>";
476 push(@output_seqs, ">$id $desc<br>$new_seq");
479 return join('', @output_seqs);
482 sub search_desc
: Path
('/tools/blast/desc_search/') Args
(0) {
487 my $schema = $c->dbic_schema("SGN::Schema");
488 my $params = $c->req->params();
489 my $input_string = $params->{blast_desc
};
490 my $db_id = $params->{database
};
492 my $bdb = $schema->resultset("BlastDb")->find($db_id) || die "could not find bdb with file_base $db_id";
493 my $blastdb_path = File
::Spec
->catfile($c->config->{blast_db_path
}, $bdb->file_base());
495 my $grepcmd = "grep -i \"$input_string\" $blastdb_path \| sed 's/>//' \| cut -d ' ' -f 1";
496 my $output_seq = `$grepcmd`;
500 @ids = split(/\n/, $output_seq);
501 $output_seqs = search_gene_ids
(\
@ids,$blastdb_path);
504 $output_seqs = "There were not results for your search\n";
506 $c->stash->{rest
} = {output_seq
=> "$output_seqs"};