2 package SGN
::Controller
::AJAX
::Blast
;
9 use Storable qw
| nstore retrieve
|;
11 use Tie
::UrlEncoder
; our %urlencode;
12 use File
::Temp qw
| tempfile
|;
13 use File
::Basename qw
| basename
|;
14 use File
::Copy qw
| copy
|;
15 use File
::Spec qw
| catfile
|;
16 use File
::Slurp qw
| read_file write_file
|;
17 use File
::NFSLock qw
| uncache
|;
19 use CXGN
::Page
::UserPrefs
;
20 use CXGN
::Tools
::List qw
/distinct evens/;
21 use CXGN
::Blast
::Parse
;
22 use CXGN
::Blast
::SeqQuery
;
25 my $json = JSON
::Any
->new;
27 use Time
::HiRes
qw(gettimeofday tv_interval);
29 BEGIN { extends
'Catalyst::Controller::REST'; };
32 default => 'application/json',
34 map => { 'application/json' => 'JSON', 'text/html' => 'JSON' },
# run: Catalyst action bound to '/tools/blast/run' (no extra path arguments).
#
# From the statements visible here it:
#   * validates and normalises the submitted query sequence,
#   * writes it to a temp file under <cluster_shared_tempdir>/blast,
#   * assembles a 'blastall' command line from per-option handlers,
#   * submits it through CXGN::Tools::Run->run_cluster,
#   * serialises the job object to "<seqfile>.job" with Storable's nstore,
#   * writes one line to the BLAST log, and
#   * stashes { jobid, seq_count } -- or { error } -- as the REST reply.
37 sub run
: Path
('/tools/blast/run') Args
(0) {
# All user-supplied options are read from the request parameters.
41 my $params = $c->req->params();
42 my $input_query = CXGN
::Blast
::SeqQuery
->new();
# validate() receives the context, the declared input type and the raw query.
43 my $valid = $input_query->validate($c, $params->{input_options
}, $params->{sequence
});
# Error reply for rejected input (presumably guarded by a test on $valid).
46 $c->stash->{rest
} = { error
=> "Your input contains illegal characters. Please verify your input." };
# Replace the raw query with the processed form returned by the query object.
50 $params->{sequence
} = $input_query->process($c, $params->{input_options
}, $params->{sequence
});
# Only when the client asked for autodetection is the sequence type guessed
# and checked against the requested database type and program.
52 if ($params->{input_options
} eq 'autodetect') {
53 my $detected_type = $input_query->autodetect_seq_type($c, $params->{input_options
}, $params->{sequence
});
55 # print STDERR "SGN BLAST detected your sequence is: $detected_type\n";
57 # create a hash with the valid options =1 and check and if result 0 return error
58 my %blast_seq_db_program = (
# A false/missing entry for {detected type}{db_type}{program} means the
# combination is not allowed; the client gets an explanatory error.
78 if (!$blast_seq_db_program{$detected_type}{$params->{db_type
}}{$params->{program
}}) {
79 $c->stash->{rest
} = { error
=> "the program ".$params->{program
}." can not be used with a ".$detected_type." sequence (autodetected) and a ".$params->{db_type
}." database.\n\nPlease, use different options and disable the autodetection of the query type if it is wrong." };
# Working directory for BLAST jobs; created on first use.
85 my $blast_tmp_output = $c->config->{cluster_shared_tempdir
}."/blast";
86 mkdir $blast_tmp_output if ! -d
$blast_tmp_output;
# Count '>' characters: tr/// with identical search and replacement lists
# leaves the string unchanged and returns the number of matches
# (presumably one '>' per FASTA header -- confirm).
87 if ($params->{sequence
} =~ /\>/) {
88 $seq_count= $params->{sequence
} =~ tr/\>/\>/;
91 print STDERR
"SEQ COUNT = $seq_count\n";
# Temp file that will hold the query; it lives in the BLAST working directory.
92 my ($seq_fh, $seqfile) = tempfile
(
94 DIR
=> $blast_tmp_output,
# The job id handed back to the client is the basename of that temp file.
97 my $jobid = basename
($seqfile);
99 print STDERR
"JOB ID CREATED: $jobid\n";
101 my $schema = $c->dbic_schema("SGN::Schema");
108 my $sequence = $params->{sequence
};
# NOTE(review): the third alternative (\n\s*\n) is replaced by the empty
# string, so both newlines around a blank line are removed and the lines
# before and after it end up joined -- confirm this is intended.
110 $sequence =~ s/^\s+|\s+$|\n\s*\n//g; #< trim out leading and trailing whitespace and blank lines
111 # if ($sequence !~ /^\s*>/) {
112 # $sequence = ">WEB-USER-SEQUENCE (Unknown)\n$sequence";
114 # $sequence .= "\n"; #< add a final newline
116 print STDERR
"Opening file for sequence ($seqfile)... ";
# The temp file is re-opened by name for writing; nothing is written when
# $sequence is false (empty string, or the literal "0").
117 open(my $FH, ">", $seqfile) || die "Can't open file for query ($seqfile)\n";
118 print $FH $sequence if $sequence;
121 # print STDERR "Done.\n";
123 # if(my $file_upload = $page->get_upload) {
124 # if ( my $fh = $file_upload->fh ) {
125 # print $seq_fh $_ while <$fh>;
# Read the query back with Bio::SeqIO; every record must have non-zero length.
129 print STDERR
"Parsing with bioperl... ";
130 my $i = Bio
::SeqIO
->new(
136 while ( my $s = $i->next_seq ) {
137 $s->length or $c->throw(
138 message
=> 'Sequence '.encode_entities
('"'.$s->id.'"').' is empty, this is not allowed by BLAST.',
# Error handling for the parse step ($_ holds the error here): references
# are rethrown unchanged; plain strings are checked for a 'MSG:' marker and
# stripped of backtrace-like text before being passed to $c->throw.
144 die $_ if ref; #< throw it onward if it's an exception
146 if( /MSG:([^\n]+)/ ) {
149 s/at \/[\-\w \/\
.]+ line \d
+.+//; # remove anything resembling backtraces
150 $c->throw( message
=> $_,
152 developer_message
=> $full_error,
# Refuse to continue without at least one counted sequence; the developer
# message dumps the count and the temp file name.
156 $seq_count >= 1 or $c->throw( message
=> 'no sequence submitted, cannot run BLAST',
158 developer_message
=> Data
::Dumper
::Dumper
({
159 '$seq_count' => $seq_count,
160 '$seq_filename' => $seqfile,
# Option handlers follow. Each visible 'return' yields a (flag => value)
# pair that is later appended to the blastall command line.
# -i: the query file.
164 return -i
=> $seqfile;
# Matrix name must look like BLOSUM<n> or PAM<n>; anything else is rejected.
170 my $m = $params->{matrix
};
171 $m =~ /^(BLOSUM|PAM)\d+$/
172 or $c->throw( is_error
=> 0, message
=> "invalid matrix '$m'" );
# -e: e-value restricted to digits, '.', 'e', '+', '-'; falls back to 1 when
# the cleaned value is false.
179 $params->{evalue
} =~ s/[^\d\.e\-\+]//gi; #can only be these characters
180 return -e
=> $params->{evalue
} ?
$params->{evalue
} : 1;
# Max hits: default 20, non-digits stripped. The same two statements appear
# twice (presumably once per blastall flag that takes the hit limit).
185 my $h = $params->{maxhits
} || 20;
186 $h =~ s/\D//g; #only digits allowed
192 my $h = $params->{maxhits
} || 20;
193 $h =~ s/\D//g; #only digits allowed
# -F: query filtering, 'T' when the filterq parameter is true, else 'F'.
199 return -F
=> $params->{filterq
} ?
'T' : 'F';
204 # $params->{outformat} =~ s/\D//g; #only digits allowed
205 # return -m => $params->{outformat};
# -d: database path = <blast_db_path>/<file_base of the BlastDb row>.
# NOTE(review): the row is looked up with find() on the 'database' parameter,
# while the die message calls that value a file_base -- confirm which it is.
211 my $bdb = $schema->resultset("BlastDb")->find($params->{database
} )
212 or die "could not find bdb with file_base '$params->{database}'";
214 my $basename = File
::Spec
->catfile($c->config->{blast_db_path
},$bdb->file_base());
215 #returns '/data/shared/blast/databases/genbank/nr'
216 #remember the ID of the blast db the user just blasted with
218 return -d
=> $basename;
# -p: program name reduced to lower-case letters only.
223 $params->{program
} =~ s/[^a-z]//g; #only lower-case letters
224 return -p
=> $params->{program
};
228 print STDERR
"BUILDING COMMAND...\n";
231 # build our command with our arg handlers
# Handlers are invoked in 'keys' order, i.e. the flag order on the command
# line follows Perl's hash iteration order.
233 my @command = ('blastall');
234 foreach my $k (keys %arg_handlers) {
236 print STDERR
"evaluating $k...";
237 my @x = $arg_handlers{$k}->();
238 print STDERR
"component:
239 ", (join ",", @x)."\n";
240 @command = (@command, @x);
243 # To get the proper format for gi sequences (CitrusGreening.org case)
244 push(@command, '-I');
247 print STDERR
"COMMAND: ".join(" ", @command);
# Submit the command to the cluster; temp and working directories are both
# the BLAST working directory, the queue comes from 'web_cluster_queue'.
259 $job = CXGN
::Tools
::Run
->run_cluster(
262 temp_base
=> $blast_tmp_output,
263 queue
=> $c->config->{'web_cluster_queue'},
264 working_dir
=> $blast_tmp_output,
266 # temp_base => $c->config->{'cluster_shared_tempdir'},
267 # queue => $c->config->{'web_cluster_queue'},
268 # working_dir => $c->config->{'cluster_shared_tempdir'},
270 # don't block and wait if the cluster looks full
271 max_cluster_jobs
=> 1_000_000_000
,
275 print STDERR
"Saving job state to $seqfile.job for id ".$job->job_id()."\n";
# Ask the job object to keep its files, then persist it as "<seqfile>.job";
# the check action retrieves "<tempdir>/blast/<jobid>.job".
277 $job->do_not_cleanup(1);
279 nstore
( $job, $seqfile.".job" ) or die 'could not serialize job object';
# Failure path: whatever is in $@ is logged and returned to the client.
286 print STDERR
"An error occurred! $@\n";
287 $c->stash->{rest
} = { error
=> $@
};
290 # write data in blast.log
291 my $blast_log_path = $c->config->{blast_log
};
# Existing log: append. Otherwise: create it and write the column header.
# NOTE(review): the two failure messages look swapped ("cannot create" on the
# append branch, "cannot open" on the create branch), and a failed open only
# prints a warning, so the later prints would go to an unopened handle.
293 if (-e
$blast_log_path) {
294 open($blast_log_fh, ">>", $blast_log_path) || print STDERR
"cannot create $blast_log_path\n";
296 open($blast_log_fh, ">", $blast_log_path) || print STDERR
"cannot open $blast_log_path\n";
297 print $blast_log_fh "Seq_num\tDB_id\tProgram\teval\tMaxHits\tMatrix\tDate\n";
# One tab-separated record per submission, in the header's column order.
299 print $blast_log_fh "$seq_count\t".$params->{database
}."\t".$params->{program
}."\t".$params->{evalue
}."\t".$params->{maxhits
}."\t".$params->{matrix
}."\t".localtime()."\n";
# $jobid is already a basename (see its assignment above), so basename() here
# returns it unchanged.
302 print STDERR
"Passing jobid code ".(basename
($jobid))."\n";
303 $c->stash->{rest
} = { jobid
=> basename
($jobid),
304 seq_count
=> $seq_count,
# check: Catalyst action bound to '/tools/blast/check' with one path argument.
#
# Loads the job object stored as "<cluster_shared_tempdir>/blast/<jobid>.job"
# and reports its state through the REST stash: { status => 'running' } or,
# once the job's output file has been copied to the result location returned
# by jobid_to_file, { status => "complete" }.
# Dies when the output file is missing, unreadable or cannot be copied.
310 sub check
: Path
('/tools/blast/check') Args
(1) {
315 # my $t0 = [gettimeofday]; #-------------------------- TIME CHECK
# Directory that holds the serialised job objects.
317 my $blast_tmp_output = $c->get_conf('cluster_shared_tempdir')."/blast";
319 #my $jobid =~ s/\.\.//g; # prevent hacks
# NOTE(review): $jobid goes straight into the path handed to Storable's
# retrieve() and the '..' filter above is commented out; presumably $jobid is
# the URL argument -- confirm it is validated before use.
320 my $job = retrieve
($blast_tmp_output."/".$jobid.".job");
322 # my $t1 = [gettimeofday]; #-------------------------- TIME CHECK
# Reply used while the job has not finished (guarding test not shown here).
325 $c->stash->{rest
} = { status
=> 'running', };
327 # my $t2 = [gettimeofday]; #-------------------------- TIME CHECK
329 # my $t1_t2 = tv_interval $t1, $t2;
330 # print STDERR "Job alive: $t1_t2\n";
336 # my $t3 = [gettimeofday]; #-------------------------- TIME CHECK
338 # the job has finished
339 # copy the cluster temp file back into "apache space"
# Destination: "<jobid>.out" mapped to a path by jobid_to_file.
341 my $result_file = $self->jobid_to_file($c, $jobid.".out");
# Source: the output file recorded in the job object.
343 my $job_out_file = $job->out_file();
# uncache() is imported from File::NFSLock; the enclosing loop is left as soon
# as the output file is visible as a plain file.
345 uncache
($job_out_file);
346 last if -f
$job_out_file;
348 # my $t4 = [gettimeofday]; #-------------------------- TIME CHECK
349 # my $t3_t4 = tv_interval $t3, $t4;
350 # print STDERR "Job not alive loop: $t3_t4\n";
353 # my $t5 = [gettimeofday]; #-------------------------- TIME CHECK
# Hard failures: the output must exist as a plain file and be readable.
355 -f
$job_out_file or die "job output file ($job_out_file) doesn't exist";
356 -r
$job_out_file or die "job output file ($job_out_file) not readable";
358 # my $t6 = [gettimeofday]; #-------------------------- TIME CHECK
360 # You may wish to provide a different output file to send back
361 # rather than STDOUT from the job. Use the out_file_override
362 # parameter if this is the case.
363 #my $out_file = $out_file_override || $job->out_file();
364 # system("ls $blast_tmp_output 2>&1 >/dev/null");
366 # my $t7 = [gettimeofday]; #-------------------------- TIME CHECK
368 # system("ls $c->{config}->{cluster_shared_tempdir} 2>&1 >/dev/null");
# Copy the job output to the result location; $! is included on failure.
369 copy
($job_out_file, $result_file) or die "Can't copy result file '$job_out_file' to $result_file ($!)";
371 # my $t8 = [gettimeofday]; #-------------------------- TIME CHECK
373 #clean up the job tempfiles
376 # my $t9 = [gettimeofday]; #-------------------------- TIME CHECK
378 #also delete the job file
380 # my $t10 = [gettimeofday]; #-------------------------- TIME CHECK
382 # my $t5_t6 = tv_interval $t5, $t6;
383 # my $t6_t7 = tv_interval $t6, $t7;
384 # my $t7_t8 = tv_interval $t7, $t8;
385 # my $t8_t9 = tv_interval $t8, $t9;
386 # my $t9_t10 = tv_interval $t9, $t10;
388 # my $t3_t10 = tv_interval $t3, $t10;
389 # my $t0_t10 = tv_interval $t0, $t10;
391 # print STDERR "check 5-6 interval: $t5_t6\n";
392 # print STDERR "check 6-7 interval: $t6_t7\n";
393 # print STDERR "check 7-8 interval: $t7_t8\n";
394 # print STDERR "check 8-9 interval: $t8_t9\n";
395 # print STDERR "check 9-10 interval: $t9_t10\n";
397 # print STDERR "Job not alive (else): $t3_t10\n";
398 # print STDERR "CHECK SUB TIME: $t0_t10\n";
# Reply once the result file is in place.
401 $c->stash->{rest
} = { status
=> "complete" };
405 # fetch some html/js required for displaying the parse report
406 # sub get_prereqs : Path('/tools/blast/prereqs') Args(1) {
411 # my $format=$c->req->param('format');
412 # my $parser = CXGN::Blast::Parse->new();
413 # my $prereqs = $parser->prereqs($format);
414 # $c->stash->{rest} = { prereqs => $prereqs };
# get_result: Catalyst action bound to '/tools/blast/result' with one path
# argument.
#
# Reads the 'format' and 'db_id' request parameters, maps "<jobid>.out" to the
# result file via jobid_to_file, looks up the BlastDb row for db_id (dies when
# it does not exist) and stashes whatever CXGN::Blast::Parse->parse returns as
# the REST reply.
417 sub get_result
: Path
('/tools/blast/result') Args
(1) {
422 # my $t0 = [gettimeofday]; #-------------------------- TIME CHECK
424 my $format = $c->req->param('format');
425 my $db_id = $c->req->param('db_id');
# Result file written earlier by the check action's copy step.
427 my $result_file = $self->jobid_to_file($c, $jobid.".out");
# NOTE(review): $blast_tmp_output is not used by any statement visible in this
# sub -- possibly a leftover.
428 my $blast_tmp_output = $c->get_conf('cluster_shared_tempdir')."/blast";
430 # system("ls $blast_tmp_output 2>&1 >/dev/null");
431 # system("ls ".($c->config->{cluster_shared_tempdir})." 2>&1 >/dev/null");
# The database row is passed to the parser together with the output format.
433 my $schema = $c->dbic_schema("SGN::Schema");
434 my $db = $schema->resultset("BlastDb")->find($db_id);
435 if (!$db) { die "Can't find database with id $db_id"; }
436 my $parser = CXGN
::Blast
::Parse
->new();
437 my $parsed_data = $parser->parse($c, $format, $result_file, $db);
439 # my $t1 = [gettimeofday]; #-------------------------- TIME CHECK
440 # my $t0_t1 = tv_interval $t0, $t1;
441 # print STDERR "GET RESULT SUB TIME: $t0_t1\n";
443 $c->stash->{rest
} = $parsed_data; # { blast_report => '<pre>'.(join("\n", read_file($parsed_file))).'</pre>', };
# render_canvas_graph: Catalyst action bound to '/tools/blast/render_graph'
# with one path argument.
#
# Reads the plain-text BLAST report for the job line by line and builds:
#   * @res_html   -- an HTML table with one row per hit (subject id, identity
#                    percentage, alignment ratio, e-value, score, description),
#   * @aln_html   -- report lines collected inside a <pre> block,
#   * @json_array -- one hash per hit with name, id_percent, score,
#                    description, qstart and qend.
# The REST reply visible here carries sgn_html (table followed by the
# alignment text), desc_array (\@json_array) and sequence_length.
#
# NOTE(review): $subject, $desc and the other values taken from the report are
# interpolated into HTML and into a JavaScript onclick handler without any
# escaping visible here -- confirm the report content can be trusted.
# NOTE(review): the row-building and %description_hash code appears three
# times with identical statements (first-HSP flush, next-subject flush and the
# final flush after the loop).
449 sub render_canvas_graph
: Path
('/tools/blast/render_graph') Args
(1) {
453 my $db_id = $c->req->param('db_id');
# BLAST report for this job, as located by jobid_to_file.
455 my $file = $self->jobid_to_file($c, $jobid.".out");
456 my $blast_tmp_output = $c->get_conf('cluster_shared_tempdir')."/blast";
# The database row supplies the title and the JBrowse source name.
458 my $schema = $c->dbic_schema("SGN::Schema");
459 my $bdb = $schema->resultset("BlastDb")->find($db_id);
460 if (!$bdb) { die "Can't find database with id $db_id"; }
463 my $jbrowse_path = $c->config->{jbrowse_path
};;
464 # my $db_id = $bdb->blast_db_id();
465 my $jbr_src = $bdb->jbrowse_src();
482 my $query_line_on = 0;
483 my $query_length = 0;
487 push(@aln_html, "<br><pre>");
489 # variables for the canvas graph
# NOTE(review): the result of open() is not checked; a missing report would
# only show up as an empty read loop.
492 open (my $blast_fh, "<", $file);
# Table header for the hit list.
494 push(@res_html, "<table id=\"blast_table\" class=\"table\">");
495 push(@res_html, "<tr><th>SubjectId</th><th>id%</th><th>Aln</th><th>evalue</th><th>Score</th><th>Description</th></tr>");
497 while (my $line = <$blast_fh>) {
# "Query= <id>": a "<query> vs <db title>" heading is put in front of the table.
500 if ($line =~ /Query\=\s*(\S+)/) {
502 unshift(@res_html, "<center><h3>".$query." vs ".$bdb->title()."</h3></center>");
# "(<n> letters": matched only while $query_line_on is set.
506 if ($query_line_on && $line =~ /\((\d+)\s+letters/) {
# Description lines are appended with runs of whitespace collapsed to one space.
511 if ($line =~ /\w+/) {
512 my $new_desc_line = $line;
513 $new_desc_line =~ s/\s+/ /g;
514 $desc .= $new_desc_line;
# Emit the table row for the hit collected so far.
# NOTE(review): the JBrowse URL is built from $sstart/$send before
# _check_coordinates puts them in ascending order, so the URL may carry a
# start greater than its end while the row's hilite_coords do not.
526 my $jbrowse_url = _build_jbrowse_url
($jbr_src,$subject,$sstart,$send,$jbrowse_path);
527 ($sstart,$send) = _check_coordinates
($sstart,$send);
529 push(@res_html, "<tr><td><a id=\"$subject\" class=\"blast_match_ident\" href=\"/tools/blast/show_match_seq.pl?blast_db_id=$db_id;id=$subject;hilite_coords=$sstart-$send\" onclick=\"return resolve_blast_ident( '$subject', '$jbrowse_url', '/tools/blast/show_match_seq.pl?blast_db_id=$db_id;id=$subject;hilite_coords=$sstart-$send', null )\">$subject</a></td><td>$id</td><td>$aln</td><td>$evalue</td><td>$score</td><td>$desc</td></tr>");
# The description is shortened to 150 characters plus " ..." only after the
# HTML row was pushed, so the truncation affects the hash below, not the row.
531 if (length($desc) > 150) {
532 $desc = substr($desc,0,150)." ...";
# Per-hit record for the canvas graph; a fresh hash is declared each time so
# every array element references its own data.
535 my %description_hash;
537 $description_hash{"name"} = $subject;
538 $description_hash{"id_percent"} = $id;
539 $description_hash{"score"} = $score;
540 $description_hash{"description"} = $desc;
541 $description_hash{"qstart"} = $qstart;
542 $description_hash{"qend"} = $qend;
543 push(@json_array, \
%description_hash);
# ">subject description": header line of a subject in the report.
558 if ($line =~ /^>(\S+)\s*(.*)/) {
562 # print STDERR "subject: $subject\n";
# A "Score =" line while $one_hsp is 1: emit the row for the data collected so
# far (same statements as above).
567 if ($line =~ /Score\s*=/ && $one_hsp == 1) {
568 my $jbrowse_url = _build_jbrowse_url
($jbr_src,$subject,$sstart,$send,$jbrowse_path);
569 ($sstart,$send) = _check_coordinates
($sstart,$send);
571 push(@res_html, "<tr><td><a id=\"$subject\" class=\"blast_match_ident\" href=\"/tools/blast/show_match_seq.pl?blast_db_id=$db_id;id=$subject;hilite_coords=$sstart-$send\" onclick=\"return resolve_blast_ident( '$subject', '$jbrowse_url', '/tools/blast/show_match_seq.pl?blast_db_id=$db_id;id=$subject;hilite_coords=$sstart-$send', null )\">$subject</a></td><td>$id</td><td>$aln</td><td>$evalue</td><td>$score</td><td>$desc</td></tr>");
573 if (length($desc) > 150) {
574 $desc = substr($desc,0,150)." ...";
577 my %description_hash;
579 $description_hash{"name"} = $subject;
580 $description_hash{"id_percent"} = $id;
581 $description_hash{"score"} = $score;
582 $description_hash{"description"} = $desc;
583 $description_hash{"qstart"} = $qstart;
584 $description_hash{"qend"} = $qend;
585 push(@json_array, \
%description_hash);
# Field extraction from the HSP header lines: score, e-value and identities.
597 if ($line =~ /Score\s*=\s*([\d\.]+)/) {
604 if ($line =~ /Expect\s*=\s*([\d\.\-e]+)/) {
# "Identities = matched/total": kept as a ratio string and as a percentage
# with two decimals.
608 if ($line =~ /Identities\s*=\s*(\d+)\/(\d
+)/) {
609 my $aln_matched = $1;
611 $aln = "$aln_matched/$aln_total";
612 $id = sprintf("%.2f", $aln_matched*100/$aln_total);
# Leading number of a Query:/Sbjct: line, matched only while the
# corresponding start is still 0.
615 if (($line =~ /^Query:\s+(\d+)/) && ($qstart == 0)) {
618 if (($line =~ /^Sbjct:\s+(\d+)/) && ($sstart == 0)) {
# Trailing number of a Query:/Sbjct: line, matched on every such line.
622 if (($line =~ /^Query:/) && ($line =~ /(\d+)\s*$/)) {
625 if (($line =~ /^Sbjct:/) && ($line =~ /(\d+)\s*$/)) {
# Report lines are collected for the alignment display.
630 push(@aln_html, $line);
# Final flush after the loop: row and record for the last hit, then the table
# is closed.
637 my $jbrowse_url = _build_jbrowse_url
($jbr_src,$subject,$sstart,$send,$jbrowse_path);
638 ($sstart,$send) = _check_coordinates
($sstart,$send);
640 push(@res_html, "<tr><td><a id=\"$subject\" class=\"blast_match_ident\" href=\"/tools/blast/show_match_seq.pl?blast_db_id=$db_id;id=$subject;hilite_coords=$sstart-$send\" onclick=\"return resolve_blast_ident( '$subject', '$jbrowse_url', '/tools/blast/show_match_seq.pl?blast_db_id=$db_id;id=$subject;hilite_coords=$sstart-$send', null )\">$subject</a></td><td>$id</td><td>$aln</td><td>$evalue</td><td>$score</td><td>$desc</td></tr>");
641 push(@res_html, "</table>");
645 if (length($desc) > 150) {
646 $desc = substr($desc,0,150)." ...";
649 my %description_hash;
651 $description_hash{"name"} = $subject;
652 $description_hash{"id_percent"} = $id;
653 $description_hash{"score"} = $score;
654 $description_hash{"description"} = $desc;
655 $description_hash{"qstart"} = $qstart;
656 $description_hash{"qend"} = $qend;
657 push(@json_array, \
%description_hash);
# $prereqs: heredoc with the modal-popup markup and the resolve_blast_ident()
# JavaScript used by the onclick handlers of the table rows. The statements
# after it close the <pre> block, join the table and alignment fragments and
# fill the REST reply.
662 my $prereqs = <<EOJS;
664 <div class="modal fade" id="xref_menu_popup" role="dialog">
665 <div class="modal-dialog">
667 <!-- Modal content-->
668 <div class="modal-content">
669 <div class="modal-header">
670 <button type="button" class="close" data-dismiss="modal">×</button>
671 <h4 id="match_name" class="modal-title">Match Information</h4>
673 <div class="modal-body">
676 <div style="margin: 0.5em 0"><a class="match_details" href="" target="_blank">View matched sequence</a></div>
677 <div id="jbrowse_div" style="display:none"><a id="jbrowse_link" href="" target="_blank">View in genome context</a></div>
679 <dd class="subject_sequence_xrefs">
690 function resolve_blast_ident( id, jbrowse_url, match_detail_url, identifier_url ) {
692 var popup = jQuery( "#xref_menu_popup" );
694 jQuery('#match_name').html( id );
696 popup.find('a.match_details').attr( 'href', match_detail_url );
697 popup.find('#jbrowse_link').attr( 'href', jbrowse_url );
700 popup.find('#jbrowse_div').css( 'display', 'inline' );
703 // look up xrefs for overall subject sequence
704 var subj = popup.find('.subject_sequence_xrefs');
706 subj.html( '<img src="/img/throbber.gif" /> searching ...' );
707 subj.load( '/api/v1/feature_xrefs?q='+id );
718 push(@aln_html, "</pre></div><br>");
719 my $blast_table = join('', @res_html);
720 my $aln_text = join('<br>', @aln_html);
723 $c->stash->{rest
} = {
724 sgn_html
=> $blast_table."<br>".$aln_text,
725 desc_array
=> \
@json_array,
726 sequence_length
=> $query_length,
# _build_jbrowse_url: private helper that assembles a JBrowse link for a hit.
#
# Call sites in this file pass ($jbr_src, $subject, $sstart, $send,
# $jbrowse_path). The URL starts out empty and is only filled in when the
# source name matches one of two patterns:
#   "<name>_gene"   -> <jbrowse_path>/<name>&loc=<subject>&tracks=DNA,gene_models
#   "<name>_genome" -> same, with loc=<subject>%3A<sstart>..<send>
#                      (%3A is the URL-encoded ':').
735 sub _build_jbrowse_url
{
740 my $jbrowse_path = shift;
# Stays "" for source names matching neither pattern.
742 my $jbrowse_url = "";
# Gene databases: the subject id alone is the location.
745 if ($jbr_src =~ /(.+)_gene/) {
746 $jbrowse_url = $jbrowse_path."/".$1."&loc=".$subject."&tracks=DNA,gene_models";
# Genome databases: the location is subject plus the hit coordinates.
748 elsif ($jbr_src =~ /(.+)_genome/) {
749 $jbrowse_url = $jbrowse_path."/".$1."&loc=".$subject."%3A".$sstart."..".$send."&tracks=DNA,gene_models";
# _check_coordinates: private helper that returns a coordinate pair in
# ascending order.
#
# Returns the list ($final_start, $final_end): the two input values as given,
# or swapped when the start is numerically greater than the end.
756 sub _check_coordinates
{
757 my $tmp_start = shift;
# Default: keep the pair as received.
760 my $final_start = $tmp_start;
761 my $final_end = $tmp_end;
# Numeric comparison; swap so that start <= end.
763 if ($tmp_start > $tmp_end) {
764 $final_start = $tmp_end;
765 $final_end = $tmp_start;
768 return ($final_start, $final_end);
791 return File
::Spec
->catfile($c->config->{basepath
}, $c->tempfiles_subdir('blast'), "$jobid");
# search_gene_ids: fetches sequences by id from a BLAST database and formats
# them for HTML display.
#
# Arguments: a reference to an array of ids, and the BLAST database path
# (passed to Bio::BLAST::Database->open as full_file_basename).
# Returns one string: for every id that yields a sequence, a
# ">id desc<br>" header followed by the sequence in 60-character chunks, each
# chunk terminated by "<br>". Ids without a sequence are skipped.
795 sub search_gene_ids
{
796 my $ids_array = shift;
797 my $blastdb_path = shift;
798 my @ids = @
{$ids_array};
801 my $fs = Bio
::BLAST
::Database
->open(full_file_basename
=> "$blastdb_path",);
803 foreach my $input_string (@ids) {
# NOTE(review): get_sequence() is called twice per id, once as the test and
# once to obtain the object.
805 if ($fs->get_sequence($input_string)) {
806 my $seq_obj = $fs->get_sequence($input_string);
807 my $seq = $seq_obj->seq();
808 my $id = $seq_obj->id();
809 my $desc = $seq_obj->desc();
# Wrap the sequence at 60 characters per line, using <br> as the line break.
812 for (my $i=0; $i<length($seq); $i=$i+60) {
813 $new_seq = $new_seq.substr($seq,$i,60)."<br>";
816 push(@output_seqs, ">$id $desc<br>$new_seq");
# Records are concatenated without a separator; each already ends in <br>.
819 return join('', @output_seqs);
# search_desc: Catalyst action bound to '/tools/blast/desc_search/' (no extra
# path arguments).
#
# Takes the 'blast_desc' search text and the 'database' id from the request
# parameters, greps the file at <blast_db_path>/<file_base> case-insensitively
# for the text, reduces every matching line to its first space-separated field
# (with '>' removed), and hands those ids to search_gene_ids. The formatted
# sequences -- or a "no results" message -- are returned as
# { output_seq => ... } in the REST stash.
# Dies when no BlastDb row exists for the given database id.
822 sub search_desc
: Path
('/tools/blast/desc_search/') Args
(0) {
827 my $schema = $c->dbic_schema("SGN::Schema");
828 my $params = $c->req->params();
829 my $input_string = $params->{blast_desc
};
830 my $db_id = $params->{database
};
832 my $bdb = $schema->resultset("BlastDb")->find($db_id) || die "could not find bdb with file_base $db_id";
833 my $blastdb_path = File
::Spec
->catfile($c->config->{blast_db_path
}, $bdb->file_base());
# NOTE(review): SECURITY -- $input_string comes straight from the request
# parameters and is interpolated into a command line that is executed through
# the shell by the backticks below. Double quotes do not stop `...`, $(...),
# or an embedded '"' from being interpreted, so this allows command injection.
# It should be replaced by a shell-free invocation (list-form pipe open or an
# in-Perl search of the file).
835 my $grepcmd = "grep -i \"$input_string\" $blastdb_path \| sed 's/>//' \| cut -d ' ' -f 1";
836 my $output_seq = `$grepcmd`;
# One id per output line of the pipeline.
840 @ids = split(/\n/, $output_seq);
841 $output_seqs = search_gene_ids
(\
@ids,$blastdb_path);
# Message returned when the search produced nothing.
844 $output_seqs = "There were not results for your search\n";
846 $c->stash->{rest
} = {output_seq
=> "$output_seqs"};