2 package SGN
::Controller
::AJAX
::Blast
;
9 use Storable qw
| nstore retrieve
|;
11 use Tie
::UrlEncoder
; our %urlencode;
12 use File
::Temp qw
| tempfile
|;
13 use File
::Basename qw
| basename
|;
14 use File
::Copy qw
| copy
|;
15 use File
::Spec qw
| catfile
|;
16 use File
::Slurp qw
| read_file write_file
|;
17 use File
::NFSLock qw
| uncache
|;
19 use CXGN
::Page
::UserPrefs
;
20 use CXGN
::Tools
::List qw
/distinct evens/;
21 use CXGN
::Blast
::Parse
;
22 use CXGN
::Blast
::SeqQuery
;
24 BEGIN { extends
'Catalyst::Controller::REST'; };
27 default => 'application/json',
29 map => { 'application/json' => 'JSON', 'text/html' => 'JSON' },
32 sub run
: Path
('/tools/blast/run') Args
(0) {
36 my $params = $c->req->params();
38 my $input_query = CXGN
::Blast
::SeqQuery
->new();
40 my $valid = $input_query->validate($c, $params->{input_options
}, $params->{sequence
});
43 $c->stash->{rest
} = { error
=> "Your input contains illegal characters. Please verify your input." };
47 $params->{sequence
} = $input_query->process($c, $params->{input_options
}, $params->{sequence
});
49 # print STDERR "SEQUENCE now : ".$params->{sequence}."\n";
51 if ($params->{input_options
} eq 'autodetect') {
52 my $detected_type = $input_query->autodetect_seq_type($c, $params->{input_options
}, $params->{sequence
});
54 # print STDERR "SGN BLAST detected your sequence is: $detected_type\n";
56 # create a hash with the valid options =1 and check and if result 0 return error
57 my %blast_seq_db_program = (
77 if (!$blast_seq_db_program{$detected_type}{$params->{db_type
}}{$params->{program
}}) {
78 $c->stash->{rest
} = { error
=> "the program ".$params->{program
}." can not be used with a ".$detected_type." sequence (autodetected) and a ".$params->{db_type
}." database.\n\nPlease, use different options and disable the autodetection of the query type if it is wrong." };
85 if ($params->{sequence
} =~ /\>/) {
86 $seq_count= $params->{sequence
} =~ tr/\>/\>/;
88 print STDERR
"SEQ COUNT = $seq_count\n";
89 my ($seq_fh, $seqfile) = tempfile
(
91 DIR
=> $c->get_conf('cluster_shared_tempdir'),
94 my $jobid = basename
($seqfile);
96 print STDERR
"JOB ID CREATED: $jobid\n";
98 my $schema = $c->dbic_schema("SGN::Schema");
105 my $sequence = $params->{sequence
};
107 $sequence =~ s/^\s+|\s+$|\n\s*\n//g; #< trim out leading and trailing whitespace and blank lines
108 # if ($sequence !~ /^\s*>/) {
109 # $sequence = ">WEB-USER-SEQUENCE (Unknown)\n$sequence";
111 # $sequence .= "\n"; #< add a final newline
113 print STDERR
"Opening file for sequence ($seqfile)... ";
114 open(my $FH, ">", $seqfile) || die "Can't open file for query ($seqfile)\n";
115 print $FH $sequence if $sequence;
118 print STDERR
"Done.\n";
120 # if(my $file_upload = $page->get_upload) {
121 # if ( my $fh = $file_upload->fh ) {
122 # print $seq_fh $_ while <$fh>;
126 print STDERR
"Paring with bioperl... ";
127 my $i = Bio
::SeqIO
->new(
133 while ( my $s = $i->next_seq ) {
134 $s->length or $c->throw(
135 message
=> 'Sequence '.encode_entities
('"'.$s->id.'"').' is empty, this is not allowed by BLAST.',
141 die $_ if ref; #< throw it onward if it's an exception
143 if( /MSG:([^\n]+)/ ) {
146 s/at \/[\-\w \/\
.]+ line \d
+.+//; # remove anything resembling backtraces
147 $c->throw( message
=> $_,
149 developer_message
=> $full_error,
153 $seq_count >= 1 or $c->throw( message
=> 'no sequence submitted, cannot run BLAST',
155 developer_message
=> Data
::Dumper
::Dumper
({
156 '$seq_count' => $seq_count,
157 '$seq_filename' => $seqfile,
161 return -i
=> $seqfile;
167 my $m = $params->{matrix
};
168 $m =~ /^(BLOSUM|PAM)\d+$/
169 or $c->throw( is_error
=> 0, message
=> "invalid matrix '$m'" );
176 $params->{evalue
} =~ s/[^\d\.e\-\+]//gi; #can only be these characters
177 return -e
=> $params->{evalue
} ?
$params->{evalue
} : 1;
182 my $h = $params->{maxhits
} || 20;
183 $h =~ s/\D//g; #only digits allowed
189 my $h = $params->{maxhits
} || 20;
190 $h =~ s/\D//g; #only digits allowed
196 return -F
=> $params->{filterq
} ?
'T' : 'F';
201 # $params->{outformat} =~ s/\D//g; #only digits allowed
202 # return -m => $params->{outformat};
208 my $bdb = $schema->resultset("BlastDb")->find($params->{database
} )
209 or die "could not find bdb with file_base '$params->{database}'";
211 my $basename = $bdb->full_file_basename;
212 #returns '/data/shared/blast/databases/genbank/nr'
213 #remember the ID of the blast db the user just blasted with
215 return -d
=> $basename;
220 $params->{program
} =~ s/[^a-z]//g; #only lower-case letters
221 return -p
=> $params->{program
};
225 print STDERR
"BUILDING COMMAND...\n";
228 # build our command with our arg handlers
230 my @command = ('blastall');
231 foreach my $k (keys %arg_handlers) {
233 print STDERR
"evaluating $k...";
234 my @x = $arg_handlers{$k}->();
235 print STDERR
"component:
236 ", (join ",", @x)."\n";
237 @command = (@command, @x);
240 print STDERR
"COMMAND: ".join(" ", @command);
251 $job = CXGN
::Tools
::Run
->run_cluster(
254 temp_base
=> $c->config->{'cluster_shared_tempdir'},
255 queue
=> $c->config->{'web_cluster_queue'},
256 working_dir
=> $c->config->{'cluster_shared_tempdir'},
257 # don't block and wait if the cluster looks full
258 max_cluster_jobs
=> 1_000_000_000
,
262 print STDERR
"Saving job state to $seqfile.job for id ".$job->job_id()."\n";
264 $job->do_not_cleanup(1);
266 nstore
( $job, $seqfile.".job" )
267 or die 'could not serialize job object';
272 print STDERR
"An error occurred! $@\n";
273 $c->stash->{rest
} = { error
=> $@
};
276 # write data in blast.log
277 my $blast_log_path = $c->config->{blast_log
};
279 if (-e
$blast_log_path) {
280 open($blast_log_fh, ">>", $blast_log_path) || print STDERR
"cannot create $blast_log_path\n";
282 open($blast_log_fh, ">", $blast_log_path) || print STDERR
"cannot open $blast_log_path\n";
283 print $blast_log_fh "Seq_num\tDB_id\tProgram\teval\tMaxHits\tMatrix\tDate\n";
285 print $blast_log_fh "$seq_count\t".$params->{database
}."\t".$params->{program
}."\t".$params->{evalue
}."\t".$params->{maxhits
}."\t".$params->{matrix
}."\t".localtime()."\n";
288 print STDERR
"Passing jobid code ".(basename
($jobid))."\n";
289 $c->stash->{rest
} = { jobid
=> basename
($jobid),
290 seq_count
=> $seq_count,
296 sub check
: Path
('/tools/blast/check') Args
(1) {
301 #my $jobid =~ s/\.\.//g; # prevent hacks
302 my $job = retrieve
($c->config->{cluster_shared_tempdir
}."/".$jobid.".job");
306 $c->stash->{rest
} = { status
=> 'running', };
310 # the job has finished
311 # copy the cluster temp file back into "apache space"
313 my $result_file = $self->jobid_to_file($c, $jobid.".out");
315 my $job_out_file = $job->out_file();
317 uncache
($job_out_file);
318 last if -f
$job_out_file;
322 -f
$job_out_file or die "job output file ($job_out_file) doesn't exist";
323 -r
$job_out_file or die "job output file ($job_out_file) not readable";
325 # You may wish to provide a different output file to send back
326 # rather than STDOUT from the job. Use the out_file_override
327 # parameter if this is the case.
328 #my $out_file = $out_file_override || $job->out_file();
329 system("ls $c->{config}->{cluster_shared_tempdir} 2>&1 >/dev/null");
330 copy
($job_out_file, $result_file)
331 or die "Can't copy result file '$job_out_file' to $result_file ($!)";
333 #clean up the job tempfiles
336 #also delete the job file
338 $c->stash->{rest
} = { status
=> "complete" };
342 # fetch some html/js required for displaying the parse report
343 # sub get_prereqs : Path('/tools/blast/prereqs') Args(1) {
348 # my $format=$c->req->param('format');
349 # my $parser = CXGN::Blast::Parse->new();
350 # my $prereqs = $parser->prereqs($format);
351 # $c->stash->{rest} = { prereqs => $prereqs };
354 sub get_result
: Path
('/tools/blast/result') Args
(1) {
359 my $format = $c->req->param('format');
360 my $db_id = $c->req->param('db_id');
362 my $result_file = $self->jobid_to_file($c, $jobid.".out");
364 system("ls ".($c->config->{cluster_shared_tempdir
})." 2>&1 >/dev/null");
366 my $schema = $c->dbic_schema("SGN::Schema");
367 my $db = $schema->resultset("BlastDb")->find($db_id);
368 if (!$db) { die "Can't find database with id $db_id"; }
369 my $parser = CXGN
::Blast
::Parse
->new();
370 my $parsed_data = $parser->parse($c, $format, $result_file, $db);
372 $c->stash->{rest
} = $parsed_data; # { blast_report => '<pre>'.(join("\n", read_file($parsed_file))).'</pre>', };
376 # sub blast_overview_graph : Path('/tools/blast/overview') Args(1) {
384 # sub blast_coverage_graph : Path('/tools/blast/coverage') Args(1) {
393 # # validate the given sequence as input for the given blast program
394 # sub validate_seq : Path('/tools/blast/validate') Args(0) {
398 # my $program = shift;
401 # ( map { $_ => 'protein' } 'tblastn', 'blastp' ),
402 # ( map { $_ => 'DNA' } 'blastn', 'blastx', 'tblastx' ),
405 # #my $alphabet = $alphabets{$program}
406 # #or $c->throw( message => 'invalid program!',
408 # # developer_message => "program was '$program'",
410 # if (!exists($alphabets{$program})) {
411 # $c->stash->{rest} = {
413 # error => "Invalid program '$program'. Please choose another program.",
417 # my $alphabet = $alphabets{$program};
419 # if (!$s->validate_seq) { #< bioperl must think it's OK
420 # $c->stash->{rest} = {
422 # error => 'Not a legal sequence',
427 # my %not_iupac_pats = ( DNA => qr/([^ACGTURYKMSWBDHVN]+)/i,
428 # protein => qr/([^GAVLIPFYCMHKRWSTDENQBZ\.X\*]+)/i,
429 # rna => qr/([^ACGTURYKMSWBDHVN]+)/i,
432 # my $val_pat = $not_iupac_pats{$alphabet};
434 # $c->stash->{rest} = {
436 # error => "Invalid alphabet ($alphabet)",
440 # # or $c->throw( message => 'invalid alphabet!',
442 # # developer_message => "alphabet was '$alphabet'",
444 # if ($s->seq =~ $val_pat) {
446 # # message => encode_entities('Sequence "'.$s->id.qq|" contains invalid $alphabet characters "$1"| ),
449 # $c->stash->{rest} = {
451 # error => encode_entities('Sequence "'.$s->id.qq|" contains invalid $alphabet characters "$1"| ),
455 # $c->stash->{rest} = {
465 return File
::Spec
->catfile($c->config->{basepath
}, $c->tempfiles_subdir('blast'), "$jobid");