max hits filtered for proteins
[sgn.git] / lib / SGN / Controller / AJAX / Blast.pm
blobe888bc3fb7a1d82ed564f754430a068f8a41a241
2 package SGN::Controller::AJAX::Blast;
4 use Moose;
6 use Bio::SeqIO;
7 use Config::Any;
8 use Data::Dumper;
9 use Storable qw | nstore retrieve |;
10 use Try::Tiny;
11 use Tie::UrlEncoder; our %urlencode;
12 use File::Temp qw | tempfile |;
13 use File::Basename qw | basename |;
14 use File::Copy qw | copy |;
15 use File::Spec qw | catfile |;
16 use File::Slurp qw | read_file write_file |;
17 use File::NFSLock qw | uncache |;
18 use CXGN::Tools::Run;
19 use CXGN::Page::UserPrefs;
20 use CXGN::Tools::List qw/distinct evens/;
21 use CXGN::Blast::Parse;
22 use CXGN::Blast::SeqQuery;
24 BEGIN { extends 'Catalyst::Controller::REST'; };
26 __PACKAGE__->config(
27 default => 'application/json',
28 stash_key => 'rest',
29 map => { 'application/json' => 'JSON', 'text/html' => 'JSON' },
32 sub run : Path('/tools/blast/run') Args(0) {
33 my $self = shift;
34 my $c = shift;
36 my $params = $c->req->params();
38 my $input_query = CXGN::Blast::SeqQuery->new();
40 my $valid = $input_query->validate($c, $params->{input_options}, $params->{sequence});
42 if ($valid ne "OK") {
43 $c->stash->{rest} = { error => "Your input contains illegal characters. Please verify your input." };
44 return;
47 $params->{sequence} = $input_query->process($c, $params->{input_options}, $params->{sequence});
49 # print STDERR "SEQUENCE now : ".$params->{sequence}."\n";
51 if ($params->{input_options} eq 'autodetect') {
52 my $detected_type = $input_query->autodetect_seq_type($c, $params->{input_options}, $params->{sequence});
54 # print STDERR "SGN BLAST detected your sequence is: $detected_type\n";
56 # create a hash with the valid options =1 and check and if result 0 return error
57 my %blast_seq_db_program = (
58 nucleotide => {
59 nucleotide => {
60 blastn => 1,
61 tblastx => 1,
63 protein => {
64 blastx => 1,
67 protein => {
68 protein => {
69 blastp => 1,
71 nucleotide => {
72 tblastn => 1,
77 if (!$blast_seq_db_program{$detected_type}{$params->{db_type}}{$params->{program}}) {
78 $c->stash->{rest} = { error => "the program ".$params->{program}." can not be used with a ".$detected_type." sequence (autodetected) and a ".$params->{db_type}." database.\n\nPlease, use different options and disable the autodetection of the query type if it is wrong." };
79 return;
84 my $seq_count = 1;
85 if ($params->{sequence} =~ /\>/) {
86 $seq_count= $params->{sequence} =~ tr/\>/\>/;
88 print STDERR "SEQ COUNT = $seq_count\n";
89 my ($seq_fh, $seqfile) = tempfile(
90 "blast_XXXXXX",
91 DIR=> $c->get_conf('cluster_shared_tempdir'),
94 my $jobid = basename($seqfile);
96 print STDERR "JOB ID CREATED: $jobid\n";
98 my $schema = $c->dbic_schema("SGN::Schema");
100 my %arg_handlers =
103 sequence =>
104 sub {
105 my $sequence = $params->{sequence};
106 if( $sequence ) {
107 $sequence =~ s/^\s+|\s+$|\n\s*\n//g; #< trim out leading and trailing whitespace and blank lines
108 # if ($sequence !~ /^\s*>/) {
109 # $sequence = ">WEB-USER-SEQUENCE (Unknown)\n$sequence";
111 # $sequence .= "\n"; #< add a final newline
113 print STDERR "Opening file for sequence ($seqfile)... ";
114 open(my $FH, ">", $seqfile) || die "Can't open file for query ($seqfile)\n";
115 print $FH $sequence if $sequence;
116 close($FH);
118 print STDERR "Done.\n";
120 # if(my $file_upload = $page->get_upload) {
121 # if ( my $fh = $file_upload->fh ) {
122 # print $seq_fh $_ while <$fh>;
126 print STDERR "Paring with bioperl... ";
127 my $i = Bio::SeqIO->new(
128 -format => 'fasta',
129 -file => $seqfile,
132 try {
133 while ( my $s = $i->next_seq ) {
134 $s->length or $c->throw(
135 message => 'Sequence '.encode_entities('"'.$s->id.'"').' is empty, this is not allowed by BLAST.',
136 is_error => 0,
139 } catch {
140 print STDERR $@;
141 die $_ if ref; #< throw it onward if it's an exception
142 my $full_error = $_;
143 if( /MSG:([^\n]+)/ ) {
144 $_ = $1;
146 s/at \/[\-\w \/\.]+ line \d+.+//; # remove anything resembling backtraces
147 $c->throw( message => $_,
148 is_error => 0,
149 developer_message => $full_error,
153 $seq_count >= 1 or $c->throw( message => 'no sequence submitted, cannot run BLAST',
154 is_error => 0,
155 developer_message => Data::Dumper::Dumper({
156 '$seq_count' => $seq_count,
157 '$seq_filename' => $seqfile,
161 return -i => $seqfile;
165 matrix =>
166 sub {
167 my $m = $params->{matrix};
168 $m =~ /^(BLOSUM|PAM)\d+$/
169 or $c->throw( is_error => 0, message => "invalid matrix '$m'" );
170 return -M => $m;
174 expect =>
175 sub {
176 $params->{evalue} =~ s/[^\d\.e\-\+]//gi; #can only be these characters
177 return -e => $params->{evalue} ? $params->{evalue} : 1;
180 maxhits =>
181 sub {
182 my $h = $params->{maxhits} || 20;
183 $h =~ s/\D//g; #only digits allowed
184 return -b => $h;
187 hits_list =>
188 sub {
189 my $h = $params->{maxhits} || 20;
190 $h =~ s/\D//g; #only digits allowed
191 return -v => $h;
194 filterq =>
195 sub {
196 return -F => $params->{filterq} ? 'T' : 'F';
199 # outformat =>
200 # sub {
201 # $params->{outformat} =~ s/\D//g; #only digits allowed
202 # return -m => $params->{outformat};
203 # },
205 database =>
207 sub {
208 my $bdb = $schema->resultset("BlastDb")->find($params->{database} )
209 or die "could not find bdb with file_base '$params->{database}'";
211 my $basename = $bdb->full_file_basename;
212 #returns '/data/shared/blast/databases/genbank/nr'
213 #remember the ID of the blast db the user just blasted with
215 return -d => $basename;
218 program =>
219 sub {
220 $params->{program} =~ s/[^a-z]//g; #only lower-case letters
221 return -p => $params->{program};
225 print STDERR "BUILDING COMMAND...\n";
228 # build our command with our arg handlers
230 my @command = ('blastall');
231 foreach my $k (keys %arg_handlers) {
233 print STDERR "evaluating $k...";
234 my @x = $arg_handlers{$k}->();
235 print STDERR "component:
236 ", (join ",", @x)."\n";
237 @command = (@command, @x);
240 print STDERR "COMMAND: ".join(" ", @command);
241 print STDERR "\n";
243 # save our prefs
244 # $prefs->save;
246 # now run the blast
249 my $job;
250 eval {
251 $job = CXGN::Tools::Run->run_cluster(
252 @command,
254 temp_base => $c->config->{'cluster_shared_tempdir'},
255 queue => $c->config->{'web_cluster_queue'},
256 working_dir => $c->config->{'cluster_shared_tempdir'},
257 # don't block and wait if the cluster looks full
258 max_cluster_jobs => 1_000_000_000,
262 print STDERR "Saving job state to $seqfile.job for id ".$job->job_id()."\n";
264 $job->do_not_cleanup(1);
266 nstore( $job, $seqfile.".job" )
267 or die 'could not serialize job object';
271 if ($@) {
272 print STDERR "An error occurred! $@\n";
273 $c->stash->{rest} = { error => $@ };
275 else {
276 # write data in blast.log
277 my $blast_log_path = $c->config->{blast_log};
278 my $blast_log_fh;
279 if (-e $blast_log_path) {
280 open($blast_log_fh, ">>", $blast_log_path) || print STDERR "cannot create $blast_log_path\n";
281 } else {
282 open($blast_log_fh, ">", $blast_log_path) || print STDERR "cannot open $blast_log_path\n";
283 print $blast_log_fh "Seq_num\tDB_id\tProgram\teval\tMaxHits\tMatrix\tDate\n";
285 print $blast_log_fh "$seq_count\t".$params->{database}."\t".$params->{program}."\t".$params->{evalue}."\t".$params->{maxhits}."\t".$params->{matrix}."\t".localtime()."\n";
288 print STDERR "Passing jobid code ".(basename($jobid))."\n";
289 $c->stash->{rest} = { jobid => basename($jobid),
290 seq_count => $seq_count,
296 sub check : Path('/tools/blast/check') Args(1) {
297 my $self = shift;
298 my $c = shift;
299 my $jobid = shift;
301 #my $jobid =~ s/\.\.//g; # prevent hacks
302 my $job = retrieve($c->config->{cluster_shared_tempdir}."/".$jobid.".job");
304 if ( $job->alive ){
305 sleep(1);
306 $c->stash->{rest} = { status => 'running', };
307 return;
309 else {
310 # the job has finished
311 # copy the cluster temp file back into "apache space"
313 my $result_file = $self->jobid_to_file($c, $jobid.".out");
315 my $job_out_file = $job->out_file();
316 for( 1..10 ) {
317 uncache($job_out_file);
318 last if -f $job_out_file;
319 sleep 1;
322 -f $job_out_file or die "job output file ($job_out_file) doesn't exist";
323 -r $job_out_file or die "job output file ($job_out_file) not readable";
325 # You may wish to provide a different output file to send back
326 # rather than STDOUT from the job. Use the out_file_override
327 # parameter if this is the case.
328 #my $out_file = $out_file_override || $job->out_file();
329 system("ls $c->{config}->{cluster_shared_tempdir} 2>&1 >/dev/null");
330 copy($job_out_file, $result_file)
331 or die "Can't copy result file '$job_out_file' to $result_file ($!)";
333 #clean up the job tempfiles
334 $job->cleanup();
336 #also delete the job file
338 $c->stash->{rest} = { status => "complete" };
342 # fetch some html/js required for displaying the parse report
343 # sub get_prereqs : Path('/tools/blast/prereqs') Args(1) {
344 # my $self = shift;
345 # my $c = shift;
346 # my $jobid = shift;
348 # my $format=$c->req->param('format');
349 # my $parser = CXGN::Blast::Parse->new();
350 # my $prereqs = $parser->prereqs($format);
351 # $c->stash->{rest} = { prereqs => $prereqs };
354 sub get_result : Path('/tools/blast/result') Args(1) {
355 my $self = shift;
356 my $c = shift;
357 my $jobid = shift;
359 my $format = $c->req->param('format');
360 my $db_id = $c->req->param('db_id');
362 my $result_file = $self->jobid_to_file($c, $jobid.".out");
364 system("ls ".($c->config->{cluster_shared_tempdir})." 2>&1 >/dev/null");
366 my $schema = $c->dbic_schema("SGN::Schema");
367 my $db = $schema->resultset("BlastDb")->find($db_id);
368 if (!$db) { die "Can't find database with id $db_id"; }
369 my $parser = CXGN::Blast::Parse->new();
370 my $parsed_data = $parser->parse($c, $format, $result_file, $db);
372 $c->stash->{rest} = $parsed_data; # { blast_report => '<pre>'.(join("\n", read_file($parsed_file))).'</pre>', };
376 # sub blast_overview_graph : Path('/tools/blast/overview') Args(1) {
377 # my $self = shift;
378 # my $c = shift;
379 # my $jobid = shift;
384 # sub blast_coverage_graph : Path('/tools/blast/coverage') Args(1) {
385 # my $self = shift;
386 # my $c = shift;
387 # my $jobid = shift;
393 # # validate the given sequence as input for the given blast program
394 # sub validate_seq : Path('/tools/blast/validate') Args(0) {
395 # my $self = shift;
396 # my $c = shift;
397 # my $s = shift;
398 # my $program = shift;
400 # my %alphabets = (
401 # ( map { $_ => 'protein' } 'tblastn', 'blastp' ),
402 # ( map { $_ => 'DNA' } 'blastn', 'blastx', 'tblastx' ),
403 # );
405 # #my $alphabet = $alphabets{$program}
406 # #or $c->throw( message => 'invalid program!',
407 # # is_error => 1,
408 # # developer_message => "program was '$program'",
409 # # );
410 # if (!exists($alphabets{$program})) {
411 # $c->stash->{rest} = {
412 # validated => 0,
413 # error => "Invalid program '$program'. Please choose another program.",
414 # };
415 # return;
417 # my $alphabet = $alphabets{$program};
419 # if (!$s->validate_seq) { #< bioperl must think it's OK
420 # $c->stash->{rest} = {
421 # validated => 0,
422 # error => 'Not a legal sequence',
423 # };
424 # return;
427 # my %not_iupac_pats = ( DNA => qr/([^ACGTURYKMSWBDHVN]+)/i,
428 # protein => qr/([^GAVLIPFYCMHKRWSTDENQBZ\.X\*]+)/i,
429 # rna => qr/([^ACGTURYKMSWBDHVN]+)/i,
430 # );
432 # my $val_pat = $not_iupac_pats{$alphabet};
433 # if (!$val_pat) {
434 # $c->stash->{rest} = {
435 # validated => 0,
436 # error => "Invalid alphabet ($alphabet)",
437 # };
438 # return;
440 # # or $c->throw( message => 'invalid alphabet!',
441 # # is_error => 1,
442 # # developer_message => "alphabet was '$alphabet'",
443 # # );
444 # if ($s->seq =~ $val_pat) {
445 # #and $c->throw(
446 # # message => encode_entities('Sequence "'.$s->id.qq|" contains invalid $alphabet characters "$1"| ),
447 # # is_error => 0,
448 # # );
449 # $c->stash->{rest} = {
450 # validated => 1,
451 # error => encode_entities('Sequence "'.$s->id.qq|" contains invalid $alphabet characters "$1"| ),
452 # };
453 # return;
455 # $c->stash->{rest} = {
456 # validated => 1,
457 # error => '',
458 # };
461 sub jobid_to_file {
462 my $self = shift;
463 my $c = shift;
464 my $jobid = shift;
465 return File::Spec->catfile($c->config->{basepath}, $c->tempfiles_subdir('blast'), "$jobid");