Merge branch 'master' into topic/pedigree_upload_fix
[sgn.git] / cgi-bin / tools / blast / index.pl
blob1ef983d3dcac5f67970fe093b3f2e6b35ef10ac7
1 use CatalystX::GlobalContext qw( $c );
3 =head1 NAME
5 THIS PAGE IS DEPRECATED AND WILL CLIENT REDIRECT TO THE REAL BLAST.
7 /tools/blast/ - the entry page to the SGN blast tool
9 =head1 DESCRIPTION
11 This page displays a text box for a query sequence, along with a
12 number of pull down menus to select (1) the dataset to match against,
13 (2) the blast program, (3) an evalue cutoff, (4) a substitution
14 matrix, (5) the number of maximum hits to show, and (6) parameters
15 affecting the output of the results.
17 Important features: the default dataset selected is
18 tomato_combined. If the user selects another dataset, this is stored
19 in the user prefs and comes up as the default later on. A specific
20 dataset can be pre-selected in a link using the db_id parameter.
22 This script is in desparate need of a decent re-factoring...
24 Parameters:
26 =over 5
28 =item preload_id
30 an id that can be preloaded from the database. Requires preload_type
31 to be set as well.
33 =item preload_type
35 the type of identifier to preload from the database (7 is EST, 15 is
36 unigene). Requires preload_id to be set as well.
38 =item seq
40 a sequence to blast, automatically filled in
42 =item interface_type
44 either 'simple' or 'advanced', affects the features displayed on the
45 page.
47 =item db_id
49 an id of a database, which will appear pre-selected in the pulldown
50 menu.
52 =back
54 =head1 AUTHORS
56 This script was initially written by Koni Wright. Many additions and
57 changes by SGN staff. POD by Lukas.
59 =cut
62 use CXGN::Page;
65 my $page = CXGN::Page->new("BLAST Search Interface","Evan");
67 $page->client_redirect("/tools/blast");
70 return;
73 use Modern::Perl;
74 use POSIX;
75 use English;
77 use Memoize;
78 use Storable qw/ retrieve nstore /;
80 use Tie::UrlEncoder;
81 our %urlencode;
83 use File::Flock;
85 use CXGN::Page;
86 use CXGN::BlastDB;
87 use CXGN::Page::FormattingHelpers qw/page_title_html modesel info_table_html hierarchical_selectboxes_html simple_selectbox_html/;
88 use CXGN::Page::UserPrefs;
89 use CXGN::Tools::List qw/evens distinct/;
90 use CatalystX::GlobalContext '$c';
92 $page = CXGN::Page->new("BLAST Search Interface","Evan");
93 my $dbh = CXGN::DB::Connection->new;
94 my $prefs = CXGN::Page::UserPrefs->new( $dbh );
96 my %params;
97 { my @p = qw/preload_id preload_type seq interface_type db_id flush_cache/;
98 @params{@p} = $page->get_encoded_arguments(@p);
101 $params{interface_type} ||= 0;
103 my $blast_path = $c->config->{'blast_path'};
104 my $blast_version = do {
105 unless( -x "$blast_path/blastall") {
107 } else {
108 eval {
109 open BP, qq{echo '>shutup' | $blast_path/blastall -p blastn 2>&1 |};
110 my $v = '';
111 while (<BP>) {
112 if (m/BLASTN (.+)/) {
113 $v = " v. $1";
114 last;
117 close BP;
118 }; if( $EVAL_ERROR ) {
124 my $preload_seq;
125 if ($params{preload_type}) {
126 if ($params{preload_type} == 7) {
128 my $estq = $dbh->prepare_cached(<<EOSQL);
129 SELECT COALESCE( CASE WHEN hqi_length > 0 THEN SUBSTRING(seq, hqi_start::integer+1, hqi_length::integer)
130 ELSE NULL
131 END,
134 FROM est
135 LEFT JOIN qc_report USING (est_id)
136 WHERE est.est_id=?
137 EOSQL
139 $estq->execute($params{preload_id});
141 if ($estq->rows == 0) {
142 die("Preloaded BLAST search specified EST sequence SGN-E$params{preload_id} but it is not found in database ($params{preload_id})");
145 ($preload_seq) = $estq->fetchrow_array();
147 my $x = "";
148 my $i = 0;
149 while((length($preload_seq) - $i) > 78) {
150 $x .= substr($preload_seq, $i, 78) . "\n";
151 $i+=78;
153 $x .= substr($preload_seq, $i) . "\n";
154 $preload_seq = ">SGN-E$params{preload_id}\n$x\n";
156 } elsif ($params{preload_type} == 15) {
158 my $memberq = $dbh->prepare_cached("SELECT nr_members FROM unigene WHERE unigene_id=?");
159 $memberq->execute($params{preload_id});
160 if ($memberq->rows == 0) {
161 $page->error_page("Preloaded BLAST search specified unigene identifier ($params{preload_id}) which is not found");
164 my ($nr_members) = $memberq->fetchrow_array();
165 if ($nr_members > 1) {
166 ($preload_seq) = $dbh->selectrow_array(<<EOSQL,undef,$params{preload_id})
167 SELECT seq
168 FROM unigene
169 INNER JOIN unigene_consensi
170 USING (consensi_id)
171 WHERE unigene_id=?
172 EOSQL
173 } else {
174 ($preload_seq) = $dbh->selectrow_array(<<EOSQL,undef,$params{preload_id});
175 SELECT COALESCE( CASE WHEN hqi_length > 0
176 THEN SUBSTRING(seq, hqi_start::integer+1, hqi_length::integer)
177 ELSE NULL
178 END,
181 FROM unigene
182 LEFT JOIN unigene_member USING (unigene_id)
183 LEFT JOIN est USING (est_id)
184 LEFT JOIN qc_report USING (est_id)
185 WHERE unigene.unigene_id=?
186 EOSQL
188 my $x = "";
189 my $i = 0;
190 while((length($preload_seq) - $i) > 78) {
191 $x .= substr($preload_seq, $i, 78) . "\n";
192 $i+=78;
194 $x .= substr($preload_seq, $i) . "\n";
195 $preload_seq = ">SGN-U$params{preload_id}\n$x";
196 } else {
197 $page->error_page("Unknown preloaded sequence type\n");
199 } elsif ($params{seq}) {
200 $preload_seq = $params{seq};
201 } else {
202 $preload_seq = "";
205 $page->header('SGN BLAST');
206 $page->jsan_use('jquery');
208 my ($databases,$programs,$programs_js) = blast_db_prog_selects( $params{db_id}, $params{flush_cache}, $prefs );
209 my $spellcheck_js = <<'';
210 // turn off spell check on sequence inputs without emitting invalid HTML
211 jQuery(function($) { $('#sequence_input').attr('spellcheck',false) });
214 sub hash2param {
215 my %args = @_;
216 no warnings 'uninitialized';
217 return join '&', map "$urlencode{$_}=$urlencode{$args{$_}}", distinct evens @_;
220 print page_title_html("NCBI BLAST$blast_version");
222 print <<HTML;
224 <div class="boxbgcolor5">This is the old BLAST interface. The new interface is available <a href="/tools/blast">here</a>.
225 </div><br />
227 HTML
229 print modesel([ ['?'.hash2param(%params, interface_type => 0),'Simple'],
230 ['?'.hash2param(%params, interface_type => 1),'Advanced'],
232 $params{interface_type},
235 #simple blast interface form
236 if($params{interface_type} == 0) {
237 print <<EOF
239 <script language="JavaScript" type="text/JavaScript" >
241 function clearField() {
242 // OK - there are three ways to clear the fields.
243 // The first is let the browser do it. Then it does not clear
244 // with preset sequences. The second way is to clear it with javascript.
245 // on reload, this will create confusion with preset sequences.
246 // The third way is simply to redirect to the empty page. Thats good
247 // because it also resets to the users preferred datatset.
248 // var i = document.getElementById("sequence_input");
249 // i.innerHTML='';
250 window.location="index.pl";
253 </script>
255 <form method="post" action="blast_result.pl" name="blastform">
256 <input type="hidden" checked="checked" name="filterq" value="1" />
257 <input type="hidden" name="interface_type" value="simple" />
258 <input type="hidden" name="outformat" value="0" />
259 <table align="center" summary="" cellpadding="0" cellspacing="15">
260 <tr><td><b>Sequence Set</b> </td><td>$databases <a style="font-size: 80%" title="View details of each database" href="dbinfo.pl">db details</a></td></tr>
261 <tr><td><b>Program</b> </td><td>$programs</td></tr>
262 <tr><td colspan="2" align="center"><b>Query sequence</b><div style="font-size: 80%">single sequence only, use Advanced for multiple</div><textarea name="sequence" id="sequence_input" rows="8" cols="80">$preload_seq</textarea></td></tr>
263 <tr><td colspan="2">
264 <table width="100%"><tr>
265 <td>
267 .info_table_html('Expect (e-value) Threshold' => '<input type="text" size="6" value="1e-10" name="expect" />',
268 'Substitution Matrix' => simple_selectbox_html( name => 'matrix',
269 choices => [ [ 'BLOSUM62', 'BLOSUM62 (default)' ],
270 [ 'BLOSUM80', 'BLOSUM80 (recent divergence)' ],
271 [ 'BLOSUM45', 'BLOSUM45 (ancient divergence)' ],
272 'PAM30',
273 'PAM70',
276 'Max. hits to show' => '<input type="text" name="maxhits" size="6" value="100" />',
277 ' ' => '<div style="text-align: right"><input type="reset" value="Clear" onclick="clearField(); "/> <input type="submit" name="search" value="Search" /></div>',
278 'Show Graphics' => simple_selectbox_html( name => 'output_graphs',
279 choices => [ [ 'bioperl_histogram', 'all' ],
280 'none',
281 [ 'bioperl_only', 'alignment summary only' ],
282 [ 'histogram_only', 'conservedness histogram only' ],
285 __multicol => 2,
286 __border => 0,
287 __tableattrs => 'width="100%"',
289 .<<EOF
290 </td>
291 </tr>
292 </table>
293 </td></tr>
294 </table>
295 </form>
296 <script language="JavaScript" type="text/javascript">
297 $programs_js
298 $spellcheck_js
299 </script>
304 #advanced blast interface form
305 else {
306 my $mselect = simple_selectbox_html( name => 'outformat',
307 choices => [ [0 => '0 - pairwise (default)'],
308 [1 => '1 - query-anchored showing identities'],
309 [2 => '2 - query-anchored no identities'],
310 [3 => '3 - flat query-anchored, show identities'],
311 [4 => '4 - flat query-anchored, no identities'],
312 [5 => '5 - query-anchored no identities and blunt ends'],
313 [6 => '6 - flat query-anchored, no identities and blunt ends'],
314 [7 => '7 - XML Blast output'],
315 [8 => '8 - tabular'],
316 [9 => '9 - tabular with comment lines'],
317 [10 => '10 - ASN, text'],
318 [11 => '11 - ASN, binary'],
322 print <<EOF;
323 <table style="border: 1px solid gray; padding: 1em 2em 1em 2em; background: #eeeeff;"><tr><td><img src="/documents/img/info_icon.png" border="0" style="margin-right: 1em; vertical-align: middle" /></td><td style="padding-top: 0.35em">This version of the BLAST online tool allows multiple query sequences, more control over running options, and more report formats.</td></tr></table>
324 <form method="post" action="blast_result.pl" name="blastform" enctype="multipart/form-data">
325 <input type="hidden" name="interface_type" value="advanced" />
326 <table id="blastinput" align="center" summary="" cellpadding="0" cellspacing="15">
327 <tr><td><b>Database (<tt>-d</tt>)</b> </td><td>$databases <a style="font-size: 80%" title="View details of each database" href="dbinfo.pl">db details</a></td></tr>
328 <tr><td><b>Program (<tt>-p</tt>)</b> </td><td>$programs</td></tr>
329 <tr>
330 <td><b>Query sequences (<tt>-i</tt>)</b></td>
331 <td >
332 <textarea class="fix" id="sequence_input" name="sequence" rows="8" cols="65">$preload_seq</textarea><br />
333 <b>AND/OR upload multi-fasta query file</b> <input type="file" name="file" />
334 </td>
335 </tr>
336 <tr><td><b>Output format (<tt>-m</tt>)</b></td>
337 <td>$mselect</td>
338 </tr>
339 <tr><td><b>Substitution Matrix (<tt>-M</tt>)</b></td>
340 <td >
341 <select name="matrix">
342 <option value="BLOSUM62">BLOSUM62 (default)</option>
343 <option value="BLOSUM80">BLOSUM80 (recent divergence)</option>
344 <option value="BLOSUM45">BLOSUM45 (ancient divergence)</option>
345 <option value="PAM30">PAM30</option>
346 <option value="PAM70">PAM70</option>
347 </select>
348 </td>
349 </tr>
350 <tr><td><b>Expectation value (<tt>-e</tt>)</b> </td>
351 <td ><input type="text" size="6" value="1e-10" name="expect" /></td>
352 </tr>
353 <tr><td><b>Max DB seqs to show hits from (<tt>-b</tt>)</b></td>
354 <td ><input type="text" name="maxhits" size="6" value="100" /></td>
355 </tr>
356 <tr><td><b>Filter query sequence (DUST with blastn, SEG with others) (<tt>-F</tt>)</b></td>
357 <td><input type="checkbox" checked="checked" name="filterq" /></td>
358 </tr>
359 <tr>
360 <td>
361 <b>Show Graphics</b><br /><span style="font-size: 80%">not available for multiple query seqs</span>
362 </td>
363 <td>
364 <select name="output_graphs">
365 <option value="none">none</option>
366 <option value="bioperl_only">alignment summary only</option>
367 <option value="histogram_only">conservedness histogram only</option>
368 <option value="bioperl_histogram" selected="selected">all</option>
369 </select>
370 </td>
371 </tr>
373 <tr><td align="right"><input type="reset" value="Clear" /></td><td align="center"><input type="submit" name="search" value="Submit" style="background: red; font-size: 130%" /></td></tr>
374 </table>
375 </form>
376 <script language="JavaScript" type="text/javascript">
377 $programs_js
378 $spellcheck_js
379 </script>
384 $page->footer();
386 ##########################################################################################################################
388 memoize '_cached_file_modtime';
389 sub _cached_file_modtime {
390 shift->file_modtime
393 sub blast_db_prog_selects {
394 my ( $db_id, $flush_cache, $prefs ) = @_;
396 my $db_choices = blast_db_choices( $flush_cache );
398 return '<span class="ghosted">The BLAST service is temporarily unavailable, we apologize for the inconvenience</span>'
399 unless @$db_choices;
401 # DB select box will either the db_id supplied, or what the user last selected, or the tomato combined blast db
402 my $selected_db_id = $db_id || $prefs->get_pref('last_blast_db_id')
403 || do {
404 my ($d) = map $_->blast_db_id,
405 grep _cached_file_modtime($_),
406 grep $_->web_interface_visible,
407 CXGN::BlastDB->search_ilike( title => '%SGN Tomato Combined%' );
411 my %prog_descs = ( blastn => 'BLASTN (nucleotide to nucleotide)',
412 blastx => 'BLASTX (nucleotide to protein; query translated to protein)',
413 blastp => 'BLASTP (protein to protein)',
414 tblastx => 'TBLASTX (protein to protein; both database and query are translated)',
415 tblastn => 'TBLASTN (protein to nucleotide; database translated to protein)',
418 my @program_choices = map {
419 my ($db) = @$_;
420 if ($db->type eq 'protein') {
421 [map [$_,$prog_descs{$_}], 'blastx','blastp']
422 } else {
423 [map [$_,$prog_descs{$_}], 'blastn','tblastx','tblastn']
425 } grep ref, @$db_choices;
427 @$db_choices = map {ref($_) ? $_->[1] : $_} @$db_choices;
429 return hierarchical_selectboxes_html( parentsel => { name => 'database',
430 choices => $db_choices,
431 ( $selected_db_id ? (selected => $selected_db_id) : () ),
433 childsel => { name => 'program' },
434 childchoices => \@program_choices
438 sub blast_db_choices {
439 my ( $flush_cache ) = @_;
441 my $choices_cache_filename = $c->path_to( $c->generated_file_uri('blast','choices_cache.dat') );
442 my $lockfile = "$choices_cache_filename.lock";
444 unless( $flush_cache ) {
445 my $l = File::Flock->new($lockfile,'shared');
446 my $cache_modtime = (stat($choices_cache_filename))[9];
448 if( $cache_modtime && $cache_modtime > time - 15*60 ) {
449 my $data = retrieve( $choices_cache_filename );
450 return $data if $data;
454 my $l = File::Flock->new($lockfile);
455 my $choices = _build_blast_db_choices();
456 nstore( $choices, $choices_cache_filename )
457 or warn "WARNING: $! caching blast db choices in file '$choices_cache_filename'";
458 return $choices;
461 sub _build_blast_db_choices {
463 sleep 5;
465 my @db_choices = map {
466 my @dbs = map [ $_, bdb_opt($_) ],
467 grep _cached_file_modtime($_), #filter for dbs that are on disk
468 $_->blast_dbs( web_interface_visible => 't'); #get all dbs in this group
469 @dbs ? ('__'.$_->name, @dbs) : ()
470 } CXGN::BlastDB::Group->search_like(name => '%',{order_by => 'ordinal, name'});
472 my @ungrouped_dbs =
473 grep _cached_file_modtime($_),
474 CXGN::BlastDB->search(
475 blast_db_group_id => undef,
476 web_interface_visible => 't',
477 { order_by => 'title' }
480 push @db_choices, ( '__Other',
481 map [$_,bdb_opt($_)],
482 @ungrouped_dbs
485 return \@db_choices;
488 sub bdb_opt {
489 my $db = shift;
490 # my $timestamp = _cached_file_modtime($db)
491 # or return;
492 # $timestamp = strftime(' &nbsp;(%F)',gmtime _cached_file_modtime($db));
493 my $seq_count = $db->sequences_count;
495 [$db->blast_db_id, $db->title ]