1 use CatalystX
::GlobalContext
qw( $c );
4 /tools/blast/ - the entry page to the SGN blast tool
8 This page displays a text box for a query sequence, along with a
9 number of pull down menus to select (1) the dataset to match against,
10 (2) the blast program, (3) an evalue cutoff, (4) a substitution
11 matrix, (5) the maximum number of hits to show, and (6) parameters
12 affecting the output of the results.
14 Important features: the default dataset selected is
15 tomato_combined. If the user selects another dataset, this is stored
16 in the user prefs and comes up as the default later on. A specific
17 dataset can be pre-selected in a link using the db_id parameter.
19 This script is in desperate need of a decent refactoring...
27 an id that can be preloaded from the database. Requires preload_type
32 the type of identifier to preload from the database (7 is EST, 15 is
33 unigene). Requires preload_id to be set as well.
37 a sequence to blast, automatically filled in
41 either 'simple' or 'advanced', affects the features displayed on the
46 an id of a database, which will appear pre-selected in the pulldown
53 This script was initially written by Koni Wright. Many additions and
54 changes by SGN staff. POD by Lukas.
64 use Storable qw/ retrieve nstore /;
73 use CXGN::Page::FormattingHelpers qw/page_title_html modesel info_table_html hierarchical_selectboxes_html simple_selectbox_html/;
74 use CXGN::Page::UserPrefs;
75 use CXGN::Tools::List qw/evens distinct/;
76 use CatalystX::GlobalContext '$c';
# Per-request setup: the page object, a database connection, and the
# user-preferences object built on that connection.
78 my $page = CXGN::Page->new("BLAST Search Interface","Evan");
79 my $dbh = CXGN::DB::Connection->new;
80 my $prefs = CXGN::Page::UserPrefs->new( $dbh );
# copy the supported request arguments into %params, keyed by argument name
83 @params{qw/preload_id preload_type seq interface_type db_id flush_cache/} = $page->get_encoded_arguments("preload_id","preload_type","seq","interface_type", "db_id", 'flush_cache');
# a missing or false interface_type becomes 0, which selects the simple form
85 $params{interface_type} ||= 0;
# Take the BLAST install directory from the application config and derive
# $blast_version, which is later appended to the "NCBI BLAST" page title.
88 my $blast_path = $c->config->{'blast_path'};
89 my $blast_version = do {
# this branch covers the case where blastall is missing or not executable
90 unless( -x "$blast_path/blastall") {
# feed blastall a dummy FASTA header and open its combined stdout/stderr
# for reading (the code that consumes the handle is not visible here)
94 open BP, qq{echo '>shutup' | $blast_path/blastall -p blastn 2>&1 |};
103 }; if( $EVAL_ERROR ) {
# Work out the sequence that pre-fills the query box ($preload_seq).
# With preload_type set, the sequence is looked up by preload_id:
# type 7 is an EST, type 15 is a unigene, and the "Unknown preloaded
# sequence type" error page is presumably the fallback for anything else.
# Without preload_type, a 'seq' parameter (if given) is used as-is.
110 if ($params{preload_type}) {
# preload_type 7: EST sequence
111 if ($params{preload_type} == 7) {
113 my $estq = $dbh->prepare_cached(<<EOSQL);
114 SELECT COALESCE( CASE WHEN hqi_length > 0 THEN SUBSTRING(seq, hqi_start::integer+1, hqi_length::integer)
120 LEFT JOIN qc_report USING (est_id)
124 $estq->execute($params{preload_id
});
# no matching EST: this branch dies, whereas the unigene branch below
# reports the equivalent condition through $page->error_page
126 if ($estq->rows == 0) {
127 die("Preloaded BLAST search specified EST sequence SGN-E$params{preload_id} but it is not found in database ($params{preload_id})");
130 ($preload_seq) = $estq->fetchrow_array();
# break the sequence into lines of 78 characters
134 while((length($preload_seq) - $i) > 78) {
135 $x .= substr($preload_seq, $i, 78) . "\n";
138 $x .= substr($preload_seq, $i) . "\n";
# prepend a FASTA header line naming the EST
139 $preload_seq = ">SGN-E$params{preload_id}\n$x\n";
141 } elsif ($params{preload_type
} == 15) {
# preload_type 15: unigene; first look up how many members it has
143 my $memberq = $dbh->prepare_cached("SELECT nr_members FROM unigene WHERE unigene_id=?");
144 $memberq->execute($params{preload_id
});
145 if ($memberq->rows == 0) {
146 $page->error_page("Preloaded BLAST search specified unigene identifier ($params{preload_id}) which is not found");
149 my ($nr_members) = $memberq->fetchrow_array();
# with more than one member the sequence comes from a query joining
# unigene_consensi; the second query (joining est and qc_report) is
# presumably the single-member alternative
150 if ($nr_members > 1) {
151 ($preload_seq) = $dbh->selectrow_array(<<EOSQL,undef,$params{preload_id})
154 INNER JOIN unigene_consensi
159 ($preload_seq) = $dbh->selectrow_array(<<EOSQL,undef,$params{preload_id});
160 SELECT COALESCE( CASE WHEN hqi_length > 0
161 THEN SUBSTRING(seq, hqi_start::integer+1, hqi_length::integer)
167 LEFT JOIN unigene_member USING (unigene_id)
168 LEFT JOIN est USING (est_id)
169 LEFT JOIN qc_report USING (est_id)
170 WHERE unigene.unigene_id=?
# same 78-character line wrapping as in the EST branch
175 while((length($preload_seq) - $i) > 78) {
176 $x .= substr($preload_seq, $i, 78) . "\n";
179 $x .= substr($preload_seq, $i) . "\n";
# prepend a FASTA header line naming the unigene
180 $preload_seq = ">SGN-U$params{preload_id}\n$x";
182 $page->error_page("Unknown preloaded sequence type\n");
184 } elsif ($params{seq
}) {
# no preload_type: use the sequence passed directly in the 'seq' parameter
185 $preload_seq = $params{seq
};
# Emit the page header, request jQuery, and build the database/program
# select boxes with the db_id request parameter as the requested database.
190 $page->header('SGN BLAST');
191 $page->jsan_use('jquery');
193 my ($databases,$programs,$programs_js) = blast_db_prog_selects
($params{db_id
});
# JavaScript snippet held in a non-interpolating here-doc (<<'' ends at the
# first empty line); it sets spellcheck=false on the #sequence_input element
194 my $spellcheck_js = <<'';
195 // turn off spell check on sequence inputs without emitting invalid HTML
196 jQuery
(function
($) { $('#sequence_input').attr
('spellcheck',false
) });
201 return join '&', map "$urlencode{$_}=$urlencode{$args{$_}}", distinct evens
@_;
# Print the page title with $blast_version appended, then the
# Simple/Advanced mode switch. Each mode link is built by hash2param from
# the current %params with interface_type overridden (0 = Simple,
# 1 = Advanced). The current interface_type is passed to modesel as well,
# presumably to mark the active mode.
204 print page_title_html
("NCBI BLAST$blast_version");
205 print modesel
([ ['?'.hash2param
(%params, interface_type
=> 0),'Simple'],
206 ['?'.hash2param
(%params, interface_type
=> 1),'Advanced'],
208 $params{interface_type
},
211 #simple blast interface form
# Shown when interface_type is 0 (the value it defaults to). Emits a
# JavaScript clearField() helper that redirects to index.pl, then a form
# posting to blast_result.pl with the database and program selectors, one
# query-sequence textarea pre-filled with $preload_seq, and an options
# table (e-value threshold, substitution matrix, maximum hits, graphics).
# Query filtering (filterq=1) and pairwise output (outformat=0) are fixed
# through hidden inputs in this mode.
212 if($params{interface_type
} == 0) {
215 <script language="JavaScript" type="text/JavaScript" >
217 function clearField() {
218 // OK - there are three ways to clear the fields.
219 // The first is let the browser do it. Then it does not clear
220 // with preset sequences. The second way is to clear it with javascript.
221 // on reload, this will create confusion with preset sequences.
222 // The third way is simply to redirect to the empty page. Thats good
223 // because it also resets to the users preferred datatset.
224 // var i = document.getElementById("sequence_input");
226 window.location="index.pl";
231 <form method="post" action="blast_result.pl" name="blastform">
232 <input type="hidden" checked="checked" name="filterq" value="1" />
233 <input type="hidden" name="interface_type" value="simple" />
234 <input type="hidden" name="outformat" value="0" />
235 <table align="center" summary="" cellpadding="0" cellspacing="15">
236 <tr><td><b>Sequence Set</b> </td><td>$databases <a style="font-size: 80%" title="View details of each database" href="dbinfo.pl">db details</a></td></tr>
237 <tr><td><b>Program</b> </td><td>$programs</td></tr>
238 <tr><td colspan="2" align="center"><b>Query sequence</b><div style="font-size: 80%">single sequence only, use Advanced for multiple</div><textarea name="sequence" id="sequence_input" rows="8" cols="80">$preload_seq</textarea></td></tr>
240 <table width="100%"><tr>
243 .info_table_html
('Expect (e-value) Threshold' => '<input type="text" size="6" value="1e-10" name="expect" />',
244 'Substitution Matrix' => simple_selectbox_html
( name
=> 'matrix',
# each choice pairs a matrix name with its display label; the label must
# name the same matrix, matching the <option> list of the advanced form
245 choices
=> [ [ 'BLOSUM62', 'BLOSUM62 (default)' ],
246 [ 'BLOSUM80', 'BLOSUM80 (recent divergence)' ],
247 [ 'BLOSUM45', 'BLOSUM45 (ancient divergence)' ],
252 'Max. hits to show' => '<input type="text" name="maxhits" size="6" value="100" />',
253 ' ' => '<div style="text-align: right"><input type="reset" value="Clear" onclick="clearField(); "/> <input type="submit" name="search" value="Search" /></div>',
254 'Show Graphics' => simple_selectbox_html
( name
=> 'output_graphs',
255 choices
=> [ [ 'bioperl_histogram', 'all' ],
257 [ 'bioperl_only', 'alignment summary only' ],
258 [ 'histogram_only', 'conservedness histogram only' ],
263 __tableattrs
=> 'width="100%"',
272 <script language="JavaScript" type="text/javascript">
280 #advanced blast interface form
# Builds the output-format selector ($mselect, the -m values 0 to 11) and
# emits a multipart form posting to blast_result.pl. Unlike the simple
# form, which fixes them through hidden inputs, this form lets the user
# choose the output format and toggle query filtering, and it accepts a
# multi-FASTA file upload in addition to the textarea.
282 my $mselect = simple_selectbox_html
( name
=> 'outformat',
283 choices
=> [ [0 => '0 - pairwise (default)'],
284 [1 => '1 - query-anchored showing identities'],
285 [2 => '2 - query-anchored no identities'],
286 [3 => '3 - flat query-anchored, show identities'],
287 [4 => '4 - flat query-anchored, no identities'],
288 [5 => '5 - query-anchored no identities and blunt ends'],
289 [6 => '6 - flat query-anchored, no identities and blunt ends'],
290 [7 => '7 - XML Blast output'],
291 [8 => '8 - tabular'],
292 [9 => '9 - tabular with comment lines'],
293 [10 => '10 - ASN, text'],
294 [11 => '11 - ASN, binary'],
299 <table style="border: 1px solid gray; padding: 1em 2em 1em 2em; background: #eeeeff;"><tr><td><img src="/documents/img/info_icon.png" border="0" style="margin-right: 1em; vertical-align: middle" /></td><td style="padding-top: 0.35em">This version of the BLAST online tool allows multiple query sequences, more control over running options, and more report formats.</td></tr></table>
300 <form method="post" action="blast_result.pl" name="blastform" enctype="multipart/form-data">
301 <input type="hidden" name="interface_type" value="advanced" />
302 <table id="blastinput" align="center" summary="" cellpadding="0" cellspacing="15">
303 <tr><td><b>Database (<tt>-d</tt>)</b> </td><td>$databases <a style="font-size: 80%" title="View details of each database" href="dbinfo.pl">db details</a></td></tr>
304 <tr><td><b>Program (<tt>-p</tt>)</b> </td><td>$programs</td></tr>
306 <td><b>Query sequences (<tt>-i</tt>)</b></td>
308 <textarea class="fix" id="sequence_input" name="sequence" rows="8" cols="65">$preload_seq</textarea><br />
309 <b>AND/OR upload multi-fasta query file</b> <input type="file" name="file" />
312 <tr><td><b>Output format (<tt>-m</tt>)</b></td>
315 <tr><td><b>Substitution Matrix (<tt>-M</tt>)</b></td>
317 <select name="matrix">
318 <option value="BLOSUM62">BLOSUM62 (default)</option>
319 <option value="BLOSUM80">BLOSUM80 (recent divergence)</option>
320 <option value="BLOSUM45">BLOSUM45 (ancient divergence)</option>
321 <option value="PAM30">PAM30</option>
322 <option value="PAM70">PAM70</option>
326 <tr><td><b>Expectation value (<tt>-e</tt>)</b> </td>
327 <td ><input type="text" size="6" value="1e-10" name="expect" /></td>
329 <tr><td><b>Max DB seqs to show hits from (<tt>-b</tt>)</b></td>
330 <td ><input type="text" name="maxhits" size="6" value="100" /></td>
332 <tr><td><b>Filter query sequence (DUST with blastn, SEG with others) (<tt>-F</tt>)</b></td>
333 <td><input type="checkbox" checked="checked" name="filterq" /></td>
337 <b>Show Graphics</b><br /><span style="font-size: 80%">not available for multiple query seqs</span>
340 <select name="output_graphs">
341 <option value="none">none</option>
342 <option value="bioperl_only">alignment summary only</option>
343 <option value="histogram_only">conservedness histogram only</option>
344 <option value="bioperl_histogram" selected="selected">all</option>
349 <tr><td align="right"><input type="reset" value="Clear" /></td><td align="center"><input type="submit" name="search" value="Submit" style="background: red; font-size: 130%" /></td></tr>
352 <script language="JavaScript" type="text/javascript">
362 ##########################################################################################################################
364 memoize
'_cached_file_modtime';
365 sub _cached_file_modtime
{
# Builds the linked database/program select boxes for the BLAST form.
# The caller passes the requested database id ($params{db_id} at the call
# site); it is used below as $db_id.
# Returns the result of hierarchical_selectboxes_html with 'database' as the
# parent select and 'program' as the child select. An apology <span> is
# returned instead under a condition on a line not visible here, presumably
# when no databases are available.
369 sub blast_db_prog_selects
{
372 my $db_choices = blast_db_choices
();
374 return '<span class="ghosted">The BLAST service is temporarily unavailable, we apologize for the inconvenience</span>'
377 # DB select box will be either the db_id supplied, or what the user last selected, or the tomato combined blast db
# NOTE(review): the user-preference lookup is commented out on the next
# line, so only db_id and the Tomato Combined lookup appear to be in effect
378 my $selected_db_id = $db_id #|| $prefs->get_pref('last_blast_db_id')
# id of a web-visible database titled like 'SGN Tomato Combined' that
# passes the _cached_file_modtime check
380 my ($d) = map $_->blast_db_id,
381 grep _cached_file_modtime
($_),
382 grep $_->web_interface_visible,
383 CXGN
::BlastDB
->search_ilike( title
=> '%SGN Tomato Combined%' );
# display labels for each BLAST program
387 my %prog_descs = ( blastn
=> 'BLASTN (nucleotide to nucleotide)',
388 blastx
=> 'BLASTX (nucleotide to protein; query translated to protein)',
389 blastp
=> 'BLASTP (protein to protein)',
390 tblastx
=> 'TBLASTX (protein to protein; both database and query are translated)',
391 tblastn
=> 'TBLASTN (protein to nucleotide; database translated to protein)',
# one list of [program, label] choices per reference entry in $db_choices:
# protein databases offer blastx/blastp, the other list is
# blastn/tblastx/tblastn
394 my @program_choices = map {
396 if ($db->type eq 'protein') {
397 [map [$_,$prog_descs{$_}], 'blastx','blastp']
399 [map [$_,$prog_descs{$_}], 'blastn','tblastx','tblastn']
401 } grep ref, @
$db_choices;
# replace every reference entry by its second element; plain (non-reference)
# entries are kept unchanged
403 @
$db_choices = map {ref($_) ?
$_->[1] : $_} @
$db_choices;
405 return hierarchical_selectboxes_html
( parentsel
=> { name
=> 'database',
406 choices
=> $db_choices,
407 ( $selected_db_id ?
(selected
=> $selected_db_id) : () ),
409 childsel
=> { name
=> 'program' },
410 childchoices
=> \
@program_choices
# Returns the list of BLAST database choices, served from a Storable cache
# file when possible and rebuilt with _build_blast_db_choices otherwise.
# Takes no arguments visible here; it reads the file-level $c and %params.
414 sub blast_db_choices
{
416 my $choices_cache_filename = $c->path_to( $c->generated_file_uri('blast','choices_cache.dat') );
417 my $lockfile = "$choices_cache_filename.lock";
# unless the flush_cache request parameter is set, try the cache first,
# holding a shared lock on the lock file
419 unless( $params{flush_cache
} ) {
420 my $l = File
::Flock
->new($lockfile,'shared');
421 my $cache_modtime = (stat($choices_cache_filename))[9];
# only use a cache file that exists and was modified in the last 15 minutes
423 if( $cache_modtime && $cache_modtime > time - 15*60 ) {
424 my $data = retrieve
( $choices_cache_filename );
425 return $data if $data;
# cache not used: lock again, this time without the 'shared' mode
# (presumably exclusive — confirm against File::Flock), and rebuild
429 my $l = File
::Flock
->new($lockfile);
430 my $choices = _build_blast_db_choices
();
# failing to write the cache only produces a warning
431 nstore
( $choices, $choices_cache_filename )
432 or warn "WARNING: $! caching blast db choices in file '$choices_cache_filename'";
# Assembles the list behind the database select box. For each BLAST database
# group that has at least one usable database it adds a '__<group name>'
# entry followed by [db, bdb_opt(db)] pairs; a further set of pairs is pushed
# under an '__Other' entry. The end of this sub is not visible here.
436 sub _build_blast_db_choices
{
# groups are visited in 'ordinal, name' order; a group with no usable
# databases contributes nothing
440 my @db_choices = map {
441 my @dbs = map [ $_, bdb_opt
($_) ],
442 grep _cached_file_modtime
($_), #filter for dbs that are on disk
443 $_->blast_dbs( web_interface_visible
=> 't'); #get all dbs in this group
444 @dbs ?
('__'.$_->name, @dbs) : ()
445 } CXGN
::BlastDB
::Group
->search_like(name
=> '%',{order_by
=> 'ordinal, name'});
# databases with no group (blast_db_group_id undef) that are web-visible,
# ordered by title and filtered by the same _cached_file_modtime check
448 grep _cached_file_modtime
($_),
449 CXGN
::BlastDB
->search(
450 blast_db_group_id
=> undef,
451 web_interface_visible
=> 't',
452 { order_by
=> 'title' }
455 push @db_choices, ( '__Other',
456 map [$_,bdb_opt
($_)],
465 # my $timestamp = _cached_file_modtime($db)
467 # $timestamp = strftime(' (%F)',gmtime _cached_file_modtime($db));
468 my $seq_count = $db->sequences_count;
470 [$db->blast_db_id, $db->title ]