bug fix. Now it tests correctly if either of the tests evaluate to false or not
[sgn.git] / cgi-bin / search / est.pl
blob36ad105e3bff6991c5dfe22e7c2d06a2a6564384
1 #!/usr/bin/perl -w
2 use strict;
3 use warnings;
4 use CXGN::Page;
5 use CXGN::Page::FormattingHelpers qw/blue_section_html page_title_html html_break_string/;
6 use CXGN::Chromatogram;
7 use URI::Escape;
8 use CatalystX::GlobalContext '$c';
9 use CXGN::DB::Connection;
11 my %current_unigene = ();
12 my %previous_unigene = ();
14 our ($auto_idq, $request_typeq, $cloneq, $clone_read_idq,
15 $clone_groupq, $alt_readq, $arrayq, $readq, $estq, $unigeneq,
16 $by_clone_idq, $by_read_idq, $mspot_cloneq, $mid_cloneq, $table,
17 $h_cgq, $h_cq, $h_traceq, $h_estq, $h_unigeneq, $randomq,
18 $max_estid, $blastq, $microarray_byunigeneq, $try_clone_groupq,
19 $marker_mappingq, $mapped_memberq, $trace_nameq, $clone_nameq,
20 @known_request_from_types);
23 our $page = CXGN::Page->new( "SGN EST Search Result", "Koni");
25 my $dbh = CXGN::DB::Connection->new('sgn');
27 $auto_idq = $dbh->prepare_cached("SELECT internal_id, internal_id_type, t1.comment, t2.comment from id_linkage as il LEFT OUTER JOIN types as t1 ON (t1.type_id=il.internal_id_type) LEFT OUTER JOIN types as t2 ON (t2.type_id=il.link_id_type) where il.link_id=?");
29 $request_typeq = $dbh->prepare_cached("SELECT comment from types where type_id=?");
31 $cloneq = $dbh->prepare_cached("SELECT c.clone_name, c.clone_group_id, l.library_shortname, l.tissue, l.development_stage, l.order_routing_id, o.organism_id, o.organism_name from clone as c, library as l, organism as o where c.clone_id=? and c.library_id=l.library_id and l.organism_id=o.organism_id");
33 $clone_read_idq = $dbh->prepare_cached(<<EOS);
34 SELECT r.clone_id,
35 e.read_id
36 FROM seqread as r,
37 est as e
38 WHERE e.est_id=?
39 AND e.read_id=r.read_id
40 EOS
42 $try_clone_groupq = $dbh->prepare_cached("SELECT c2.clone_id, c2.clone_name from clone as c1 LEFT JOIN clone as c2 ON (c1.clone_group_id=c2.clone_group_id) where c1.clone_id=?");
44 $clone_groupq = $dbh->prepare_cached("SELECT clone_id, clone_name from clone where clone_group_id=?");
46 $alt_readq = $dbh->prepare_cached("SELECT clone.clone_name, seqread.read_id, direction, facility_shortname, est_id from clone LEFT JOIN seqread USING (clone_id) LEFT JOIN facility USING (facility_id) LEFT JOIN est ON (seqread.read_id=est.read_id) where clone.clone_id=? and (est.status=0 and est.flags=0)");
48 $arrayq = $dbh->prepare_cached("SELECT chip_name, version, release, spot_id from microarray where clone_id=?");
50 $readq = $dbh->prepare_cached("SELECT r.trace_name, r.direction, f.facility_shortname, f.funding_agency, f.attribution_display, s.name from seqread as r LEFT OUTER JOIN facility as f USING (facility_id) LEFT OUTER JOIN clone ON (r.clone_id=clone.clone_id) LEFT OUTER JOIN library USING (library_id) LEFT OUTER JOIN submit_user as s ON (library.submit_user_id=s.submit_user_id) where r.read_id=?");
52 $estq = $dbh->prepare_cached("SELECT est.basecaller, version, seq, status, flags, hqi_start, hqi_length, entropy, expected_error, quality_trim_threshold, vs_status from est LEFT JOIN qc_report USING (est_id) where est.est_id=?");
54 $unigeneq = $dbh->prepare_cached(<<EOS);
55 SELECT unigene_member.unigene_id,
56 unigene_build.unigene_build_id,
57 groups.comment,
58 build_nr,
59 build_date,
60 nr_members,
61 est.est_id
62 FROM est LEFT JOIN unigene_member USING (est_id)
63 LEFT JOIN unigene USING (unigene_id)
64 LEFT JOIN unigene_build USING (unigene_build_id)
65 LEFT JOIN groups ON (organism_group_id=groups.group_id)
66 WHERE est.est_id=?
67 AND unigene_build.status=?
68 EOS
70 $by_clone_idq = $dbh->prepare_cached("SELECT read_id, direction, facility_id, date from seqread where clone_id=?");
72 $by_read_idq = $dbh->prepare_cached("SELECT est_id, version from est where read_id=?");
73 $mspot_cloneq = $dbh->prepare_cached("SELECT clone_id from microarray where release=? and version=? and spot_id=?");
74 $mid_cloneq = $dbh->prepare_cached("SELECT clone_id from microarray where microarray_id=?");
76 $h_cgq = $dbh->prepare_cached("SELECT clone_group_id, clone_name
77 FROM clone where clone_id=?");
79 $h_cq = $dbh->prepare_cached("SELECT clone_id, clone_name from clone where clone_group_id=? and clone_id<>?");
81 $h_traceq = $dbh->prepare_cached("SELECT read_id, trace_name FROM seqread where
82 clone_id=?");
83 $h_estq = $dbh->prepare_cached("SELECT est_id, version FROM est where read_id=?");
85 $h_unigeneq = $dbh->prepare_cached(<<EOS);
86 SELECT unigene.unigene_id,
87 unigene_build.build_nr,
88 groups.comment
89 FROM unigene_member
90 LEFT JOIN unigene ON (unigene_member.unigene_id=unigene.unigene_id)
91 LEFT JOIN unigene_build ON (unigene.unigene_build_id=unigene_build.unigene_build_id)
92 LEFT JOIN groups ON (unigene_build.organism_group_id=groups.group_id)
93 WHERE unigene_member.est_id=?
94 AND unigene_build.status IS NOT NULL
95 AND unigene_build.status <> 'D'
96 EOS
98 $blastq = $dbh->prepare_cached("SELECT db_name, blast_program, hits_stored from blast_annotations LEFT JOIN blast_targets USING (blast_target_id) where apply_id=? and apply_type=15");
100 #$randomq = $dbh->prepare_cached("SELECT est_id from est where status=0 and flags=0 order by random() limit 1000");
102 $microarray_byunigeneq = $dbh->prepare_cached("select est.est_id from unigene LEFT JOIN unigene_member USING (unigene_id) LEFT JOIN est USING (est_id) LEFT JOIN seqread using (read_id) LEFT JOIN clone using (clone_id) INNER JOIN microarray using (clone_id) where unigene.unigene_id=? order by clone.clone_id");
104 $marker_mappingq = $dbh->prepare_cached("select marker_id, alias from marker_alias where marker_id in (select marker_id from marker_derived_from where derived_from_source_id = 1 and id_in_source = ?) and preferred is true");
106 $mapped_memberq = $dbh->prepare_cached("select ests_mapped_by_clone.clone_id from unigene_member INNER JOIN est USING (est_id) INNER JOIN seqread USING (read_id) INNER JOIN ests_mapped_by_clone USING (clone_id) where unigene_id=?");
108 $trace_nameq = $dbh->prepare_cached("select read_id from seqread where trace_name=?");
110 $clone_nameq = $dbh->prepare_cached("select clone_id from clone where clone_name=?");
112 @known_request_from_types = ( "web user", "SGN database generated link", "SGN BLAST generated link","Random Selection" );
114 # This is used commonly when building the HTML below, saves having to
115 # type (or read) these commonly desired settings
116 $table = 'table cellspacing="0" cellpadding="0" border="0" width="100%"';
118 my ($request_id, $request_type, $request_from, $show_hierarchy, $random) =
119 $page->get_arguments("request_id","request_type","request_from",
120 "show_hierarchy","random");
122 # If the identifier is not given, or the identifier parameter is
123 # screwed up, we want to give the right error instead of failing
124 # later on for an unrelated reason.
125 $page->message_page("No EST identifier specified") unless $request_id || ($random eq 'yes');
128 if ($random eq "yes") {
129 $request_type=7;
130 $request_from=3;
131 ($request_id) = $dbh->selectrow_array("select est_id from est where status=0 and flags=0 order by random() limit 1");
135 if ($request_id eq "" || $request_type eq "") {
136 if ($request_from==1 || $request_from==2) {
137 $page->error_page("Invalid Direct Search from SGN-generated URL. Requested \"$request_id\" set type \"$request_type\"");
138 } else {
139 invalid_search($page);
143 if ($request_from<0 || $request_from>$#known_request_from_types) {
144 $request_from = "external (unknown - unsupported)";
145 } else {
146 $request_from = $known_request_from_types[$request_from];
149 my ($id, $id_type) = ($request_id, $request_type);
153 # If a user-entered generic identifier has been entered, take it to the
154 # id linkage table for resolution to an SGN internal identifier and type
155 my ($id_type_name, $link_id_type_name);
156 if ($id_type eq "automatic") {
158 # Check for internal types. These will not be in the id_linkage table
159 if ($id =~ m/^SGN[|-]C([0-9]+)$/i) {
160 $id_type = 8;
161 $id_type_name = "SGN Clone Identifier";
162 $id = $1;
163 } elsif ($id =~ m/^SGN[|-]T([0-9]+)$/i) {
164 $id_type = 9;
165 $id_type_name = "SGN Chromatogram Identifer";
166 $id = $1;
167 } elsif ($id =~ m/^SGN[|-]E([0-9]+)$/i) {
168 $id_type = 7;
169 $id_type_name = "SGN EST Identifier";
170 $id = $1;
171 } else {
172 # OK, try the id_linkage table then
173 if ($request_id =~ m/^([A-Z]{3,4})([0-9]+)([A-P][0-9]{1,2})$/i) {
174 $request_id = "$1-$2-$3";
176 $auto_idq->execute($request_id);
177 if ($auto_idq->rows == 0) {
178 not_found($page, "Identifier \"$id\" was not found in SGN's databases.");
181 if ($auto_idq->rows > 1) {
182 ($id) = $auto_idq->fetchrow_array();
183 show_list($page, $id);
186 ($id, $id_type, $id_type_name, $link_id_type_name) =
187 $auto_idq->fetchrow_array();
189 } else {
190 if ($id_type =~ m/\d+/) {
191 $request_typeq->execute($id_type);
192 ($id_type_name) = $request_typeq->fetchrow_array();
197 # Resolve to an EST identifier, either the internal id found above,
198 # or the direct reference from an SGN-generated link
199 my $est_id = "";
200 my $match_id = "";
201 if ($id_type == 7) {
202 # This is an EST identifier already
203 invalid_search($page, "Invalid identifier \"$id\" for specified identifier type (SGN-E#)") if ($id !~ m/^(SGN-E|E|)([0-9]+)$/);
204 $match_id = "SGN-E$2";
205 $est_id = $2;
207 } elsif ($id_type == 8) {
208 invalid_search($page, "Invalid identifier \"$id\" for specified identifier type (SGN-C#)") if ($id !~ m/^(SGN-C|C|)([0-9]+)$/);
209 # This is a clone internal identifier - find a trace, then find an EST
210 if ($show_hierarchy) {
211 hierarchy_requested($page, $id, $id_type);
213 $match_id = "SGN-C$2";
214 $est_id = by_clone($page, $2);
215 } elsif ($id_type == 9) {
216 # This is a trace identifier, find the most recent sequence for it
217 invalid_search($page, "Invalid identifier \"$id\" for specified identifier type (SGN-T#)") if ($id !~ m/^(SGN-T|T|)([0-9]+)$/);
218 $match_id = "SGN-T$2";
219 $est_id = by_read($page, $2);
220 } elsif ($id_type == 10) {
221 if ($request_id =~ m/^([A-Z]{3,4})([0-9]+)([A-P][0-9]{1,2})$/i) {
222 $request_id = "$1-$2-$3";
224 $clone_nameq->execute($request_id);
225 if ($clone_nameq->rows == 0) {
226 not_found($page, "Identifier \"$request_id\" was not found in SGN's databases.");
228 ($match_id) = $clone_nameq->fetchrow_array();
229 $est_id = by_clone($page, $match_id);
230 } elsif ($id_type == 11) {
231 $trace_nameq->execute($request_id);
232 if ($trace_nameq->rows == 0) {
233 not_found($page, "Identifier \"$request_id\" was not found in SGN's databases.");
235 ($match_id) = $trace_nameq->fetchrow_array();
236 $est_id = by_read($page, $match_id);
237 } elsif ($id_type == 14) {
238 # This is a microarray identifier.
239 invalid_search($page, "Invalid identifier \"$id\" for specified identifier type (SGN/CGEP/TMD Microarray Spot Identifier)") if ($id !~ m/^(SGN-S|S|)([0-9]+-[0-9]+-[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/);
240 $match_id = "SGN-S$2";
241 $est_id = by_mspot($page, $2);
244 # This is an EST search result page, we must now have an internal EST
245 # identifier (est_id in table est) to continue
246 if ($est_id eq "") {
247 not_traceable_to_est($page, $request_id, $id, $id_type);
250 # Now that we have an EST identifier, try to find a chromatogram that
251 # it is linked to, and a clone that is linked to that chromatogram.
253 # NOTE: This step may go backwards (unnecessarily) from the above
254 # Fetch out the clone_id and read_id for this EST.
255 my ($clone_id, $read_id);
256 $clone_read_idq->execute($est_id);
257 ($clone_id, $read_id) = $clone_read_idq->fetchrow_array();
259 # Section containing information on what was searched for and what was found
260 my $search_info = <<EOF;
261 <$table>
262 <tr><td width="50%"><b>Request:</b> $request_id</td><td><b>Match:</b> $match_id</td></tr>
263 <tr><td><b>Request From:</b> $request_from</td><td><b>Match Type:</b> $id_type_name</td></tr>
264 </table>
268 # Look up clone info and library information
269 my $clone_info = "";
270 my $clone_name = "";
271 my @clone_group = ();
273 my $organism_id = "";
274 if ($clone_id ne "" && $cloneq->execute($clone_id) && $cloneq->rows>0) {
275 my ($clone_name, $clone_group_id, $library_name, $tissue,
276 $development_stage, $order_routing_id, $organism_id, $organism_name) =
277 $cloneq->fetchrow_array();
279 # If there are nulls in the table we'll get undefs for these values, so we
280 # set displayed values here.
281 $organism_name = "Unknown" if !defined($organism_name);
282 $tissue = "Unknown" if !defined($tissue);
284 # If we have a clone group, seek out the replicate clone_ids so we can
285 # search them below for alternative reads
286 if ($clone_group_id) {
287 $clone_groupq->execute($clone_group_id);
288 while(my ($cid, $cname) = $clone_groupq->fetchrow_array()) {
289 push @clone_group, [ $cid, $cname ];
291 } else {
292 # If there is no clone group in the database, make a fake singleton
293 # group here for ease below
294 @clone_group = ([ $clone_id, $clone_name ]);
297 # Microarray Information
298 my $microarray = "";
299 if ($organism_id == 1 || $organism_id == 2 || $organism_id == 3) {
300 foreach ( @clone_group ) {
301 $arrayq->execute($_->[0]);
302 my ($chip_name, $version, $release, $spot_id);
303 while (($chip_name, $version, $release, $spot_id) =
304 $arrayq->fetchrow_array) {
305 if ($_->[0] != $clone_id) {
306 $microarray .= qq{Alias clone <a href="/search/est.pl?request_id=$_->[0]&request_type=8&request_from=1">SGN-C$_->[0]</a> is on microarray $chip_name: SGN-S$version-$release-$spot_id\n};
307 } else {
308 $microarray .= qq{SGN-C$_->[0] is on microarray $chip_name spot ID $version-$release-$spot_id [<a href="http://bti.cornell.edu/CGEP/CGEP.html">Order</a>] [<a href="http://ted.bti.cornell.edu/cgi-bin/array/basicsearch.cgi?arrayID=$version-$release-$spot_id">Tomato Microarray Database</a>]\n};
312 $microarray ||= '<span class="ghosted">This clone is not found on any microarray</span>';
313 $microarray = "<b>Microarray:</b> $microarray<br />";
316 my $ordering;
317 if ($order_routing_id) {
318 $ordering = qq{<table cellpadding="1" cellspacing="0" border="0"><tr><td><a href="/search/clone-order.pl?add_clone=$clone_name"><img src="/documents/img/sgn-cart.gif" border="0" alt="cart" /></a></td><td>Order Clone</td></tr></table>};
319 } else {
320 $ordering = qq{<table cellpadding="1" cellspacing="0" border="0"><tr><td><img src="/documents/img/sgn-nocart.gif" border="0" alt="nocart" /></td><td><span class="ghosted">Ordering Not Available</span></td></tr></table>};
323 # Section with information about the selected clone
324 # Note: The closing table & outer table cell/row tags are omitted here, because
325 # "the group" requested that the EST search result page show whether or
326 # not a current unigene contains a microarray'd clone. This query
327 # is not possible until we get the unigene section, so we'll have some
328 # more things to add to the Clone section later.
330 # So much for organizing the script into little sections.
331 $clone_info = <<EOF;
332 <$table>
333 <tr><td width="40%"><b>SGN ID:</b> SGN-C$clone_id</td><td><b>Clone name:</b> $clone_name</td><td width="20%" rowspan="2">$ordering</td></tr>
334 <tr><td><b>Library Name:</b> $library_name</td><td><b>Organism:</b> $organism_name</td></tr>
335 </table>
336 <br />
337 <b>Tissue:</b> $tissue<br />
338 <b>Development Stage:</b> $development_stage<br />
339 <br />
340 $microarray
343 } else {
344 # no clone information was found
345 $clone_info = '<div><span class="ghosted">No clone information found</span></div>';
346 $clone_name = "clone name unknown";
349 # Search for information on the chromatogram
350 my $read_info = "";
351 my $seqdir = "";
352 if ($read_id && $readq->execute($read_id) && $readq->rows>0) {
353 $readq->execute($read_id);
355 my ($chroma_name, $facility_name, $submitter_name, $funding_agency,
356 $atb_organization, $atb_display);
357 ($chroma_name, $seqdir, $facility_name, $atb_organization, $atb_display,
358 $submitter_name) = $readq->fetchrow_array();
360 if (!defined($seqdir)) {
361 $seqdir = "Unknown";
362 } elsif ($seqdir eq "5") {
363 $seqdir = "5'";
364 } elsif ($seqdir eq "3") {
365 $seqdir = "3'";
366 } else {
367 $seqdir = "Unknown";
370 $submitter_name = '<span class="ghosted">None</span>' unless $submitter_name;
371 $atb_display ||= $atb_organization;
372 if ($atb_display) {
373 $atb_display = "<b>Funding Organization:</b>&nbsp;$atb_display";
376 my $view_link='[<span class="ghosted">View</span>]';
377 # my $view_link=" [<a href=\"/tools/trace_view.pl?read_id=$read_id&est_id=$est_id\">View</a>]";
378 my $tmp_tracename;
379 if($tmp_tracename=CXGN::Chromatogram::has_abi_chromatogram($read_id))
381 my $path_to_remove = $c->path_to( $c->tempfiles_subdir('traceimages') );
382 $tmp_tracename=~s/$path_to_remove//;
383 my $file=URI::Escape::uri_escape("$tmp_tracename");
384 $view_link=" [<a href=\"/tools/trace_view.pl?file=$file&temp=yes\">View</a>]";
387 $read_info = <<EOF;
388 <$table>
389 <tr><td width="50%"><b>SGN-ID:</b> SGN-T$read_id [<a href="/search/trace_download.pl?read_id=$read_id">Download</a>]$view_link</td>
390 <td><b>Facility Assigned ID:</b> $chroma_name\n</td>
391 </tr>
392 <tr><td><b>Submitter:</b> $submitter_name</td>
393 <td><b>Sequencing Facility:</b> $facility_name</td>
394 </tr>
395 </table>
397 $atb_display
399 } else {
400 $read_info='<span class="ghosted">No chromatogram information found for this sequence</span>';
403 # Find alternate reads for this clone group
404 my $alt_reads = "";
405 my $see_also = "";
406 foreach ( @clone_group ) {
407 $alt_readq->execute($_->[0]);
408 my ($clone_name, $trace_id, $dir, $facility, $eid);
409 while(($clone_name, $trace_id, $dir, $facility, $eid)
410 = $alt_readq->fetchrow_array()) {
411 next if $eid==$est_id;
412 if (!defined($dir)) {
413 $dir = "Unknown";
414 } elsif ($dir eq "5") {
415 $dir = "5'";
416 } elsif ($dir eq "3") {
417 $dir = "3'";
418 } else {
419 $dir = "Unknown";
422 # This is hacked in here as an after thought, to draw the viewer's
423 # attention to the additional sequencing section.
424 if ($seqdir eq "5'" && $dir eq "3'") {
425 $see_also = "[See links to 3' reads above]";
426 } elsif ($seqdir eq "3'" && $dir eq "5'") {
427 $see_also = "[See links to 5' reads above]";
430 $facility = "Unknown" if !defined($facility);
431 $alt_reads .= <<EOF
432 <tr><td><b>Clone:</b> SGN-C$_->[0] [$clone_name]</td>
433 <td><b>Trace:</b> SGN-T$trace_id</td>
434 <td><b>EST:</b> <a href="/search/est.pl?request_id=$eid&amp;request_type=7&amp;request_from=1">SGN-E$eid</a></td>
435 <td><b>Direction:</b> $dir</td>
436 <td><b>Facility:</b> $facility</td>
437 </tr>
441 if($alt_reads) {
442 $alt_reads = "<table>$alt_reads</table>" ;
443 } else {
444 $alt_reads = '<span class="ghosted">No additional reads found.</span>';
448 $estq->execute($est_id);
449 if ($estq->rows == 0) {
450 not_found($page,"No database entry was found for EST identifier SGN-E$est_id");
452 my ($basecaller,$version,$seq,$status,$flags,$start,$length,$entropy,$expected_error,$qtrim_threshold,$vs_status) = $estq->fetchrow_array();
454 my $fasta_header;
455 my $seq_display = "";
456 my $untrim_length = length($seq);
457 my $seq_length;
458 if (defined($start) && defined($length) && ($length > 10) ) {
459 if ($flags) {
460 $fasta_header = "&gt;SGN-E$est_id [$clone_name] (trimmed - flagged)";
461 } else {
462 $fasta_header = "&gt;SGN-E$est_id [$clone_name] (trimmed)";
464 $seq = substr $seq,$start,$length;
465 $seq_length = qq|${length} bp <span class="ghosted">(${untrim_length} bp untrimmed)</span>|;
466 } else {
467 if ($status & 0x1) {
468 $fasta_header = ">SGN-E$est_id [$clone_name] (called/trimmed by facility)<br />";
469 $seq_length = "${untrim_length} bp (called/trimmed by facility)<br />";
470 } else {
471 $fasta_header = ">SGN-E$est_id [$clone_name] (untrimmed)<br />";
472 $seq_length = qq{<span class="ghosted">${untrim_length} bp (untrimmed)</span>};
477 $seq_display = html_break_string($seq,95);
479 my $display_status = "";
480 if ($status == 0) {
481 $display_status = "Current Version";
483 if ($status & 0x1) {
484 $display_status .= "Legacy ";
486 if ($status & 0x2) {
487 $display_status .= "Discarded ";
489 if ($status & 0x4) {
490 $display_status .= "Deprecated ";
492 if ($status & 0x8) {
493 $display_status .= "Censored ";
495 if ($status & 0x10) {
496 $display_status .= "Vector/Quality trimming not applied ";
498 if ($status & 0x20) {
499 $display_status .= "Contaminants not assessed ";
501 if ($status & 0x40) {
502 $display_status .= "Chimera not assessed ";
505 my $insert_recovery = "";
506 my $vector_signature = "";
507 if (!($status & 0x11)) {
508 $expected_error = sprintf "%7.4f",$expected_error;
509 $entropy = sprintf "%5.3f",$entropy;
512 my @vector_sig_strings =
513 ("5' sequence read -- flanking 3' vector arm detected.",
514 "3' sequence read -- flanking 5' vector arm detected.",
515 "5' sequence read, incomplete (flanking vector not found)",
516 "3' sequence read, incomplete (flanking vector not found)",
517 #make these red and bold
518 (map {'<span style="color: red; font-weight: bold;">'.$_.'</span>'}
519 ('No vector sequence detected',
520 'Vector found but pattern inconsistent with a normal insert',
521 'Multiple cloning site sequence detected -- chimeric clone suspected.',
526 $vector_signature = $vector_sig_strings[$vs_status]
527 || '<span class="ghosted">Not available</span>';
530 ### render any flags as html
531 my @flag_strings = ('Vector anomaly',
532 'Possibly chimeric (anomalous insert into vector)',
533 'Too short after trimming low-quality bases',
534 'High expected error (low overall quality)',
535 'Low complexity',
536 'E. Coli (cloning host) sequence detected',
537 'rRNA contamination detected',
538 "Possibly chimeric (ends match significantly different genes in arabidopsis)",
539 "Possibly chimeric, detected by unigene assembly preclustering",
540 "Manually censored by SGN staff");
542 my $flags_display = "";
543 if ($flags == 0) {
544 $flags_display = "Passed all screens and filters";
545 } else {
546 my @flags = ();
548 my $ind = 0;
549 foreach my $str (@flag_strings) {
550 push @flags,$str if $flags & (1<<$ind++);
553 $flags_display = qq{<$table><tr><td width="10%"><b>Problems: </b></td><td>} . join("<br />",@flags) . "</td></tr></table>";
556 $insert_recovery= <<EOF;
557 <$table><tr><td width="50%"><b>Processed By:</b> SGN</td>
558 <td><b>Basecalling Software:</b> phred</td>
559 </tr>
560 </table>
561 <b>Vector Signature:</b> $vector_signature<br />
562 $flags_display
563 <table cellspacing="0" cellpadding="0" border="0" width="90%" align="center">
564 <tr><td><b>Sequence Entropy:</b> $entropy</td>
565 <td><b>Expected Error Rate:</b> $expected_error</td>
566 <td><b>Quality Trim Threshold:</b> $qtrim_threshold</td>
567 </tr>
568 </table>
570 } else {
571 $insert_recovery = "<span class=\"ghosted\">Processing information not available for this sequence</span>";
574 my $sequence_info = <<END_HTML;
575 <$table>
576 <tr><td width="50%"><b>Sequence Id:</b> SGN-E$est_id</td><td><b>Length:</b> $seq_length</td></tr>
577 <tr><td><b>Status:</b> $display_status</td><td><b>Direction:</b> $seqdir $see_also</td></tr>
578 </table>
579 <div class="sequence" style="margin: 1em">
580 $fasta_header
581 $seq_display
582 </div>
583 <center>
584 [<a href="/tools/blast/?preload_id=$est_id&amp;preload_type=7">BLAST</a>]&nbsp;&nbsp;[<a href="/tools/sixframe_translate.pl?est_id=$est_id">AA Translate</a>]
585 </center>
586 END_HTML
588 my $unigene_content;
589 my $other_estid = "";
590 my $alt_microarray = "";
591 my $alt_mapped = "";
592 my @recent = ();
593 $unigeneq->execute($est_id, "C");
594 while(my ($unigene_id, $build_id, $og_name, $build_nr, $build_date,
595 $nr_members, $eid) = $unigeneq->fetchrow_array()) {
596 # Used to indicate below that we have pulled up unigenes from other ESTs
597 # that are from the same chromatogram
598 $other_estid = $eid if ($eid != $est_id);
601 # unless(defined($organism_id)) {$organism_id=0;}
602 unless($organism_id) {$organism_id=0;}
604 if ($organism_id == 1 || $organism_id == 2 || $organism_id == 3) {
605 $microarray_byunigeneq->execute($unigene_id);
606 if ($microarray_byunigeneq->rows > 0) {
607 my $alt_microarray_found = 0;
608 while(my ($eid) = $microarray_byunigeneq->fetchrow_array()) {
609 next if ($eid == $est_id);
610 $alt_microarray_found = 1;
611 last;
613 if ($alt_microarray_found) {
614 $alt_microarray .= qq{See unigene <a href="/search/unigene.pl?unigene_id=$unigene_id">SGN-U$unigene_id</a> for alternatives which are available on a microarray<br />};
619 $marker_mappingq->execute($clone_id);
620 if ($marker_mappingq->rows > 0) {
621 while (my ($marker_id, $alias) = $marker_mappingq->fetchrow_array()){
622 $alt_mapped .= qq{This clone has been mapped as <a href="/search/markers/markerinfo.pl?marker_id=$marker_id">$alias</a>.<br />}
624 } else {
625 $mapped_memberq->execute($unigene_id);
626 if ($mapped_memberq->rows > 0) {
627 my $alt_mapped_found = 0;
628 while (my ($cid) = $mapped_memberq->fetchrow_array()) {
629 next if ($cid == $clone_id);
630 $alt_mapped_found = 1;
632 if ($alt_mapped_found) {
633 $alt_mapped .= qq{See unigene <a href="/search/unigene.pl?unigene_id=$unigene_id">SGN-U$unigene_id</a> for alternative clones/ESTs which are mapped};
637 push @recent, <<EOF;
638 <tr><td>[SGN-E$eid] </td>
639 <td><a href="/search/unigene.pl?unigene_id=$unigene_id&amp;highlight=$eid">SGN-U$unigene_id</a></td>
640 <td>$og_name </td>
641 <td>Build $build_nr </td>
642 <td>$nr_members ESTs assembled</td>
643 </tr>
647 if (!$alt_microarray &&
648 ($organism_id == 1 || $organism_id == 2 || $organism_id == 3)) {
649 $alt_microarray = '<span class="ghosted">No alternative clones from any current unigene containing this EST are available on a microarray</span>';
652 $alt_mapped ||= "<div><span class=\"ghosted\">There is no map position defined on SGN for this EST or others in the same unigene.</span></div>";
656 $unigene_content = '<table align="center" cellspacing="0" cellpadding="0" border="0" width="100%">';
658 if ($other_estid) {
659 $unigene_content .= qq{<tr><td colspan="6"><span class="ghosted">Note: Some unigenes listed here are assembled from different versions of the sequence displayed above. Note SGN-E# in left column. All versions were derived from the same chromatogram</span></td></tr><tr><td colspan="6"><br /></td></tr>};
662 if (@recent) {
663 $unigene_content .= <<EOF;
664 <tr><td colspan="6"><b>Current Unigene builds</b></td></tr>
665 @recent
666 <tr><td colspan="6" align="center"><span class="ghosted">Follow SGN-U# link for detailed information and annotations</span></td></tr>
668 } else {
669 $unigene_content .= <<EOF;
670 <tr><td align="left" colspan="6"><b>Current Unigene builds</b></td></tr>
671 <tr><td colspan="6"><span class="ghosted">No current unigene builds incorporate this sequence</td></tr>
675 $unigene_content .= "</table>";
678 $page->header();
679 print page_title_html("EST details &mdash; $match_id");
680 print blue_section_html('Search information',$search_info);
681 print blue_section_html('Clone information',<<EOH);
682 $clone_info
683 $alt_microarray
684 $alt_mapped
686 $alt_reads .= qq{<center style="margin-top: 0.8em"><a href="/search/est.pl?request_id=$clone_id&amp;request_type=8&amp;request_from=1&amp;show_hierarchy=1">[Show information hierarchy]</a></center>};
687 print blue_section_html('Additional sequencing',$alt_reads);
688 print blue_section_html('Sequence',$sequence_info);
689 print blue_section_html('Unigenes',$unigene_content);
690 print blue_section_html('Chromatogram',$read_info);
691 print blue_section_html('Quality processing',$insert_recovery);
693 $page->footer();
695 sub by_clone {
696 my ($page, $id) = @_;
698 my $read_id = "";
699 $by_clone_idq->execute($id);
700 if ($by_clone_idq->rows == 0) {
701 $try_clone_groupq->execute($id);
702 if ($try_clone_groupq->rows > 1) {
703 try_clone_group($page, $id);
705 not_found($page, "Your search resolved to a clone identifier (SGN-C$id) that was not found in SGN's databases. No alias clones were found.");
706 } elsif ($by_clone_idq->rows == 1) {
707 ($read_id) = $by_clone_idq->fetchrow_array();
708 } else {
709 my @reads = sort by_clone_sort @{$by_clone_idq->fetchall_arrayref()};
710 ($read_id) = $reads[0]->[0];
713 return by_read($page, $read_id);
716 # Hard coded layout here, as well as codes for facility ids.
717 sub by_clone_sort {
719 if ($a->[1] ne $b->[1]) {
720 # Promotes 5' over 3' reads
721 return -1 if $a->[1] eq "5";
722 return 1;
724 if ($a->[2] != $b->[2]) {
725 # Promotes TIGR and Genoscope facilities, but ignores the rest
726 return -1 if $a->[2] == 1;
727 return 1 if $b->[2] == 1;
728 return -1 if $a->[2] == 4;
729 return 1 if $b->[2] == 4;
731 # Lastly, sort by submission date
732 return $b->[2] - $a->[2];
735 sub by_read {
736 my ($page, $id) = @_;
738 $by_read_idq->execute($id);
740 if ($by_read_idq->rows == 0) {
741 not_found($page, "Your search resolved to a trace identifier (SGN-T$id) that was not found in SGN's databases");
742 } elsif ($by_read_idq->rows == 1) {
743 my ($est_id) = $by_read_idq->fetchrow_array();
744 return $est_id;
747 my @ests = sort by_read_sort @{$by_read_idq->fetchall_arrayref()};
748 return $ests[0]->[0];
751 sub by_read_sort {
752 # Sort by version
753 if ($a->[1] != $b->[1]) {
754 return $b->[1] - $a->[1];
756 # Default to identifier sort if necessary
757 return $b->[0] - $a->[0];
760 sub by_mspot {
761 my ($page, $id) = @_;
763 if ($id =~ m/([0-9]+)-([0-9])+-([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/) {
764 $mspot_cloneq->execute($1,$2,$3);
765 if ($mspot_cloneq->rows>0) {
766 my ($clone_id) = $mspot_cloneq->fetchrow_array();
767 return by_clone($page, $clone_id);
770 not_found($page, "Microarray spot identifier (SGN-S$id) was not found in SGN's databases");
771 } else {
772 $id =~ m/([0-9]+)/;
773 $mid_cloneq->execute($1);
774 if ($mid_cloneq->rows>0) {
775 my ($clone_id) = $mid_cloneq->fetchrow_array();
776 return by_clone($page, $clone_id);
779 not_found($page, "Your search was resolved to a microarray entry (SGN-S$id) that was not found in SGN's databases");
784 sub try_clone_group {
785 my ($page, $id) = @_;
787 $page->header();
789 my $clones = "";
790 while(my ($clone_id, $clone_name) = $try_clone_groupq->fetchrow_array()) {
791 $clones .= "<tr><td><a href=\"/search/est.pl?request_id=$clone_id&amp;request_from=1&amp;request_type=8\">SGN-C$clone_id</a></td><td>[$clone_name]</td></tr>";
794 print <<EOF;
795 <center>
796 <h4>Direct Search Result - Not Found</h4>
797 </center>
799 <p>Your search resolved to a clone identifier (SGN-C$id) that has no associated reads. An alias group was detected, however. See links below for other clone ids/names that are expected to be from identical stock. </p>
801 <p>This may happen for cases where clones were "rearrayed" and resequenced, but the resequenced data has not been recovered yet or the reactions failed. In this case, the rearrayed clone identifier and name will have no associated sequence, but the original clone may have usable reads available in the database.</p>
802 <br /><br />
803 <$table>$clones</table>
804 <br /><br />
807 $page->footer();
809 exit 0;
812 sub invalid_search {
813 my ($page, $message) = @_;
815 $page->header();
817 unless (defined($message)) {$message=''};
819 print <<EOF;
820 <center>
821 <h4>Not Found - Search Invalid</h4>
822 </center>
825 if ($message) {
826 print <<EOF;
827 <p>$message</p>
831 $page->footer();
833 exit (0);
836 sub not_found {
837 my ($page, $message) = @_;
839 $page->header();
841 print <<EOF;
842 <center>
843 <h4>Direct Search Result - Not Found</h4>
844 </center>
847 if ($message) {
848 print <<EOF;
849 <p>$message</p>
853 $page->footer();
855 exit(0);
858 sub not_traceable_to_est {
859 my ($page, $id) = @_;
861 $page->header();
863 print <<EOF;
864 <center>
865 <h4>Direct Search Result - Not Found</h4>
866 </center>
868 <p>The identifier $id of the specified type can not be traced to an EST in SGN\'s databases</p>
869 <br />
872 $page->footer();
874 exit(0);
877 sub show_list {
878 my ($page, $id) = @_;
880 my $content = build_tree($page, $id);
882 $page->header();
884 print "<tr><td>$content</td></tr>";
885 print "<br />\n";
886 $page->footer();
888 exit(0);
891 sub hierarchy_requested {
892 my ($page, $id, $id_type) = @_;
894 if ($id_type != 8) {
895 # shit a brick.
898 my $content = build_hierarchy($page, $id);
900 $page->header();
901 print page_title_html("Information Hierarchy for Clone $id");
903 if (!$content) {
904 print <<EOF;
905 No structure was found for identifier $id
907 $page->footer();
908 exit 0;
911 print blue_section_html('Information Hierarchy',$content);
912 $page->footer();
914 exit 0;
917 sub build_hierarchy {
918 my ($page, $clone_id) = @_;
919 my $table = 'table cellpadding="0" cellspacing="1" border="0"';
921 $h_cgq->execute($clone_id);
922 return "" if ($h_cgq->rows == 0);
924 my ($clone_group, $clone_name) = $h_cgq->fetchrow_array();
927 my @clones = ();
928 $clones[0] = [ $clone_id, $clone_name ];
930 if ($clone_group) {
931 $h_cq->execute($clone_group, $clone_id);
932 while(@_ = $h_cq->fetchrow_array()) {
933 push @clones, [ @_ ];
938 my $flat = "";
940 foreach my $clone ( @clones ) {
941 # Lookup the reads
942 $flat .= "Clone SGN-C$clone->[0] - $clone->[1]\n";
944 my ($read_id, $trace_name);
945 $h_traceq->execute($clone->[0]);
946 $h_traceq->bind_columns(\$read_id, \$trace_name);
947 while($h_traceq->fetch()) {
948 $flat .= " "x4 . "Chromatogram SGN-T$read_id - $trace_name\n";
950 my ($est_id, $version);
951 $h_estq->execute($read_id);
952 $h_estq->bind_columns(\$est_id, \$version);
953 while($h_estq->fetch()) {
954 $flat .= " "x8 . "Processed EST <a href=\"/search/est.pl?request_id=$est_id&amp;request_type=7&amp;request_from=1\">SGN-E$est_id</a> - version $version\n";
956 my ($unigene_id, $build, $organism);
957 $h_unigeneq->execute($est_id);
958 $h_unigeneq->bind_columns(\$unigene_id,\$build,\$organism);
959 while($h_unigeneq->fetch()) {
960 $flat .= " "x12 . "Unigene member <a href=\"/search/unigene.pl?unigene_id=$unigene_id\">SGN-U$unigene_id</a> - $organism Build $build\n";
961 $blastq->execute($unigene_id);
962 while(my ($db_name, $blast_program, $n_hits) =
963 $blastq->fetchrow_array()) {
964 $flat .= " "x16 . "$n_hits stored BLAST hits against $db_name [$blast_program]\n";
966 # This is old from when we were displaying the entire blast hit, which we
967 # may want to do still...
969 # my ($match_id, $match_db, $evalue, $score, $identity_percentage,
970 # $alignment_length, $start, $end, $frame, $defline);
971 # $cached_blastq->bind_columns(\$match_db, \$match_id, \$evalue, \$score, \$identity_percentage, \$alignment_length, \$defline);
972 # while($cached_blastq->fetch()) {
973 # my ($whole_shebang, $link_id, $display_id) = $defline =~ m/(gi\|([0-9]+)\|(\S+))/;
974 # $defline =~ s!\Q$whole_shebang\E!$display_id!g;
975 # if (length($defline) > 70) {
976 # $defline = sprintf "%-67.67s...%6.1f%6.0g",$defline,$score,$evalue;
977 # } else {
978 # $defline = sprintf "%-70.70s%6.1f%6.0g",$defline,$score,$evalue;
980 # $defline =~ s!\Q$display_id\E!<a href=http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=protein&list_uids=$link_id&dopt=genpept>$display_id</a>!;
981 # $flat .= " "x16 . "$defline\n";
988 return "<pre>" . $flat . "\n</pre>";