Merge pull request #5134 from solgenomics/topic/fix_seedlot_search
[sgn.git] / cgi-bin / search / est.pl
blobc54ac82acce4399e74bdac7bcb7b64c8285d7085
1 #!/usr/bin/perl -w
2 use strict;
3 use warnings;
4 use CXGN::Page;
5 use CXGN::Page::FormattingHelpers qw/blue_section_html page_title_html html_break_string/;
6 use CXGN::Chromatogram;
7 use URI::Escape;
8 use CXGN::Tools::Text qw | sanitize_string |;
9 use CatalystX::GlobalContext '$c';
10 use CXGN::DB::Connection;
12 my %current_unigene = ();
13 my %previous_unigene = ();
15 our ($auto_idq, $request_typeq, $cloneq, $clone_read_idq,
16 $clone_groupq, $alt_readq, $arrayq, $readq, $estq, $unigeneq,
17 $by_clone_idq, $by_read_idq, $mspot_cloneq, $mid_cloneq, $table,
18 $h_cgq, $h_cq, $h_traceq, $h_estq, $h_unigeneq, $randomq,
19 $max_estid, $blastq, $microarray_byunigeneq, $try_clone_groupq,
20 $marker_mappingq, $mapped_memberq, $trace_nameq, $clone_nameq,
21 @known_request_from_types);
24 our $page = CXGN::Page->new( "SGN EST Search Result", "Koni");
26 my $dbh = CXGN::DB::Connection->new;
28 $auto_idq = $dbh->prepare_cached("SELECT internal_id, internal_id_type, t1.comment, t2.comment from id_linkage as il LEFT OUTER JOIN types as t1 ON (t1.type_id=il.internal_id_type) LEFT OUTER JOIN types as t2 ON (t2.type_id=il.link_id_type) where il.link_id=?");
30 $request_typeq = $dbh->prepare_cached("SELECT comment from types where type_id=?");
32 $cloneq = $dbh->prepare_cached("SELECT c.clone_name, c.clone_group_id, l.library_shortname, l.tissue, l.development_stage, l.order_routing_id, o.organism_id, o.organism_name from clone as c, library as l, organism as o where c.clone_id=? and c.library_id=l.library_id and l.organism_id=o.organism_id");
34 $clone_read_idq = $dbh->prepare_cached(<<EOS);
35 SELECT r.clone_id,
36 e.read_id
37 FROM seqread as r,
38 est as e
39 WHERE e.est_id=?
40 AND e.read_id=r.read_id
41 EOS
43 $try_clone_groupq = $dbh->prepare_cached("SELECT c2.clone_id, c2.clone_name from clone as c1 LEFT JOIN clone as c2 ON (c1.clone_group_id=c2.clone_group_id) where c1.clone_id=?");
45 $clone_groupq = $dbh->prepare_cached("SELECT clone_id, clone_name from clone where clone_group_id=?");
47 $alt_readq = $dbh->prepare_cached("SELECT clone.clone_name, seqread.read_id, direction, facility_shortname, est_id from clone LEFT JOIN seqread USING (clone_id) LEFT JOIN facility USING (facility_id) LEFT JOIN est ON (seqread.read_id=est.read_id) where clone.clone_id=? and (est.status=0 and est.flags=0)");
49 $arrayq = $dbh->prepare_cached("SELECT chip_name, version, release, spot_id from microarray where clone_id=?");
51 $readq = $dbh->prepare_cached("SELECT r.trace_name, r.direction, f.facility_shortname, f.funding_agency, f.attribution_display, s.name from seqread as r LEFT OUTER JOIN facility as f USING (facility_id) LEFT OUTER JOIN clone ON (r.clone_id=clone.clone_id) LEFT OUTER JOIN library USING (library_id) LEFT OUTER JOIN submit_user as s ON (library.submit_user_id=s.submit_user_id) where r.read_id=?");
53 $estq = $dbh->prepare_cached("SELECT est.basecaller, version, seq, status, flags, hqi_start, hqi_length, entropy, expected_error, quality_trim_threshold, vs_status from est LEFT JOIN qc_report USING (est_id) where est.est_id=?");
55 $unigeneq = $dbh->prepare_cached(<<EOS);
56 SELECT unigene_member.unigene_id,
57 unigene_build.unigene_build_id,
58 groups.comment,
59 build_nr,
60 build_date,
61 nr_members,
62 est.est_id
63 FROM est LEFT JOIN unigene_member USING (est_id)
64 LEFT JOIN unigene USING (unigene_id)
65 LEFT JOIN unigene_build USING (unigene_build_id)
66 LEFT JOIN groups ON (organism_group_id=groups.group_id)
67 WHERE est.est_id=?
68 AND unigene_build.status=?
69 EOS
71 $by_clone_idq = $dbh->prepare_cached("SELECT read_id, direction, facility_id, date from seqread where clone_id=?");
73 $by_read_idq = $dbh->prepare_cached("SELECT est_id, version from est where read_id=?");
74 $mspot_cloneq = $dbh->prepare_cached("SELECT clone_id from microarray where release=? and version=? and spot_id=?");
75 $mid_cloneq = $dbh->prepare_cached("SELECT clone_id from microarray where microarray_id=?");
77 $h_cgq = $dbh->prepare_cached("SELECT clone_group_id, clone_name
78 FROM clone where clone_id=?");
80 $h_cq = $dbh->prepare_cached("SELECT clone_id, clone_name from clone where clone_group_id=? and clone_id<>?");
82 $h_traceq = $dbh->prepare_cached("SELECT read_id, trace_name FROM seqread where
83 clone_id=?");
84 $h_estq = $dbh->prepare_cached("SELECT est_id, version FROM est where read_id=?");
86 $h_unigeneq = $dbh->prepare_cached(<<EOS);
87 SELECT unigene.unigene_id,
88 unigene_build.build_nr,
89 groups.comment
90 FROM unigene_member
91 LEFT JOIN unigene ON (unigene_member.unigene_id=unigene.unigene_id)
92 LEFT JOIN unigene_build ON (unigene.unigene_build_id=unigene_build.unigene_build_id)
93 LEFT JOIN groups ON (unigene_build.organism_group_id=groups.group_id)
94 WHERE unigene_member.est_id=?
95 AND unigene_build.status IS NOT NULL
96 AND unigene_build.status <> 'D'
97 EOS
99 $blastq = $dbh->prepare_cached("SELECT db_name, blast_program, hits_stored from blast_annotations LEFT JOIN blast_targets USING (blast_target_id) where apply_id=? and apply_type=15");
101 #$randomq = $dbh->prepare_cached("SELECT est_id from est where status=0 and flags=0 order by random() limit 1000");
103 $microarray_byunigeneq = $dbh->prepare_cached("select est.est_id from unigene LEFT JOIN unigene_member USING (unigene_id) LEFT JOIN est USING (est_id) LEFT JOIN seqread using (read_id) LEFT JOIN clone using (clone_id) INNER JOIN microarray using (clone_id) where unigene.unigene_id=? order by clone.clone_id");
105 $marker_mappingq = $dbh->prepare_cached("select marker_id, alias from marker_alias where marker_id in (select marker_id from marker_derived_from where derived_from_source_id = 1 and id_in_source = ?) and preferred is true");
107 $mapped_memberq = $dbh->prepare_cached("select ests_mapped_by_clone.clone_id from unigene_member INNER JOIN est USING (est_id) INNER JOIN seqread USING (read_id) INNER JOIN ests_mapped_by_clone USING (clone_id) where unigene_id=?");
109 $trace_nameq = $dbh->prepare_cached("select read_id from seqread where trace_name=?");
111 $clone_nameq = $dbh->prepare_cached("select clone_id from clone where clone_name=?");
113 @known_request_from_types = ( "web user", "SGN database generated link", "SGN BLAST generated link","Random Selection" );
115 # This is used commonly when building the HTML below, saves having to
116 # type (or read) these commonly desired settings
117 $table = 'table cellspacing="0" cellpadding="0" border="0" width="100%"';
119 my ($request_id, $request_type, $request_from, $show_hierarchy, $random) =
120 $page->get_arguments("request_id","request_type","request_from",
121 "show_hierarchy","random");
123 $request_id = sanitize_string($request_id);
124 $request_type = sanitize_string($request_type);
125 $request_from = sanitize_string($request_from);
126 $show_hierarchy = sanitize_string($show_hierarchy);
127 $random = sanitize_string($random);
131 # If the identifier is not given, or the identifier parameter is
132 # screwed up, we want to give the right error instead of failing
133 # later on for an unrelated reason.
134 $page->message_page("No EST identifier specified") unless $request_id || ($random eq 'yes');
137 if ($random eq "yes") {
138 $request_type=7;
139 $request_from=3;
140 ($request_id) = $dbh->selectrow_array("select est_id from est where status=0 and flags=0 order by random() limit 1");
144 if ($request_id eq "" || $request_type eq "") {
145 if ($request_from==1 || $request_from==2) {
146 $page->error_page("Invalid Direct Search from SGN-generated URL. Requested \"$request_id\" set type \"$request_type\"");
147 } else {
148 invalid_search($page);
152 if ($request_from<0 || $request_from>$#known_request_from_types) {
153 $request_from = "external (unknown - unsupported)";
154 } else {
155 $request_from = $known_request_from_types[$request_from];
158 my ($id, $id_type) = ($request_id, $request_type);
162 # If a user-entered generic identifier has been entered, take it to the
163 # id linkage table for resolution to an SGN internal identifier and type
164 my ($id_type_name, $link_id_type_name);
165 if ($id_type eq "automatic") {
167 # Check for internal types. These will not be in the id_linkage table
168 if ($id =~ m/^SGN[|-]C([0-9]+)$/i) {
169 $id_type = 8;
170 $id_type_name = "SGN Clone Identifier";
171 $id = $1;
172 } elsif ($id =~ m/^SGN[|-]T([0-9]+)$/i) {
173 $id_type = 9;
174 $id_type_name = "SGN Chromatogram Identifer";
175 $id = $1;
176 } elsif ($id =~ m/^SGN[|-]E([0-9]+)$/i) {
177 $id_type = 7;
178 $id_type_name = "SGN EST Identifier";
179 $id = $1;
180 } else {
181 # OK, try the id_linkage table then
182 if ($request_id =~ m/^([A-Z]{3,4})([0-9]+)([A-P][0-9]{1,2})$/i) {
183 $request_id = "$1-$2-$3";
185 $auto_idq->execute($request_id);
186 if ($auto_idq->rows == 0) {
187 not_found($page, "Identifier \"$id\" was not found in SGN's databases.");
190 if ($auto_idq->rows > 1) {
191 ($id) = $auto_idq->fetchrow_array();
192 show_list($page, $id);
195 ($id, $id_type, $id_type_name, $link_id_type_name) =
196 $auto_idq->fetchrow_array();
198 } else {
199 if ($id_type =~ m/\d+/) {
200 $request_typeq->execute($id_type);
201 ($id_type_name) = $request_typeq->fetchrow_array();
206 # Resolve to an EST identifier, either the internal id found above,
207 # or the direct reference from an SGN-generated link
208 my $est_id = "";
209 my $match_id = "";
210 if ($id_type == 7) {
211 # This is an EST identifier already
212 invalid_search($page, "Invalid identifier \"$id\" for specified identifier type (SGN-E#)") if ($id !~ m/^(SGN-E|E|)([0-9]+)$/);
213 $match_id = "SGN-E$2";
214 $est_id = $2;
216 } elsif ($id_type == 8) {
217 invalid_search($page, "Invalid identifier \"$id\" for specified identifier type (SGN-C#)") if ($id !~ m/^(SGN-C|C|)([0-9]+)$/);
218 # This is a clone internal identifier - find a trace, then find an EST
219 if ($show_hierarchy) {
220 hierarchy_requested($page, $id, $id_type);
222 $match_id = "SGN-C$2";
223 $est_id = by_clone($page, $2);
224 } elsif ($id_type == 9) {
225 # This is a trace identifier, find the most recent sequence for it
226 invalid_search($page, "Invalid identifier \"$id\" for specified identifier type (SGN-T#)") if ($id !~ m/^(SGN-T|T|)([0-9]+)$/);
227 $match_id = "SGN-T$2";
228 $est_id = by_read($page, $2);
229 } elsif ($id_type == 10) {
230 if ($request_id =~ m/^([A-Z]{3,4})([0-9]+)([A-P][0-9]{1,2})$/i) {
231 $request_id = "$1-$2-$3";
233 $clone_nameq->execute($request_id);
234 if ($clone_nameq->rows == 0) {
235 not_found($page, "Identifier \"$request_id\" was not found in SGN's databases.");
237 ($match_id) = $clone_nameq->fetchrow_array();
238 $est_id = by_clone($page, $match_id);
239 } elsif ($id_type == 11) {
240 $trace_nameq->execute($request_id);
241 if ($trace_nameq->rows == 0) {
242 not_found($page, "Identifier \"$request_id\" was not found in SGN's databases.");
244 ($match_id) = $trace_nameq->fetchrow_array();
245 $est_id = by_read($page, $match_id);
246 } elsif ($id_type == 14) {
247 # This is a microarray identifier.
248 invalid_search($page, "Invalid identifier \"$id\" for specified identifier type (SGN/CGEP/TMD Microarray Spot Identifier)") if ($id !~ m/^(SGN-S|S|)([0-9]+-[0-9]+-[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/);
249 $match_id = "SGN-S$2";
250 $est_id = by_mspot($page, $2);
253 # This is an EST search result page, we must now have an internal EST
254 # identifier (est_id in table est) to continue
255 if ($est_id eq "") {
256 not_traceable_to_est($page, $request_id, $id, $id_type);
259 # Now that we have an EST identifier, try to find a chromatogram that
260 # it is linked to, and a clone that is linked to that chromatogram.
262 # NOTE: This step may go backwards (unnecessarily) from the above
263 # Fetch out the clone_id and read_id for this EST.
264 my ($clone_id, $read_id);
265 $clone_read_idq->execute($est_id);
266 ($clone_id, $read_id) = $clone_read_idq->fetchrow_array();
268 # Section containing information on what was searched for and what was found
269 my $search_info = <<EOF;
270 <$table>
271 <tr><td width="50%"><b>Request:</b> $request_id</td><td><b>Match:</b> $match_id</td></tr>
272 <tr><td><b>Request From:</b> $request_from</td><td><b>Match Type:</b> $id_type_name</td></tr>
273 </table>
277 # Look up clone info and library information
278 my $clone_info = "";
279 my $clone_name = "";
280 my @clone_group = ();
282 my $organism_id = "";
283 if ($clone_id ne "" && $cloneq->execute($clone_id) && $cloneq->rows>0) {
284 my ($clone_name, $clone_group_id, $library_name, $tissue,
285 $development_stage, $order_routing_id, $organism_id, $organism_name) =
286 $cloneq->fetchrow_array();
288 # If there are nulls in the table we'll get undefs for these values, so we
289 # set displayed values here.
290 $organism_name = "Unknown" if !defined($organism_name);
291 $tissue = "Unknown" if !defined($tissue);
293 # If we have a clone group, seek out the replicate clone_ids so we can
294 # search them below for alternative reads
295 if ($clone_group_id) {
296 $clone_groupq->execute($clone_group_id);
297 while(my ($cid, $cname) = $clone_groupq->fetchrow_array()) {
298 push @clone_group, [ $cid, $cname ];
300 } else {
301 # If there is no clone group in the database, make a fake singleton
302 # group here for ease below
303 @clone_group = ([ $clone_id, $clone_name ]);
306 # Microarray Information
307 my $microarray = "";
308 if ($organism_id == 1 || $organism_id == 2 || $organism_id == 3) {
309 foreach ( @clone_group ) {
310 $arrayq->execute($_->[0]);
311 my ($chip_name, $version, $release, $spot_id);
312 while (($chip_name, $version, $release, $spot_id) =
313 $arrayq->fetchrow_array) {
314 if ($_->[0] != $clone_id) {
315 $microarray .= qq{Alias clone <a href="/search/est.pl?request_id=$_->[0]&request_type=8&request_from=1">SGN-C$_->[0]</a> is on microarray $chip_name: SGN-S$version-$release-$spot_id\n};
316 } else {
317 $microarray .= qq{SGN-C$_->[0] is on microarray $chip_name spot ID $version-$release-$spot_id [<a href="http://bti.cornell.edu/CGEP/CGEP.html">Order</a>] [<a href="http://ted.bti.cornell.edu/cgi-bin/array/basicsearch.cgi?arrayID=$version-$release-$spot_id">Tomato Microarray Database</a>]\n};
321 $microarray ||= '<span class="ghosted">This clone is not found on any microarray</span>';
322 $microarray = "<b>Microarray:</b> $microarray<br />";
325 my $ordering;
326 if ($order_routing_id) {
327 $ordering = qq{<table cellpadding="1" cellspacing="0" border="0"><tr><td><a href="/search/clone-order.pl?add_clone=$clone_name"><img src="/documents/img/sgn-cart.gif" border="0" alt="cart" /></a></td><td>Order Clone</td></tr></table>};
328 } else {
329 $ordering = qq{<table cellpadding="1" cellspacing="0" border="0"><tr><td><img src="/documents/img/sgn-nocart.gif" border="0" alt="nocart" /></td><td><span class="ghosted">Ordering Not Available</span></td></tr></table>};
332 # Section with information about the selected clone
333 # Note: The closing table & outer table cell/row tags are omitted here, because
334 # "the group" requested that the EST search result page show whether or
335 # not a current unigene contains a microarray'd clone. This query
336 # is not possible until we get the unigene section, so we'll have some
337 # more things to add to the Clone section later.
339 # So much for organizing the script into little sections.
340 $clone_info = <<EOF;
341 <$table>
342 <tr><td width="40%"><b>SGN ID:</b> SGN-C$clone_id</td><td><b>Clone name:</b> $clone_name</td><td width="20%" rowspan="2">$ordering</td></tr>
343 <tr><td><b>Library Name:</b> $library_name</td><td><b>Organism:</b> $organism_name</td></tr>
344 </table>
345 <br />
346 <b>Tissue:</b> $tissue<br />
347 <b>Development Stage:</b> $development_stage<br />
348 <br />
349 $microarray
352 } else {
353 # no clone information was found
354 $clone_info = '<div><span class="ghosted">No clone information found</span></div>';
355 $clone_name = "clone name unknown";
358 # Search for information on the chromatogram
359 my $read_info = "";
360 my $seqdir = "";
361 if ($read_id && $readq->execute($read_id) && $readq->rows>0) {
362 $readq->execute($read_id);
364 my ($chroma_name, $facility_name, $submitter_name, $funding_agency,
365 $atb_organization, $atb_display);
366 ($chroma_name, $seqdir, $facility_name, $atb_organization, $atb_display,
367 $submitter_name) = $readq->fetchrow_array();
369 if (!defined($seqdir)) {
370 $seqdir = "Unknown";
371 } elsif ($seqdir eq "5") {
372 $seqdir = "5'";
373 } elsif ($seqdir eq "3") {
374 $seqdir = "3'";
375 } else {
376 $seqdir = "Unknown";
379 $submitter_name = '<span class="ghosted">None</span>' unless $submitter_name;
380 $atb_display ||= $atb_organization;
381 if ($atb_display) {
382 $atb_display = "<b>Funding Organization:</b>&nbsp;$atb_display";
385 my $view_link='[<span class="ghosted">View</span>]';
386 # my $view_link=" [<a href=\"/tools/trace_view.pl?read_id=$read_id&est_id=$est_id\">View</a>]";
387 my $tmp_tracename;
388 if($tmp_tracename=CXGN::Chromatogram::has_abi_chromatogram($read_id))
390 my $path_to_remove = $c->path_to( $c->tempfiles_subdir('traceimages') );
391 $tmp_tracename=~s/$path_to_remove//;
392 my $file=URI::Escape::uri_escape("$tmp_tracename");
393 $view_link=" [<a href=\"/tools/trace_view.pl?file=$file&temp=yes\">View</a>]";
396 $read_info = <<EOF;
397 <$table>
398 <tr><td width="50%"><b>SGN-ID:</b> SGN-T$read_id [<a href="/search/trace_download.pl?read_id=$read_id">Download</a>]$view_link</td>
399 <td><b>Facility Assigned ID:</b> $chroma_name\n</td>
400 </tr>
401 <tr><td><b>Submitter:</b> $submitter_name</td>
402 <td><b>Sequencing Facility:</b> $facility_name</td>
403 </tr>
404 </table>
406 $atb_display
408 } else {
409 $read_info='<span class="ghosted">No chromatogram information found for this sequence</span>';
412 # Find alternate reads for this clone group
413 my $alt_reads = "";
414 my $see_also = "";
415 foreach ( @clone_group ) {
416 $alt_readq->execute($_->[0]);
417 my ($clone_name, $trace_id, $dir, $facility, $eid);
418 while(($clone_name, $trace_id, $dir, $facility, $eid)
419 = $alt_readq->fetchrow_array()) {
420 next if $eid==$est_id;
421 if (!defined($dir)) {
422 $dir = "Unknown";
423 } elsif ($dir eq "5") {
424 $dir = "5'";
425 } elsif ($dir eq "3") {
426 $dir = "3'";
427 } else {
428 $dir = "Unknown";
431 # This is hacked in here as an after thought, to draw the viewer's
432 # attention to the additional sequencing section.
433 if ($seqdir eq "5'" && $dir eq "3'") {
434 $see_also = "[See links to 3' reads above]";
435 } elsif ($seqdir eq "3'" && $dir eq "5'") {
436 $see_also = "[See links to 5' reads above]";
439 $facility = "Unknown" if !defined($facility);
440 $alt_reads .= <<EOF
441 <tr><td><b>Clone:</b> SGN-C$_->[0] [$clone_name]</td>
442 <td><b>Trace:</b> SGN-T$trace_id</td>
443 <td><b>EST:</b> <a href="/search/est.pl?request_id=$eid&amp;request_type=7&amp;request_from=1">SGN-E$eid</a></td>
444 <td><b>Direction:</b> $dir</td>
445 <td><b>Facility:</b> $facility</td>
446 </tr>
450 if($alt_reads) {
451 $alt_reads = "<table>$alt_reads</table>" ;
452 } else {
453 $alt_reads = '<span class="ghosted">No additional reads found.</span>';
457 $estq->execute($est_id);
458 if ($estq->rows == 0) {
459 not_found($page,"No database entry was found for EST identifier SGN-E$est_id");
461 my ($basecaller,$version,$seq,$status,$flags,$start,$length,$entropy,$expected_error,$qtrim_threshold,$vs_status) = $estq->fetchrow_array();
463 my $fasta_header;
464 my $seq_display = "";
465 my $untrim_length = length($seq);
466 my $seq_length;
467 if (defined($start) && defined($length) && ($length > 10) ) {
468 if ($flags) {
469 $fasta_header = "&gt;SGN-E$est_id [$clone_name] (trimmed - flagged)";
470 } else {
471 $fasta_header = "&gt;SGN-E$est_id [$clone_name] (trimmed)";
473 $seq = substr $seq,$start,$length;
474 $seq_length = qq|${length} bp <span class="ghosted">(${untrim_length} bp untrimmed)</span>|;
475 } else {
476 if ($status & 0x1) {
477 $fasta_header = ">SGN-E$est_id [$clone_name] (called/trimmed by facility)<br />";
478 $seq_length = "${untrim_length} bp (called/trimmed by facility)<br />";
479 } else {
480 $fasta_header = ">SGN-E$est_id [$clone_name] (untrimmed)<br />";
481 $seq_length = qq{<span class="ghosted">${untrim_length} bp (untrimmed)</span>};
486 $seq_display = html_break_string($seq,95);
488 my $display_status = "";
489 if ($status == 0) {
490 $display_status = "Current Version";
492 if ($status & 0x1) {
493 $display_status .= "Legacy ";
495 if ($status & 0x2) {
496 $display_status .= "Discarded ";
498 if ($status & 0x4) {
499 $display_status .= "Deprecated ";
501 if ($status & 0x8) {
502 $display_status .= "Censored ";
504 if ($status & 0x10) {
505 $display_status .= "Vector/Quality trimming not applied ";
507 if ($status & 0x20) {
508 $display_status .= "Contaminants not assessed ";
510 if ($status & 0x40) {
511 $display_status .= "Chimera not assessed ";
514 my $insert_recovery = "";
515 my $vector_signature = "";
516 if (!($status & 0x11)) {
517 $expected_error = sprintf "%7.4f",$expected_error;
518 $entropy = sprintf "%5.3f",$entropy;
521 my @vector_sig_strings =
522 ("5' sequence read -- flanking 3' vector arm detected.",
523 "3' sequence read -- flanking 5' vector arm detected.",
524 "5' sequence read, incomplete (flanking vector not found)",
525 "3' sequence read, incomplete (flanking vector not found)",
526 #make these red and bold
527 (map {'<span style="color: red; font-weight: bold;">'.$_.'</span>'}
528 ('No vector sequence detected',
529 'Vector found but pattern inconsistent with a normal insert',
530 'Multiple cloning site sequence detected -- chimeric clone suspected.',
535 $vector_signature = $vector_sig_strings[$vs_status]
536 || '<span class="ghosted">Not available</span>';
539 ### render any flags as html
540 my @flag_strings = ('Vector anomaly',
541 'Possibly chimeric (anomalous insert into vector)',
542 'Too short after trimming low-quality bases',
543 'High expected error (low overall quality)',
544 'Low complexity',
545 'E. Coli (cloning host) sequence detected',
546 'rRNA contamination detected',
547 "Possibly chimeric (ends match significantly different genes in arabidopsis)",
548 "Possibly chimeric, detected by unigene assembly preclustering",
549 "Manually censored by SGN staff");
551 my $flags_display = "";
552 if ($flags == 0) {
553 $flags_display = "Passed all screens and filters";
554 } else {
555 my @flags = ();
557 my $ind = 0;
558 foreach my $str (@flag_strings) {
559 push @flags,$str if $flags & (1<<$ind++);
562 $flags_display = qq{<$table><tr><td width="10%"><b>Problems: </b></td><td>} . join("<br />",@flags) . "</td></tr></table>";
565 $insert_recovery= <<EOF;
566 <$table><tr><td width="50%"><b>Processed By:</b> SGN</td>
567 <td><b>Basecalling Software:</b> phred</td>
568 </tr>
569 </table>
570 <b>Vector Signature:</b> $vector_signature<br />
571 $flags_display
572 <table cellspacing="0" cellpadding="0" border="0" width="90%" align="center">
573 <tr><td><b>Sequence Entropy:</b> $entropy</td>
574 <td><b>Expected Error Rate:</b> $expected_error</td>
575 <td><b>Quality Trim Threshold:</b> $qtrim_threshold</td>
576 </tr>
577 </table>
579 } else {
580 $insert_recovery = "<span class=\"ghosted\">Processing information not available for this sequence</span>";
583 my $sequence_info = <<END_HTML;
584 <$table>
585 <tr><td width="50%"><b>Sequence Id:</b> SGN-E$est_id</td><td><b>Length:</b> $seq_length</td></tr>
586 <tr><td><b>Status:</b> $display_status</td><td><b>Direction:</b> $seqdir $see_also</td></tr>
587 </table>
588 <div class="sequence" style="margin: 1em">
589 $fasta_header
590 $seq_display
591 </div>
592 <center>
593 [<a href="/tools/blast/?preload_id=$est_id&amp;preload_type=7">BLAST</a>]&nbsp;&nbsp;[<a href="/tools/sixframe_translate.pl?est_id=$est_id">AA Translate</a>]
594 </center>
595 END_HTML
597 my $unigene_content;
598 my $other_estid = "";
599 my $alt_microarray = "";
600 my $alt_mapped = "";
601 my @recent = ();
602 $unigeneq->execute($est_id, "C");
603 while(my ($unigene_id, $build_id, $og_name, $build_nr, $build_date,
604 $nr_members, $eid) = $unigeneq->fetchrow_array()) {
605 # Used to indicate below that we have pulled up unigenes from other ESTs
606 # that are from the same chromatogram
607 $other_estid = $eid if ($eid != $est_id);
610 # unless(defined($organism_id)) {$organism_id=0;}
611 unless($organism_id) {$organism_id=0;}
613 if ($organism_id == 1 || $organism_id == 2 || $organism_id == 3) {
614 $microarray_byunigeneq->execute($unigene_id);
615 if ($microarray_byunigeneq->rows > 0) {
616 my $alt_microarray_found = 0;
617 while(my ($eid) = $microarray_byunigeneq->fetchrow_array()) {
618 next if ($eid == $est_id);
619 $alt_microarray_found = 1;
620 last;
622 if ($alt_microarray_found) {
623 $alt_microarray .= qq{See unigene <a href="/search/unigene.pl?unigene_id=$unigene_id">SGN-U$unigene_id</a> for alternatives which are available on a microarray<br />};
628 $marker_mappingq->execute($clone_id);
629 if ($marker_mappingq->rows > 0) {
630 while (my ($marker_id, $alias) = $marker_mappingq->fetchrow_array()){
631 $alt_mapped .= qq{This clone has been mapped as <a href="/search/markers/markerinfo.pl?marker_id=$marker_id">$alias</a>.<br />}
633 } else {
634 $mapped_memberq->execute($unigene_id);
635 if ($mapped_memberq->rows > 0) {
636 my $alt_mapped_found = 0;
637 while (my ($cid) = $mapped_memberq->fetchrow_array()) {
638 next if ($cid == $clone_id);
639 $alt_mapped_found = 1;
641 if ($alt_mapped_found) {
642 $alt_mapped .= qq{See unigene <a href="/search/unigene.pl?unigene_id=$unigene_id">SGN-U$unigene_id</a> for alternative clones/ESTs which are mapped};
646 push @recent, <<EOF;
647 <tr><td>[SGN-E$eid] </td>
648 <td><a href="/search/unigene.pl?unigene_id=$unigene_id&amp;highlight=$eid">SGN-U$unigene_id</a></td>
649 <td>$og_name </td>
650 <td>Build $build_nr </td>
651 <td>$nr_members ESTs assembled</td>
652 </tr>
656 if (!$alt_microarray &&
657 ($organism_id == 1 || $organism_id == 2 || $organism_id == 3)) {
658 $alt_microarray = '<span class="ghosted">No alternative clones from any current unigene containing this EST are available on a microarray</span>';
661 $alt_mapped ||= "<div><span class=\"ghosted\">There is no map position defined on SGN for this EST or others in the same unigene.</span></div>";
665 $unigene_content = '<table align="center" cellspacing="0" cellpadding="0" border="0" width="100%">';
667 if ($other_estid) {
668 $unigene_content .= qq{<tr><td colspan="6"><span class="ghosted">Note: Some unigenes listed here are assembled from different versions of the sequence displayed above. Note SGN-E# in left column. All versions were derived from the same chromatogram</span></td></tr><tr><td colspan="6"><br /></td></tr>};
671 if (@recent) {
672 $unigene_content .= <<EOF;
673 <tr><td colspan="6"><b>Current Unigene builds</b></td></tr>
674 @recent
675 <tr><td colspan="6" align="center"><span class="ghosted">Follow SGN-U# link for detailed information and annotations</span></td></tr>
677 } else {
678 $unigene_content .= <<EOF;
679 <tr><td align="left" colspan="6"><b>Current Unigene builds</b></td></tr>
680 <tr><td colspan="6"><span class="ghosted">No current unigene builds incorporate this sequence</td></tr>
684 $unigene_content .= "</table>";
687 $page->header();
688 print page_title_html("EST details &mdash; $match_id");
689 print blue_section_html('Search information',$search_info);
690 print blue_section_html('Clone information',<<EOH);
691 $clone_info
692 $alt_microarray
693 $alt_mapped
695 $alt_reads .= qq{<center style="margin-top: 0.8em"><a href="/search/est.pl?request_id=$clone_id&amp;request_type=8&amp;request_from=1&amp;show_hierarchy=1">[Show information hierarchy]</a></center>};
696 print blue_section_html('Additional sequencing',$alt_reads);
697 print blue_section_html('Sequence',$sequence_info);
698 print blue_section_html('Unigenes',$unigene_content);
699 print blue_section_html('Chromatogram',$read_info);
700 print blue_section_html('Quality processing',$insert_recovery);
702 $page->footer();
704 sub by_clone {
705 my ($page, $id) = @_;
707 my $read_id = "";
708 $by_clone_idq->execute($id);
709 if ($by_clone_idq->rows == 0) {
710 $try_clone_groupq->execute($id);
711 if ($try_clone_groupq->rows > 1) {
712 try_clone_group($page, $id);
714 not_found($page, "Your search resolved to a clone identifier (SGN-C$id) that was not found in SGN's databases. No alias clones were found.");
715 } elsif ($by_clone_idq->rows == 1) {
716 ($read_id) = $by_clone_idq->fetchrow_array();
717 } else {
718 my @reads = sort by_clone_sort @{$by_clone_idq->fetchall_arrayref()};
719 ($read_id) = $reads[0]->[0];
722 return by_read($page, $read_id);
725 # Hard coded layout here, as well as codes for facility ids.
726 sub by_clone_sort {
728 if ($a->[1] ne $b->[1]) {
729 # Promotes 5' over 3' reads
730 return -1 if $a->[1] eq "5";
731 return 1;
733 if ($a->[2] != $b->[2]) {
734 # Promotes TIGR and Genoscope facilities, but ignores the rest
735 return -1 if $a->[2] == 1;
736 return 1 if $b->[2] == 1;
737 return -1 if $a->[2] == 4;
738 return 1 if $b->[2] == 4;
740 # Lastly, sort by submission date
741 return $b->[2] - $a->[2];
744 sub by_read {
745 my ($page, $id) = @_;
747 $by_read_idq->execute($id);
749 if ($by_read_idq->rows == 0) {
750 not_found($page, "Your search resolved to a trace identifier (SGN-T$id) that was not found in SGN's databases");
751 } elsif ($by_read_idq->rows == 1) {
752 my ($est_id) = $by_read_idq->fetchrow_array();
753 return $est_id;
756 my @ests = sort by_read_sort @{$by_read_idq->fetchall_arrayref()};
757 return $ests[0]->[0];
760 sub by_read_sort {
761 # Sort by version
762 if ($a->[1] != $b->[1]) {
763 return $b->[1] - $a->[1];
765 # Default to identifier sort if necessary
766 return $b->[0] - $a->[0];
769 sub by_mspot {
770 my ($page, $id) = @_;
772 if ($id =~ m/([0-9]+)-([0-9])+-([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/) {
773 $mspot_cloneq->execute($1,$2,$3);
774 if ($mspot_cloneq->rows>0) {
775 my ($clone_id) = $mspot_cloneq->fetchrow_array();
776 return by_clone($page, $clone_id);
779 not_found($page, "Microarray spot identifier (SGN-S$id) was not found in SGN's databases");
780 } else {
781 $id =~ m/([0-9]+)/;
782 $mid_cloneq->execute($1);
783 if ($mid_cloneq->rows>0) {
784 my ($clone_id) = $mid_cloneq->fetchrow_array();
785 return by_clone($page, $clone_id);
788 not_found($page, "Your search was resolved to a microarray entry (SGN-S$id) that was not found in SGN's databases");
793 sub try_clone_group {
794 my ($page, $id) = @_;
796 $page->header();
798 my $clones = "";
799 while(my ($clone_id, $clone_name) = $try_clone_groupq->fetchrow_array()) {
800 $clones .= "<tr><td><a href=\"/search/est.pl?request_id=$clone_id&amp;request_from=1&amp;request_type=8\">SGN-C$clone_id</a></td><td>[$clone_name]</td></tr>";
803 print <<EOF;
804 <center>
805 <h4>Direct Search Result - Not Found</h4>
806 </center>
808 <p>Your search resolved to a clone identifier (SGN-C$id) that has no associated reads. An alias group was detected, however. See links below for other clone ids/names that are expected to be from identical stock. </p>
810 <p>This may happen for cases where clones were "rearrayed" and resequenced, but the resequenced data has not been recovered yet or the reactions failed. In this case, the rearrayed clone identifier and name will have no associated sequence, but the original clone may have usable reads available in the database.</p>
811 <br /><br />
812 <$table>$clones</table>
813 <br /><br />
816 $page->footer();
818 exit 0;
821 sub invalid_search {
822 my ($page, $message) = @_;
824 $page->header();
826 unless (defined($message)) {$message=''};
828 print <<EOF;
829 <center>
830 <h4>Not Found - Search Invalid</h4>
831 </center>
834 if ($message) {
835 print <<EOF;
836 <p>$message</p>
840 $page->footer();
842 exit (0);
845 sub not_found {
846 my ($page, $message) = @_;
848 $page->header();
850 print <<EOF;
851 <center>
852 <h4>Direct Search Result - Not Found</h4>
853 </center>
856 if ($message) {
857 print <<EOF;
858 <p>$message</p>
862 $page->footer();
864 exit(0);
867 sub not_traceable_to_est {
868 my ($page, $id) = @_;
870 $page->header();
872 print <<EOF;
873 <center>
874 <h4>Direct Search Result - Not Found</h4>
875 </center>
877 <p>The identifier $id of the specified type can not be traced to an EST in SGN\'s databases</p>
878 <br />
881 $page->footer();
883 exit(0);
886 sub show_list {
887 my ($page, $id) = @_;
889 my $content = build_tree($page, $id);
891 $page->header();
893 print "<tr><td>$content</td></tr>";
894 print "<br />\n";
895 $page->footer();
897 exit(0);
900 sub hierarchy_requested {
901 my ($page, $id, $id_type) = @_;
903 if ($id_type != 8) {
904 # shit a brick.
907 my $content = build_hierarchy($page, $id);
909 $page->header();
910 print page_title_html("Information Hierarchy for Clone $id");
912 if (!$content) {
913 print <<EOF;
914 No structure was found for identifier $id
916 $page->footer();
917 exit 0;
920 print blue_section_html('Information Hierarchy',$content);
921 $page->footer();
923 exit 0;
926 sub build_hierarchy {
927 my ($page, $clone_id) = @_;
928 my $table = 'table cellpadding="0" cellspacing="1" border="0"';
930 $h_cgq->execute($clone_id);
931 return "" if ($h_cgq->rows == 0);
933 my ($clone_group, $clone_name) = $h_cgq->fetchrow_array();
936 my @clones = ();
937 $clones[0] = [ $clone_id, $clone_name ];
939 if ($clone_group) {
940 $h_cq->execute($clone_group, $clone_id);
941 while(@_ = $h_cq->fetchrow_array()) {
942 push @clones, [ @_ ];
947 my $flat = "";
949 foreach my $clone ( @clones ) {
950 # Lookup the reads
951 $flat .= "Clone SGN-C$clone->[0] - $clone->[1]\n";
953 my ($read_id, $trace_name);
954 $h_traceq->execute($clone->[0]);
955 $h_traceq->bind_columns(\$read_id, \$trace_name);
956 while($h_traceq->fetch()) {
957 $flat .= " "x4 . "Chromatogram SGN-T$read_id - $trace_name\n";
959 my ($est_id, $version);
960 $h_estq->execute($read_id);
961 $h_estq->bind_columns(\$est_id, \$version);
962 while($h_estq->fetch()) {
963 $flat .= " "x8 . "Processed EST <a href=\"/search/est.pl?request_id=$est_id&amp;request_type=7&amp;request_from=1\">SGN-E$est_id</a> - version $version\n";
965 my ($unigene_id, $build, $organism);
966 $h_unigeneq->execute($est_id);
967 $h_unigeneq->bind_columns(\$unigene_id,\$build,\$organism);
968 while($h_unigeneq->fetch()) {
969 $flat .= " "x12 . "Unigene member <a href=\"/search/unigene.pl?unigene_id=$unigene_id\">SGN-U$unigene_id</a> - $organism Build $build\n";
970 $blastq->execute($unigene_id);
971 while(my ($db_name, $blast_program, $n_hits) =
972 $blastq->fetchrow_array()) {
973 $flat .= " "x16 . "$n_hits stored BLAST hits against $db_name [$blast_program]\n";
975 # This is old from when we were displaying the entire blast hit, which we
976 # may want to do still...
978 # my ($match_id, $match_db, $evalue, $score, $identity_percentage,
979 # $alignment_length, $start, $end, $frame, $defline);
980 # $cached_blastq->bind_columns(\$match_db, \$match_id, \$evalue, \$score, \$identity_percentage, \$alignment_length, \$defline);
981 # while($cached_blastq->fetch()) {
982 # my ($whole_shebang, $link_id, $display_id) = $defline =~ m/(gi\|([0-9]+)\|(\S+))/;
983 # $defline =~ s!\Q$whole_shebang\E!$display_id!g;
984 # if (length($defline) > 70) {
985 # $defline = sprintf "%-67.67s...%6.1f%6.0g",$defline,$score,$evalue;
986 # } else {
987 # $defline = sprintf "%-70.70s%6.1f%6.0g",$defline,$score,$evalue;
989 # $defline =~ s!\Q$display_id\E!<a href=http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=protein&list_uids=$link_id&dopt=genpept>$display_id</a>!;
990 # $flat .= " "x16 . "$defline\n";
997 return "<pre>" . $flat . "\n</pre>";