1 package CXGN
::Transcript
::Unigene
;
5 CXGN::Transcript::Unigene - a class to deal with unigenes in the SGN database
9 The unigene table in the SGN database has complex relationships. This object deals with these relationships and adds some handy accessors for related data, such as membership information and annotation information.
11 It inherits from CXGN::DB::Object for the database connection accessors.
13 Storing information back to the database is not fully supported. Some accessors that associate unigenes with other information will directly modify the database, though. This is indicated in the pod.
17 Lukas Mueller <lam87@cornell.edu> (July 2007)
19 Cobbled together from years of unstructured unigene detail page hacking by too many people.
21 =head1 MEMBER FUNCTIONS
23 This class implements the following functions:
32 use CXGN
::Tools
::WebImageCache
;
33 use CXGN
::Unigene
::Tools
;
34 use CXGN
::Transcript
::CDS
;
35 use CXGN
::Transcript
::EST
;
36 use CXGN
::Transcript
::UnigeneBuild
;
39 use base qw
| CXGN
::DB
::Object
|;
41 =head2 constructor new
45 Ret: a CXGN::Transcript::Unigene object
46 Args: a $dbh database handle, preferably created
47 using CXGN::DB::Connection
48 a $id specifying a unigene
49 if $id is omitted, an empty unigene object is
51 Side Effects: accesses the database
60 my $self = $class->SUPER::new
($dbh);
62 if ( $id && $id=~/SGN-*U(\d+)/i) {
67 $self->set_unigene_id($id);
69 if (!$self->get_unigene_id()) {
79 Usage: my $u = CXGN::Transcript::Unigene->new_randoma($dbh);
80 Desc: returns a random unigene. Used on the unigene
82 Ret: a random unigene object.
92 my $query = "select unigene_id from sgn.unigene LEFT JOIN sgn.unigene_build USING (unigene_build_id) where sgn.unigene_build.status='C' and nr_members>1 order by random() limit 1";
93 my $sth = $dbh->prepare($query);
95 my ($unigene_id) = $sth->fetchrow_array();
97 my $self = $class->new($dbh, $unigene_id);
102 =head2 new_lite_unigene
104 Usage: my $unigene_lite = CXGN::Transcript::Unigene->new_lite_unigene($dbh, $id)
105 Desc: instantiates a "light" unigene object, meaning one
106 that does not have all the accessors populated.
107 List of accessors that can be used on a light object:
109 The purpose of this object is to have a faster unigene
118 sub new_lite_unigene
{
122 my $self = $class->SUPER::new
($dbh);
124 #if ($id=~/SGN-*U(\d+)/i) {
129 $self->set_unigene_id($id);
130 $self->fetch_lite($id);
131 #if (!$self->get_unigene_id()) {
140 my $query = "SELECT unigene_id, unigene_build_id, nr_members, build_nr, database_name, sequence_name, status FROM sgn.unigene LEFT JOIN sgn.unigene_build using(unigene_build_id) WHERE unigene_id=?";
141 my $sth = $self->get_dbh()->prepare($query);
142 $sth->execute($self->get_unigene_id());
143 my ($unigene_id, $unigene_build_id, $nr_members, $build_nr, $database_name, $sequence_name, $status) = $sth->fetchrow_array();
144 $self->set_unigene_id($unigene_id);
145 $self->set_build_id($unigene_build_id);
146 $self->set_build_nr($build_nr);
147 $self->set_nr_members($nr_members);
148 $self->set_alternate_namespace($database_name);
149 $self->set_alternate_identifier($sequence_name);
150 $self->set_status($status);
153 # deprecated. This should be in UnigeneBuild.
155 sub get_unigene_ids_by_build_id
{
157 my $build_id = shift;
158 my $query = "SELECT unigene_id FROM sgn.unigene WHERE unigene_build_id=? ORDER BY unigene_id";
159 my $sth = $dbh->prepare($query);
160 $sth->execute($build_id);
161 my @unigene_ids = ();
162 while (my ($unigene_id) = $sth->fetchrow_array()){
163 push @unigene_ids, $unigene_id;
170 my $query = "SELECT unigene_id, unigene_build_id, nr_members, build_nr, seq, qscores, database_name, sequence_name FROM
171 sgn.unigene LEFT JOIN sgn.unigene_consensi using(consensi_id) LEFT JOIN sgn.unigene_build using(unigene_build_id) WHERE
173 my $sth = $self->get_dbh()->prepare($query);
174 $sth->execute($self->get_unigene_id());
175 my ($unigene_id, $unigene_build_id, $nr_members, $build_nr, $seq, $qscores, $database_name, $sequence_name) =
176 $sth->fetchrow_array();
177 $self->set_unigene_id($unigene_id);
178 $self->set_build_id($unigene_build_id);
179 $self->set_build_nr($build_nr);
180 $self->set_nr_members($nr_members);
181 $self->set_sequence($seq);
182 $self->set_scores($qscores);
183 $self->set_alternate_namespace($database_name);
184 $self->set_alternate_identifier($sequence_name);
186 # if it is a singleton unigene, fetch the sequence data from the
187 # est table, using trimming information in the qc_report table.
188 # (substring(est.seq FROM qc_report.hqi_start::int FOR qc_report.hqi_length::int)) as trimmed
189 if ($nr_members ==1) {
190 my $est_q = "SELECT est.seq as raw, est.qscore, qc_report.hqi_start, hqi_length
192 FROM sgn.unigene_member JOIN sgn.est USING (est_id)
193 LEFT JOIN sgn.qc_report ON (est.est_id=qc_report.est_id)
194 WHERE sgn.unigene_member.unigene_id=?";
195 my $est_h = $self->get_dbh()->prepare($est_q);
196 $est_h->execute($self->get_unigene_id());
197 my ($raw, $scores, $hqi_start, $hqi_length) = $est_h->fetchrow_array();
198 $self->set_sequence( $raw );
200 if( defined $hqi_start && defined $hqi_length ) {
201 # trim both sequence and scores...
203 my $trimmed = substr($raw, $hqi_start, $hqi_length);
204 $self->set_sequence($trimmed);
206 if( defined $scores ) {
207 my @scores = split /\s+/, $scores;
208 my $score_string = join " ", (@scores[$hqi_start..($hqi_length+$hqi_start)]);
210 #print STDERR "length raw: ".length($raw)." START: $hqi_start LENGTH: $hqi_length\n";
211 $self->set_scores($score_string);
216 # get the estscan predicted peptide and cds sequences
218 my $estscan_q = "SELECT cds_id, seq_text, protein_seq, forward_reverse, run_id, score
219 FROM sgn.cds WHERE unigene_id=?";
220 my $estscan_h = $self->get_dbh()->prepare($estscan_q);
221 $estscan_h->execute($self->get_unigene_id());
222 my ($cds_id, $seq_text, $protein_seq, $forward_reverse, $run_id, $score) =
223 $estscan_h->fetchrow_array();
225 $self->set_estscan_protein($protein_seq);
226 $self->set_estscan_cds($seq_text);
227 $self->set_estscan_direction($forward_reverse);
230 =head2 get_unigene_id, set_unigene_id
232 Usage: $id = $unigene->get_unigene_id()
233 Property: the unique id of the unigene as an int.
234 see get_sgn_id for obtaining a formatted
241 return $self->{unigene_id
};
246 $self->{unigene_id
}=shift;
250 =head2 get_status, set_status
252 Usage: $status = $unigene->get_status()
253 Property: the status of the unigene as a single character.
262 return $self->{status
};
267 $self->{status
}=shift;
270 =head2 function get_sgn_id
272 Usage: my $id = $unigene->get_sgn_id()
273 Desc: returns the unigene id formatted in the
274 standard sgn way, eg, SGN-U222222
275 to set the id, use the set_unigene_id
282 return "SGN-U".$self->{unigene_id
};
285 =head2 accessors get_build_id, set_build_id
288 Property: the build_id of the unigene. This id can
289 be used to instantiate a unigene_build object.
299 return $self->{build_id
};
305 $self->{build_id
}=shift;
308 =head2 get_unigene_build
310 Usage: my $unigene_build = $u->get_unigene_build();
311 Desc: returns a L<CXGN::Transcript::UnigeneBuild> object
312 corresponding to the build of this unigene.
313 Ret: a L<CXGN::Transcript::UnigeneBuild> object
320 sub get_unigene_build
{
322 return my $unigene_build = CXGN
::Transcript
::UnigeneBuild
->new($self->get_dbh(), $self->get_build_id());
327 =head2 get_build_nr, set_build_nr
329 Usage: my $build_nr = $u->get_build_nr();
330 Desc: the build nr is a counter of how many builds
331 have been created for a given set of input
332 data. Not to be confused with the unigene_build_id.
333 Ret: the build_nr, an integer.
342 return $self->{build_nr
};
349 $self->{build_nr
}=shift;
356 =head2 accessors get_sequence, set_sequence
358 Usage: my $seq = $unigene->get_sequence()
359 Desc: returns the DNA sequence of the unigene.
360 if the unigene is a contig, returns the sequence
361 of the contig, and if the unigene is a singleton,
362 returns the trimmed est sequence.
368 return $self->{sequence
};
374 $self->{sequence
}=shift;
377 =head2 accessors get_scores, set_scores
379 Usage: my @scores = $unigene->get_scores()
380 Desc: returns a list of score values for each
381 nucleotide position in the unigene.
387 return $self->{scores
};
393 $self->{scores
}=shift;
396 =head2 accessors get_nr_members, set_nr_members
398 Usage: my $member_count = $unigene->get_nr_members();
399 Desc: returns the number of member sequences (ESTs)
400 that compose the unigene.
406 return $self->{nr_members
};
412 $self->{nr_members
}=shift;
415 =head2 add_est_member
420 Args: $est - a CXGN::Transcript::EST object
426 Side Effects: modifies the database in realtime.
433 my $est_object = shift;
438 my $query = "INSERT INTO sgn.unigene_member (unigene_id, est_id, start, end, qstart, qend) VALUES (?,?,?,?,?,?)";
439 my $sth = $self->get_dbh()->prepare($query);
441 $self->get_unigene_id(),
442 $est_object->get_est_id(),
453 =head2 function get_member_est_ids
455 Usage: my @est_ids = $unigene->get_member_est_ids()
456 Desc: returns the ids of the member ESTs as a list
457 Side Effects: accesses the database
461 sub get_member_est_ids
{
463 my $query = "SELECT est_id FROM sgn.unigene_member WHERE unigene_id=?";
464 my $sth = $self->get_dbh()->prepare($query);
465 $sth->execute($self->get_unigene_id());
467 while (my ($est_id) = $sth->fetchrow_array()) {
468 push @est_ids, $est_id;
473 =head2 get_member_ests
475 Usage: my @est_obj = $unigene->get_member_ests()
476 Desc: returns the member ests as a list of
477 CXGN::Transcript::EST objects
478 Ret: a list of CXGN::Transcript::EST objects
480 Side Effects: accesses the database
484 sub get_member_ests
{
486 my @est_ids = $self->get_member_est_ids();
488 foreach my $est_id (@est_ids) {
489 push @ests, CXGN
::Transcript
::EST
->new($self->get_dbh(), $est_id);
494 =head2 get_est_align_coords
497 Desc: returns $start, $stop, $qstart, $qend, $dir
498 for the alignment of a member sequence
506 sub get_est_align_coords
{
510 my $query = "SELECT start, stop, qstart, qend, dir
511 FROM sgn.unigene LEFT JOIN sgn.unigene_member USING (unigene_id)
512 WHERE sgn.unigene.unigene_id=? AND est_id=? ";
513 my $sth = $self->get_dbh()->prepare($query);
514 $sth->execute($self->get_unigene_id(), $est_id);
516 my ($start, $stop, $qstart, $qend, $dir) = $sth->fetchrow_array();
517 # print STDERR "**** EST: $start,$stop, $qstart, $qend, $dir\n";
518 return ($start, $stop, $qstart, $qend, $dir);
522 =head2 get_manual_annotations
524 Usage: my @a = $u->get_manual_annotations();
525 Desc: returns the manual annotations for this unigene.
526 Ret: a list of lists, with the following columns:
529 3. annotation last modified
539 sub get_manual_annotations
{
541 my $query = "SELECT sgn_people.sp_person.first_name || ' ' || sgn_people.sp_person.last_name,
542 manual_annotations.date_entered,
543 manual_annotations.last_modified,
544 manual_annotations.annotation_text,
548 LEFT JOIN sgn.unigene_member USING (unigene_id)
549 LEFT JOIN sgn.est USING (est_id)
550 LEFT JOIN sgn.seqread USING (read_id)
551 LEFT JOIN sgn.clone USING (clone_id)
552 LEFT JOIN sgn.manual_annotations ON (clone.clone_id = manual_annotations.annotation_target_id)
553 LEFT JOIN sgn_people.sp_person ON (manual_annotations.author_id = sgn_people.sp_person.sp_person_id)
554 LEFT JOIN sgn.annotation_target_type ON (manual_annotations.annotation_target_type_id = annotation_target_type.annotation_target_type_id)
555 WHERE sgn.unigene.unigene_id=?
556 AND sgn.annotation_target_type.type_name='clone'
558 my $sth = $self->get_dbh()->prepare($query);
559 $sth->execute($self->get_unigene_id());
561 while (my @data = $sth->fetchrow_array()) {
568 =head2 accessors get_alternate_namespace, set_alternate_namespace
570 Usage: my $db = $unigene->get_alternate_namespace()
572 Property: each unigene can have an alternate namespace associated with
573 it, which is stored in the sgn.unigene.database_name field.
574 the get_alternate_identifier() accessor gets the associated
575 identifier, stored in the sgn.unigene.sequence_name field.
581 sub get_alternate_namespace
{
583 return $self->{alternate_namespace
};
586 sub set_alternate_namespace
{
588 $self->{alternate_namespace
} = shift;
590 =head2 accessors get_alternate_identifier, set_alternate_identifier
592 Usage: my $alternate = $u->get_alternate_namespace().$u->get_alternate_identifier()
593 Desc: gets the alternate identifier. Currently used to store legacy
594 identifiers for coffee unigenes.
601 sub get_alternate_identifier
{
603 return $self->{alternate_identifier
};
606 sub set_alternate_identifier
{
608 $self->{alternate_identifier
} = shift;
613 =head2 function get_arabidopsis_annotations
615 Usage: @annotations = $unigene->get_arabidopsis_annotations(1e-6)
616 Desc: gets the blast-based annotation against arabidopsis
617 Ret: a list of lists of annotations, sorted by score descending.
618 The list of list consists of the following headers:
628 Args: an optional evalue cutoff. Default is 1.
629 Note: the corresponding setter is not implemented.
635 sub get_arabidopsis_annotations
{
637 my $evalue_cutoff = shift || 1;
638 return $self->get_annotations(2, $evalue_cutoff);
641 sub set_arabidopsis_annotations
{
646 =head2 function get_annotation_string
648 Usage: print $unigene->get_annotation_string(1e-10, 200);
649 Desc: Concats the arabidopsis, then genbank deflines with ';'
650 Ret: A string representing the unigene annotation
651 Args: (float) The max evalue for match
652 (int) limit characters of annotation, unlimited by default.
653 Will only cut-off middle of annotation item if it is the
654 first item, otherwise the item is not added
658 sub get_annotation_string
{
660 my ($evalue, $limit) = @_;
661 my @annotations = ();
662 foreach($self->get_arabidopsis_annotations($evalue), $self->get_genbank_annotations($evalue)) {
663 push(@annotations, pop @
$_);
665 my $annotation_string = "";
666 foreach my $annot (@annotations){
667 if(defined($limit) && (length($annotation_string) + length("; $annot")) > $limit){
668 unless($annotation_string){
669 $annotation_string = substr($annot, 0, $limit);
673 $annotation_string .= "; " if($annotation_string);
674 $annotation_string .= $annot;
676 return $annotation_string;
679 =head2 function get_genbank_annotations
681 Usage: my @annotations = $unigene->get_genbank_annotations(1e-7)
682 Desc: retrieves the genbank blast annotations from the db.
683 Ret: a list of lists, see get_arabidopsis_annotations().
690 sub get_genbank_annotations
{
692 my $evalue_cutoff = shift || 1;
693 return $self->get_annotations(1, $evalue_cutoff);
697 sub set_genbank_annotations
{
702 =head2 function get_annotations
707 Args: the blast annotation target database id
708 which is 1 for genbank nr
716 sub get_annotations
{
718 my $blast_target_id = shift;
719 my $evalue_cutoff = shift || 1;
720 my $query = "SELECT blast_annotations.blast_target_id, blast_hits.target_db_id, evalue, score, identity_percentage, apply_start, apply_end, defline FROM sgn.blast_annotations JOIN sgn.blast_hits using(blast_annotation_id) JOIN sgn.blast_defline USING(defline_id) WHERE apply_id=? AND apply_type=15 AND sgn.blast_annotations.blast_target_id=? AND evalue<? ORDER BY score desc";
721 my $sth = $self->get_dbh()->prepare($query);
722 $sth->execute($self->get_unigene_id(), $blast_target_id, $evalue_cutoff);
724 my @annotations = ();
725 while (my @data = $sth->fetchrow_array()) {
726 push @annotations, \
@data;
734 =head2 function get_microarray_info
736 Usage: my @microarray_info = $u->get_microarray_info()
737 Desc: returns information on whether and where this
738 unigene has representation on a microarray.
739 Ret: returns a list of hashrefs with the following keys:
753 sub get_microarray_info
{
758 my $query = "SELECT clone.clone_id as clone_id, est.est_id as est_id, seqread.direction as direction,
759 chip_name, release, microarray.version as version, spot_id,
762 LEFT JOIN sgn.unigene_member USING (unigene_id)
763 LEFT JOIN sgn.est USING (est_id)
764 LEFT JOIN sgn.seqread using (read_id)
765 LEFT JOIN sgn.clone using (clone_id)
766 INNER JOIN sgn.microarray using (clone_id)
767 WHERE sgn.unigene.unigene_id=?
768 ORDER BY sgn.clone.clone_id";
769 my $sth = $self->get_dbh()->prepare($query);
770 $sth->execute($self->get_unigene_id());
772 while (my $hashref = $sth->fetchrow_hashref()) {
773 push @answer, $hashref;
779 =head2 get_mapped_members
781 Usage: my @mapped = $u->get_mapped_members();
782 Desc: returns information on the mapping of
784 Ret: a list of hashrefs with the following keys:
793 sub get_mapped_members
{
797 ests_mapped_by_clone.clone_id as clone_id,
801 INNER JOIN sgn.est USING (est_id)
802 INNER JOIN sgn.seqread USING (read_id)
803 INNER JOIN sgn.ests_mapped_by_clone USING (clone_id)
807 my $sth = $self->get_dbh()->prepare($query);
808 $sth->execute($self->get_unigene_id());
809 my @clone_marker_list = ();
810 while (my $data =$sth->fetchrow_hashref()) {
811 push @clone_marker_list, $data;
813 return @clone_marker_list;
816 =head2 get_cosii_info
830 my $sth=$self->get_dbh()->prepare("select marker_id,alias from sgn.cosii_ortholog inner join sgn.marker using (marker_id) inner join sgn.marker_alias using (marker_id) where preferred='t' and (unigene_id=?)");
831 $sth->execute($self->get_unigene_id());
834 while(my ($marker_id,$marker_name)=$sth->fetchrow_array())
836 push @cosii_data, [$marker_id, $marker_name];
841 ###########################################
842 #end COSII marker section inserted by john.
848 =head2 get_estscan_cds, set_estscan_cds
859 sub get_estscan_cds
{
861 return $self->{estscan_cds
};
865 sub set_estscan_cds
{
867 $self->{estscan_cds
}=shift;
870 =head2 get_estscan_protein, set_estscan_protein
881 sub get_estscan_protein
{
883 return $self->{estscan_protein
};
887 sub set_estscan_protein
{
889 $self->{estscan_protein
}=shift;
892 =head2 get_estscan_direction
903 sub get_estscan_direction
{
905 return $self->{estscan_direction
};
909 sub set_estscan_direction
{
911 $self->{estscan_direction
}=shift;
915 =head2 get_cds_list()
917 Usage: my @cds = $unigene->get_cds_list()
918 Desc: returns the associated CDS objects, which
919 contain cds and protein information
920 see L<CXGN::Transcript::CDS> for more info.
921 Ret: returns a list of CXGN::Transcript::CDS objects
923 Side Effects: accesses the database.
929 my $query = "SELECT cds_id FROM sgn.cds WHERE unigene_id=?";
930 my $sth = $self->get_dbh()->prepare($query);
931 $sth->execute($self->get_unigene_id());
933 while (my ($cds_id) = $sth->fetchrow_array()) {
934 push @cds_ids, CXGN
::Transcript
::CDS
->new($self->get_dbh(), $cds_id);
939 =head2 function gene_ontology_annotations
941 Usage: my @go_annots = $u->gene_ontology_annotations()
942 Desc: returns go annotation information for this unigene
943 Ret: a list of lists, with the following columns:
947 Side Effects: accesses the database.
952 sub gene_ontology_annotations
{
955 my $query = " SELECT g.go_accession, g.description
956 FROM sgn.domain_match AS dm,
959 sgn.interpro_go AS ig,
961 WHERE dm.domain_id = d.domain_id
962 AND d.interpro_id = i.interpro_id
963 AND i.interpro_accession = ig.interpro_accession
964 AND ig.go_accession = g.go_accession
965 AND dm.hit_status = 'T'
966 AND dm.unigene_id=? ";
968 my $sth = $self->get_dbh()->prepare($query);
969 $sth->execute($self->get_unigene_id);
971 my @go_annotations = ();
972 while (my ($go_accession, $go_description) = $sth->fetchrow_array()) {
973 push @go_annotations, [$go_accession, $go_description];
975 return @go_annotations;
982 Usage: my @family_data = $unigene->get_families();
984 Ret: a list of arrayrefs that contain family_id, i_value,
985 family_annotation, and member_count.
987 Side Effects: accesses the database.
997 # $family_q = $dbh->prepare(" SELECT i_value, family.family_id, family_annotation, status
998 # FROM sgn.family_build
999 # INNER JOIN sgn.family USING (family_build_id)
1000 # INNER JOIN sgn.family_member USING (family_id)
1001 # INNER JOIN cds USING (cds_id)
1002 # WHERE unigene_id = ?
1005 # $family_member_q = $dbh->prepare(" SELECT count(family_member_id)
1006 # FROM family_member
1007 # WHERE family_id = ?
1008 # GROUP BY family_id ");
1009 ######################################################
1011 my $family_group_h = $self->get_dbh()->prepare(" SELECT max(group_id)
1012 FROM sgn.family_build
1013 INNER JOIN sgn.family USING (family_build_id)
1014 INNER JOIN sgn.family_member USING (family_id)
1015 INNER JOIN sgn.cds USING (cds_id)
1016 WHERE unigene_id=? ");
1017 $family_group_h->execute($self->get_unigene_id());
1018 my ($group_id) = $family_group_h->fetchrow_array();
1022 my $family_q = $self->get_dbh()->prepare(" SELECT sgn.family.family_id, i_value, family_annotation
1023 FROM $sgn.family_build
1024 JOIN $sgn.family USING (family_build_id)
1025 JOIN $sgn.family_member USING (family_id)
1026 JOIN $sgn.cds USING(cds_id)
1027 WHERE group_id=? AND
1029 --AND $sgn.family_build.status='C' --
1030 GROUP BY family_id, i_value, family_annotation
1031 ORDER BY family_id" );
1033 $family_q ->execute($group_id, $self->get_unigene_id());
1034 my @family_data = ();
1037 while (my ($family_id, $i_value, $family_annotation, $member_count) = $family_q->fetchrow_array()) {
1038 my $member_count_q = $self->get_dbh()->prepare("SELECT count(*) from $sgn.family_member WHERE family_id=?");
1039 $member_count_q->execute($family_id);
1040 my ($member_count) = $member_count_q->fetchrow_array();
1041 push @family_data, [$family_id, $i_value, $family_annotation, $member_count];
1043 return @family_data;
1047 =head2 get_current_unigene_ids
1049 Usage: my @current_unigene_ids = $unigene->get_current_unigene_ids()
1050 Desc: returns the unigene ids of the current unigene build that share
1051 ESTs with the given unigene id.
1052 Ret: a list of unigene ids
1054 Side Effects: accesses the database
1058 sub get_current_unigene_ids
{
1060 #If the build is deprecated, run this query to find the updated unigene(s)
1061 my $unigene_updatedq = $self->get_dbh->prepare
1063 SELECT distinct unigene_id FROM sgn.unigene_member
1064 JOIN sgn.unigene USING (unigene_id)
1065 JOIN sgn.unigene_build USING (unigene_build_id)
1068 (select est_id FROM unigene_member WHERE unigene_id=?)
1070 AND unigene_build_id =
1071 ( SELECT latest_build_id FROM unigene_build
1072 WHERE unigene_build_id =
1073 ( SELECT unigene_build_id FROM unigene
1079 my $unigene_id = $self->get_unigene_id();
1081 $unigene_updatedq ->execute($unigene_id, $unigene_id);
1082 my @unigene_ids = ();
1083 while (my ($updated_id) = $unigene_updatedq->fetchrow_array()) {
1084 push @unigene_ids, $updated_id;
1086 return @unigene_ids;
1089 =head2 get_preceding_unigene_ids
1091 Usage: my @preceding_unigene_ids = $unigene_build->get_preceding_unigene_ids($unigene_id)
1092 Desc: returns the preceding unigene ids. A list because
1093 sometimes unigenes are merged.
1094 Ret: a list of unigene ids from the previous build.
1101 sub get_preceding_unigene_ids
{
1104 #Find the preceding unigene(s) if there is a preceding build
1105 my $unigene_precededq = $self->get_dbh()->prepare
1107 SELECT distinct unigene_id FROM sgn.unigene_member
1108 JOIN sgn.unigene USING (unigene_id)
1109 JOIN sgn.unigene_build USING (unigene_build_id)
1110 WHERE unigene_id != ? AND
1112 (select est_id FROM sgn.unigene_member WHERE unigene_id=?)
1113 AND unigene_build_id =
1114 ( SELECT unigene_build_id FROM sgn.unigene_build
1115 WHERE next_build_id =
1116 ( SELECT unigene_build_id FROM sgn.unigene
1122 $unigene_precededq->execute($self->get_unigene_id(), $self->get_unigene_id(), $self->get_unigene_id());
1123 my @unigene_ids = ();
1124 while (my ($old_id) = $unigene_precededq->fetchrow_array()) {
1125 if ($old_id) { push @unigene_ids, $old_id; }
1127 return @unigene_ids;
1131 =head2 superseding_build_info
1133 Usage: my ($superseding_build_name, $build_nr) =
1134 $unigene->superseding_build_info();
1135 Desc: gets the build name and the build_nr (not the build_id!)
1136 for the current unigene.
1137 Side Effects: Accesses the database.
1141 sub superseding_build_info
{
1144 #Find superseding build name, given unigene_id
1145 my $sth = $self->get_dbh->prepare
1147 SELECT groups.comment, build_nr FROM sgn.unigene_build
1148 JOIN sgn.groups ON (organism_group_id=group_id)
1151 ( SELECT latest_build_id FROM sgn.unigene_build
1154 ( SELECT unigene_build_id FROM sgn.unigene
1160 $sth->execute($self->get_unigene_id());
1161 my ($superseding_build_name, $build_nr) = $sth->fetchrow_array();
1162 return ($superseding_build_name, $build_nr);
1167 =head2 get_unigene_member_count_in_library
1169 Usage: my $count = $u->get_unigene_member_count_in_library($library_id)
1170 Desc: gives the number of ESTs that are present in this
1171 unigene from library $library_id
1172 Ret: the number of ESTs, an integer.
1173 Args: the library_id
1177 sub get_unigene_member_count_in_library
{
1179 my $library_id = shift;
1180 my $query = "SELECT count(*) FROM sgn.unigene
1181 LEFT JOIN sgn.unigene_member USING (unigene_id)
1182 LEFT JOIN sgn.est USING (est_id)
1183 LEFT JOIN sgn.seqread USING (read_id)
1184 LEFT JOIN sgn.clone USING (clone_id)
1185 LEFT JOIN sgn.library USING (library_id)
1186 WHERE sgn.unigene.unigene_id=? AND sgn.library.library_id=?";
1188 my $sth = $self->get_dbh()->prepare($query);
1189 $sth->execute($self->get_unigene_id(), $library_id);
1190 my ($count) = $sth->fetchrow_array();
1194 =head2 function get_member_library_ids
1196 Usage: my @library_ids = $unigene->get_member_library_ids()
1197 Desc: returns a list of library ids that are the source of
1198 the member ests of this unigene, sorted by the
1199 number of member sequences (descending)
1207 sub get_member_library_ids
{
1210 my $query = "SELECT library_id, count(*) as c FROM sgn.unigene_member
1211 JOIN sgn.est USING(est_id)
1212 JOIN sgn.seqread USING(read_id)
1213 JOIN sgn.clone using(clone_id)
1217 my $sth = $self->get_dbh()->prepare($query);
1218 $sth->execute($self->get_unigene_id());
1219 my @library_ids = ();
1220 while (my ($library_id, $count) = $sth->fetchrow_array()) {
1221 push @library_ids, $library_id;
1223 return @library_ids;
1226 =head2 get_associated_loci
1228 Usage: my @associated_loci = $unigene->get_associated_loci($obsolete);
1229 Desc: gets all the loci that are associated with this unigene
1230 Ret: a list of CXGN::Phenome::Locus objects
1231 Args: $obsolete ['f' or 't' optional] passing this arg will fetch only the non-obsolete links, or all the obsolete
1232 unigene-locus associations (may be useful for an unobsolete function)
1233 Side Effects: accesses the database
1237 sub get_associated_loci
{
1238 my ( $self, $obs ) = @_;
1240 my $query = "SELECT locus_id FROM phenome.locus_unigene WHERE unigene_id=?";
1241 my @bind = ( $self->get_unigene_id );
1243 if( defined $obs ) {
1245 $obs eq 't' || $obs eq 'f'
1246 or croak
"obsolete must be either 't' or 'f' if passed";
1248 $query .= " AND phenome.locus_unigene.obsolete = ?";
1252 my $sth = $self->get_dbh()->prepare_cached($query);
1253 $sth->execute(@bind);
1256 while (my ($locus_id) = $sth->fetchrow_array()) {
1257 push @loci, CXGN
::Phenome
::Locus
->new($self->get_dbh(), $locus_id);
1262 =head2 function get_preferred_protein
1264 Usage: my $cds_id = $u->get_preferred_protein()
1265 Desc: selects the so-called preferred protein among
1266 all the protein predictions for this unigene.
1267 The preferred protein is set using the function
1268 determine_and_set_preferred_protein().
1269 Ret: the cds_id of the preferred protein prediction.
1276 sub get_preferred_protein
{
1278 my $query = "SELECT cds_id FROM cds WHERE unigene_id=? and preferred='t'";
1280 my $sth = $self->get_dbh()->prepare($query);
1282 $sth->execute($self->get_unigene_id());
1284 my ($cds_id) = $sth->fetchrow_array();
1290 =head2 function determine_and_set_preferred_protein
1292 Usage: $unigene->determine_and_set_preferred_protein()
1293 Desc: determines which of the predicted proteins associated with
1294 the unigene is the best one. Uses a simple metric. First,
1295 the longest protein is considered the best one. Then, each
1296 protein is considered sorted by its length, and the first
1297 to match the direction to the best blastmatch is selected
1298 as the preferred protein. If there is only one associated
1299 protein, it will be selected as the preferred one.
1302 Side Effects: modifies the database.
1303 first, sets all the preferred flags of the associated cds to 'f',
1304 then sets the preferred flag of the "best" cds/protein to 't'.
1309 sub determine_and_set_preferred_protein
{
1311 my $query = "SELECT cds_id FROM cds WHERE unigene_id=? ORDER BY length(cds.protein_seq)";
1313 my $sth = $self->get_dbh()->prepare($query);
1315 $sth->execute($self->get_unigene_id());
1318 while (my ($cds_id) = $sth->fetchrow_array()) {
1319 push @cds, CXGN
::Transcript
::CDS
->new ($self->get_dbh(), $cds_id);
1323 # change all preferred statuses to false
1324 foreach my $c (@cds) {
1325 $c->set_preferred(0);
1329 if (! @cds) { return ; }
1331 # we only have one predicted protein. it's the preferred one by default!
1332 $cds[0]->set_preferred(1);
1336 # verify that the direction is the same as for the best hits of some blast targets.
1337 my @arabidopsis_hits = $self->get_arabidopsis_annotations();
1338 my @genbank_hits = $self ->get_genbank_annotations();
1340 my $best_genbank_blast_hit_direction = "";
1341 my $best_arabidopsis_blast_hit_direction= "";
1343 if (@arabidopsis_hits) {
1344 my $dir = $arabidopsis_hits[0]->[6] - $arabidopsis_hits[0]->[5];
1346 $best_arabidopsis_blast_hit_direction="R";
1348 else { $best_arabidopsis_blast_hit_direction = "F"; }
1352 if (@genbank_hits) {
1353 my $dir = $genbank_hits[0]->[6] - $genbank_hits[0]->[5];
1355 $best_genbank_blast_hit_direction = "R";
1358 $best_genbank_blast_hit_direction = "F";
1362 my $have_preferred = 0;
1363 foreach my $c (@cds) {
1364 if ($best_genbank_blast_hit_direction) {
1365 if ($c->get_direction() eq $best_genbank_blast_hit_direction) {
1366 $c->set_preferred(1);
1368 $have_preferred = 1;
1372 if ($best_arabidopsis_blast_hit_direction) {
1373 if ($c->get_direction() eq $best_arabidopsis_blast_hit_direction) {
1374 $c->set_preferred(1);
1382 if (!$have_preferred) {
1383 $cds[0]->set_preferred(1);
1388 foreach my $c (@cds) {
1389 if ($c->get_preferred()) {
1399 =head2 function get_unigene_member_image
1401 Usage: my $html = $unigene->get_unigene_member_image(@ests_to_be_hilited);
1402 Desc: returns an image tag and associated html for
1403 displaying the unigene member image. This image is currently
1404 being created using an external C program which this
1407 Args: a listref of EST ids to be highlighted in the image
1408 a boolean to force re-load the image
1414 sub get_unigene_member_image
{
# Builds the HTML fragment for the unigene member alignment overview.
#
# Positional arguments (taken with shift below):
#   $highlight_ref - array reference of EST ids to highlight (dereferenced
#                    into @highlight when true)
#   $force_image   - true value forces the image branch regardless of the
#                    member count and is handed to the cache via set_force()
#
# Returns $alignment_content: either the cached image HTML plus a hint line,
# or a "suppressed" notice carrying a "Show Image" link.
#
# NOTE(review): $self is used throughout but its assignment is not visible
# in this excerpt; the same holds for the declarations of @highlight,
# $highlight and $hide_image.
1416 my $highlight_ref = shift; # the est ids to be highlighted on the overview
1417 my $force_image = shift;
1419 my $unigene_id = $self->get_unigene_id();
1421 if ($highlight_ref) {
1422 @highlight = @
$highlight_ref;
# Member EST objects; get_est_id() and get_clone_name() are called on them below.
1424 my @members = $self->get_member_ests();
# NOTE(review): $highlight_link is never changed from "" in the visible code,
# so the links that interpolate it below get nothing appended.
1426 my $highlight_link = ""; # not sure what that is for...
1427 my $alignment_content;
1428 my $nr_members = $self->get_nr_members();
1429 my $cache = CXGN
::Tools
::WebImageCache
->new();
1430 $cache->set_force($force_image);
# Image branch: taken for 2..19 members, or whenever $force_image is true.
# NOTE(review): a unigene with exactly 20 members is excluded here by "< 20",
# yet the "Hide Image" test further down uses "> 20", so 20 is covered by
# neither condition -- confirm which boundary is intended.
1431 if ( ($nr_members > 1 && $nr_members < 20) || $force_image ) {
1432 $cache->set_key("unigene_image-$unigene_id-".join(",", @highlight));
1433 $cache->set_expiration_time(86400); # seconds, this would be a day.
1434 $cache->set_map_name("contigmap_SGN-U".$self->get_unigene_id()); # what's in the <map name='map_name' tag.
1435 my $vh = SGN
::Context
->new();
1436 my $temp_dir = $vh->get_conf("tempfiles_subdir");
1437 $cache->set_temp_dir(File
::Spec
->catfile($temp_dir, "unigene_images"));
1438 $cache->set_basedir($vh->get_conf("basepath"));
# Regenerate the image and image map only when the cache reports it is not valid.
1440 if (!$cache->is_valid()) {
1441 # generate the image and associated image map.
1442 my $img_fullpath = $cache->get_image_path();
1443 my $map_fullpath = $cache->get_image_map_path();
# The program path is assembled from the 'basepath' and 'programs_subdir'
# configuration values; the remaining path component is not visible here.
1445 my $image_program = File
::Spec
->catfile($vh->get_conf('basepath'),
1446 $vh->get_conf('programs_subdir'),
1451 my $stuff="| $image_program --imagefile=\"$img_fullpath\" --mapfile=\"$map_fullpath\" --link_basename=\"/search/est.pl?request_from=1&request_type=7&request_id=\" --image_name=\"SGN-U$unigene_id\"";
1453 # print STDERR "Calling drawcontig_align: $stuff\n";
# The leading "|" in $stuff makes this two-argument open start the command
# and attach IMAGE_PROGRAM to its standard input.
# NOTE(review): the command line is interpreted by the shell with the paths
# interpolated, and the result of open is not checked in the visible code.
1455 open IMAGE_PROGRAM
,$stuff;
# One tab-separated record per member EST is written to the program:
# label, est id, direction, start, end, start trim, end trim, highlight flag.
1456 foreach my $m ( @members ) {
1457 my $est_id = $m->get_est_id();
1458 my ($start, $end, $qstart, $qend, $dir) = $self->get_est_align_coords($est_id);
# Tests whether this EST id is among the ids to highlight.
# NOTE(review): $est_id is interpolated into the pattern unescaped;
# presumably the ids are plain integers -- confirm.
1460 if (grep (/^$est_id$/, @highlight)) {
# Trim values are the differences between the two coordinate pairs returned
# by get_est_align_coords().
1467 my ($strim, $etrim) = ($qstart- $start, $end - $qend);
1468 my $label = sprintf "%-12s %-10s","SGN-E".$m->get_est_id(),$m->get_clone_name();
1469 print IMAGE_PROGRAM
join( "\t", $label,
1470 $m->get_est_id(), $dir, $start, $end, $strim, $etrim, $highlight),"\n";
# NOTE(review): the statement this "or" belongs to is not visible in this
# excerpt; judging by the message it reports a non-zero exit status of the
# image program through CXGN::Apache::Error::notify.
1473 or CXGN
::Apache
::Error
::notify
('failed to display unigene alignment image',"Non-zero exit code from unigene alignment imaging program $image_program ($?)");
# A "Hide Image" link is only offered when the image would otherwise have
# been suppressed (a single member, or more than 20 members).
1478 if ($nr_members == 1 || $nr_members > 20) {
1479 $hide_image = qq{<br
/>[<a href="/search
/unigene.pl?unigene_id=$unigene_id&force_image=0$highlight_link">Hide Image</a>]};
# Image branch output: centered cached image HTML followed by a hint line.
1482 $alignment_content = "<center>";
1483 $alignment_content .= $cache->get_image_html();
1485 $alignment_content .= <<EOF
1486 <br /><span class="ghosted">To view details for a particular member sequence, click the SGN-E# identifier.</span>$hide_image
1491 if ($nr_members == 1) {
1492 my $est_id = $members[0]->get_est_id();
1493 my $clone_name = $members[0]->get_clone_name();
1494 # Don't bother passing the highlight option around here -- there is only one EST
1495 $alignment_content = <<EOF;
1497 <span class="ghosted">Alignment image suppressed for unigene with only one aligned EST <a href="/search/est.pl?request_id=$est_id&request_type=7&request_from=X">SGN-E$est_id</a></span><br />
1498 [<a href="/search/unigene.pl?unigene_id=$unigene_id&force_image=1">Show Image</a>]
1503 # If a highlight option was passed in, pass it on...
1504 $alignment_content = <<EOF;
1506 <span class="ghosted">Alignment image suppressed for unigene with $nr_members aligned sequences.</span><br />
1507 [<a href="/search/unigene.pl?unigene_id=$unigene_id&force_image=1$highlight_link">Show Image</a>]
1516 return $alignment_content;
1520 =head2 add_dbxref_id
1522 Usage: my $id = $u->add_dbxref_id($dbxref_id)
1523 Desc: adds the dbxref $dbxref_id to the unigene $u.
1524 $u needs to be already stored into the database
1525 and have a valid unigene id for this function
1527 Ret: an id designating the new unigene-dbxref relationship
1528 Args: a dbxref_id [int]
1529 Side Effects: modifies the database.
# Links the dbxref given as argument to this unigene by inserting a row
# into public.unigene_dbxref.
# NOTE(review): the sub header, the assignment of $self and the statement
# that follows the warning below are not visible in this excerpt, so what
# happens after a duplicate is detected cannot be confirmed from here.
1536 my $dbxref_id = shift;
# Duplicate guard: unigene_dbxref_exists() returns the existing link id, if any.
1537 if ($self->unigene_dbxref_exists($dbxref_id) ) {
1538 print STDERR
" *** unigene-dbxref already exists for dbxref_id $dbxref_id and unigene_id ". $self->get_unigene_id() . " \n";
# Both values are passed as bind parameters rather than interpolated into the SQL.
1541 my $query = "INSERT INTO public.unigene_dbxref (unigene_id, dbxref_id) VALUES (?, ?)";
1542 my $sth = $self->get_dbh()->prepare($query);
1543 $sth->execute($self->get_unigene_id(), $dbxref_id);
# Reads the current value of the table's id sequence, i.e. presumably the
# id generated for the row inserted above -- confirm against get_currval().
1544 my $id = $self->get_currval("public.unigene_dbxref_unigene_dbxref_id_seq");
1548 =head2 unigene_dbxref_exists
1550 Usage: my $exists= $self->unigene_dbxref_exists($dbxref_id)
1551 Desc: check if unigene-dbxref connection already exists in the database
1552 Use this function before storing a new unigene_dbxref! ($self->add_dbxref_id() )
1553 Ret: database id or undef
1555 Side Effects: accesses the database
sub unigene_dbxref_exists {
    # Looks up the public.unigene_dbxref row that links this unigene to the
    # given dbxref.
    #
    # Args: $dbxref_id - id of the dbxref to test
    # Ret:  the unigene_dbxref_id of the existing link when one with a true
    #       id is found, undef otherwise
    # Side Effects: runs a SELECT through the object's database handle
    my ( $self, $dbxref_id ) = @_;

    my $sql = "SELECT unigene_dbxref_id FROM public.unigene_dbxref
                  WHERE dbxref_id=? AND unigene_id= ?";

    my $lookup = $self->get_dbh()->prepare($sql);
    $lookup->execute( $dbxref_id, $self->get_unigene_id() );

    # Only the first matching row is of interest.
    my ($link_id) = $lookup->fetchrow_array();

    # An explicit undef (not an empty list) is returned when nothing was found.
    return $link_id ? $link_id : undef;
}
1572 =head2 get_dbxref_ids
1574 Usage: my @dbxref_ids = $u -> get_dbxref_ids()
1575 Desc: returns a list of dbxref_ids that are associated
1579 Side Effects: accesses the database
sub get_dbxref_ids {
    # Returns the ids of all dbxrefs linked to this unigene.
    #
    # Args: none
    # Ret:  a list of dbxref_ids (empty when the unigene has no dbxrefs)
    # Side Effects: runs a SELECT through the object's database handle
    my $self = shift;

    # The table is schema-qualified so this read hits the same table that
    # add_dbxref_id() writes to and unigene_dbxref_exists() checks, no matter
    # how the connection's search_path is set up.
    my $query = "SELECT dbxref_id FROM public.unigene_dbxref WHERE unigene_id=?";
    my $sth   = $self->get_dbh()->prepare($query);
    $sth->execute( $self->get_unigene_id() );

    my @dbxref_ids = ();

    # List assignment in the condition ends the loop only when no row is
    # left, so a false-looking id value would not cut the result short.
    while ( my ($dbxref_id) = $sth->fetchrow_array() ) {
        push @dbxref_ids, $dbxref_id;
    }

    return @dbxref_ids;
}
1598 Usage: my @dbxrefs = $u -> get_dbxrefs()
1599 Desc: returns a list of dbxref objects that
1600 are associated with the unigene
1601 Ret: a list of CXGN::Chado::Dbxref objects
1603 Side Effects: accesses the database
sub get_dbxrefs {
    # Returns one CXGN::Chado::Dbxref object for every dbxref id that
    # get_dbxref_ids() reports for this unigene.
    #
    # Args: none
    # Ret:  a list of CXGN::Chado::Dbxref objects
    # Side Effects: accesses the database
    my ($self) = @_;

    my @dbxref_objects = map {
        my $dbxref_id = $_;
        CXGN::Chado::Dbxref->new( $self->get_dbh(), $dbxref_id );
    } $self->get_dbxref_ids();

    return @dbxref_objects;
}
1617 =head2 create_unigene_dbxref_schema
1619 Usage: CXGN::Transcript::Unigene::create_unigene_dbxref_schema($dbh);
1620 Desc: creates the unigene_dbxref table in the public schema
1628 sub create_unigene_dbxref_schema
{
1631 my $query = "CREATE TABLE public.unigene_dbxref (
1632 unigene_dbxref_id serial primary key not null,
1633 unigene_id bigint not null references sgn.unigene,
1634 dbxref_id bigint not null references public.dbxref