2 # BioPerl module for Bio::Cluster::UniGene.pm
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Andrew Macgregor <andrew at cbbc.murdoch.edu.au>
8 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
9 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
10 # http://meg.otago.ac.nz/
12 # You may distribute this module under the same terms as perl itself
15 # April 17, 2002 - Initial implementation by Andrew Macgregor
16 # POD documentation - main docs before the code
20 Bio::Cluster::UniGene - UniGene object
24 use Bio::Cluster::UniGene;
27 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
28 '-format' => "unigene");
29 # note: we quote -format to keep older perl's from complaining.
31 while ( my $in = $stream->next_cluster() ) {
32 print $in->unigene_id() . "\n";
33 while ( my $sequence = $in->next_seq() ) {
34 print $sequence->accession_number() . "\n";
40 This UniGene object implements the L<Bio::Cluster::UniGeneI> interface
41 for the representation if UniGene clusters in Bioperl. It is returned
42 by the L<Bio::ClusterIO> parser for unigene format and contains all
43 the data associated with one UniGene record.
45 This class implements several interfaces and hence can be used
46 wherever instances of such interfaces are expected. In particular, the
47 interfaces are L<Bio::ClusterI> as the base interface for all cluster
48 representations, and in addition L<Bio::IdentifiableI> and
51 The following lists the UniGene specific methods that are available
52 (see below for details). Be aware next_XXX iterators take a snapshot
53 of the array property when they are first called, and this snapshot is
54 not reset until the iterator is exhausted. Hence, once called you need
55 to exhaust the iterator to see any changes that have been made to the
56 property in the meantime. You will usually want to use the
57 non-iterator equivalents and loop over the elements yourself.
59 new() - standard new call
61 unigene_id() - set/get unigene_id
63 title() - set/get title (description)
67 cytoband() - set/get cytoband
71 locuslink() - set/get locuslink
73 homol() - set/get homologene
75 gnm_terminus() - set/get gnm_terminus
77 scount() - set/get scount
79 express() - set/get express, currently takes/returns a reference to an
80 array of expressed tissues
82 next_express() - returns the next tissue expression from the expressed
85 chromosome() - set/get chromosome, currently takes/returns a reference
86 to an array of chromosome lines
88 next_chromosome() - returns the next chromosome line from the array of
91 sts() - set/get sts, currently takes/returns a reference to an array
94 next_sts() - returns the next sts line from the array of sts lines
96 txmap() - set/get txmap, currently takes/returns a reference to an
99 next_txmap() - returns the next txmap line from the array of txmap
102 protsim() - set/get protsim, currently takes/returns a reference to an
103 array of protsim lines
105 next_protsim() - returns the next protsim line from the array of
108 sequences() - set/get sequence, currently takes/returns a reference to
109 an array of references to seq info
111 next_seq() - returns a Seq object that currently only contains an
115 =head1 Implemented Interfaces
117 This class implementes the following interfaces.
121 =item Bio::Cluster::UniGeneI
123 This includes implementing Bio::ClusterI.
125 =item Bio::IdentifiableI
127 =item Bio::DescribableI
129 =item Bio::AnnotatableI
131 =item Bio::Factory::SequenceStreamI
140 User feedback is an integral part of the evolution of this and other
141 Bioperl modules. Send your comments and suggestions preferably to one
142 of the Bioperl mailing lists. Your participation is much appreciated.
144 bioperl-l@bioperl.org - General discussion
145 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
149 Please direct usage questions or support issues to the mailing list:
151 I<bioperl-l@bioperl.org>
153 rather than to the module maintainer directly. Many experienced and
154 reponsive experts will be able look at the problem and quickly
155 address it. Please include a thorough description of the problem
156 with code and data examples if at all possible.
158 =head2 Reporting Bugs
160 Report bugs to the Bioperl bug tracking system to help us keep track
161 the bugs and their resolution. Bug reports can be submitted via the
164 https://github.com/bioperl/bioperl-live/issues
166 =head1 AUTHOR - Andrew Macgregor
168 Email andrew at cbbc.murdoch.edu.au
172 Hilmar Lapp, hlapp at gmx.net
177 The rest of the documentation details each of the object
178 methods. Internal methods are usually preceded with a "_".
182 # Let the code begin...
185 package Bio
::Cluster
::UniGene
;
188 use Bio
::Annotation
::Collection
;
189 use Bio
::Annotation
::DBLink
;
190 use Bio
::Annotation
::SimpleValue
;
192 use Bio
::Seq
::SeqFactory
;
194 use base
qw(Bio::Root::Root Bio::Cluster::UniGeneI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::Factory::SequenceStreamI);
197 'Aga' => "Anopheles gambiae",
198 'Ame' => "Apis mellifera",
199 'At' => "Arabidopsis thaliana",
200 'Bmo' => "Bombyx mori",
201 'Bt' => "Bos taurus",
202 'Cel' => "Caenorhabditis elegans",
203 'Cfa' => "Canine familiaris",
204 'Cin' => "Ciona intestinalis",
205 'Cre' => "Chlamydomonas reinhardtii",
206 'Csa' => "Ciona savignyi",
207 'Csi' => "Citrus sinensis",
208 'Ddi' => "Dictyostelium discoideum",
209 'Dr' => "Danio rerio",
210 'Dm' => "Drosophila melanogaster",
211 'Gga' => "Gallus gallus",
212 'Gma' => "Glycine max",
213 'Han' => "Helianthus annus",
214 'Hs' => "Homo sapiens",
215 'Hma' => "Hydra magnipapillata",
216 'Hv' => "Hordeum vulgare",
217 'Lco' => "Lotus corniculatus",
218 'Les' => "Lycopersicon esculentum",
219 'Lsa' => "Lactuca sativa",
220 'Mdo' => "Malus x domestica",
221 'Mgr' => "Magnaporthe grisea",
222 'Mm' => "Mus musculus",
223 'Mtr' => "Medicago truncatula",
224 'Ncr' => "Neurospora crassa",
225 'Oar' => "Ovis aries",
226 'Omy' => "Oncorhynchus mykiss",
227 'Os' => "Oryza sativa",
228 'Ola' => "Oryzias latipes",
229 'Ppa' => "Physcomitrella patens",
230 'Pta' => "Pinus taeda",
231 'Ptp' => "Populus tremula x Populus tremuloides",
232 'Rn' => "Rattus norvegicus",
233 'Sbi' => "Sorghum bicolor",
234 'Sma' => "Schistosoma mansoni",
235 'Sof' => "Saccharum officinarum",
236 'Spu' => "Strongylocentrotus purpuratus",
237 'Ssa' => "Salmo salar",
238 'Ssc' => "Sus scrofa",
239 'Str' => "Xenopus tropicalis",
240 'Stu' => "Solanum tuberosum",
241 'Ta' => "Triticum aestivum",
242 'Tgo' => "Toxoplasma gondii",
243 'Tru' => "Takifugu rubripes",
244 'Vvi' => "Vitis vinifera",
245 'Xl' => "Xenopus laevis",
253 Usage : used by ClusterIO
254 Returns : a new Bio::Cluster::Unigene object
259 # standard new call..
260 my($caller,@args) = @_;
261 my $self = $caller->SUPER::new
(@args);
263 my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
264 $self->_rearrange([qw(UNIGENE_ID
277 $self->{'_alphabet'} = 'dna';
279 $self->unigene_id($ugid) if $ugid;
280 $self->description($desc) if $desc;
281 $self->sequences($mems) if $mems;
282 $self->size($size) if defined($size);
283 $self->display_id($dispid) if $dispid; # overwrites ugid
284 $self->object_id($id) if $id; # overwrites dispid
285 $self->namespace($ns || 'UniGene');
286 $self->authority($auth || 'NCBI');
287 $self->version($v) if defined($v);
288 if( ! defined $seqfact ) {
289 $seqfact = Bio
::Seq
::SeqFactory
->new
290 (-verbose
=> $self->verbose(),
291 -type
=> 'Bio::Seq::RichSeq');
293 $self->sequence_factory($seqfact);
294 if( (! $species) && (defined $self->unigene_id() &&
295 $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) {
296 # try set a default one depending on the ID
297 $species = $species_map{$1};
299 $self->species($species);
304 =head1 L<Bio::Cluster::UniGeneI> methods
311 Usage : unigene_id();
312 Function: Returns the unigene_id associated with the object.
313 Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
321 my ($obj,$value) = @_;
322 if( defined $value) {
323 $obj->{'unigene_id'} = $value;
325 return $obj->{'unigene_id'};
334 Function: Returns the title associated with the object.
335 Example : $title = $unigene->title or $unigene->title($title)
337 Args : None or a title
343 my ($obj,$value) = @_;
344 if( defined $value) {
345 $obj->{'title'} = $value;
347 return $obj->{'title'};
355 Function: Returns the gene associated with the object.
356 Example : $gene = $unigene->gene or $unigene->gene($gene)
358 Args : None or a gene
365 return $self->_annotation_value('gene_name', @_);
373 Function: Returns the cytoband associated with the object.
374 Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
376 Args : None or a cytoband
383 return $self->_annotation_value('cyto_band', @_);
390 Function: Returns the mgi associated with the object.
391 Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
404 $self->_remove_dblink('dblink','MGI');
405 # then add if a valid value is present
407 $self->_annotation_dblink('dblink','MGI',$acc);
410 ($acc) = $self->_annotation_dblink('dblink','MGI');
420 Function: Returns or stores a reference to an array containing locuslink data.
421 Returns : An array reference
422 Args : None or an array reference
431 $self->_remove_dblink('dblink','LocusLink');
432 # then add as many accessions as are present
433 foreach my $acc (@
$ll) {
434 $self->_annotation_dblink('dblink','LocusLink',$acc);
437 my @accs = $self->_annotation_dblink('dblink','LocusLink');
448 Function: Returns the homol entry associated with the object.
449 Example : $homol = $unigene->homol or $unigene->homol($homol)
451 Args : None or a homol entry
457 return $self->_annotation_value('homol', @_);
464 Usage : restr_expr();
465 Function: Returns the restr_expr entry associated with the object.
466 Example : $restr_expr = $unigene->restr_expr or $unigene->restr_expr($restr_expr)
468 Args : None or a restr_expr entry
474 return $self->_annotation_value('restr_expr', @_);
481 Usage : gnm_terminus();
482 Function: Returns the gnm_terminus associated with the object.
483 Example : $gnm_terminus = $unigene->gnm_terminus or
484 $unigene->gnm_terminus($gnm_terminus)
486 Args : None or a gnm_terminus
492 return $self->_annotation_value('gnm_terminus', @_);
499 Function: Returns the scount associated with the object.
500 Example : $scount = $unigene->scount or $unigene->scount($scount)
502 Args : None or a scount
507 my ($obj,$value) = @_;
508 if( defined $value) {
509 $obj->{'scount'} = $value;
510 } elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) {
511 $obj->{'scount'} = $obj->size();
513 return $obj->{'scount'};
521 Function: Returns or stores a reference to an array containing
522 tissue expression data
523 Returns : An array reference
524 Args : None or an array reference
531 return $self->_annotation_value_ary('expressed',@_);
538 Usage : chromosome();
539 Function: Returns or stores a reference to an array containing
541 Returns : An array reference
542 Args : None or an array reference
549 return $self->_annotation_value_ary('chromosome',@_);
557 Function: Returns or stores a reference to an array containing sts lines
559 Returns : An array reference
560 Args : None or an array reference
567 return $self->_annotation_value_ary('sts',@_);
575 Function: Returns or stores a reference to an array containing txmap lines
577 Returns : An array reference
578 Args : None or an array reference
585 return $self->_annotation_value_ary('txmap',@_);
593 Function: Returns or stores a reference to an array containing protsim lines
594 This should really only be used by ClusterIO, not directly
595 Returns : An array reference
596 Args : None or an array reference
603 return $self->_annotation_value_ary('protsim',@_);
611 Function: Returns or stores a reference to an array containing
614 This is mostly reserved for ClusterIO parsers. You should
615 use get_members() for get and add_member()/remove_members()
618 Returns : An array reference, or undef
619 Args : None or an array reference or undef
626 return $self->{'members'} = shift if @_;
627 return $self->{'members'};
633 Usage : $obj->species($newval)
634 Function: Get/set the species object for this Unigene cluster.
636 Returns : value of species (a L<Bio::Species> object)
637 Args : on set, new value (a L<Bio::Species> object or
638 the binomial name, or undef, optional)
648 if($species && (! ref($species))) {
649 my @class = reverse(split(' ',$species));
650 $species = Bio
::Species
->new(-classification
=> \
@class);
652 return $self->{'species'} = $species;
654 return $self->{'species'};
658 =head1 L<Bio::ClusterI> methods
666 Function: Get/set the display name or identifier for the cluster
668 This is aliased to unigene_id().
671 Args : optional, on set the display ID ( a string)
676 return shift->unigene_id(@_);
682 Usage : Bio::ClusterI->description("POLYUBIQUITIN")
683 Function: get/set for the consensus description of the cluster
685 This is aliased to title().
687 Returns : the description string
688 Args : Optional the description string
693 return shift->title(@_);
699 Usage : Bio::ClusterI->size();
700 Function: get for the size of the family,
701 calculated from the number of members
703 This is aliased to scount().
705 Returns : the size of the cluster
713 # hard-wiring the size is allowed if there are no sequences
714 return $self->scount(@_) unless defined($self->sequences());
715 # but we can't change the number of members through this method
716 my $n = scalar(@
{$self->sequences()});
717 if(@_ && ($n != $_[0])) {
718 $self->throw("Cannot change cluster size using size() from $n to ".
726 Title : cluster_score
727 Usage : $cluster ->cluster_score(100);
728 Function: get/set for cluster_score which
729 represent the score in which the clustering
730 algorithm assigns to this cluster.
732 For UniGene clusters, there really is no cluster score that
733 would come with the data. However, we provide an
734 implementation here so that you can score UniGene clusters
738 Args : optionally, on set a number
745 return $self->{'cluster_score'} = shift if @_;
746 return $self->{'cluster_score'};
752 Usage : Bio::ClusterI->get_members(($seq1, $seq2));
753 Function: retrieve the members of the family by some criteria
755 Will return all members if no criteria are provided.
757 At this time this implementation does not support
758 specifying criteria and will always return all members.
760 Returns : the array of members
768 my $mems = $self->sequences() || [];
770 if(@
$mems && (ref($mems->[0]) eq "HASH")) {
771 # nope, we need to build the object list from scratch
773 while(my $seq = $self->next_seq()) {
774 push(@memlist, $seq);
776 # we cache this array of objects as the new member list
778 $self->sequences($mems);
785 =head1 Annotatable view at the object properties
792 Usage : $obj->annotation($newval)
793 Function: Get/set the L<Bio::AnnotationCollectionI> object for
794 this UniGene cluster.
796 Many attributes of this class are actually stored within
797 the annotation collection object as L<Bio::AnnotationI>
798 compliant objects, so you can conveniently access them
799 through the same interface as you would e.g. access
800 L<Bio::SeqI> annotation properties.
802 If you call this method in set mode and replace the
803 annotation collection with another one you should know
804 exactly what you are doing.
807 Returns : a L<Bio::AnnotationCollectionI> compliant object
808 Args : on set, new value (a L<Bio::AnnotationCollectionI>
809 compliant object or undef, optional)
818 return $self->{'annotation'} = shift;
819 } elsif(! exists($self->{'annotation'})) {
820 $self->{'annotation'} = Bio
::Annotation
::Collection
->new();
822 return $self->{'annotation'};
826 =head1 Implementation specific methods
828 These are mostly for adding/removing to array properties, and for
829 methods with special functionality.
837 Function: Adds a member object to the list of members.
839 Returns : TRUE if the new member was successfully added, and FALSE
841 Args : The member to add.
847 my ($self,@mems) = @_;
849 my $memlist = $self->{'members'} || [];
850 # this is an object interface; is the member list already objects?
851 if(@
$memlist && (ref($memlist->[0]) eq "HASH")) {
852 # nope, convert to objects
853 $memlist = [$self->get_members()];
856 push(@
$memlist, @mems);
857 # store if we created this array ref ourselves
858 $self->sequences($memlist);
863 =head2 remove_members
865 Title : remove_members
867 Function: Remove the list of members for this cluster such that the
868 member list is undefined afterwards (as opposed to zero members).
870 Returns : the previous list of members
879 my @mems = $self->get_members();
880 $self->sequences(undef);
885 =head2 next_locuslink
887 Title : next_locuslink
888 Usage : next_locuslink();
889 Function: Returns the next locuslink from an array referred
890 to using $obj->{'locuslink'}
892 If you call this iterator again after it returned undef, it
893 will re-cycle through the list of elements. Changes in the
894 underlying array property while you loop over this iterator
895 will not be reflected until you exhaust the iterator.
897 Example : while ( my $locuslink = $in->next_locuslink() ) {
898 print "$locuslink\n";
908 return $obj->_next_element("ll","locuslink");
914 Usage : next_express();
915 Function: Returns the next tissue from an array referred
916 to using $obj->{'express'}
918 If you call this iterator again after it returned undef, it
919 will re-cycle through the list of elements. Changes in the
920 underlying array property while you loop over this iterator
921 will not be reflected until you exhaust the iterator.
923 Example : while ( my $express = $in->next_express() ) {
934 return $obj->_next_element("express","express");
938 =head2 next_chromosome
940 Title : next_chromosome
941 Usage : next_chromosome();
942 Function: Returns the next chromosome line from an array referred
943 to using $obj->{'chromosome'}
945 If you call this iterator again after it returned undef, it
946 will re-cycle through the list of elements. Changes in the
947 underlying array property while you loop over this iterator
948 will not be reflected until you exhaust the iterator.
950 Example : while ( my $chromosome = $in->next_chromosome() ) {
951 print "$chromosome\n";
958 sub next_chromosome
{
961 return $obj->_next_element("chr","chromosome");
968 Usage : next_protsim();
969 Function: Returns the next protsim line from an array referred
970 to using $obj->{'protsim'}
972 If you call this iterator again after it returned undef, it
973 will re-cycle through the list of elements. Changes in the
974 underlying array property while you loop over this iterator
975 will not be reflected until you exhaust the iterator.
977 Example : while ( my $protsim = $in->next_protsim() ) {
988 return $obj->_next_element("protsim","protsim");
996 Function: Returns the next sts line from an array referred
997 to using $obj->{'sts'}
999 If you call this iterator again after it returned undef, it
1000 will re-cycle through the list of elements. Changes in the
1001 underlying array property while you loop over this iterator
1002 will not be reflected until you exhaust the iterator.
1004 Example : while ( my $sts = $in->next_sts() ) {
1015 return $obj->_next_element("sts","sts");
1022 Usage : next_txmap();
1023 Function: Returns the next txmap line from an array
1024 referred to using $obj->{'txmap'}
1026 If you call this iterator again after it returned undef, it
1027 will re-cycle through the list of elements. Changes in the
1028 underlying array property while you loop over this iterator
1029 will not be reflected until you exhaust the iterator.
1031 Example : while ( my $tsmap = $in->next_txmap() ) {
1042 return $obj->_next_element("txmap","txmap");
1045 ###############################
1048 # args: prefix name for the queue
1049 # name of the method from which to re-fill
1050 # returns: the next element from that queue, or undef if the queue is empty
1051 ###############################
1053 my ($self,$queuename,$meth) = @_;
1055 $queuename = "_".$queuename."_queue";
1056 if(! exists($self->{$queuename})) {
1057 # re-initialize from array of sequence data
1058 $self->{$queuename} = [@
{$self->$meth() }];
1060 my $queue = $self->{$queuename};
1061 # is queue exhausted (equivalent to end of stream)?
1063 # yes, remove queue and signal to the caller
1064 delete $self->{$queuename};
1067 return shift(@
$queue);
1070 =head1 L<Bio::IdentifiableI> methods
1077 Usage : $string = $obj->object_id()
1078 Function: a string which represents the stable primary identifier
1079 in this namespace of this object. For DNA sequences this
1080 is its accession_number, similarly for protein sequences
1082 This is aliased to unigene_id().
1090 return shift->unigene_id(@_);
1096 Usage : $version = $obj->version()
1097 Function: a number which differentiates between versions of
1098 the same object. Higher numbers are considered to be
1099 later and more relevant, but a single object described
1100 the same identifier should represent the same concept
1102 Unigene clusters usually won't have a version, so this
1103 will be mostly undefined.
1106 Args : on set, new value (a scalar or undef, optional)
1114 return $self->{'version'} = shift if @_;
1115 return $self->{'version'};
1122 Usage : $authority = $obj->authority()
1123 Function: a string which represents the organisation which
1124 granted the namespace, written as the DNS name for
1125 organisation (eg, wormbase.org)
1128 Args : on set, new value (a scalar or undef, optional)
1136 return $self->{'authority'} = shift if @_;
1137 return $self->{'authority'};
1144 Usage : $string = $obj->namespace()
1145 Function: A string representing the name space this identifier
1146 is valid in, often the database name or the name
1147 describing the collection
1150 Args : on set, new value (a scalar or undef, optional)
1158 return $self->{'namespace'} = shift if @_;
1159 return $self->{'namespace'};
1162 =head1 L<Bio::DescribableI> methods
1168 Title : display_name
1169 Usage : $string = $obj->display_name()
1170 Function: A string which is what should be displayed to the user
1171 the string should have no spaces (ideally, though a cautious
1172 user of this interface would not assume this) and should be
1173 less than thirty characters (though again, double checking
1174 this is a good idea)
1176 This is aliased to unigene_id().
1184 return shift->unigene_id(@_);
1188 =head2 description()
1191 Usage : $string = $obj->description()
1192 Function: A text string suitable for displaying to the user a
1193 description. This string is likely to have spaces, but
1194 should not have any newlines or formatting - just plain
1195 text. The string should not be greater than 255 characters
1196 and clients can feel justified at truncating strings at 255
1197 characters for the purposes of display
1199 This is already demanded by Bio::ClusterI and hence is
1208 =head1 L<Bio::Factory::SequenceStreamI> methods
1216 Function: Returns the next seq as a Seq object as defined by
1217 $seq->sequence_factory(),
1218 at present an empty Bio::Seq::RichSeq object with
1219 just the accession_number() and pid() set
1221 This iterator will not exhaust the array of member
1222 sequences. If you call next_seq() again after it returned
1223 undef, it will re-cycle through the list of member
1226 Example : while ( my $sequence = $in->next_seq() ) {
1227 print $sequence->accession_number() . "\n";
1229 Returns : Bio::PrimarySeqI object
1237 if(! exists($obj->{'_seq_queue'})) {
1238 # re-initialize from array of sequence data
1239 $obj->{'_seq_queue'} = [@
{$obj->sequences()}];
1241 my $queue = $obj->{'_seq_queue'};
1242 # is queue exhausted (equivalent to end of stream)?
1244 # yes, remove queue and signal to the caller
1245 delete $obj->{'_seq_queue'};
1248 # no, still data in the queue: get the next one from the queue
1249 my $seq_h = shift(@
$queue);
1250 # if this is not a simple hash ref, it's an object already, and we'll
1252 return $seq_h if(ref($seq_h) ne 'HASH');
1253 # nope, we need to assemble this object from scratch
1255 # assemble the annotation collection
1256 my $ac = Bio
::Annotation
::Collection
->new();
1257 foreach my $k (keys %$seq_h) {
1258 next if $k =~ /acc|pid|nid|version/;
1259 my $ann = Bio
::Annotation
::SimpleValue
->new(-tagname
=> $k,
1260 -value
=> $seq_h->{$k});
1261 $ac->add_Annotation($ann);
1263 # assemble the initialization parameters and create object
1264 my $seqobj = $obj->sequence_factory->create(
1265 -accession_number
=> $seq_h->{acc
},
1266 -pid
=> $seq_h->{pid
},
1267 # why does NCBI prepend a 'g' to its own identifiers??
1268 -primary_id
=> $seq_h->{nid
} && $seq_h->{nid
} =~ /^g\d+$/ ?
1269 substr($seq_h->{nid
},1) : $seq_h->{nid
},
1270 -display_id
=> $seq_h->{acc
},
1271 -seq_version
=> $seq_h->{version
},
1272 -alphabet
=> $obj->{'_alphabet'},
1273 -namespace
=> $seq_h->{acc
} =~ /^NM_/ ?
'RefSeq' : 'GenBank',
1274 -authority
=> $obj->authority(), # default is NCBI
1275 -species
=> $obj->species(),
1281 =head2 sequence_factory
1283 Title : sequence_factory
1284 Usage : $seqio->sequence_factory($seqfactory)
1285 Function: Get/Set the Bio::Factory::SequenceFactoryI
1286 Returns : Bio::Factory::SequenceFactoryI
1287 Args : [optional] Bio::Factory::SequenceFactoryI
1292 sub sequence_factory
{
1293 my ($self,$obj) = @_;
1294 if( defined $obj ) {
1295 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
1296 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()");
1298 $self->{'_seqfactory'} = $obj;
1300 $self->{'_seqfactory'};
1303 =head1 Private methods
1307 =head2 _annotation_value
1309 Title : _annotation_value
1311 Function: Private method.
1313 Returns : the value (a string)
1314 Args : annotation key (a string)
1315 on set, annotation value (a string)
1320 sub _annotation_value
{
1327 if(! defined($val)) {
1328 ($ann) = $self->annotation->remove_Annotations($key);
1329 return $ann ?
$ann->value() : undef;
1332 ($ann) = $self->annotation->get_Annotations($key);
1333 if(defined $ann && (! $val)) {
1334 # get mode and exists
1335 $val = $ann->value();
1339 $ann = Bio
::Annotation
::SimpleValue
->new(-tagname
=> $key);
1340 $self->annotation->add_Annotation($ann);
1348 =head2 _annotation_value_ary
1350 Title : _annotation_value_ary
1352 Function: Private method.
1354 Returns : reference to the array of values
1355 Args : annotation key (a string)
1356 on set, reference to an array holding the values
1361 sub _annotation_value_ary
{
1362 my ($self,$key,$arr) = @_;
1364 my $ac = $self->annotation;
1367 $ac->remove_Annotations($key);
1368 # then add as many values as are present
1369 foreach my $val (@
$arr) {
1370 my $ann = Bio
::Annotation
::SimpleValue
->new(-value
=> $val,
1373 $ac->add_Annotation($ann);
1376 my @vals = map { $_->value(); } $ac->get_Annotations($key);
1383 =head2 _annotation_dblink
1385 Title : _annotation_dblink
1387 Function: Private method.
1389 Returns : array of accessions for the given database (namespace)
1390 Args : annotation key (a string)
1391 dbname (a string) (optional on get, mandatory on set)
1392 on set, accession or ID (a string), and version
1397 sub _annotation_dblink
{
1398 my ($self,$key,$dbname,$acc,$version) = @_;
1401 # set mode -- this is adding here
1402 my $ann = Bio
::Annotation
::DBLink
->new(-tagname
=> $key,
1403 -primary_id
=> $acc,
1404 -database
=> $dbname,
1405 -version
=> $version);
1406 $self->annotation->add_Annotation($ann);
1410 my @anns = $self->annotation->get_Annotations($key);
1411 # filter out those that don't match the requested database
1413 @anns = grep { $_->database() eq $dbname; } @anns;
1415 return map { $_->primary_id(); } @anns;
1419 =head2 _remove_dblink
1421 Title : _remove_dblink
1423 Function: Private method.
1425 Returns : array of accessions for the given database (namespace)
1426 Args : annotation key (a string)
1427 dbname (a string) (optional)
1433 my ($self,$key,$dbname) = @_;
1435 my $ac = $self->annotation();
1438 foreach my $ann ($ac->remove_Annotations($key)) {
1439 if($ann->database() eq $dbname) {
1442 $ac->add_Annotation($ann);
1446 @anns = $ac->remove_Annotations($key);
1448 return map { $_->primary_id(); } @anns;
1452 #####################################################################
1453 # aliases for naming consistency or other reasons #
1454 #####################################################################
1456 *sequence
= \
&sequences
;