2 # bioperl module for Bio::SeqFeature::Tools::IDHandler
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Chris Mungall <cjm@fruitfly.org>
8 # Copyright Chris Mungall
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SeqFeature::Tools::IDHandler - maps $seq_feature-E<gt>primary_tag
21 use Bio::SeqFeature::Tools::IDHandler;
26 Class to map $seq_feature-E<gt>primary_tag
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to the
35 Bioperl mailing lists Your participation is much appreciated.
37 bioperl-l@bioperl.org - General discussion
38 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 Please direct usage questions or support issues to the mailing list:
44 I<bioperl-l@bioperl.org>
46 rather than to the module maintainer directly. Many experienced and
47 reponsive experts will be able look at the problem and quickly
48 address it. Please include a thorough description of the problem
49 with code and data examples if at all possible.
53 report bugs to the Bioperl bug tracking system to help us keep track
54 the bugs and their resolution. Bug reports can be submitted via the
57 https://github.com/bioperl/bioperl-live/issues
59 =head1 AUTHOR - Chris Mungall
61 Email: cjm@fruitfly.org
65 The rest of the documentation details each of the object
66 methods. Internal methods are usually preceded with a _
71 # Let the code begin...
73 package Bio
::SeqFeature
::Tools
::IDHandler
;
76 # Object preamble - inherits from Bio::Root::Root
78 use base
qw(Bio::Root::Root);
83 Usage : $unflattener = Bio::SeqFeature::Tools::IDHandler->new();
86 Returns : a new Bio::SeqFeature::Tools::IDHandler
93 my($class,@args) = @_;
94 my $self = $class->SUPER::new
(@args);
96 my($generate_id_sub) =
97 $self->_rearrange([qw(GENERATE_ID_SUB
101 return $self; # success - we hope!
104 =head2 set_ParentIDs_from_hierarchy()
106 Title : set_ParentIDs_from_hierarchy()
107 Usage : $idhandler->set_ParentIDs_from_hierarchy($fholder)
108 Function: populates tags Parent and ID via holder hierarchy
111 Args : Bio::featureHolderI (either a SeqFeature or a Seq)
113 This is mainly for GFF3 export
115 GFF3 uses the tags ID and Parent to represent the feature containment
116 hierarchy; it does NOT use the feature holder tree
118 This method sets Parent (and ID for any parents not set) based on
119 feature holder/containement hierarchy, ready for GFF3 output
123 # method author: cjm@fruitfly.org
124 sub set_ParentIDs_from_hierarchy
(){
126 my ($featholder) = @_;
128 # we will traverse the tree of contained seqfeatures
129 # (a seqfeature is itself a holder)
131 # start with the top-level features
132 my @sfs = $featholder->get_SeqFeatures;
134 # clear existing parent tags
135 # (we assume this is the desired behaviour)
136 my @all_sfs = $featholder->get_all_SeqFeatures;
138 if ($_->has_tag('Parent')) {
139 $_->remove_tag('Parent');
144 # iterate until entire tree traversed
147 my @subsfs = $sf->get_SeqFeatures;
150 my $id = $sf->primary_id;
152 # the skolem function feature(seq,start,end,type)
153 # is presumed to uniquely identify this feature, and
154 # to also be persistent
155 $id = $sf->generate_unique_persistent_id;
157 foreach my $subsf (@subsfs) {
158 $subsf->add_tag_value('Parent', $id);
161 # push children on to end of stack (breadth first search)
167 =head2 create_hierarchy_from_ParentIDs
169 Title : create_hierarchy_from_ParentIDs
170 Usage : $idhandler->set_ParentIDs_from_hierarchy($fholder)
171 Function: inverse of set_ParentIDs_from_hierarchy
173 Returns : list of top SeqFeatures
179 sub create_hierarchy_from_ParentIDs
{
180 my ($self,$featholder,@args) = @_;
182 my @sfs = $featholder->get_all_SeqFeatures;
185 my $id = $_->primary_id;
187 if ($sf_by_ID{$id}) {
188 $featholder->throw("DUPLICATE ID: $id");
191 $_->remove_SeqFeatures; # clear existing hierarchy (assume this is desired)
195 # this is actually expected behaviour for some kinds of data;
196 # eg lists of STSs - no containment hierarchy
202 my @parents = $_->get_tagset_values('Parent');
203 foreach my $parent (@parents) {
204 $sf_by_ID{$parent}->add_SeqFeature($_)
205 if exists $sf_by_ID{$parent};
209 $featholder->remove_SeqFeatures;
210 $featholder->add_SeqFeature($_) foreach @topsfs;
215 =head2 generate_unique_persistent_id
217 Title : generate_unique_persistent_id
219 Function: generates a unique and persistent identifier for this
221 Returns : value of primary_id (a scalar)
224 Will generate an ID, B<and> set primary_id() (see above)
226 The ID is a string generated from
233 There are three underlying assumptions: that all the above accessors
234 are set; that seq_id is a persistent and unique identifier for the
235 sequence containing this feature; and that
237 (seq_id, primary_tag, start, end)
239 is a "unique constraint" over features
241 The ID is persistent, so long as none of these values change - if they
242 do, it is considered a separate entity
246 # method author: cjm@fruitfly.org
247 sub generate_unique_persistent_id
{
248 my ($self,$sf,@args) = @_;
251 if (!$sf->isa("Bio::SeqFeatureI")) {
252 $sf->throw("not a Bio::SeqFeatureI");
254 my $seq_id = $sf->seq_id || $sf->throw("seq_id must be set: ".$sf->display_name);
255 #my $seq_id = $sf->seq_id || 'unknown_seq';
256 if ($sf->has_tag('transcript_id')) {
257 ($id) = $sf->get_tag_values('transcript_id');
259 elsif ($sf->has_tag('protein_id')) {
260 ($id) = $sf->get_tag_values('protein_id');
263 my $source = $sf->source_tag || $sf->throw("source tag must be set: ".$sf->display_name);
264 #my $source = $sf->source_tag || 'unknown_source';
265 my $start = $sf->start || $sf->throw("start must be set or is zero: ".$sf->display_name);
266 my $end = $sf->end || $sf->throw("end must be set");
267 my $type = $sf->primary_tag || $sf->throw("primary_tag/type must be set: ".$sf->display_name);
269 $id = "$source:$type:$seq_id:$start:$end";
271 $sf->primary_id($id);