maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / SeqFeature / Tools / IDHandler.pm
blob7529a7c8769fa8ab9c0f1308872721db1fb9bbfe
2 # bioperl module for Bio::SeqFeature::Tools::IDHandler
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Chris Mungall <cjm@fruitfly.org>
8 # Copyright Chris Mungall
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SeqFeature::Tools::IDHandler - maps $seq_feature-E<gt>primary_tag
18 =head1 SYNOPSIS
20 use Bio::SeqIO;
21 use Bio::SeqFeature::Tools::IDHandler;
24 =head1 DESCRIPTION
26 Class to map $seq_feature-E<gt>primary_tag
29 =head1 FEEDBACK
31 =head2 Mailing Lists
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to the
35 Bioperl mailing lists Your participation is much appreciated.
37 bioperl-l@bioperl.org - General discussion
38 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
40 =head2 Support
42 Please direct usage questions or support issues to the mailing list:
44 I<bioperl-l@bioperl.org>
46 rather than to the module maintainer directly. Many experienced and
47 reponsive experts will be able look at the problem and quickly
48 address it. Please include a thorough description of the problem
49 with code and data examples if at all possible.
51 =head2 Reporting Bugs
53 report bugs to the Bioperl bug tracking system to help us keep track
54 the bugs and their resolution. Bug reports can be submitted via the
55 web:
57 https://github.com/bioperl/bioperl-live/issues
59 =head1 AUTHOR - Chris Mungall
61 Email: cjm@fruitfly.org
63 =head1 APPENDIX
65 The rest of the documentation details each of the object
66 methods. Internal methods are usually preceded with a _
68 =cut
71 # Let the code begin...
73 package Bio::SeqFeature::Tools::IDHandler;
74 use strict;
76 # Object preamble - inherits from Bio::Root::Root
78 use base qw(Bio::Root::Root);
80 =head2 new
82 Title : new
83 Usage : $unflattener = Bio::SeqFeature::Tools::IDHandler->new();
84 Function: constructor
85 Example :
86 Returns : a new Bio::SeqFeature::Tools::IDHandler
87 Args : see below
90 =cut
92 sub new {
93 my($class,@args) = @_;
94 my $self = $class->SUPER::new(@args);
96 my($generate_id_sub) =
97 $self->_rearrange([qw(GENERATE_ID_SUB
98 )],
99 @args);
101 return $self; # success - we hope!
104 =head2 set_ParentIDs_from_hierarchy()
106 Title : set_ParentIDs_from_hierarchy()
107 Usage : $idhandler->set_ParentIDs_from_hierarchy($fholder)
108 Function: populates tags Parent and ID via holder hierarchy
109 Example :
110 Returns :
111 Args : Bio::featureHolderI (either a SeqFeature or a Seq)
113 This is mainly for GFF3 export
115 GFF3 uses the tags ID and Parent to represent the feature containment
116 hierarchy; it does NOT use the feature holder tree
118 This method sets Parent (and ID for any parents not set) based on
119 feature holder/containement hierarchy, ready for GFF3 output
121 =cut
123 # method author: cjm@fruitfly.org
124 sub set_ParentIDs_from_hierarchy(){
125 my $self = shift;
126 my ($featholder) = @_;
128 # we will traverse the tree of contained seqfeatures
129 # (a seqfeature is itself a holder)
131 # start with the top-level features
132 my @sfs = $featholder->get_SeqFeatures;
134 # clear existing parent tags
135 # (we assume this is the desired behaviour)
136 my @all_sfs = $featholder->get_all_SeqFeatures;
137 foreach (@all_sfs) {
138 if ($_->has_tag('Parent')) {
139 $_->remove_tag('Parent');
144 # iterate until entire tree traversed
145 while (@sfs) {
146 my $sf = shift @sfs;
147 my @subsfs = $sf->get_SeqFeatures;
149 # see if the ID tag
150 my $id = $sf->primary_id;
151 if (!$id) {
152 # the skolem function feature(seq,start,end,type)
153 # is presumed to uniquely identify this feature, and
154 # to also be persistent
155 $id = $sf->generate_unique_persistent_id;
157 foreach my $subsf (@subsfs) {
158 $subsf->add_tag_value('Parent', $id);
161 # push children on to end of stack (breadth first search)
162 push(@sfs, @subsfs);
164 return;
167 =head2 create_hierarchy_from_ParentIDs
169 Title : create_hierarchy_from_ParentIDs
170 Usage : $idhandler->set_ParentIDs_from_hierarchy($fholder)
171 Function: inverse of set_ParentIDs_from_hierarchy
172 Example :
173 Returns : list of top SeqFeatures
174 Args :
177 =cut
179 sub create_hierarchy_from_ParentIDs{
180 my ($self,$featholder,@args) = @_;
182 my @sfs = $featholder->get_all_SeqFeatures;
183 my %sf_by_ID = ();
184 foreach (@sfs) {
185 my $id = $_->primary_id;
186 next unless $id;
187 if ($sf_by_ID{$id}) {
188 $featholder->throw("DUPLICATE ID: $id");
190 $sf_by_ID{$id} = $_;
191 $_->remove_SeqFeatures; # clear existing hierarchy (assume this is desired)
193 if (!%sf_by_ID) {
194 # warn??
195 # this is actually expected behaviour for some kinds of data;
196 # eg lists of STSs - no containment hierarchy
197 return;
200 my @topsfs =
201 grep {
202 my @parents = $_->get_tagset_values('Parent');
203 foreach my $parent (@parents) {
204 $sf_by_ID{$parent}->add_SeqFeature($_)
205 if exists $sf_by_ID{$parent};
207 !@parents;
208 } @sfs;
209 $featholder->remove_SeqFeatures;
210 $featholder->add_SeqFeature($_) foreach @topsfs;
211 return @topsfs;
215 =head2 generate_unique_persistent_id
217 Title : generate_unique_persistent_id
218 Usage :
219 Function: generates a unique and persistent identifier for this
220 Example :
221 Returns : value of primary_id (a scalar)
222 Args :
224 Will generate an ID, B<and> set primary_id() (see above)
226 The ID is a string generated from
228 seq_id
229 primary_tag
230 start
233 There are three underlying assumptions: that all the above accessors
234 are set; that seq_id is a persistent and unique identifier for the
235 sequence containing this feature; and that
237 (seq_id, primary_tag, start, end)
239 is a "unique constraint" over features
241 The ID is persistent, so long as none of these values change - if they
242 do, it is considered a separate entity
244 =cut
246 # method author: cjm@fruitfly.org
247 sub generate_unique_persistent_id{
248 my ($self,$sf,@args) = @_;
250 my $id;
251 if (!$sf->isa("Bio::SeqFeatureI")) {
252 $sf->throw("not a Bio::SeqFeatureI");
254 my $seq_id = $sf->seq_id || $sf->throw("seq_id must be set: ".$sf->display_name);
255 #my $seq_id = $sf->seq_id || 'unknown_seq';
256 if ($sf->has_tag('transcript_id')) {
257 ($id) = $sf->get_tag_values('transcript_id');
259 elsif ($sf->has_tag('protein_id')) {
260 ($id) = $sf->get_tag_values('protein_id');
262 else {
263 my $source = $sf->source_tag || $sf->throw("source tag must be set: ".$sf->display_name);
264 #my $source = $sf->source_tag || 'unknown_source';
265 my $start = $sf->start || $sf->throw("start must be set or is zero: ".$sf->display_name);
266 my $end = $sf->end || $sf->throw("end must be set");
267 my $type = $sf->primary_tag || $sf->throw("primary_tag/type must be set: ".$sf->display_name);
269 $id = "$source:$type:$seq_id:$start:$end";
271 $sf->primary_id($id);
272 return $id;