t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / SeqFeature / Gene / GeneStructure.pm
blobbf110ad6493f57fb4987c7b74db296b4b6629229
2 # BioPerl module for Bio::SeqFeature::Gene::GeneStructure
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Hilmar Lapp <hlapp@gmx.net>
8 # Copyright Hilmar Lapp
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SeqFeature::Gene::GeneStructure - A feature representing an arbitrarily complex structure of a gene
18 =head1 SYNOPSIS
20 # See documentation of methods.
22 =head1 DESCRIPTION
24 A feature representing a gene structure. As of now, a gene structure
25 really is only a collection of transcripts. See
26 L<Bio::SeqFeature::Gene::TranscriptI> (interface) and
27 L<Bio::SeqFeature::Gene::Transcript> (implementation) for the features
28 of such objects.
30 =head1 FEEDBACK
32 =head2 Mailing Lists
34 User feedback is an integral part of the evolution of this and other
35 Bioperl modules. Send your comments and suggestions preferably to one
36 of the Bioperl mailing lists. Your participation is much appreciated.
38 bioperl-l@bioperl.org - General discussion
39 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41 =head2 Support
43 Please direct usage questions or support issues to the mailing list:
45 I<bioperl-l@bioperl.org>
47 rather than to the module maintainer directly. Many experienced and
48 reponsive experts will be able look at the problem and quickly
49 address it. Please include a thorough description of the problem
50 with code and data examples if at all possible.
52 =head2 Reporting Bugs
54 Report bugs to the Bioperl bug tracking system to help us keep track
55 the bugs and their resolution. Bug reports can be submitted via the
56 web:
58 https://github.com/bioperl/bioperl-live/issues
60 =head1 AUTHOR - Hilmar Lapp
62 Email hlapp-at-gmx.net
64 =head1 APPENDIX
66 The rest of the documentation details each of the object
67 methods. Internal methods are usually preceded with a _
69 =cut
72 # Let the code begin...
75 package Bio::SeqFeature::Gene::GeneStructure;
76 use vars qw($WeakRefs);
77 use strict;
79 BEGIN {
80 eval "use Scalar::Util qw(weaken);";
81 if ($@) {
82 $Bio::SeqFeature::Gene::GeneStructure::WeakRefs = 0;
83 } else { $Bio::SeqFeature::Gene::GeneStructure::WeakRefs = 1; }
87 use base qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::GeneStructureI);
90 sub new {
91 my ($caller, @args) = @_;
92 my $self = $caller->SUPER::new(@args);
93 $self->_register_for_cleanup(\&gene_cleanup);
94 my ($primary) =
95 $self->_rearrange([qw(PRIMARY
96 )],@args);
98 $primary = 'genestructure' unless $primary;
99 $self->primary_tag($primary);
100 $self->strand(0) if(! defined($self->strand()));
101 return $self;
104 =head2 transcripts
106 Title : transcripts
107 Usage : @transcripts = $gene->transcripts();
108 Function: Get the transcripts of this gene structure. Many gene structures
109 will have only one transcript.
111 Returns : An array of Bio::SeqFeature::Gene::TranscriptI implementing objects.
112 Args :
115 =cut
117 sub transcripts {
118 return @{shift->{'_transcripts'} || []};
121 =head2 add_transcript
123 Title : add_transcript()
124 Usage : $gene->add_transcript($transcript);
125 Function: Add a transcript to this gene structure.
126 Returns :
127 Args : A Bio::SeqFeature::Gene::TranscriptI implementing object.
130 =cut
132 sub add_transcript {
133 my ($self, $fea) = @_;
135 if(!$fea || ! $fea->isa('Bio::SeqFeature::Gene::TranscriptI') ) {
136 $self->throw("$fea does not implement Bio::SeqFeature::Gene::TranscriptI");
138 unless( exists $self->{'_transcripts'} ) {
139 $self->{'_transcripts'} = [];
141 $self->_expand_region($fea);
142 if( $Bio::SeqFeature::Gene::GeneStructure::WeakRefs ) {
143 $fea->parent(weaken $self);
144 } else {
145 $fea->parent($self);
147 push(@{$self->{'_transcripts'}}, $fea);
150 =head2 flush_transcripts
152 Title : flush_transcripts()
153 Usage : $gene->flush_transcripts();
154 Function: Remove all transcripts from this gene structure.
155 Returns :
156 Args :
159 =cut
161 sub flush_transcripts {
162 my ($self) = @_;
163 if( defined $self->{'_transcripts'} ) {
164 foreach my $t ( grep {defined} @{$self->{'_transcripts'} || []} ) {
165 $t->parent(undef); # remove bkwds pointers
166 $t = undef;
168 delete($self->{'_transcripts'});
172 =head2 add_transcript_as_features
174 Title : add_transcript_as_features
175 Usage : $gene->add_transcript_as_features(@featurelist);
176 Function: take a list of Bio::SeqFeatureI objects and turn them into a
177 Bio::SeqFeature::Gene::Transcript object. Add that transcript to the gene.
178 Returns : nothing
179 Args : a list of Bio::SeqFeatureI compliant objects
182 =cut
184 sub add_transcript_as_features {
185 my ($self,@features) = @_;
186 my $transcript=Bio::SeqFeature::Gene::Transcript->new;
187 foreach my $fea (@features) {
188 if ($fea->primary_tag =~ /utr/i) { #UTR / utr/ 3' utr / utr5 etc.
189 $transcript->add_utr($fea);
190 } elsif ($fea->primary_tag =~ /promot/i) { #allow for spelling differences
191 $transcript->add_promoter($fea);
192 } elsif ($fea->primary_tag =~ /poly.*A/i) { #polyA, POLY_A, etc.
193 $transcript->poly_A_site($fea);
194 } else { #assume the rest are exons
195 $transcript->add_exon($fea);
198 $self->add_transcript($transcript);
202 =head2 promoters
204 Title : promoters
205 Usage : @prom_sites = $gene->promoters();
206 Function: Get the promoter features of this gene structure.
208 This method basically merges the promoters returned by transcripts.
210 Note that OO-modeling of regulatory elements is not stable yet.
211 This means that this method might change or even disappear in a
212 future release. Be aware of this if you use it.
214 Returns : An array of Bio::SeqFeatureI implementing objects.
215 Args :
218 =cut
220 sub promoters {
221 my ($self) = @_;
222 my @transcripts = $self->transcripts();
223 my @feas = ();
225 foreach my $tr (@transcripts) {
226 push(@feas, $tr->promoters());
228 return @feas;
232 =head2 exons
234 Title : exons()
235 Usage : @exons = $gene->exons();
236 @inital_exons = $gene->exons('Initial');
237 Function: Get all exon features or all exons of a specified type of this gene
238 structure.
240 Exon type is treated as a case-insensitive regular expression and
241 optional. For consistency, use only the following types:
242 initial, internal, terminal, utr, utr5prime, and utr3prime.
243 A special and virtual type is 'coding', which refers to all types
244 except utr.
246 This method basically merges the exons returned by transcripts.
248 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
249 Args : An optional string specifying the type of exon.
252 =cut
254 sub exons {
255 my ($self, @args) = @_;
256 my @transcripts = $self->transcripts();
257 my @feas = ();
259 foreach my $tr (@transcripts) {
260 push(@feas, $tr->exons(@args));
262 return @feas;
265 =head2 introns
267 Title : introns()
268 Usage : @introns = $gene->introns();
269 Function: Get all introns of this gene structure.
271 Note that this class currently generates these features on-the-fly,
272 that is, it simply treats all regions between exons as introns.
273 It assumes that the exons in the transcripts do not overlap.
275 This method basically merges the introns returned by transcripts.
277 Returns : An array of Bio::SeqFeatureI implementing objects.
278 Args :
281 =cut
283 sub introns {
284 my ($self) = @_;
285 my @transcripts = $self->transcripts();
286 my @feas = ();
288 foreach my $tr (@transcripts) {
289 push(@feas, $tr->introns());
291 return @feas;
294 =head2 poly_A_sites
296 Title : poly_A_sites()
297 Usage : @polyAsites = $gene->poly_A_sites();
298 Function: Get the poly-adenylation sites of this gene structure.
300 This method basically merges the poly-adenylation sites returned by
301 transcripts.
303 Returns : An array of Bio::SeqFeatureI implementing objects.
304 Args :
307 =cut
309 sub poly_A_sites {
310 my ($self) = @_;
311 my @transcripts = $self->transcripts();
312 my @feas = ();
314 foreach my $tr (@transcripts) {
315 push(@feas, $tr->poly_A_site());
317 return @feas;
320 =head2 utrs
322 Title : utrs()
323 Usage : @utr_sites = $gene->utrs('3prime');
324 @utr_sites = $gene->utrs('5prime');
325 @utr_sites = $gene->utrs();
326 Function: Get the features representing untranslated regions (UTR) of this
327 gene structure.
329 You may provide an argument specifying the type of UTR. Currently
330 the following types are recognized: 5prime 3prime for UTR on the
331 5' and 3' end of the CDS, respectively.
333 This method basically merges the UTRs returned by transcripts.
335 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects
336 representing the UTR regions or sites.
337 Args : Optionally, either 3prime, or 5prime for the the type of UTR
338 feature.
341 =cut
343 sub utrs {
344 my ($self,@args) = @_;
345 my @transcripts = $self->transcripts();
346 my @feas = ();
348 foreach my $tr (@transcripts) {
349 push(@feas, $tr->utrs(@args));
351 return @feas;
354 =head2 sub_SeqFeature
356 Title : sub_SeqFeature
357 Usage : @feats = $gene->sub_SeqFeature();
358 Function: Returns an array of all subfeatures.
360 This method is defined in Bio::SeqFeatureI. We override this here
361 to include the transcripts.
363 Returns : An array Bio::SeqFeatureI implementing objects.
364 Args : none
367 =cut
369 sub sub_SeqFeature {
370 my ($self) = @_;
371 my @feas = ();
373 # get what the parent already has
374 @feas = $self->SUPER::sub_SeqFeature();
375 push(@feas, $self->transcripts());
376 return @feas;
379 =head2 flush_sub_SeqFeature
381 Title : flush_sub_SeqFeature
382 Usage : $gene->flush_sub_SeqFeature();
383 $gene->flush_sub_SeqFeature(1);
384 Function: Removes all subfeatures.
386 This method is overridden from Bio::SeqFeature::Generic to flush
387 all additional subfeatures, i.e., transcripts, which is
388 almost certainly not what you want. To remove only features added
389 through $gene->add_sub_SeqFeature($feature) pass any
390 argument evaluating to TRUE.
392 Example :
393 Returns : none
394 Args : Optionally, an argument evaluating to TRUE will suppress flushing
395 of all gene structure-specific subfeatures (transcripts).
398 =cut
400 sub flush_sub_SeqFeature {
401 my ($self,$fea_only) = @_;
403 $self->SUPER::flush_sub_SeqFeature();
404 if(! $fea_only) {
405 $self->flush_transcripts();
409 sub gene_cleanup {
410 my $self = shift;
411 $self->flush_transcripts;