2 # BioPerl module for Bio::SeqFeature::Gene::Transcript
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Hilmar Lapp <hlapp@gmx.net>
8 # Copyright Hilmar Lapp
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SeqFeature::Gene::Transcript - A feature representing a transcript
20 # See documentation of methods.
24 A feature representing a transcript.
30 User feedback is an integral part of the evolution of this and other
31 Bioperl modules. Send your comments and suggestions preferably to one
32 of the Bioperl mailing lists. Your participation is much appreciated.
34 bioperl-l@bioperl.org - General discussion
35 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39 Please direct usage questions or support issues to the mailing list:
41 I<bioperl-l@bioperl.org>
43 rather than to the module maintainer directly. Many experienced and
44 reponsive experts will be able look at the problem and quickly
45 address it. Please include a thorough description of the problem
46 with code and data examples if at all possible.
50 Report bugs to the Bioperl bug tracking system to help us keep track
51 the bugs and their resolution. Bug reports can be submitted via the
54 https://github.com/bioperl/bioperl-live/issues
56 =head1 AUTHOR - Hilmar Lapp
62 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
67 # Let the code begin...
69 package Bio
::SeqFeature
::Gene
::Transcript
;
75 use base
qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::TranscriptI);
78 my ($caller, @args) = @_;
79 my $self = $caller->SUPER::new
(@args);
80 $self->_register_for_cleanup(\
&transcript_destroy
);
81 my ($primary) = $self->_rearrange([qw(PRIMARY)],@args);
83 $primary = 'transcript' unless $primary;
84 $self->primary_tag($primary);
85 $self->strand(0) if(! defined($self->strand()));
93 Usage : @proms = $transcript->promoters();
94 Function: Get the promoter features/sites of this transcript.
96 Note that OO-modeling of regulatory elements is not stable yet.
97 This means that this method might change or even disappear in a
98 future release. Be aware of this if you use it.
100 Returns : An array of Bio::SeqFeatureI implementing objects representing the
101 promoter regions or sites.
109 return $self->get_feature_type('Bio::SeqFeature::Gene::Promoter');
114 Title : add_promoter()
115 Usage : $transcript->add_promoter($feature);
116 Function: Add a promoter feature/site to this transcript.
119 Note that OO-modeling of regulatory elements is not stable yet.
120 This means that this method might change or even disappear in a
121 future release. Be aware of this if you use it.
124 Args : A Bio::SeqFeatureI implementing object.
130 my ($self, $fea) = @_;
131 $self->_add($fea,'Bio::SeqFeature::Gene::Promoter');
134 =head2 flush_promoters
136 Title : flush_promoters()
137 Usage : $transcript->flush_promoters();
138 Function: Remove all promoter features/sites from this transcript.
140 Note that OO-modeling of regulatory elements is not stable yet.
141 This means that this method might change or even disappear in a
142 future release. Be aware of this if you use it.
144 Returns : the removed features as a list
150 sub flush_promoters
{
152 return $self->_flush('Bio::SeqFeature::Gene::Promoter');
158 Usage : @exons = $gene->exons();
159 ($inital_exon) = $gene->exons('Initial');
160 Function: Get all exon features or all exons of specified type of this
163 Exon type is treated as a case-insensitive regular expression and
164 is optional. For consistency, use only the following types:
165 initial, internal, terminal.
167 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
168 Args : An optional string specifying the primary_tag of the feature.
174 my ($self, $type) = @_;
175 return $self->get_unordered_feature_type('Bio::SeqFeature::Gene::ExonI',
181 Title : exons_ordered
182 Usage : @exons = $gene->exons_ordered();
183 @exons = $gene->exons_ordered("Internal");
184 Function: Get an ordered list of all exon features or all exons of specified
185 type of this transcript.
187 Exon type is treated as a case-insensitive regular expression and
188 is optional. For consistency, use only the following types:
190 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
191 Args : An optional string specifying the primary_tag of the feature.
196 my ($self,$type) = @_;
197 return $self->get_feature_type('Bio::SeqFeature::Gene::ExonI', $type);
203 Usage : $transcript->add_exon($exon,'initial');
204 Function: Add a exon feature to this transcript.
206 The second argument denotes the type of exon. Mixing exons with and
207 without a type is likely to cause trouble in exons(). Either
208 leave out the type for all exons or for none.
210 Presently, the following types are known: initial, internal,
211 terminal, utr, utr5prime, and utr3prime (all case-insensitive).
212 UTR should better be added through utrs()/add_utr().
214 If you wish to use other or additional types, you will almost
215 certainly have to call exon_type_sortorder() in order to replace
216 the default sort order, or mrna(), cds(), protein(), and exons()
217 may yield unexpected results.
220 Args : A Bio::SeqFeature::Gene::ExonI implementing object.
221 A string indicating the type of the exon (optional).
227 my ($self, $fea, $type) = @_;
228 if(! $fea->isa('Bio::SeqFeature::Gene::ExonI') ) {
229 $self->throw("$fea does not implement Bio::SeqFeature::Gene::ExonI");
231 $self->_add($fea,'Bio::SeqFeature::Gene::Exon', $type);
236 Title : flush_exons()
237 Usage : $transcript->flush_exons();
238 $transcript->flush_exons('terminal');
239 Function: Remove all or a certain type of exon features from this transcript.
241 See add_exon() for documentation about types.
243 Calling without a type will not flush UTRs. Call flush_utrs() for
245 Returns : the deleted features as a list
246 Args : A string indicating the type of the exon (optional).
252 my ($self, $type) = @_;
253 return $self->_flush('Bio::SeqFeature::Gene::Exon',$type);
259 Usage : @introns = $gene->introns();
260 Function: Get all intron features this gene structure.
262 Note that this implementation generates these features
263 on-the-fly, that is, it simply treats all regions between
264 exons as introns, assuming that exons do not overlap. A
265 consequence is that a consistent correspondence between the
266 elements in the returned array and the array that exons()
267 returns will exist only if the exons are properly sorted
268 within their types (forward for plus- strand and reverse
269 for minus-strand transcripts). To ensure correctness the
270 elements in the array returned will always be sorted.
272 Returns : An array of Bio::SeqFeature::Gene::Intron objects representing
282 my @exons = $self->exons();
283 my ($strand, $rev_order);
285 # if there's 1 or less exons we're done
286 return () unless($#exons > 0);
287 # record strand and order (a minus-strand transcript is likely to have
288 # the exons stacked in reverse order)
289 foreach my $exon (@exons) {
290 $strand = $exon->strand();
291 last if $strand; # we're done if we've got 1 or -1
293 $rev_order = ($exons[0]->end() < $exons[1]->start() ?
0 : 1);
295 # Make sure exons are sorted. Because we assume they don't overlap, we
296 # simply sort by start position.
297 if((! defined($strand)) || ($strand != -1) || (! $rev_order)) {
298 # always sort forward for plus-strand transcripts, and for negative-
299 # strand transcripts that appear to be unsorted or forward sorted
300 @exons = map { $_->[0] } sort { $a->[1] <=> $b->[1] }
301 map { [ $_, $_->start * ($_->strand || 1)] } @exons;
303 # sort in reverse order for transcripts on the negative strand and
304 # found to be in reverse order
305 @exons = map { $_->[0] } sort { $b->[1] <=> $a->[1] } map { [ $_, $_->start()] } @exons;
307 # loop over all intervening gaps
308 while ((my $exonA = shift (@exons)) &&(my $exonB = shift(@exons))){
309 my $intron = Bio
::SeqFeature
::Gene
::Intron
->new(-primary
=>'intron');
310 $intron->upstream_Exon($exonA);
311 $intron->downstream_Exon($exonB);
312 $intron->attach_seq($self->entire_seq) if $self->entire_seq;
313 unshift(@exons,$exonB);
314 push @introns,$intron;
321 Title : poly_A_site()
322 Usage : $polyAsite = $transcript->poly_A_site();
323 Function: Get/set the poly-adenylation feature/site of this transcript.
324 Returns : A Bio::SeqFeatureI implementing object representing the
325 poly-adenylation region.
326 Args : A Bio::SeqFeatureI implementing object on set, or FALSE to flush
327 a previously set object.
333 my ($self, $fea) = @_;
335 $self->_add($fea,'Bio::SeqFeature::Gene::Poly_A_site');
337 return ($self->get_feature_type('Bio::SeqFeature::Gene::Poly_A_site'))[0];
343 Usage : @utr_sites = $transcript->utrs('utr3prime');
344 @utr_sites = $transcript->utrs('utr5prime');
345 @utr_sites = $transcript->utrs();
346 Function: Get the features representing untranslated regions (UTR) of this
349 You may provide an argument specifying the type of UTR. Currently
350 the following types are recognized: utr5prime utr3prime for UTR on the
351 5' and 3' end of the CDS, respectively.
353 Returns : An array of Bio::SeqFeature::Gene::UTR objects
354 representing the UTR regions or sites.
355 Args : Optionally, either utr3prime, or utr5prime for the the type of UTR
362 my ($self, $type) = @_;
363 return $self->get_feature_type('Bio::SeqFeature::Gene::UTR',$type);
370 Usage : $transcript->add_utr($utrobj, 'utr3prime');
371 $transcript->add_utr($utrobj);
372 Function: Add a UTR feature/site to this transcript.
374 The second parameter is optional and denotes the type of the UTR
375 feature. Presently recognized types include 'utr5prime' and 'utr3prime'
376 for UTR on the 5' and 3' end of a gene, respectively.
378 Calling this method is the same as calling
379 add_exon($utrobj, 'utr'.$type). In this sense a UTR object is a
380 special exon object, which is transcribed, not spliced out, but
383 Note that the object supplied should return FALSE for is_coding().
384 Otherwise cds() and friends will become confused.
387 Args : A Bio::SeqFeature::Gene::UTR implementing object.
393 my ($self, $fea, $type) = @_;
394 $self->_add($fea,'Bio::SeqFeature::Gene::UTR',$type);
400 Usage : $transcript->flush_utrs();
401 $transcript->flush_utrs('utr3prime');
402 Function: Remove all or a specific type of UTR features/sites from this
405 Cf. add_utr() for documentation about recognized types.
406 Returns : a list of the removed features
407 Args : Optionally a string denoting the type of UTR feature.
413 my ($self, $type) = @_;
414 return $self->_flush('Bio::SeqFeature::Gene::UTR',$type);
417 =head2 sub_SeqFeature
419 Title : sub_SeqFeature
420 Usage : @feats = $transcript->sub_SeqFeature();
421 Function: Returns an array of all subfeatures.
423 This method is defined in Bio::SeqFeatureI. We override this here
424 to include the exon etc features.
426 Returns : An array Bio::SeqFeatureI implementing objects.
436 # get what the parent already has
437 @feas = $self->SUPER::sub_SeqFeature
();
438 # add the features we have in addition
439 push(@feas, $self->exons()); # this includes UTR features
440 push(@feas, $self->promoters());
441 push(@feas, $self->poly_A_site()) if($self->poly_A_site());
445 =head2 flush_sub_SeqFeature
447 Title : flush_sub_SeqFeature
448 Usage : $transcript->flush_sub_SeqFeature();
449 $transcript->flush_sub_SeqFeature(1);
450 Function: Removes all subfeatures.
452 This method is overridden from Bio::SeqFeature::Generic to flush
453 all additional subfeatures like exons, promoters, etc., which is
454 almost certainly not what you want. To remove only features added
455 through $transcript->add_sub_SeqFeature($feature) pass any
456 argument evaluating to TRUE.
460 Args : Optionally, an argument evaluating to TRUE will suppress flushing
461 of all transcript-specific subfeatures (exons etc.).
466 sub flush_sub_SeqFeature
{
467 my ($self,$fea_only) = @_;
469 $self->SUPER::flush_sub_SeqFeature
();
471 $self->flush_promoters();
472 $self->flush_exons();
474 $self->poly_A_site(0);
482 Usage : $seq = $transcript->cds();
483 Function: Returns the CDS (coding sequence) as defined by the exons
484 of this transcript and the attached sequence.
486 If no sequence is attached this method will return false.
488 Note that the implementation provided here returns a
489 concatenation of all coding exons, thereby assuming that
490 exons do not overlap.
492 Note also that you cannot set the CDS via this method. Set
493 a single CDS feature as a single exon, or derive your own
494 class if you want to store a predicted CDS.
497 Returns : A Bio::PrimarySeqI implementing object.
504 my @exons = $self->exons_ordered(); #this is always sorted properly according to strand
507 return unless(@exons);
508 # record strand (a minus-strand transcript must have the exons sorted in
510 foreach my $exon (@exons) {
511 if(defined($exon->strand()) && (! $strand)) {
512 $strand = $exon->strand();
514 if($exon->strand() && (($exon->strand() * $strand) < 0)) {
515 $self->throw("Transcript mixes coding exons on plus and minus ".
516 "strand. This makes no sense.");
519 my $cds = $self->_make_cds(@exons);
521 return Bio
::PrimarySeq
->new('-id' => $self->seq_id(),
523 '-alphabet' => "dna");
529 Usage : $protein = $transcript->protein();
530 Function: Get the protein encoded by the transcript as a sequence object.
532 The implementation provided here simply calls translate() on the
533 object returned by cds().
535 Returns : A Bio::PrimarySeqI implementing object.
546 return $seq->translate() if $seq;
553 Usage : $mrna = $transcript->mrna();
554 Function: Get the mRNA of the transcript as a sequence object.
556 The difference to cds() is that the sequence object returned by
557 this methods will also include UTR and the poly-adenylation site,
558 but not promoter sequence (TBD).
560 HL: do we really need this method?
562 Returns : A Bio::PrimarySeqI implementing object.
570 my ($seq, $mrna, $elem);
572 # get the coding part
575 $seq = Bio
::PrimarySeq
->new('-id' => $self->seq_id(),
576 '-alphabet' => "rna",
579 # get and add UTR sequences
581 foreach $elem ($self->utrs('utr5prime')) {
582 $mrna .= $elem->seq()->seq();
584 $seq->seq($mrna . $seq->seq());
586 foreach $elem ($self->utrs('utr3prime')) {
587 $mrna .= $elem->seq()->seq();
589 $seq->seq($seq->seq() . $mrna);
590 if($self->poly_A_site()) {
591 $seq->seq($seq->seq() . $self->poly_A_site()->seq()->seq());
593 return if($seq->length() == 0);
597 sub _get_typed_keys
{
598 my ($self, $keyprefix, $type) = @_;
602 # make case-insensitive
603 $type = ($type ?
lc($type) : "");
604 # pull out all feature types that exist and match
605 @keys = grep { /^_$keyprefix$type/i; } (keys(%{$self}));
610 my ($self,@exons) = @_;
613 foreach my $exon (@exons) {
614 next if((! defined($exon->seq())) || (! $exon->is_coding()));
615 my $phase = length($cds) % 3;
616 # let's check the simple case
617 if((! defined($exon->frame())) || ($phase == $exon->frame())) {
618 # this one fits exactly, or frame of the exon is undefined (should
619 # we warn about that?); we bypass the $exon->cds() here (hmm,
620 # not very clean style, but I don't see where this screws up)
621 $cds .= $exon->seq()->seq();
623 # this one is probably from exon shuffling and needs some work
624 my $seq = $exon->cds(); # now $seq is guaranteed to be in frame 0
629 # how many Ns can we chop off the piece to be added?
631 if($seq =~ /^(n+)/i) {
632 $n_crop = length($1);
634 if($n_crop >= $phase) {
635 # chop off to match the phase
636 $seq = substr($seq, $phase);
639 $seq = ("n" x
(3-$phase)) . $seq;
651 Usage : my @features=$transcript->features;
652 Function: returns all the features associated with this transcript
653 Returns : a list of SeqFeatureI implementing objects
662 return grep { defined } @
{$self->{'_features'} || []};
665 =head2 features_ordered
667 Title : features_ordered
668 Usage : my @features=$transcript->features_ordered;
669 Function: returns all the features associated with this transcript,
670 in order by feature start, according to strand
671 Returns : a list of SeqFeatureI implementing objects
677 sub features_ordered
{
679 return $self->_stranded_sort(@
{$self->{'_features'} || []});
683 sub get_unordered_feature_type
{
684 my ($self, $type, $pri)=@_;
686 foreach ( $self->features) {
687 if ($_->isa($type)) {
688 if ($pri && $_->primary_tag !~ /$pri/i) {
698 sub get_feature_type
{
700 return $self->_stranded_sort($self->get_unordered_feature_type(@_));
703 #This was fixed by Gene Cutler - the indexing on the list being reversed
704 #fixed a bad bug. Thanks Gene!
706 my ($self, $type, $pri)=@_;
707 my @list=$self->features;
709 for (reverse (0..$#list)) {
710 if (defined $list[$_] &&
711 $list[$_]->isa($type)) {
712 if ($pri && $list[$_]->primary_tag !~ /$pri/i) {
715 push @cut, splice @list, $_, 1; #remove the element of $type from @list
716 #and return each of them in @cut
719 $self->{'_features'}=\
@list;
724 my ($self, $fea, $type, $pri)=@_;
725 require Bio
::SeqFeature
::Gene
::Promoter
;
726 require Bio
::SeqFeature
::Gene
::UTR
;
727 require Bio
::SeqFeature
::Gene
::Exon
;
728 require Bio
::SeqFeature
::Gene
::Intron
;
729 require Bio
::SeqFeature
::Gene
::Poly_A_site
;
731 if(! $fea->isa('Bio::SeqFeatureI') ) {
732 $self->throw("$fea does not implement Bio::SeqFeatureI");
734 if(! $fea->isa($type) || $pri) {
735 $fea=$self->_new_of_type($fea,$type,$pri);
737 if (! $self->strand) {
738 $self->strand($fea->strand);
740 if ($self->strand * $fea->strand == -1) {
741 $self->throw("$fea is on opposite strand from $self");
745 $self->_expand_region($fea);
746 if(defined($self->entire_seq()) && (! defined($fea->entire_seq())) &&
747 $fea->can('attach_seq')) {
748 $fea->attach_seq($self->entire_seq());
750 if (defined $self->parent) {
751 $self->parent->_expand_region($fea);
753 push(@
{$self->{'_features'}}, $fea);
760 foreach my $fea (@list) {
763 $strand = $fea->strand() if(! $strand);
764 if(($fea->strand() * $strand) < 0) {
770 if (defined $strand && $strand == - 1) { #reverse strand
771 return map { $_->[0] } sort {$b->[1] <=> $a->[1]} map { [$_, $_->start] } @list;
772 } else { #undef or forward strand
773 return map { $_->[0] } sort {$a->[1] <=> $b->[1]} map { [$_, $_->start] } @list;
778 my ($self, $fea, $type, $pri)= @_;
781 $primary = $pri; #can set new primary tag if desired
783 ($primary) = $type =~ /.*::(.+)/; #or else primary is just end of type string
786 $fea->primary_tag($primary);
790 sub transcript_destroy
{
792 # We're going to be really explicit to insure memory leaks
794 foreach my $f ( $self->features ) {
797 $self->parent(undef);