2 # BioPerl module for Bio::SeqFeature::Gene::Intron
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by David Block <dblock@gene.pbi.nrc.ca>
8 # Copyright David Block
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SeqFeature::Gene::Intron - An intron feature
20 Give standard usage here
24 Describe the object here
30 User feedback is an integral part of the evolution of this and other
31 Bioperl modules. Send your comments and suggestions preferably to
32 the Bioperl mailing list. Your participation is much appreciated.
34 bioperl-l@bioperl.org - General discussion
35 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39 Please direct usage questions or support issues to the mailing list:
41 I<bioperl-l@bioperl.org>
43 rather than to the module maintainer directly. Many experienced and
44 reponsive experts will be able look at the problem and quickly
45 address it. Please include a thorough description of the problem
46 with code and data examples if at all possible.
50 Report bugs to the Bioperl bug tracking system to help us keep track
51 of the bugs and their resolution. Bug reports can be submitted via the
54 https://github.com/bioperl/bioperl-live/issues
56 =head1 AUTHOR - David Block
58 Email dblock@gene.pbi.nrc.ca
62 The rest of the documentation details each of the object methods.
63 Internal methods are usually preceded with a _
68 # Let the code begin...
71 package Bio
::SeqFeature
::Gene
::Intron
;
75 use Bio
::SeqFeature
::Gene
::Exon
;
77 use base
qw(Bio::SeqFeature::Gene::NC_Feature);
80 my($class,@args) = @_;
82 # introns are non-coding by default
83 if(! grep { lc($_) eq '-is_coding'; } @args) {
84 push(@args, '-is_coding', 0);
86 my $self = $class->SUPER::new
(@args);
88 my ($primary, $prim) =
89 $self->_rearrange([qw(PRIMARY PRIMARY_TAG)],@args);
90 $self->primary_tag('intron') unless $primary || $prim;
98 Usage : $intron->upstream_Exon()
99 Function: exon upstream of the intron
100 Returns : Bio::EnsEMBL::Exon
106 my( $self, $exon ) = @_;
109 $self->{'_intron_location'} = undef;
110 $self->throw("'$exon' is not a Bio::SeqFeature::Gene::ExonI")
111 unless $exon->isa('Bio::SeqFeature::Gene::ExonI');
112 $self->{'_upstream_exon'} = $exon;
114 return $self->{'_upstream_exon'};
118 =head2 downstream_Exon
120 Title : downstream_Exon
121 Usage : $intron->downstream_Exon()
122 Function: exon downstream of the intron
123 Returns : Bio::EnsEMBL::Exon
128 sub downstream_Exon
{
129 my( $self, $exon ) = @_;
132 $self->{'_intron_location'} = undef;
133 $self->throw("'$exon' is not a Bio::SeqFeature::Gene::ExonI")
134 unless $exon->isa('Bio::SeqFeature::Gene::ExonI');
135 $self->{'_downstream_exon'} = $exon;
137 return $self->{'_downstream_exon'};
143 Usage : $intron->phase()
144 Function: returns the phase of the intron(where it interrupts the codon)
152 return $self->downstream_Exon->phase;
156 =head2 acceptor_splice_site
158 Title : acceptor_splice_site
159 Usage : $intron->acceptor_splice_site(21,3)
160 Function: returns the sequence corresponding to the
161 consensus acceptor splice site. If start and
162 end are provided, it will number of base pairs
163 left and right of the canonical AG. Here 21 means
164 21 bp into intron and 3 means 3 bp into the exon.
165 --Intron--21----|AG|-3-----Exon
173 sub acceptor_splice_site
{
174 my ($self,$ss_start,$ss_end) = @_;
175 $ss_start = 21 unless defined $ss_start;
176 $ss_end = 3 unless defined $ss_end;
177 if($self->strand < 0){
182 my $intron_end= $self->location->end;
183 my $down_exon = $self->downstream_Exon;
185 if($self->strand < 0){
186 $ss_start= $ss_start > $down_exon->length ?
$down_exon->length: $ss_start;
187 $ss_end= $ss_end > $self->length-2 ?
$self->length-2 : $ss_end;
188 $acceptor = Bio
::SeqFeature
::Generic
->new(-start
=>$self->start - ($ss_start) ,
189 -end
=>$self->start + ($ss_end+1),
190 -strand
=>$self->strand,
191 -primary_tag
=>"donor splice site");
194 $ss_start = $ss_start > $self->length-2 ?
$self->length-2 : $ss_start;
195 $ss_end = $ss_end > $down_exon->length ?
$down_exon->length : $ss_end;
198 $acceptor = Bio
::SeqFeature
::Generic
->new(-start
=>$self->end - ($ss_start + 1),
199 -end
=>$self->end + $ss_end,
200 -strand
=>$self->strand,
201 -primary_tag
=>"donor splice site");
203 $acceptor->attach_seq($self->entire_seq);
209 =head2 donor_splice_site
211 Title : donor_splice_site
212 Usage : $intron->donor_splice_site(3,6)
213 Function: returns the sequence corresponding to the
214 consensus donor splice site. If start and
215 end are provided, it will number of base pairs
216 left and right of the canonical GT. Here 3 means
217 3 bp into exon and 6 means 6 bp into the intron.
218 --Exon-3--|GT|-6----Intron-
226 sub donor_splice_site
{
227 my ($self,$ss_start,$ss_end) = @_;
228 $ss_start = 3 unless defined $ss_start;
229 $ss_end = 10 unless defined $ss_end;
230 if($self->strand < 0){
235 my $up_exon = $self->upstream_Exon;
237 if($self->strand < 0){
238 $ss_end = $ss_end > $up_exon->length ?
$up_exon->length : $ss_end;
239 $ss_start = $ss_start> $self->length -2 ?
$self->length -2 : $ss_start;
240 $donor = Bio
::SeqFeature
::Generic
->new(-start
=>$self->end - ($ss_start+1),
241 -end
=> $self->end + ($ss_end),
242 -strand
=>$self->strand,
243 -primary_tag
=>"acceptor splice site");
246 $ss_start = $ss_start > $up_exon->length ?
$up_exon->length : $ss_start;
247 $ss_end = $ss_end > $self->length -2 ?
$self->length -2 : $ss_end;
248 $donor = Bio
::SeqFeature
::Generic
->new(-start
=>$self->start - $ss_start,
249 -end
=> $self->start +($ss_end+1),
250 -strand
=>$self->strand,
251 -primary_tag
=>"acceptor splice site");
253 $donor->attach_seq($self->entire_seq);
260 unless ($self->{'_intron_location'}) {
261 my $loc = Bio
::Location
::Simple
->new;
263 my $up_exon = $self->upstream_Exon;
264 my $down_exon = $self->downstream_Exon;
266 # Get the PrimarySeqs attached to both and check it is the same sequence
267 my $up_seq = $up_exon ->entire_seq;
268 my $down_seq = $down_exon->entire_seq;
269 unless (ref($up_seq) eq ref($down_seq) ) {
270 $self->throw("upstream and downstream exons are attached to different sequences\n'$up_seq' and '$down_seq'");
273 # Check that the exons are on the same strand. (Do I need to bother?)
274 my $up_strand = $up_exon ->strand;
275 my $down_strand = $down_exon->strand;
276 unless ($up_strand == $down_strand) {
277 $self->throw("upstream and downstream exons are on different strands "
278 . "('$up_strand' and '$down_strand')");
280 $loc->strand($up_strand);
282 # $exon_end is the end of the exon which is 5' of the intron on the genomic sequence.
283 # $exon_start is the start of the exon which is 3' of the intron on the genomic sequence.
284 my( $exon_end, $exon_start );
285 if ($up_strand == 1) {
286 $exon_end = $up_exon ->end;
287 $exon_start = $down_exon->start;
289 $exon_end = $down_exon->end;
290 $exon_start = $up_exon ->start;
292 unless ($exon_end < $exon_start) {
293 $self->throw("Intron gap begins after '$exon_end' and ends before '$exon_start'");
295 $loc->start($exon_end + 1);
296 $loc->end ($exon_start - 1);
298 # Attach the sequence and location objects to the intron
299 $self->{'_intron_location'} = $loc;
302 return $self->{'_intron_location'};