2 # BioPerl module for Bio::SeqFeature::Gene::Intron
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by David Block <dblock@gene.pbi.nrc.ca>
8 # Copyright David Block
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SeqFeature::Gene::Intron - An intron feature
20 Give standard usage here
24 Describe the object here
30 User feedback is an integral part of the evolution of this and other
31 Bioperl modules. Send your comments and suggestions preferably to
32 the Bioperl mailing list. Your participation is much appreciated.
34 bioperl-l@bioperl.org - General discussion
35 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39 Please direct usage questions or support issues to the mailing list:
41 I<bioperl-l@bioperl.org>
43 rather than to the module maintainer directly. Many experienced and
44 reponsive experts will be able look at the problem and quickly
45 address it. Please include a thorough description of the problem
46 with code and data examples if at all possible.
50 Report bugs to the Bioperl bug tracking system to help us keep track
51 of the bugs and their resolution. Bug reports can be submitted via the
54 https://github.com/bioperl/bioperl-live/issues
56 =head1 AUTHOR - David Block
58 Email dblock@gene.pbi.nrc.ca
62 The rest of the documentation details each of the object methods.
63 Internal methods are usually preceded with a _
68 # Let the code begin...
71 package Bio
::SeqFeature
::Gene
::Intron
;
74 use Bio
::SeqFeature
::Gene
::Exon
;
76 use base
qw(Bio::SeqFeature::Gene::NC_Feature);
79 my($class,@args) = @_;
81 # introns are non-coding by default
82 if(! grep { lc($_) eq '-is_coding'; } @args) {
83 push(@args, '-is_coding', 0);
85 my $self = $class->SUPER::new
(@args);
87 my ($primary, $prim) =
88 $self->_rearrange([qw(PRIMARY PRIMARY_TAG)],@args);
89 $self->primary_tag('intron') unless $primary || $prim;
97 Usage : $intron->upstream_Exon()
98 Function: exon upstream of the intron
99 Returns : Bio::EnsEMBL::Exon
105 my( $self, $exon ) = @_;
108 $self->{'_intron_location'} = undef;
109 $self->throw("'$exon' is not a Bio::SeqFeature::Gene::ExonI")
110 unless $exon->isa('Bio::SeqFeature::Gene::ExonI');
111 $self->{'_upstream_exon'} = $exon;
113 return $self->{'_upstream_exon'};
117 =head2 downstream_Exon
119 Title : downstream_Exon
120 Usage : $intron->downstream_Exon()
121 Function: exon downstream of the intron
122 Returns : Bio::EnsEMBL::Exon
127 sub downstream_Exon
{
128 my( $self, $exon ) = @_;
131 $self->{'_intron_location'} = undef;
132 $self->throw("'$exon' is not a Bio::SeqFeature::Gene::ExonI")
133 unless $exon->isa('Bio::SeqFeature::Gene::ExonI');
134 $self->{'_downstream_exon'} = $exon;
136 return $self->{'_downstream_exon'};
142 Usage : $intron->phase()
143 Function: returns the phase of the intron(where it interrupts the codon)
151 return $self->downstream_Exon->phase;
155 =head2 acceptor_splice_site
157 Title : acceptor_splice_site
158 Usage : $intron->acceptor_splice_site(21,3)
159 Function: returns the sequence corresponding to the
160 consensus acceptor splice site. If start and
161 end are provided, it will number of base pairs
162 left and right of the canonical AG. Here 21 means
163 21 bp into intron and 3 means 3 bp into the exon.
164 --Intron--21----|AG|-3-----Exon
172 sub acceptor_splice_site
{
173 my ($self,$ss_start,$ss_end) = @_;
174 $ss_start = 21 unless defined $ss_start;
175 $ss_end = 3 unless defined $ss_end;
176 if($self->strand < 0){
181 my $intron_end= $self->location->end;
182 my $down_exon = $self->downstream_Exon;
184 if($self->strand < 0){
185 $ss_start= $ss_start > $down_exon->length ?
$down_exon->length: $ss_start;
186 $ss_end= $ss_end > $self->length-2 ?
$self->length-2 : $ss_end;
187 $acceptor = Bio
::SeqFeature
::Generic
->new(-start
=>$self->start - ($ss_start) ,
188 -end
=>$self->start + ($ss_end+1),
189 -strand
=>$self->strand,
190 -primary_tag
=>"donor splice site");
193 $ss_start = $ss_start > $self->length-2 ?
$self->length-2 : $ss_start;
194 $ss_end = $ss_end > $down_exon->length ?
$down_exon->length : $ss_end;
197 $acceptor = Bio
::SeqFeature
::Generic
->new(-start
=>$self->end - ($ss_start + 1),
198 -end
=>$self->end + $ss_end,
199 -strand
=>$self->strand,
200 -primary_tag
=>"donor splice site");
202 $acceptor->attach_seq($self->entire_seq);
208 =head2 donor_splice_site
210 Title : donor_splice_site
211 Usage : $intron->donor_splice_site(3,6)
212 Function: returns the sequence corresponding to the
213 consensus donor splice site. If start and
214 end are provided, it will number of base pairs
215 left and right of the canonical GT. Here 3 means
216 3 bp into exon and 6 means 6 bp into the intron.
217 --Exon-3--|GT|-6----Intron-
225 sub donor_splice_site
{
226 my ($self,$ss_start,$ss_end) = @_;
227 $ss_start = 3 unless defined $ss_start;
228 $ss_end = 10 unless defined $ss_end;
229 if($self->strand < 0){
234 my $up_exon = $self->upstream_Exon;
236 if($self->strand < 0){
237 $ss_end = $ss_end > $up_exon->length ?
$up_exon->length : $ss_end;
238 $ss_start = $ss_start> $self->length -2 ?
$self->length -2 : $ss_start;
239 $donor = Bio
::SeqFeature
::Generic
->new(-start
=>$self->end - ($ss_start+1),
240 -end
=> $self->end + ($ss_end),
241 -strand
=>$self->strand,
242 -primary_tag
=>"acceptor splice site");
245 $ss_start = $ss_start > $up_exon->length ?
$up_exon->length : $ss_start;
246 $ss_end = $ss_end > $self->length -2 ?
$self->length -2 : $ss_end;
247 $donor = Bio
::SeqFeature
::Generic
->new(-start
=>$self->start - $ss_start,
248 -end
=> $self->start +($ss_end+1),
249 -strand
=>$self->strand,
250 -primary_tag
=>"acceptor splice site");
252 $donor->attach_seq($self->entire_seq);
259 unless ($self->{'_intron_location'}) {
260 my $loc = Bio
::Location
::Simple
->new;
262 my $up_exon = $self->upstream_Exon;
263 my $down_exon = $self->downstream_Exon;
265 # Get the PrimarySeqs attached to both and check it is the same sequence
266 my $up_seq = $up_exon ->entire_seq;
267 my $down_seq = $down_exon->entire_seq;
268 unless (ref($up_seq) eq ref($down_seq) ) {
269 $self->throw("upstream and downstream exons are attached to different sequences\n'$up_seq' and '$down_seq'");
272 # Check that the exons are on the same strand. (Do I need to bother?)
273 my $up_strand = $up_exon ->strand;
274 my $down_strand = $down_exon->strand;
275 unless ($up_strand == $down_strand) {
276 $self->throw("upstream and downstream exons are on different strands "
277 . "('$up_strand' and '$down_strand')");
279 $loc->strand($up_strand);
281 # $exon_end is the end of the exon which is 5' of the intron on the genomic sequence.
282 # $exon_start is the start of the exon which is 3' of the intron on the genomic sequence.
283 my( $exon_end, $exon_start );
284 if ($up_strand == 1) {
285 $exon_end = $up_exon ->end;
286 $exon_start = $down_exon->start;
288 $exon_end = $down_exon->end;
289 $exon_start = $up_exon ->start;
291 unless ($exon_end < $exon_start) {
292 $self->throw("Intron gap begins after '$exon_end' and ends before '$exon_start'");
294 $loc->start($exon_end + 1);
295 $loc->end ($exon_start - 1);
297 # Attach the sequence and location objects to the intron
298 $self->{'_intron_location'} = $loc;
301 return $self->{'_intron_location'};