add SOFA as well
[bioperl-live.git] / lib / Bio / Variation / RNAChange.pm
blob846a6978ac6a581886ca049f3b9536a0d0d5a364
2 # BioPerl module for Bio::Variation::RNAChange
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Heikki Lehvaslaiho
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Variation::RNAChange - Sequence change class for RNA level
18 =head1 SYNOPSIS
20 $rnachange = Bio::Variation::RNAChange->new
21 ('-start' => $start,
22 '-end' => $end,
23 '-length' => $len,
24 '-codon_pos' => $cp,
25 '-upStreamSeq' => $upflank,
26 '-dnStreamSeq' => $dnflank,
27 '-proof' => $proof,
28 '-isMutation' => 1,
29 '-mut_number' => $mut_number
31 $a1 = Bio::Variation::Allele->new;
32 $a1->seq('a');
33 $rnachange->allele_ori($a1);
34 my $a2 = Bio::Variation::Allele->new;
35 $a2->seq('t');
36 $rnachange->add_Allele($a2);
37 $rnachange->allele_mut($a2);
39 print "The codon change is ", $rnachange->codon_ori,
40 ">", $rnachange->codon_mut, "\n";
42 # add it to a SeqDiff container object
43 $seqdiff->add_Variant($rnachange);
45 # and create links to and from DNA level mutation objects
46 $rnachange->DNAMutation($dnamut);
47 $dnamut->RNAChange($rnachange);
49 =head1 DESCRIPTION
51 The instantiable class Bio::Variation::DNAMutation describes basic
52 sequence changes at RNA molecule level. It uses methods defined in
53 superclass Bio::Variation::VariantI. See L<Bio::Variation::VariantI>
54 for details.
56 You are normally expected to create a corresponding
57 Bio::Variation::DNAMutation object even if mutation is defined at
58 RNA level. The numbering follows then cDNA numbering. Link the
59 DNAMutation object to the RNAChange object using the method
60 DNAMutation(). If the variation described by a RNAChange object is
61 translated, link the corresponding Bio::Variation::AAChange object
62 to it using method AAChange(). See L<Bio::Variation::DNAMutation> and
63 L<Bio::Variation::AAChange> for more information.
66 =head1 FEEDBACK
68 =head2 Mailing Lists
70 User feedback is an integral part of the evolution of this and other
71 Bioperl modules. Send your comments and suggestions preferably to the
72 Bioperl mailing lists Your participation is much appreciated.
74 bioperl-l@bioperl.org - General discussion
75 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
78 =head2 Support
80 Please direct usage questions or support issues to the mailing list:
82 I<bioperl-l@bioperl.org>
84 rather than to the module maintainer directly. Many experienced and
85 reponsive experts will be able look at the problem and quickly
86 address it. Please include a thorough description of the problem
87 with code and data examples if at all possible.
89 =head2 Reporting Bugs
91 Report bugs to the Bioperl bug tracking system to help us keep track
92 the bugs and their resolution. Bug reports can be submitted via the
93 web:
95 https://github.com/bioperl/bioperl-live/issues
97 =head1 AUTHOR - Heikki Lehvaslaiho
99 Email: heikki-at-bioperl-dot-org
101 =head1 APPENDIX
103 The rest of the documentation details each of the object
104 methods. Internal methods are usually preceded with a _
106 =cut
109 # Let the code begin...
112 package Bio::Variation::RNAChange;
113 use strict;
115 # Object preamble - inheritance
117 use Bio::Tools::CodonTable;
119 use base qw(Bio::Variation::VariantI);
121 sub new {
122 my($class,@args) = @_;
123 my $self = $class->SUPER::new(@args);
125 my ($start, $end, $length, $strand, $primary, $source,
126 $frame, $score, $gff_string,
127 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq,
128 $label, $status, $proof, $region, $region_value, $region_dist, $numbering,
129 $mut_number, $isMutation,
130 $codon_ori, $codon_mut, $codon_pos, $codon_table, $cds_end) =
131 $self->_rearrange([qw(START
133 LENGTH
134 STRAND
135 PRIMARY
136 SOURCE
137 FRAME
138 SCORE
139 GFF_STRING
140 ALLELE_ORI
141 ALLELE_MUT
142 UPSTREAMSEQ
143 DNSTREAMSEQ
144 LABEL
145 STATUS
146 PROOF
147 REGION
148 REGION_VALUE
149 REGION_DIST
150 NUMBERING
151 MUT_NUMBER
152 ISMUTATION
153 CODON_ORI
154 CODON_MUT
155 CODON_POS
156 TRANSLATION_TABLE
157 CDS_END
158 )],@args);
160 $self->primary_tag("Variation");
162 $self->{ 'alleles' } = [];
164 $start && $self->start($start);
165 $end && $self->end($end);
166 $length && $self->length($length);
167 $strand && $self->strand($strand);
168 $primary && $self->primary_tag($primary);
169 $source && $self->source_tag($source);
170 $frame && $self->frame($frame);
171 $score && $self->score($score);
172 $gff_string && $self->_from_gff_string($gff_string);
174 $allele_ori && $self->allele_ori($allele_ori);
175 $allele_mut && $self->allele_mut($allele_mut);
176 $upstreamseq && $self->upStreamSeq($upstreamseq);
177 $dnstreamseq && $self->dnStreamSeq($dnstreamseq);
179 $label && $self->label($label);
180 $status && $self->status($status);
181 $proof && $self->proof($proof);
182 $region && $self->region($region);
183 $region_value && $self->region_value($region_value);
184 $region_dist && $self->region_dist($region_dist);
185 $numbering && $self->numbering($numbering);
186 $mut_number && $self->mut_number($mut_number);
187 $isMutation && $self->isMutation($isMutation);
189 $codon_ori && $self->codon_ori($codon_ori);
190 $codon_mut && $self->codon_mut($codon_mut);
191 $codon_pos && $self->codon_pos($codon_pos);
192 $codon_table && $self->codon_table($codon_table);
193 $cds_end && $self->cds_end($cds_end);
194 return $self; # success - we hope!
198 =head2 codon_ori
200 Title : codon_ori
201 Usage : $obj->codon_ori();
202 Function:
204 Sets and returns codon_ori triplet. If value is not set,
205 creates the codon triplet from the codon position and
206 flanking sequences. The string has to be three characters
207 long. The character content is not checked.
209 Example :
210 Returns : string
211 Args : string
213 =cut
215 sub codon_ori {
216 my ($self,$value) = @_;
217 if (defined $value) {
218 if (length $value != 3) {
219 $self->warn("Codon string \"$value\" is not three characters long");
221 $self->{'codon_ori'} = $value;
223 elsif (! $self->{'codon_ori'}) {
224 my $codon_ori = '';
226 if ($self->region eq 'coding' && $self->start && $self->start >= 1) {
228 $self->warn('Codon position is not defined')
229 if not defined $self->codon_pos;
230 $self->warn('Upstream flanking sequence is not defined')
231 if not defined $self->upStreamSeq;
232 $self->warn('Downstream flanking sequence is not defined')
233 if not defined $self->dnStreamSeq;
235 my $cpos = $self->codon_pos;
236 $codon_ori = substr($self->upStreamSeq, -$cpos +1 , $cpos-1);
237 $codon_ori .= substr($self->allele_ori->seq, 0, 4-$cpos)
238 if $self->allele_ori and $self->allele_ori->seq;
239 $codon_ori .= substr($self->dnStreamSeq, 0, 3-length($codon_ori));
241 $self->{'codon_ori'} = lc $codon_ori;
243 return $self->{'codon_ori'};
247 =head2 codon_mut
249 Title : codon_mut
250 Usage : $obj->codon_mut();
251 Function:
253 Sets and returns codon_mut triplet. If value is not
254 set, creates the codon triplet from the codon position and
255 flanking sequences. Return undef for other than point mutations.
257 Example :
258 Returns : string
259 Args : string
261 =cut
264 sub codon_mut {
265 my ($self,$value) = @_;
266 if (defined $value) {
267 if (length $value != 3 ) {
268 $self->warn("Codon string \"$value\" is not three characters long");
270 $self->{'codon_mut'} = $value;
272 else {
273 my $codon_mut = '';
274 if ($self->allele_ori->seq and $self->allele_mut->seq and
275 CORE::length($self->allele_ori->seq) == 1 and
276 CORE::length($self->allele_mut->seq) == 1 and
277 $self->region eq 'coding' and $self->start >= 1) {
279 $self->warn('Codon position is not defined')
280 if not defined $self->codon_pos;
281 $self->warn('Upstream flanking sequnce is not defined')
282 if not defined $self->upStreamSeq;
283 $self->warn('Downstream flanking sequnce is not defined')
284 if not defined $self->dnStreamSeq;
285 $self->throw('Mutated allele is not defined')
286 if not defined $self->allele_mut;
288 my $cpos = $self->codon_pos;
289 $codon_mut = substr($self->upStreamSeq, -$cpos +1 , $cpos-1);
290 $codon_mut .= substr($self->allele_mut->seq, 0, 4-$cpos)
291 if $self->allele_mut and $self->allele_mut->seq;
292 $codon_mut .= substr($self->dnStreamSeq, 0, 3-length($codon_mut));
294 $self->{'codon_mut'} = lc $codon_mut;
297 return $self->{'codon_mut'};
301 =head2 codon_pos
303 Title : codon_pos
304 Usage : $obj->codon_pos();
305 Function:
307 Sets and returns the position of the mutation start in the
308 codon. If value is not set, returns false.
310 Example :
311 Returns : 1,2,3
312 Args : none if get, the new value if set
314 =cut
317 sub codon_pos {
318 my ($self,$value) = @_;
319 if( defined $value) {
320 if ( $value !~ /[123]/ ) {
321 $self->throw("'$value' is not a valid codon position");
323 $self->{'codon_pos'} = $value;
325 return $self->{'codon_pos'};
329 =head2 codon_table
331 Title : codon_table
332 Usage : $obj->codon_table();
333 Function:
335 Sets and returns the codon table id of the RNA
336 If value is not set, returns 1, 'universal' code, as the default.
338 Example :
339 Returns : integer
340 Args : none if get, the new value if set
342 =cut
345 sub codon_table {
346 my ($self,$value) = @_;
347 if( defined $value) {
348 if ( not $value =~ /^\d$/ ) {
349 $self->throw("'$value' is not a valid codon table ID\n".
350 "Has to be a positive integer. Defaulting to 1\n");
351 } else {
352 $self->{'codon_table'} = $value;
355 if( ! exists $self->{'codon_table'} ) {
356 return 1;
357 } else {
358 return $self->{'codon_table'};
363 =head2 DNAMutation
365 Title : DNAMutation
366 Usage : $mutobj = $obj->DNAMutation;
367 : $mutobj = $obj->DNAMutation($objref);
368 Function: Returns or sets the link-reference to a mutation/change object.
369 If there is no link, it will return undef
370 Returns : an obj_ref or undef
372 =cut
375 sub DNAMutation {
376 my ($self,$value) = @_;
377 if (defined $value) {
378 if( ! $value->isa('Bio::Variation::DNAMutation') ) {
379 $self->throw("Is not a Bio::Variation::DNAMutation object but a [$self]");
380 return;
382 else {
383 $self->{'DNAMutation'} = $value;
386 unless (exists $self->{'DNAMutation'}) {
387 return;
388 } else {
389 return $self->{'DNAMutation'};
394 =head2 AAChange
396 Title : AAChange
397 Usage : $mutobj = $obj->AAChange;
398 : $mutobj = $obj->AAChange($objref);
399 Function: Returns or sets the link-reference to a mutation/change object.
400 If there is no link, it will return undef
401 Returns : an obj_ref or undef
403 =cut
405 sub AAChange {
406 my ($self,$value) = @_;
407 if (defined $value) {
408 if( ! $value->isa('Bio::Variation::AAChange') ) {
409 $self->throw("Is not a Bio::Variation::AAChange object but a [$self]");
410 return;
412 else {
413 $self->{'AAChange'} = $value;
416 unless (exists $self->{'AAChange'}) {
417 return;
418 } else {
419 return $self->{'AAChange'};
424 =head2 exons_modified
426 Title : exons_modified
427 Usage : $modified = $obj->exons_modified;
428 : $modified = $obj->exons_modified(1);
429 Function: Returns or sets information (example: a simple boolean flag) about
430 the modification of exons as a result of a mutation.
432 =cut
434 sub exons_modified {
435 my ($self,$value)=@_;
436 if (defined($value)) {
437 $self->{'exons_modified'}=$value;
439 return ($self->{'exons_modified'});
442 =head2 region
444 Title : region
445 Usage : $obj->region();
446 Function:
448 Sets and returns the name of the sequence region type or
449 protein domain at this location. If value is not set,
450 returns false.
452 Example :
453 Returns : string
454 Args : string
456 =cut
460 sub region {
461 my ($self,$value) = @_;
462 if( defined $value) {
463 $self->{'region'} = $value;
465 elsif (not defined $self->{'region'}) {
467 $self->warn('Mutation start position is not defined')
468 if not defined $self->start and $self->verbose;
469 $self->warn('Mutation end position is not defined')
470 if not defined $self->end and $self->verbose;
471 $self->warn('Length of the CDS is not defined, the mutation can be beyond coding region!')
472 if not defined $self->cds_end and $self->verbose;
474 $self->region('coding');
475 if ($self->end && $self->end < 0 ){
476 $self->region('5\'UTR');
478 elsif ($self->start && $self->cds_end && $self->start > $self->cds_end ) {
479 $self->region('3\'UTR');
482 return $self->{'region'};
485 =head2 cds_end
487 Title : cds_end
488 Usage : $cds_end = $obj->get_cds_end();
489 Function:
491 Sets or returns the cds_end from the beginning of the DNA sequence
492 to the coordinate start used to describe variants.
493 Should be the location of the last nucleotide of the
494 terminator codon of the gene.
496 Example :
497 Returns : value of cds_end, a scalar
498 Args :
500 =cut
504 sub cds_end {
505 my ($self, $value) = @_;
506 if (defined $value) {
507 $self->warn("[$value] is not a good value for sequence position")
508 if not $value =~ /^\d+$/ ;
509 $self->{'cds_end'} = $value;
510 } else {
511 $self->{'cds_end'} = $self->SeqDiff->cds_end if $self->SeqDiff;
513 return $self->{'cds_end'};
517 =head2 label
519 Title : label
520 Usage : $obj->label();
521 Function:
523 Sets and returns mutation event label(s). If value is not
524 set, or no argument is given returns false. Each
525 instantiable subclass of L<Bio::Variation::VariantI> needs
526 to implement this method. Valid values are listed in
527 'Mutation event controlled vocabulary' in
528 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
530 Example :
531 Returns : string
532 Args : string
534 =cut
536 sub label {
537 my ($self) = @_;
538 my ($o, $m, $type);
539 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
540 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
542 my $ct = Bio::Tools::CodonTable -> new ( -id => $self->codon_table );
543 if ($o and $m and CORE::length($o) == 1 and CORE::length($m) == 1) {
544 if (defined $self->AAChange) {
545 if ($self->start > 0 and $self->start < 4 ) {
546 $type = 'initiation codon';
548 elsif ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) {
549 #AAChange->allele_ori and $self->AAChange->allele_ori->seq eq '*' ) {
550 $type = 'termination codon';
552 elsif ($self->codon_mut && $ct->is_ter_codon($self->codon_mut) ) {
553 #elsif ($self->AAChange->allele_mut and $self->AAChange->allele_mut->seq eq "*") {
554 $type = 'nonsense';
556 elsif ($o and $m and ($o eq $m or
557 $self->AAChange->allele_ori->seq eq
558 $self->AAChange->allele_mut->seq)) {
559 $type = 'silent';
560 } else {
561 $type = 'missense';
563 } else {
564 $type = 'unknown';
566 } else {
567 my $len = 0;
568 $len = CORE::length($o) if $o;
569 $len -= CORE::length($m) if $m;
570 if ($len%3 == 0 ) {
571 $type = 'inframe';
572 } else {
573 $type = 'frameshift';
575 if (not $m ) {
576 $type .= ', '. 'deletion';
578 elsif (not $o ) {
579 $type .= ', '. 'insertion';
581 else {
582 $type .= ', '. 'complex';
584 if ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) {
585 $type .= ', '. 'termination codon';
589 $self->{'label'} = $type;
590 return $self->{'label'};
594 =head2 _change_codon_pos
596 Title : _change_codon_pos
597 Usage : $newCodonPos = _change_codon_pos($myCodonPos, 5)
598 Function:
600 Keeps track of the codon position in a changeing sequence
602 Returns : codon_pos = integer 1, 2 or 3
603 Args : valid codon position
604 signed integer offset to a new location in sequence
606 =cut
609 sub _change_codon_pos ($$) {
610 my ($cpos, $i) = @_;
612 $cpos = ($cpos + $i%3)%3;
613 if ($cpos > 3 ) {
614 $cpos = $cpos - 3;
616 elsif ($cpos < 1 ) {
617 $cpos = $cpos + 3;
619 return $cpos;