Bio::DB::Universal: move into its own distribution
[bioperl-live.git] / Bio / Variation / AAChange.pm
blob259fc21930981f8ca15fc3deb6584db362b50c63
2 # BioPerl module for Bio::Variation::AAChange
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Heikki Lehvaslaiho
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Variation::AAChange - Sequence change class for polypeptides
18 =head1 SYNOPSIS
20 $aamut = Bio::Variation::AAChange->new
21 ('-start' => $start,
22 '-end' => $end,
23 '-length' => $len,
24 '-proof' => $proof,
25 '-isMutation' => 1,
26 '-mut_number' => $mut_number
29 my $a1 = Bio::Variation::Allele->new;
30 $a1->seq($ori) if $ori;
31 $aamut->allele_ori($a1);
32 my $a2 = Bio::Variation::Allele->new;
33 $a2->seq($mut) if $mut;
34 $aachange->add_Allele($a2);
35 $aachange->allele_mut($a2);
37 print "\n";
39 # add it to a SeqDiff container object
40 $seqdiff->add_Variant($rnachange);
42 # and create links to and from RNA level variant objects
43 $aamut->RNAChange($rnachange);
44 $rnachange->AAChange($rnachange);
46 =head1 DESCRIPTION
48 The instantiable class Bio::Variation::RNAChange describes basic
49 sequence changes at polypeptide level. It uses methods defined in
50 superclass Bio::Variation::VariantI, see L<Bio::Variation::VariantI>
51 for details.
53 If the variation described by a AAChange object has a known
54 Bio::Variation::RNAAChange object, create the link with method
55 AAChange(). See L<Bio::Variation::AAChange> for more information.
57 =head1 FEEDBACK
59 =head2 Mailing Lists
61 User feedback is an integral part of the evolution of this and other
62 Bioperl modules. Send your comments and suggestions preferably to the
63 Bioperl mailing lists Your participation is much appreciated.
65 bioperl-l@bioperl.org - General discussion
66 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
68 =head2 Support
70 Please direct usage questions or support issues to the mailing list:
72 I<bioperl-l@bioperl.org>
74 rather than to the module maintainer directly. Many experienced and
75 reponsive experts will be able look at the problem and quickly
76 address it. Please include a thorough description of the problem
77 with code and data examples if at all possible.
79 =head2 Reporting Bugs
81 Report bugs to the Bioperl bug tracking system to help us keep track
82 the bugs and their resolution. Bug reports can be submitted via the
83 web:
85 https://github.com/bioperl/bioperl-live/issues
87 =head1 AUTHOR - Heikki Lehvaslaiho
89 Email: heikki-at-bioperl-dot-org
91 =head1 APPENDIX
93 The rest of the documentation details each of the object
94 methods. Internal methods are usually preceded with a _
96 =cut
99 # Let the code begin...
102 package Bio::Variation::AAChange;
104 use vars qw($MATRIX);
105 use strict;
107 # Object preamble - inheritance
109 use base qw(Bio::Variation::VariantI);
111 BEGIN {
113 my $matrix = << "__MATRIX__";
114 # Matrix made by matblas from blosum62.iij
115 # * column uses minimum score
116 # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
117 # Blocks Database = /data/blocks_5.0/blocks.dat
118 # Cluster Percentage: >= 62
119 # Entropy = 0.6979, Expected = -0.5209
120 A R N D C Q E G H I L K M F P S T W Y V B Z X *
121 A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
122 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
123 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
124 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
125 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
126 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
127 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
128 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
129 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
130 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
131 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
132 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
133 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
134 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
135 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
136 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
137 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
138 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
139 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
140 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
141 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
142 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
143 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
144 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
145 __MATRIX__
147 my %blosum = ();
148 $matrix =~ /^ +(.+)$/m;
149 my @aas = split / +/, $1;
150 foreach my $aa (@aas) {
151 my $tmp = $aa;
152 $tmp = "\\$aa" if $aa eq '*';
153 $matrix =~ /^($tmp) +([-+]?\d.*)$/m;
154 my @scores = split / +/, $2 if defined $2;
155 my $count = 0;
156 foreach my $ak (@aas) {
157 $blosum{$aa}->{$aas[$count]} = $scores[$count];
158 $count++;
161 sub _matrix;
162 $MATRIX = \%blosum;
165 sub new {
166 my($class,@args) = @_;
167 my $self = $class->SUPER::new(@args);
169 my ($start, $end, $length, $strand, $primary, $source,
170 $frame, $score, $gff_string,
171 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq,
172 $label, $status, $proof, $re_changes, $region, $region_value,
173 $region_dist,
174 $numbering, $mut_number, $ismutation) =
175 $self->_rearrange([qw(START
177 LENGTH
178 STRAND
179 PRIMARY
180 SOURCE
181 FRAME
182 SCORE
183 GFF_STRING
184 ALLELE_ORI
185 ALLELE_MUT
186 UPSTREAMSEQ
187 DNSTREAMSEQ
188 LABEL
189 STATUS
190 PROOF
191 RE_CHANGES
192 REGION
193 REGION_VALUE
194 REGION_DIST
195 NUMBERING
196 MUT_NUMBER
197 ISMUTATION
198 )],@args);
200 $self->primary_tag("Variation");
202 $self->{ 'alleles' } = [];
204 $start && $self->start($start);
205 $end && $self->end($end);
206 $length && $self->length($length);
207 $strand && $self->strand($strand);
208 $primary && $self->primary_tag($primary);
209 $source && $self->source_tag($source);
210 $frame && $self->frame($frame);
211 $score && $self->score($score);
212 $gff_string && $self->_from_gff_string($gff_string);
214 $allele_ori && $self->allele_ori($allele_ori);
215 $allele_mut && $self->allele_mut($allele_mut);
216 $upstreamseq && $self->upstreamseq($upstreamseq);
217 $dnstreamseq && $self->dnstreamseq($dnstreamseq);
219 $label && $self->label($label);
220 $status && $self->status($status);
221 $proof && $self->proof($proof);
222 $region && $self->region($region);
223 $region_value && $self->region_value($region_value);
224 $region_dist && $self->region_dist($region_dist);
225 $numbering && $self->numbering($numbering);
226 $mut_number && $self->mut_number($mut_number);
227 $ismutation && $self->isMutation($ismutation);
229 return $self; # success - we hope!
232 =head2 RNAChange
234 Title : RNAChange
235 Usage : $mutobj = $self->RNAChange;
236 : $mutobj = $self->RNAChange($objref);
237 Function: Returns or sets the link-reference to a mutation/change object.
238 If there is no link, it will return undef
239 Returns : an obj_ref or undef
241 =cut
243 sub RNAChange {
244 my ($self,$value) = @_;
245 if (defined $value) {
246 if( ! $value->isa('Bio::Variation::RNAChange') ) {
247 $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]");
248 return;
250 else {
251 $self->{'RNAChange'} = $value;
254 unless (exists $self->{'RNAChange'}) {
255 return;
256 } else {
257 return $self->{'RNAChange'};
263 =head2 label
265 Title : label
266 Usage : $obj->label();
267 Function:
269 Sets and returns mutation event label(s). If value is not
270 set, or no argument is given returns false. Each
271 instantiable subclass of L<Bio::Variation::VariantI> needs
272 to implement this method. Valid values are listed in
273 'Mutation event controlled vocabulary' in
274 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
276 Example :
277 Returns : string
278 Args : string
280 =cut
283 sub label {
284 my ($self) = @_;
285 my ($o, $m, $type);
286 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
287 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
289 if ($self->start == 1 ) {
290 if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) {
291 $type = 'no translation';
293 elsif ($o and $m and $o eq $m ) {
294 $type = 'silent';
296 # more ...
298 elsif ($o and substr($o, 0, 1) eq '*' ) {
299 if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) {
300 $type = 'post-elongation';
302 elsif ($m and $o eq $m ) {
303 $type = 'silent, conservative';
306 elsif ($o and $m and $o eq $m) {
307 $type = 'silent, conservative';
309 elsif ($m and $m eq '*') {
310 $type = 'truncation';
312 elsif ($o and $m and $o eq $m) {
313 $type = 'silent, conservative';
315 elsif (not $m or
316 ($o and $m and length($o) > length($m) and
317 substr($m, -1, 1) ne '*')) {
318 $type = 'deletion';
319 if ($o and $m and $o !~ $m and $o !~ $m) {
320 $type .= ', complex';
323 elsif (not $o or
324 ($o and $m and length($o) < length($m) and
325 substr($m, -1, 1) ne '*' ) ) {
326 $type = 'insertion';
327 if ($o and $m and $o !~ $m and $o !~ $m) {
328 $type .= ', complex';
331 elsif ($o and $m and $o ne $m and
332 length $o == 1 and length $m == 1 ) {
333 $type = 'substitution';
334 my $value = $self->similarity_score;
335 if (defined $value) {
336 my $cons = ($value < 0) ? 'nonconservative' : 'conservative';
337 $type .= ", ". $cons;
339 } else {
340 $type = 'out-of-frame translation, truncation';
342 $self->{'label'} = $type;
343 return $self->{'label'};
347 =head2 similarity_score
349 Title : similarity_score
350 Usage : $self->similarity_score
351 Function: Measure for evolutionary conservativeness
352 of single amino substitutions. Uses BLOSUM62.
353 Negative numbers are noncoservative changes.
354 Returns : integer, undef if not single amino acid change
356 =cut
358 sub similarity_score {
359 my ($self) = @_;
360 my ($o, $m, $type);
361 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
362 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
363 return unless $o and $m and length $o == 1 and length $m == 1;
364 return unless $o =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i and
365 $m =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i;
366 return $MATRIX->{"\U$o"}->{"\U$m"};
369 =head2 trivname
371 Title : trivname
372 Usage : $self->trivname
373 Function:
375 Given a Bio::Variation::AAChange object with linked
376 Bio::Variation::RNAChange and Bio::Variation::DNAMutation
377 objects, this subroutine creates a string corresponding to
378 the 'trivial name' of the mutation. Trivial name is
379 specified in Antonorakis & MDI Nomenclature Working Group:
380 Human Mutation 11:1-3, 1998.
382 Returns : string
384 =cut
387 sub trivname {
388 my ($self,$value) = @_;
389 if( defined $value) {
390 $self->{'trivname'} = $value;
391 } else {
392 my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) =
393 ('', '', '', '', '');
394 my $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
395 #my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
397 $aaori = substr ($o, 0, 1) if $o;
398 $aaori =~ tr/\*/X/;
400 my $sep;
401 if ($self->isMutation) {
402 $sep = '>';
403 } else {
404 $sep = '|';
406 my $trivname = $aaori. $self->start;
407 $trivname .= $sep if $sep eq '|';
409 my @alleles = $self->each_Allele;
410 foreach my $allele (@alleles) {
411 my $m = $allele->seq if $allele->seq;
413 $self->allele_mut($allele);
414 #$trivname .= $sep. uc $m if $m;
416 $aamutterm = substr ($m, -1, 1) if $m;
417 if ($self->RNAChange->label =~ /initiation codon/ and
418 ( $o and $m and $o ne $m)) {
419 $aamut = 'X';
421 elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) {
422 $aamutsymbol = '';
423 $aamut = $aamutterm;
425 elsif ($self->RNAChange->label =~ /deletion/) {
426 $aamutsymbol = 'del';
427 if ($aamutterm eq '*') {
428 $aatermnumber = $self->start + length($m) -1;
429 $aamut = 'X'. $aatermnumber;
431 if ($self->RNAChange && $self->RNAChange->label =~ /inframe/){
432 $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ;
435 elsif ($self->RNAChange->label =~ /insertion/) {
436 $aamutsymbol = 'ins';
437 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
438 $aatermnumber = $self->start + length($m)-1;
439 $aamut = $aatermnumber. 'X';
441 if ($self->RNAChange->label =~ /inframe/){
442 $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ;
445 elsif ($self->RNAChange->label =~ /complex/ ) {
446 my $diff = length($m) - length($o);
447 if ($diff >= 0 ) {
448 $aamutsymbol = 'ins';
449 } else {
450 $aamutsymbol = 'del' ;
452 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
453 $aatermnumber = $self->start + length($m)-1;
454 $aamut = $aatermnumber. 'X';
456 if ($self->RNAChange->label =~ /inframe/){
458 if ($diff >= 0 ) {
459 $aamut = '+'. $diff ;
460 } else {
461 $aamut = $diff ;
465 elsif ($self->label =~ /truncation/) {
466 $aamut = $m;
467 } else {
468 $aamutsymbol = '';
469 $aamut = $aamutterm;
471 $aamut =~ tr/\*/X/;
472 $trivname .= $aamutsymbol. $aamut. $sep;
474 chop $trivname;
475 $self->{'trivname'} = $trivname;
477 return $self->{'trivname'};