1 # bioperl module for Bio::Variation::SeqDiff
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
17 Bio::Variation::SeqDiff - Container class for mutation/variant descriptions
21 $seqDiff = Bio::Variation::SeqDiff->new (
26 -numbering => 'coding'
28 # get a DNAMutation object somehow
29 $seqDiff->add_Variant($dnamut);
30 print $seqDiff->sys_name(), "\n";
34 SeqDiff stores Bio::Variation::VariantI object references and
35 descriptive information common to all changes in a sequence. Mutations
36 are understood to be any kind of sequence markers and are expected to
37 occur in the same chromosome. See L<Bio::Variation::VariantI> for details.
39 The methods of SeqDiff are geared towards describing mutations in
40 human genes using gene-based coordinate system where 'A' of the
41 initiator codon has number 1 and the one before it -1. This is
42 according to conventions of human genetics.
44 There will be class Bio::Variation::Genotype to describe markers in
45 different chromosomes and diploid genototypes.
47 Classes implementing Bio::Variation::VariantI interface are
48 Bio::Variation::DNAMutation, Bio::Variation::RNAChange, and
49 Bio::Variation::AAChange. See L<Bio::Variation::VariantI>,
50 L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>, and
51 L<Bio::Variation::AAChange> for more information.
53 Variant objects can be added using two ways: an array passed to the
54 constructor or as individual Variant objects with add_Variant
62 User feedback is an integral part of the evolution of this and other
63 Bioperl modules. Send your comments and suggestions preferably to the
64 Bioperl mailing lists Your participation is much appreciated.
66 bioperl-l@bioperl.org - General discussion
67 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
72 Please direct usage questions or support issues to the mailing list:
74 I<bioperl-l@bioperl.org>
76 rather than to the module maintainer directly. Many experienced and
77 reponsive experts will be able look at the problem and quickly
78 address it. Please include a thorough description of the problem
79 with code and data examples if at all possible.
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 the bugs and their resolution. Bug reports can be submitted via the
87 https://github.com/bioperl/bioperl-live/issues
89 =head1 AUTHOR - Heikki Lehvaslaiho
91 Email: heikki-at-bioperl-dot-org
95 Eckhard Lehmann, ecky@e-lehmann.de
99 The rest of the documentation details each of the object
100 methods. Internal methods are usually preceded with a _
104 # Let the code begin...
106 package Bio
::Variation
::SeqDiff
;
109 use Bio
::Tools
::CodonTable
;
112 use base
qw(Bio::Root::Root);
118 Usage : $seqDiff = Bio::Variation::SeqDiff->new;
119 Function: generates a new Bio::Variation::SeqDiff
120 Returns : reference to a new object of class SeqDiff
126 my($class,@args) = @_;
127 my $self = $class->SUPER::new
(@args);
129 my($id, $sysname, $trivname, $chr, $gene_symbol,
130 $desc, $alphabet, $numbering, $offset, $rna_offset, $rna_id, $cds_end,
131 $dna_ori, $dna_mut, $rna_ori, $rna_mut, $aa_ori, $aa_mut
134 $self->_rearrange([qw(ID
154 #my $make = $self->SUPER::_initialize(@args);
156 $id && $self->id($id);
157 $sysname && $self->sysname($sysname);
158 $trivname && $self->trivname($trivname);
159 $chr && $self->chromosome($chr);
160 $gene_symbol && $self->gene_symbol($chr);
161 $desc && $self->description($desc);
162 $alphabet && $self->alphabet($alphabet);
163 $numbering && $self->numbering($numbering);
164 $offset && $self->offset($offset);
165 $rna_offset && $self->rna_offset($rna_offset);
166 $rna_id && $self->rna_id($rna_id);
167 $cds_end && $self->cds_end($cds_end);
169 $dna_ori && $self->dna_ori($dna_ori);
170 $dna_mut && $self->dna_mut($dna_mut);
171 $rna_ori && $self->rna_ori($rna_ori);
172 $rna_mut && $self->rna_mut($rna_mut);
173 $aa_ori && $self->aa_ori ($aa_ori);
174 $aa_mut && $self->aa_mut ($aa_mut);
176 $self->{ 'variants' } = [];
177 #@variants && push(@{$self->{'variants'}},@variants);
179 $self->{ 'genes' } = [];
180 #@genes && push(@{$self->{'genes'}},@genes);
182 return $self; # success - we hope!
189 Usage : $obj->id(H0001); $id = $obj->id();
192 Sets or returns the id of the seqDiff.
193 Should be used to give the collection of variants a UID
194 without semantic associations.
197 Returns : value of id, a scalar
198 Args : newvalue (optional)
204 my ($self,$value) = @_;
205 if (defined $value) {
206 $self->{'id'} = $value;
209 return $self->{'id'};
217 Usage : $obj->sysname('5C>G'); $sysname = $obj->sysname();
220 Sets or returns the systematic name of the seqDiff. The
221 name should follow the HUGO Mutation Database Initiative
222 approved nomenclature. If called without first setting the
223 value, will generate it from L<Bio::Variation::DNAMutation>
227 Returns : value of sysname, a scalar
228 Args : newvalue (optional)
234 my ($self,$value) = @_;
235 if (defined $value) {
236 $self->{'sysname'} = $value;
238 elsif (not defined $self->{'sysname'}) {
242 foreach my $mut ($self->each_Variant) {
243 if( $mut->isa('Bio::Variation::DNAMutation') ) {
246 $sysname = $mut->sysname ;
249 $sysname .= ";". $mut->sysname;
253 $sysname = "[". $sysname. "]" if $c > 1;
254 $self->{'sysname'} = $sysname;
256 return $self->{'sysname'};
263 Usage : $obj->trivname('[A2G;T56G]'); $trivname = $obj->trivname();
266 Sets or returns the trivial name of the seqDiff.
267 The name should follow the HUGO Mutation Database Initiative
268 approved nomenclature. If called without first setting the
269 value, will generate it from L<Bio::Variation::AAChange>
273 Returns : value of trivname, a scalar
274 Args : newvalue (optional)
280 my ($self,$value) = @_;
281 if (defined $value) {
282 $self->{'trivname'} = $value;
284 elsif (not defined $self->{'trivname'}) {
288 foreach my $mut ($self->each_Variant) {
289 if( $mut->isa('Bio::Variation::AAChange') ) {
292 $trivname = $mut->trivname ;
295 $trivname .= ";". $mut->trivname;
299 $trivname = "[". $trivname. "]" if $c > 1;
300 $self->{'trivname'} = $trivname;
304 return $self->{'trivname'};
312 Usage : $obj->chromosome('X'); $chromosome = $obj->chromosome();
315 Sets or returns the chromosome ("linkage group") of the seqDiff.
318 Returns : value of chromosome, a scalar
319 Args : newvalue (optional)
325 my ($self,$value) = @_;
326 if (defined $value) {
327 $self->{'chromosome'} = $value;
330 return $self->{'chromosome'};
338 Usage : $obj->gene_symbol('FOS'); $gene_symbol = $obj->gene_symbol;
341 Sets or returns the gene symbol for the studied CDS.
344 Returns : value of gene_symbol, a scalar
345 Args : newvalue (optional)
351 my ($self,$value) = @_;
352 if (defined $value) {
353 $self->{'gene_symbol'} = $value;
356 return $self->{'gene_symbol'};
365 Usage : $obj->description('short description'); $descr = $obj->description();
368 Sets or returns the short description of the seqDiff.
371 Returns : value of description, a scalar
372 Args : newvalue (optional)
378 my ($self,$value) = @_;
379 if (defined $value) {
380 $self->{'description'} = $value;
383 return $self->{'description'};
391 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
392 Function: Returns the type of primary reference sequence being one of
393 'dna', 'rna' or 'protein'. This is case sensitive.
395 Returns : a string either 'dna','rna','protein'.
402 my ($self,$value) = @_;
403 my %type = (dna
=> 1,
406 if( defined $value ) {
408 $self->{'alphabet'} = $value;
410 $self->throw("$value is not valid alphabet value!");
413 return $self->{'alphabet'};
420 Usage : $obj->numbering('coding'); $numbering = $obj->numbering();
423 Sets or returns the string giving the numbering schema used
424 to describe the variants.
427 Returns : value of numbering, a scalar
428 Args : newvalue (optional)
435 my ($self,$value) = @_;
436 if (defined $value) {
437 $self->{'numbering'} = $value;
440 return $self->{'numbering'};
448 Usage : $obj->offset(124); $offset = $obj->offset();
451 Sets or returns the offset from the beginning of the DNA sequence
452 to the coordinate start used to describe variants. Typically
453 the beginning of the coding region of the gene.
454 The cds_start should be 1 + offset.
457 Returns : value of offset, a scalar
458 Args : newvalue (optional)
465 my ($self,$value) = @_;
466 if (defined $value) {
467 $self->{'offset'} = $value;
469 elsif (not defined $self->{'offset'} ) {
470 return $self->{'offset'} = 0;
473 return $self->{'offset'};
481 Usage : $obj->cds_start(123); $cds_start = $obj->cds_start();
484 Sets or returns the cds_start from the beginning of the DNA
485 sequence to the coordinate start used to describe
486 variants. Typically the beginning of the coding region of
487 the gene. Needs to be and is implemented as 1 + offset.
490 Returns : value of cds_start, a scalar
491 Args : newvalue (optional)
498 my ($self,$value) = @_;
499 if (defined $value) {
500 $self->{'offset'} = $value - 1;
503 return $self->{'offset'} + 1;
511 Usage : $obj->cds_end(321); $cds_end = $obj->cds_end();
514 Sets or returns the position of the last nucleotitide of the
515 termination codon. The coordinate system starts from cds_start.
518 Returns : value of cds_end, a scalar
519 Args : newvalue (optional)
526 my ($self,$value) = @_;
527 if (defined $value) {
528 $self->{'cds_end'} = $value;
531 return $self->{'cds_end'};
532 #$self->{'cds_end'} = CORE::length($self->SeqDiff->rna_ori)/3;
540 Usage : $obj->rna_offset(124); $rna_offset = $obj->rna_offset();
543 Sets or returns the rna_offset from the beginning of the RNA sequence
544 to the coordinate start used to describe variants. Typically
545 the beginning of the coding region of the gene.
548 Returns : value of rna_offset, a scalar
549 Args : newvalue (optional)
556 my ($self,$value) = @_;
557 if (defined $value) {
558 $self->{'rna_offset'} = $value;
560 elsif (not defined $self->{'rna_offset'} ) {
561 return $self->{'rna_offset'} = 0;
564 return $self->{'rna_offset'};
572 Usage : $obj->rna_id('transcript#3'); $rna_id = $obj->rna_id();
575 Sets or returns the ID for original RNA sequence of the seqDiff.
578 Returns : value of rna_id, a scalar
579 Args : newvalue (optional)
585 my ($self,$value) = @_;
586 if (defined $value) {
587 $self->{'rna_id'} = $value;
590 return $self->{'rna_id'};
599 Usage : $obj->add_Variant($variant)
602 Pushes one Bio::Variation::Variant into the list of variants.
603 At the same time, creates a link from the Variant to SeqDiff
604 using its SeqDiff method.
607 Returns : 1 when succeeds, 0 for failure.
608 Args : Variant object
613 my ($self,$value) = @_;
614 if (defined $value) {
615 if( ! $value->isa('Bio::Variation::VariantI') ) {
616 $self->throw("Is not a VariantI complying object but a [$self]");
620 push(@
{$self->{'variants'}},$value);
621 $value->SeqDiff($self);
634 Usage : $obj->each_Variant();
637 Returns a list of Variants.
640 Returns : list of Variants
646 my ($self,@args) = @_;
648 return @
{$self->{'variants'}};
656 Usage : $obj->add_Gene($gene)
659 Pushes one L<Bio::LiveSeq::Gene> into the list of genes.
662 Returns : 1 when succeeds, 0 for failure.
663 Args : Bio::LiveSeq::Gene object
665 See L<Bio::LiveSeq::Gene> for more information.
671 my ($self,$value) = @_;
672 if (defined $value) {
673 if( ! $value->isa('Bio::LiveSeq::Gene') ) {
674 $value->throw("Is not a Bio::LiveSeq::Gene object but a [$value]");
678 push(@
{$self->{'genes'}},$value);
691 Usage : $obj->each_Gene();
694 Returns a list of L<Bio::LiveSeq::Gene>s.
697 Returns : list of Genes
703 my ($self,@args) = @_;
705 return @
{$self->{'genes'}};
712 Usage : $obj->dna_ori('atgctgctgctgct'); $dna_ori = $obj->dna_ori();
715 Sets or returns the original DNA sequence string of the seqDiff.
718 Returns : value of dna_ori, a scalar
719 Args : newvalue (optional)
725 my ($self,$value) = @_;
726 if (defined $value) {
727 $self->{'dna_ori'} = $value;
730 return $self->{'dna_ori'};
738 Usage : $obj->dna_mut('atgctggtgctgct'); $dna_mut = $obj->dna_mut();
741 Sets or returns the mutated DNA sequence of the seqDiff.
742 If sequence has not been set generates it from the
743 original sequence and DNA mutations.
746 Returns : value of dna_mut, a scalar
747 Args : newvalue (optional)
753 my ($self,$value) = @_;
754 if (defined $value) {
755 $self->{'dna_mut'} = $value;
758 $self->_set_dnamut() unless $self->{'dna_mut'};
759 return $self->{'dna_mut'};
766 return unless $self->{'dna_ori'} && $self->each_Variant;
768 $self->{'dna_mut'} = $self->{'dna_ori'};
769 foreach ($self->each_Variant) {
770 next unless $_->isa('Bio::Variation::DNAMutation');
771 next unless $_->isMutation;
774 #lies the mutation less than 25 bases after the start of sequence?
775 if ($_->start < 25) {
776 $s = 0; $la = $_->start - 1;
778 $s = $_->start - 25; $la = 25;
781 #is the mutation an insertion?
782 $_->end($_->start) unless $_->allele_ori->seq;
784 #does the mutation end greater than 25 bases before the end of
786 if (($_->end + 25) > length($self->{'dna_mut'})) {
787 $le = length($self->{'dna_mut'}) - $_->end;
792 $_->dnStreamSeq(substr($self->{'dna_mut'}, $s, $la));
793 $_->upStreamSeq(substr($self->{'dna_mut'}, $_->end, $le));
795 my $s_ori = $_->dnStreamSeq . $_->allele_ori->seq . $_->upStreamSeq;
796 my $s_mut = $_->dnStreamSeq . $_->allele_mut->seq . $_->upStreamSeq;
798 (my $str = $self->{'dna_mut'}) =~ s/$s_ori/$s_mut/;
799 $self->{'dna_mut'} = $str;
807 Usage : $obj->rna_ori('atgctgctgctgct'); $rna_ori = $obj->rna_ori();
810 Sets or returns the original RNA sequence of the seqDiff.
813 Returns : value of rna_ori, a scalar
814 Args : newvalue (optional)
820 my ($self,$value) = @_;
821 if (defined $value) {
822 $self->{'rna_ori'} = $value;
825 return $self->{'rna_ori'};
833 Usage : $obj->rna_mut('atgctggtgctgct'); $rna_mut = $obj->rna_mut();
836 Sets or returns the mutated RNA sequence of the seqDiff.
839 Returns : value of rna_mut, a scalar
840 Args : newvalue (optional)
846 my ($self,$value) = @_;
847 if (defined $value) {
848 $self->{'rna_mut'} = $value;
851 return $self->{'rna_mut'};
859 Usage : $obj->aa_ori('MAGVLL*'); $aa_ori = $obj->aa_ori();
862 Sets or returns the original protein sequence of the seqDiff.
865 Returns : value of aa_ori, a scalar
866 Args : newvalue (optional)
872 my ($self,$value) = @_;
873 if (defined $value) {
874 $self->{'aa_ori'} = $value;
877 return $self->{'aa_ori'};
885 Usage : $obj->aa_mut('MA*'); $aa_mut = $obj->aa_mut();
888 Sets or returns the mutated protein sequence of the seqDiff.
891 Returns : value of aa_mut, a scalar
892 Args : newvalue (optional)
898 my ($self,$value) = @_;
899 if (defined $value) {
900 $self->{'aa_mut'} = $value;
903 return $self->{'aa_mut'};
911 Usage : $dnaobj = $obj->seqobj('dna_mut');
914 Returns the any original or mutated sequences as a
915 Bio::PrimarySeq object.
918 Returns : Bio::PrimarySeq object for the requested sequence
919 Args : string, method name for the sequence requested
921 See L<Bio::PrimarySeq> for more information.
926 my ($self,$value) = @_;
929 map {$_, 1} qw(dna_ori rna_ori aa_ori dna_mut rna_mut aa_mut);
930 $valid_obj{$value} ||
931 $self->throw("Sequence type '$value' is not a valid type (".
932 join(',', map "'$_'", sort keys %valid_obj) .") lowercase");
933 my ($alphabet) = $value =~ /([^_]+)/;
935 $id = $self->rna_id if $self->rna_id;
936 $alphabet = 'protein' if $alphabet eq 'aa';
937 $out = Bio
::PrimarySeq
->new
938 ( '-seq' => $self->{$value},
939 '-display_id' => $id,
940 '-accession_number' => $self->id,
941 '-alphabet' => $alphabet
942 ) if $self->{$value} ;
949 Usage : $obj->alignment
952 Returns a pretty RNA/AA sequence alignment from linked
953 objects. Under construction: Only simple coding region
954 point mutations work.
967 my $maxflanklen = 12;
969 foreach my $mut ($self->each_Variant) {
970 if( $mut->isa('Bio::Variation::RNAChange') ) {
972 my $upflank = $mut->upStreamSeq;
973 my $dnflank = $mut->dnStreamSeq;
974 my $cposd = $mut->codon_pos;
975 my $rori = $mut->allele_ori->seq;
976 my $rmut = $mut->allele_mut->seq;
982 my (@rseqori, @rseqmut );
985 if ($mut->DNAMutation->label =~ /point/) {
987 my $nt2d = substr($dnflank, 0, 2);
988 push @rseqori, $rori. $nt2d;
989 push @rseqmut, uc ($rmut). $nt2d;
990 $dnflank = substr($dnflank, 2);
992 elsif ($cposd == 2) {
993 my $ntu = chop $upflank;
994 my $ntd = substr($dnflank, 0, 1);
995 push @rseqori, $ntu. $rori. $ntd;
996 push @rseqmut, $ntu. uc ($rmut). $ntd;
997 $dnflank = substr($dnflank, 1);
999 elsif ($cposd == 3) {
1000 my $ntu1 = chop $upflank;
1001 my $ntu2 = chop $upflank;
1002 push (@rseqori, $ntu2. $ntu1. $rori);
1003 push (@rseqmut, $ntu2. $ntu1. uc $rmut);
1007 elsif ($mut->DNAMutation->label =~ /deletion/) {
1009 $rseqorid = chop $upflank;
1010 $rseqmutd = $rseqorid;
1012 for (my $i=1; $i<=$mut->length; $i++) {
1013 my $ntd .= substr($mut->allele_ori, $i-1, 1);
1015 if (length($rseqorid) == 3 ) {
1016 push (@rseqori, $rseqorid);
1017 push (@rseqmut, " ");
1023 $rseqorid .= substr($dnflank, 0, 3-$rseqorid);
1024 push (@rseqori, $rseqorid);
1025 push (@rseqmut, " ");
1026 $dnflank = substr($dnflank,3-$rseqorid);
1029 $upflank = reverse $upflank;
1030 # loop throught the flanks
1031 for (my $i=1; $i<=length($dnflank); $i++) {
1033 last if $i > $maxflanklen;
1035 my $ntd .= substr($dnflank, $i-1, 1);
1036 my $ntu .= substr($upflank, $i-1, 1);
1040 $rseqmutu = $ntu. $rseqmutu;
1041 $rseqoriu = $ntu. $rseqoriu;
1043 if (length($rseqorid) == 3 and length($rseqorid) == 3) {
1044 push (@rseqori, $rseqorid);
1045 push (@rseqmut, $rseqmutd);
1046 $rseqorid = $rseqmutd ='';
1048 if (length($rseqoriu) == 3 and length($rseqoriu) == 3) {
1049 unshift (@rseqori, $rseqoriu);
1050 unshift (@rseqmut, $rseqmutu);
1051 $rseqoriu = $rseqmutu ='';
1054 #print "|i=$i, $cposd, $rseqmutd, $rseqorid\n";
1055 #print "|i=$i, $cposu, $rseqmutu, $rseqoriu\n\n";
1059 push (@rseqori, $rseqorid);
1060 unshift (@rseqori, $rseqoriu);
1061 push (@rseqmut, $rseqmutd);
1062 unshift (@rseqmut, $rseqmutu);
1064 return unless $mut->AAChange;
1066 my $tr = Bio
::Tools
::CodonTable
->new('-id' => $mut->codon_table);
1067 my $apos = $mut->AAChange->start;
1068 my $aposmax = CORE
::length($self->aa_ori); #terminator codon no
1073 for (my $i = 0; $i <= $#rseqori; $i++) {
1076 $a = $tr->translate($rseqori[$i]) if length($rseqori[$i]) == 3;
1078 if (length($a) != 1 or
1079 $apos - ( $maxflanklen/2 -1) + $i < 1 or
1080 $apos - ( $maxflanklen/2 -1) + $i > $aposmax ) {
1083 $aaseqori .= " ". $a. " ";
1086 if (length($rseqmut[$i]) == 3) {
1087 if ($rseqmut[$i] eq ' ') {
1090 $b = $tr->translate($rseqmut[$i]);
1095 $apos - ( $maxflanklen/2 -1) + $i >= 1 ) or
1096 ( $apos - ( $maxflanklen/2 -1) + $i >= $aposmax and
1097 $mut->label =~ 'termination')
1099 $aaseqmut .= " ". $b. " ";
1104 if ($i == 0 and length($rseqori[$i]) != 3) {
1105 my $l = 3 - length($rseqori[$i]);
1106 $rseqori[$i] = (" " x
$l). $rseqori[$i];
1107 $rseqmut[$i] = (" " x
$l). $rseqmut[$i];
1109 $rseqori .= $rseqori[$i]. " " if $rseqori[$i] ne '';
1110 $rseqmut .= $rseqmut[$i]. " " if $rseqmut[$i] ne '';
1113 # collect the results
1117 $text = " ". $aaseqmut;
1121 $text = "Variant : ". $rseqmut;
1125 $text = "Reference: ". $rseqori;
1129 $text = " ". $aaseqori;
1141 foreach my $line (@entry) {