From 1f7b54a281a82e54767a878f7a46f3dd4c664447 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Carn=C3=AB=20Draug?= Date: Tue, 25 Sep 2018 16:08:46 +0100 Subject: [PATCH] Bio::Variation::* move namespace into its own distribution. Also move the program bin_flanks which although not actually using the Bio::Variation modules, it's to be used in the same context. --- Changes | 1 + bin/bp_flanks | 314 ---------- lib/Bio/Variation/AAChange.pm | 480 -------------- lib/Bio/Variation/AAReverseMutate.pm | 306 --------- lib/Bio/Variation/Allele.pm | 295 --------- lib/Bio/Variation/DNAMutation.pm | 397 ------------ lib/Bio/Variation/IO.pm | 366 ----------- lib/Bio/Variation/IO/flat.pm | 721 --------------------- lib/Bio/Variation/IO/xml.pm | 567 ----------------- lib/Bio/Variation/README | 30 - lib/Bio/Variation/RNAChange.pm | 622 ------------------ lib/Bio/Variation/SNP.pm | 231 ------- lib/Bio/Variation/SeqDiff.pm | 1147 ---------------------------------- lib/Bio/Variation/VariantI.pm | 1052 ------------------------------- t/Variation/AAChange.t | 96 --- t/Variation/AAReverseMutate.t | 53 -- t/Variation/Allele.t | 44 -- t/Variation/DNAMutation.t | 130 ---- t/Variation/RNAChange.t | 110 ---- t/Variation/SNP.t | 34 - t/Variation/SeqDiff.t | 99 --- t/Variation/Variation_IO.t | 104 --- t/data/mutations.dat | 350 ----------- t/data/mutations.old.dat | 350 ----------- t/data/mutations.old.xml | 402 ------------ t/data/mutations.xml | 388 ------------ t/data/polymorphism.dat | 74 --- t/data/polymorphism.old.xml | 86 --- t/data/polymorphism.xml | 85 --- 29 files changed, 1 insertion(+), 8933 deletions(-) delete mode 100644 bin/bp_flanks delete mode 100644 lib/Bio/Variation/AAChange.pm delete mode 100644 lib/Bio/Variation/AAReverseMutate.pm delete mode 100644 lib/Bio/Variation/Allele.pm delete mode 100644 lib/Bio/Variation/DNAMutation.pm delete mode 100644 lib/Bio/Variation/IO.pm delete mode 100644 lib/Bio/Variation/IO/flat.pm delete mode 100644 lib/Bio/Variation/IO/xml.pm delete mode 100644 lib/Bio/Variation/README delete mode 100644 lib/Bio/Variation/RNAChange.pm delete mode 100644 lib/Bio/Variation/SNP.pm delete mode 100644 lib/Bio/Variation/SeqDiff.pm delete mode 100644 lib/Bio/Variation/VariantI.pm delete mode 100644 t/Variation/AAChange.t delete mode 100644 t/Variation/AAReverseMutate.t delete mode 100644 t/Variation/Allele.t delete mode 100644 t/Variation/DNAMutation.t delete mode 100644 t/Variation/RNAChange.t delete mode 100644 t/Variation/SNP.t delete mode 100644 t/Variation/SeqDiff.t delete mode 100644 t/Variation/Variation_IO.t delete mode 100644 t/data/mutations.dat delete mode 100644 t/data/mutations.old.dat delete mode 100644 t/data/mutations.old.xml delete mode 100644 t/data/mutations.xml delete mode 100644 t/data/polymorphism.dat delete mode 100644 t/data/polymorphism.old.xml delete mode 100644 t/data/polymorphism.xml diff --git a/Changes b/Changes index 01826b027..27f287d59 100644 --- a/Changes +++ b/Changes @@ -64,6 +64,7 @@ be removed. Bio::SeqIO::exp Bio::SeqIO::pln Bio::SeqIO::ztr + Bio::Variation::* Bio::Tools::AlignFactory Bio::Tools::Phylo::Gumby Bio::Tools::dpAlign diff --git a/bin/bp_flanks b/bin/bp_flanks deleted file mode 100644 index 00f852c7e..000000000 --- a/bin/bp_flanks +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/perl -# -*-Perl-*- -# -# Heikki Lehvaslaiho -# Finding flanking sequences for a variant. -# -# -# v. 1 16 Mar 2001 -# v. 1.1 9 Aug 2001 interface change, more info in fasta header -# v. 2.0 23 Nov 2001 new code from the flanks CGI program -# support for EMBL-like positions -# v. 3.0 21 Feb 2003 new command line interface - - -use Bio::PrimarySeq; -use Bio::SeqIO; -use Bio::DB::EMBL; -use Bio::DB::GenBank; -use Getopt::Long; -use strict; -use warnings; - -use constant VERSION => '3.0'; - -my $help = ''; -my $flank = 100; # flank length on both sides of the region -my $in_format = 'EMBL'; # format of the file to read in -my @pos; # position(s) in the sequence - - -GetOptions ("help" => \$help, "flanklength=i" => \$flank, - "position=s" => \@pos ); - -@pos = split(/,/,join(',',@pos)); - -system("perldoc $0") if $help; -system("perldoc $0") unless @ARGV; -print STDERR "\nYou need to provide --position option\n" and system("perldoc $0") - unless @pos; - -my $file = shift; -$file || system("perldoc $0"); - -my $seq = get_seq($file); -exit 0 unless $seq; - -&extract($seq, \@pos, $flank); - -# -## end main -# - -sub get_seq { - my ($file) = @_; - my $IN_FORMAT = 'EMBL'; # format of the local file on disk - - if (-e $file ) { # local file - my $in = Bio::SeqIO->new('-file' => $file, - '-format' => $IN_FORMAT); - $seq = $in->next_seq(); - } - elsif ($file =~ /\./) { # sequence version from GenBank - eval { - my $gb = new Bio::DB::GenBank; - $seq = $gb->get_Seq_by_version($file); - }; - } else { # plain accession mumber from more reliable EMBL - eval { - my $gb = new Bio::DB::EMBL; - $seq = $gb->get_Seq_by_acc($file); - }; - - } - print STDERR "Could not find sequence [$file]" && return unless $seq; - return $seq; -} - -sub extract { - my ($seq, $pos, $flank) = @_; - my ($out_seq); - my $OUT_FORMAT = 'FASTA'; # output format, going into STDOUT - my $strand = 1; # default for the forward strand - - my $out = Bio::SeqIO->new('-format' => $OUT_FORMAT); - - my $count = 1; - foreach my $idpos (@$pos) { - - my ($id, $pos_range, $start, $end, $allele_len); - my $inbetween = 0; # handle 23^24 notation as well as plain integer (24) - # but set flag and make corrections when needed - - if ($idpos =~ /:/ ) { # id and position separator - ($id, $pos_range) = split /:/, $idpos; - } else { # no id - $id = $count; - $count++; - $pos_range = $idpos; - } - $strand = -1 if $pos_range =~ /-$/; # opposite strand - $pos_range = $1 if $pos_range =~ /(.+)-/; # remove trailing '-' - - if ($pos_range =~ /\^/) { # notation 23^24 used - ($start, $end) = split /\^/, $pos_range; - print STDERR $id, ": Give adjacent nucleotides flanking '^' character, not [", - $start, "] and [", $end, "]\n" and next - unless $end == $start + 1; - $end = $start; - $inbetween = 1; - } else { # notation 23..24 used - ($start, $end) = split /\.\./, $pos_range; - } - $end ||= $start; # notation 23 used - print STDERR $id, ": Start can not be larger than end. Not [", - $start, "] and [", $end, "]\n" and next - if $start > $end; - $allele_len = $end - $start; - - # sanity checks - next unless defined $start && $start =~ /\d+/ && $start != 0; - print STDERR "Position '$start' not in sequence '$file'\n", and next - if $start < 1 or $start > $seq->length; - print STDERR "End position '$end' not in sequence '$file'\n", and next - if $end < 1 or $end > $seq->length; - - # determine nucleotide positions - # left edge - my $five_start = $start - $flank; - $five_start = 1 if $five_start < 1; # not outside the sequence - # right edge - my $three_end = $start + $allele_len + $flank; - $three_end = $seq->length if $start + $allele_len + $flank > $seq->length; - $three_end-- if $inbetween; - - # extract the sequences - my $five_prime = lc $seq->subseq($five_start , $start - 1); # left flank - my $snp = uc $seq->subseq($start, $end); # allele (always > 0 length) - $snp = lc $snp if $inbetween; - - my $three_prime; # right flank - if ($end < $seq->length) { # make sure we are not beyond reference sequece - $three_prime = lc $seq->subseq($end + 1, $three_end); - } else { - $three_prime = ''; - } - - # allele positions in local, extracted coordinates - my $locpos = length($five_prime) + 1; - my $loc_end; - if ($allele_len) { - $loc_end = "..". ($locpos+$allele_len); - } else { - $loc_end = ''; - $loc_end = '^'. ($locpos+1) if $inbetween; - } - # build FASTA id and description line - my $fastaid = uc($id). "_". uc($file). - " oripos=$pos_range strand=$strand allelepos=$locpos$loc_end"; - - #build BioPerl sequence objects - if ($strand == -1) { - my $five_prime_seq = new Bio::PrimarySeq(-alphabet=>'dna',-seq=>$five_prime); - my $snp_seq = new Bio::PrimarySeq(-alphabet=>'dna',-seq=>$snp); - my $three_prime_seq = new Bio::PrimarySeq(-alphabet=>'dna',-seq=>$three_prime); - - my $str = $three_prime_seq->revcom->seq. " ". - $snp_seq->revcom->seq. " ". $five_prime_seq->revcom->seq; - $str =~ s/ //g; - $out_seq = new Bio::PrimarySeq (-id => $fastaid, - -alphabet=>'dna', - -seq => $str ); - } else { - my $str = $five_prime. " ". $snp. " ". $three_prime; - $str =~ s/ //g; - $out_seq = new Bio::PrimarySeq (-id => $fastaid, - -alphabet=>'dna', - -seq => $str ); - } - $out->write_seq($out_seq); # print sequence out - } -} - - - -=head1 NAME - -bp_flanks - finding flanking sequences for a variant in a sequence position - -=head1 SYNOPSIS - - bp_flanks --position POS [-p POS ...] [--flanklen INT] - accession | filename - -=head1 DESCRIPTION - -This script allows you to extract a subsequence around a region of -interest from an existing sequence. The output if fasta formatted -sequence entry where the header line contains additional information -about the location. - -=head1 OPTIONS - -The script takes one unnamed argument which be either a file name in -the local file system or a nucleotide sequence accession number. - - - -p Position uses simple nucleotide sequence feature table - --position notation to define the region of interest, typically a - SNP or microsatellite repeat around which the flanks are - defined. - - There can be more than one position option or you can - give a comma separated list to one position option. - - The format of a position is: - - [id:] int | range | in-between [-] - - The optional id is the name you want to call the new - sequence. If it not given in joins running number to the - entry name with an underscore. - - The position is either a point (e.g. 234), a range (e.g - 250..300) or insertion point between nucleotides - (e.g. 234^235) - - If the position is not completely within the source - sequence the output sequence will be truncated and it - will print a warning. - - The optional hyphen [-] at the end of the position - indicates that that you want the retrieved sequence to be - in the opposite strand. - - - -f Defaults to 100. This is the length of the nucleotides - --flanklen sequence retrieved on both sides of the given position. - - If the source file does not contain - -=head1 OUTPUT FORMAT - -The output is a fasta formatted entry where the description file -contains tag=value pairs for information about where in the original -sequence the subsequence was taken. - -The ID of the fasta entry is the name given at the command line joined -by hyphen to the filename or accesion of the source sequence. If no id -is given a series of consecutive integers is used. - -The tag=value pairs are: - -=over 3 - -=item oripos=int - -position in the source file - -=item strand=1|-1 - -strand of the sequence compared to the source sequence - -=item allelepos=int - -position of the region of interest in the current entry. -The tag is the same as used by dbSNP@NCBI - -=back - -The sequence highlights the allele variant position by showing it in -upper case and rest of the sequence in lower case characters. - -=head1 EXAMPLE - - % bp_flanks ~/seq/ar.embl - - >1_/HOME/HEIKKI/SEQ/AR.EMBL oripos=100 strand=1 allelepos=100 - taataactcagttcttatttgcacctacttcagtggacactgaatttggaaggtggagga - ttttgtttttttcttttaagatctgggcatcttttgaatCtacccttcaagtattaagag - acagactgtgagcctagcagggcagatcttgtccaccgtgtgtcttcttctgcacgagac - tttgaggctgtcagagcgct - - -=head1 TODO - -The input files are assumed to be in EMBL format and the sequences are -retrieved only from the EMB database. Make this more generic and use -the registry. - - -head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: Eheikki-at-bioperl-dot-orgE - -=cut diff --git a/lib/Bio/Variation/AAChange.pm b/lib/Bio/Variation/AAChange.pm deleted file mode 100644 index 259fc2193..000000000 --- a/lib/Bio/Variation/AAChange.pm +++ /dev/null @@ -1,480 +0,0 @@ -# -# BioPerl module for Bio::Variation::AAChange -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Variation::AAChange - Sequence change class for polypeptides - -=head1 SYNOPSIS - - $aamut = Bio::Variation::AAChange->new - ('-start' => $start, - '-end' => $end, - '-length' => $len, - '-proof' => $proof, - '-isMutation' => 1, - '-mut_number' => $mut_number - ); - - my $a1 = Bio::Variation::Allele->new; - $a1->seq($ori) if $ori; - $aamut->allele_ori($a1); - my $a2 = Bio::Variation::Allele->new; - $a2->seq($mut) if $mut; - $aachange->add_Allele($a2); - $aachange->allele_mut($a2); - - print "\n"; - - # add it to a SeqDiff container object - $seqdiff->add_Variant($rnachange); - - # and create links to and from RNA level variant objects - $aamut->RNAChange($rnachange); - $rnachange->AAChange($rnachange); - -=head1 DESCRIPTION - -The instantiable class Bio::Variation::RNAChange describes basic -sequence changes at polypeptide level. It uses methods defined in -superclass Bio::Variation::VariantI, see L -for details. - -If the variation described by a AAChange object has a known -Bio::Variation::RNAAChange object, create the link with method -AAChange(). See L for more information. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Variation::AAChange; - -use vars qw($MATRIX); -use strict; - -# Object preamble - inheritance - -use base qw(Bio::Variation::VariantI); - -BEGIN { - -my $matrix = << "__MATRIX__"; -# Matrix made by matblas from blosum62.iij -# * column uses minimum score -# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units -# Blocks Database = /data/blocks_5.0/blocks.dat -# Cluster Percentage: >= 62 -# Entropy = 0.6979, Expected = -0.5209 - A R N D C Q E G H I L K M F P S T W Y V B Z X * -A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 -R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 -N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 -D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 -C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 -Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 -E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 -H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 -I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 -L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 -K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 -M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 -F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 -P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 -S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 -T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 -W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 -Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 -V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 -B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 -Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 -X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 -* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 -__MATRIX__ - - my %blosum = (); - $matrix =~ /^ +(.+)$/m; - my @aas = split / +/, $1; - foreach my $aa (@aas) { - my $tmp = $aa; - $tmp = "\\$aa" if $aa eq '*'; - $matrix =~ /^($tmp) +([-+]?\d.*)$/m; - my @scores = split / +/, $2 if defined $2; - my $count = 0; - foreach my $ak (@aas) { - $blosum{$aa}->{$aas[$count]} = $scores[$count]; - $count++; - } - } - sub _matrix; - $MATRIX = \%blosum; -} - -sub new { - my($class,@args) = @_; - my $self = $class->SUPER::new(@args); - - my ($start, $end, $length, $strand, $primary, $source, - $frame, $score, $gff_string, - $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, - $label, $status, $proof, $re_changes, $region, $region_value, - $region_dist, - $numbering, $mut_number, $ismutation) = - $self->_rearrange([qw(START - END - LENGTH - STRAND - PRIMARY - SOURCE - FRAME - SCORE - GFF_STRING - ALLELE_ORI - ALLELE_MUT - UPSTREAMSEQ - DNSTREAMSEQ - LABEL - STATUS - PROOF - RE_CHANGES - REGION - REGION_VALUE - REGION_DIST - NUMBERING - MUT_NUMBER - ISMUTATION - )],@args); - - $self->primary_tag("Variation"); - - $self->{ 'alleles' } = []; - - $start && $self->start($start); - $end && $self->end($end); - $length && $self->length($length); - $strand && $self->strand($strand); - $primary && $self->primary_tag($primary); - $source && $self->source_tag($source); - $frame && $self->frame($frame); - $score && $self->score($score); - $gff_string && $self->_from_gff_string($gff_string); - - $allele_ori && $self->allele_ori($allele_ori); - $allele_mut && $self->allele_mut($allele_mut); - $upstreamseq && $self->upstreamseq($upstreamseq); - $dnstreamseq && $self->dnstreamseq($dnstreamseq); - - $label && $self->label($label); - $status && $self->status($status); - $proof && $self->proof($proof); - $region && $self->region($region); - $region_value && $self->region_value($region_value); - $region_dist && $self->region_dist($region_dist); - $numbering && $self->numbering($numbering); - $mut_number && $self->mut_number($mut_number); - $ismutation && $self->isMutation($ismutation); - - return $self; # success - we hope! -} - -=head2 RNAChange - - Title : RNAChange - Usage : $mutobj = $self->RNAChange; - : $mutobj = $self->RNAChange($objref); - Function: Returns or sets the link-reference to a mutation/change object. - If there is no link, it will return undef - Returns : an obj_ref or undef - -=cut - -sub RNAChange { - my ($self,$value) = @_; - if (defined $value) { - if( ! $value->isa('Bio::Variation::RNAChange') ) { - $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]"); - return; - } - else { - $self->{'RNAChange'} = $value; - } - } - unless (exists $self->{'RNAChange'}) { - return; - } else { - return $self->{'RNAChange'}; - } -} - - - -=head2 label - - Title : label - Usage : $obj->label(); - Function: - - Sets and returns mutation event label(s). If value is not - set, or no argument is given returns false. Each - instantiable subclass of L needs - to implement this method. Valid values are listed in - 'Mutation event controlled vocabulary' in - http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. - - Example : - Returns : string - Args : string - -=cut - - -sub label { - my ($self) = @_; - my ($o, $m, $type); - $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; - $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; - - if ($self->start == 1 ) { - if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) { - $type = 'no translation'; - } - elsif ($o and $m and $o eq $m ) { - $type = 'silent'; - } - # more ... - } - elsif ($o and substr($o, 0, 1) eq '*' ) { - if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) { - $type = 'post-elongation'; - } - elsif ($m and $o eq $m ) { - $type = 'silent, conservative'; - } - } - elsif ($o and $m and $o eq $m) { - $type = 'silent, conservative'; - } - elsif ($m and $m eq '*') { - $type = 'truncation'; - } - elsif ($o and $m and $o eq $m) { - $type = 'silent, conservative'; - } - elsif (not $m or - ($o and $m and length($o) > length($m) and - substr($m, -1, 1) ne '*')) { - $type = 'deletion'; - if ($o and $m and $o !~ $m and $o !~ $m) { - $type .= ', complex'; - } - } - elsif (not $o or - ($o and $m and length($o) < length($m) and - substr($m, -1, 1) ne '*' ) ) { - $type = 'insertion'; - if ($o and $m and $o !~ $m and $o !~ $m) { - $type .= ', complex'; - } - } - elsif ($o and $m and $o ne $m and - length $o == 1 and length $m == 1 ) { - $type = 'substitution'; - my $value = $self->similarity_score; - if (defined $value) { - my $cons = ($value < 0) ? 'nonconservative' : 'conservative'; - $type .= ", ". $cons; - } - } else { - $type = 'out-of-frame translation, truncation'; - } - $self->{'label'} = $type; - return $self->{'label'}; -} - - -=head2 similarity_score - - Title : similarity_score - Usage : $self->similarity_score - Function: Measure for evolutionary conservativeness - of single amino substitutions. Uses BLOSUM62. - Negative numbers are noncoservative changes. - Returns : integer, undef if not single amino acid change - -=cut - -sub similarity_score { - my ($self) = @_; - my ($o, $m, $type); - $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; - $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; - return unless $o and $m and length $o == 1 and length $m == 1; - return unless $o =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i and - $m =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i; - return $MATRIX->{"\U$o"}->{"\U$m"}; -} - -=head2 trivname - - Title : trivname - Usage : $self->trivname - Function: - - Given a Bio::Variation::AAChange object with linked - Bio::Variation::RNAChange and Bio::Variation::DNAMutation - objects, this subroutine creates a string corresponding to - the 'trivial name' of the mutation. Trivial name is - specified in Antonorakis & MDI Nomenclature Working Group: - Human Mutation 11:1-3, 1998. - - Returns : string - -=cut - - -sub trivname { - my ($self,$value) = @_; - if( defined $value) { - $self->{'trivname'} = $value; - } else { - my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) = - ('', '', '', '', ''); - my $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; - #my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; - - $aaori = substr ($o, 0, 1) if $o; - $aaori =~ tr/\*/X/; - - my $sep; - if ($self->isMutation) { - $sep = '>'; - } else { - $sep = '|'; - } - my $trivname = $aaori. $self->start; - $trivname .= $sep if $sep eq '|'; - - my @alleles = $self->each_Allele; - foreach my $allele (@alleles) { - my $m = $allele->seq if $allele->seq; - - $self->allele_mut($allele); - #$trivname .= $sep. uc $m if $m; - - $aamutterm = substr ($m, -1, 1) if $m; - if ($self->RNAChange->label =~ /initiation codon/ and - ( $o and $m and $o ne $m)) { - $aamut = 'X'; - } - elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) { - $aamutsymbol = ''; - $aamut = $aamutterm; - } - elsif ($self->RNAChange->label =~ /deletion/) { - $aamutsymbol = 'del'; - if ($aamutterm eq '*') { - $aatermnumber = $self->start + length($m) -1; - $aamut = 'X'. $aatermnumber; - } - if ($self->RNAChange && $self->RNAChange->label =~ /inframe/){ - $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ; - } - } - elsif ($self->RNAChange->label =~ /insertion/) { - $aamutsymbol = 'ins'; - if (($aamutterm eq '*') && (length($m)-1 != 0)) { - $aatermnumber = $self->start + length($m)-1; - $aamut = $aatermnumber. 'X'; - } - if ($self->RNAChange->label =~ /inframe/){ - $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ; - } - } - elsif ($self->RNAChange->label =~ /complex/ ) { - my $diff = length($m) - length($o); - if ($diff >= 0 ) { - $aamutsymbol = 'ins'; - } else { - $aamutsymbol = 'del' ; - } - if (($aamutterm eq '*') && (length($m)-1 != 0)) { - $aatermnumber = $self->start + length($m)-1; - $aamut = $aatermnumber. 'X'; - } - if ($self->RNAChange->label =~ /inframe/){ - - if ($diff >= 0 ) { - $aamut = '+'. $diff ; - } else { - $aamut = $diff ; - } - } - } - elsif ($self->label =~ /truncation/) { - $aamut = $m; - } else { - $aamutsymbol = ''; - $aamut = $aamutterm; - } - $aamut =~ tr/\*/X/; - $trivname .= $aamutsymbol. $aamut. $sep; - } - chop $trivname; - $self->{'trivname'} = $trivname; - } - return $self->{'trivname'}; -} - -1; diff --git a/lib/Bio/Variation/AAReverseMutate.pm b/lib/Bio/Variation/AAReverseMutate.pm deleted file mode 100644 index 13cf1b76d..000000000 --- a/lib/Bio/Variation/AAReverseMutate.pm +++ /dev/null @@ -1,306 +0,0 @@ -# -# BioPerl module for Bio::Variation::AAReverseMutate -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Variation::AAReverseMutate - point mutation and codon - information from single amino acid changes - -=head1 SYNOPSIS - - $aamut = Bio::Variation::AAReverseMutate->new - (-aa_ori => 'F', - -aa_mut => 'S', - -codon_ori => 'ttc', # optional - -codon_table => '3' # defaults to 1 - ); - - @points = $aamut->each_Variant; - - if (scalar @points > 0 ) { - foreach $rnachange ( @points ) { - # $rnachange is a Bio::Variation::RNAChange object - print " ", $rnachange->allele_ori->seq, ">", - $rnachange->allele_mut->seq, " in ", - $rnachange->codon_ori, ">", $rnachange->codon_mut, - " at position ", $rnachange->codon_pos, "\n"; - } - } else { - print "No point mutations possible\n", - } - -=head1 DESCRIPTION - -Bio::Variation::AAReverseMutate objects take in reference and mutated -amino acid information and deduces potential point mutations at RNA -level leading to this change. The choice can be further limited by -letting the object know what is the the codon in the reference -sequence. The results are returned as L -objects. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - -package Bio::Variation::AAReverseMutate; - -use strict; - -# Object preamble - inheritance -use Bio::Tools::CodonTable; -use Bio::Variation::RNAChange; -use Bio::Variation::Allele; - -use base qw(Bio::Root::Root); - -sub new { - my($class,@args) = @_; - my $self = $class->SUPER::new(@args); - - my ($aa_ori, $aa_mut, $codon_ori, $codon_table) = - $self->_rearrange([qw(AA_ORI - AA_MUT - CODON - CODON_TABLE - )],@args); - - $aa_ori && $self->aa_ori($aa_ori); - $aa_mut && $self->aa_mut($aa_mut); - $codon_ori && $self->codon_ori($codon_ori); - $codon_table && $self->codon_table($codon_table); - - return $self; # success - we hope! - -} - - -=head2 aa_ori - - Title : aa_ori - Usage : $obj->aa_ori(); - Function: - - Sets and returns original aa sequence. If value is not - set, returns false. - - Amino acid sequences are stored in upper case characters, - others in lower case. - - Example : - Returns : string - Args : single character amino acid code - -=cut - -sub aa_ori { - my ($self,$value) = @_; - if( defined $value) { - if ( uc($value) !~ /^[ARNDCQEGHILKMFPSTWYVBZX*]$/ ) { - $self->throw("'$value' is not a valid one letter amino acid symbol\n"); - } else { - $self->{'aa_ori'} = uc $value; - } - } - return $self->{'aa_ori'}; -} - - -=head2 aa_mut - - Title : aa_mut - Usage : $obj->aa_mut(); - Function: - - Sets and returns the mutated allele sequence. If value is not - set, returns false. - - Example : - Returns : string - Args : single character amino acid code - -=cut - - -sub aa_mut { - my ($self,$value) = @_; - if( defined $value) { - if ( uc($value) !~ /^[ARNDCQEGHILKMFPSTWYVBZX*]$/ ) { - $self->throw("'$value' is not a valid one letter amino acid symbol\n"); - } else { - $self->{'aa_mut'} = uc $value; - } - } - return $self->{'aa_mut'}; -} - - -=head2 codon_ori - - Title : codon_ori - Usage : $obj->codon_ori(); - Function: - - Sets and returns codon_ori triplet. If value is not set, - returns false. The string has to be three characters - long. The character content is not checked. - - Example : - Returns : string - Args : string - -=cut - -sub codon_ori { - my ($self,$value) = @_; - if( defined $value) { - if (length $value != 3 or lc $value =~ /[^atgc]/) { - $self->warn("Codon string \"$value\" is not valid unique codon"); - } - $self->{'codon_ori'} = lc $value; - } - return $self->{'codon_ori'}; -} - -=head2 codon_table - - Title : codon_table - Usage : $obj->codon_table(); - Function: - - Sets and returns the codon table id of the RNA - If value is not set, returns 1, 'universal' code, as the default. - - Example : - Returns : integer - Args : none if get, the new value if set - -=cut - - -sub codon_table { - my ($self,$value) = @_; - if( defined $value) { - if ( not $value =~ /^\d+$/ ) { - $self->throw("'$value' is not a valid codon table ID\n". - "Has to be a positive integer. Defaulting to 1\n"); - } else { - $self->{'codon_table'} = $value; - } - } - if( ! exists $self->{'codon_table'} ) { - return 1; - } else { - return $self->{'codon_table'}; - } -} - - -=head2 each_Variant - - Title : each_Variant - Usage : $obj->each_Variant(); - Function: - - Returns a list of Variants. - - Example : - Returns : list of Variants - Args : none - -=cut - -sub each_Variant{ - my ($self,@args) = @_; - - $self->throw("aa_ori is not defined\n") if not defined $self->aa_ori; - $self->throw("aa_mut is not defined\n") if not defined $self->aa_mut; - - my (@points, $codon_pos, $allele_ori, $allele_mut); - my $ct = Bio::Tools::CodonTable->new( '-id' => $self->codon_table ); - foreach my $codon_ori ($ct->revtranslate($self->aa_ori)) { - next if $self->codon_ori and $self->codon_ori ne $codon_ori; - foreach my $codon_mut ($ct->revtranslate($self->aa_mut)) { - my $k = 0; - my $length = 0; - $codon_pos = $allele_ori = $allele_mut = undef; - while ($k<3) { - my $nt_ori = substr ($codon_ori, $k, 1); - my $nt_mut = substr ($codon_mut, $k, 1); - if ($nt_ori ne $nt_mut) { - $length++; - $codon_pos = $k+1; - $allele_ori = $nt_ori; - $allele_mut = $nt_mut; - } - $k++; - } - if ($length == 1) { - my $rna = Bio::Variation::RNAChange->new - ('-length' => '1', - '-codon_ori' => $codon_ori, - '-codon_mut' => $codon_mut, - '-codon_pos' => $codon_pos, - '-isMutation' => 1 - ); - my $all_ori = Bio::Variation::Allele->new('-seq'=>$allele_ori); - $rna->allele_ori($all_ori); - my $all_mut = Bio::Variation::Allele->new('-seq'=>$allele_mut); - $rna->allele_mut($all_mut); - push @points, $rna; - } - } - } - return @points; -} - -1; diff --git a/lib/Bio/Variation/Allele.pm b/lib/Bio/Variation/Allele.pm deleted file mode 100644 index b220e3b1a..000000000 --- a/lib/Bio/Variation/Allele.pm +++ /dev/null @@ -1,295 +0,0 @@ -# -# BioPerl module for Bio::Variation::Allele -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Variation::Allele - Sequence object with allele-specific attributes - -=head1 SYNOPSIS - - $allele1 = Bio::Variation::Allele->new ( -seq => 'A', - -id => 'AC00001.1', - -alphabet => 'dna', - -is_reference => 1 - ); - -=head1 DESCRIPTION - -List of alleles describe known sequence alternatives in a variable region. -Alleles are contained in Bio::Variation::VariantI complying objects. -See L for details. - -Bio::Varation::Alleles are PrimarySeqI complying objects which can -contain database cross references as specified in -Bio::DBLinkContainerI interface, too. - -A lot of the complexity with dealing with Allele objects are caused by -null alleles; Allele objects that have zero length sequence string. - -In addition describing the allele by its sequence , it possible to -give describe repeat structure within the sequence. This done using -methods repeat_unit (e.g. 'ca') and repeat_count (e.g. 7). - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - -package Bio::Variation::Allele; - -use strict; - -# Object preamble - inheritance - - -use base qw(Bio::PrimarySeq Bio::DBLinkContainerI); - -sub new { - my($class, @args) = @_; - my $self = $class->SUPER::new(@args); - - my($is_reference, $repeat_unit, $repeat_count) = - $self->_rearrange([qw(IS_REFERENCE - REPEAT_UNIT - REPEAT_COUNT - )], - @args); - - $is_reference && $self->is_reference($is_reference); - $repeat_unit && $self->repeat_unit($repeat_unit); - $repeat_count && $self->repeat_count($repeat_count); - - return $self; # success - we hope! -} - - -=head2 is_reference - - Title : is_reference - Usage : $obj->is_reference() - Function: sets and returns boolean values. - Unset values return false. - Example : $obj->is_reference() - Returns : boolean - Args : optional true of false value - - -=cut - - -sub is_reference { - my ($self,$value) = @_; - if( defined $value) { - $value ? ($value = 1) : ($value = 0); - $self->{'is_reference'} = $value; - } - if( ! exists $self->{'is_reference'} ) { - return 0; - } - else { - return $self->{'is_reference'}; - } -} - - -=head2 add_DBLink - - Title : add_DBLink - Usage : $self->add_DBLink($ref) - Function: adds a link object - Example : - Returns : - Args : - - -=cut - - -sub add_DBLink{ - my ($self,$com) = @_; - if( ! $com->isa('Bio::Annotation::DBLink') ) { - $self->throw("Is not a link object but a [$com]"); - } - push(@{$self->{'link'}},$com); -} - -=head2 each_DBLink - - Title : each_DBLink - Usage : foreach $ref ( $self->each_DBlink() ) - Function: gets an array of DBlink of objects - Example : - Returns : - Args : - - -=cut - -sub each_DBLink{ - my ($self) = @_; - return @{$self->{'link'}}; -} - -=head2 repeat_unit - - Title : repeat_unit - Usage : $obj->repeat_unit('ca'); - Function: - - Sets and returns the sequence of the repeat_unit the - allele is composed of. - - Example : - Returns : string - Args : string - -=cut - -sub repeat_unit { - my ($self,$value) = @_; - if( defined $value) { - $self->{'repeat_unit'} = $value; - } - if ($self->{'seq'} && $self->{'repeat_unit'} && $self->{'repeat_count'} ) { - $self->warn("Repeats do not add up!") - if ( $self->{'repeat_unit'} x $self->{'repeat_count'}) ne $self->{'seq'}; - } - return $self->{'repeat_unit'}; -} - -=head2 repeat_count - - Title : repeat_count - Usage : $obj->repeat_count(); - Function: - - Sets and returns the number of repeat units in the allele. - - Example : - Returns : string - Args : string - -=cut - - -sub repeat_count { - my ($self,$value) = @_; - if( defined $value) { - if ( not $value =~ /^\d+$/ ) { - $self->throw("[$value] for repeat_count has to be a positive integer\n"); - } else { - $self->{'repeat_count'} = $value; - } - } - if ($self->{'seq'} && $self->{'repeat_unit'} && $self->{'repeat_count'} ) { - $self->warn("Repeats do not add up!") - if ( $self->{'repeat_unit'} x $self->{'repeat_count'}) ne $self->{'seq'}; - } - return $self->{'repeat_count'}; -} - -=head2 count - - Title : count - Usage : $obj->count(); - Function: - - Sets and returns the number of times this allele was observed. - - Example : - Returns : string - Args : string - -=cut - -sub count { - my ($self,$value) = @_; - if( defined $value) { - if ( not $value =~ /^\d+$/ ) { - $self->throw("[$value] for count has to be a positive integer\n"); - } else { - $self->{'count'} = $value; - } - } - return $self->{'count'}; -} - - -=head2 frequency - - Title : frequency - Usage : $obj->frequency(); - Function: - - Sets and returns the frequency of the allele in the observed - population. - - Example : - Returns : string - Args : string - -=cut - -sub frequency { - my ($self,$value) = @_; - if( defined $value) { - if ( not $value =~ /^\d+$/ ) { - $self->throw("[$value] for frequency has to be a positive integer\n"); - } else { - $self->{'frequency'} = $value; - } - } - return $self->{'frequency'}; -} - - -1; diff --git a/lib/Bio/Variation/DNAMutation.pm b/lib/Bio/Variation/DNAMutation.pm deleted file mode 100644 index 9fbe59d11..000000000 --- a/lib/Bio/Variation/DNAMutation.pm +++ /dev/null @@ -1,397 +0,0 @@ -# -# BioPerl module for Bio::Variation::DNAMutation -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Variation::DNAMutation - DNA level mutation class - -=head1 SYNOPSIS - - $dnamut = Bio::Variation::DNAMutation->new - ('-start' => $start, - '-end' => $end, - '-length' => $len, - '-upStreamSeq' => $upflank, - '-dnStreamSeq' => $dnflank, - '-proof' => $proof, - '-isMutation' => 1, - '-mut_number' => $mut_number - ); - $a1 = Bio::Variation::Allele->new; - $a1->seq('a'); - $dnamut->allele_ori($a1); - my $a2 = Bio::Variation::Allele->new; - $a2->seq('t'); - $dnamut->add_Allele($a2); - - print "Restriction changes are ", $dnamut->restriction_changes, "\n"; - - # add it to a SeqDiff container object - $seqdiff->add_Variant($dnamut); - - -=head1 DESCRIPTION - -The instantiable class Bio::Variation::DNAMutation describes basic -sequence changes in genomic DNA level. It uses methods defined in -superclass Bio::Variation::VariantI. See L -for details. - -If the variation described by a DNAMutation object is transcibed, link -the corresponding Bio::Variation::RNAChange object to it using -method RNAChange(). See L for more information. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Variation::DNAMutation; -use strict; - -# Object preamble - inheritance - -use base qw(Bio::Variation::VariantI); - -sub new { - my($class,@args) = @_; - my $self = $class->SUPER::new(@args); - - my ($start, $end, $length, $strand, $primary, $source, - $frame, $score, $gff_string, - $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, - $label, $status, $proof, $region, $region_value, $region_dist, $numbering, - $cpg, $mut_number, $ismutation) = - $self->_rearrange([qw(START - END - LENGTH - STRAND - PRIMARY - SOURCE - FRAME - SCORE - GFF_STRING - ALLELE_ORI - ALLELE_MUT - UPSTREAMSEQ - DNSTREAMSEQ - LABEL - STATUS - PROOF - REGION - REGION_VALUE - REGION_DIST - NUMBERING - CPG - MUT_NUMBER - ISMUTATION - )], - @args); - - $self->primary_tag("Variation"); - - $self->{ 'alleles' } = []; - - $start && $self->start($start); - $end && $self->end($end); - $length && $self->length($length); - $strand && $self->strand($strand); - $primary && $self->primary_tag($primary); - $source && $self->source_tag($source); - $frame && $self->frame($frame); - $score && $self->score($score); - $gff_string && $self->_from_gff_string($gff_string); - - $allele_ori && $self->allele_ori($allele_ori); - $allele_mut && $self->allele_mut($allele_mut); - $upstreamseq && $self->upStreamSeq($upstreamseq); - $dnstreamseq && $self->dnStreamSeq($dnstreamseq); - - $label && $self->label($label); - $status && $self->status($status); - $proof && $self->proof($proof); - $region && $self->region($region); - $region_value && $self->region_value($region_value); - $region_dist && $self->region_dist($region_dist); - $numbering && $self->numbering($numbering); - $mut_number && $self->mut_number($mut_number); - $ismutation && $self->isMutation($ismutation); - - $cpg && $self->CpG($cpg); - - return $self; # success - we hope! -} - - -=head2 CpG - - Title : CpG - Usage : $obj->CpG() - Function: sets and returns boolean values for variation - hitting a CpG site. Unset value return -1. - Example : $obj->CpG() - Returns : boolean - Args : optional true of false value - - -=cut - - -sub CpG { - my ($obj,$value) = @_; - if( defined $value) { - $value ? ($value = 1) : ($value = 0); - $obj->{'cpg'} = $value; - } - elsif (not defined $obj->{'label'}) { - $obj->{'cpg'} = $obj->_CpG_value; - } - else { - return $obj->{'cpg'}; - } -} - - - -sub _CpG_value { - my ($self) = @_; - if ($self->allele_ori eq $self->allele_mut and length ($self->allele_ori) == 1 ) { - - # valid only for point mutations - # CpG methylation-mediated deamination: - # CG -> TG | CG -> CA substitutions - # implementation here is less strict: if CpG dinucleotide was hit - - if ( ( ($self->allele_ori eq 'c') && (substr($self->upStreamSeq, 0, 1) eq 'g') ) || - ( ($self->allele_ori eq 'g') && (substr($self->dnStreamSeq, -1, 1) eq 'c') ) ) { - return 1; - } - else { - return 0; - } - } else { - $self->warn('CpG makes sense only in the context of point mutation'); - return; - } -} - - -=head2 RNAChange - - Title : RNAChange - Usage : $mutobj = $obj->RNAChange; - : $mutobj = $obj->RNAChange($objref); - Function: Returns or sets the link-reference to a mutation/change object. - If there is no link, it will return undef - Returns : an obj_ref or undef - -=cut - - -sub RNAChange { - my ($self,$value) = @_; - if (defined $value) { - if( ! $value->isa('Bio::Variation::RNAChange') ) { - $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]"); - return; - } - else { - $self->{'RNAChange'} = $value; - } - } - unless (exists $self->{'RNAChange'}) { - return; - } else { - return $self->{'RNAChange'}; - } -} - - -=head2 label - - Title : label - Usage : $obj->label(); - Function: - - Sets and returns mutation event label(s). If value is not - set, or no argument is given returns false. Each - instantiable subclass of L needs - to implement this method. Valid values are listed in - 'Mutation event controlled vocabulary' in - http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. - - Example : - Returns : string - Args : string - -=cut - - -sub label { - my ($self, $value) = @_; - my ($o, $m, $type); - $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; - $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; - - if (not $o and not $m ) { - $self->warn("[DNAMutation, label] Both alleles should not be empty!\n"); - $type = 'no change'; # is this enough? - } - elsif ($o && $m && length($o) == length($m) && length($o) == 1) { - $type = 'point'; - $type .= ", ". _point_type_label($o, $m); - } - elsif (not $o ) { - $type = 'insertion'; - } - elsif (not $m ) { - $type = 'deletion'; - } - else { - $type = 'complex'; - } - $self->{'label'} = $type; - return $self->{'label'}; -} - - -sub _point_type_label { - my ($o, $m) = @_; - my ($type); - my %transition = ('a' => 'g', - 'g' => 'a', - 'c' => 't', - 't' => 'c'); - $o = lc $o; - $m = lc $m; - if ($o eq $m) { - $type = 'no change'; - } - elsif ($transition{$o} eq $m ) { - $type = 'transition'; - } - else { - $type = 'transversion'; - } -} - - -=head2 sysname - - Title : sysname - Usage : $self->sysname - Function: - - This subroutine creates a string corresponding to the - 'systematic name' of the mutation. Systematic name is - specified in Antonorakis & MDI Nomenclature Working Group: - Human Mutation 11:1-3, 1998. - - Returns : string - -=cut - - -sub sysname { - my ($self,$value) = @_; - if( defined $value) { - $self->{'sysname'} = $value; - } else { - $self->warn('Mutation start position is not defined') - if not defined $self->start; - my $sysname = ''; - # show the alphabet only if $self->SeqDiff->alphabet is set; - my $mol = ''; - -if ($self->SeqDiff ) { - if ($self->SeqDiff && $self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'dna') { - $mol = 'g.'; - } - elsif ($self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'rna') { - $mol = 'c.'; - } - } - my $sep; - if ($self->isMutation) { - $sep = '>'; - } else { - $sep = '|'; - } - my $sign = '+'; - $sign = '' if $self->start < 1; - $sysname .= $mol ;#if $mol; - $sysname .= $sign. $self->start; - - my @alleles = $self->each_Allele; - $self->allele_mut($alleles[0]); - - $sysname .= 'del' if $self->label =~ /deletion/; - $sysname .= 'ins' if $self->label =~ /insertion/; - $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq; - - - - #push @alleles, $self->allele_mut if $self->allele_mut; - foreach my $allele (@alleles) { - $self->allele_mut($allele); - $sysname .= $sep if $self->label =~ /point/ or $self->label =~ /complex/; - $sysname .= uc $self->allele_mut->seq if $self->allele_mut->seq; - } - $self->{'sysname'} = $sysname; - #$self->{'sysname'} = $sign. $self->start. - # uc $self->allele_ori->seq. $sep. uc $self->allele_mut->seq; - } - return $self->{'sysname'}; -} - -1; diff --git a/lib/Bio/Variation/IO.pm b/lib/Bio/Variation/IO.pm deleted file mode 100644 index 4cbb606b4..000000000 --- a/lib/Bio/Variation/IO.pm +++ /dev/null @@ -1,366 +0,0 @@ -# -# BioPerl module for Bio::Variation::IO -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself -# -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Variation::IO - Handler for sequence variation IO Formats - -=head1 SYNOPSIS - - use Bio::Variation::IO; - - $in = Bio::Variation::IO->new(-file => "inputfilename" , - -format => 'flat'); - $out = Bio::Variation::IO->new(-file => ">outputfilename" , - -format => 'xml'); - - while ( my $seq = $in->next() ) { - $out->write($seq); - } - - # or - - use Bio::Variation::IO; - - #input file format can be read from the file extension (dat|xml) - $in = Bio::Variation::IO->newFh(-file => "inputfilename"); - $out = Bio::Variation::IO->newFh(-format => 'xml'); - - # World's shortest flat<->xml format converter: - print $out $_ while <$in>; - -=head1 DESCRIPTION - -Bio::Variation::IO is a handler module for the formats in the -Variation IO set (eg, Bio::Variation::IO::flat). It is the officially -sanctioned way of getting at the format objects, which most people -should use. - -The structure, conventions and most of the code is inherited from -L module. The main difference is that instead of using -methods next_seq and write_seq, you drop '_seq' from the method names. - -The idea is that you request a stream object for a particular format. -All the stream objects have a notion of an internal file that is read -from or written to. A particular SeqIO object instance is configured -for either input or output. A specific example of a stream object is -the Bio::Variation::IO::flat object. - -Each stream object has functions - - $stream->next(); - -and - - $stream->write($seqDiff); - -also - - $stream->type() # returns 'INPUT' or 'OUTPUT' - -As an added bonus, you can recover a filehandle that is tied to the -SeqIO object, allowing you to use the standard EE and print -operations to read and write sequence objects: - - use Bio::Variation::IO; - - $stream = Bio::Variation::IO->newFh(-format => 'flat'); - # read from standard input - - while ( $seq = <$stream> ) { - # do something with $seq - } - -and - - print $stream $seq; # when stream is in output mode - -This makes the simplest ever reformatter - - #!/usr/local/bin/perl - - $format1 = shift; - $format2 = shift; - - use Bio::Variation::IO; - - $in = Bio::Variation::IO->newFh(-format => $format1 ); - $out = Bio::Variation::IO->newFh(-format => $format2 ); - - print $out $_ while <$in>; - - -=head1 CONSTRUCTORS - -=head2 Bio::Variation::IO-Enew() - - $seqIO = Bio::Variation::IO->new(-file => 'filename', -format=>$format); - $seqIO = Bio::Variation::IO->new(-fh => \*FILEHANDLE, -format=>$format); - $seqIO = Bio::Variation::IO->new(-format => $format); - -The new() class method constructs a new Bio::Variation::IO object. The -returned object can be used to retrieve or print BioSeq objects. new() -accepts the following parameters: - -=over 4 - -=item -file - -A file path to be opened for reading or writing. The usual Perl -conventions apply: - - 'file' # open file for reading - '>file' # open file for writing - '>>file' # open file for appending - '+new(-fh => \*STDIN); - -Note that you must pass filehandles as references to globs. - -If neither a filehandle nor a filename is specified, then the module -will read from the @ARGV array or STDIN, using the familiar EE -semantics. - -=item -format - -Specify the format of the file. Supported formats include: - - flat pseudo EMBL format - xml seqvar xml format - -If no format is specified and a filename is given, then the module -will attempt to deduce it from the filename. If this is unsuccessful, -Fasta format is assumed. - -The format name is case insensitive. 'FLAT', 'Flat' and 'flat' are -all supported. - -=back - -=head2 Bio::Variation::IO-EnewFh() - - $fh = Bio::Variation::IO->newFh(-fh => \*FILEHANDLE, -format=>$format); - $fh = Bio::Variation::IO->newFh(-format => $format); - # etc. - - #e.g. - $out = Bio::Variation::IO->newFh( '-FORMAT' => 'flat'); - print $out $seqDiff; - -This constructor behaves like new(), but returns a tied filehandle -rather than a Bio::Variation::IO object. You can read sequences from this -object using the familiar EE operator, and write to it using print(). -The usual array and $_ semantics work. For example, you can read all -sequence objects into an array like this: - - @mutations = <$fh>; - -Other operations, such as read(), sysread(), write(), close(), and printf() -are not supported. - -=head1 OBJECT METHODS - -See below for more detailed summaries. The main methods are: - -=head2 $sequence = $seqIO-Enext() - -Fetch the next sequence from the stream. - -=head2 $seqIO-Ewrite($sequence [,$another_sequence,...]) - -Write the specified sequence(s) to the stream. - -=head2 TIEHANDLE(), READLINE(), PRINT() - -These provide the tie interface. See L for more details. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Variation::IO; - -use strict; - - -use base qw(Bio::SeqIO Bio::Root::IO); - -=head2 new - - Title : new - Usage : $stream = Bio::Variation::IO->new(-file => $filename, -format => 'Format') - Function: Returns a new seqstream - Returns : A Bio::Variation::IO::Handler initialised with the appropriate format - Args : -file => $filename - -format => format - -fh => filehandle to attach to - -=cut - - -sub new { - my ($class, %param) = @_; - my ($format); - - @param{ map { lc $_ } keys %param } = values %param; # lowercase keys - $format = $param{'-format'} - || $class->_guess_format( $param{-file} || $ARGV[0] ) - || 'flat'; - $format = "\L$format"; # normalize capitalization to lower case - - return unless $class->_load_format_module($format); - return "Bio::Variation::IO::$format"->new(%param); -} - - -=head2 format - - Title : format - Usage : $format = $stream->format() - Function: Get the variation format - Returns : variation format - Args : none - -=cut - -# format() method inherited from Bio::Root::IO - - -sub _load_format_module { - my ($class, $format) = @_; - my $module = "Bio::Variation::IO::" . $format; - my $ok; - eval { - $ok = $class->_load_module($module); - }; - if ( $@ ) { - print STDERR <next - Function: reads the next $seqDiff object from the stream - Returns : a Bio::Variation::SeqDiff object - Args : - -=cut - -sub next { - my ($self, $seq) = @_; - $self->throw("Sorry, you cannot read from a generic Bio::Variation::IO object."); -} - -sub next_seq { - my ($self, $seq) = @_; - $self->throw("These are not sequence objects. Use method 'next' instead of 'next_seq'."); - $self->next($seq); -} - -=head2 write - - Title : write - Usage : $stream->write($seq) - Function: writes the $seq object into the stream - Returns : 1 for success and 0 for error - Args : Bio::Variation::SeqDiff object - -=cut - -sub write { - my ($self, $seq) = @_; - $self->throw("Sorry, you cannot write to a generic Bio::Variation::IO object."); -} - -sub write_seq { - my ($self, $seq) = @_; - $self->warn("These are not sequence objects. Use method 'write' instead of 'write_seq'."); - $self->write($seq); -} - -=head2 _guess_format - - Title : _guess_format - Usage : $obj->_guess_format($filename) - Function: - Example : - Returns : guessed format of filename (lower case) - Args : - -=cut - -sub _guess_format { - my $class = shift; - return unless $_ = shift; - return 'flat' if /\.dat$/i; - return 'xml' if /\.xml$/i; -} - - -1; diff --git a/lib/Bio/Variation/IO/flat.pm b/lib/Bio/Variation/IO/flat.pm deleted file mode 100644 index 54fe040fd..000000000 --- a/lib/Bio/Variation/IO/flat.pm +++ /dev/null @@ -1,721 +0,0 @@ -# BioPerl module for Bio::Variation::IO::flat -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself -# - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Variation::IO::flat - flat file sequence variation input/output stream - -=head1 SYNOPSIS - -Do not use this module directly. Use it via the Bio::Variation::IO class. - -=head1 DESCRIPTION - -This object can transform Bio::Variation::SeqDiff objects to and from -flat file databases. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Variation::IO::flat; - -use strict; - -use Text::Wrap; -use Bio::Variation::SeqDiff; -use Bio::Variation::DNAMutation; -use Bio::Variation::RNAChange; -use Bio::Variation::AAChange; -use Bio::Variation::Allele; - - -use base qw(Bio::Variation::IO); - -sub new { - my($class, @args) = @_; - my $self = bless {}, $class; - $self->_initialize(@args); - return $self; -} - -sub _initialize { - my($self,@args) = @_; - return unless $self->SUPER::_initialize(@args); -} - -=head2 next - - - Title : next - Usage : $haplo = $stream->next() - Function: returns the next seqDiff in the stream - Returns : Bio::Variation::SeqDiff object - Args : NONE - -=cut - -sub next { - my( $self ) = @_; - local $/ = '//'; - return unless my $entry = $self->_readline; - - return if $entry =~ /^\s+$/; - - $entry =~ /\s*ID\s+\S+/ || $self->throw("We do need an ID!"); - - my ($id, $offset, $alphabet) = $entry =~ /\s*ID +([^:]+)..(\d+)[^\)]*.\[?([cg])?/ - or $self->throw("Can't parse ID line"); -# $self->throw("$1|$2|$3"); - my $h =Bio::Variation::SeqDiff->new(-id => $id, - -offset => $offset, - ); - if ($alphabet) { - if ($alphabet eq 'g') { - $alphabet = 'dna'; - } - elsif ($alphabet eq 'c') { - $alphabet = 'rna'; - } - $h->alphabet($alphabet); - } - # - # DNA - # - my @dna = split ( / DNA;/, $entry ); - shift @dna; - my $prevdnaobj; - foreach my $dna (@dna) { - $dna =~ s/Feature[ \t]+//g; - ($dna) = split "RNA; ", $dna; - #$self->warn("|$dna|") ; - #exit; - my ($mut_number, $proof, $location, $upflank, $change, $dnflank) = - $dna =~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+).+/upflank: ([ \n\w]+).+/change: ([^ /]+).+/dnflank: ([ \n\w]+)|s; - $change =~ s/[ \n]//g; - my ($ori, $mut) = split /[>\|]/, $change; - my ($variation_number, $change_number) = split /\./, $mut_number; - #$self->warn("|$mut_number|>|$variation_number|$change_number|"); - my $dnamut; - if ($change_number and $change_number > 1 ) { - my $a3 = Bio::Variation::Allele->new; - $a3->seq($mut) if $mut; - #$dnamut->add_Allele($a3); - $prevdnaobj->add_Allele($a3); - } else { - $upflank =~ s/[ \n]//g; - $dnflank =~ s/[ \n]//g; - my ($region, $junk, $region_value, $junk2, $region_dist) = - $dna =~ m|.+/region: ([\w\']+)(; )?(\w+)?( ?\(\+?)?(-?\d+)?|s; - #my $s = join ("|", $mut_number, $proof, $location, $upflank, - # $change, $dnflank, $region, $region_value, $region_dist, $1,$2,$3,$4,$5); - #$self->warn($s); - #exit; - my ($start, $sep, $end) = $location =~ /(-?\d+)(.)?\D?(-?\d+)?/; - $end = $start if not defined $end ; - my ($len) = $end - $start +1; - $len = 0, $start = $end if defined $sep and $sep eq '^'; - my $ismut = 0; - $ismut = 1 if $change =~ m/>/; - - $dnamut = Bio::Variation::DNAMutation->new - ('-start' => $start, - '-end' => $end, - '-length' => $len, - '-upStreamSeq' => $upflank, - '-dnStreamSeq' => $dnflank, - '-proof' => $proof, - '-mut_number' => $mut_number - ); - $prevdnaobj = $dnamut; - my $a1 = Bio::Variation::Allele->new; - $a1->seq($ori) if $ori; - $dnamut->allele_ori($a1); - my $a2 = Bio::Variation::Allele->new; - $a2->seq($mut) if $mut; - $dnamut->add_Allele($a2); - if ($ismut) { - $dnamut->isMutation(1); - $dnamut->allele_mut($a2); - } - $dnamut->region($region) if defined $region; - $dnamut->region_value($region_value) if defined $region_value; - $dnamut->region_dist($region_dist) if defined $region_dist; - - $h->add_Variant($dnamut); - $dnamut->SeqDiff($h); - } - } - - # - # RNA - # - my @rna = split ( / RNA;/, $entry ); - shift @rna; - my $prevrnaobj; - foreach my $rna (@rna) { - $rna = substr ($rna, 0, index($rna, 'Feature AA')); - $rna =~ s/Feature[ \t]+//g; - ($rna) = split "DNA; ", $rna; - #$self->warn("|$rna|") ; - my ($mut_number, $proof, $location, $upflank, $change, $dnflank) = - $rna =~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+).+/upflank: (\w+).+/change: ([^/]+).+/dnflank: (\w+)|s ;#' - my ($region, $junk, $region_value, $junk2, $region_dist) = - $rna =~ m|.+/region: ([\w\']+)(; )?(\w+)?( ?\(\+?)?(-?\d+)?|s; - #my $s = join ("|", $mut_number, $proof, $location, $upflank, - # $change, $dnflank, $region, $region_value, $region_dist, $1,$2,$3,$4,$5); - #$self->warn($s); - #exit; - $change =~ s/[ \n]//g; - my ($ori, $mut) = split /[>\|]/, $change; - my $rnamut; - my ($variation_number, $change_number) = split /\./, $mut_number; - if ($change_number and $change_number > 1 ) { - my $a3 = Bio::Variation::Allele->new; - $a3->seq($mut) if $mut; - #$rnamut->add_Allele($a3); - $prevrnaobj->add_Allele($a3); - } else { - my ($start, $sep, $end) = $location =~ /(-?\d+)(.)?\D?(-?\d+)?/; - $end = $start if not defined $end ; - my ($len) = $end - $start + 1; - $len = 0, $start = $end if defined $sep and $sep eq '^'; - my $ismut; - $ismut = 1 if $change =~ m/>/; - my ($codon_table) = $rna =~ m|.+/codon_table: (\d+)|s; - my ($codon_pos) = $rna =~ m|.+/codon:[^;]+; ([123])|s; - - $rnamut = Bio::Variation::RNAChange->new - ('-start' => $start, - '-end' => $end, - '-length' => $len, - '-upStreamSeq' => $upflank, - '-dnStreamSeq' => $dnflank, - '-proof' => $proof, - '-mut_number' => $mut_number - - ); - $prevrnaobj = $rnamut; - my $a1 = Bio::Variation::Allele->new; - $a1->seq($ori) if $ori; - $rnamut->allele_ori($a1); - my $a2 = Bio::Variation::Allele->new; - $a2->seq($mut) if $mut; - $rnamut->add_Allele($a2); - if ($ismut) { - $rnamut->isMutation(1); - $rnamut->allele_mut($a2); - } - $rnamut->region($region) if defined $region; - $rnamut->region_value($region_value) if defined $region_value; - $rnamut->region_dist($region_dist) if defined $region_dist; - - $rnamut->codon_table($codon_table) if $codon_table; - $rnamut->codon_pos($codon_pos) if $codon_pos; - $h->add_Variant($rnamut); - foreach my $mut ($h->each_Variant) { - if ($mut->isa('Bio::Variation::DNAMutation') ) { - if ($mut->mut_number == $rnamut->mut_number) { - $rnamut->DNAMutation($mut); - $mut->RNAChange($rnamut); - } - } - } - } - } - # - # AA - # - my @aa = split ( / AA;/, $entry ); - shift @aa; - my $prevaaobj; - foreach my $aa (@aa) { - $aa = substr ($aa, 0, index($aa, 'Feature AA')); - $aa =~ s/Feature[ \t]+//g; - ($aa) = split "DNA; ", $aa; - #$self->warn("|$aa|") ; - my ($mut_number, $proof, $location, $change) = - $aa =~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+)./change: ([^/;]+)|s; - $change =~ s/[ \n]//g; - #my $s = join ("|", $mut_number, $proof, $location, $change); - #$self->warn($s); - #exit; - $change =~ s/[ \n]//g; - $change =~ s/DNA$//; - my ($ori, $mut) = split /[>\|]/, $change; - #print "------$location----$ori-$mut-------------\n"; - my ($variation_number, $change_number) = split /\./, $mut_number; - my $aamut; - if ($change_number and $change_number > 1 ) { - my $a3 = Bio::Variation::Allele->new; - $a3->seq($mut) if $mut; - $prevaaobj->add_Allele($a3); - } else { - my ($start, $sep, $end) = $location =~ /(-?\d+)(.)?\D?(-?\d+)?/; - $end = $start if not defined $end ; - my ($len) = $end - $start + 1; - $len = 0, $start = $end if defined $sep and $sep eq '^'; - my $ismut; - $ismut = 1 if $change =~ m/>/; - my ($region) = $aa =~ m|.+/region: (\w+)|s ; - $aamut = Bio::Variation::AAChange->new - ('-start' => $start, - '-end' => $end, - '-length' => $len, - '-proof' => $proof, - '-mut_number' => $mut_number - ); - $prevaaobj = $aamut; - my $a1 = Bio::Variation::Allele->new; - $a1->seq($ori) if $ori; - $aamut->allele_ori($a1); - my $a2 = Bio::Variation::Allele->new; - $a2->seq($mut) if $mut; - $aamut->add_Allele($a2); - if ($ismut) { - $aamut->isMutation(1); - $aamut->allele_mut($a2); - } - $region && $aamut->region($region); - $h->add_Variant($aamut); - foreach my $mut ($h->each_Variant) { - if ($mut->isa('Bio::Variation::RNAChange') ) { - if ($mut->mut_number == $aamut->mut_number) { - $aamut->RNAChange($mut); - $mut->AAChange($aamut); - } - } - } - - } - } - return $h; -} - -=head2 write - - Title : write - Usage : $stream->write(@seqDiffs) - Function: writes the $seqDiff object into the stream - Returns : 1 for success and 0 for error - Args : Bio::Variation::SeqDiff object - - -=cut - -sub write { - my ($self,@h) = @_; - - #$columns = 75; #default for Text::Wrap - my %tag = - ( - 'ID' => 'ID ', - 'Description' => 'Description ', - 'FeatureKey' => 'Feature ', - 'FeatureQual' => "Feature ", - 'FeatureWrap' => "Feature ", - 'ErrorComment' => 'Comment ' - #'Comment' => 'Comment -!-', - #'CommentLine' => 'Comment ', - ); - - if( !defined $h[0] ) { - $self->throw("Attempting to write with no information!"); - } - - foreach my $h (@h) { - - my @entry =(); - - my ($text, $tmp, $tmp2, $sep); - my ($count) = 0; - - - $text = $tag{ID}; - - $text .= $h->id; - $text .= ":(". $h->offset; - $text .= "+1" if $h->sysname =~ /-/; - $text .= ")". $h->sysname; - $text .= "; ". $h->trivname if $h->trivname; - push (@entry, $text); - - #Variants need to be ordered accoding to mutation_number attribute - #put them into a hash of arrays holding the Variant objects - #This is necessary for cases like several distict mutations present - # in the same sequence. - my @allvariants = $h->each_Variant; - my %variants = (); - foreach my $mut ($h->each_Variant) { - push @{$variants{$mut->mut_number} }, $mut; - } - #my ($variation_number, $change_number) = split /\./, $mut_number; - foreach my $var (sort keys %variants) { - #print $var, ": ", join (" ", @{$variants{$var}}), "\n"; - - foreach my $mut (@{$variants{$var}}) { - # - # DNA - # - if ( $mut->isa('Bio::Variation::DNAMutation') ) { - #collect all non-reference alleles - $self->throw("allele_ori needs to be defined in [$mut]") - if not $mut->allele_ori; - if ($mut->isMutation) { - $sep = '>'; - } else { - $sep = '|'; - } - my @alleles = $mut->each_Allele; - #push @alleles, $mut->allele_mut if $mut->allele_mut; - my $count = 0; # two alleles - foreach my $allele (@alleles) { - $count++; - my ($variation_number, $change_number) = split /\./, $mut->mut_number; - if ($change_number and $change_number != $count){ - $mut->mut_number("$change_number.$count"); - } - $mut->allele_mut($allele); - push (@entry, - $tag{FeatureKey}. 'DNA'. "; ". $mut->mut_number - ); - #label - $text=$tag{FeatureQual}. '/label: '. $mut->label; - push (@entry, $text); - - #proof - if ($mut->proof) { - $text = $tag{FeatureQual}. '/proof: '. $mut->proof; - push (@entry, $text) ; - } - #location - $text = $tag{FeatureQual}. '/location: '; - #$mut->id. '; '. $mut->start; - if ($mut->length > 1 ) {# if ($mut->end - $mut->start ) { - my $l = $mut->start + $mut->length -1; - $text .= $mut->start. '..'. $l; - } - elsif ($mut->length == 0) { - my $tmp_start = $mut->start - 1; - $tmp_start-- if $tmp_start == 0; - $text .= $tmp_start. '^'. $mut->end; - } else { - $text .= $mut->start; - } - - if ($h->alphabet && $h->alphabet eq 'dna') { - $tmp = $mut->start + $h->offset; - $tmp-- if $tmp <= 0; - $mut->start < 1 && $tmp++; - #$text.= ' ('. $h->id. '::'. $tmp; - $tmp2 = $mut->end + $h->offset; - if ( $mut->length > 1 ) { - $mut->end < 1 && $tmp2++; - $text.= ' ('. $h->id. '::'. $tmp. "..". $tmp2; - } - elsif ($mut->length == 0) { - $tmp--; - $tmp-- if $tmp == 0; - $text .= ' ('. $h->id. '::'. $tmp. '^'. $tmp2; - } else { - $text.= ' ('. $h->id. '::'. $tmp; - } - $text .= ')'; - } - push (@entry, $text); - #sequence - push (@entry, - $tag{FeatureQual}. '/upflank: '. $mut->upStreamSeq - ); - $text = ''; - $text = $mut->allele_ori->seq if $mut->allele_ori->seq; - $text .= $sep; - $text .= $mut->allele_mut->seq if $mut->allele_mut->seq; - push (@entry, - wrap($tag{FeatureQual}. '/change: ', $tag{FeatureWrap}, - $text) - ); - - push (@entry, - $tag{FeatureQual}. '/dnflank: '. $mut->dnStreamSeq - ); - #restriction enzyme - if ($mut->restriction_changes ne '') { - $text = $mut->restriction_changes; - $text = wrap($tag{FeatureQual}. '/re_site: ', $tag{FeatureWrap}, $text); - push (@entry, - $text - ); - } - #region - if ($mut->region ) { - $text = $tag{FeatureQual}. '/region: '. $mut->region; - $text .= ';' if $mut->region_value or $mut->region_dist; - $text .= ' '. $mut->region_value if $mut->region_value; - if ($mut->region_dist ) { - $tmp = ''; - $tmp = '+' if $mut->region_dist > 1; - $text .= " (". $tmp. $mut->region_dist. ')'; - } - push (@entry, $text); - } - #CpG - if ($mut->CpG) { - push (@entry, - $tag{FeatureQual}. "/CpG" - ); - } - } - } - # - # RNA - # - elsif ($mut->isa('Bio::Variation::RNAChange') ) { - #collect all non-reference alleles - $self->throw("allele_ori needs to be defined in [$mut]") - if not $mut->allele_ori; - my @alleles = $mut->each_Allele; - #push @alleles, $mut->allele_mut if $mut->allele_mut; - if ($mut->isMutation) { - $sep = '>'; - } else { - $sep = '|'; - } - - my $count = 0; # two alleles - foreach my $allele (@alleles) { - $count++; - my ($variation_number, $change_number) = split /\./, $mut->mut_number; - if ($change_number and $change_number != $count){ - $mut->mut_number("$change_number.$count"); - } - $mut->allele_mut($allele); - push (@entry, - $tag{FeatureKey}. 'RNA'. "; ". $mut->mut_number - ); - #label - $text=$tag{FeatureQual}. '/label: '. $mut->label; - push (@entry, $text); - #proof - if ($mut->proof) { - $text = $tag{FeatureQual}. '/proof: '. $mut->proof; - push (@entry, $text) ; - } - #location - $text = $tag{FeatureQual}. '/location: ' ; - if ($mut->length > 1 ) { - $text .= $mut->start. '..'. $mut->end; - $tmp2 = $mut->end + $h->offset; - } - elsif ($mut->length == 0) { - my $tmp_start = $mut->start; - $tmp_start--; - $tmp_start-- if $tmp_start == 0; - $text .= $tmp_start. '^'. $mut->end; - } else { - $text .= $mut->start; - } - - if ($h->alphabet && $h->alphabet eq 'rna') { - $tmp = $mut->start + $h->offset; - $tmp-- if $tmp <= 0; - #$mut->start < 1 && $tmp++; - #$text.= ' ('. $h->id. '::'. $tmp; - $tmp2 = $mut->end + $h->offset; - #$mut->end < 1 && $tmp2++; - if ( $mut->length > 1 ) { - $text.= ' ('. $h->id. '::'. $tmp. "..". $tmp2; - } - elsif ($mut->length == 0) { - $tmp--; - $text .= ' ('. $h->id. '::'. $tmp. '^'. $tmp2; - } else { - $text.= ' ('. $h->id. '::'. $tmp; - } - - $text .= ')'; - } - push (@entry, $text); - - #sequence - push (@entry, - $tag{FeatureQual}. '/upflank: '. $mut->upStreamSeq - ); - $text = ''; - $text = $mut->allele_ori->seq if $mut->allele_ori->seq; - $text .= $sep; - $text .= $mut->allele_mut->seq if $mut->allele_mut->seq; - push (@entry, - wrap($tag{FeatureQual}. '/change: ', $tag{FeatureWrap}, - $text) - ); - push (@entry, - $tag{FeatureQual}. '/dnflank: '. $mut->dnStreamSeq - ); - #restriction - if ($mut->restriction_changes ne '') { - $text = $mut->restriction_changes; - $text = wrap($tag{FeatureQual}. '/re_site: ', $tag{FeatureWrap}, $text); - push (@entry, - $text - ); - } - #coding - if ($mut->region eq 'coding') { - #codon table - $text = $tag{FeatureQual}. '/codon_table: '; - $text .= $mut->codon_table; - push (@entry, $text); - #codon - - $text = $tag{FeatureQual}. '/codon: '. $mut->codon_ori. $sep; - if ($mut->DNAMutation->label =~ /.*point/) { - $text .= $mut->codon_mut; - } - else { - $text .= '-'; - } - $text .= "; ". $mut->codon_pos; - push (@entry, $text); - } - #region - if ($mut->region ) { - $text = $tag{FeatureQual}. '/region: '. $mut->region; - $text .= ';' if $mut->region_value or $mut->region_dist; - $text .= ' '. $mut->region_value if $mut->region_value; - if ($mut->region_dist ) { - $tmp = ''; - $tmp = '+' if $mut->region_dist > 1; - $text .= " (". $tmp. $mut->region_dist. ')'; - } - push (@entry, $text); - } - } - } - # - # AA - # - elsif ($mut->isa('Bio::Variation::AAChange')) { - #collect all non-reference alleles - $self->throw("allele_ori needs to be defined in [$mut]") - if not $mut->allele_ori; - if ($mut->isMutation) { - $sep = '>'; - } else { - $sep = '|'; - } - my @alleles = $mut->each_Allele; - #push @alleles, $mut->allele_mut if $mut->allele_mut; - my $count = 0; # two alleles - foreach my $allele (@alleles) { - $count++; - my ($variation_number, $change_number) = split /\./, $mut->mut_number; - if ($change_number and $change_number != $count){ - $mut->mut_number("$change_number.$count"); - } - $mut->allele_mut($allele); - push (@entry, - $tag{FeatureKey}. 'AA'. "; ". $mut->mut_number - ); - #label - $text=$tag{FeatureQual}. '/label: '. $mut->label; - push (@entry, $text) ; - #proof - if ($mut->proof) { - $text = $tag{FeatureQual}. '/proof: '. $mut->proof; - push (@entry, $text) ; - } - #location - $text = $tag{FeatureQual}. '/location: '. - #$mut->id. '; '. $mut->start; - $mut->start; - if ($mut->length > 1 ) { - $tmp = $mut->start + $mut->length -1; - $text .= '..'. $tmp; - } - push (@entry, $text); - #sequence - $text = ''; - $text = $mut->allele_ori->seq if $mut->allele_ori->seq; - $text .= $sep; - $text .= $mut->allele_mut->seq if $mut->allele_mut->seq; - push (@entry, - wrap($tag{FeatureQual}. '/change: ', $tag{FeatureWrap}, - $text) - ); - #region - if ($mut->region ) { - $text = $tag{FeatureQual}. '/region: '. $mut->region; - $text .= ';' if $mut->region_value or $mut->region_dist; - $text .= ' '. $mut->region_value if $mut->region_value; - if ($mut->region_dist ) { - $tmp = ''; - $tmp = '+' if $mut->region_dist > 1; - $text .= " (". $tmp. $mut->region_dist. ')'; - } - push (@entry, $text); - } - } - } - } - } - push (@entry, - "//" - ); - my $str = join ("\n", @entry). "\n"; - $str =~ s/\t/ /g; - $self->_print($str); - } - return 1; -} - -1; diff --git a/lib/Bio/Variation/IO/xml.pm b/lib/Bio/Variation/IO/xml.pm deleted file mode 100644 index c7c3e3b8a..000000000 --- a/lib/Bio/Variation/IO/xml.pm +++ /dev/null @@ -1,567 +0,0 @@ -# BioPerl module for Bio::Variation::IO::xml -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself -# - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Variation::IO::xml - XML sequence variation input/output stream - -=head1 SYNOPSIS - -Do not use this module directly. Use it via the Bio::Variation::IO class. - -=head1 DESCRIPTION - -This object can transform L objects to and from XML -file databases. - -The XML format, although consistent, is still evolving. The current -DTD for it is at L. - -=head1 REQUIREMENTS - -To use this code you need the module L which creates an -interface to L to read XML and modules L and -L to write XML out. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Variation::IO::xml; - -use vars qw($seqdiff $var $prevdnaobj $prevrnaobj $prevaaobj); -use strict; - -use XML::Twig; -use XML::Writer 0.4; -use IO::String; -use Bio::Variation::SeqDiff; -use Bio::Variation::DNAMutation; -use Bio::Variation::RNAChange; -use Bio::Variation::AAChange; -use Bio::Variation::Allele; - -use base qw(Bio::Variation::IO); - -# _initialize is where the heavy stuff will happen when new is called - -sub new { - my ($class,@args) = @_; - my $self = bless {}, $class; - $self->_initialize(@args); - return $self; -} - -sub _initialize { - my($self,@args) = @_; - return unless $self->SUPER::_initialize(@args); -} - -=head2 next - - Title : next - Usage : $haplo = $stream->next() - Function: returns the next seqDiff in the stream - Returns : Bio::Variation::SeqDiff object - Args : NONE - -=cut - - -sub _seqDiff { - my ($t, $term)= @_; - $seqdiff->id( $term->att('id') ); - $seqdiff->alphabet( $term->att('moltype') ); - $seqdiff->offset( $term->att('offset') ); - - foreach my $child ($term->children) { - _variant($t, $child); - } -} - -sub _variant { - my ($t, $term)= @_; - my $var; - my $att = $term->atts(); - my ($variation_number, $change_number) = split /\./, $att->{number}; - - # if more than two alleles - if ($variation_number and $change_number and $change_number > 1 ) { - my $a3 = Bio::Variation::Allele->new; - $a3->seq( $term->first_child_text('allele_mut') ) - if $term->first_child_text('allele_mut'); - if ($term->gi eq 'DNA') { - $prevdnaobj->add_Allele($a3); - } - elsif ($term->gi eq 'RNA') { - $prevrnaobj->add_Allele($a3); - } else { # AA - $prevaaobj->add_Allele($a3); - } - } else { # create new variants - if ($term->gi eq 'DNA') { - $var = Bio::Variation::DNAMutation->new(); - } - elsif ($term->gi eq 'RNA') { - $var = Bio::Variation::RNAChange->new(); - } else { # AA - $var = Bio::Variation::AAChange->new(); - } - - # these are always present - $var->start( $att->{start} ); - $var->end( $att->{end}); - $var->length($att->{len}); - $var->mut_number( $att->{number}); - $var->upStreamSeq($term->first_child_text('upFlank')); - $var->dnStreamSeq($term->first_child_text('dnFlank')); - $var->proof($term->first_child_text('proof')); - - # region - my $region = $term->first_child('region'); - if ($region) { - $var->region($region->text); - my $region_atts = $region->atts; - $var->region_value( $region_atts->{value} ) - if $region_atts->{value}; - $var->region_dist( $region_atts->{dist} ) - if $region_atts->{dist}; - } - - # alleles - my $a1 = Bio::Variation::Allele->new; - $a1->seq($term->first_child_text('allele_ori') ) - if $term->first_child_text('allele_ori'); - $var->allele_ori($a1); - my $a2 = Bio::Variation::Allele->new; - $a2->seq($term->first_child_text('allele_mut') ) - if $term->first_child_text('allele_mut'); - $var->isMutation(1) if $term->att('isMutation'); - $var->allele_mut($a2); - $var->add_Allele($a2); - $var->length( $term->att('length') ); - $seqdiff->add_Variant($var); - - # variant specific code - if ($term->gi eq 'DNA') { - $prevdnaobj = $var; - } - elsif ($term->gi eq 'RNA') { - my $codon = $term->first_child('codon'); - if ($codon) { - my $codon_atts = $codon->atts; - $var->codon_table( $codon->att('codon_table') ) - if $codon_atts->{codon_table} and $codon_atts->{codon_table} != 1; - $var->codon_pos( $codon->att('codon_pos') ) - if $codon_atts->{codon_pos}; - } - $prevdnaobj->RNAChange($var); - $var->DNAMutation($prevdnaobj); - $prevrnaobj = $var; - } else { - $prevrnaobj->AAChange($var); - $var->RNAChange($prevrnaobj); - $prevaaobj = $var; - } - } -} - -sub next { - my( $self ) = @_; - - local $/ = "\n"; - return unless my $entry = $self->_readline; -# print STDERR "|$entry|"; - return unless $entry =~ /^\W*new; - - # create new parser object - my $twig_handlers = {'seqDiff' => \&_seqDiff }; - my $t = XML::Twig->new ( TwigHandlers => $twig_handlers, - KeepEncoding => 1 ); - $t->parse($entry); - - return $seqdiff; -} - -=head2 write - - Title : write - Usage : $stream->write(@haplos) - Function: writes the $seqDiff objects into the stream - Returns : 1 for success and 0 for error - Args : Bio::Variation::SeqDiff object - -=cut - -sub write { - my ($self,@h) = @_; - - if( !defined $h[0] ) { - $self->throw("Attempting to write with no information!"); - } - my $str; - my $output = IO::String->new($str); - my $w = XML::Writer->new(OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 4 ); - - foreach my $h (@h) { - # - # seqDiff - # - $h->alphabet || $self->throw("Moltype of the reference sequence is not set!"); - my $hasAA = 0; - foreach my $mut ($h->each_Variant) { - $hasAA = 1 if $mut->isa('Bio::Variation::AAChange'); - } - if ($hasAA) { - $w->startTag("seqDiff", - "id" => $h->id, - "moltype" => $h->alphabet, - "offset" => $h->offset, - "sysname" => $h->sysname, - "trivname" => $h->trivname - ); - } else { - $w->startTag("seqDiff", - "id" => $h->id, - "moltype" => $h->alphabet, - "offset" => $h->offset, - "sysname" => $h->sysname - ); - } - my @allvariants = $h->each_Variant; - #print "allvars:", scalar @allvariants, "\n"; - my %variants = (); - foreach my $mut ($h->each_Variant) { - #print STDERR $mut->mut_number, "\t", $mut, "\t", - #$mut->proof, "\t", scalar $mut->each_Allele, "\n"; - push @{$variants{$mut->mut_number} }, $mut; - } - foreach my $var (sort keys %variants) { - foreach my $mut (@{$variants{$var}}) { - # - # DNA - # - if( $mut->isa('Bio::Variation::DNAMutation') ) { - $mut->isMutation(0) if not $mut->isMutation; - my @alleles = $mut->each_Allele; - my $count = 0; - foreach my $allele (@alleles) { - $count++; - my ($variation_number, $change_number) = split /\./, $mut->mut_number; - if ($change_number and $change_number != $count){ - $mut->mut_number("$change_number.$count"); - } - $mut->allele_mut($allele); - $w->startTag("DNA", - "number" => $mut->mut_number, - "start" => $mut->start, - "end" => $mut->end, - "length" => $mut->length, - "isMutation" => $mut->isMutation - ); - if ($mut->label) { - foreach my $label (split ', ', $mut->label) { - $w->startTag("label"); - $w->characters($label); - $w->endTag; - } - } - if ($mut->proof) { - $w->startTag("proof"); - $w->characters($mut->proof ); - $w->endTag; - } - if ($mut->upStreamSeq) { - $w->startTag("upFlank"); - $w->characters($mut->upStreamSeq ); - $w->endTag; - } - #if ( $mut->isMutation) { - #if ($mut->allele_ori) { - $w->startTag("allele_ori"); - $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ; - $w->endTag; - #} - #if ($mut->allele_mut) { - $w->startTag("allele_mut"); - $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq; - $w->endTag; - #} - #} - if ($mut->dnStreamSeq) { - $w->startTag("dnFlank"); - $w->characters($mut->dnStreamSeq ); - $w->endTag; - } - if ($mut->restriction_changes) { - $w->startTag("restriction_changes"); - $w->characters($mut->restriction_changes); - $w->endTag; - } - if ($mut->region) { - if($mut->region_value and $mut->region_dist) { - $w->startTag("region", - "value" => $mut->region_value, - "dist" => $mut->region_dist - ); - } - elsif($mut->region_value) { - $w->startTag("region", - "value" => $mut->region_value - ); - } - elsif($mut->region_dist) { - $w->startTag("region", - "dist" => $mut->region_dist - ); - } else { - $w->startTag("region"); - } - $w->characters($mut->region ); - $w->endTag; - } - $w->endTag; #DNA - } - } - # - # RNA - # - elsif( $mut->isa('Bio::Variation::RNAChange') ) { - $mut->isMutation(0) if not $mut->isMutation; - my @alleles = $mut->each_Allele; - my $count = 0; - foreach my $allele (@alleles) { - $count++; - my ($variation_number, $change_number) = split /\./, $mut->mut_number; - if ($change_number and $change_number != $count){ - $mut->mut_number("$change_number.$count"); - } - $mut->allele_mut($allele); - $w->startTag("RNA", - "number" => $mut->mut_number, - "start" => $mut->start, - "end" => $mut->end, - "length" => $mut->length, - "isMutation" => $mut->isMutation - ); - - if ($mut->label) { - foreach my $label (split ', ', $mut->label) { - $w->startTag("label"); - $w->characters($label ); - $w->endTag; - } - } - if ($mut->proof) { - $w->startTag("proof"); - $w->characters($mut->proof ); - $w->endTag; - } - if ($mut->upStreamSeq) { - $w->startTag("upFlank"); - $w->characters($mut->upStreamSeq ); - $w->endTag; - } - #if ( $mut->isMutation) { - if ($mut->allele_ori) { - $w->startTag("allele_ori"); - $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ; - $w->endTag; - } - if ($mut->allele_mut) { - $w->startTag("allele_mut"); - $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq ; - $w->endTag; - } - #} - if ($mut->dnStreamSeq) { - $w->startTag("dnFlank"); - $w->characters($mut->dnStreamSeq ); - $w->endTag; - } - if ($mut->region eq 'coding') { - if (! $mut->codon_mut) { - $w->startTag("codon", - "codon_ori" => $mut->codon_ori, - "codon_pos" => $mut->codon_pos - ); - } else { - $w->startTag("codon", - "codon_ori" => $mut->codon_ori, - "codon_mut" => $mut->codon_mut, - "codon_pos" => $mut->codon_pos - ); - } - $w->endTag; - } - if ($mut->codon_table != 1) { - $w->startTag("codon_table"); - $w->characters($mut->codon_table); - $w->endTag; - } - - if ($mut->restriction_changes) { - $w->startTag("restriction_changes"); - $w->characters($mut->restriction_changes); - $w->endTag; - } - if ($mut->region) { - if($mut->region_value and $mut->region_dist) { - $w->startTag("region", - "value" => $mut->region_value, - "dist" => $mut->region_dist - ); - } - elsif($mut->region_value) { - $w->startTag("region", - "value" => $mut->region_value - ); - } - elsif($mut->region_dist) { - $w->startTag("region", - "dist" => $mut->region_dist - ); - } else { - $w->startTag("region"); - } - $w->characters($mut->region ); - $w->endTag; - } - $w->endTag; #RNA - } - } - # - # AA - # - elsif( $mut->isa('Bio::Variation::AAChange') ) { - $mut->isMutation(0) if not $mut->isMutation; - my @alleles = $mut->each_Allele; - my $count = 0; - foreach my $allele (@alleles) { - $count++; - my ($variation_number, $change_number) = split /\./, $mut->mut_number; - if ($change_number and $change_number != $count){ - $mut->mut_number("$change_number.$count"); - } - $mut->allele_mut($allele); - $w->startTag("AA", - "number" => $mut->mut_number, - "start" => $mut->start, - "end" => $mut->end, - "length" => $mut->length, - "isMutation" => $mut->isMutation - ); - - if ($mut->label) { - foreach my $label (split ', ', $mut->label) { - $w->startTag("label"); - $w->characters($label ); - $w->endTag; - } - } - if ($mut->proof) { - $w->startTag("proof"); - $w->characters($mut->proof ); - $w->endTag; - } - #if ( $mut->isMutation) { - if ($mut->allele_ori) { - $w->startTag("allele_ori"); - $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq; - $w->endTag; - } - if ($mut->allele_mut) { - $w->startTag("allele_mut"); - $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq; - $w->endTag; - } - #} - if ($mut->region) { - if($mut->region_value and $mut->region_dist) { - $w->startTag("region", - "value" => $mut->region_value, - "dist" => $mut->region_dist - ); - } - elsif($mut->region_value) { - $w->startTag("region", - "value" => $mut->region_value - ); - } - elsif($mut->region_dist) { - $w->startTag("region", - "dist" => $mut->region_dist - ); - } else { - $w->startTag("region"); - } - $w->characters($mut->region ); - $w->endTag; - } - $w->endTag; #AA - } - } - } - } - } - $w->endTag; - - - $w->end; - $self->_print($str); - $output = undef; - return 1; -} - -1; diff --git a/lib/Bio/Variation/README b/lib/Bio/Variation/README deleted file mode 100644 index 06dc21077..000000000 --- a/lib/Bio/Variation/README +++ /dev/null @@ -1,30 +0,0 @@ - - README for Bio::Variation classes - - -These classes are part of "Computational Mutation Expression Toolkit" -project at European Bioinformatics Institute -, but they are written to be -as general as possinble. - -Bio::Variation name space contains modules to store sequence variation -information as differences between the reference sequence and changes -sequences. Also included are classes to write out and recrete objects -from EMBL-like flat files and XML. Lastly, there are simple classes to -calculate values for sequence change objects. - -See "Computational Mutation Expression Toolkit" web pages for more -information: - - http://www.ebi.ac.uk/mutations/toolkit/ - - -Send bug reports using the bioperl bug-tracking system at - https://github.com/bioperl/bioperl-live/issues. - -Send general comments, questions, and feature requests to the bioperl -mailing list: - - bioperl-l@bioperl.org - -Heikki Lehväslaiho diff --git a/lib/Bio/Variation/RNAChange.pm b/lib/Bio/Variation/RNAChange.pm deleted file mode 100644 index 846a6978a..000000000 --- a/lib/Bio/Variation/RNAChange.pm +++ /dev/null @@ -1,622 +0,0 @@ -# -# BioPerl module for Bio::Variation::RNAChange -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Variation::RNAChange - Sequence change class for RNA level - -=head1 SYNOPSIS - - $rnachange = Bio::Variation::RNAChange->new - ('-start' => $start, - '-end' => $end, - '-length' => $len, - '-codon_pos' => $cp, - '-upStreamSeq' => $upflank, - '-dnStreamSeq' => $dnflank, - '-proof' => $proof, - '-isMutation' => 1, - '-mut_number' => $mut_number - ); - $a1 = Bio::Variation::Allele->new; - $a1->seq('a'); - $rnachange->allele_ori($a1); - my $a2 = Bio::Variation::Allele->new; - $a2->seq('t'); - $rnachange->add_Allele($a2); - $rnachange->allele_mut($a2); - - print "The codon change is ", $rnachange->codon_ori, - ">", $rnachange->codon_mut, "\n"; - - # add it to a SeqDiff container object - $seqdiff->add_Variant($rnachange); - - # and create links to and from DNA level mutation objects - $rnachange->DNAMutation($dnamut); - $dnamut->RNAChange($rnachange); - -=head1 DESCRIPTION - -The instantiable class Bio::Variation::DNAMutation describes basic -sequence changes at RNA molecule level. It uses methods defined in -superclass Bio::Variation::VariantI. See L -for details. - -You are normally expected to create a corresponding -Bio::Variation::DNAMutation object even if mutation is defined at -RNA level. The numbering follows then cDNA numbering. Link the -DNAMutation object to the RNAChange object using the method -DNAMutation(). If the variation described by a RNAChange object is -translated, link the corresponding Bio::Variation::AAChange object -to it using method AAChange(). See L and -L for more information. - - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Variation::RNAChange; -use strict; - -# Object preamble - inheritance - -use Bio::Tools::CodonTable; - -use base qw(Bio::Variation::VariantI); - -sub new { - my($class,@args) = @_; - my $self = $class->SUPER::new(@args); - - my ($start, $end, $length, $strand, $primary, $source, - $frame, $score, $gff_string, - $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, - $label, $status, $proof, $region, $region_value, $region_dist, $numbering, - $mut_number, $isMutation, - $codon_ori, $codon_mut, $codon_pos, $codon_table, $cds_end) = - $self->_rearrange([qw(START - END - LENGTH - STRAND - PRIMARY - SOURCE - FRAME - SCORE - GFF_STRING - ALLELE_ORI - ALLELE_MUT - UPSTREAMSEQ - DNSTREAMSEQ - LABEL - STATUS - PROOF - REGION - REGION_VALUE - REGION_DIST - NUMBERING - MUT_NUMBER - ISMUTATION - CODON_ORI - CODON_MUT - CODON_POS - TRANSLATION_TABLE - CDS_END - )],@args); - - $self->primary_tag("Variation"); - - $self->{ 'alleles' } = []; - - $start && $self->start($start); - $end && $self->end($end); - $length && $self->length($length); - $strand && $self->strand($strand); - $primary && $self->primary_tag($primary); - $source && $self->source_tag($source); - $frame && $self->frame($frame); - $score && $self->score($score); - $gff_string && $self->_from_gff_string($gff_string); - - $allele_ori && $self->allele_ori($allele_ori); - $allele_mut && $self->allele_mut($allele_mut); - $upstreamseq && $self->upStreamSeq($upstreamseq); - $dnstreamseq && $self->dnStreamSeq($dnstreamseq); - - $label && $self->label($label); - $status && $self->status($status); - $proof && $self->proof($proof); - $region && $self->region($region); - $region_value && $self->region_value($region_value); - $region_dist && $self->region_dist($region_dist); - $numbering && $self->numbering($numbering); - $mut_number && $self->mut_number($mut_number); - $isMutation && $self->isMutation($isMutation); - - $codon_ori && $self->codon_ori($codon_ori); - $codon_mut && $self->codon_mut($codon_mut); - $codon_pos && $self->codon_pos($codon_pos); - $codon_table && $self->codon_table($codon_table); - $cds_end && $self->cds_end($cds_end); - return $self; # success - we hope! -} - - -=head2 codon_ori - - Title : codon_ori - Usage : $obj->codon_ori(); - Function: - - Sets and returns codon_ori triplet. If value is not set, - creates the codon triplet from the codon position and - flanking sequences. The string has to be three characters - long. The character content is not checked. - - Example : - Returns : string - Args : string - -=cut - -sub codon_ori { - my ($self,$value) = @_; - if (defined $value) { - if (length $value != 3) { - $self->warn("Codon string \"$value\" is not three characters long"); - } - $self->{'codon_ori'} = $value; - } - elsif (! $self->{'codon_ori'}) { - my $codon_ori = ''; - - if ($self->region eq 'coding' && $self->start && $self->start >= 1) { - - $self->warn('Codon position is not defined') - if not defined $self->codon_pos; - $self->warn('Upstream flanking sequence is not defined') - if not defined $self->upStreamSeq; - $self->warn('Downstream flanking sequence is not defined') - if not defined $self->dnStreamSeq; - - my $cpos = $self->codon_pos; - $codon_ori = substr($self->upStreamSeq, -$cpos +1 , $cpos-1); - $codon_ori .= substr($self->allele_ori->seq, 0, 4-$cpos) - if $self->allele_ori and $self->allele_ori->seq; - $codon_ori .= substr($self->dnStreamSeq, 0, 3-length($codon_ori)); - } - $self->{'codon_ori'} = lc $codon_ori; - } - return $self->{'codon_ori'}; -} - - -=head2 codon_mut - - Title : codon_mut - Usage : $obj->codon_mut(); - Function: - - Sets and returns codon_mut triplet. If value is not - set, creates the codon triplet from the codon position and - flanking sequences. Return undef for other than point mutations. - - Example : - Returns : string - Args : string - -=cut - - -sub codon_mut { - my ($self,$value) = @_; - if (defined $value) { - if (length $value != 3 ) { - $self->warn("Codon string \"$value\" is not three characters long"); - } - $self->{'codon_mut'} = $value; - } - else { - my $codon_mut = ''; - if ($self->allele_ori->seq and $self->allele_mut->seq and - CORE::length($self->allele_ori->seq) == 1 and - CORE::length($self->allele_mut->seq) == 1 and - $self->region eq 'coding' and $self->start >= 1) { - - $self->warn('Codon position is not defined') - if not defined $self->codon_pos; - $self->warn('Upstream flanking sequnce is not defined') - if not defined $self->upStreamSeq; - $self->warn('Downstream flanking sequnce is not defined') - if not defined $self->dnStreamSeq; - $self->throw('Mutated allele is not defined') - if not defined $self->allele_mut; - - my $cpos = $self->codon_pos; - $codon_mut = substr($self->upStreamSeq, -$cpos +1 , $cpos-1); - $codon_mut .= substr($self->allele_mut->seq, 0, 4-$cpos) - if $self->allele_mut and $self->allele_mut->seq; - $codon_mut .= substr($self->dnStreamSeq, 0, 3-length($codon_mut)); - - $self->{'codon_mut'} = lc $codon_mut; - } - } - return $self->{'codon_mut'}; -} - - -=head2 codon_pos - - Title : codon_pos - Usage : $obj->codon_pos(); - Function: - - Sets and returns the position of the mutation start in the - codon. If value is not set, returns false. - - Example : - Returns : 1,2,3 - Args : none if get, the new value if set - -=cut - - -sub codon_pos { - my ($self,$value) = @_; - if( defined $value) { - if ( $value !~ /[123]/ ) { - $self->throw("'$value' is not a valid codon position"); - } - $self->{'codon_pos'} = $value; - } - return $self->{'codon_pos'}; -} - - -=head2 codon_table - - Title : codon_table - Usage : $obj->codon_table(); - Function: - - Sets and returns the codon table id of the RNA - If value is not set, returns 1, 'universal' code, as the default. - - Example : - Returns : integer - Args : none if get, the new value if set - -=cut - - -sub codon_table { - my ($self,$value) = @_; - if( defined $value) { - if ( not $value =~ /^\d$/ ) { - $self->throw("'$value' is not a valid codon table ID\n". - "Has to be a positive integer. Defaulting to 1\n"); - } else { - $self->{'codon_table'} = $value; - } - } - if( ! exists $self->{'codon_table'} ) { - return 1; - } else { - return $self->{'codon_table'}; - } -} - - -=head2 DNAMutation - - Title : DNAMutation - Usage : $mutobj = $obj->DNAMutation; - : $mutobj = $obj->DNAMutation($objref); - Function: Returns or sets the link-reference to a mutation/change object. - If there is no link, it will return undef - Returns : an obj_ref or undef - -=cut - - -sub DNAMutation { - my ($self,$value) = @_; - if (defined $value) { - if( ! $value->isa('Bio::Variation::DNAMutation') ) { - $self->throw("Is not a Bio::Variation::DNAMutation object but a [$self]"); - return; - } - else { - $self->{'DNAMutation'} = $value; - } - } - unless (exists $self->{'DNAMutation'}) { - return; - } else { - return $self->{'DNAMutation'}; - } -} - - -=head2 AAChange - - Title : AAChange - Usage : $mutobj = $obj->AAChange; - : $mutobj = $obj->AAChange($objref); - Function: Returns or sets the link-reference to a mutation/change object. - If there is no link, it will return undef - Returns : an obj_ref or undef - -=cut - -sub AAChange { - my ($self,$value) = @_; - if (defined $value) { - if( ! $value->isa('Bio::Variation::AAChange') ) { - $self->throw("Is not a Bio::Variation::AAChange object but a [$self]"); - return; - } - else { - $self->{'AAChange'} = $value; - } - } - unless (exists $self->{'AAChange'}) { - return; - } else { - return $self->{'AAChange'}; - } -} - - -=head2 exons_modified - - Title : exons_modified - Usage : $modified = $obj->exons_modified; - : $modified = $obj->exons_modified(1); - Function: Returns or sets information (example: a simple boolean flag) about - the modification of exons as a result of a mutation. - -=cut - -sub exons_modified { - my ($self,$value)=@_; - if (defined($value)) { - $self->{'exons_modified'}=$value; - } - return ($self->{'exons_modified'}); -} - -=head2 region - - Title : region - Usage : $obj->region(); - Function: - - Sets and returns the name of the sequence region type or - protein domain at this location. If value is not set, - returns false. - - Example : - Returns : string - Args : string - -=cut - - - -sub region { - my ($self,$value) = @_; - if( defined $value) { - $self->{'region'} = $value; - } - elsif (not defined $self->{'region'}) { - - $self->warn('Mutation start position is not defined') - if not defined $self->start and $self->verbose; - $self->warn('Mutation end position is not defined') - if not defined $self->end and $self->verbose; - $self->warn('Length of the CDS is not defined, the mutation can be beyond coding region!') - if not defined $self->cds_end and $self->verbose; - - $self->region('coding'); - if ($self->end && $self->end < 0 ){ - $self->region('5\'UTR'); - } - elsif ($self->start && $self->cds_end && $self->start > $self->cds_end ) { - $self->region('3\'UTR'); - } - } - return $self->{'region'}; -} - -=head2 cds_end - - Title : cds_end - Usage : $cds_end = $obj->get_cds_end(); - Function: - - Sets or returns the cds_end from the beginning of the DNA sequence - to the coordinate start used to describe variants. - Should be the location of the last nucleotide of the - terminator codon of the gene. - - Example : - Returns : value of cds_end, a scalar - Args : - -=cut - - - -sub cds_end { - my ($self, $value) = @_; - if (defined $value) { - $self->warn("[$value] is not a good value for sequence position") - if not $value =~ /^\d+$/ ; - $self->{'cds_end'} = $value; - } else { - $self->{'cds_end'} = $self->SeqDiff->cds_end if $self->SeqDiff; - } - return $self->{'cds_end'}; -} - - -=head2 label - - Title : label - Usage : $obj->label(); - Function: - - Sets and returns mutation event label(s). If value is not - set, or no argument is given returns false. Each - instantiable subclass of L needs - to implement this method. Valid values are listed in - 'Mutation event controlled vocabulary' in - http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. - - Example : - Returns : string - Args : string - -=cut - -sub label { - my ($self) = @_; - my ($o, $m, $type); - $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; - $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; - - my $ct = Bio::Tools::CodonTable -> new ( -id => $self->codon_table ); - if ($o and $m and CORE::length($o) == 1 and CORE::length($m) == 1) { - if (defined $self->AAChange) { - if ($self->start > 0 and $self->start < 4 ) { - $type = 'initiation codon'; - } - elsif ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) { - #AAChange->allele_ori and $self->AAChange->allele_ori->seq eq '*' ) { - $type = 'termination codon'; - } - elsif ($self->codon_mut && $ct->is_ter_codon($self->codon_mut) ) { - #elsif ($self->AAChange->allele_mut and $self->AAChange->allele_mut->seq eq "*") { - $type = 'nonsense'; - } - elsif ($o and $m and ($o eq $m or - $self->AAChange->allele_ori->seq eq - $self->AAChange->allele_mut->seq)) { - $type = 'silent'; - } else { - $type = 'missense'; - } - } else { - $type = 'unknown'; - } - } else { - my $len = 0; - $len = CORE::length($o) if $o; - $len -= CORE::length($m) if $m; - if ($len%3 == 0 ) { - $type = 'inframe'; - } else { - $type = 'frameshift'; - } - if (not $m ) { - $type .= ', '. 'deletion'; - } - elsif (not $o ) { - $type .= ', '. 'insertion'; - } - else { - $type .= ', '. 'complex'; - } - if ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) { - $type .= ', '. 'termination codon'; - } - } - - $self->{'label'} = $type; - return $self->{'label'}; -} - - -=head2 _change_codon_pos - - Title : _change_codon_pos - Usage : $newCodonPos = _change_codon_pos($myCodonPos, 5) - Function: - - Keeps track of the codon position in a changeing sequence - - Returns : codon_pos = integer 1, 2 or 3 - Args : valid codon position - signed integer offset to a new location in sequence - -=cut - - -sub _change_codon_pos ($$) { - my ($cpos, $i) = @_; - - $cpos = ($cpos + $i%3)%3; - if ($cpos > 3 ) { - $cpos = $cpos - 3; - } - elsif ($cpos < 1 ) { - $cpos = $cpos + 3; - } - return $cpos; -} - -1; diff --git a/lib/Bio/Variation/SNP.pm b/lib/Bio/Variation/SNP.pm deleted file mode 100644 index bafdf0d12..000000000 --- a/lib/Bio/Variation/SNP.pm +++ /dev/null @@ -1,231 +0,0 @@ -# bioperl module for Bio::Variation::SNP -# -# Copyright Allen Day , Stan Nelson -# Human Genetics, UCLA Medical School, University of California, Los Angeles - -=head1 NAME - -Bio::Variation::SNP - submitted SNP - -=head1 SYNOPSIS - - $SNP = Bio::Variation::SNP->new (); - -=head1 DESCRIPTION - -Inherits from Bio::Variation::SeqDiff and Bio::Variation::Allele, with -additional methods that are (db)SNP specific (ie, refSNP/subSNP IDs, batch -IDs, validation methods). - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to one -of the Bioperl mailing lists. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - -Allen Day Eallenday@ucla.eduE - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Variation::SNP; - -use strict; -use vars qw($AUTOLOAD); -use Bio::Root::Root; - -use base qw(Bio::Variation::SeqDiff Bio::Variation::Allele); - -=head2 get/set-able methods - - Usage : $is = $snp->method() - Function: for getting/setting attributes - Returns : a value. probably a scalar. - Args : if you're trying to set an attribute, pass in the new value. - - Methods: - -------- - id - type - observed - seq_5 - seq_3 - ncbi_build - ncbi_chr_hits - ncbi_ctg_hits - ncbi_seq_loc - ucsc_build - ucsc_chr_hits - ucsc_ctg_hits - heterozygous - heterozygous_SE - validated - genotype - handle - batch_id - method - locus_id - symbol - mrna - protein - functional_class - -=cut - -#' -my %OK_AUTOLOAD = ( - id => '', - type => '', - observed => [], - seq_5 => '', - seq_3 => '', - ncbi_build => '', - ncbi_chr_hits => '', - ncbi_ctg_hits => '', - ncbi_seq_loc => '', - ucsc_build => '', - ucsc_chr_hits => '', - ucsc_ctg_hits => '', - heterozygous => '', - heterozygous_SE => '', - validated => '', - genotype => '', - handle => '', - batch_id => '', - method => '', - locus_id => '', - symbol => '', - mrna => '', - protein => '', - functional_class => '', - ); - -sub AUTOLOAD { - my $self = shift; - my $param = $AUTOLOAD; - $param =~ s/.*:://; - $self->throw(__PACKAGE__." doesn't implement $param") unless defined $OK_AUTOLOAD{$param}; - - if( ref $OK_AUTOLOAD{$param} eq 'ARRAY' ) { - push @{$self->{$param}}, shift if @_; - return $self->{$param}->[scalar(@{$self->{$param}}) - 1]; - } else { - $self->{$param} = shift if @_; - return $self->{$param}; - } -} - - -#foreach my $slot (keys %RWSLOT){ -# no strict "refs"; #add class methods to package -# *$slot = sub { -# shift; -# $RWSLOT{$slot} = shift if @_; -# return $RWSLOT{$slot}; -# }; -#} - - -=head2 is_subsnp - - Title : is_subsnp - Usage : $is = $snp->is_subsnp() - Function: returns 1 if $snp is a subSNP - Returns : 1 or undef - Args : NONE - -=cut - -sub is_subsnp { - return shift->{is_subsnp}; -} - -=head2 subsnp - - Title : subsnp - Usage : $subsnp = $snp->subsnp() - Function: returns the currently active subSNP of $snp - Returns : Bio::Variation::SNP - Args : NONE - -=cut - -sub subsnp { - my $self = shift; - return $self->{subsnps}->[ scalar($self->each_subsnp) - 1 ]; -} - -=head2 add_subsnp - - Title : add_subsnp - Usage : $subsnp = $snp->add_subsnp() - Function: pushes the previous value returned by subsnp() onto a stack, - accessible with each_subsnp(). - Sets return value of subsnp() to a new Bio::Variation::SNP - object, and returns that object. - Returns : Bio::Varitiation::SNP - Args : NONE - -=cut - -sub add_subsnp { - my $self = shift; - $self->throw("add_subsnp(): cannot add subSNP to subSNP, only to refSNP") - if $self->is_subsnp; - - my $subsnp = Bio::Variation::SNP->new; - push @{$self->{subsnps}}, $subsnp; - $self->subsnp->{is_subsnp} = 1; - return $self->subsnp; -} - -=head2 each_subsnp - - Title : each_subsnp - Usage : @subsnps = $snp->each_subsnp() - Function: returns a list of the subSNPs of a refSNP - Returns : list - Args : NONE - -=cut - -sub each_subsnp { - my $self = shift; - $self->throw("each_subsnp(): cannot be called on a subSNP") - if $self->is_subsnp; - return @{$self->{subsnps}}; -} - -1; diff --git a/lib/Bio/Variation/SeqDiff.pm b/lib/Bio/Variation/SeqDiff.pm deleted file mode 100644 index c19a47137..000000000 --- a/lib/Bio/Variation/SeqDiff.pm +++ /dev/null @@ -1,1147 +0,0 @@ -# bioperl module for Bio::Variation::SeqDiff -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself -# -# POD documentation - main docs before the code - -# cds_end definition? - -=head1 NAME - -Bio::Variation::SeqDiff - Container class for mutation/variant descriptions - -=head1 SYNOPSIS - - $seqDiff = Bio::Variation::SeqDiff->new ( - -id => $M20132, - -alphabet => 'rna', - -gene_symbol => 'AR' - -chromosome => 'X', - -numbering => 'coding' - ); - # get a DNAMutation object somehow - $seqDiff->add_Variant($dnamut); - print $seqDiff->sys_name(), "\n"; - -=head1 DESCRIPTION - -SeqDiff stores Bio::Variation::VariantI object references and -descriptive information common to all changes in a sequence. Mutations -are understood to be any kind of sequence markers and are expected to -occur in the same chromosome. See L for details. - -The methods of SeqDiff are geared towards describing mutations in -human genes using gene-based coordinate system where 'A' of the -initiator codon has number 1 and the one before it -1. This is -according to conventions of human genetics. - -There will be class Bio::Variation::Genotype to describe markers in -different chromosomes and diploid genototypes. - -Classes implementing Bio::Variation::VariantI interface are -Bio::Variation::DNAMutation, Bio::Variation::RNAChange, and -Bio::Variation::AAChange. See L, -L, L, and -L for more information. - -Variant objects can be added using two ways: an array passed to the -constructor or as individual Variant objects with add_Variant -method. - - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 CONTRIBUTORS - -Eckhard Lehmann, ecky@e-lehmann.de - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Variation::SeqDiff; - -use strict; -use Bio::Tools::CodonTable; -use Bio::PrimarySeq; - -use base qw(Bio::Root::Root); - - -=head2 new - - Title : new - Usage : $seqDiff = Bio::Variation::SeqDiff->new; - Function: generates a new Bio::Variation::SeqDiff - Returns : reference to a new object of class SeqDiff - Args : - -=cut - -sub new { - my($class,@args) = @_; - my $self = $class->SUPER::new(@args); - - my($id, $sysname, $trivname, $chr, $gene_symbol, - $desc, $alphabet, $numbering, $offset, $rna_offset, $rna_id, $cds_end, - $dna_ori, $dna_mut, $rna_ori, $rna_mut, $aa_ori, $aa_mut - #@variants, @genes - ) = - $self->_rearrange([qw(ID - SYSNAME - TRIVNAME - CHR - GENE_SYMBOL - DESC - ALPHABET - NUMBERING - OFFSET - RNA_OFFSET - RNA_ID - CDS_END - DNA_ORI - DNA_MUT - RNA_ORI - AA_ORI - AA_MUT - )], - @args); - - #my $make = $self->SUPER::_initialize(@args); - - $id && $self->id($id); - $sysname && $self->sysname($sysname); - $trivname && $self->trivname($trivname); - $chr && $self->chromosome($chr); - $gene_symbol && $self->gene_symbol($chr); - $desc && $self->description($desc); - $alphabet && $self->alphabet($alphabet); - $numbering && $self->numbering($numbering); - $offset && $self->offset($offset); - $rna_offset && $self->rna_offset($rna_offset); - $rna_id && $self->rna_id($rna_id); - $cds_end && $self->cds_end($cds_end); - - $dna_ori && $self->dna_ori($dna_ori); - $dna_mut && $self->dna_mut($dna_mut); - $rna_ori && $self->rna_ori($rna_ori); - $rna_mut && $self->rna_mut($rna_mut); - $aa_ori && $self->aa_ori ($aa_ori); - $aa_mut && $self->aa_mut ($aa_mut); - - $self->{ 'variants' } = []; - #@variants && push(@{$self->{'variants'}},@variants); - - $self->{ 'genes' } = []; - #@genes && push(@{$self->{'genes'}},@genes); - - return $self; # success - we hope! -} - - -=head2 id - - Title : id - Usage : $obj->id(H0001); $id = $obj->id(); - Function: - - Sets or returns the id of the seqDiff. - Should be used to give the collection of variants a UID - without semantic associations. - - Example : - Returns : value of id, a scalar - Args : newvalue (optional) - -=cut - - -sub id { - my ($self,$value) = @_; - if (defined $value) { - $self->{'id'} = $value; - } - else { - return $self->{'id'}; - } -} - - -=head2 sysname - - Title : sysname - Usage : $obj->sysname('5C>G'); $sysname = $obj->sysname(); - Function: - - Sets or returns the systematic name of the seqDiff. The - name should follow the HUGO Mutation Database Initiative - approved nomenclature. If called without first setting the - value, will generate it from L - objects attached. - - Example : - Returns : value of sysname, a scalar - Args : newvalue (optional) - -=cut - - -sub sysname { - my ($self,$value) = @_; - if (defined $value) { - $self->{'sysname'} = $value; - } - elsif (not defined $self->{'sysname'}) { - - my $sysname = ''; - my $c = 0; - foreach my $mut ($self->each_Variant) { - if( $mut->isa('Bio::Variation::DNAMutation') ) { - $c++; - if ($c == 1 ) { - $sysname = $mut->sysname ; - } - else { - $sysname .= ";". $mut->sysname; - } - } - } - $sysname = "[". $sysname. "]" if $c > 1; - $self->{'sysname'} = $sysname; - } - return $self->{'sysname'}; -} - - -=head2 trivname - - Title : trivname - Usage : $obj->trivname('[A2G;T56G]'); $trivname = $obj->trivname(); - Function: - - Sets or returns the trivial name of the seqDiff. - The name should follow the HUGO Mutation Database Initiative - approved nomenclature. If called without first setting the - value, will generate it from L - objects attached. - - Example : - Returns : value of trivname, a scalar - Args : newvalue (optional) - -=cut - - -sub trivname { - my ($self,$value) = @_; - if (defined $value) { - $self->{'trivname'} = $value; - } - elsif (not defined $self->{'trivname'}) { - - my $trivname = ''; - my $c = 0; - foreach my $mut ($self->each_Variant) { - if( $mut->isa('Bio::Variation::AAChange') ) { - $c++; - if ($c == 1 ) { - $trivname = $mut->trivname ; - } - else { - $trivname .= ";". $mut->trivname; - } - } - } - $trivname = "[". $trivname. "]" if $c > 1; - $self->{'trivname'} = $trivname; - } - - else { - return $self->{'trivname'}; - } -} - - -=head2 chromosome - - Title : chromosome - Usage : $obj->chromosome('X'); $chromosome = $obj->chromosome(); - Function: - - Sets or returns the chromosome ("linkage group") of the seqDiff. - - Example : - Returns : value of chromosome, a scalar - Args : newvalue (optional) - -=cut - - -sub chromosome { - my ($self,$value) = @_; - if (defined $value) { - $self->{'chromosome'} = $value; - } - else { - return $self->{'chromosome'}; - } -} - - -=head2 gene_symbol - - Title : gene_symbol - Usage : $obj->gene_symbol('FOS'); $gene_symbol = $obj->gene_symbol; - Function: - - Sets or returns the gene symbol for the studied CDS. - - Example : - Returns : value of gene_symbol, a scalar - Args : newvalue (optional) - -=cut - - -sub gene_symbol { - my ($self,$value) = @_; - if (defined $value) { - $self->{'gene_symbol'} = $value; - } - else { - return $self->{'gene_symbol'}; - } -} - - - -=head2 description - - Title : description - Usage : $obj->description('short description'); $descr = $obj->description(); - Function: - - Sets or returns the short description of the seqDiff. - - Example : - Returns : value of description, a scalar - Args : newvalue (optional) - -=cut - - -sub description { - my ($self,$value) = @_; - if (defined $value) { - $self->{'description'} = $value; - } - else { - return $self->{'description'}; - } -} - - -=head2 alphabet - - Title : alphabet - Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } - Function: Returns the type of primary reference sequence being one of - 'dna', 'rna' or 'protein'. This is case sensitive. - - Returns : a string either 'dna','rna','protein'. - Args : none - - -=cut - -sub alphabet { - my ($self,$value) = @_; - my %type = (dna => 1, - rna => 1, - protein => 1); - if( defined $value ) { - if ($type{$value}) { - $self->{'alphabet'} = $value; - } else { - $self->throw("$value is not valid alphabet value!"); - } - } - return $self->{'alphabet'}; -} - - -=head2 numbering - - Title : numbering - Usage : $obj->numbering('coding'); $numbering = $obj->numbering(); - Function: - - Sets or returns the string giving the numbering schema used - to describe the variants. - - Example : - Returns : value of numbering, a scalar - Args : newvalue (optional) - -=cut - - - -sub numbering { - my ($self,$value) = @_; - if (defined $value) { - $self->{'numbering'} = $value; - } - else { - return $self->{'numbering'}; - } -} - - -=head2 offset - - Title : offset - Usage : $obj->offset(124); $offset = $obj->offset(); - Function: - - Sets or returns the offset from the beginning of the DNA sequence - to the coordinate start used to describe variants. Typically - the beginning of the coding region of the gene. - The cds_start should be 1 + offset. - - Example : - Returns : value of offset, a scalar - Args : newvalue (optional) - -=cut - - - -sub offset { - my ($self,$value) = @_; - if (defined $value) { - $self->{'offset'} = $value; - } - elsif (not defined $self->{'offset'} ) { - return $self->{'offset'} = 0; - } - else { - return $self->{'offset'}; - } -} - - -=head2 cds_start - - Title : cds_start - Usage : $obj->cds_start(123); $cds_start = $obj->cds_start(); - Function: - - Sets or returns the cds_start from the beginning of the DNA - sequence to the coordinate start used to describe - variants. Typically the beginning of the coding region of - the gene. Needs to be and is implemented as 1 + offset. - - Example : - Returns : value of cds_start, a scalar - Args : newvalue (optional) - -=cut - - - -sub cds_start { - my ($self,$value) = @_; - if (defined $value) { - $self->{'offset'} = $value - 1; - } - else { - return $self->{'offset'} + 1; - } -} - - -=head2 cds_end - - Title : cds_end - Usage : $obj->cds_end(321); $cds_end = $obj->cds_end(); - Function: - - Sets or returns the position of the last nucleotitide of the - termination codon. The coordinate system starts from cds_start. - - Example : - Returns : value of cds_end, a scalar - Args : newvalue (optional) - -=cut - - - -sub cds_end { - my ($self,$value) = @_; - if (defined $value) { - $self->{'cds_end'} = $value; - } - else { - return $self->{'cds_end'}; - #$self->{'cds_end'} = CORE::length($self->SeqDiff->rna_ori)/3; - } -} - - -=head2 rna_offset - - Title : rna_offset - Usage : $obj->rna_offset(124); $rna_offset = $obj->rna_offset(); - Function: - - Sets or returns the rna_offset from the beginning of the RNA sequence - to the coordinate start used to describe variants. Typically - the beginning of the coding region of the gene. - - Example : - Returns : value of rna_offset, a scalar - Args : newvalue (optional) - -=cut - - - -sub rna_offset { - my ($self,$value) = @_; - if (defined $value) { - $self->{'rna_offset'} = $value; - } - elsif (not defined $self->{'rna_offset'} ) { - return $self->{'rna_offset'} = 0; - } - else { - return $self->{'rna_offset'}; - } -} - - -=head2 rna_id - - Title : rna_id - Usage : $obj->rna_id('transcript#3'); $rna_id = $obj->rna_id(); - Function: - - Sets or returns the ID for original RNA sequence of the seqDiff. - - Example : - Returns : value of rna_id, a scalar - Args : newvalue (optional) - -=cut - - -sub rna_id { - my ($self,$value) = @_; - if (defined $value) { - $self->{'rna_id'} = $value; - } - else { - return $self->{'rna_id'}; - } -} - - - -=head2 add_Variant - - Title : add_Variant - Usage : $obj->add_Variant($variant) - Function: - - Pushes one Bio::Variation::Variant into the list of variants. - At the same time, creates a link from the Variant to SeqDiff - using its SeqDiff method. - - Example : - Returns : 1 when succeeds, 0 for failure. - Args : Variant object - -=cut - -sub add_Variant { - my ($self,$value) = @_; - if (defined $value) { - if( ! $value->isa('Bio::Variation::VariantI') ) { - $self->throw("Is not a VariantI complying object but a [$self]"); - return 0; - } - else { - push(@{$self->{'variants'}},$value); - $value->SeqDiff($self); - return 1; - } - } - else { - return 0; - } -} - - -=head2 each_Variant - - Title : each_Variant - Usage : $obj->each_Variant(); - Function: - - Returns a list of Variants. - - Example : - Returns : list of Variants - Args : none - -=cut - -sub each_Variant{ - my ($self,@args) = @_; - - return @{$self->{'variants'}}; -} - - - -=head2 add_Gene - - Title : add_Gene - Usage : $obj->add_Gene($gene) - Function: - - Pushes one L into the list of genes. - - Example : - Returns : 1 when succeeds, 0 for failure. - Args : Bio::LiveSeq::Gene object - -See L for more information. - -=cut - - -sub add_Gene { - my ($self,$value) = @_; - if (defined $value) { - if( ! $value->isa('Bio::LiveSeq::Gene') ) { - $value->throw("Is not a Bio::LiveSeq::Gene object but a [$value]"); - return 0; - } - else { - push(@{$self->{'genes'}},$value); - return 1; - } - } - else { - return 0; - } -} - - -=head2 each_Gene - - Title : each_Gene - Usage : $obj->each_Gene(); - Function: - - Returns a list of Ls. - - Example : - Returns : list of Genes - Args : none - -=cut - -sub each_Gene{ - my ($self,@args) = @_; - - return @{$self->{'genes'}}; -} - - -=head2 dna_ori - - Title : dna_ori - Usage : $obj->dna_ori('atgctgctgctgct'); $dna_ori = $obj->dna_ori(); - Function: - - Sets or returns the original DNA sequence string of the seqDiff. - - Example : - Returns : value of dna_ori, a scalar - Args : newvalue (optional) - -=cut - - -sub dna_ori { - my ($self,$value) = @_; - if (defined $value) { - $self->{'dna_ori'} = $value; - } - else { - return $self->{'dna_ori'}; - } -} - - -=head2 dna_mut - - Title : dna_mut - Usage : $obj->dna_mut('atgctggtgctgct'); $dna_mut = $obj->dna_mut(); - Function: - - Sets or returns the mutated DNA sequence of the seqDiff. - If sequence has not been set generates it from the - original sequence and DNA mutations. - - Example : - Returns : value of dna_mut, a scalar - Args : newvalue (optional) - -=cut - - -sub dna_mut { - my ($self,$value) = @_; - if (defined $value) { - $self->{'dna_mut'} = $value; - } - else { - $self->_set_dnamut() unless $self->{'dna_mut'}; - return $self->{'dna_mut'}; - } -} - -sub _set_dnamut { - my $self = shift; - - return unless $self->{'dna_ori'} && $self->each_Variant; - - $self->{'dna_mut'} = $self->{'dna_ori'}; - foreach ($self->each_Variant) { - next unless $_->isa('Bio::Variation::DNAMutation'); - next unless $_->isMutation; - - my ($s, $la, $le); - #lies the mutation less than 25 bases after the start of sequence? - if ($_->start < 25) { - $s = 0; $la = $_->start - 1; - } else { - $s = $_->start - 25; $la = 25; - } - - #is the mutation an insertion? - $_->end($_->start) unless $_->allele_ori->seq; - - #does the mutation end greater than 25 bases before the end of - #sequence? - if (($_->end + 25) > length($self->{'dna_mut'})) { - $le = length($self->{'dna_mut'}) - $_->end; - } else { - $le = 25; - } - - $_->dnStreamSeq(substr($self->{'dna_mut'}, $s, $la)); - $_->upStreamSeq(substr($self->{'dna_mut'}, $_->end, $le)); - - my $s_ori = $_->dnStreamSeq . $_->allele_ori->seq . $_->upStreamSeq; - my $s_mut = $_->dnStreamSeq . $_->allele_mut->seq . $_->upStreamSeq; - - (my $str = $self->{'dna_mut'}) =~ s/$s_ori/$s_mut/; - $self->{'dna_mut'} = $str; - } -} - - -=head2 rna_ori - - Title : rna_ori - Usage : $obj->rna_ori('atgctgctgctgct'); $rna_ori = $obj->rna_ori(); - Function: - - Sets or returns the original RNA sequence of the seqDiff. - - Example : - Returns : value of rna_ori, a scalar - Args : newvalue (optional) - -=cut - - -sub rna_ori { - my ($self,$value) = @_; - if (defined $value) { - $self->{'rna_ori'} = $value; - } - else { - return $self->{'rna_ori'}; - } -} - - -=head2 rna_mut - - Title : rna_mut - Usage : $obj->rna_mut('atgctggtgctgct'); $rna_mut = $obj->rna_mut(); - Function: - - Sets or returns the mutated RNA sequence of the seqDiff. - - Example : - Returns : value of rna_mut, a scalar - Args : newvalue (optional) - -=cut - - -sub rna_mut { - my ($self,$value) = @_; - if (defined $value) { - $self->{'rna_mut'} = $value; - } - else { - return $self->{'rna_mut'}; - } -} - - -=head2 aa_ori - - Title : aa_ori - Usage : $obj->aa_ori('MAGVLL*'); $aa_ori = $obj->aa_ori(); - Function: - - Sets or returns the original protein sequence of the seqDiff. - - Example : - Returns : value of aa_ori, a scalar - Args : newvalue (optional) - -=cut - - -sub aa_ori { - my ($self,$value) = @_; - if (defined $value) { - $self->{'aa_ori'} = $value; - } - else { - return $self->{'aa_ori'}; - } -} - - -=head2 aa_mut - - Title : aa_mut - Usage : $obj->aa_mut('MA*'); $aa_mut = $obj->aa_mut(); - Function: - - Sets or returns the mutated protein sequence of the seqDiff. - - Example : - Returns : value of aa_mut, a scalar - Args : newvalue (optional) - -=cut - - -sub aa_mut { - my ($self,$value) = @_; - if (defined $value) { - $self->{'aa_mut'} = $value; - } - else { - return $self->{'aa_mut'}; - } -} - - -=head2 seqobj - - Title : seqobj - Usage : $dnaobj = $obj->seqobj('dna_mut'); - Function: - - Returns the any original or mutated sequences as a - Bio::PrimarySeq object. - - Example : - Returns : Bio::PrimarySeq object for the requested sequence - Args : string, method name for the sequence requested - -See L for more information. - -=cut - -sub seqobj { - my ($self,$value) = @_; - my $out; - my %valid_obj = - map {$_, 1} qw(dna_ori rna_ori aa_ori dna_mut rna_mut aa_mut); - $valid_obj{$value} || - $self->throw("Sequence type '$value' is not a valid type (". - join(',', map "'$_'", sort keys %valid_obj) .") lowercase"); - my ($alphabet) = $value =~ /([^_]+)/; - my $id = $self->id; - $id = $self->rna_id if $self->rna_id; - $alphabet = 'protein' if $alphabet eq 'aa'; - $out = Bio::PrimarySeq->new - ( '-seq' => $self->{$value}, - '-display_id' => $id, - '-accession_number' => $self->id, - '-alphabet' => $alphabet - ) if $self->{$value} ; - return $out; -} - -=head2 alignment - - Title : alignment - Usage : $obj->alignment - Function: - - Returns a pretty RNA/AA sequence alignment from linked - objects. Under construction: Only simple coding region - point mutations work. - - Example : - Returns : - Args : none - -=cut - - -sub alignment { - my $self = shift; - my (@entry, $text); - - my $maxflanklen = 12; - - foreach my $mut ($self->each_Variant) { - if( $mut->isa('Bio::Variation::RNAChange') ) { - - my $upflank = $mut->upStreamSeq; - my $dnflank = $mut->dnStreamSeq; - my $cposd = $mut->codon_pos; - my $rori = $mut->allele_ori->seq; - my $rmut = $mut->allele_mut->seq; - my $rseqoriu = ''; - my $rseqmutu = ''; - my $rseqorid = ''; - my $rseqmutd = ''; - my $aaseqmutu = ''; - my (@rseqori, @rseqmut ); - - # point - if ($mut->DNAMutation->label =~ /point/) { - if ($cposd == 1 ) { - my $nt2d = substr($dnflank, 0, 2); - push @rseqori, $rori. $nt2d; - push @rseqmut, uc ($rmut). $nt2d; - $dnflank = substr($dnflank, 2); - } - elsif ($cposd == 2) { - my $ntu = chop $upflank; - my $ntd = substr($dnflank, 0, 1); - push @rseqori, $ntu. $rori. $ntd; - push @rseqmut, $ntu. uc ($rmut). $ntd; - $dnflank = substr($dnflank, 1); - } - elsif ($cposd == 3) { - my $ntu1 = chop $upflank; - my $ntu2 = chop $upflank; - push (@rseqori, $ntu2. $ntu1. $rori); - push (@rseqmut, $ntu2. $ntu1. uc $rmut); - } - } - #deletion - elsif ($mut->DNAMutation->label =~ /deletion/) { - if ($cposd == 2 ) { - $rseqorid = chop $upflank; - $rseqmutd = $rseqorid; - } - for (my $i=1; $i<=$mut->length; $i++) { - my $ntd .= substr($mut->allele_ori, $i-1, 1); - $rseqorid .= $ntd; - if (length($rseqorid) == 3 ) { - push (@rseqori, $rseqorid); - push (@rseqmut, " "); - $rseqorid = ''; - } - } - - if ($rseqorid) { - $rseqorid .= substr($dnflank, 0, 3-$rseqorid); - push (@rseqori, $rseqorid); - push (@rseqmut, " "); - $dnflank = substr($dnflank,3-$rseqorid); - } - } - $upflank = reverse $upflank; - # loop throught the flanks - for (my $i=1; $i<=length($dnflank); $i++) { - - last if $i > $maxflanklen; - - my $ntd .= substr($dnflank, $i-1, 1); - my $ntu .= substr($upflank, $i-1, 1); - - $rseqmutd .= $ntd; - $rseqorid .= $ntd; - $rseqmutu = $ntu. $rseqmutu; - $rseqoriu = $ntu. $rseqoriu; - - if (length($rseqorid) == 3 and length($rseqorid) == 3) { - push (@rseqori, $rseqorid); - push (@rseqmut, $rseqmutd); - $rseqorid = $rseqmutd =''; - } - if (length($rseqoriu) == 3 and length($rseqoriu) == 3) { - unshift (@rseqori, $rseqoriu); - unshift (@rseqmut, $rseqmutu); - $rseqoriu = $rseqmutu =''; - } - - #print "|i=$i, $cposd, $rseqmutd, $rseqorid\n"; - #print "|i=$i, $cposu, $rseqmutu, $rseqoriu\n\n"; - - } - - push (@rseqori, $rseqorid); - unshift (@rseqori, $rseqoriu); - push (@rseqmut, $rseqmutd); - unshift (@rseqmut, $rseqmutu); - - return unless $mut->AAChange; - #translate - my $tr = Bio::Tools::CodonTable->new('-id' => $mut->codon_table); - my $apos = $mut->AAChange->start; - my $aposmax = CORE::length($self->aa_ori); #terminator codon no - my $rseqori; - my $rseqmut; - my $aaseqori; - my $aaseqmut = ""; - for (my $i = 0; $i <= $#rseqori; $i++) { - my $a = ''; - - $a = $tr->translate($rseqori[$i]) if length($rseqori[$i]) == 3; - - if (length($a) != 1 or - $apos - ( $maxflanklen/2 -1) + $i < 1 or - $apos - ( $maxflanklen/2 -1) + $i > $aposmax ) { - $aaseqori .= " "; - } else { - $aaseqori .= " ". $a. " "; - } - my $b = ''; - if (length($rseqmut[$i]) == 3) { - if ($rseqmut[$i] eq ' ') { - $b = "_"; - } else { - $b = $tr->translate($rseqmut[$i]); - } - } - if (( $b ne $a and - length($b) == 1 and - $apos - ( $maxflanklen/2 -1) + $i >= 1 ) or - ( $apos - ( $maxflanklen/2 -1) + $i >= $aposmax and - $mut->label =~ 'termination') - ) { - $aaseqmut .= " ". $b. " "; - } else { - $aaseqmut .= " "; - } - - if ($i == 0 and length($rseqori[$i]) != 3) { - my $l = 3 - length($rseqori[$i]); - $rseqori[$i] = (" " x $l). $rseqori[$i]; - $rseqmut[$i] = (" " x $l). $rseqmut[$i]; - } - $rseqori .= $rseqori[$i]. " " if $rseqori[$i] ne ''; - $rseqmut .= $rseqmut[$i]. " " if $rseqmut[$i] ne ''; - } - - # collect the results - push (@entry, - "\n" - ); - $text = " ". $aaseqmut; - push (@entry, - $text - ); - $text = "Variant : ". $rseqmut; - push (@entry, - $text - ); - $text = "Reference: ". $rseqori; - push (@entry, - $text - ); - $text = " ". $aaseqori; - push (@entry, - $text - ); - push (@entry, - "\n" - ); - } - - } - - my $res; - foreach my $line (@entry) { - $res .= "$line\n"; - } - return $res; -} - -1; diff --git a/lib/Bio/Variation/VariantI.pm b/lib/Bio/Variation/VariantI.pm deleted file mode 100644 index de8c65651..000000000 --- a/lib/Bio/Variation/VariantI.pm +++ /dev/null @@ -1,1052 +0,0 @@ -# -# BioPerl module for Bio::Variation::VariantI -# -# Please direct questions and support issues to -# -# Cared for by Heikki Lehvaslaiho -# -# Copyright Heikki Lehvaslaiho -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Variation::VariantI - Sequence Change SeqFeature abstract class - -=head1 SYNOPSIS - - #get Bio::Variant::VariantI somehow - print $var->restriction_changes, "\n"; - foreach $allele ($var->each_Allele) { - #work on Bio::Variation::Allele objects - } - -=head1 DESCRIPTION - -This superclass defines common methods to basic sequence changes. The -instantiable classes Bio::Variation::DNAMutation, -Bio::Variation::RNAChange and Bio::Variation::AAChange use them. -See L, L, -and L for more information. - -These classes store information, heavy computation to determine allele -sequences is done elsewhere. - -The database cross-references are implemented as -Bio::Annotation::DBLink objects. The methods to access them are -defined in Bio::DBLinkContainerI. See L -and L for details. - -Bio::Variation::VariantI redifines and extends -Bio::SeqFeature::Generic for sequence variations. This class -describes specific sequence change events. These events are always -from a specific reference sequence to something different. See -L for more information. - -IMPORTANT: The notion of reference sequence permeates all -Bio::Variation classes. This is especially important to remember when -dealing with Alleles. In a polymorphic site, there can be a large -number of alleles. One of then has to be selected to be the reference -allele (allele_ori). ALL the rest has to be passed to the Variant -using the method add_Allele, including the mutated allele in a -canonical mutation. The IO modules and generated attributes depend on -it. They ignore the allele linked to using allele_mut and circulate -each Allele returned by each_Allele into allele_mut and calculate -the changes between that and allele_ori. - - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing lists Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Heikki Lehvaslaiho - -Email: heikki-at-bioperl-dot-org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Variation::VariantI; -use strict; -# Object preamble - inheritance - -use base qw(Bio::Root::Root Bio::SeqFeature::Generic Bio::DBLinkContainerI); - -=head2 id - - Title : id - Usage : $obj->id - Function: - - Read only method. Returns the id of the variation object. - The id is the id of the first DBLink object attached to this object. - - Example : - Returns : scalar - Args : none - -=cut - -sub id { - my ($self) = @_; - my @ids = $self->each_DBLink; - my $id = $ids[0] if scalar @ids > 0; - return $id->database. "::". $id->primary_id if $id; -} - - -=head2 add_Allele - - Title : add_Allele - Usage : $self->add_Allele($allele) - Function: - - Adds one Bio::Variation::Allele into the list of alleles. - Note that the method forces the convention that nucleotide - sequence is in lower case and amino acds are in upper - case. - - Example : - Returns : 1 when succeeds, 0 for failure. - Args : Allele object - -=cut - - -sub add_Allele { - my ($self,$value) = @_; - if (defined $value) { - if( ! $value->isa('Bio::Variation::Allele') ) { - my $com = ref $value; - $self->throw("Is not a Allele object but a [$com]"); - return 0; - } else { - if ( $self->isa('Bio::Variation::AAChange') ) { - $value->seq( uc $value->seq) if $value->seq; - } else { - $value->seq( lc $value->seq) if $value->seq; - } - push(@{$self->{'alleles'}},$value); - $self->allele_mut($value); #???? - return 1; - } - } else { - return 0; - } -} - - -=head2 each_Allele - - Title : alleles - Usage : $obj->each_Allele(); - Function: - - Returns a list of Bio::Variation::Allele objects - - Example : - Returns : list of Alleles - Args : none - -=cut - -sub each_Allele{ - my ($self,@args) = @_; - return @{$self->{'alleles'}}; -} - - -=head2 isMutation - - Title : isMutation - Usage : print join('/', $obj->each_Allele) if not $obj->isMutation; - Function: - - Returns or sets the boolean value indicating that the - variant described is a canonical mutation with two alleles - assinged to be the original (wild type) allele and mutated - allele, respectively. If this value is not set, it is - assumed that the Variant describes polymorphisms. - - Returns : a boolean - -=cut - -sub isMutation { - my ($self,$value) = @_; - if (defined $value) { - if ($value ) { - $self->{'isMutation'} = 1; - } else { - $self->{'isMutation'} = 0; - } - } - return $self->{'isMutation'}; -} - - -=head2 allele_ori - - Title : allele_ori - Usage : $obj->allele_ori(); - Function: - - Links to and returns the Bio::Variation::Allele object. - If value is not set, returns false. All other Alleles are - compared to this. - - Amino acid sequences are stored in upper case characters, - others in lower case. - - Example : - Returns : string - Args : string - -See L for more. - -=cut - -sub allele_ori { - my ($self,$value) = @_; - if( defined $value) { - if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) { - $self->throw("Value is not Bio::Variation::Allele but [$value]"); - } else { - if ( $self->isa('Bio::Variation::AAChange') ) { - $value->seq( uc $value->seq) if $value->seq; - } else { - $value->seq( lc $value->seq) if $value->seq; - } - $self->{'allele_ori'} = $value; - } - } - return $self->{'allele_ori'}; -} - - -=head2 allele_mut - - Title : allele_mut - Usage : $obj->allele_mut(); - Function: - - Links to and returns the Bio::Variation::Allele - object. Sets and returns the mutated allele sequence. - If value is not set, returns false. - - Amino acid sequences are stored in upper case characters, - others in lower case. - - Example : - Returns : string - Args : string - -See L for more. - -=cut - - -sub allele_mut { - my ($self,$value) = @_; - if( defined $value) { - if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) { - $self->throw("Value is not Bio::Variation::Allele but [$value]"); - } else { - if ( $self->isa('Bio::Variation::AAChange') ) { - $value->seq( uc $value->seq) if $value->seq; - } else { - $value->seq( lc $value->seq) if $value->seq; - } - $self->{'allele_mut'} = $value; - } - } - return $self->{'allele_mut'}; -} - -=head2 length - - Title : length - Usage : $obj->length(); - Function: - - Sets and returns the length of the affected original - allele sequence. If value is not set, returns false == 0. - - Value 0 means that the variant position is before the - start=end sequence position. (Value 1 would denote a point - mutation). This follows the convension to report an - insertion (2insT) in equivalent way to a corresponding - deletion (2delT) (Think about indel polymorpism ATC <=> AC - where the origianal state is not known ). - - Example : - Returns : string - Args : string - -=cut - - -sub length { - my ($self,$value) = @_; - if ( defined $value) { - $self->{'length'} = $value; - } - if ( ! exists $self->{'length'} ) { - return 0; - } - return $self->{'length'}; -} - -=head2 upStreamSeq - - Title : upStreamSeq - Usage : $obj->upStreamSeq(); - Function: - - Sets and returns upstream flanking sequence string. If - value is not set, returns false. The sequence should be - >=25 characters long, if possible. - - Example : - Returns : string or false - Args : string - -=cut - - -sub upStreamSeq { - my ($self,$value) = @_; - if( defined $value) { - $self->{'upstreamseq'} = $value; - } - return $self->{'upstreamseq'}; -} - - -=head2 dnStreamSeq - - Title : dnStreamSeq - Usage : $obj->dnStreamSeq(); - Function: - - Sets and returns dnstream flanking sequence string. If - value is not set, returns false. The sequence should be - >=25 characters long, if possible. - - Example : - Returns : string or false - Args : string - -=cut - - -sub dnStreamSeq { - my ($self,$value) = @_; - if( defined $value) { - $self->{'dnstreamseq'} = $value; - } - return $self->{'dnstreamseq'}; - -} - - -=head2 label - - Title : label - Usage : $obj->label(); - Function: - - Sets and returns mutation event label(s). If value is not - set, or no argument is given returns false. Each - instantiable class needs to implement this method. Valid - values are listed in 'Mutation event controlled vocabulary' in - http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. - - Example : - Returns : string - Args : string - -=cut - - -sub label { - my ($self,$value) = @_; - $self->throw_not_implemented(); -} - - - -=head2 status - - Title : status - Usage : $obj->status() - Function: - - Returns the status of the sequence change object. - Valid values are: 'suspected' and 'proven' - - Example : $obj->status('proven'); - Returns : scalar - Args : valid string (optional, for setting) - - -=cut - - -sub status { - my ($self,$value) = @_; - my %status = (suspected => 1, - proven => 1 - ); - - if( defined $value) { - $value = lc $value; - if ($status{$value}) { - $self->{'status'} = $value; - } - else { - $self->throw("$value is not valid status value!"); - } - } - if( ! exists $self->{'status'} ) { - return "$self"; - } - return $self->{'status'}; -} - - -=head2 proof - - Title : proof - Usage : $obj->proof() - Function: - - Returns the proof of the sequence change object. - Valid values are: 'computed' and 'experimental'. - - Example : $obj->proof('computed'); - Returns : scalar - Args : valid string (optional, for setting) - - -=cut - - -sub proof { - my ($self,$value) = @_; - my %proof = (computed => 1, - experimental => 1 - ); - - if( defined $value) { - $value = lc $value; - if ($proof{$value}) { - $self->{'proof'} = $value; - } else { - $self->throw("$value is not valid proof value!"); - } - } - return $self->{'proof'}; -} - - -=head2 region - - Title : region - Usage : $obj->region(); - Function: - - Sets and returns the name of the sequence region type or - protein domain at this location. If value is not set, - returns false. - - Example : - Returns : string - Args : string - -=cut - - -sub region { - my ($self,$value) = @_; - if( defined $value) { - $self->{'region'} = $value; - } - return $self->{'region'}; -} - - -=head2 region_value - - Title : region_value - Usage : $obj->region_value(); - Function: - - Sets and returns the name of the sequence region_value or - protein domain at this location. If value is not set, - returns false. - - Example : - Returns : string - Args : string - -=cut - - -sub region_value { - my ($self,$value) = @_; - if( defined $value) { - $self->{'region_value'} = $value; - } - return $self->{'region_value'}; -} - -=head2 region_dist - - Title : region_dist - Usage : $obj->region_dist(); - Function: - - Sets and returns the distance tot the closest region - (i.e. intro/exon or domain) boundary. If distance is not - set, returns false. - - Example : - Returns : integer - Args : integer - -=cut - - -sub region_dist { - my ($self,$value) = @_; - if( defined $value) { - if ( not $value =~ /^[+-]?\d+$/ ) { - $self->throw("[$value] for region_dist has to be an integer\n"); - } else { - $self->{'region_dist'} = $value; - } - } - return $self->{'region_dist'}; -} - - -=head2 numbering - - Title : numbering - Usage : $obj->numbering() - Function: - - Returns the numbering chema used locating sequnce features. - Valid values are: 'entry' and 'coding' - - Example : $obj->numbering('coding'); - Returns : scalar - Args : valid string (optional, for setting) - - -=cut - - -sub numbering { - my ($self,$value) = @_; - my %numbering = (entry => 1, - coding => 1 - ); - - if( defined $value) { - $value = lc $value; - if ($numbering{$value}) { - $self->{'numbering'} = $value; - } - else { - $self->throw("'$value' is not a valid for numbering!"); - } - } - if( ! exists $self->{'numbering'} ) { - return "$self"; - } - return $self->{'numbering'}; -} - -=head2 mut_number - - Title : mut_number - Usage : $num = $obj->mut_number; - : $num = $obj->mut_number($number); - Function: - - Returns or sets the number identifying the order in which the - mutation has been issued. Numbers shouldstart from 1. - If the number has never been set, the method will return '' - - If you want the output from IO modules look nice and, for - multivariant/allele variations, make sense you better set - this attribute. - - Returns : an integer - -=cut - - -sub mut_number { - my ($self,$value) = @_; - if (defined $value) { - $self->{'mut_number'} = $value; - } - unless (exists $self->{'mut_number'}) { - return (''); - } else { - return $self->{'mut_number'}; - } -} - - -=head2 SeqDiff - - Title : SeqDiff - Usage : $mutobj = $obj->SeqDiff; - : $mutobj = $obj->SeqDiff($objref); - Function: - - Returns or sets the link-reference to the umbrella - Bio::Variation::SeqDiff object. If there is no link, - it will return undef - - Note: Adding a variant into a SeqDiff object will - automatically set this value. - - Returns : an obj_ref or undef - -See L for more information. - -=cut - -sub SeqDiff { - my ($self,$value) = @_; - if (defined $value) { - if( ! $value->isa('Bio::Variation::SeqDiff') ) { - $self->throw("Is not a Bio::Variation::SeqDiff object but a [$value]"); - return; - } - else { - $self->{'seqDiff'} = $value; - } - } - unless (exists $self->{'seqDiff'}) { - return; - } else { - return $self->{'seqDiff'}; - } -} - -=head2 add_DBLink - - Title : add_DBLink - Usage : $self->add_DBLink($ref) - Function: adds a link object - Example : - Returns : - Args : - - -=cut - - -sub add_DBLink{ - my ($self,$com) = @_; - if( $com && ! $com->isa('Bio::Annotation::DBLink') ) { - $self->throw("Is not a link object but a [$com]"); - } - $com && push(@{$self->{'link'}},$com); -} - -=head2 each_DBLink - - Title : each_DBLink - Usage : foreach $ref ( $self->each_DBlink() ) - Function: gets an array of DBlink of objects - Example : - Returns : - Args : - - -=cut - -sub each_DBLink{ - my ($self) = @_; - - return @{$self->{'link'}}; -} - -=head2 restriction_changes - - Title : restriction_changes - Usage : $obj->restriction_changes(); - Function: - - Returns a string containing a list of restriction - enzyme changes of form +EcoRI, separated by - commas. Strings need to be valid restriction enzyme names - as stored in REBASE. allele_ori and allele_mut need to be assigned. - - Example : - Returns : string - Args : string - -=cut - -sub restriction_changes { - my ($self) = @_; - - if (not $self->{'re_changes'}) { - my %re = &_enzymes; - - # complain if used on AA data - if ($self->isa('Bio::Variation::AAChange')) { - $self->throw('Restriction enzymes do not bite polypeptides!'); - } - - #sanity checks - $self->warn('Upstream sequence is empty!') - if $self->upStreamSeq eq ''; - $self->warn('Downstream sequence is empty!') - if $self->dnStreamSeq eq ''; -# $self->warn('Original allele sequence is empty!') -# if $self->allele_ori eq ''; -# $self->warn('Mutated allele sequence is empty!') -# if $self->allele_mut eq ''; - - #reuse the non empty DNA level list at RNA level if the flanks are identical - #Hint: Check DNAMutation object first - if ($self->isa('Bio::Variation::RNAChange') and $self->DNAMutation and - $self->upStreamSeq eq $self->DNAMutation->upStreamSeq and - $self->dnStreamSeq eq $self->DNAMutation->dnStreamSeq and - $self->DNAMutation->restriction_changes ne '' ) { - $self->{'re_changes'} = $self->DNAMutation->restriction_changes; - } else { - - #maximum length of a type II restriction site in the current REBASE - my ($le_dn) = 15; - my ($le_up) = $le_dn; - - #reduce the flank lengths if the desired length is not available - $le_dn = CORE::length ($self->dnStreamSeq) if $le_dn > CORE::length ($self->dnStreamSeq); - $le_up = CORE::length ($self->upStreamSeq) if $le_up > CORE::length ($self->upStreamSeq); - - #Build sequence strings to compare - my ($oriseq, $mutseq); - $oriseq = $mutseq = substr($self->upStreamSeq, -$le_up, $le_up); - $oriseq .= $self->allele_ori->seq if $self->allele_ori->seq; - $mutseq .= $self->allele_mut->seq if $self->allele_mut->seq; - $oriseq .= substr($self->dnStreamSeq, 0, $le_dn); - $mutseq .= substr($self->dnStreamSeq, 0, $le_dn); - - # ... and their reverse complements - my $oriseq_rev = _revcompl ($oriseq); - my $mutseq_rev = _revcompl ($mutseq); - - # collect results into a string - my $rec = ''; - foreach my $enz (sort keys (%re)) { - my $site = $re{$enz}; - my @ori = ($oriseq=~ /$site/g); - my @mut = ($mutseq=~ /$site/g); - my @ori_r = ($oriseq_rev =~ /$site/g); - my @mut_r = ($mutseq_rev =~ /$site/g); - - $rec .= '+'. $enz. ", " - if (scalar @ori < scalar @mut) or (scalar @ori_r < scalar @mut_r); - $rec .= '-'. $enz. ", " - if (scalar @ori > scalar @mut) or (scalar @ori_r > scalar @mut_r); - - } - $rec = substr($rec, 0, CORE::length($rec) - 2) if $rec ne ''; - $self->{'re_changes'} = $rec; - } - } - return $self->{'re_changes'} -} - - -sub _revcompl { - # side effect: lower case letters - my ($seq) = shift; - - $seq = lc $seq; - $seq =~ tr/acgtrymkswhbvdnx/tgcayrkmswdvbhnx/; - return CORE::reverse $seq; -} - - -sub _enzymes { - #REBASE version 005 type2.005 - my %enzymes = ( - 'AarI' => 'cacctgc', - 'AatII' => 'gacgtc', - 'AccI' => 'gt[ac][gt]ac', - 'AceIII' => 'cagctc', - 'AciI' => 'ccgc', - 'AclI' => 'aacgtt', - 'AcyI' => 'g[ag]cg[ct]c', - 'AflII' => 'cttaag', - 'AflIII' => 'ac[ag][ct]gt', - 'AgeI' => 'accggt', - 'AhaIII' => 'tttaaa', - 'AloI' => 'gaac[acgt][acgt][acgt][acgt][acgt][acgt]tcc', - 'AluI' => 'agct', - 'AlwNI' => 'cag[acgt][acgt][acgt]ctg', - 'ApaBI' => 'gca[acgt][acgt][acgt][acgt][acgt]tgc', - 'ApaI' => 'gggccc', - 'ApaLI' => 'gtgcac', - 'ApoI' => '[ag]aatt[ct]', - 'AscI' => 'ggcgcgcc', - 'AsuI' => 'gg[acgt]cc', - 'AsuII' => 'ttcgaa', - 'AvaI' => 'c[ct]cg[ag]g', - 'AvaII' => 'gg[at]cc', - 'AvaIII' => 'atgcat', - 'AvrII' => 'cctagg', - 'BaeI' => 'ac[acgt][acgt][acgt][acgt]gta[ct]c', - 'BalI' => 'tggcca', - 'BamHI' => 'ggatcc', - 'BbvCI' => 'cctcagc', - 'BbvI' => 'gcagc', - 'BbvII' => 'gaagac', - 'BccI' => 'ccatc', - 'Bce83I' => 'cttgag', - 'BcefI' => 'acggc', - 'BcgI' => 'cga[acgt][acgt][acgt][acgt][acgt][acgt]tgc', - 'BciVI' => 'gtatcc', - 'BclI' => 'tgatca', - 'BetI' => '[at]ccgg[at]', - 'BfiI' => 'actggg', - 'BglI' => 'gcc[acgt][acgt][acgt][acgt][acgt]ggc', - 'BglII' => 'agatct', - 'BinI' => 'ggatc', - 'BmgI' => 'g[gt]gccc', - 'BplI' => 'gag[acgt][acgt][acgt][acgt][acgt]ctc', - 'Bpu10I' => 'cct[acgt]agc', - 'BsaAI' => '[ct]acgt[ag]', - 'BsaBI' => 'gat[acgt][acgt][acgt][acgt]atc', - 'BsaXI' => 'ac[acgt][acgt][acgt][acgt][acgt]ctcc', - 'BsbI' => 'caacac', - 'BscGI' => 'cccgt', - 'BseMII' => 'ctcag', - 'BsePI' => 'gcgcgc', - 'BseRI' => 'gaggag', - 'BseSI' => 'g[gt]gc[ac]c', - 'BsgI' => 'gtgcag', - 'BsiI' => 'cacgag', - 'BsiYI' => 'cc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gg', - 'BsmAI' => 'gtctc', - 'BsmI' => 'gaatgc', - 'Bsp1407I' => 'tgtaca', - 'Bsp24I' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]tgg', - 'BspGI' => 'ctggac', - 'BspHI' => 'tcatga', - 'BspLU11I' => 'acatgt', - 'BspMI' => 'acctgc', - 'BspMII' => 'tccgga', - 'BsrBI' => 'ccgctc', - 'BsrDI' => 'gcaatg', - 'BsrI' => 'actgg', - 'BstEII' => 'ggt[acgt]acc', - 'BstXI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]tgg', - 'BtrI' => 'cacgtc', - 'BtsI' => 'gcagtg', - 'Cac8I' => 'gc[acgt][acgt]gc', - 'CauII' => 'cc[cg]gg', - 'Cfr10I' => '[ag]ccgg[ct]', - 'CfrI' => '[ct]ggcc[ag]', - 'CjeI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]gt', - 'CjePI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt]tc', - 'ClaI' => 'atcgat', - 'CviJI' => '[ag]gc[ct]', - 'CviRI' => 'tgca', - 'DdeI' => 'ct[acgt]ag', - 'DpnI' => 'gatc', - 'DraII' => '[ag]gg[acgt]cc[ct]', - 'DraIII' => 'cac[acgt][acgt][acgt]gtg', - 'DrdI' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]gtc', - 'DrdII' => 'gaacca', - 'DsaI' => 'cc[ag][ct]gg', - 'Eam1105I' => 'gac[acgt][acgt][acgt][acgt][acgt]gtc', - 'EciI' => 'ggcgga', - 'Eco31I' => 'ggtctc', - 'Eco47III' => 'agcgct', - 'Eco57I' => 'ctgaag', - 'EcoNI' => 'cct[acgt][acgt][acgt][acgt][acgt]agg', - 'EcoRI' => 'gaattc', - 'EcoRII' => 'cc[at]gg', - 'EcoRV' => 'gatatc', - 'Esp3I' => 'cgtctc', - 'EspI' => 'gct[acgt]agc', - 'FauI' => 'cccgc', - 'FinI' => 'gggac', - 'Fnu4HI' => 'gc[acgt]gc', - 'FnuDII' => 'cgcg', - 'FokI' => 'ggatg', - 'FseI' => 'ggccggcc', - 'GdiII' => 'cggcc[ag]', - 'GsuI' => 'ctggag', - 'HaeI' => '[at]ggcc[at]', - 'HaeII' => '[ag]gcgc[ct]', - 'HaeIII' => 'ggcc', - 'HaeIV' => 'ga[ct][acgt][acgt][acgt][acgt][acgt][ag]tc', - 'HgaI' => 'gacgc', - 'HgiAI' => 'g[at]gc[at]c', - 'HgiCI' => 'gg[ct][ag]cc', - 'HgiEII' => 'acc[acgt][acgt][acgt][acgt][acgt][acgt]ggt', - 'HgiJII' => 'g[ag]gc[ct]c', - 'HhaI' => 'gcgc', - 'Hin4I' => 'ga[cgt][acgt][acgt][acgt][acgt][acgt][acg]tc', - 'HindII' => 'gt[ct][ag]ac', - 'HindIII' => 'aagctt', - 'HinfI' => 'ga[acgt]tc', - 'HpaI' => 'gttaac', - 'HpaII' => 'ccgg', - 'HphI' => 'ggtga', - 'Hpy178III' => 'tc[acgt][acgt]ga', - 'Hpy188I' => 'tc[acgt]ga', - 'Hpy99I' => 'cg[at]cg', - 'KpnI' => 'ggtacc', - 'Ksp632I' => 'ctcttc', - 'MaeI' => 'ctag', - 'MaeII' => 'acgt', - 'MaeIII' => 'gt[acgt]ac', - 'MboI' => 'gatc', - 'MboII' => 'gaaga', - 'McrI' => 'cg[ag][ct]cg', - 'MfeI' => 'caattg', - 'MjaIV' => 'gt[acgt][acgt]ac', - 'MluI' => 'acgcgt', - 'MmeI' => 'tcc[ag]ac', - 'MnlI' => 'cctc', - 'MseI' => 'ttaa', - 'MslI' => 'ca[ct][acgt][acgt][acgt][acgt][ag]tg', - 'MstI' => 'tgcgca', - 'MwoI' => 'gc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gc', - 'NaeI' => 'gccggc', - 'NarI' => 'ggcgcc', - 'NcoI' => 'ccatgg', - 'NdeI' => 'catatg', - 'NheI' => 'gctagc', - 'NlaIII' => 'catg', - 'NlaIV' => 'gg[acgt][acgt]cc', - 'NotI' => 'gcggccgc', - 'NruI' => 'tcgcga', - 'NspBII' => 'c[ac]gc[gt]g', - 'NspI' => '[ag]catg[ct]', - 'PacI' => 'ttaattaa', - 'Pfl1108I' => 'tcgtag', - 'PflMI' => 'cca[acgt][acgt][acgt][acgt][acgt]tgg', - 'PleI' => 'gagtc', - 'PmaCI' => 'cacgtg', - 'PmeI' => 'gtttaaac', - 'PpiI' => 'gaac[acgt][acgt][acgt][acgt][acgt]ctc', - 'PpuMI' => '[ag]gg[at]cc[ct]', - 'PshAI' => 'gac[acgt][acgt][acgt][acgt]gtc', - 'PsiI' => 'ttataa', - 'PstI' => 'ctgcag', - 'PvuI' => 'cgatcg', - 'PvuII' => 'cagctg', - 'RleAI' => 'cccaca', - 'RsaI' => 'gtac', - 'RsrII' => 'cgg[at]ccg', - 'SacI' => 'gagctc', - 'SacII' => 'ccgcgg', - 'SalI' => 'gtcgac', - 'SanDI' => 'ggg[at]ccc', - 'SapI' => 'gctcttc', - 'SauI' => 'cct[acgt]agg', - 'ScaI' => 'agtact', - 'ScrFI' => 'cc[acgt]gg', - 'SduI' => 'g[agt]gc[act]c', - 'SecI' => 'cc[acgt][acgt]gg', - 'SexAI' => 'acc[at]ggt', - 'SfaNI' => 'gcatc', - 'SfeI' => 'ct[ag][ct]ag', - 'SfiI' => 'ggcc[acgt][acgt][acgt][acgt][acgt]ggcc', - 'SgfI' => 'gcgatcgc', - 'SgrAI' => 'c[ag]ccgg[ct]g', - 'SimI' => 'gggtc', - 'SmaI' => 'cccggg', - 'SmlI' => 'ct[ct][ag]ag', - 'SnaBI' => 'tacgta', - 'SnaI' => 'gtatac', - 'SpeI' => 'actagt', - 'SphI' => 'gcatgc', - 'SplI' => 'cgtacg', - 'SrfI' => 'gcccgggc', - 'Sse232I' => 'cgccggcg', - 'Sse8387I' => 'cctgcagg', - 'Sse8647I' => 'agg[at]cct', - 'SspI' => 'aatatt', - 'Sth132I' => 'cccg', - 'StuI' => 'aggcct', - 'StyI' => 'cc[at][at]gg', - 'SwaI' => 'atttaaat', - 'TaqI' => 'tcga', - 'TaqII' => 'gaccga', - 'TatI' => '[at]gtac[at]', - 'TauI' => 'gc[cg]gc', - 'TfiI' => 'ga[at]tc', - 'TseI' => 'gc[at]gc', - 'Tsp45I' => 'gt[cg]ac', - 'Tsp4CI' => 'ac[acgt]gt', - 'TspEI' => 'aatt', - 'TspRI' => 'ca[cg]tg[acgt][acgt]', - 'Tth111I' => 'gac[acgt][acgt][acgt]gtc', - 'Tth111II' => 'caa[ag]ca', - 'UbaGI' => 'cac[acgt][acgt][acgt][acgt]gtg', - 'UbaPI' => 'cgaacg', - 'VspI' => 'attaat', - 'XbaI' => 'tctaga', - 'XcmI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt]tgg', - 'XhoI' => 'ctcgag', - 'XhoII' => '[ag]gatc[ct]', - 'XmaIII' => 'cggccg', - 'XmnI' => 'gaa[acgt][acgt][acgt][acgt]ttc' - ); - - return %enzymes; -} - -1; diff --git a/t/Variation/AAChange.t b/t/Variation/AAChange.t deleted file mode 100644 index ebacbf98e..000000000 --- a/t/Variation/AAChange.t +++ /dev/null @@ -1,96 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 27); - - use_ok('Bio::Variation::Allele'); - use_ok('Bio::Variation::AAChange'); - use_ok('Bio::Variation::RNAChange'); -} - - -ok my $obj = Bio::Variation::AAChange->new(); -isa_ok $obj, 'Bio::Variation::AAChange'; - -$obj->start(3); -is $obj->start, 3; - -$obj->end(3); -is $obj->end, 3; - -$obj->length(3); - -is $obj->length, 3; - -$obj->strand('1'); -is $obj->strand, '1'; - -is $obj->primary_tag, 'Variation'; - -$obj->source_tag('source'); -is $obj->source_tag, 'source'; - -$obj->frame(2); -is $obj->frame,2; - -$obj->score(2); -is $obj->score, 2; - -$obj->isMutation(1); -ok $obj->isMutation; - -my $a1 = Bio::Variation::Allele->new(-seq => 'V'); -$obj->allele_ori($a1); - -is $obj->allele_ori->seq, 'V'; - -my $a2 = Bio::Variation::Allele->new('-seq' => 'A'); -$obj->add_Allele($a2); - -is $obj->allele_mut->seq, 'A'; - -is $obj->similarity_score, 0; - -$obj->upStreamSeq('upStreamSeq'); -is $obj->upStreamSeq, 'upStreamSeq'; - -$obj->dnStreamSeq('dnStreamSeq'); -is $obj->dnStreamSeq, 'dnStreamSeq' ; - -is $obj->label, 'substitution, conservative'; - -$obj->status('proven'); -is $obj->status, 'proven'; - -$obj->proof('experimental'); -is $obj->proof, 'experimental'; - -$obj->region('region'); -is $obj->region, 'region'; - -$obj->region_value('region_value'); -is $obj->region_value, 'region_value'; - -$obj->numbering('coding'); -is $obj->numbering, 'coding'; - -my $obj2 = Bio::Variation::RNAChange->new(-start => 7, - -end => 7, - -cds_end => 100, - -codon_pos => 1, - -upStreamSeq => 'acgcgcgcgc', - -dnStreamSeq => 'acgcgcgcgc' - ); -$obj2->label('missense'); -$obj->RNAChange($obj2); - -is $obj->trivname, 'V3A', "Trivial name is [". $obj->trivname. "]"; - -$obj->mut_number(2); -is $obj->mut_number, 2; diff --git a/t/Variation/AAReverseMutate.t b/t/Variation/AAReverseMutate.t deleted file mode 100644 index 3f113804e..000000000 --- a/t/Variation/AAReverseMutate.t +++ /dev/null @@ -1,53 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 16); - - use_ok('Bio::Variation::AAReverseMutate'); -} - -my $obj = Bio::Variation::AAReverseMutate->new - ('-aa_ori' => 'F', - '-aa_mut' => 'S' - ); -ok defined $obj; -isa_ok($obj, 'Bio::Variation::AAReverseMutate'); - -is $obj->aa_ori, 'F'; - -is $obj->aa_mut, 'S'; - -my @points = $obj->each_Variant; -# F>S has two solutions -is scalar @points, 2; - -$obj->codon_ori('ttc'); -ok defined $obj; - -#now there should be only one left -@points = $obj->each_Variant; -is scalar @points, 1; - -$obj->codon_table(3); -is $obj->codon_table, 3; - -#Check the returned object -my $rna = pop @points; -isa_ok($rna, 'Bio::Variation::RNAChange'); - -is $rna->length, 1; -is $rna->allele_ori->seq, 't'; -is $rna->allele_mut->seq, 'c'; - -is $rna->codon_ori, 'ttc', "Codon_ori is |". $rna->codon_ori. "|"; - -is $rna->codon_pos, 2; - -$obj->codon_table(11); -is $obj->codon_table, 11; diff --git a/t/Variation/Allele.t b/t/Variation/Allele.t deleted file mode 100644 index ef72fd04d..000000000 --- a/t/Variation/Allele.t +++ /dev/null @@ -1,44 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 14); - - use_ok('Bio::Variation::Allele'); -} - -my($a,$trunc,$rev); - -$a = Bio::Variation::Allele->new(-seq=>'ACTGACTGACTG', - -display_id => 'new-id', - -alphabet => 'dna', - -accession_number => 'X677667', - -desc=>'Sample Bio::Seq object'); -isa_ok($a, 'Bio::Variation::Allele'); - -is $a->accession_number(), 'X677667'; -is $a->seq(), 'ACTGACTGACTG'; -is $a->display_id(),'new-id' ; -is $a->desc, 'Sample Bio::Seq object'; -is $a->alphabet(), 'dna'; - -ok defined($trunc = $a->trunc(1,4)); -is $trunc->seq(), 'ACTG'; - -ok defined($rev = $a->revcom()); -is $rev->seq(), 'CAGTCAGTCAGT'; - -$a->is_reference(1); -ok $a->is_reference; - -$a->repeat_unit('ACTG'); -is $a->repeat_unit, 'ACTG'; - -$a->repeat_count(3); -is $a->repeat_count, 3; diff --git a/t/Variation/DNAMutation.t b/t/Variation/DNAMutation.t deleted file mode 100644 index 1ea49ce2f..000000000 --- a/t/Variation/DNAMutation.t +++ /dev/null @@ -1,130 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 37); - - use_ok('Bio::Variation::DNAMutation'); - use_ok('Bio::Variation::Allele'); -} - -my($obj,$a1,$a2,$obj2); -$obj = Bio::Variation::DNAMutation -> new; - -ok defined $obj; - -$obj->start(3); -is $obj->start, 3; - -$obj->end(3); -is $obj->end, 3; - -$obj->length(2); -is $obj->length, 2; - -$obj->strand('1'); -is $obj->strand, '1'; - -is $obj->primary_tag, 'Variation'; - -$obj->source_tag('source'); -is $obj->source_tag, 'source'; - -$obj->frame(2); -is $obj->frame,2; - -$obj->score(2); -is $obj->score, 2; - -if( $obj->can('dna_mut') ) { - #test gff string - $obj->dna_mut('dna_mut'); - is( $obj->dna_mut,'dna_mut'); -} - -$a1 = Bio::Variation::Allele->new(-seq => 'c'); -$obj->allele_ori($a1); - -is $obj->allele_ori->seq, 'c'; - -$a2 = Bio::Variation::Allele->new('-seq' => 'g'); -$obj->allele_mut($a2); - -is $obj->allele_mut->seq, 'g'; - -$obj->upStreamSeq('agcacctcccggcgccagtttgctg'); -is $obj->upStreamSeq, 'agcacctcccggcgccagtttgctg'; - -$obj->dnStreamSeq('tgctgcagcagcagcagcagcagca'); -is $obj->dnStreamSeq, 'tgctgcagcagcagcagcagcagca'; - - -is $obj->label, 'point, transversion' ; - -$obj->status('proven'); -is $obj->status, 'proven'; - - -$obj->proof('experimental'); -is $obj->proof, 'experimental'; - - -is $obj->restriction_changes, '-BbvI, +BstXI, -Fnu4HI, -TseI'; - -$obj->region('region'); -is $obj->region, 'region'; - -$obj->region_value('region_value'); -is $obj->region_value, 'region_value'; - -$obj->region_dist(-5); -is $obj->region_dist, -5; - -$obj->numbering('coding'); -is $obj->numbering, 'coding'; - -ok not $obj->CpG; - -$obj->mut_number(2); -is $obj->mut_number, 2; - - -ok defined ($obj2 = Bio::Variation::DNAMutation -> new - ('-mut_number' => 2)); - -is $obj2->mut_number, 2; - - -$obj->isMutation(1); -ok $obj->isMutation; - -$obj->add_Allele($a1); -$obj->add_Allele($a2); - -is scalar ($obj->each_Allele), 2; - - -$obj = Bio::Variation::DNAMutation->new - ('-start' => 23, - '-end' => 24, - '-length' => 2, - '-upStreamSeq' => 'gt', - '-dnStreamSeq' => 'at', - '-proof' => 'experimental', - '-isMutation' => 1, - '-mut_number' => 2 - ); - -is $obj->start(), 23; -is $obj->end(), 24; -is $obj->length(), 2; -is $obj->upStreamSeq(), 'gt'; -is $obj->dnStreamSeq(), 'at'; -is $obj->proof(), 'experimental'; -is $obj->mut_number(), 2; -ok $obj->isMutation; diff --git a/t/Variation/RNAChange.t b/t/Variation/RNAChange.t deleted file mode 100644 index cb64724b8..000000000 --- a/t/Variation/RNAChange.t +++ /dev/null @@ -1,110 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 31); - - use_ok('Bio::Variation::Allele'); - use_ok('Bio::Variation::RNAChange'); - use_ok('Bio::Variation::AAChange'); -} - -ok my $obj = Bio::Variation::RNAChange->new(); - -$obj->start(4); -is $obj->start, 4; - -$obj->end(4); -is $obj->end, 4; - -$obj->length(1); - -is $obj->length, 1; - -$obj->strand('1'); -is $obj->strand, '1'; - -is ($obj->primary_tag, 'Variation' ); - -$obj->source_tag('source'); -is ($obj->source_tag, 'source' ); - -$obj->frame(2); -is ($obj->frame, 2 ); - -$obj->score(2); -is ($obj->score, 2 ); - -#test gff string -#$obj->dna_mut('dna_mut'); -#if ($obj->dna_mut eq 'dna_mut' ) { -# print "ok 11\n"; -#} else { -# print "not ok 11\n"; -#} - -my $a1 = Bio::Variation::Allele->new(-seq => 'g'); -$obj->allele_ori($a1); - -is( $obj->allele_ori->seq, 'g' ); - -my $a2 = Bio::Variation::Allele->new('-seq' => 'a'); -$obj->allele_mut($a2); - -is($obj->allele_mut->seq, 'a' ); - -$obj->upStreamSeq('gaagattcagccaagctcaaggatg'); -is ($obj->upStreamSeq, 'gaagattcagccaagctcaaggatg' ); - -$obj->cds_end(1000); -is ($obj->cds_end, 1000 ); - -$obj->dnStreamSeq('aagtgcagttagggctgggaagggt'); -is ($obj->dnStreamSeq, 'aagtgcagttagggctgggaagggt' ); - -$obj->codon_pos(1); -is ($obj->codon_pos, 1 ); - -my $obj3 = Bio::Variation::AAChange->new(); -$obj3->start(2); -$obj->AAChange($obj3); -$obj3->allele_ori($a1); -$obj3->allele_mut($a2); - -is ($obj->label, 'missense' , "label is". $obj->label); - - -$obj->status('proven'); -is ($obj->status, 'proven' ); - -$obj->proof('experimental'); -is ($obj->proof, 'experimental' ); -is ($obj->restriction_changes, '-BccI' ); - -$obj->region('coding'); -is ($obj->region, 'coding' ); -$obj->numbering('coding'); -is ($obj->numbering, 'coding' ); - -is ($obj->codon_ori, 'gaa', "Codon_ori is |". $obj->codon_ori. "|"); - -is($obj->codon_mut, 'aaa' , "Codon_mut is |". $obj->codon_mut. "|"); - - -$obj->codon_pos(1); -is ($obj->codon_pos, 1 ); -is( $obj->codon_table, 1 ); - -$obj->codon_table(3); -is ( $obj->codon_table, 3 ); - -$obj->mut_number(2); -is ( $obj->mut_number, 2 ); - -$obj->verbose(2); -is ( $obj->verbose, 2 ); diff --git a/t/Variation/SNP.t b/t/Variation/SNP.t deleted file mode 100644 index 86cbe57eb..000000000 --- a/t/Variation/SNP.t +++ /dev/null @@ -1,34 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 13); - - use_ok('Bio::Variation::SNP'); -} - -my($a); - -# -# SNP -# - -ok $a = Bio::Variation::SNP->new(); -is $a->id('123'), 123; -eval { $a->di('123'); }; -ok $@; -is $a->validated('by-cluster'), 'by-cluster'; -my @alleles = ('A', 'T'); -is $a->validated(\@alleles), \@alleles; -is $a->desc('abc'), 'abc'; # Bio::Variation::Allele method -is $a->chromosome('X'), 'X'; # Bio::Variation::Allele method -ok my $s = $a->add_subsnp; -ok $s->is_subsnp; -is $s->handle('HGBASE'), 'HGBASE'; -ok $a->add_subsnp; -is $a->each_subsnp, 2; diff --git a/t/Variation/SeqDiff.t b/t/Variation/SeqDiff.t deleted file mode 100644 index f1c9710cd..000000000 --- a/t/Variation/SeqDiff.t +++ /dev/null @@ -1,99 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 44); - - use_ok('Bio::Variation::SeqDiff'); - use_ok('Bio::Variation::DNAMutation'); - use_ok('Bio::Variation::Allele'); -} - -my ($obj, $mm, $aa, $dna, $m); - -ok $obj = Bio::Variation::SeqDiff->new(); - -ok $obj->id('id'); -is $obj->id, 'id'; - -ok $obj->sysname('sysname'); -is $obj->sysname, 'sysname'; - -$obj->trivname('trivname'); -is $obj->trivname, 'trivname'; - -ok $obj->chromosome('chr'); -is $obj->chromosome, 'chr'; - -ok $obj->description('desc'); -is $obj->description, 'desc'; - -ok $obj->numbering('numbering'); -is $obj->numbering, 'numbering'; - -ok $obj->offset(100); -is $obj->offset, 100; -# 12345678901234567890 -ok $obj->dna_ori ('gctgctgatcgatcgtagctagctag'); -is $obj->dna_ori, 'gctgctgatcgatcgtagctagctag'; - -# generate mutated DNA seq from the mutation -ok $m = Bio::Variation::DNAMutation->new(-isMutation => 1, -start=>14, -end=>14); -ok $a = Bio::Variation::Allele->new(-seq=>'c'); -$b = Bio::Variation::Allele->new(-seq=>'g'); -ok $m->allele_ori($a); -ok $m->allele_mut($b); -ok $obj->add_Variant($m); -my $m2 = Bio::Variation::DNAMutation->new(-isMutation => 1, -start=>19, -end=>19); -my $a2 = Bio::Variation::Allele->new(-seq=>'c'); -my $b2 = Bio::Variation::Allele->new(-seq=>'g'); -$m2->allele_ori($a2); -$m2->allele_mut($b2); -$obj->add_Variant($m2); - -#ok $obj->dna_mut('gctgctgatcggtcgtagctagctag'); -is $obj->dna_mut, 'gctgctgatcgatggtaggtagctag'; - -ok $obj->rna_ori('gctgctgatcgatcgtagctagctag'); -is $obj->rna_ori, 'gctgctgatcgatcgtagctagctag'; - -$obj->rna_mut('gctgctgatcgatcgtagctagctag'); -is $obj->rna_mut, 'gctgctgatcgatcgtagctagctag'; - -ok $obj->aa_ori('MHYTRD'); -is $obj->aa_ori, 'MHYTRD'; - -ok $obj->aa_mut('MHGTRD'); -is $obj->aa_mut, 'MHGTRD'; - -foreach $mm ( $obj->each_Variant ) { - $mm->primary_tag('a'); - isa_ok($mm,'Bio::Variation::VariantI'); -} - - -ok $obj->gene_symbol('fos'); -is $obj->gene_symbol, 'fos'; - -ok $obj->rna_offset(10); -is $obj->rna_offset, 10; - -ok $obj->rna_id('transcript#3'); -is $obj->rna_id, 'transcript#3'; - -ok $dna = $obj->seqobj('dna_ori'); -isa_ok($dna,'Bio::PrimarySeq'); - -$obj->aa_mut(''); -$aa = $obj->seqobj('aa_mut'); -ok not defined $aa; - -eval { - $dna = $obj->seqobj('dna_ri'); -}; -ok $@; diff --git a/t/Variation/Variation_IO.t b/t/Variation/Variation_IO.t deleted file mode 100644 index 1e8bc59e8..000000000 --- a/t/Variation/Variation_IO.t +++ /dev/null @@ -1,104 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 26, - -requires_modules => ['Text::Wrap 98', 'XML::Writer']); - - use_ok('Bio::Variation::IO'); -} - -sub io { - my ($t_file, $o_file, $out_format) = @_; - my $res; - - my ($o_ext) = $out_format eq 'flat' ? 'dat' : 'xml'; - my ($o_format) = $out_format; - my ($t_name) = $t_file =~ /(.*)....$/; - - my( $before ); - { - local $/ = undef; - open my $BEFORE, '<', "$t_name.$o_ext" or die "Could not read file '$t_name.$o_ext': $!\n"; - $before = <$BEFORE>; - close $BEFORE; - } - - ok $before;#"Error in reading input file [$t_name.$o_ext]"; - - my $in = Bio::Variation::IO->new( -file => $t_file); - my @entries ; - while (my $e = $in->next) { - push @entries, $e; - } - my $count = scalar @entries; - cmp_ok @entries, '>', 0;# "No SeqDiff objects [$count]"; - - my $out = Bio::Variation::IO->new( -FILE => ">$o_file", - -FORMAT => $o_format); - my $out_ok = 1; - foreach my $e (@entries) { - $out->write($e) or $out_ok = 0; - } - undef($out); # Flush to disk - ok $out_ok;# "error writing variants"; - - my( $after ); - { - local $/ = undef; - open my $AFTER, '<', $o_file or die "Could not read file '$o_file': $!\n"; - $after = <$AFTER>; - close $AFTER; - } - - ok $after;# "Error in reading in again the output file [$o_file]"; - is $before, $after, "test output file compared to input"; - print STDERR `diff $t_file $o_file` if $before ne $after; -} - -io (test_input_file('mutations.dat'), - test_output_file(), 'flat'); #1..5 -io (test_input_file('polymorphism.dat'), - test_output_file(), 'flat'); #6..10 - -SKIP: { - test_skip(-tests => 15, -requires_modules => [qw(XML::Twig - XML::Writer - IO::String)]); - - eval { - if( $XML::Writer::VERSION >= 0.5 ) { - io (test_input_file('mutations.xml'), - test_output_file(), 'xml'); #10..12 - } else { - io (test_input_file('mutations.old.xml'), - test_output_file(), 'xml'); #10..12 - } - }; - - eval { - if( $XML::Writer::VERSION >= 0.5 ) { - io (test_input_file('polymorphism.xml'), - test_output_file(), 'xml'); #13..14 - } else { - io (test_input_file('polymorphism.old.xml'), - test_output_file(), 'xml'); #13..14 - - } - }; - - eval { - if( $XML::Writer::VERSION >= 0.5 ) { - io (test_input_file('mutations.dat'), - test_output_file(), 'xml'); #15..25 - } else { - io (test_input_file('mutations.old.dat'), - test_output_file(), 'xml'); #15..25 - } - }; -} diff --git a/t/data/mutations.dat b/t/data/mutations.dat deleted file mode 100644 index a60090b13..000000000 --- a/t/data/mutations.dat +++ /dev/null @@ -1,350 +0,0 @@ -ID M20132:(362)c.+4G>A; E2K -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: computed -Feature /location: 4 -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g>a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature RNA; 1 -Feature /label: missense -Feature /proof: experimental -Feature /location: 4 (M20132::366) -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g>a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature /codon_table: 1 -Feature /codon: gaa>aaa; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: substitution, conservative -Feature /proof: computed -Feature /location: 2 -Feature /change: E>K -// -ID M20132:(362)c.+14T>A; L5X -Feature DNA; 1 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: 14 -Feature /upflank: ccaagctcaaggatggaagtgcagt -Feature /change: t>a -Feature /dnflank: agggctgggaagggtctaccctcgg -Feature RNA; 1 -Feature /label: nonsense -Feature /proof: experimental -Feature /location: 14 (M20132::376) -Feature /upflank: ccaagctcaaggatggaagtgcagt -Feature /change: t>a -Feature /dnflank: agggctgggaagggtctaccctcgg -Feature /codon_table: 1 -Feature /codon: tta>taa; 2 -Feature /region: coding -Feature AA; 1 -Feature /label: truncation -Feature /proof: computed -Feature /location: 5 -Feature /change: L>* -// -ID M20132:(362)c.+4G>A; E2K -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: computed -Feature /location: 4 -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g>a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature RNA; 1 -Feature /label: missense -Feature /proof: experimental -Feature /location: 4 (M20132::366) -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g>a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature /codon_table: 1 -Feature /codon: gaa>aaa; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: substitution, conservative -Feature /proof: computed -Feature /location: 2 -Feature /change: E>K -// -ID M20132:(362)c.+100delATCCAG; I34del-2 -Feature DNA; 1 -Feature /label: deletion -Feature /proof: computed -Feature /location: 100..105 -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: atccag> -Feature /dnflank: aacccgggccccaggcacccagagg -Feature /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV -Feature RNA; 1 -Feature /label: inframe, deletion -Feature /proof: experimental -Feature /location: 100..105 (M20132::462..467) -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: atccag> -Feature /dnflank: aacccgggccccaggcacccagagg -Feature /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV -Feature /codon_table: 1 -Feature /codon: atc>-; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: deletion -Feature /proof: computed -Feature /location: 34..35 -Feature /change: IQ> -// -ID M20132:(362)c.+101delT; I34delX172 -Feature DNA; 1 -Feature /label: deletion -Feature /proof: computed -Feature /location: 101 -Feature /upflank: ctgttccagagcgtgcgcgaagtga -Feature /change: t> -Feature /dnflank: ccagaacccgggccccaggcaccca -Feature /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I -Feature RNA; 1 -Feature /label: frameshift, deletion -Feature /proof: experimental -Feature /location: 101 (M20132::463) -Feature /upflank: ctgttccagagcgtgcgcgaagtga -Feature /change: t> -Feature /dnflank: ccagaacccgggccccaggcaccca -Feature /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I -Feature /codon_table: 1 -Feature /codon: atc>-; 2 -Feature /region: coding -Feature AA; 1 -Feature /label: out-of-frame translation, truncation -Feature /proof: computed -Feature /location: 34 -Feature /change: I>TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAP -Feature GSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPAR -Feature GCRSSCQHLRTRMTQLPHPRCPCWAPLSPA* -// -ID M20132:(362)c.+101insGGGCCC; I34ins+2 -Feature DNA; 1 -Feature /label: insertion -Feature /proof: computed -Feature /location: 100^101 -Feature /upflank: ctgttccagagcgtgcgcgaagtga -Feature /change: >gggccc -Feature /dnflank: tccagaacccgggccccaggcaccc -Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, -Feature +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, -Feature +SduI -Feature RNA; 1 -Feature /label: inframe, insertion -Feature /proof: experimental -Feature /location: 100^101 (M20132::462^463) -Feature /upflank: ctgttccagagcgtgcgcgaagtga -Feature /change: >gggccc -Feature /dnflank: tccagaacccgggccccaggcaccc -Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, -Feature +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, -Feature +SduI -Feature /codon_table: 1 -Feature /codon: atc>-; 2 -Feature /region: coding -Feature AA; 1 -Feature /label: insertion, complex -Feature /proof: computed -Feature /location: 34 -Feature /change: I>RAL -// -ID M20132:(362)c.+100insG; I34ins81X -Feature DNA; 1 -Feature /label: insertion -Feature /proof: computed -Feature /location: 99^100 -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: >g -Feature /dnflank: atccagaacccgggccccaggcacc -Feature /re_site: +BamHI, +BinI, +NlaIV, +XhoII -Feature RNA; 1 -Feature /label: frameshift, insertion -Feature /proof: experimental -Feature /location: 99^100 (M20132::461^462) -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: >g -Feature /dnflank: atccagaacccgggccccaggcacc -Feature /re_site: +BamHI, +BinI, +NlaIV, +XhoII -Feature /codon_table: 1 -Feature /codon: atc>-; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: out-of-frame translation, truncation -Feature /proof: computed -Feature /location: 34 -Feature /change: I>DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* -// -ID M20132:(362)c.+100AT>GGGCCC; I34ins82X -Feature DNA; 1 -Feature /label: complex -Feature /proof: computed -Feature /location: 100..101 -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: at>gggccc -Feature /dnflank: ccagaacccgggccccaggcaccca -Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, -Feature +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI -Feature RNA; 1 -Feature /label: frameshift, complex -Feature /proof: experimental -Feature /location: 100..101 (M20132::462..463) -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: at>gggccc -Feature /dnflank: ccagaacccgggccccaggcaccca -Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, -Feature +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI -Feature /codon_table: 1 -Feature /codon: atc>-; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: out-of-frame translation, truncation -Feature /proof: computed -Feature /location: 34 -Feature /change: I>GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* -// -ID M20132:(362+1)c.-1G>A -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: computed -Feature /location: -1 -Feature /upflank: ggtggaagattcagccaagctcaag -Feature /change: g>a -Feature /dnflank: atggaagtgcagttagggctgggaa -Feature /re_site: -BccI, -FokI, +Hpy178III -Feature RNA; 1 -Feature /label: unknown -Feature /proof: experimental -Feature /location: -1 (M20132::361) -Feature /upflank: ggtggaagattcagccaagctcaag -Feature /change: g>a -Feature /dnflank: atggaagtgcagttagggctgggaa -Feature /re_site: -BccI, -FokI, +Hpy178III -Feature /region: 5'UTR -// -ID M20132:(362)c.+2766T>C -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: computed -Feature /location: 2766 -Feature /upflank: tctatttccacacccagtgaagcat -Feature /change: t>c -Feature /dnflank: ggaaaccctatttccccaccccagc -Feature /re_site: +Hpy188I, +SfaNI, -XcmI -Feature RNA; 1 -Feature /label: unknown -Feature /proof: experimental -Feature /location: 2766 (M20132::3128) -Feature /upflank: tctatttccacacccagtgaagcat -Feature /change: t>c -Feature /dnflank: ggaaaccctatttccccaccccagc -Feature /re_site: +Hpy188I, +SfaNI, -XcmI -Feature /region: 3'UTR -// -ID J02933:(521)g.+12165A>G -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: experimental -Feature /location: 12165 (J02933::12686) -Feature /upflank: cgcacacctgtggtgcctgccaccc -Feature /change: a>g -Feature /dnflank: ctgggttgcccatgattcatttttg -Feature /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -Feature -TspRI -Feature /region: 3'UTR; (+1027) -Feature RNA; 1 -Feature /label: unknown -Feature /proof: computed -Feature /location: 2428 -Feature /upflank: cgcacacctgtggtgcctgccaccc -Feature /change: a>g -Feature /dnflank: ctgggttgcccatgattcatttttg -Feature /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -Feature -TspRI -Feature /region: 3'UTR; (-1) -// -ID J02933:(521)g.+4G>T; V2F -Feature DNA; 1 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: 4 (J02933::525) -Feature /upflank: gcagcactgcagagatttcatcatg -Feature /change: g>t -Feature /dnflank: tctcccaggccctcaggctcctctg -Feature /re_site: -BsmAI, -Eco31I -Feature /region: exon; 1 (+4) -Feature RNA; 1 -Feature /label: missense -Feature /proof: experimental -Feature /location: 4 -Feature /upflank: gcagcactgcagagatttcatcatg -Feature /change: g>t -Feature /dnflank: tctcccaggccctcaggctcctctg -Feature /re_site: -BsmAI, -Eco31I -Feature /codon_table: 1 -Feature /codon: gtc>ttc; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: substitution, nonconservative -Feature /proof: computed -Feature /location: 2 -Feature /change: V>F -// -ID J02933:(521)g.+1168G>T; D34Y -Feature DNA; 1 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: 1168 (J02933::1689) -Feature /upflank: taaggcctcaggaggagaaacacgg -Feature /change: g>t -Feature /dnflank: acatgccgtggaagccggggcctca -Feature /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, -Feature +Tsp4CI -Feature /region: exon; 1 (-29) -Feature RNA; 1 -Feature /label: missense -Feature /proof: experimental -Feature /location: 100 -Feature /upflank: taaggcctcaggaggagaaacacgg -Feature /change: g>t -Feature /dnflank: acatgccgtggaagccggggcctca -Feature /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, -Feature +Tsp4CI -Feature /codon_table: 1 -Feature /codon: gac>tac; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: substitution, nonconservative -Feature /proof: computed -Feature /location: 34 -Feature /change: D>Y -// -ID J02933:(521+1)g.-4C>G -Feature DNA; 1 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: -4 (J02933::518) -Feature /upflank: ggcaggggcagcactgcagagattt -Feature /change: c>g -Feature /dnflank: atcatggtctcccaggccctcaggc -Feature /re_site: +BclI, +DpnI, +MboI -Feature /region: 5'UTR; (-4) -Feature RNA; 1 -Feature /label: unknown -Feature /proof: experimental -Feature /location: -4 -Feature /upflank: ggcaggggcagcactgcagagattt -Feature /change: c>g -Feature /dnflank: atcatggtctcccaggccctcaggc -Feature /re_site: +BclI, +DpnI, +MboI -Feature /region: 5'UTR; (+31) -// diff --git a/t/data/mutations.old.dat b/t/data/mutations.old.dat deleted file mode 100644 index a60090b13..000000000 --- a/t/data/mutations.old.dat +++ /dev/null @@ -1,350 +0,0 @@ -ID M20132:(362)c.+4G>A; E2K -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: computed -Feature /location: 4 -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g>a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature RNA; 1 -Feature /label: missense -Feature /proof: experimental -Feature /location: 4 (M20132::366) -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g>a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature /codon_table: 1 -Feature /codon: gaa>aaa; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: substitution, conservative -Feature /proof: computed -Feature /location: 2 -Feature /change: E>K -// -ID M20132:(362)c.+14T>A; L5X -Feature DNA; 1 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: 14 -Feature /upflank: ccaagctcaaggatggaagtgcagt -Feature /change: t>a -Feature /dnflank: agggctgggaagggtctaccctcgg -Feature RNA; 1 -Feature /label: nonsense -Feature /proof: experimental -Feature /location: 14 (M20132::376) -Feature /upflank: ccaagctcaaggatggaagtgcagt -Feature /change: t>a -Feature /dnflank: agggctgggaagggtctaccctcgg -Feature /codon_table: 1 -Feature /codon: tta>taa; 2 -Feature /region: coding -Feature AA; 1 -Feature /label: truncation -Feature /proof: computed -Feature /location: 5 -Feature /change: L>* -// -ID M20132:(362)c.+4G>A; E2K -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: computed -Feature /location: 4 -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g>a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature RNA; 1 -Feature /label: missense -Feature /proof: experimental -Feature /location: 4 (M20132::366) -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g>a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature /codon_table: 1 -Feature /codon: gaa>aaa; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: substitution, conservative -Feature /proof: computed -Feature /location: 2 -Feature /change: E>K -// -ID M20132:(362)c.+100delATCCAG; I34del-2 -Feature DNA; 1 -Feature /label: deletion -Feature /proof: computed -Feature /location: 100..105 -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: atccag> -Feature /dnflank: aacccgggccccaggcacccagagg -Feature /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV -Feature RNA; 1 -Feature /label: inframe, deletion -Feature /proof: experimental -Feature /location: 100..105 (M20132::462..467) -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: atccag> -Feature /dnflank: aacccgggccccaggcacccagagg -Feature /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV -Feature /codon_table: 1 -Feature /codon: atc>-; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: deletion -Feature /proof: computed -Feature /location: 34..35 -Feature /change: IQ> -// -ID M20132:(362)c.+101delT; I34delX172 -Feature DNA; 1 -Feature /label: deletion -Feature /proof: computed -Feature /location: 101 -Feature /upflank: ctgttccagagcgtgcgcgaagtga -Feature /change: t> -Feature /dnflank: ccagaacccgggccccaggcaccca -Feature /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I -Feature RNA; 1 -Feature /label: frameshift, deletion -Feature /proof: experimental -Feature /location: 101 (M20132::463) -Feature /upflank: ctgttccagagcgtgcgcgaagtga -Feature /change: t> -Feature /dnflank: ccagaacccgggccccaggcaccca -Feature /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I -Feature /codon_table: 1 -Feature /codon: atc>-; 2 -Feature /region: coding -Feature AA; 1 -Feature /label: out-of-frame translation, truncation -Feature /proof: computed -Feature /location: 34 -Feature /change: I>TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAP -Feature GSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPAR -Feature GCRSSCQHLRTRMTQLPHPRCPCWAPLSPA* -// -ID M20132:(362)c.+101insGGGCCC; I34ins+2 -Feature DNA; 1 -Feature /label: insertion -Feature /proof: computed -Feature /location: 100^101 -Feature /upflank: ctgttccagagcgtgcgcgaagtga -Feature /change: >gggccc -Feature /dnflank: tccagaacccgggccccaggcaccc -Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, -Feature +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, -Feature +SduI -Feature RNA; 1 -Feature /label: inframe, insertion -Feature /proof: experimental -Feature /location: 100^101 (M20132::462^463) -Feature /upflank: ctgttccagagcgtgcgcgaagtga -Feature /change: >gggccc -Feature /dnflank: tccagaacccgggccccaggcaccc -Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, -Feature +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, -Feature +SduI -Feature /codon_table: 1 -Feature /codon: atc>-; 2 -Feature /region: coding -Feature AA; 1 -Feature /label: insertion, complex -Feature /proof: computed -Feature /location: 34 -Feature /change: I>RAL -// -ID M20132:(362)c.+100insG; I34ins81X -Feature DNA; 1 -Feature /label: insertion -Feature /proof: computed -Feature /location: 99^100 -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: >g -Feature /dnflank: atccagaacccgggccccaggcacc -Feature /re_site: +BamHI, +BinI, +NlaIV, +XhoII -Feature RNA; 1 -Feature /label: frameshift, insertion -Feature /proof: experimental -Feature /location: 99^100 (M20132::461^462) -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: >g -Feature /dnflank: atccagaacccgggccccaggcacc -Feature /re_site: +BamHI, +BinI, +NlaIV, +XhoII -Feature /codon_table: 1 -Feature /codon: atc>-; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: out-of-frame translation, truncation -Feature /proof: computed -Feature /location: 34 -Feature /change: I>DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* -// -ID M20132:(362)c.+100AT>GGGCCC; I34ins82X -Feature DNA; 1 -Feature /label: complex -Feature /proof: computed -Feature /location: 100..101 -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: at>gggccc -Feature /dnflank: ccagaacccgggccccaggcaccca -Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, -Feature +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI -Feature RNA; 1 -Feature /label: frameshift, complex -Feature /proof: experimental -Feature /location: 100..101 (M20132::462..463) -Feature /upflank: tctgttccagagcgtgcgcgaagtg -Feature /change: at>gggccc -Feature /dnflank: ccagaacccgggccccaggcaccca -Feature /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, -Feature +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI -Feature /codon_table: 1 -Feature /codon: atc>-; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: out-of-frame translation, truncation -Feature /proof: computed -Feature /location: 34 -Feature /change: I>GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* -// -ID M20132:(362+1)c.-1G>A -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: computed -Feature /location: -1 -Feature /upflank: ggtggaagattcagccaagctcaag -Feature /change: g>a -Feature /dnflank: atggaagtgcagttagggctgggaa -Feature /re_site: -BccI, -FokI, +Hpy178III -Feature RNA; 1 -Feature /label: unknown -Feature /proof: experimental -Feature /location: -1 (M20132::361) -Feature /upflank: ggtggaagattcagccaagctcaag -Feature /change: g>a -Feature /dnflank: atggaagtgcagttagggctgggaa -Feature /re_site: -BccI, -FokI, +Hpy178III -Feature /region: 5'UTR -// -ID M20132:(362)c.+2766T>C -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: computed -Feature /location: 2766 -Feature /upflank: tctatttccacacccagtgaagcat -Feature /change: t>c -Feature /dnflank: ggaaaccctatttccccaccccagc -Feature /re_site: +Hpy188I, +SfaNI, -XcmI -Feature RNA; 1 -Feature /label: unknown -Feature /proof: experimental -Feature /location: 2766 (M20132::3128) -Feature /upflank: tctatttccacacccagtgaagcat -Feature /change: t>c -Feature /dnflank: ggaaaccctatttccccaccccagc -Feature /re_site: +Hpy188I, +SfaNI, -XcmI -Feature /region: 3'UTR -// -ID J02933:(521)g.+12165A>G -Feature DNA; 1 -Feature /label: point, transition -Feature /proof: experimental -Feature /location: 12165 (J02933::12686) -Feature /upflank: cgcacacctgtggtgcctgccaccc -Feature /change: a>g -Feature /dnflank: ctgggttgcccatgattcatttttg -Feature /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -Feature -TspRI -Feature /region: 3'UTR; (+1027) -Feature RNA; 1 -Feature /label: unknown -Feature /proof: computed -Feature /location: 2428 -Feature /upflank: cgcacacctgtggtgcctgccaccc -Feature /change: a>g -Feature /dnflank: ctgggttgcccatgattcatttttg -Feature /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -Feature -TspRI -Feature /region: 3'UTR; (-1) -// -ID J02933:(521)g.+4G>T; V2F -Feature DNA; 1 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: 4 (J02933::525) -Feature /upflank: gcagcactgcagagatttcatcatg -Feature /change: g>t -Feature /dnflank: tctcccaggccctcaggctcctctg -Feature /re_site: -BsmAI, -Eco31I -Feature /region: exon; 1 (+4) -Feature RNA; 1 -Feature /label: missense -Feature /proof: experimental -Feature /location: 4 -Feature /upflank: gcagcactgcagagatttcatcatg -Feature /change: g>t -Feature /dnflank: tctcccaggccctcaggctcctctg -Feature /re_site: -BsmAI, -Eco31I -Feature /codon_table: 1 -Feature /codon: gtc>ttc; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: substitution, nonconservative -Feature /proof: computed -Feature /location: 2 -Feature /change: V>F -// -ID J02933:(521)g.+1168G>T; D34Y -Feature DNA; 1 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: 1168 (J02933::1689) -Feature /upflank: taaggcctcaggaggagaaacacgg -Feature /change: g>t -Feature /dnflank: acatgccgtggaagccggggcctca -Feature /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, -Feature +Tsp4CI -Feature /region: exon; 1 (-29) -Feature RNA; 1 -Feature /label: missense -Feature /proof: experimental -Feature /location: 100 -Feature /upflank: taaggcctcaggaggagaaacacgg -Feature /change: g>t -Feature /dnflank: acatgccgtggaagccggggcctca -Feature /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, -Feature +Tsp4CI -Feature /codon_table: 1 -Feature /codon: gac>tac; 1 -Feature /region: coding -Feature AA; 1 -Feature /label: substitution, nonconservative -Feature /proof: computed -Feature /location: 34 -Feature /change: D>Y -// -ID J02933:(521+1)g.-4C>G -Feature DNA; 1 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: -4 (J02933::518) -Feature /upflank: ggcaggggcagcactgcagagattt -Feature /change: c>g -Feature /dnflank: atcatggtctcccaggccctcaggc -Feature /re_site: +BclI, +DpnI, +MboI -Feature /region: 5'UTR; (-4) -Feature RNA; 1 -Feature /label: unknown -Feature /proof: experimental -Feature /location: -4 -Feature /upflank: ggcaggggcagcactgcagagattt -Feature /change: c>g -Feature /dnflank: atcatggtctcccaggccctcaggc -Feature /re_site: +BclI, +DpnI, +MboI -Feature /region: 5'UTR; (+31) -// diff --git a/t/data/mutations.old.xml b/t/data/mutations.old.xml deleted file mode 100644 index d7f84709e..000000000 --- a/t/data/mutations.old.xml +++ /dev/null @@ -1,402 +0,0 @@ - - - - - - computed - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - -BccI - - - - experimental - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - - -BccI - coding - - - - - computed - E - K - - - - - - - - computed - ccaagctcaaggatggaagtgcagt - t - a - agggctgggaagggtctaccctcgg - - - - experimental - ccaagctcaaggatggaagtgcagt - t - a - agggctgggaagggtctaccctcgg - - coding - - - - computed - L - * - - - - - - - - computed - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - -BccI - - - - experimental - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - - -BccI - coding - - - - - computed - E - K - - - - - - - computed - tctgttccagagcgtgcgcgaagtg - atccag - - aacccgggccccaggcacccagagg - -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV - - - - - experimental - tctgttccagagcgtgcgcgaagtg - atccag - - aacccgggccccaggcacccagagg - - -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV - coding - - - - computed - IQ - - - - - - - - computed - ctgttccagagcgtgcgcgaagtga - t - - ccagaacccgggccccaggcaccca - -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I - - - - - experimental - ctgttccagagcgtgcgcgaagtga - t - - ccagaacccgggccccaggcaccca - - -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I - coding - - - - - computed - I - TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAPGSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPARGCRSSCQHLRTRMTQLPHPRCPCWAPLSPA* - - - - - - - computed - ctgttccagagcgtgcgcgaagtga - - gggccc - tccagaacccgggccccaggcaccc - +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI - - - - - experimental - ctgttccagagcgtgcgcgaagtga - - gggccc - tccagaacccgggccccaggcaccc - - +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI - coding - - - - - computed - I - RAL - - - - - - - computed - tctgttccagagcgtgcgcgaagtg - - g - atccagaacccgggccccaggcacc - +BamHI, +BinI, +NlaIV, +XhoII - - - - - experimental - tctgttccagagcgtgcgcgaagtg - - g - atccagaacccgggccccaggcacc - - +BamHI, +BinI, +NlaIV, +XhoII - coding - - - - - computed - I - DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* - - - - - - - computed - tctgttccagagcgtgcgcgaagtg - at - gggccc - ccagaacccgggccccaggcaccca - +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI - - - - - experimental - tctgttccagagcgtgcgcgaagtg - at - gggccc - ccagaacccgggccccaggcaccca - - +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI - coding - - - - - computed - I - GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* - - - - - - - - computed - ggtggaagattcagccaagctcaag - g - a - atggaagtgcagttagggctgggaa - -BccI, -FokI, +Hpy178III - - - - experimental - ggtggaagattcagccaagctcaag - g - a - atggaagtgcagttagggctgggaa - -BccI, -FokI, +Hpy178III - 5'UTR - - - - - - - - computed - tctatttccacacccagtgaagcat - t - c - ggaaaccctatttccccaccccagc - +Hpy188I, +SfaNI, -XcmI - - - - experimental - tctatttccacacccagtgaagcat - t - c - ggaaaccctatttccccaccccagc - +Hpy188I, +SfaNI, -XcmI - 3'UTR - - - - - - - - experimental - cgcacacctgtggtgcctgccaccc - a - g - ctgggttgcccatgattcatttttg - +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI - 3'UTR - - - - computed - cgcacacctgtggtgcctgccaccc - a - g - ctgggttgcccatgattcatttttg - +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI - 3'UTR - - - - - - - - computed - gcagcactgcagagatttcatcatg - g - t - tctcccaggccctcaggctcctctg - -BsmAI, -Eco31I - exon - - - - experimental - gcagcactgcagagatttcatcatg - g - t - tctcccaggccctcaggctcctctg - - -BsmAI, -Eco31I - coding - - - - - computed - V - F - - - - - - - - computed - taaggcctcaggaggagaaacacgg - g - t - acatgccgtggaagccggggcctca - -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI - exon - - - - experimental - taaggcctcaggaggagaaacacgg - g - t - acatgccgtggaagccggggcctca - - -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI - coding - - - - - computed - D - Y - - - - - - - - computed - ggcaggggcagcactgcagagattt - c - g - atcatggtctcccaggccctcaggc - +BclI, +DpnI, +MboI - 5'UTR - - - - experimental - ggcaggggcagcactgcagagattt - c - g - atcatggtctcccaggccctcaggc - +BclI, +DpnI, +MboI - 5'UTR - - diff --git a/t/data/mutations.xml b/t/data/mutations.xml deleted file mode 100644 index f1e77a957..000000000 --- a/t/data/mutations.xml +++ /dev/null @@ -1,388 +0,0 @@ - - - - - computed - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - -BccI - - - - experimental - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - - -BccI - coding - - - - - computed - E - K - - - - - - - computed - ccaagctcaaggatggaagtgcagt - t - a - agggctgggaagggtctaccctcgg - - - - experimental - ccaagctcaaggatggaagtgcagt - t - a - agggctgggaagggtctaccctcgg - - coding - - - - computed - L - * - - - - - - - computed - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - -BccI - - - - experimental - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - - -BccI - coding - - - - - computed - E - K - - - - - - computed - tctgttccagagcgtgcgcgaagtg - atccag - - aacccgggccccaggcacccagagg - -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV - - - - - experimental - tctgttccagagcgtgcgcgaagtg - atccag - - aacccgggccccaggcacccagagg - - -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV - coding - - - - computed - IQ - - - - - - - computed - ctgttccagagcgtgcgcgaagtga - t - - ccagaacccgggccccaggcaccca - -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I - - - - - experimental - ctgttccagagcgtgcgcgaagtga - t - - ccagaacccgggccccaggcaccca - - -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I - coding - - - - - computed - I - TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAPGSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPARGCRSSCQHLRTRMTQLPHPRCPCWAPLSPA* - - - - - - computed - ctgttccagagcgtgcgcgaagtga - - gggccc - tccagaacccgggccccaggcaccc - +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI - - - - - experimental - ctgttccagagcgtgcgcgaagtga - - gggccc - tccagaacccgggccccaggcaccc - - +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI - coding - - - - - computed - I - RAL - - - - - - computed - tctgttccagagcgtgcgcgaagtg - - g - atccagaacccgggccccaggcacc - +BamHI, +BinI, +NlaIV, +XhoII - - - - - experimental - tctgttccagagcgtgcgcgaagtg - - g - atccagaacccgggccccaggcacc - - +BamHI, +BinI, +NlaIV, +XhoII - coding - - - - - computed - I - DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* - - - - - - computed - tctgttccagagcgtgcgcgaagtg - at - gggccc - ccagaacccgggccccaggcaccca - +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI - - - - - experimental - tctgttccagagcgtgcgcgaagtg - at - gggccc - ccagaacccgggccccaggcaccca - - +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI - coding - - - - - computed - I - GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD* - - - - - - - computed - ggtggaagattcagccaagctcaag - g - a - atggaagtgcagttagggctgggaa - -BccI, -FokI, +Hpy178III - - - - experimental - ggtggaagattcagccaagctcaag - g - a - atggaagtgcagttagggctgggaa - -BccI, -FokI, +Hpy178III - 5'UTR - - - - - - - computed - tctatttccacacccagtgaagcat - t - c - ggaaaccctatttccccaccccagc - +Hpy188I, +SfaNI, -XcmI - - - - experimental - tctatttccacacccagtgaagcat - t - c - ggaaaccctatttccccaccccagc - +Hpy188I, +SfaNI, -XcmI - 3'UTR - - - - - - - experimental - cgcacacctgtggtgcctgccaccc - a - g - ctgggttgcccatgattcatttttg - +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI - 3'UTR - - - - computed - cgcacacctgtggtgcctgccaccc - a - g - ctgggttgcccatgattcatttttg - +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI - 3'UTR - - - - - - - computed - gcagcactgcagagatttcatcatg - g - t - tctcccaggccctcaggctcctctg - -BsmAI, -Eco31I - exon - - - - experimental - gcagcactgcagagatttcatcatg - g - t - tctcccaggccctcaggctcctctg - - -BsmAI, -Eco31I - coding - - - - - computed - V - F - - - - - - - computed - taaggcctcaggaggagaaacacgg - g - t - acatgccgtggaagccggggcctca - -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI - exon - - - - experimental - taaggcctcaggaggagaaacacgg - g - t - acatgccgtggaagccggggcctca - - -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI - coding - - - - - computed - D - Y - - - - - - - computed - ggcaggggcagcactgcagagattt - c - g - atcatggtctcccaggccctcaggc - +BclI, +DpnI, +MboI - 5'UTR - - - - experimental - ggcaggggcagcactgcagagattt - c - g - atcatggtctcccaggccctcaggc - +BclI, +DpnI, +MboI - 5'UTR - - diff --git a/t/data/polymorphism.dat b/t/data/polymorphism.dat deleted file mode 100644 index a60f7ea47..000000000 --- a/t/data/polymorphism.dat +++ /dev/null @@ -1,74 +0,0 @@ -ID M20132:(362)[c.+4G|A|T;c.+31C|A]; [E2|K|X;Q11|K] -Feature DNA; 1.1 -Feature /label: point, transition -Feature /proof: computed -Feature /location: 4 -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g|a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature DNA; 1.2 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: 4 -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g|t -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature RNA; 1.1 -Feature /label: missense -Feature /proof: experimental -Feature /location: 4 (M20132::366) -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g|a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature /codon_table: 1 -Feature /codon: gaa|aaa; 1 -Feature /region: coding -Feature RNA; 1.2 -Feature /label: nonsense -Feature /proof: experimental -Feature /location: 4 (M20132::366) -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: g|t -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -BccI -Feature /codon_table: 1 -Feature /codon: gaa|taa; 1 -Feature /region: coding -Feature AA; 1.1 -Feature /label: substitution, conservative -Feature /proof: computed -Feature /location: 2 -Feature /change: E|K -Feature AA; 1.2 -Feature /label: truncation -Feature /proof: computed -Feature /location: 2 -Feature /change: E|* -Feature DNA; 2 -Feature /label: point, transversion -Feature /proof: computed -Feature /location: 31 -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: c|a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -CviRI, -SfaNI -Feature RNA; 2 -Feature /label: missense -Feature /proof: experimental -Feature /location: 31 (M20132::393) -Feature /upflank: gaagattcagccaagctcaaggatg -Feature /change: c|a -Feature /dnflank: aagtgcagttagggctgggaagggt -Feature /re_site: -CviRI, -SfaNI -Feature /codon_table: 1 -Feature /codon: caa|aaa; 1 -Feature /region: coding -Feature AA; 2 -Feature /label: substitution, conservative -Feature /proof: computed -Feature /location: 11 -Feature /change: Q|K -// diff --git a/t/data/polymorphism.old.xml b/t/data/polymorphism.old.xml deleted file mode 100644 index 3e3fec567..000000000 --- a/t/data/polymorphism.old.xml +++ /dev/null @@ -1,86 +0,0 @@ - - - - - - computed - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - -BccI - - - - - computed - gaagattcagccaagctcaaggatg - g - t - aagtgcagttagggctgggaagggt - -BccI - - - - experimental - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - - -BccI - coding - - - - experimental - gaagattcagccaagctcaaggatg - g - t - aagtgcagttagggctgggaagggt - - -BccI - coding - - - - - computed - E - K - - - - computed - E - * - - - - - computed - gaagattcagccaagctcaaggatg - c - a - aagtgcagttagggctgggaagggt - -CviRI, -SfaNI - - - - experimental - gaagattcagccaagctcaaggatg - c - a - aagtgcagttagggctgggaagggt - - -CviRI, -SfaNI - coding - - - - - computed - Q - K - - diff --git a/t/data/polymorphism.xml b/t/data/polymorphism.xml deleted file mode 100644 index edd3ae66b..000000000 --- a/t/data/polymorphism.xml +++ /dev/null @@ -1,85 +0,0 @@ - - - - - computed - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - -BccI - - - - - computed - gaagattcagccaagctcaaggatg - g - t - aagtgcagttagggctgggaagggt - -BccI - - - - experimental - gaagattcagccaagctcaaggatg - g - a - aagtgcagttagggctgggaagggt - - -BccI - coding - - - - experimental - gaagattcagccaagctcaaggatg - g - t - aagtgcagttagggctgggaagggt - - -BccI - coding - - - - - computed - E - K - - - - computed - E - * - - - - - computed - gaagattcagccaagctcaaggatg - c - a - aagtgcagttagggctgggaagggt - -CviRI, -SfaNI - - - - experimental - gaagattcagccaagctcaaggatg - c - a - aagtgcagttagggctgggaagggt - - -CviRI, -SfaNI - coding - - - - - computed - Q - K - - -- 2.11.4.GIT