Bio::Tools::CodonTable and Bio::Tools::IUPAC: prepare with dzil.
[bioperl-live.git] / lib / Bio / Tools / CodonTable.pm
blobbdf8a836c895e0c98d667558da7b29fe8939a326
1 package Bio::Tools::CodonTable;
3 use utf8;
4 use strict;
5 use warnings;
7 use Bio::Tools::IUPAC;
8 use Bio::SeqUtils;
10 use base qw(Bio::Root::Root);
12 # ABSTRACT: Codon table object
13 # AUTHOR: Heikki Lehvaslaiho <heikki@bioperl.org>
14 # OWNER: Heikki Lehvaslaiho <heikki@bioperl.org>
15 # LICENSE: Perl_5
17 =head1 SYNOPSIS
19 # This is a read-only class for all known codon tables. The IDs are
20 # the ones used by nucleotide sequence databases. All common IUPAC
21 # ambiguity codes for DNA, RNA and amino acids are recognized.
23 use Bio::Tools::CodonTable;
25 # defaults to ID 1 "Standard"
26 $myCodonTable = Bio::Tools::CodonTable->new();
27 $myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 );
29 # change codon table
30 $myCodonTable->id(5);
32 # examine codon table
33 print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
34 "is:", $myCodonTable->name(), "\n");
36 # print possible codon tables
37 $tables = Bio::Tools::CodonTable->tables;
38 while ( ($id,$name) = each %{$tables} ) {
39 print "$id = $name\n";
42 # translate a codon
43 $aa = $myCodonTable->translate('ACU');
44 $aa = $myCodonTable->translate('act');
45 $aa = $myCodonTable->translate('ytr');
47 # reverse translate an amino acid
48 @codons = $myCodonTable->revtranslate('A');
49 @codons = $myCodonTable->revtranslate('Ser');
50 @codons = $myCodonTable->revtranslate('Glx');
51 @codons = $myCodonTable->revtranslate('cYS', 'rna');
53 # reverse translate an entire amino acid sequence into a IUPAC
54 # nucleotide string
56 my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
57 my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
59 # boolean tests
60 print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
61 print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
62 print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
64 =head1 DESCRIPTION
66 Codon tables are also called translation tables or genetic codes
67 since that is what they represent. A bit more complete picture
68 of the full complexity of codon usage in various taxonomic groups
69 is presented at the NCBI Genetic Codes Home page.
71 CodonTable is a BioPerl class that knows all current translation
72 tables that are used by primary nucleotide sequence databases
73 (GenBank, EMBL and DDBJ). It provides methods to output information
74 about tables and relationships between codons and amino acids.
76 This class and its methods recognize all common IUPAC ambiguity codes
77 for DNA, RNA and amino acids. The translation method follows the
78 conventions in EMBL and TREMBL databases.
80 It is a nuisance to separate RNA and cDNA representations of nucleic
81 acid transcripts. The CodonTable object accepts codons of both type as
82 input and allows the user to set the mode for output when reverse
83 translating. Its default for output is DNA.
85 Note:
87 This class deals primarily with individual codons and amino
88 acids. However in the interest of speed you can L<translate>
89 longer sequence, too. The full complexity of protein translation
90 is tackled by L<Bio::PrimarySeqI::translate>.
93 The amino acid codes are IUPAC recommendations for common amino acids:
95 A Ala Alanine
96 R Arg Arginine
97 N Asn Asparagine
98 D Asp Aspartic acid
99 C Cys Cysteine
100 Q Gln Glutamine
101 E Glu Glutamic acid
102 G Gly Glycine
103 H His Histidine
104 I Ile Isoleucine
105 L Leu Leucine
106 K Lys Lysine
107 M Met Methionine
108 F Phe Phenylalanine
109 P Pro Proline
110 O Pyl Pyrrolysine (22nd amino acid)
111 U Sec Selenocysteine (21st amino acid)
112 S Ser Serine
113 T Thr Threonine
114 W Trp Tryptophan
115 Y Tyr Tyrosine
116 V Val Valine
117 B Asx Aspartic acid or Asparagine
118 Z Glx Glutamine or Glutamic acid
119 J Xle Isoleucine or Valine (mass spec ambiguity)
120 X Xaa Any or unknown amino acid
123 It is worth noting that the "Bacterial" codon table no. 11 produces a
124 polypeptide that is, confusingly, identical to the standard one. The
125 only differences are in available initiator codons.
128 NCBI Genetic Codes home page:
129 (Last update of the Genetic Codes: Apr. 25, 2024)
130 https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
132 The "value notation" / "print form" ASN.1 version is at:
133 ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
135 Thanks to Matteo diTomasso for the original Perl implementation
136 of these tables.
138 =cut
# Set internal values shared by all translation tables.
use constant CODONSIZE => 3;
our $GAP                = '-';
our $CODONGAP           = $GAP x CODONSIZE;
our %IUPAC_DNA          = Bio::Tools::IUPAC->iupac_iub();
our %IUPAC_AA           = Bio::Tools::IUPAC->iupac_iup();
our %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
our $VALID_PROTEIN      = '[' . join('', Bio::SeqUtils->valid_aa(0)) . ']';
our $TERMINATOR         = '*';

our (@NAMES, @TABLES, @STARTS);
# Parse the ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt file which
# is below __DATA__ in this module (see the end of the file).  This
# fills the @NAMES, @TABLES, and @STARTS variables.  To update to a
# new release of gc.prt, replace the content below __DATA__.
{
    # Initialise the tables hash with the special option (id=0) for an
    # ATG-only start.
    my %tables = (
        0 => {
            name     => "Strict",
            ncbieaa  => "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
            sncbieaa => "----------**--*--------------------M----------------------------",
        },
    );

    while (defined(my $line = <DATA>)) {
        next if $line =~ /^\s*--/;             # skip comment lines
        next unless $line =~ /^\s*\{\s*$/;     # wait for the start of a table description

        my $name     = "";
        my $id       = 0;
        my $ncbieaa  = "";
        my $sncbieaa = "";
        do {
            if ($line =~ /^\s*(name|id|ncbieaa|sncbieaa)\s+(.+)/) {
                my $key = $1;
                my $rem = $2;
                if ($key eq "id") {
                    # Only trust the capture when the match succeeded.
                    $id = int $1 if $rem =~ /^(\d+)/;
                } else {
                    # The remaining keys --- name, ncbieaa, and
                    # sncbieaa --- are strings which may be
                    # multi-line (e.g., name for table with id 4).
                    # We are assuming that there is no " character
                    # inside the value so we keep appending lines
                    # until we find an end ".
                    while ($rem !~ /^"(.*)"/ && ! eof DATA) {
                        $rem .= <DATA>;
                    }
                    $rem =~ s/\n//g;
                    # An unterminated string at end of data yields ''.
                    my $str = $rem =~ /^"(.*)"/ ? $1 : '';
                    if ($key eq "name" && ! $name) {
                        # ignore alternative names, e.g. SGC0,
                        # only keep the first name listed.
                        $name = $str;
                    } elsif ($key eq "ncbieaa") {
                        $ncbieaa = $str;
                    } elsif ($key eq "sncbieaa") {
                        $sncbieaa = $str;
                    }
                }
            }
            $line = <DATA>;
            # Stop at the end of the table description, or at end of data
            # so that a truncated table cannot cause an endless loop.
        } until (!defined $line || $line =~ /^\s*}\s*,?$/);

        $tables{$id} = {
            name     => $name,
            ncbieaa  => $ncbieaa,
            sncbieaa => $sncbieaa,
        };
    }
    close DATA;
    # use Data::Dumper;
    # print Dumper %tables;

    # After parsing gc.prt, fill in @NAMES, @TABLES, and @STARTS.  The
    # arrays are indexed by table id, so ids with no table get empty
    # strings.  The range is inclusive: the table with the highest id
    # must be registered too.
    my $highest_id = (sort {$a <=> $b} keys %tables)[-1];
    for my $i (0 .. $highest_id) {
        if (defined $tables{$i}) {
            push @NAMES,  $tables{$i}->{name};
            push @TABLES, $tables{$i}->{ncbieaa};
            push @STARTS, $tables{$i}->{sncbieaa};
        } else {
            push @NAMES,  '';
            push @TABLES, '';
            push @STARTS, '';
        }
    }
}

our ($TRCOL, $CODONS);
{
    # Enumerate the 64 codons with the first base varying slowest and the
    # bases ordered t, c, a, g (the layout of the Base1/Base2/Base3
    # comment rows in the gc.prt data).  $CODONS maps codon => column and
    # $TRCOL maps column => codon.
    my @bases = qw(t c a g);
    my @ordered_codons;
    for my $first (@bases) {
        for my $second (@bases) {
            for my $third (@bases) {
                push @ordered_codons, $first . $second . $third;
            }
        }
    }

    ($CODONS, $TRCOL) = ({}, {});
    my @columns = (0 .. $#ordered_codons);
    @{$CODONS}{@ordered_codons} = @columns;
    @{$TRCOL}{@columns}         = @ordered_codons;
}

# Constructor.
# Args   : -id => codon table id (optional, defaults to 1)
# Returns: a new Bio::Tools::CodonTable object
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    my ($id) = $self->_rearrange([qw(ID)], @args);

    # Fall back to table 1 only when no id was supplied.
    $id = 1 unless defined $id;

    # Test for a non-empty value rather than a true one, so that the
    # special table 0 can be selected through the constructor.  An empty
    # string is still ignored.
    $self->id($id) if length $id;

    return $self;    # success - we hope!
}

262 =head2 id
264 Title : id
265 Usage : $obj->id(3); $id_integer = $obj->id();
266 Function: Sets or returns the id of the translation table. IDs are
267 integers from 0 (special ATG-only start) to 25, excluding
268 7-8 and 17-20 which have been removed. If an invalid ID is
269 given the method returns 1, the standard table.
270 Example :
271 Returns : value of id, a scalar, warn and fall back to 1 (standard table)
272 if specified id is not valid
273 Args : newvalue (optional)
275 =cut
# Combined getter/setter for the codon table id.  A value whose slot in
# @TABLES is missing or empty triggers a warning and is replaced by 1.
sub id {
    my ($self, $value) = @_;

    # Pure getter when no new value is supplied.
    return $self->{'id'} unless defined $value;

    my $table = $TABLES[$value];
    unless (defined $table and $table ne '') {
        $self->warn("Not a valid codon table ID [$value], using [1] instead ");
        $value = 1;
    }
    $self->{'id'} = $value;

    return $self->{'id'};
}

289 =head2 name
291 Title : name
292 Usage : $obj->name()
293 Function: returns the descriptive name of the translation table
294 Example :
295 Returns : A string
296 Args : None
299 =cut
# Look up the descriptive name stored in @NAMES for the current table id.
sub name {
    my ($self) = @_;
    return $NAMES[ $self->{'id'} ];
}

308 =head2 tables
310 Title : tables
311 Usage : $obj->tables() or Bio::Tools::CodonTable->tables()
312 Function: returns a hash reference where each key is a valid codon
313 table id() number, and each value is the corresponding
314 codon table name() string
315 Example :
316 Returns : A hashref
317 Args : None
320 =cut
# Build an id => name hash of every slot in @NAMES that holds a true
# (non-empty) name, and return a reference to it.
sub tables {
    my %name_of = map  { ($_ => $NAMES[$_]) }
                  grep { $NAMES[$_] } 0 .. $#NAMES;
    return \%name_of;
}

331 =head2 translate
333 Title : translate
334 Usage : $obj->translate('YTR')
335 Function: Returns a string of one letter amino acid codes from
336 nucleotide sequence input. The input can be of any length.
338 Returns 'X' for unknown codons and codons that code for
339 more than one amino acid. Returns an empty string if input
340 is not three characters long. Exceptions for these are:
342 - IUPAC amino acid code B for Aspartic Acid and
343 Asparagine, is used.
344 - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
345 used.
346 - if the codon is two nucleotides long and if by adding
347 an a third character 'N', it codes for a single amino
348 acid (with exceptions above), return that, otherwise
349 return empty string.
351 Returns empty string for other input strings that are not
352 three characters long.
354 Example :
355 Returns : a string of one letter ambiguous IUPAC amino acid codes
356 Args : ambiguous IUPAC nucleotide string
359 =cut
# Translate a nucleotide string codon by codon with the current table.
# Args   : $seq            - nucleotide string (DNA or RNA, any case)
#          $complete_codon - if true, a trailing 2-nt overhang is padded
#                            with 'n' and translated as well
# Returns: string of one-letter amino acid codes ('' for a false $seq)
# Throws : if $seq is undefined
sub translate {
    my ($self, $seq, $complete_codon) = @_;
    $self->throw("Calling translate without a seq argument!") unless defined $seq;
    return '' unless $seq;

    my $id = $self->id;

    # Remember whether exactly two nucleotides hang over the last codon.
    my $partial = (length($seq) % CODONSIZE == 2) ? 2 : 0;

    # Normalise to lower-case DNA.
    ($seq = lc $seq) =~ tr/u/t/;

    my $protein       = "";
    my $has_ambiguity = $seq =~ /[^actg]/;
    my $limit         = length($seq) - (CODONSIZE - 1);

    my $pos = 0;
    while ($pos < $limit) {
        my $triplet = substr($seq, $pos, CODONSIZE);
        $pos += CODONSIZE;

        my $is_gap = $triplet eq $CODONGAP;
        $protein .= $GAP if $is_gap;

        if ($has_ambiguity) {
            # ambiguous chars: a gap codon is complete once $GAP is added
            next if $is_gap;
            $protein .= exists $CODONS->{$triplet}
                ? substr($TABLES[$id], $CODONS->{$triplet}, 1)
                : $self->_translate_ambiguous_codon($triplet);
        }
        else {
            # simple, strict translation; here the gap test above is
            # independent of the table lookup
            $protein .= exists $CODONS->{$triplet}
                ? substr($TABLES[$id], $CODONS->{$triplet}, 1)
                : 'X';
        }
    }

    if ($partial == 2 && $complete_codon) {    # 2 overhanging nucleotides
        my $triplet = substr($seq, -2) . "n";
        if ($triplet eq $CODONGAP) {
            $protein .= $GAP;
        }
        elsif (exists $CODONS->{$triplet}) {
            $protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1);
        }
        else {
            $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
        }
    }

    return $protein;
}

# Resolve an ambiguous triplet to a single amino acid code.
# Args   : $triplet - codon containing IUPAC ambiguity codes
#          $partial - true when the triplet is a padded partial codon
# Returns: the amino acid if all expansions agree; 'B' for D+N; 'Z' for
#          E+Q; otherwise '' for a partial codon and 'X' for a full one
sub _translate_ambiguous_codon {
    my ($self, $triplet, $partial) = @_;
    $partial ||= 0;
    my $id = $self->id;

    # Collect the distinct amino acids encoded by every expansion.
    my %seen_aa;
    $seen_aa{ substr($TABLES[$id], $CODONS->{$_}, 1) } = 1
        for $self->unambiguous_codons($triplet);

    my @residues = keys %seen_aa;
    return $residues[0] if @residues == 1;

    if (@residues == 2) {
        return 'B' if $seen_aa{'D'} and $seen_aa{'N'};
        return 'Z' if $seen_aa{'E'} and $seen_aa{'Q'};
    }

    return $partial ? '' : 'X';
}

441 =head2 translate_strict
443 Title : translate_strict
444 Usage : $obj->translate_strict('ACT')
445 Function: returns one letter amino acid code for a codon input
447 Fast and simple translation. User is responsible to resolve
448 ambiguous nucleotide codes before calling this
449 method. Returns 'X' for unknown codons and an empty string
450 for input strings that are not three characters long.
452 It is not recommended to use this method in a production
453 environment. Use method translate, instead.
455 Example :
456 Returns : A string
457 Args : a codon = a three nucleotide character string
460 =cut
# Translate exactly one unambiguous codon with the current table.
# Returns '' unless the input is three characters long, and 'X' when the
# codon is not one of the 64 known triplets.
sub translate_strict {
    my ($self, $value) = @_;
    my $id = $self->{'id'};

    # Normalise to lower-case DNA.
    (my $codon = lc $value) =~ tr/u/t/;

    return '' if length($codon) != 3;

    my $column = $CODONS->{$codon};
    return defined $column ? substr($TABLES[$id], $column, 1) : 'X';
}

476 =head2 revtranslate
478 Title : revtranslate
479 Usage : $obj->revtranslate('G')
480 Function: returns codons for an amino acid
482 Returns an empty string for unknown amino acid
483 codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
484 Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
485 letter amino acid codes are accepted. '*' and 'Ter' are
486 used for terminator.
488 By default, the output codons are shown in DNA. If the
489 output is needed in RNA (tr/t/u/), add a second argument
490 'RNA'.
492 Example : $obj->revtranslate('Gly', 'RNA')
493 Returns : An array of three lower case letter strings i.e. codons
494 Args : amino acid, 'RNA'
496 =cut
# Return every codon of the current table that encodes the given amino
# acid.
# Args   : $value  - one-letter or three-letter amino acid code
#          $coding - 'RNA' (any case) to get u instead of t in the output
# Returns: list of lower-case codons; empty when the code is not accepted
sub revtranslate {
    my ($self, $value, $coding) = @_;
    my @codons;

    # Map a three-letter symbol to its one-letter code.
    if (length($value) == 3) {
        $value = $THREELETTERSYMBOLS{ ucfirst lc $value };
    }

    if (    defined $value
        and $value =~ /$VALID_PROTEIN/
        and length($value) == 1 )
    {
        my $id = $self->{'id'};

        $value = uc $value;
        my @residues = @{ $IUPAC_AA{$value} };
        for my $residue (@residues) {
            # '*' must be escaped to be matched literally.
            my $pattern = $residue eq '*' ? '\*' : $residue;
            while ($TABLES[$id] =~ m/$pattern/g) {
                # pos() is one past the matched column.
                push @codons, $TRCOL->{ pos($TABLES[$id]) - 1 };
            }
        }
    }

    if ($coding and uc($coding) eq 'RNA') {
        tr/t/u/ for @codons;
    }

    return @codons;
}

533 =head2 reverse_translate_all
535 Title : reverse_translate_all
536 Usage : my $iup_str = $cttable->reverse_translate_all($seq_object)
537 my $iup_str = $cttable->reverse_translate_all($seq_object,
538 $cutable,
539 15);
540 Function: reverse translates a protein sequence into IUPAC nucleotide
541 sequence. An 'X' in the protein sequence is converted to 'NNN'
542 in the nucleotide sequence.
543 Returns : a string
544 Args : a Bio::PrimarySeqI compatible object (mandatory)
545 a Bio::CodonUsage::Table object and a threshold if only
546 codons with a relative frequency above the threshold are
547 to be considered.
548 =cut
# Reverse translate a protein sequence into an IUPAC nucleotide string.
# Args   : $obj       - Bio::PrimarySeqI object with protein alphabet
#          $cut       - optional Bio::CodonUsage::Table object
#          $threshold - optional threshold passed to
#                       $cut->probable_codons
# Returns: the string built by _make_iupac_string
# Throws : if $obj is not a protein Bio::PrimarySeqI, or if a usage table
#          or threshold is given and $cut is not a Bio::CodonUsage::Table
sub reverse_translate_all {
    my ($self, $obj, $cut, $threshold) = @_;

    ## check args are OK
    if (!$obj || !$obj->isa('Bio::PrimarySeqI')) {
        $self->throw(" I need a Bio::PrimarySeqI object, not a [".
                     ref($obj) . "]");
    }
    if ($obj->alphabet ne 'protein') {
        $self->throw("Cannot reverse translate, need an amino acid sequence .".
                     "This sequence is of type [" . $obj->alphabet ."]");
    }

    my @data;
    my @seq = split '', $obj->seq;

    ## if we're not supplying a codon usage table...
    if (!$cut && !$threshold) {
        ## get lists of possible codons for each aa.
        for my $aa (@seq) {
            if ($aa =~ /x/i) {
                push @data, (['NNN']);
            }
            else {
                my @cods = $self->revtranslate($aa);
                push @data, \@cods;
            }
        }
    }
    else {
        # else we are supplying a codon usage table, we just want common
        # codons.  Check args first; $cut may be undefined here when only
        # a threshold was given, so test it before calling a method on it.
        if (!$cut || !$cut->isa('Bio::CodonUsage::Table')) {
            $self->throw("I need a Bio::CodonUsage::Table object, not a [".
                         ref($cut). "].");
        }
        my $cod_ref = $cut->probable_codons($threshold);
        for my $aa (@seq) {
            if ($aa =~ /x/i) {
                push @data, (['NNN']);
                next;
            }
            push @data, $cod_ref->{$aa};
        }
    }

    return $self->_make_iupac_string(\@data);
}

597 =head2 reverse_translate_best
599 Title : reverse_translate_best
600 Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
601 Function: Reverse translates a protein sequence into plain nucleotide
602 sequence (GATC), uses the most common codon for each amino acid
603 Returns : A string
604 Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
606 =cut
# Reverse translate a protein sequence using the most common codon for
# each amino acid.
# Args   : $obj - Bio::PrimarySeqI object with protein alphabet
#          $cut - Bio::CodonUsage::Table object
# Returns: nucleotide string; 'NNN' is emitted for each x/X residue
# Throws : on invalid arguments, or on a residue that has no entry in
#          $cut->most_common_codons
sub reverse_translate_best {
    my ($self, $obj, $cut) = @_;

    if (!$obj || !$obj->isa('Bio::PrimarySeqI')) {
        $self->throw(" I need a Bio::PrimarySeqI object, not a [".
                     ref($obj) . "]");
    }
    if ($obj->alphabet ne 'protein') {
        $self->throw("Cannot reverse translate, need an amino acid sequence .".
                     "This sequence is of type [" . $obj->alphabet ."]");
    }
    # Logical (short-circuit) OR: a bitwise '|' would evaluate
    # $cut->isa even when $cut is undefined and die before the throw.
    if (!$cut || !$cut->isa('Bio::CodonUsage::Table')) {
        $self->throw("I need a Bio::CodonUsage::Table object, not a [".
                     ref($cut). "].");
    }

    my $str = '';
    my @seq = split '', $obj->seq;

    my $cod_ref = $cut->most_common_codons();

    for my $aa (@seq) {
        if ($aa =~ /x/i) {
            $str .= 'NNN';
            next;
        }
        if (defined $cod_ref->{$aa}) {
            $str .= $cod_ref->{$aa};
        }
        else {
            $self->throw("Input sequence contains invalid character: $aa");
        }
    }
    return $str;
}

644 =head2 is_start_codon
646 Title : is_start_codon
647 Usage : $obj->is_start_codon('ATG')
648 Function: returns true (1) for all codons that can be used as a
649 translation start, false (0) for others.
650 Example : $myCodonTable->is_start_codon('ATG')
651 Returns : boolean
652 Args : codon
654 =cut
# True (1) when any expansion of the codon is marked 'M' in the start
# codon string of the current table; delegates to _codon_is.
sub is_start_codon {
    my ($self, $codon) = @_;
    return $self->_codon_is($codon, \@STARTS, 'M');
}

660 =head2 is_ter_codon
662 Title : is_ter_codon
663 Usage : $obj->is_ter_codon('GAA')
664 Function: returns true (1) for all codons that can be used as a
665 translation terminator, false (0) for others.
666 Example : $myCodonTable->is_ter_codon('ATG')
667 Returns : boolean
668 Args : codon
670 =cut
# Returns 1 only when the codon is three characters long, expands to at
# least one unambiguous codon, and every expansion is a terminator in the
# current table; returns 0 otherwise.
sub is_ter_codon {
    my ($self, $value) = @_;
    my $id = $self->{'id'};

    # We need to ensure U is mapped to T (ie. UAG)
    ($value = uc $value) =~ tr/U/T/;

    # Incomplete codons are not stop codons
    return 0 if length($value) != 3;

    my @expansions = $self->unambiguous_codons($value);
    for my $codon (@expansions) {
        # A single non-stop expansion is enough to fail.
        return 0
            if substr($TABLES[$id], $CODONS->{$codon}, 1) ne $TERMINATOR;
    }

    return @expansions ? 1 : 0;
}

# desc: checks whether any unambiguous expansion of a codon carries a
#       given marker character in a per-table string
# args: a value (typically a three-char string like 'atg'),
#       a reference to an array of 64-character strings indexed by
#       table id,
#       the single character to look for at the codon's column
# ret:  1 if at least one expansion of $value has $key at its column in
#       the string for the current table id, 0 otherwise (including when
#       $value is not three characters long)
sub _codon_is {
    my ($self, $value, $table, $key) = @_;

    return 0 if length($value) != 3;

    # Normalise to lower-case DNA.
    (my $codon = lc $value) =~ tr/u/t/;

    my $id = $self->{'id'};
    for my $candidate ($self->unambiguous_codons($codon)) {
        return 1
            if substr($table->[$id], $CODONS->{$candidate}, 1) eq $key;
    }

    return 0;
}

724 =head2 is_unknown_codon
726 Title : is_unknown_codon
727 Usage : $obj->is_unknown_codon('GAJ')
728 Function: returns false (0) for all codons that are valid,
729 true (1) for others.
730 Example : $myCodonTable->is_unknown_codon('NTG')
731 Returns : boolean
732 Args : codon
735 =cut
# Returns 1 when the codon has no unambiguous expansion, 0 otherwise.
sub is_unknown_codon {
    my ($self, $value) = @_;

    # Normalise to lower-case DNA.
    (my $codon = lc $value) =~ tr/u/t/;

    # Scalar context yields the number of expansions.
    my $expansion_count = $self->unambiguous_codons($codon);
    return $expansion_count ? 0 : 1;
}

745 =head2 unambiguous_codons
747 Title : unambiguous_codons
748 Usage : @codons = $self->unambiguous_codons('ACN')
749 Returns : array of strings (lower-case unambiguous three-letter codons)
750 Args : a codon = a three IUPAC nucleotide character string
752 =cut
# Expand a codon containing IUPAC ambiguity codes into every concrete
# codon it stands for.  Only the first three characters are used; a
# character with no entry in %IUPAC_DNA (or a missing position) results
# in an empty list.
# Returns: list of lower-case codons (the count in scalar context)
sub unambiguous_codons {
    my ($self, $value) = @_;

    # One array reference of candidate bases per codon position.
    my ($first, $second, $third) =
        map { $IUPAC_DNA{uc $_} } split //, $value;

    my @codons;
    for my $base1 (@{ $first || [] }) {
        for my $base2 (@{ $second || [] }) {
            push @codons, map { lc($base1 . $base2 . $_) } @{ $third || [] };
        }
    }

    return @codons;
}

767 =head2 _unambiquous_codons
769 deprecated, now an alias for unambiguous_codons
771 =cut
# Deprecated alias: forwards its arguments to unambiguous_codons with an
# undefined invocant, so the first argument is taken as the codon.
# NOTE(review): a method-style call would pass the object as the codon -
# presumably this is only meant to be called as a plain function; confirm.
sub _unambiquous_codons {
    my @args = @_;
    return unambiguous_codons(undef, @args);
}

777 =head2 add_table
779 Title : add_table
780 Usage : $newid = $ct->add_table($name, $table, $starts)
781 Function: Add a custom Codon Table into the object.
782 Know what you are doing, only the length of
783 the argument strings is checked!
784 Returns : the id of the new codon table
785 Args : name, a string, optional (can be empty)
786 table, a string of 64 characters
787 startcodons, a string of 64 characters, defaults to standard
789 =cut
# Register a custom codon table in the package-wide table lists.
# Args   : $name   - table name; defaults to 'Custom' followed by the new
#                    table's id
#          $table  - 64-character amino acid string
#          $starts - 64-character start codon string; defaults to the
#                    start string of table 1
# Returns: the id (index) of the newly added table
# Throws : 'Suspect input!' unless both strings are 64 characters long
sub add_table {
    my ($self, $name, $table, $starts) = @_;

    # '.' and '+' share the same precedence and associate left to right,
    # so the addition must be parenthesised to get e.g. 'Custom34' rather
    # than the number 1.
    $name   ||= 'Custom' . ($#NAMES + 1);
    $starts ||= $STARTS[1];
    $self->throw('Suspect input!')
        unless defined $table
           and length($table) == 64
           and length($starts) == 64;

    push @NAMES,  $name;
    push @TABLES, $table;
    push @STARTS, $starts;

    return $#NAMES;
}

# Collapse a list of codon lists into one IUPAC nucleotide string.
# Args   : $cod_ref - reference to an array with one entry per residue,
#                     each entry a reference to a list of codons
# Returns: string with three symbols per residue, each looked up from the
#          sorted set of bases seen at that codon position
# Throws : unless $cod_ref is an ARRAY reference
sub _make_iupac_string {
    my ($self, $cod_ref) = @_;

    if (ref($cod_ref) ne 'ARRAY') {
        $self->throw(" I need a reference to a list of references to codons, ".
                     " not a [". ref($cod_ref) . "].");
    }

    my %iupac_hash   = Bio::Tools::IUPAC->iupac_rev_iub();
    my $iupac_string = '';    ## the string to be returned

    for my $aa (@$cod_ref) {
        ## scan through codon positions, record the differing values,
        ## then look up in the iub hash
        for my $index (0 .. 2) {
            my %seen;
            for my $codon (@$aa) {
                $seen{ substr($codon, $index, 1) } = undef;
            }
            my $lookup_key = join '', sort keys %seen;

            ## extend string
            $iupac_string .= $iupac_hash{ uc $lookup_key };
        }
    }

    return $iupac_string;
}

834 # Follows the content of
835 # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt, which is the NCBI
836 # genetic codon table in ASN.1 value notation / print format. We do
837 # not have a ASN.1 decoder for value notation but it's easy enough to
838 # parse.
840 __DATA__
841 --**************************************************************************
842 -- This is the NCBI genetic code table
843 -- Initial base data set from Andrzej Elzanowski while at PIR International
844 -- Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
845 -- Base 1-3 of each codon have been added as comments to facilitate
846 -- readability at the suggestion of Peter Rice, EMBL
847 -- Later additions by Taxonomy Group staff at NCBI
849 -- Version 4.6
850 -- Renamed genetic code 24 to Rhabdopleuridae Mitochondrial
852 -- Version 4.5
853 -- Added Cephalodiscidae mitochondrial genetic code 33
855 -- Version 4.4
856 -- Added GTG as start codon for genetic code 3
857 -- Added Balanophoraceae plastid genetic code 32
859 -- Version 4.3
860 -- Change to CTG -> Leu in genetic codes 27, 28, 29, 30
862 -- Version 4.2
863 -- Added Karyorelict nuclear genetic code 27
864 -- Added Condylostoma nuclear genetic code 28
865 -- Added Mesodinium nuclear genetic code 29
866 -- Added Peritrich nuclear genetic code 30
867 -- Added Blastocrithidia nuclear genetic code 31
869 -- Version 4.1
870 -- Added Pachysolen tannophilus nuclear genetic code 26
872 -- Version 4.0
873 -- Updated version to reflect numerous undocumented changes:
874 -- Corrected start codons for genetic code 25
875 -- Name of new genetic code is Candidate Division SR1 and Gracilibacteria
876 -- Added candidate division SR1 nuclear genetic code 25
877 -- Added GTG as start codon for genetic code 24
878 -- Corrected Pterobranchia Mitochondrial genetic code (24)
879 -- Added genetic code 24, Pterobranchia Mitochondrial
880 -- Genetic code 11 is now Bacterial, Archaeal and Plant Plastid
881 -- Fixed capitalization of mitochondrial in codes 22 and 23
882 -- Added GTG, ATA, and TTG as alternative start codons to code 13
884 -- Version 3.9
885 -- Code 14 differs from code 9 only by translating UAA to Tyr rather than
886 -- STOP. A recent study (Telford et al, 2000) has found no evidence that
887 -- the codon UAA codes for Tyr in the flatworms, but other opinions exist.
888 -- There are very few GenBank records that are translated with code 14,
889 -- but a test translation shows that retranslating these records with code
890 -- 9 can cause premature terminations. Therefore, GenBank will maintain
891 -- code 14 until further information becomes available.
893 -- Version 3.8
894 -- Added GTG start to Echinoderm mitochondrial code, code 9
896 -- Version 3.7
897 -- Added code 23 Thraustochytrium mitochondrial code
898 -- formerly OGMP code 93
899 -- submitted by Gertraude Berger, Ph.D.
901 -- Version 3.6
902 -- Added code 22 TAG-Leu, TCA-stop
903 -- found in mitochondrial DNA of Scenedesmus obliquus
904 -- submitted by Gertraude Berger, Ph.D.
905 -- Organelle Genome Megasequencing Program, Univ Montreal
907 -- Version 3.5
908 -- Added code 21, Trematode Mitochondrial
909 -- (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
910 -- Added code 16, Chlorophycean Mitochondrial
911 -- (TAG can translated to Leucine instaed to STOP in chlorophyceans
912 -- and fungi)
914 -- Version 3.4
915 -- Added CTG,TTG as allowed alternate start codons in Standard code.
916 -- Prats et al. 1989, Hann et al. 1992
918 -- Version 3.3 - 10/13/95
919 -- Added alternate intiation codon ATC to code 5
920 -- based on complete mitochondrial genome of honeybee
921 -- Crozier and Crozier (1993)
923 -- Version 3.2 - 6/24/95
924 -- Code Comments
925 -- 10 Alternative Ciliate Macronuclear renamed to Euplotid Macro...
926 -- 15 Blepharisma Macro.. code added
927 -- 5 Invertebrate Mito.. GTG allowed as alternate initiator
928 -- 11 Eubacterial renamed to Bacterial as most alternate starts
929 -- have been found in Archea
932 -- Version 3.1 - 1995
933 -- Updated as per Andrzej Elzanowski at NCBI
934 -- Complete documentation in NCBI toolkit documentation
935 -- Note: 2 genetic codes have been deleted
937 -- Old id Use id - Notes
939 -- id 7 id 4 - Kinetoplast code now merged in code id 4
940 -- id 8 id 1 - all plant chloroplast differences due to RNA edit
943 --*************************************************************************
945 Genetic-code-table ::= {
947 name "Standard" ,
948 name "SGC0" ,
949 id 1 ,
950 ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
951 sncbieaa "---M------**--*----M---------------M----------------------------"
952 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
953 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
954 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
957 name "Vertebrate Mitochondrial" ,
958 name "SGC1" ,
959 id 2 ,
960 ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
961 sncbieaa "----------**--------------------MMMM----------**---M------------"
962 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
963 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
964 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
967 name "Yeast Mitochondrial" ,
968 name "SGC2" ,
969 id 3 ,
970 ncbieaa "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
971 sncbieaa "----------**----------------------MM---------------M------------"
972 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
973 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
974 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
977 name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate
978 Mitochondrial; Mycoplasma; Spiroplasma" ,
979 name "SGC3" ,
980 id 4 ,
981 ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
982 sncbieaa "--MM------**-------M------------MMMM---------------M------------"
983 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
984 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
985 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
988 name "Invertebrate Mitochondrial" ,
989 name "SGC4" ,
990 id 5 ,
991 ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
992 sncbieaa "---M------**--------------------MMMM---------------M------------"
993 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
994 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
995 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
998 name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
999 name "SGC5" ,
1000 id 6 ,
1001 ncbieaa "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1002 sncbieaa "--------------*--------------------M----------------------------"
1003 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1004 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1005 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1008 name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
1009 name "SGC8" ,
1010 id 9 ,
1011 ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1012 sncbieaa "----------**-----------------------M---------------M------------"
1013 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1014 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1015 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1018 name "Euplotid Nuclear" ,
1019 name "SGC9" ,
1020 id 10 ,
1021 ncbieaa "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1022 sncbieaa "----------**-----------------------M----------------------------"
1023 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1024 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1025 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1028 name "Bacterial, Archaeal and Plant Plastid" ,
1029 id 11 ,
1030 ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1031 sncbieaa "---M------**--*----M------------MMMM---------------M------------"
1032 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1033 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1034 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1037 name "Alternative Yeast Nuclear" ,
1038 id 12 ,
1039 ncbieaa "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1040 sncbieaa "----------**--*----M---------------M----------------------------"
1041 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1042 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1043 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1046 name "Ascidian Mitochondrial" ,
1047 id 13 ,
1048 ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
1049 sncbieaa "---M------**----------------------MM---------------M------------"
1050 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1051 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1052 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1055 name "Alternative Flatworm Mitochondrial" ,
1056 id 14 ,
1057 ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1058 sncbieaa "-----------*-----------------------M----------------------------"
1059 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1060 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1061 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1064 name "Blepharisma Macronuclear" ,
1065 id 15 ,
1066 ncbieaa "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1067 sncbieaa "----------*---*--------------------M----------------------------"
1068 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1069 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1070 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1073 name "Chlorophycean Mitochondrial" ,
1074 id 16 ,
1075 ncbieaa "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1076 sncbieaa "----------*---*--------------------M----------------------------"
1077 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1078 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1079 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1082 name "Trematode Mitochondrial" ,
1083 id 21 ,
1084 ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1085 sncbieaa "----------**-----------------------M---------------M------------"
1086 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1087 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1088 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1091 name "Scenedesmus obliquus Mitochondrial" ,
1092 id 22 ,
1093 ncbieaa "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1094 sncbieaa "------*---*---*--------------------M----------------------------"
1095 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1096 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1097 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1100 name "Thraustochytrium Mitochondrial" ,
1101 id 23 ,
1102 ncbieaa "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1103 sncbieaa "--*-------**--*-----------------M--M---------------M------------"
1104 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1105 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1106 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1109 name "Rhabdopleuridae Mitochondrial" ,
1110 id 24 ,
1111 ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
1112 sncbieaa "---M------**-------M---------------M---------------M------------"
1113 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1114 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1115 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1118 name "Candidate Division SR1 and Gracilibacteria" ,
1119 id 25 ,
1120 ncbieaa "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1121 sncbieaa "---M------**-----------------------M---------------M------------"
1122 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1123 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1124 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1127 name "Pachysolen tannophilus Nuclear" ,
1128 id 26 ,
1129 ncbieaa "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1130 sncbieaa "----------**--*----M---------------M----------------------------"
1131 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1132 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1133 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1136 name "Karyorelict Nuclear" ,
1137 id 27 ,
1138 ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1139 sncbieaa "--------------*--------------------M----------------------------"
1140 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1141 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1142 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1145 name "Condylostoma Nuclear" ,
1146 id 28 ,
1147 ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1148 sncbieaa "----------**--*--------------------M----------------------------"
1149 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1150 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1151 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1154 name "Mesodinium Nuclear" ,
1155 id 29 ,
1156 ncbieaa "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1157 sncbieaa "--------------*--------------------M----------------------------"
1158 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1159 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1160 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1163 name "Peritrich Nuclear" ,
1164 id 30 ,
1165 ncbieaa "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1166 sncbieaa "--------------*--------------------M----------------------------"
1167 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1168 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1169 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1172 name "Blastocrithidia Nuclear" ,
1173 id 31 ,
1174 ncbieaa "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1175 sncbieaa "----------**-----------------------M----------------------------"
1176 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1177 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1178 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1181 name "Balanophoraceae Plastid" ,
1182 id 32 ,
1183 ncbieaa "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1184 sncbieaa "---M------*---*----M------------MMMM---------------M------------"
1185 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1186 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1187 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1190 name "Cephalodiscidae Mitochondrial" ,
1191 id 33 ,
1192 ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
1193 sncbieaa "---M-------*-------M---------------M---------------M------------"
1194 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1195 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1196 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG