1 package Bio
::Tools
::CodonTable
;
10 use base
qw(Bio::Root::Root);
12 # ABSTRACT: Codon table object
13 # AUTHOR: Heikki Lehvaslaiho <heikki@bioperl.org>
14 # OWNER: Heikki Lehvaslaiho <heikki@bioperl.org>
19 # This is a read-only class for all known codon tables. The IDs are
20 # the ones used by nucleotide sequence databases. All common IUPAC
21 # ambiguity codes for DNA, RNA and amino acids are recognized.
23 use Bio::Tools::CodonTable;
25 # defaults to ID 1 "Standard"
26 $myCodonTable = Bio::Tools::CodonTable->new();
27 $myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 );
33 print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
34 "is:", $myCodonTable->name(), "\n");
36 # print possible codon tables
37 $tables = Bio::Tools::CodonTable->tables;
38 while ( ($id,$name) = each %{$tables} ) {
39 print "$id = $name\n";
43 $aa = $myCodonTable->translate('ACU');
44 $aa = $myCodonTable->translate('act');
45 $aa = $myCodonTable->translate('ytr');
47 # reverse translate an amino acid
48 @codons = $myCodonTable->revtranslate('A');
49 @codons = $myCodonTable->revtranslate('Ser');
50 @codons = $myCodonTable->revtranslate('Glx');
51 @codons = $myCodonTable->revtranslate('cYS', 'rna');
53 # reverse translate an entire amino acid sequence into an IUPAC
56 my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
57 my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
60 print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
61 print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
62 print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
66 Codon tables are also called translation tables or genetic codes
67 since that is what they represent. A bit more complete picture
68 of the full complexity of codon usage in various taxonomic groups
69 is presented at the NCBI Genetic Codes Home page.
71 CodonTable is a BioPerl class that knows all current translation
72 tables that are used by primary nucleotide sequence databases
73 (GenBank, EMBL and DDBJ). It provides methods to output information
74 about tables and relationships between codons and amino acids.
76 This class and its methods recognize all common IUPAC ambiguity codes
77 for DNA, RNA and amino acids. The translation method follows the
78 conventions in EMBL and TREMBL databases.
80 It is a nuisance to separate RNA and cDNA representations of nucleic
81 acid transcripts. The CodonTable object accepts codons of both type as
82 input and allows the user to set the mode for output when reverse
83 translating. Its default for output is DNA.
87 This class deals primarily with individual codons and amino
88 acids. However, in the interest of speed you can L<translate>
89 longer sequences, too. The full complexity of protein translation
90 is tackled by L<Bio::PrimarySeqI::translate>.
93 The amino acid codes are IUPAC recommendations for common amino acids:
110 O Pyl Pyrrolysine (22nd amino acid)
111 U Sec Selenocysteine (21st amino acid)
117 B Asx Aspartic acid or Asparagine
118 Z Glx Glutamine or Glutamic acid
119 J Xle Leucine or Isoleucine (mass spec ambiguity)
120 X Xaa Any or unknown amino acid
123 It is worth noting that "Bacterial" codon table no. 11 produces a
124 polypeptide that is, confusingly, identical to the standard one. The
125 only differences are in available initiator codons.
128 NCBI Genetic Codes home page:
129 (Last update of the Genetic Codes: Apr. 25, 2024)
130 https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
132 The "value notation" / "print form" ASN.1 version is at:
133 ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
135 Thanks to Matteo diTomasso for the original Perl implementation
# Internal, class-wide values shared by every translation table.
use constant { CODONSIZE => 3 };    # characters per codon

# NOTE(review): $GAP is not declared in the lines visible here; presumably it
# is set just before this point -- confirm.
our $CODONGAP = $GAP x CODONSIZE;   # the triplet translate() treats as a gap

# Symbol tables supplied by Bio::Tools::IUPAC and Bio::SeqUtils.
# %IUPAC_DNA is what unambiguous_codons() uses to expand each nucleotide
# symbol; %IUPAC_AA and %THREELETTERSYMBOLS are used by revtranslate().
our %IUPAC_DNA          = Bio::Tools::IUPAC->iupac_iub();
our %IUPAC_AA           = Bio::Tools::IUPAC->iupac_iup();
our %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);

# Regex character class built from the symbols returned by valid_aa(0).
our $VALID_PROTEIN = '[' . join('', Bio::SeqUtils->valid_aa(0)) . ']';
our $TERMINATOR    = '*';

# Per-table data, indexed by codon table id; filled from the gc.prt data.
our @NAMES;
our @TABLES;
our @STARTS;
152 # Parse the ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt file which
153 # is below __DATA__ in this module (see the end of the file). This
154 # fills the @NAMES, @TABLES, and @STARTS variables. To update to a
155 # new release of gc.prt, replace the content below __DATA__.
157 # Init tables hash with special option (id=0) for ATG-only start
161 ncbieaa
=> "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
162 sncbieaa
=> "----------**--*--------------------M----------------------------",
166 while (defined(my $line = <DATA
>)) {
167 next if $line =~ /^\s*--/; # skip comment lines
168 if ($line =~ /^\s*\{\s*$/) { # start of a table description
174 if ($line =~ /^\s*(name|id|ncbieaa|sncbieaa)\s+(.+)/) {
181 # The remaining keys --- name, ncbieaa, and
182 # sncbieaa --- are strings which may be
183 # multi-line (e.g., name for table with id 4).
184 # We are assuming that there is no " character
185 # inside the value so we keep appending lines
186 # until we find an end ".
187 while ($rem !~ /^"(.*)"/ && ! eof DATA
) {
193 if ($key eq "name" && ! $name) {
194 # ignore alternative names, e.g. SGC0,
195 # only keep the first name listed.
197 } elsif ($key eq "ncbieaa") {
199 } elsif ($key eq "sncbieaa") {
204 } until (($line = <DATA
>) =~ /^\s*}\s*,?$/); # we reached the end of table description
208 sncbieaa
=> $sncbieaa
214 # print Dumper %tables;
# After parsing gc.prt, fill in @NAMES, @TABLES, and @STARTS so that the
# array index is the table id.  Ids that have no table get empty-string
# placeholders, which id() rejects and tables() leaves out.
my ($highest_id) = sort { $b <=> $a } keys %tables;
for my $i (0 .. $highest_id) {    # inclusive, so the highest-numbered table is kept
    my $entry = $tables{$i};
    push @NAMES,  defined $entry ? $entry->{name}     : '';
    push @TABLES, defined $entry ? $entry->{ncbieaa}  : '';
    push @STARTS, defined $entry ? $entry->{sncbieaa} : '';
}
# $CODONS maps each lowercase DNA triplet to its 0-based column in the
# 64-character table strings; $TRCOL is the inverse (column -> triplet).
our ($TRCOL, $CODONS);
{
    # Base order matches the Base1/Base2/Base3 rows of the gc.prt data.
    my @bases = qw(t c a g);

    ($CODONS, $TRCOL) = ({}, {});

    my $column = 0;
    for my $first (@bases) {
        for my $second (@bases) {
            for my $third (@bases) {
                my $codon = $first . $second . $third;
                $CODONS->{$codon} = $column;
                $TRCOL->{$column} = $codon;
                $column++;
            }
        }
    }
}
249 my($class,@args) = @_;
250 my $self = $class->SUPER::new
(@args);
253 $self->_rearrange([qw(ID
257 $id = 1 if ( ! defined ( $id ) );
259 return $self; # success - we hope!
265 Usage : $obj->id(3); $id_integer = $obj->id();
266 Function: Sets or returns the id of the translation table. IDs are
267 integers starting at 0 (special ATG-only start); the valid ones
268 are listed by tables(). IDs 7-8 and 17-20 have been removed. If
269 an invalid ID is given the method returns 1, the standard table.
271 Returns : value of id, a scalar, warn and fall back to 1 (standard table)
272 if specified id is not valid
273 Args : newvalue (optional)
# Gets or sets the id of the translation table used by this object.
#
#   $value - new table id (optional)
#
# Returns the current id.  An id that has no table (undefined or empty entry
# in @TABLES, or a negative number) triggers a warning and is replaced by 1,
# the standard table.
sub id {
    my ($self, $value) = @_;

    # Pure getter when no new value is supplied
    return $self->{'id'} unless defined $value;

    my $known = defined $TABLES[$value]
        && $TABLES[$value] ne ''
        && $value >= 0;
    unless ($known) {
        $self->warn("Not a valid codon table ID [$value], using [1] instead ");
        $value = 1;
    }

    $self->{'id'} = $value;
    return $self->{'id'};
}
293 Function: returns the descriptive name of the translation table
304 my ($id) = $self->{'id'};
311 Usage : $obj->tables() or Bio::Tools::CodonTable->tables()
312 Function: returns a hash reference where each key is a valid codon
313 table id() number, and each value is the corresponding
314 codon table name() string
# Returns a hash reference mapping every valid codon table id to its name.
# Ids whose entry in @NAMES is empty (no such table) are left out.
sub tables {
    my %name_of;
    foreach my $table_id (grep { $NAMES[$_] } 0 .. $#NAMES) {
        $name_of{$table_id} = $NAMES[$table_id];
    }
    return \%name_of;
}
334 Usage : $obj->translate('YTR')
335 Function: Returns a string of one letter amino acid codes from
336 nucleotide sequence input. The input can be of any length.
338 Returns 'X' for unknown codons and codons that code for
339 more than one amino acid. Returns an empty string if input
340 is not three characters long. Exceptions for these are:
342 - IUPAC amino acid code B for Aspartic Acid and
344 - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
346 - if the codon is two nucleotides long and if by adding
347 an a third character 'N', it codes for a single amino
348 acid (with exceptions above), return that, otherwise
351 Returns empty string for other input strings that are not
352 three characters long.
355 Returns : a string of one letter ambiguous IUPAC amino acid codes
356 Args : ambiguous IUPAC nucleotide string
362 my ($self, $seq, $complete_codon) = @_;
363 $self->throw("Calling translate without a seq argument!") unless defined $seq;
364 return '' unless $seq;
368 $partial = 2 if length($seq) % CODONSIZE
== 2;
373 if ($seq =~ /[^actg]/ ) { #ambiguous chars
374 for (my $i = 0; $i < (length($seq) - (CODONSIZE
-1)); $i+= CODONSIZE
) {
375 my $triplet = substr($seq, $i, CODONSIZE
);
376 if( $triplet eq $CODONGAP ) {
378 } elsif (exists $CODONS->{$triplet}) {
379 $protein .= substr($TABLES[$id],
380 $CODONS->{$triplet},1);
382 $protein .= $self->_translate_ambiguous_codon($triplet);
385 } else { # simple, strict translation
386 for (my $i = 0; $i < (length($seq) - (CODONSIZE
-1)); $i+=CODONSIZE
) {
387 my $triplet = substr($seq, $i, CODONSIZE
);
388 if( $triplet eq $CODONGAP ) {
391 if (exists $CODONS->{$triplet}) {
392 $protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1);
398 if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides
399 my $triplet = substr($seq, ($partial -4)). "n";
400 if( $triplet eq $CODONGAP ) {
402 } elsif (exists $CODONS->{$triplet}) {
403 my $aa = substr($TABLES[$id], $CODONS->{$triplet},1);
406 $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
# Translates one ambiguous triplet with the current table.
#
#   $triplet - codon containing IUPAC ambiguity symbols
#   $partial - true when the triplet was padded from an incomplete codon
#
# Returns the single residue shared by every expansion of the triplet, 'B'
# for an Asp/Asn pair, 'Z' for a Glu/Gln pair, and otherwise 'X' (or the
# empty string when $partial is true).
sub _translate_ambiguous_codon {
    my ($self, $triplet, $partial) = @_;
    my $id = $self->{'id'};

    # Distinct residues encoded by the unambiguous codons behind the triplet
    my %seen;
    $seen{ substr($TABLES[$id], $CODONS->{$_}, 1) } = 1
        for $self->unambiguous_codons($triplet);
    my @residues = keys %seen;

    return $residues[0] if @residues == 1;

    if (@residues == 2) {
        return 'B' if $seen{'D'} and $seen{'N'};
        return 'Z' if $seen{'E'} and $seen{'Q'};
    }

    return $partial ? '' : 'X';
}
441 =head2 translate_strict
443 Title : translate_strict
444 Usage : $obj->translate_strict('ACT')
445 Function: returns one letter amino acid code for a codon input
447 Fast and simple translation. User is responsible to resolve
448 ambiguous nucleotide codes before calling this
449 method. Returns 'X' for unknown codons and an empty string
450 for input strings that are not three characters long.
452 It is not recommended to use this method in a production
453 environment. Use method translate, instead.
457 Args : a codon = a three nucleotide character string
# Fast single-codon translation with the current table.
#
#   $value - codon, three unambiguous nucleotide characters (DNA or RNA,
#            any case)
#
# Returns the one-letter amino acid code, 'X' for a triplet that is not an
# unambiguous codon, and the empty string when the input is not exactly three
# characters long.
sub translate_strict {
    my ($self, $value) = @_;
    my $id = $self->{'id'};

    return '' unless defined $value && length $value == 3;

    # $CODONS is keyed by lowercase DNA triplets, so fold case and map the
    # RNA base 'u' to 't' before the lookup; otherwise 'ACT' would give 'X'.
    (my $codon = lc $value) =~ tr/u/t/;

    return 'X' unless defined $CODONS->{$codon};

    return substr($TABLES[$id], $CODONS->{$codon}, 1);
}
479 Usage : $obj->revtranslate('G')
480 Function: returns codons for an amino acid
482 Returns an empty string for unknown amino acid
483 codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
484 Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
485 letter amino acid codes are accepted. '*' and 'Ter' are
488 By default, the output codons are shown in DNA. If the
489 output is needed in RNA (tr/t/u/), add a second argument
492 Example : $obj->revtranslate('Gly', 'RNA')
493 Returns : An array of three lower case letter strings i.e. codons
494 Args : amino acid, 'RNA'
499 my ($self, $value, $coding) = @_;
502 if (length($value) == 3 ) {
504 $value = ucfirst $value;
505 $value = $THREELETTERSYMBOLS{$value};
507 if ( defined $value and $value =~ /$VALID_PROTEIN/
508 and length($value) == 1
510 my $id = $self->{'id'};
513 my @aas = @
{$IUPAC_AA{$value}};
514 foreach my $aa (@aas) {
516 $aa = '\*' if $aa eq '*';
517 while ($TABLES[$id] =~ m/$aa/g) {
518 my $p = pos $TABLES[$id];
519 push (@codons, $TRCOL->{--$p});
524 if ($coding and uc ($coding) eq 'RNA') {
525 for my $i (0..$#codons) {
526 $codons[$i] =~ tr/t/u/;
533 =head2 reverse_translate_all
535 Title : reverse_translate_all
536 Usage : my $iup_str = $cttable->reverse_translate_all($seq_object)
537 my $iup_str = $cttable->reverse_translate_all($seq_object,
540 Function: reverse translates a protein sequence into IUPAC nucleotide
541 sequence. An 'X' in the protein sequence is converted to 'NNN'
542 in the nucleotide sequence.
544 Args : a Bio::PrimarySeqI compatible object (mandatory)
545 a Bio::CodonUsage::Table object and a threshold if only
546 codons with a relative frequency above the threshold are
# Reverse translates a protein sequence into an IUPAC nucleotide string.
#
#   $obj       - Bio::PrimarySeqI object with a 'protein' alphabet (mandatory)
#   $cut       - Bio::CodonUsage::Table object (optional)
#   $threshold - passed on to $cut->probable_codons() (optional)
#
# Without $cut and $threshold every codon returned by revtranslate() is used
# for each residue; otherwise only the codons returned by
# $cut->probable_codons($threshold).  An 'X' residue always becomes 'NNN'.
# Throws when $obj is missing or of the wrong class, when its alphabet is not
# 'protein', and when a usage table is required but missing or of the wrong
# class.
sub reverse_translate_all {
    my ($self, $obj, $cut, $threshold) = @_;

    if (!$obj || !$obj->isa('Bio::PrimarySeqI')) {
        $self->throw(" I need a Bio::PrimarySeqI object, not a [".
                     ref($obj) . "]");
    }
    if ($obj->alphabet ne 'protein') {
        $self->throw("Cannot reverse translate, need an amino acid sequence .".
                     "This sequence is of type [" . $obj->alphabet ."]");
    }

    my @seq = split '', $obj->seq;
    my @data;    # one array ref of candidate codons per residue

    if (!$cut && !$threshold) {
        ## no codon usage table: take every possible codon for each aa
        for my $aa (@seq) {
            if (uc $aa eq 'X') {
                push @data, ['NNN'];
                next;
            }
            my @cods = $self->revtranslate($aa);
            push @data, \@cods;
        }
    }
    else {
        ## codon usage table supplied: keep only the common codons.
        ## $cut is tested for definedness first, so a threshold given without
        ## a table is reported through throw() instead of dying on ->isa.
        if (!$cut || !$cut->isa('Bio::CodonUsage::Table')) {
            $self->throw("I need a Bio::CodonUsage::Table object, not a [".
                         ref($cut) . "].");
        }
        my $cod_ref = $cut->probable_codons($threshold);
        for my $aa (@seq) {
            if (uc $aa eq 'X') {
                push @data, ['NNN'];
                next;
            }
            push @data, $cod_ref->{$aa};
        }
    }

    return $self->_make_iupac_string(\@data);
}
597 =head2 reverse_translate_best
599 Title : reverse_translate_best
600 Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
601 Function: Reverse translates a protein sequence into plain nucleotide
602 sequence (GATC), uses the most common codon for each amino acid
604 Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
# Reverse translates a protein sequence into a plain nucleotide string, using
# the most common codon for each amino acid.
#
#   $obj - Bio::PrimarySeqI object with a 'protein' alphabet
#   $cut - Bio::CodonUsage::Table object
#
# Returns the nucleotide string.  Throws when either argument is missing or
# of the wrong class, when the alphabet is not 'protein', and when a residue
# has no entry in $cut->most_common_codons().
sub reverse_translate_best {
    my ($self, $obj, $cut) = @_;

    if (!$obj || !$obj->isa('Bio::PrimarySeqI')) {
        $self->throw(" I need a Bio::PrimarySeqI object, not a [".
                     ref($obj) . "]");
    }
    if ($obj->alphabet ne 'protein') {
        $self->throw("Cannot reverse translate, need an amino acid sequence .".
                     "This sequence is of type [" . $obj->alphabet ."]");
    }
    # Logical ||, not bitwise |: the method call must be skipped when $cut is
    # undefined, otherwise Perl dies before throw() can report the problem.
    if (!$cut || !$cut->isa('Bio::CodonUsage::Table')) {
        $self->throw("I need a Bio::CodonUsage::Table object, not a [".
                     ref($cut) . "].");
    }

    my $str = '';
    my @seq = split '', $obj->seq;

    my $cod_ref = $cut->most_common_codons();

    for my $aa (@seq) {
        # Unknown residue: same 'NNN' placeholder as reverse_translate_all
        if (uc $aa eq 'X') {
            $str .= 'NNN';
            next;
        }
        if (defined $cod_ref->{$aa}) {
            $str .= $cod_ref->{$aa};
        }
        else {
            $self->throw("Input sequence contains invalid character: $aa");
        }
    }

    return $str;
}
644 =head2 is_start_codon
646 Title : is_start_codon
647 Usage : $obj->is_start_codon('ATG')
648 Function: returns true (1) for all codons that can be used as a
649 translation start, false (0) for others. In the case of
650 ambiguous codons, e.g., 'NTG', only returns true if all
651 possible codons are true.
652 Example : $myCodonTable->is_start_codon('ATG')
# Checks a codon against the start-codon string of the current table.
# Delegates to _codon_is(), which looks for 'M' at the codon's position in
# the @STARTS entry for this table.
sub is_start_codon {
    my ($self, $codon) = @_;
    return $self->_codon_is($codon, \@STARTS, 'M');
}
665 Usage : $obj->is_ter_codon('GAA')
666 Function: returns true (1) for all codons that can be used as a
667 translation terminator, false (0) for others. In the case
668 of ambiguous codons, e.g., 'TAN', only returns true if all
669 possible codons are true.
670 Example : $myCodonTable->is_ter_codon('ATG')
# Checks whether a codon translates to the terminator symbol in the current
# table.  Delegates to _codon_is(), which looks for $TERMINATOR at the codon's
# position in the table string.
sub is_ter_codon {
    my ($self, $codon) = @_;

    # Use the translation strings (@TABLES), the same data translate() reads.
    # @STARTS only carries start/stop annotation, and for tables created with
    # add_table() it may simply be a copy of the standard table's string.
    return $self->_codon_is($codon, \@TABLES, $TERMINATOR);
}
680 # desc: compares the passed value with a single entry in the given
682 # args: a value (typically a three-char string like 'atg'), a
683 # reference to the appropriate set of codon tables, a
684 # single-character value to check for at the position in the
686 # ret: boolean, true if the given codon table contains the $key at the
687 # position corresponding to $value. In the case of ambiguous
688 # codons, only returns true if all possibilities match $key.
690 my ($self, $value, $table, $key ) = @_;
692 return 0 unless length $value == 3;
697 my $id = $self->{'id'};
699 for my $c ( $self->unambiguous_codons($value) ) {
700 my $m = substr( $table->[$id], $CODONS->{$c}, 1 );
710 =head2 is_unknown_codon
712 Title : is_unknown_codon
713 Usage : $obj->is_unknown_codon('GAJ')
714 Function: returns false (0) for all codons that are valid,
716 Example : $myCodonTable->is_unknown_codon('NTG')
# Tells whether a codon cannot be expanded into any unambiguous codon.
#
#   $value - codon string
#
# Returns 1 when unambiguous_codons() yields nothing for $value, 0 otherwise.
sub is_unknown_codon {
    my ($self, $value) = @_;

    my @expansions = $self->unambiguous_codons($value);
    return @expansions ? 0 : 1;
}
731 =head2 unambiguous_codons
733 Title : unambiguous_codons
734 Usage : @codons = $self->unambiguous_codons('ACN')
735 Returns : array of strings (lower case unambiguous three-nucleotide codons)
736 Args : a codon = a three IUPAC nucleotide character string
# Expands a codon written with IUPAC nucleotide symbols into every
# unambiguous codon it can stand for.
#
#   $value - codon string; each character is looked up, upper-cased, in
#            %IUPAC_DNA
#
# Returns a list of lowercase three-letter codons; the list is empty when any
# of the first three characters has no entry in %IUPAC_DNA.  $self is not
# used, so the sub also works when called as a plain function.
sub unambiguous_codons {
    my ($self, $value) = @_;

    # One array ref of concrete bases per input character
    my @choices = map { $IUPAC_DNA{ uc $_ } } split //, $value;

    my @expanded;
    for my $first (@{ $choices[0] }) {
        for my $second (@{ $choices[1] }) {
            for my $third (@{ $choices[2] }) {
                push @expanded, lc($first . $second . $third);
            }
        }
    }

    return @expanded;
}
753 =head2 _unambiquous_codons
755 deprecated, now an alias for unambiguous_codons
# Deprecated alias kept for old callers; use unambiguous_codons() instead.
# Every argument is forwarded after an undef placeholder for the invocant, so
# the codon is expected as the first argument (plain function-style call).
sub _unambiquous_codons {
    my @forwarded = @_;
    return unambiguous_codons(undef, @forwarded);
}
766 Usage : $newid = $ct->add_table($name, $table, $starts)
767 Function: Add a custom Codon Table into the object.
768 Know what you are doing, only the length of
769 the argument strings is checked!
770 Returns : the id of the new codon table
771 Args : name, a string, optional (can be empty)
772 table, a string of 64 characters
773 startcodons, a string of 64 characters, defaults to standard
# Registers a custom codon table and returns its id, i.e. its index in
# @NAMES, @TABLES and @STARTS.
#
#   $name   - label for the table (optional; defaults to 'Custom<id>')
#   $table  - translation string, 64 characters (mandatory)
#   $starts - start-codon string, 64 characters (optional; defaults to the
#             string of table 1)
#
# Throws 'Suspect input!' unless both strings are exactly 64 characters long;
# nothing else about them is checked.
sub add_table {
    my ($self, $name, $table, $starts) = @_;

    # Parentheses are required: '.' and '+' share precedence and associate
    # left, so 'Custom' . $#NAMES + 1 is ('Custom' . $#NAMES) + 1, which is
    # the number 1 rather than a name.
    $name   ||= 'Custom' . ($#NAMES + 1);
    $starts ||= $STARTS[1];

    $self->throw('Suspect input!')
        unless defined $table
        and length($table) == 64
        and length($starts) == 64;

    # Keep the three arrays aligned so one index addresses one table
    push @NAMES,  $name;
    push @TABLES, $table;
    push @STARTS, $starts;

    return $#NAMES;
}
# Collapses per-residue codon lists into one IUPAC nucleotide string.
#
#   $cod_ref - array ref with one array ref of codons per residue
#
# For each residue and each of the three codon positions, the distinct bases
# seen at that position are sorted, joined, upper-cased and looked up in the
# hash returned by Bio::Tools::IUPAC->iupac_rev_iub(); the values found are
# concatenated.  Throws when $cod_ref is not an array reference.
sub _make_iupac_string {
    my ($self, $cod_ref) = @_;

    $self->throw(" I need a reference to a list of references to codons, ".
                 " not a [". ref($cod_ref) . "].")
        if ref($cod_ref) ne 'ARRAY';

    my %iupac_hash   = Bio::Tools::IUPAC->iupac_rev_iub();
    my $iupac_string = '';

    for my $codons (@$cod_ref) {
        for my $position (0 .. 2) {
            # Distinct bases occurring at this codon position
            my %seen;
            @seen{ map { substr($_, $position, 1) } @$codons } = ();

            my $lookup_key = join '', sort { $a cmp $b } keys %seen;
            $iupac_string .= $iupac_hash{ uc $lookup_key };
        }
    }

    return $iupac_string;
}
820 # Follows the content of
821 # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt, which is the NCBI
822 # genetic codon table in ASN.1 value notation / print format. We do
823 # not have an ASN.1 decoder for value notation but it's easy enough to
827 --**************************************************************************
828 -- This is the NCBI genetic code table
829 -- Initial base data set from Andrzej Elzanowski
while at PIR International
830 -- Addition of Eubacterial
and Alternative Yeast by J
.Ostell at NCBI
831 -- Base
1-3 of
each codon have been added as comments to facilitate
832 -- readability at the suggestion of Peter Rice
, EMBL
833 -- Later additions by Taxonomy Group staff at NCBI
836 -- Renamed genetic code
24 to Rhabdopleuridae Mitochondrial
839 -- Added Cephalodiscidae mitochondrial genetic code
33
842 -- Added GTG as start codon
for genetic code
3
843 -- Added Balanophoraceae plastid genetic code
32
846 -- Change to CTG
-> Leu
in genetic codes
27, 28, 29, 30
849 -- Added Karyorelict nuclear genetic code
27
850 -- Added Condylostoma nuclear genetic code
28
851 -- Added Mesodinium nuclear genetic code
29
852 -- Added Peritrich nuclear genetic code
30
853 -- Added Blastocrithidia nuclear genetic code
31
856 -- Added Pachysolen tannophilus nuclear genetic code
26
859 -- Updated version to reflect numerous undocumented changes
:
860 -- Corrected start codons
for genetic code
25
861 -- Name of new genetic code is Candidate Division SR1
and Gracilibacteria
862 -- Added candidate division SR1 nuclear genetic code
25
863 -- Added GTG as start codon
for genetic code
24
864 -- Corrected Pterobranchia Mitochondrial genetic code
(24)
865 -- Added genetic code
24, Pterobranchia Mitochondrial
866 -- Genetic code
11 is now Bacterial
, Archaeal
and Plant Plastid
867 -- Fixed capitalization of mitochondrial
in codes
22 and 23
868 -- Added GTG
, ATA
, and TTG as alternative start codons to code
13
871 -- Code
14 differs from code
9 only by translating UAA to Tyr rather than
872 -- STOP
. A recent
study (Telford et al
, 2000) has found
no evidence that
873 -- the codon UAA codes
for Tyr
in the flatworms
, but other opinions exist
.
874 -- There are very few GenBank records that are translated with code
14,
875 -- but a test translation shows that retranslating these records with code
876 -- 9 can cause premature terminations
. Therefore
, GenBank will maintain
877 -- code
14 until further information becomes available
.
880 -- Added GTG start to Echinoderm mitochondrial code
, code
9
883 -- Added code
23 Thraustochytrium mitochondrial code
884 -- formerly OGMP code
93
885 -- submitted by Gertraude Berger
, Ph
.D
.
888 -- Added code
22 TAG
-Leu
, TCA
-stop
889 -- found
in mitochondrial DNA of Scenedesmus obliquus
890 -- submitted by Gertraude Berger
, Ph
.D
.
891 -- Organelle Genome Megasequencing Program
, Univ Montreal
894 -- Added code
21, Trematode Mitochondrial
895 -- (as deduced from
: Garey
& Wolstenholme
,1989; Ohama et al
, 1990)
896 -- Added code
16, Chlorophycean Mitochondrial
897 -- (TAG can translated to Leucine instaed to STOP
in chlorophyceans
901 -- Added CTG
,TTG as allowed alternate start codons
in Standard code
.
902 -- Prats et al
. 1989, Hann et al
. 1992
904 -- Version
3.3 - 10/13/95
905 -- Added alternate intiation codon ATC to code
5
906 -- based on complete mitochondrial genome of honeybee
907 -- Crozier
and Crozier
(1993)
909 -- Version
3.2 - 6/24/95
911 -- 10 Alternative Ciliate Macronuclear renamed to Euplotid Macro
...
912 -- 15 Blepharisma Macro
.. code added
913 -- 5 Invertebrate Mito
.. GTG allowed as alternate initiator
914 -- 11 Eubacterial renamed to Bacterial as most alternate starts
915 -- have been found
in Archea
918 -- Version
3.1 - 1995
919 -- Updated as per Andrzej Elzanowski at NCBI
920 -- Complete documentation
in NCBI toolkit documentation
921 -- Note
: 2 genetic codes have been deleted
923 -- Old id Use id
- Notes
925 -- id
7 id
4 - Kinetoplast code now merged
in code id
4
926 -- id
8 id
1 - all plant chloroplast differences due to RNA edit
929 --*************************************************************************
931 Genetic
-code
-table
::= {
936 ncbieaa
"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
937 sncbieaa
"---M------**--*----M---------------M----------------------------"
938 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
939 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
940 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
943 name
"Vertebrate Mitochondrial" ,
946 ncbieaa
"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
947 sncbieaa
"----------**--------------------MMMM----------**---M------------"
948 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
949 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
950 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
953 name
"Yeast Mitochondrial" ,
956 ncbieaa
"FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
957 sncbieaa
"----------**----------------------MM---------------M------------"
958 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
959 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
960 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
963 name
"Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate
964 Mitochondrial; Mycoplasma; Spiroplasma" ,
967 ncbieaa
"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
968 sncbieaa
"--MM------**-------M------------MMMM---------------M------------"
969 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
970 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
971 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
974 name
"Invertebrate Mitochondrial" ,
977 ncbieaa
"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
978 sncbieaa
"---M------**--------------------MMMM---------------M------------"
979 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
980 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
981 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
984 name
"Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
987 ncbieaa
"FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
988 sncbieaa
"--------------*--------------------M----------------------------"
989 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
990 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
991 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
994 name
"Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
997 ncbieaa
"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
998 sncbieaa
"----------**-----------------------M---------------M------------"
999 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1000 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1001 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1004 name
"Euplotid Nuclear" ,
1007 ncbieaa
"FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1008 sncbieaa
"----------**-----------------------M----------------------------"
1009 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1010 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1011 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1014 name
"Bacterial, Archaeal and Plant Plastid" ,
1016 ncbieaa
"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1017 sncbieaa
"---M------**--*----M------------MMMM---------------M------------"
1018 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1019 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1020 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1023 name
"Alternative Yeast Nuclear" ,
1025 ncbieaa
"FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1026 sncbieaa
"----------**--*----M---------------M----------------------------"
1027 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1028 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1029 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1032 name
"Ascidian Mitochondrial" ,
1034 ncbieaa
"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
1035 sncbieaa
"---M------**----------------------MM---------------M------------"
1036 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1037 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1038 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1041 name
"Alternative Flatworm Mitochondrial" ,
1043 ncbieaa
"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1044 sncbieaa
"-----------*-----------------------M----------------------------"
1045 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1046 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1047 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1050 name
"Blepharisma Macronuclear" ,
1052 ncbieaa
"FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1053 sncbieaa
"----------*---*--------------------M----------------------------"
1054 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1055 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1056 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1059 name
"Chlorophycean Mitochondrial" ,
1061 ncbieaa
"FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1062 sncbieaa
"----------*---*--------------------M----------------------------"
1063 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1064 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1065 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1068 name
"Trematode Mitochondrial" ,
1070 ncbieaa
"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1071 sncbieaa
"----------**-----------------------M---------------M------------"
1072 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1073 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1074 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1077 name
"Scenedesmus obliquus Mitochondrial" ,
1079 ncbieaa
"FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1080 sncbieaa
"------*---*---*--------------------M----------------------------"
1081 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1082 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1083 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1086 name
"Thraustochytrium Mitochondrial" ,
1088 ncbieaa
"FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1089 sncbieaa
"--*-------**--*-----------------M--M---------------M------------"
1090 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1091 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1092 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1095 name
"Rhabdopleuridae Mitochondrial" ,
1097 ncbieaa
"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
1098 sncbieaa
"---M------**-------M---------------M---------------M------------"
1099 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1100 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1101 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1104 name
"Candidate Division SR1 and Gracilibacteria" ,
1106 ncbieaa
"FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1107 sncbieaa
"---M------**-----------------------M---------------M------------"
1108 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1109 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1110 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1113 name
"Pachysolen tannophilus Nuclear" ,
1115 ncbieaa
"FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1116 sncbieaa
"----------**--*----M---------------M----------------------------"
1117 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1118 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1119 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1122 name
"Karyorelict Nuclear" ,
1124 ncbieaa
"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1125 sncbieaa
"--------------*--------------------M----------------------------"
1126 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1127 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1128 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1131 name
"Condylostoma Nuclear" ,
1133 ncbieaa
"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1134 sncbieaa
"----------**--*--------------------M----------------------------"
1135 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1136 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1137 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1140 name
"Mesodinium Nuclear" ,
1142 ncbieaa
"FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1143 sncbieaa
"--------------*--------------------M----------------------------"
1144 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1145 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1146 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1149 name
"Peritrich Nuclear" ,
1151 ncbieaa
"FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1152 sncbieaa
"--------------*--------------------M----------------------------"
1153 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1154 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1155 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1158 name
"Blastocrithidia Nuclear" ,
1160 ncbieaa
"FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1161 sncbieaa
"----------**-----------------------M----------------------------"
1162 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1163 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1164 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1167 name
"Balanophoraceae Plastid" ,
1169 ncbieaa
"FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1170 sncbieaa
"---M------*---*----M------------MMMM---------------M------------"
1171 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1172 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1173 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1176 name
"Cephalodiscidae Mitochondrial" ,
1178 ncbieaa
"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
1179 sncbieaa
"---M-------*-------M---------------M---------------M------------"
1180 -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1181 -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1182 -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG