lib/Bio/Tools/CodonTable.pm

   1 package Bio::Tools::CodonTable;
   2
   3 use utf8;
   4 use strict;
   5 use warnings;
   6
   7 use Bio::Tools::IUPAC;
   8 use Bio::SeqUtils;
   9
  10 use base qw(Bio::Root::Root);
  11
  12 # ABSTRACT: Codon table object
  13 # AUTHOR: Heikki Lehvaslaiho <heikki@bioperl.org>
  14 # OWNER: Heikki Lehvaslaiho <heikki@bioperl.org>
  15 # LICENSE: Perl_5
  16
  17 =head1 SYNOPSIS
  18
  19   # This is a read-only class for all known codon tables.  The IDs are
  20   # the ones used by nucleotide sequence databases.  All common IUPAC
  21   # ambiguity codes for DNA, RNA and amino acids are recognized.
  22
  23   use Bio::Tools::CodonTable;
  24
  25   # defaults to ID 1 "Standard"
  26   $myCodonTable   = Bio::Tools::CodonTable->new();
  27   $myCodonTable2  = Bio::Tools::CodonTable->new( -id => 3 );
  28
  29   # change codon table
  30   $myCodonTable->id(5);
  31
  32   # examine codon table
  33   print  join (' ', "The name of the codon table no.", $myCodonTable->id(4),
  34            "is:", $myCodonTable->name(), "\n");
  35
  36   # print possible codon tables
  37   $tables = Bio::Tools::CodonTable->tables;
  38   while ( ($id,$name) = each %{$tables} ) {
  39     print "$id = $name\n";
  40   }
  41
  42   # translate a codon
  43   $aa = $myCodonTable->translate('ACU');
  44   $aa = $myCodonTable->translate('act');
  45   $aa = $myCodonTable->translate('ytr');
  46
  47   # reverse translate an amino acid
  48   @codons = $myCodonTable->revtranslate('A');
  49   @codons = $myCodonTable->revtranslate('Ser');
  50   @codons = $myCodonTable->revtranslate('Glx');
  51   @codons = $myCodonTable->revtranslate('cYS', 'rna');
  52
  53   # reverse translate an entire amino acid sequence into a IUPAC
  54   # nucleotide string
  55
  56   my $seqobj    = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
  57   my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
  58
  59   # boolean tests
  60   print "Is a start\n"       if $myCodonTable->is_start_codon('ATG');
  61   print "Is a terminator\n"  if $myCodonTable->is_ter_codon('tar');
  62   print "Is a unknown\n"     if $myCodonTable->is_unknown_codon('JTG');
  63
  64 =head1 DESCRIPTION
  65
  66 Codon tables are also called translation tables or genetic codes
  67 since that is what they represent. A bit more complete picture
  68 of the full complexity of codon usage in various taxonomic groups
  69 is presented at the NCBI Genetic Codes Home page.
  70
  71 CodonTable is a BioPerl class that knows all current translation
  72 tables that are used by primary nucleotide sequence databases
  73 (GenBank, EMBL and DDBJ). It provides methods to output information
  74 about tables and relationships between codons and amino acids.
  75
  76 This class and its methods recognize all common IUPAC ambiguity codes
  77 for DNA, RNA and amino acids. The translation method follows the
  78 conventions in EMBL and TREMBL databases.
  79
  80 It is a nuisance to separate RNA and cDNA representations of nucleic
  81 acid transcripts. The CodonTable object accepts codons of both types as
  82 input and allows the user to set the mode for output when reverse
  83 translating. Its default for output is DNA.
  84
  85 Note:
  86
  87 This class deals primarily with individual codons and amino
  88 acids. However, in the interest of speed you can L<translate> a
  89 longer sequence, too. The full complexity of protein translation
  90 is tackled by L<Bio::PrimarySeqI::translate>.
  91
  92
  93 The amino acid codes are IUPAC recommendations for common amino acids:
  94
  95           A           Ala            Alanine
  96           R           Arg            Arginine
  97           N           Asn            Asparagine
  98           D           Asp            Aspartic acid
  99           C           Cys            Cysteine
 100           Q           Gln            Glutamine
 101           E           Glu            Glutamic acid
 102           G           Gly            Glycine
 103           H           His            Histidine
 104           I           Ile            Isoleucine
 105           L           Leu            Leucine
 106           K           Lys            Lysine
 107           M           Met            Methionine
 108           F           Phe            Phenylalanine
 109           P           Pro            Proline
 110           O           Pyl            Pyrrolysine (22nd amino acid)
 111           U           Sec            Selenocysteine (21st amino acid)
 112           S           Ser            Serine
 113           T           Thr            Threonine
 114           W           Trp            Tryptophan
 115           Y           Tyr            Tyrosine
 116           V           Val            Valine
 117           B           Asx            Aspartic acid or Asparagine
 118           Z           Glx            Glutamine or Glutamic acid
 119           J           Xle            Leucine or Isoleucine (mass spec ambiguity)
 120           X           Xaa            Any or unknown amino acid
 121
 122
 123 It is worth noting that the "Bacterial" codon table no. 11 produces a
 124 polypeptide that is, confusingly, identical to the standard one. The
 125 only differences are in available initiator codons.
 126
 127
 128 NCBI Genetic Codes home page:
 129      (Last update of the Genetic Codes: Apr. 25, 2024)
 130      https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
 131
 132 The "value notation" / "print form" ASN.1 version is at:
 133      ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
 134
 135 Thanks to Matteo diTomasso for the original Perl implementation
 136 of these tables.
 137
 138 =cut
 139
 140
 141 # set internal values for all translation tables
 142 use constant CODONSIZE => 3;
 143 our $GAP = '-';
 144 our $CODONGAP = $GAP x CODONSIZE;
 145 our %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
 146 our %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
 147 our %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
 148 our $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
 149 our $TERMINATOR = '*';
 150
 151 our (@NAMES, @TABLES, @STARTS);
 152 # Parse the ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt file which
 153 # is below __DATA__ in this module (see the end of the file).  This
 154 # fills the @NAMES, @TABLES, and @STARTS variables.  To update to a
 155 # new release of gc.prt, replace the content below __DATA__.
 156 {
 157     # Init tables has with special option (id=0) for ATG-only start
 158     my %tables = (
 159         0 => {
 160             name => "Strict",
 161             ncbieaa => "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
 162             sncbieaa => "----------**--*--------------------M----------------------------",
 163         },
 164     );
 165
 166     while (defined(my $line = <DATA>)) {
 167         next if $line =~ /^\s*--/;  # skip comment lines
 168         if ($line =~ /^\s*\{\s*$/) {  # start of a table description
 169             my $name = "";
 170             my $id = 0;
 171             my $ncbieaa = "";
 172             my $sncbieaa = "";
 173             do {
 174                 if ($line =~ /^\s*(name|id|ncbieaa|sncbieaa)\s+(.+)/) {
 175                     my $key = $1;
 176                     my $rem = $2;
 177                     if ($key eq "id") {
 178                         $rem =~ /^(\d+)/;
 179                         $id = int $1;
 180                     } else {
 181                         # The remaining keys --- name, ncbieaa, and
 182                         # sncbieaa --- are strings which may be
 183                         # multi-line (e.g., name for table with id 4).
 184                         # We are assuming that there is no " character
 185                         # inside the value so we keep appending lines
 186                         # until we find an end ".
 187                         while ($rem !~ /^"(.*)"/ && ! eof DATA) {
 188                             $rem .= <DATA>;
 189                         }
 190                         $rem =~ s/\n//g;
 191                         $rem =~ /^"(.*)"/;
 192                         my $str = $1;
 193                         if ($key eq "name" && ! $name) {
 194                             # ignore alternative names, e.g. SGC0,
 195                             # only keep the first name listed.
 196                             $name = $str;
 197                         } elsif ($key eq "ncbieaa") {
 198                             $ncbieaa = $str;
 199                         } elsif ($key eq "sncbieaa") {
 200                             $sncbieaa = $str;
 201                         }
 202                     }
 203                 }
 204             } until (($line = <DATA>) =~ /^\s*}\s*,?$/);  # we reached the end of table description
 205             $tables{$id} = {
 206                 name => $name,
 207                 ncbieaa => $ncbieaa,
 208                 sncbieaa => $sncbieaa
 209             };
 210         }
 211     }
 212     close DATA;
 213     # use Data::Dumper;
 214     # print Dumper %tables;
 215
 216     # After parsing gc.prt, fill in @NAMES, @TABLES, and @STARTS
 217     my $highest_id = (sort {$a <=> $b} keys %tables)[-1];
 218     for (my $i = 0; $i < $highest_id; $i++) {
 219         if (defined $tables{$i}) {
 220             push @NAMES, $tables{$i}->{name};
 221             push @TABLES, $tables{$i}->{ncbieaa};
 222             push @STARTS, $tables{$i}->{sncbieaa};
 223         } else {
 224             push @NAMES, '';
 225             push @TABLES, '';
 226             push @STARTS, '';
 227         }
 228     }
 229 }
 230
 231 our ($TRCOL, $CODONS);
 232 {
 233     my @nucs = qw(t c a g);
 234     my $x = 0;
 235     ($CODONS, $TRCOL) = ({}, {});
 236     for my $i (@nucs) {
 237         for my $j (@nucs) {
 238             for my $k (@nucs) {
 239                 my $codon = "$i$j$k";
 240                 $CODONS->{$codon} = $x;
 241                 $TRCOL->{$x} = $codon;
 242                 $x++;
 243             }
 244         }
 245     }
 246 }
 247
 248 sub new {
 249     my($class,@args) = @_;
 250     my $self = $class->SUPER::new(@args);
 251
 252     my($id) =
 253         $self->_rearrange([qw(ID
 254                  )],
 255              @args);
 256
 257     $id = 1 if ( ! defined ( $id ) );
 258     $self->id($id);
 259     return $self; # success - we hope!
 260 }
 261
 262 =head2 id
 263
 264  Title   : id
 265  Usage   : $obj->id(3); $id_integer = $obj->id();
 266  Function: Sets or returns the id of the translation table.  IDs are
 267            integers from 0 (special ATG-only start) to 25, excluding
 268            7-8 and 17-20 which have been removed. If an invalid ID is
 269            given the method returns 1, the standard table.
 270  Example :
 271  Returns : value of id, a scalar, warn and fall back to 1 (standard table)
 272            if specified id is not valid
 273  Args    : newvalue (optional)
 274
 275 =cut
 276
 277 sub id{
 278     my ($self,$value) = @_;
 279     if( defined $value) {
 280         if (! defined $TABLES[$value] || $TABLES[$value] eq '' || $value < 0) {
 281             $self->warn("Not a valid codon table ID [$value], using [1] instead ");
 282             $value = 1;
 283         }
 284         $self->{'id'} = $value;
 285     }
 286     return $self->{'id'};
 287 }
 288
 289 =head2 name
 290
 291  Title   : name
 292  Usage   : $obj->name()
 293  Function: returns the descriptive name of the translation table
 294  Example :
 295  Returns : A string
 296  Args    : None
 297
 298
 299 =cut
 300
 301 sub name{
 302    my ($self) = @_;
 303
 304    my ($id) = $self->{'id'};
 305    return $NAMES[$id];
 306 }
 307
 308 =head2 tables
 309
 310  Title   : tables
 311  Usage   : $obj->tables()  or  Bio::Tools::CodonTable->tables()
 312  Function: returns a hash reference where each key is a valid codon
 313            table id() number, and each value is the corresponding
 314            codon table name() string
 315  Example :
 316  Returns : A hashref
 317  Args    : None
 318
 319
 320 =cut
 321
 322 sub tables{
 323   my %tables;
 324   for my $id (0 .. $#NAMES) {
 325     my $name = $NAMES[$id];
 326     $tables{$id} = $name if $name;
 327   }
 328   return \%tables;
 329 }
 330
 331 =head2 translate
 332
 333  Title   : translate
 334  Usage   : $obj->translate('YTR')
 335  Function: Returns a string of one letter amino acid codes from
 336            nucleotide sequence input. The imput can be of any length.
 337
 338            Returns 'X' for unknown codons and codons that code for
 339            more than one amino acid. Returns an empty string if input
 340            is not three characters long. Exceptions for these are:
 341
 342              - IUPAC amino acid code B for Aspartic Acid and
 343                Asparagine, is used.
 344              - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
 345                used.
 346              - if the codon is two nucleotides long and if by adding
 347                an a third character 'N', it codes for a single amino
 348                acid (with exceptions above), return that, otherwise
 349                return empty string.
 350
 351            Returns empty string for other input strings that are not
 352            three characters long.
 353
 354  Example :
 355  Returns : a string of one letter ambiguous IUPAC amino acid codes
 356  Args    : ambiguous IUPAC nucleotide string
 357
 358
 359 =cut
 360
 361 sub translate {
 362     my ($self, $seq, $complete_codon) = @_;
 363     $self->throw("Calling translate without a seq argument!") unless defined $seq;
 364     return '' unless $seq;
 365
 366     my $id = $self->id;
 367     my ($partial) = 0;
 368     $partial = 2 if length($seq) % CODONSIZE == 2;
 369
 370     $seq = lc $seq;
 371     $seq =~ tr/u/t/;
 372     my $protein = "";
 373     if ($seq =~ /[^actg]/ ) { #ambiguous chars
 374         for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
 375             my $triplet = substr($seq, $i, CODONSIZE);
 376             if( $triplet eq $CODONGAP ) {
 377                 $protein .= $GAP;
 378             } elsif (exists $CODONS->{$triplet}) {
 379                 $protein .= substr($TABLES[$id],
 380                            $CODONS->{$triplet},1);
 381             } else {
 382                 $protein .= $self->_translate_ambiguous_codon($triplet);
 383             }
 384         }
 385     } else { # simple, strict translation
 386         for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
 387             my $triplet = substr($seq, $i, CODONSIZE);
 388             if( $triplet eq $CODONGAP ) {
 389                 $protein .= $GAP;
 390             }
 391             if (exists $CODONS->{$triplet}) {
 392                 $protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1);
 393             } else {
 394                 $protein .= 'X';
 395             }
 396         }
 397     }
 398     if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides
 399         my $triplet = substr($seq, ($partial -4)). "n";
 400         if( $triplet eq $CODONGAP ) {
 401             $protein .= $GAP;
 402         } elsif (exists $CODONS->{$triplet}) {
 403             my $aa = substr($TABLES[$id], $CODONS->{$triplet},1);
 404             $protein .= $aa;
 405         } else {
 406             $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
 407         }
 408     }
 409     return $protein;
 410 }
 411
 412 sub _translate_ambiguous_codon {
 413     my ($self, $triplet, $partial) = @_;
 414     $partial ||= 0;
 415     my $id = $self->id;
 416     my $aa;
 417     my @codons = $self->unambiguous_codons($triplet);
 418     my %aas =();
 419     foreach my $codon (@codons) {
 420         $aas{substr($TABLES[$id],$CODONS->{$codon},1)} = 1;
 421     }
 422     my $count = scalar keys %aas;
 423     if ( $count == 1 ) {
 424         $aa = (keys %aas)[0];
 425     }
 426     elsif ( $count == 2 ) {
 427         if ($aas{'D'} and $aas{'N'}) {
 428             $aa = 'B';
 429         }
 430         elsif ($aas{'E'} and $aas{'Q'}) {
 431             $aa = 'Z';
 432         } else {
 433             $partial ? ($aa = '') : ($aa = 'X');
 434         }
 435     } else {
 436         $partial ? ($aa = '') :  ($aa = 'X');
 437     }
 438     return $aa;
 439 }
 440
 441 =head2 translate_strict
 442
 443  Title   : translate_strict
 444  Usage   : $obj->translate_strict('ACT')
 445  Function: returns one letter amino acid code for a codon input
 446
 447            Fast and simple translation. User is responsible to resolve
 448            ambiguous nucleotide codes before calling this
 449            method. Returns 'X' for unknown codons and an empty string
 450            for input strings that are not three characters long.
 451
 452            It is not recommended to use this method in a production
 453            environment. Use method translate, instead.
 454
 455  Example :
 456  Returns : A string
 457  Args    : a codon = a three nucleotide character string
 458
 459
 460 =cut
 461
 462 sub translate_strict{
 463    my ($self, $value) = @_;
 464    my $id = $self->{'id'};
 465
 466    $value  = lc $value;
 467    $value  =~ tr/u/t/;
 468
 469    return '' unless length $value == 3;
 470
 471    return 'X' unless defined $CODONS->{$value};
 472
 473    return substr( $TABLES[$id], $CODONS->{$value}, 1 );
 474 }
 475
 476 =head2 revtranslate
 477
 478  Title   : revtranslate
 479  Usage   : $obj->revtranslate('G')
 480  Function: returns codons for an amino acid
 481
 482            Returns an empty string for unknown amino acid
 483            codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
 484            Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
 485            letter amino acid codes are accepted. '*' and 'Ter' are
 486            used for terminator.
 487
 488            By default, the output codons are shown in DNA.  If the
 489            output is needed in RNA (tr/t/u/), add a second argument
 490            'RNA'.
 491
 492  Example : $obj->revtranslate('Gly', 'RNA')
 493  Returns : An array of three lower case letter strings i.e. codons
 494  Args    : amino acid, 'RNA'
 495
 496 =cut
 497
 498 sub revtranslate {
 499     my ($self, $value, $coding) = @_;
 500     my @codons;
 501
 502     if (length($value) == 3 ) {
 503         $value = lc $value;
 504         $value = ucfirst $value;
 505         $value = $THREELETTERSYMBOLS{$value};
 506     }
 507     if (    defined $value and $value =~ /$VALID_PROTEIN/
 508         and length($value) == 1
 509         ) {
 510         my $id = $self->{'id'};
 511
 512         $value = uc $value;
 513         my @aas = @{$IUPAC_AA{$value}};
 514         foreach my $aa (@aas) {
 515             #print $aa, " -2\n";
 516             $aa = '\*' if $aa eq '*';
 517             while ($TABLES[$id] =~ m/$aa/g) {
 518                 my $p = pos $TABLES[$id];
 519                 push (@codons, $TRCOL->{--$p});
 520             }
 521         }
 522     }
 523
 524     if ($coding and uc ($coding) eq 'RNA') {
 525         for my $i (0..$#codons)  {
 526             $codons[$i] =~ tr/t/u/;
 527         }
 528     }
 529
 530    return @codons;
 531 }
 532
 533 =head2 reverse_translate_all
 534
 535  Title   : reverse_translate_all
 536  Usage   : my $iup_str = $cttable->reverse_translate_all($seq_object)
 537            my $iup_str = $cttable->reverse_translate_all($seq_object,
 538                                                          $cutable,
 539                                                          15);
 540  Function: reverse translates a protein sequence into IUPAC nucleotide
 541            sequence. An 'X' in the protein sequence is converted to 'NNN'
 542            in the nucleotide sequence.
 543  Returns : a string
 544  Args    : a Bio::PrimarySeqI compatible object (mandatory)
 545            a Bio::CodonUsage::Table object and a threshold if only
 546              codons with a relative frequency above the threshold are
 547              to be considered.
 548 =cut
 549
 550 sub reverse_translate_all {
 551     my ($self, $obj, $cut, $threshold) = @_;
 552
 553     ## check args are OK
 554
 555     if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
 556         $self->throw(" I need a Bio::PrimarySeqI object, not a [".
 557                         ref($obj) . "]");
 558         }
 559     if($obj->alphabet ne 'protein') {
 560         $self->throw("Cannot reverse translate, need an amino acid sequence .".
 561                      "This sequence is of type [" . $obj->alphabet ."]");
 562         }
 563     my @data;
 564     my @seq = split '', $obj->seq;
 565
 566     ## if we're not supplying a codon usage table...
 567     if( !$cut && !$threshold) {
 568         ## get lists of possible codons for each aa.
 569         for my $aa (@seq) {
 570             if ($aa =~ /x/i) {
 571                 push @data, (['NNN']);
 572             }else {
 573                 my @cods = $self->revtranslate($aa);
 574                 push @data, \@cods;
 575             }
 576         }
 577     }else{
 578     #else we are supplying a codon usage table, we just want common codons
 579     #check args first.
 580         if(!$cut->isa('Bio::CodonUsage::Table'))    {
 581             $self->throw("I need a Bio::CodonUsage::Table object, not a [".
 582                      ref($cut). "].");
 583             }
 584         my $cod_ref = $cut->probable_codons($threshold);
 585         for my $aa (@seq) {
 586             if ($aa =~ /x/i) {
 587                 push @data, (['NNN']);
 588                 next;
 589                 }
 590             push @data, $cod_ref->{$aa};
 591         }
 592     }
 593
 594     return $self->_make_iupac_string(\@data);
 595 }
 596
 597 =head2 reverse_translate_best
 598
 599  Title   : reverse_translate_best
 600  Usage   : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
 601  Function: Reverse translates a protein sequence into plain nucleotide
 602            sequence (GATC), uses the most common codon for each amino acid
 603  Returns : A string
 604  Args    : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
 605
 606 =cut
 607
 608 sub reverse_translate_best {
 609
 610     my ($self, $obj, $cut) = @_;
 611
 612     if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
 613         $self->throw(" I need a Bio::PrimarySeqI object, not a [".
 614                          ref($obj) . "]");
 615     }
 616     if ($obj->alphabet ne 'protein')    {
 617         $self->throw("Cannot reverse translate, need an amino acid sequence .".
 618                          "This sequence is of type [" . $obj->alphabet ."]");
 619     }
 620     if ( !$cut | !$cut->isa('Bio::CodonUsage::Table'))  {
 621         $self->throw("I need a Bio::CodonUsage::Table object, not a [".
 622                          ref($cut). "].");
 623     }
 624
 625     my $str = '';
 626     my @seq = split '', $obj->seq;
 627
 628     my $cod_ref = $cut->most_common_codons();
 629
 630     for my $aa ( @seq ) {
 631         if ($aa =~ /x/i) {
 632             $str .= 'NNN';
 633             next;
 634         }
 635         if ( defined $cod_ref->{$aa} ) {
 636             $str .= $cod_ref->{$aa};
 637         } else {
 638             $self->throw("Input sequence contains invalid character: $aa");
 639         }
 640     }
 641    return $str;
 642 }
 643
 644 =head2 is_start_codon
 645
 646  Title   : is_start_codon
 647  Usage   : $obj->is_start_codon('ATG')
 648  Function: returns true (1) for all codons that can be used as a
 649            translation start, false (0) for others.  In the case of
 650            ambiguous codons, e.g., 'NTG', only returns true if all
 651            possible codons are true.
 652  Example : $myCodonTable->is_start_codon('ATG')
 653  Returns : boolean
 654  Args    : codon
 655
 656 =cut
 657
 658 sub is_start_codon{
 659    return shift->_codon_is(shift, \@STARTS, 'M');
 660 }
 661
 662 =head2 is_ter_codon
 663
 664  Title   : is_ter_codon
 665  Usage   : $obj->is_ter_codon('GAA')
 666  Function: returns true (1) for all codons that can be used as a
 667            translation terminator, false (0) for others. In the case
 668            of ambiguous codons, e.g., 'TAN', only returns true if all
 669            possible codons are true.
 670  Example : $myCodonTable->is_ter_codon('ATG')
 671  Returns : boolean
 672  Args    : codon
 673
 674 =cut
 675
 676 sub is_ter_codon{
 677    return shift->_codon_is(shift, \@STARTS, $TERMINATOR);
 678 }
 679
 680 # desc: compares the passed value with a single entry in the given
 681 #       codon table
 682 # args: a value (typically a three-char string like 'atg'), a
 683 #       reference to the appropriate set of codon tables, a
 684 #       single-character value to check for at the position in the
 685 #       given codon table.
 686 # ret:  boolean, true if the given codon table contains the $key at the
 687 #       position corresponding to $value.  In the case of ambiguous
 688 #       codons, only returns true if all possibilities match $key.
 689 sub _codon_is {
 690    my ($self, $value, $table, $key ) = @_;
 691
 692    return 0 unless length $value == 3;
 693
 694    $value  = lc $value;
 695    $value  =~ tr/u/t/;
 696
 697    my $id = $self->{'id'};
 698    my $result = 0;
 699    for my $c ( $self->unambiguous_codons($value) ) {
 700        my $m = substr( $table->[$id], $CODONS->{$c}, 1 );
 701        if ($m eq $key) {
 702            $result = 1;
 703        } else {
 704            return 0;
 705        }
 706    }
 707    return $result;
 708 }
 709
 710 =head2 is_unknown_codon
 711
 712  Title   : is_unknown_codon
 713  Usage   : $obj->is_unknown_codon('GAJ')
 714  Function: returns false (0) for all codons that are valid,
 715         true (1) for others.
 716  Example : $myCodonTable->is_unknown_codon('NTG')
 717  Returns : boolean
 718  Args    : codon
 719
 720
 721 =cut
 722
 723 sub is_unknown_codon{
 724    my ($self, $value) = @_;
 725    $value  = lc $value;
 726    $value  =~ tr/u/t/;
 727    return 1 unless $self->unambiguous_codons($value);
 728    return 0;
 729 }
 730
 731 =head2 unambiguous_codons
 732
 733  Title   : unambiguous_codons
 734  Usage   : @codons = $self->unambiguous_codons('ACN')
 735  Returns : array of strings (one-letter unambiguous amino acid codes)
 736  Args    : a codon = a three IUPAC nucleotide character string
 737
 738 =cut
 739
 740 sub unambiguous_codons{
 741     my ($self,$value) = @_;
 742     my @nts = map { $IUPAC_DNA{uc $_} }  split(//, $value);
 743
 744     my @codons;
 745     for my $i ( @{$nts[0]} ) {
 746     for my $j ( @{$nts[1]} ) {
 747     for my $k ( @{$nts[2]} ) {
 748         push @codons, lc "$i$j$k";
 749     }}}
 750     return @codons;
 751 }
 752
 753 =head2 _unambiquous_codons
 754
 755 deprecated, now an alias for unambiguous_codons
 756
 757 =cut
 758
 759 sub _unambiquous_codons {
 760     unambiguous_codons( undef, @_ );
 761 }
 762
 763 =head2 add_table
 764
 765  Title   : add_table
 766  Usage   : $newid = $ct->add_table($name, $table, $starts)
 767  Function: Add a custom Codon Table into the object.
 768            Know what you are doing, only the length of
 769            the argument strings is checked!
 770  Returns : the id of the new codon table
 771  Args    : name, a string, optional (can be empty)
 772            table, a string of 64 characters
 773            startcodons, a string of 64 characters, defaults to standard
 774
 775 =cut
 776
 777 sub add_table {
 778     my ($self, $name, $table, $starts) = @_;
 779
 780     $name   ||= 'Custom' . $#NAMES + 1;
 781     $starts ||= $STARTS[1];
 782     $self->throw('Suspect input!')
 783         unless length($table) == 64 and length($starts) == 64;
 784
 785     push @NAMES,  $name;
 786     push @TABLES, $table;
 787     push @STARTS, $starts;
 788
 789     return $#NAMES;
 790 }
 791
 792 sub _make_iupac_string {
 793     my ($self, $cod_ref) = @_;
 794     if(ref($cod_ref) ne 'ARRAY') {
 795         $self->throw(" I need a reference to a list of references to codons, ".
 796                      " not a [". ref($cod_ref) . "].");
 797         }
 798     my %iupac_hash   = Bio::Tools::IUPAC->iupac_rev_iub();
 799     my $iupac_string = ''; ## the string to be returned
 800     for my $aa (@$cod_ref) {
 801
 802         ## scan through codon positions, record the differing values,
 803         # then look up in the iub hash
 804         for my $index(0..2) {
 805             my %h;
 806             map { my $k = substr($_,$index,1);
 807                 $h{$k}  = undef;} @$aa;
 808             my $lookup_key = join '', sort{$a cmp $b}keys %h;
 809
 810             ## extend string
 811             $iupac_string .= $iupac_hash{uc$lookup_key};
 812         }
 813     }
 814     return $iupac_string;
 815 }
 816
 817
 818 1;
 819
 820 # Follows the content of
 821 # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt, which is the NCBI
 822 # genetic codon table in ASN.1 value notation / print format.  We do
 823 # not have an ASN.1 decoder for value notation but it's easy enough to
 824 # parse.
 825
 826 __DATA__
 827 --**************************************************************************
 828 --  This is the NCBI genetic code table
 829 --  Initial base data set from Andrzej Elzanowski while at PIR International
 830 --  Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
 831 --  Base 1-3 of each codon have been added as comments to facilitate
 832 --    readability at the suggestion of Peter Rice, EMBL
 833 --  Later additions by Taxonomy Group staff at NCBI
 834 --
 835 --  Version 4.6
 836 --     Renamed genetic code 24 to Rhabdopleuridae Mitochondrial
 837 --
 838 --  Version 4.5
 839 --     Added Cephalodiscidae mitochondrial genetic code 33
 840 --
 841 --  Version 4.4
 842 --     Added GTG as start codon for genetic code 3
 843 --     Added Balanophoraceae plastid genetic code 32
 844 --
 845 --  Version 4.3
 846 --     Change to CTG -> Leu in genetic codes 27, 28, 29, 30
 847 --
 848 --  Version 4.2
 849 --     Added Karyorelict nuclear genetic code 27
 850 --     Added Condylostoma nuclear genetic code 28
 851 --     Added Mesodinium nuclear genetic code 29
 852 --     Added Peritrich nuclear genetic code 30
 853 --     Added Blastocrithidia nuclear genetic code 31
 854 --
 855 --  Version 4.1
 856 --     Added Pachysolen tannophilus nuclear genetic code 26
 857 --
 858 --  Version 4.0
 859 --     Updated version to reflect numerous undocumented changes:
 860 --     Corrected start codons for genetic code 25
 861 --     Name of new genetic code is Candidate Division SR1 and Gracilibacteria
 862 --     Added candidate division SR1 nuclear genetic code 25
 863 --     Added GTG as start codon for genetic code 24
 864 --     Corrected Pterobranchia Mitochondrial genetic code (24)
 865 --     Added genetic code 24, Pterobranchia Mitochondrial
 866 --     Genetic code 11 is now Bacterial, Archaeal and Plant Plastid
 867 --     Fixed capitalization of mitochondrial in codes 22 and 23
 868 --     Added GTG, ATA, and TTG as alternative start codons to code 13
 869 --
 870 --  Version 3.9
 871 --     Code 14 differs from code 9 only by translating UAA to Tyr rather than
 872 --     STOP.  A recent study (Telford et al, 2000) has found no evidence that
 873 --     the codon UAA codes for Tyr in the flatworms, but other opinions exist.
 874 --     There are very few GenBank records that are translated with code 14,
 875 --     but a test translation shows that retranslating these records with code
 876 --     9 can cause premature terminations.  Therefore, GenBank will maintain
 877 --     code 14 until further information becomes available.
 878 --
 879 --  Version 3.8
 880 --     Added GTG start to Echinoderm mitochondrial code, code 9
 881 --
 882 --  Version 3.7
 883 --     Added code 23 Thraustochytrium mitochondrial code
 884 --        formerly OGMP code 93
 885 --        submitted by Gertraude Berger, Ph.D.
 886 --
 887 --  Version 3.6
 888 --     Added code 22 TAG-Leu, TCA-stop
 889 --        found in mitochondrial DNA of Scenedesmus obliquus
 890 --        submitted by Gertraude Berger, Ph.D.
 891 --        Organelle Genome Megasequencing Program, Univ Montreal
 892 --
 893 --  Version 3.5
 894 --     Added code 21, Trematode Mitochondrial
 895 --       (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
 896 --     Added code 16, Chlorophycean Mitochondrial
 897 --       (TAG can be translated to Leucine instead of STOP in chlorophyceans
 898 --        and fungi)
 899 --
 900 --  Version 3.4
 901 --     Added CTG,TTG as allowed alternate start codons in Standard code.
 902 --        Prats et al. 1989, Hann et al. 1992
 903 --
 904 --  Version 3.3 - 10/13/95
 905 --     Added alternate initiation codon ATC to code 5
 906 --        based on complete mitochondrial genome of honeybee
 907 --        Crozier and Crozier (1993)
 908 --
 909 --  Version 3.2 - 6/24/95
 910 --  Code       Comments
 911 --   10        Alternative Ciliate Macronuclear renamed to Euplotid Macro...
 912 --   15        Blepharisma Macro.. code added
 913 --    5        Invertebrate Mito.. GTG allowed as alternate initiator
 914 --   11        Eubacterial renamed to Bacterial as most alternate starts
 915 --               have been found in Archaea
 916 --
 917 --
 918 --  Version 3.1 - 1995
 919 --  Updated as per Andrzej Elzanowski at NCBI
 920 --     Complete documentation in NCBI toolkit documentation
 921 --  Note: 2 genetic codes have been deleted
 922 --
 923 --   Old id   Use id     - Notes
 924 --
 925 --   id 7      id 4      - Kinetoplast code now merged in code id 4
 926 --   id 8      id 1      - all plant chloroplast differences due to RNA edit
 927 --
 928 --
 929 --*************************************************************************
 930
 931 Genetic-code-table ::= {
 932  {
 933   name "Standard" ,
 934   name "SGC0" ,
 935   id 1 ,
 936   ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
 937   sncbieaa "---M------**--*----M---------------M----------------------------"
 938   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
 939   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
 940   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 941  },
 942  {
 943   name "Vertebrate Mitochondrial" ,
 944   name "SGC1" ,
 945   id 2 ,
 946   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
 947   sncbieaa "----------**--------------------MMMM----------**---M------------"
 948   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
 949   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
 950   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 951  },
 952  {
 953   name "Yeast Mitochondrial" ,
 954   name "SGC2" ,
 955   id 3 ,
 956   ncbieaa  "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
 957   sncbieaa "----------**----------------------MM---------------M------------"
 958   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
 959   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
 960   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 961  },
 962  {
 963     name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate
 964  Mitochondrial; Mycoplasma; Spiroplasma" ,
 965   name "SGC3" ,
 966   id 4 ,
 967   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
 968   sncbieaa "--MM------**-------M------------MMMM---------------M------------"
 969   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
 970   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
 971   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 972  },
 973  {
 974   name "Invertebrate Mitochondrial" ,
 975   name "SGC4" ,
 976   id 5 ,
 977   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
 978   sncbieaa "---M------**--------------------MMMM---------------M------------"
 979   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
 980   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
 981   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 982  },
 983  {
 984   name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
 985   name "SGC5" ,
 986   id 6 ,
 987   ncbieaa  "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
 988   sncbieaa "--------------*--------------------M----------------------------"
 989   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
 990   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
 991   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 992  },
 993  {
 994   name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
 995   name "SGC8" ,
 996   id 9 ,
 997   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
 998   sncbieaa "----------**-----------------------M---------------M------------"
 999   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1000   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1001   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1002  },
1003  {
1004   name "Euplotid Nuclear" ,
1005   name "SGC9" ,
1006   id 10 ,
1007   ncbieaa  "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1008   sncbieaa "----------**-----------------------M----------------------------"
1009   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1010   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1011   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1012  },
1013  {
1014   name "Bacterial, Archaeal and Plant Plastid" ,
1015   id 11 ,
1016   ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1017   sncbieaa "---M------**--*----M------------MMMM---------------M------------"
1018   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1019   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1020   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1021  },
1022  {
1023   name "Alternative Yeast Nuclear" ,
1024   id 12 ,
1025   ncbieaa  "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1026   sncbieaa "----------**--*----M---------------M----------------------------"
1027   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1028   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1029   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1030  },
1031  {
1032   name "Ascidian Mitochondrial" ,
1033   id 13 ,
1034   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
1035   sncbieaa "---M------**----------------------MM---------------M------------"
1036   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1037   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1038   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1039  },
1040  {
1041   name "Alternative Flatworm Mitochondrial" ,
1042   id 14 ,
1043   ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1044   sncbieaa "-----------*-----------------------M----------------------------"
1045   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1046   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1047   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1048  } ,
1049  {
1050   name "Blepharisma Macronuclear" ,
1051   id 15 ,
1052   ncbieaa  "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1053   sncbieaa "----------*---*--------------------M----------------------------"
1054   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1055   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1056   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1057  } ,
1058  {
1059   name "Chlorophycean Mitochondrial" ,
1060   id 16 ,
1061   ncbieaa  "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1062   sncbieaa "----------*---*--------------------M----------------------------"
1063   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1064   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1065   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1066  } ,
1067  {
1068   name "Trematode Mitochondrial" ,
1069   id 21 ,
1070   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1071   sncbieaa "----------**-----------------------M---------------M------------"
1072   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1073   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1074   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1075  } ,
1076  {
1077   name "Scenedesmus obliquus Mitochondrial" ,
1078   id 22 ,
1079   ncbieaa  "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1080   sncbieaa "------*---*---*--------------------M----------------------------"
1081   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1082   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1083   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1084  } ,
1085  {
1086   name "Thraustochytrium Mitochondrial" ,
1087   id 23 ,
1088   ncbieaa  "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1089   sncbieaa "--*-------**--*-----------------M--M---------------M------------"
1090   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1091   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1092   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1093  } ,
1094  {
1095   name "Rhabdopleuridae Mitochondrial" ,
1096   id 24 ,
1097   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
1098   sncbieaa "---M------**-------M---------------M---------------M------------"
1099   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1100   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1101   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1102  } ,
1103  {
1104   name "Candidate Division SR1 and Gracilibacteria" ,
1105   id 25 ,
1106   ncbieaa  "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1107   sncbieaa "---M------**-----------------------M---------------M------------"
1108   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1109   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1110   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1111  } ,
1112  {
1113   name "Pachysolen tannophilus Nuclear" ,
1114   id 26 ,
1115   ncbieaa  "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1116   sncbieaa "----------**--*----M---------------M----------------------------"
1117   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1118   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1119   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1120  } ,
1121  {
1122   name "Karyorelict Nuclear" ,
1123   id 27 ,
1124   ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1125   sncbieaa "--------------*--------------------M----------------------------"
1126   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1127   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1128   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1129  } ,
1130  {
1131   name "Condylostoma Nuclear" ,
1132   id 28 ,
1133   ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1134   sncbieaa "----------**--*--------------------M----------------------------"
1135   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1136   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1137   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1138  } ,
1139  {
1140   name "Mesodinium Nuclear" ,
1141   id 29 ,
1142   ncbieaa  "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1143   sncbieaa "--------------*--------------------M----------------------------"
1144   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1145   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1146   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1147  } ,
1148  {
1149   name "Peritrich Nuclear" ,
1150   id 30 ,
1151   ncbieaa  "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1152   sncbieaa "--------------*--------------------M----------------------------"
1153   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1154   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1155   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1156  } ,
1157  {
1158   name "Blastocrithidia Nuclear" ,
1159   id 31 ,
1160   ncbieaa  "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1161   sncbieaa "----------**-----------------------M----------------------------"
1162   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1163   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1164   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1165  } ,
1166  {
1167   name "Balanophoraceae Plastid" ,
1168   id 32 ,
1169   ncbieaa  "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1170   sncbieaa "---M------*---*----M------------MMMM---------------M------------"
1171   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1172   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1173   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1174  } ,
1175  {
1176   name "Cephalodiscidae Mitochondrial" ,
1177   id 33 ,
1178   ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
1179   sncbieaa "---M-------*-------M---------------M---------------M------------"
1180   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1181   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1182   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1183  }
1184 }