From b533f0eb93adb23ed16da217f5d6a3e8093725fa Mon Sep 17 00:00:00 2001
From: =?utf8?q?Carn=C3=AB=20Draug?= <carandraug+dev@gmail.com>
Date: Thu, 13 Sep 2018 12:26:17 +0100
Subject: [PATCH] Bio::DB::TFBS namespace has been moved to its own
 distribution named after itself

---
 Bio/DB/TFBS.pm                    |  166 ----
 Bio/DB/TFBS/transfac_pro.pm       | 1970 -------------------------------------
 Changes                           |    3 +
 t/LocalDB/transfac_pro.t          |  262 -----
 t/data/transfac_pro/factor.dat    |  342 -------
 t/data/transfac_pro/fragment.dat  |  111 ---
 t/data/transfac_pro/gene.dat      |  349 -------
 t/data/transfac_pro/matrix.dat    |  243 -----
 t/data/transfac_pro/readme.txt    |    5 -
 t/data/transfac_pro/reference.dat |   92 --
 t/data/transfac_pro/site.dat      |  806 ---------------
 11 files changed, 3 insertions(+), 4346 deletions(-)
 delete mode 100644 Bio/DB/TFBS.pm
 delete mode 100644 Bio/DB/TFBS/transfac_pro.pm
 delete mode 100644 t/LocalDB/transfac_pro.t
 delete mode 100644 t/data/transfac_pro/factor.dat
 delete mode 100644 t/data/transfac_pro/fragment.dat
 delete mode 100644 t/data/transfac_pro/gene.dat
 delete mode 100644 t/data/transfac_pro/matrix.dat
 delete mode 100644 t/data/transfac_pro/readme.txt
 delete mode 100644 t/data/transfac_pro/reference.dat
 delete mode 100644 t/data/transfac_pro/site.dat

diff --git a/Bio/DB/TFBS.pm b/Bio/DB/TFBS.pm
deleted file mode 100644
index 2309c5158..000000000
--- a/Bio/DB/TFBS.pm
+++ /dev/null
@@ -1,166 +0,0 @@
-# $Id: TFBS.pm,v 1.11 2006/08/12 11:00:03 sendu Exp $
-#
-# BioPerl module for Bio::DB::TFBS
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org> 
-#
-# Cared for by Sendu Bala <bix@sendu.me.uk>
-#
-# Copyright Sendu Bala
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::DB::TFBS - Access to a Transcription Factor Binding Site database
-
-=head1 SYNOPSIS
-
-  use Bio::DB::TFBS;
-
-  my $db = Bio::DB::TFBS->new(-source => 'transfac');
-  my ($factor_id) = $db->get_factor_ids('PPAR-gamma1');
-  my ($matrix_id) = $db->get_matrix_ids('PPAR-gamma1');
-
-  # get a Bio::Map::TranscriptionFactor with all the positions of a given factor
-  my $factor = $db->get_factor(-factor_id => $factor_id);
-
-  # get a Bio::Map::GeneMap containing all the factors that bind near a given gene
-  my $gene_map = $db->get_gene_map(-gene_name => 'AQP 7');
-
-  # get a PSM (Bio::Matrix::PSM) of a given matrix
-  my $psm = $db->get_matrix(-matrix_id => $matrix_id);
-
-  # get the aligned sequences (Bio::SimpleAlign) that were used to build a given
-  # matrix
-  my $align = $db->get_alignment(-matrix_id => $matrix_id);
-
-  # get a specific instance sequence (Bio::LocatableSeq)
-  my $seq = $db->get_seq($id);
-
-=head1 DESCRIPTION
-
-This is a front end module for access to a Transcription Factor Binding Site
-database.
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to
-the Bioperl mailing list.  Your participation is much appreciated.
-
-  bioperl-l@bioperl.org                  - General discussion
-  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
-
-=head2 Support 
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and 
-reponsive experts will be able look at the problem and quickly 
-address it. Please include a thorough description of the problem 
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via
-the web:
-
-  https://github.com/bioperl/bioperl-live/issues
-
-=head1 AUTHOR - Sendu Bala
-
-Email bix@sendu.me.uk
-
-=head1 CONTRIBUTORS
-
-Based on Bio::DB::Taxonomy by Jason Stajich
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object methods.
-Internal methods are usually preceded with a _
-
-=cut
-
-# Let the code begin...
-
-package Bio::DB::TFBS;
-use strict;
-
-use Bio::Root::Root;
-
-use base qw(Bio::Root::Root);
-
-our $DefaultSource = 'transfac';
-
-=head2 new
-
- Title   : new
- Usage   : my $obj = Bio::DB::TFBS->new(-source => 'transfac');
- Function: Builds a new Bio::DB::TFBS object.
- Returns : an instance of Bio::DB::TFBS
- Args    : -source => which database source: currently only 'transfac_pro'
-
-=cut
-
-sub new {
-    my ($class, @args) = @_;
-  
-    if ($class =~ /Bio::DB::TFBS::(\S+)/) {
-        my ($self) = $class->SUPER::new(@args);
-        $self->_initialize(@args);
-        return $self;
-    }
-    else { 
-        my %param = @args;
-        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
-        my $source = $param{'-source'} || $DefaultSource;
-        
-        $source = "\L$source";	# normalize capitalization to lower case
-        
-        # normalize capitalization
-        return unless( $class->_load_tax_module($source) );
-        return "Bio::DB::TFBS::$source"->new(@args);
-    }
-}
-
-# empty for now
-sub _initialize { }
-
-=head2 _load_tax_module
-
- Title   : _load_tax_module
- Usage   : *INTERNAL Bio::DB::TFBS stuff*
- Function: Loads up (like use) a module at run time on demand
-
-=cut
-
-sub _load_tax_module {
-    my ($self, $source) = @_;
-    my $module = "Bio::DB::TFBS::" . $source;
-    my $ok;
-
-    eval { $ok = $self->_load_module($module) };
-    if ( $@ ) {
-	print STDERR $@;
-	print STDERR <<END;
-$self: $source cannot be found
-Exception $@
-For more information about the Bio::DB::TFBS system please see
-the Bio::DB::TFBS docs.  This includes ways of checking for 
-formats at compile time, not run time.
-END
-  ;
-    }
-    return $ok;
-}
-
-1;
diff --git a/Bio/DB/TFBS/transfac_pro.pm b/Bio/DB/TFBS/transfac_pro.pm
deleted file mode 100644
index 3e3658365..000000000
--- a/Bio/DB/TFBS/transfac_pro.pm
+++ /dev/null
@@ -1,1970 +0,0 @@
-# $Id: transfac_pro.pm,v 1.15 2006/08/12 11:00:03 sendu Exp $
-#
-# BioPerl module for Bio::DB::TFBS::transfac_pro
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org> 
-#
-# Cared for by Sendu Bala <bix@sendu.me.uk>
-#
-# Copyright Sendu Bala
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::DB::TFBS::transfac_pro - An implementation of Bio::DB::TFBS
-which uses local flat files for transfac pro
-
-=head1 SYNOPSIS
-
-  use Bio::DB::Taxonomy;
-
-  my $db = new Bio::DB::Taxonomy(-source => 'transfac_pro'
-                                 -dat_dir => $directory);
-
-  # we're interested in the gene P5
-  my ($gene_id) = $db->get_gene_ids(-name => 'P5'); # G000001
-
-  # we want all the transcription factors that bind to our gene
-  my @factor_ids = $db->get_factor_ids(-gene => $gene_id);
-
-  # get info about those TFs
-  foreach my $factor_id (@factor_ids) {
-    my $factor = $db->get_factor($factor_id);
-    my $name = $factor->universal_name;
-    # etc. - see Bio::Map::TranscriptionFactor, eg. find out where it binds
-  }
-
-  # get a matrix
-  my $matrix = $db->get_matrix('M00001');
-
-  # get a binding site sequence
-  my $seq = $db->get_site('R00001');
-
-=head1 DESCRIPTION
-
-This is an implementation which uses local flat files and the DB_File
-module RECNO data structures to manage a local copy of the Transfac Pro TFBS
-database.
-
-Required database files require a license which can be obtained via
-http://www.biobase-international.com/pages/index.php?id=170
-
-Within the linux installation tarball you will find a cgibin tar ball, and
-inside that is a data directory containing the .dat files needed by this
-module. Point to that data directory with -dat_dir
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to
-the Bioperl mailing list.  Your participation is much appreciated.
-
-  bioperl-l@bioperl.org                  - General discussion
-  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
-
-=head2 Support 
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and 
-reponsive experts will be able look at the problem and quickly 
-address it. Please include a thorough description of the problem 
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via
-the web:
-
-  https://github.com/bioperl/bioperl-live/issues
-
-=head1 AUTHOR - Sendu Bala
-
-Email bix@sendu.me.uk
-
-=head1 CONTRIBUTORS
-
-Based on Bio::DB::Taxonomy::flatfile by Jason Stajich
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object methods.
-Internal methods are usually preceded with a _
-
-=cut
-
-# Let the code begin...
-
-package Bio::DB::TFBS::transfac_pro;
-use strict;
-use Bio::Annotation::Reference;
-use Bio::Annotation::SimpleValue;
-use Bio::LocatableSeq;
-use Bio::SimpleAlign;
-use Bio::Matrix::PSM::SiteMatrix;
-use Bio::AlignIO;
-use Bio::Map::GeneMap;
-use Bio::Map::TranscriptionFactor;
-use Bio::Map::Position;
-use Bio::Map::Relative;
-use DB_File;
-
-use constant SEPARATOR => ':!:';
-use constant INTERNAL_SEPARATOR => '!:!';
-
-$DB_BTREE->{'flags'} = R_DUP; # allow duplicate values in DB_File BTREEs
-
-use base qw(Bio::DB::TFBS);
-
-=head2 new
-
- Title   : new
- Usage   : my $obj = new Bio::DB::TFBS::transfac_pro();
- Function: Builds a new Bio::DB::TFBS::transfac_pro object 
- Returns : an instance of Bio::DB::TTFBS::transfac_pro
- Args    : -dat_dir   => name of directory where Transfac Pro .dat files
-                         (required to initially build indexes)
-           -tax_db    => Bio::DB::Taxonomy object, used when initially building
-                         indexes, gives better results for species information
-                         but not required.
-           -index_dir => name of directory where index files should be created
-                         or already exist. (defaults to -dat_dir, required if
-                         -dat_dir not supplied)
-           -force     => 1 replace current indexes even if they exist
-
-=cut
-
-sub new {
-    my ($class, @args) = @_;
-    
-    my $self = $class->SUPER::new(@args);
-    
-    my ($dat_dir, $index_dir, $tax_db, $force) = $self->_rearrange([qw(DAT_DIR INDEX_DIR TAX_DB FORCE)], @args);
-    $self->throw("At least one of -dat_dir and -index_dir must be supplied") unless ($dat_dir || $index_dir);
-    
-    $self->index_directory($index_dir || $dat_dir);
-    $self->{_tax_db} = $tax_db if $tax_db;
-    
-    if ($dat_dir) {
-        $self->_build_index($dat_dir, $force);
-    }
-    
-    $self->_db_connect;
-    return $self;
-}
-
-=head2 Bio::DB::TFBS Interface implementation
-
-=cut
-
-sub _get_ids {
-    my ($self, $dat, @args) = @_;
-    @args % 2 == 0 || $self->throw("Must provide key => value pairs");
-    my $hash = $self->{$dat} || $self->throw("Unknown .dat type '$dat'");
-    
-    if (@args) {
-        # get a subset corresponding to args
-        my @final;
-        my %args = @args;
-        my $multiple = 0;
-        while (my ($type, $value) = each %args) {
-            unless ($value) {
-                $self->warn("Arguement '$type' has no value, ignored");
-                next;
-            }
-            $type =~ s/-//;
-            $type = lc($type);
-            my $converter = $hash->{$type};
-            unless ($converter) {
-                $self->warn("Unknown search type '$type' for .dat type '$dat'");
-                next;
-            }
-            
-            my @ids = $converter->get_dup($value);
-            unless (@ids) {
-                @ids = $converter->get_dup(lc($value));
-            }
-            
-            if ($multiple) {
-                # we can have multiple types given at once, find the ids that
-                # satisfy all criteria
-                @final || return;
-                my %final = map { $_ => 1 } @final;
-                @final = grep { $final{$_} } @ids;
-            }
-            else {
-                @final = @ids;
-                $multiple++;
-            }
-        }
-        
-        return @final;
-    }
-    else {
-        # get them all
-        my $db_file_hash = $self->{$dat}->{id};
-        
-        my ($key, $prev_key, $value) = ('_!_', '!_!');
-        my @ids;
-        while (1) {
-            $db_file_hash->seq($key, $value, R_NEXT);
-            last if $prev_key eq $key;
-            push(@ids, $value); # confusing? when creating objects we store
-                                # $value as accession and $key as id, but from
-                                # this method we return $value as id given $id!
-            $prev_key = $key;
-        }
-        
-        return @ids;
-    }
-}
-
-=head2 get_reference
-
- Title   : get_reference
- Usage   : my $ref = $obj->get_reference($id);
- Function: Get a literature reference.
- Returns : Bio::Annotation::Reference
- Args    : string - a reference id ('RE...')
-
-=cut
-
-sub get_reference {
-    my ($self, $id) = @_;
-    $id || return;
-    my $data = $self->{reference}->{data}->{$id} || return;
-    my @data = split(SEPARATOR, $data);
-    
-    return Bio::Annotation::Reference->new(-pubmed   => $data[0],
-                                           -authors  => $data[1],
-                                           -title    => $data[2],
-                                           -location => $data[3] );
-}
-
-=head2 get_genemap
-
- Title   : get_genemap
- Usage   : my $map = $obj->get_genemap($id);
- Function: Get a GeneMap for a gene.
- Returns : Bio::Map::GeneMap
- Args    : string - a gene id ('G...'), and optionally int (number of bp
-           upstream)
-
-=cut
-
-sub get_genemap {
-    my ($self, $id, $upstream) = @_;
-    $id || return;
-    return $self->{got_map}->{$id} if defined $self->{got_map}->{$id};
-    $upstream ||= 1000;
-    my $data = $self->{gene}->{data}->{$id} || return;
-    my @data = split(SEPARATOR, $data);
-    
-    # accession = id name description species_tax_id_or_raw_string
-    my $taxon = $self->{_tax_db} ? $self->{_tax_db}->get_taxon($data[3]) || $data[3] : $data[3];
-    my $map = Bio::Map::GeneMap->get(-uid => $id,
-                                     -gene => $data[1],
-                                     -species => $taxon,
-                                     -description => $data[2],
-                                     -upstream => $upstream);
-    $self->{got_map}->{$id} = $map; # prevents infinite recurse when we call get_factor below
-    
-    # spawn all the factors that belong on this gene map
-    # get_factor_ids(-gene => ...) only works for genes that encode factors;
-    # have to go via sites
-    foreach my $sid ($self->get_site_ids(-gene => $id)) {
-        foreach my $fid ($self->get_factor_ids(-site => $sid)) {
-            # it is quite deliberate that we deeply recurse to arrive at the
-            # correct answer, which involves pulling in most of the database
-            no warnings "recursion";
-            $self->get_factor($fid);
-        }
-    }
-    
-    return $map;
-}
-
-=head2 get_seq
-
- Title   : get_seq
- Usage   : my $seq = $obj->get_seq($id);
- Function: Get the sequence of a site. The sequence will be annotated with the
-           the tags 'relative_start', 'relative_end', 'relative_type' and
-           'relative_to'.
- Returns : Bio::Seq
- Args    : string - a site id ('R...')
-
-=cut
-
-sub get_seq {
-    my ($self, $id) = @_;
-    $id || return;
-    my $data = $self->{site}->{data}->{$id} || return;
-    my @data = split(SEPARATOR, $data);
-    
-    my $seq = Bio::Seq->new(-seq              => $data[2],
-                            -accession_number => $id,
-                            -description      => $data[6] ? 'Genomic sequence' : 'Consensus or artificial sequence',
-                            -id               => $data[0],
-                            -strand           => 1,
-                            -alphabet         => $data[7] || 'dna',
-                            -species          => $data[6]);
-    
-    my $annot = $seq->annotation;
-    my $sv = Bio::Annotation::SimpleValue->new(-tagname => 'relative_start', -value => $data[4] || 1);
-    $annot->add_Annotation($sv);
-    $sv = Bio::Annotation::SimpleValue->new(-tagname => 'relative_end', -value => $data[5] || ($data[4] || 1 + length($data[2]) - 1));
-    $annot->add_Annotation($sv);
-    $sv = Bio::Annotation::SimpleValue->new(-tagname => 'relative_type', -value => $data[3] || 'artificial');
-    $annot->add_Annotation($sv);
-    $sv = Bio::Annotation::SimpleValue->new(-tagname => 'relative_to', -value => $data[1]);
-    $annot->add_Annotation($sv);
-    
-    return $seq;
-}
-
-=head2 get_fragment
-
- Title   : get_fragment
- Usage   : my $seq = $obj->get_fragment($id);
- Function: Get the sequence of a fragment.
- Returns : Bio::Seq
- Args    : string - a site id ('FR...')
-
-=cut
-
-sub get_fragment {
-    my ($self, $id) = @_;
-    $id || return;
-    my $data = $self->{fragment}->{data}->{$id} || return;
-    my @data = split(SEPARATOR, $data);
-    
-    # accession = id gene_id1 gene_id2 species_tax_id_or_raw_string sequence source
-    return new Bio::Seq( -seq              => $data[4],
-                         -accession_number => $id,
-                         -description      => 'Between genes '.$data[1].' and '.$data[2],
-                         -species          => $data[3],
-                         -id               => $data[0],
-                         -alphabet         => 'dna' );
-}
-
-=head2 get_matrix
-
- Title   : get_matrix
- Usage   : my $matrix = $obj->get_matrix($id);
- Function: Get a matrix that describes a binding site.
- Returns : Bio::Matrix::PSM::SiteMatrix
- Args    : string - a matrix id ('M...'), optionally a sequence string from
-           which base frequencies will be calculated for the matrix model
-           (default 0.25 each)
-
-=cut
-
-sub get_matrix {
-    my ($self, $id, $seq) = @_;
-    $id || return;
-    $seq ||= 'atgc';
-    $seq = lc($seq);
-    my $data = $self->{matrix}->{data}->{$id} || return;
-    my @data = split(SEPARATOR, $data);
-    $data[4] || $self->throw("Matrix data missing for $id");
-    
-    my ($a, $c, $g, $t);
-    foreach my $position (split(INTERNAL_SEPARATOR, $data[4])) {
-        my ($a_count, $c_count, $g_count, $t_count) = split("\t", $position);
-        push(@{$a}, $a_count);
-        push(@{$c}, $c_count);
-        push(@{$g}, $g_count);
-        push(@{$t}, $t_count);
-    }
-    
-    # our psms include a simple background model so we can use
-    # sequence_match_weight() if desired
-    my $a_freq = ($seq =~ tr/a//) / length($seq);
-    my $c_freq = ($seq =~ tr/c//) / length($seq);
-    my $g_freq = ($seq =~ tr/g//) / length($seq);
-    my $t_freq = ($seq =~ tr/t//) / length($seq);
-    
-    my $psm = Bio::Matrix::PSM::SiteMatrix->new(-pA => $a,
-                                                -pC => $c,
-                                                -pG => $g,
-                                                -pT => $t,
-                                                -id => $data[0],
-                                                -accession_number => $id,
-                                                -sites => $data[3],
-                                                -width => scalar(@{$a}),
-                                                -correction => 1,
-                                                -model => { A => $a_freq, C => $c_freq, G => $g_freq, T => $t_freq } );
-    
-    #*** used to make a Bio::Matrix::PSM::Psm and add references, but it
-    #    didn't seem worth it. You can get references from the database by:
-    #foreach my $ref_id ($db->get_reference_ids(-matrix => $id)) {
-    #    my $ref = $db->get_reference($ref_id);
-    #}
-    
-    return $psm;
-}
-
-=head2 get_aln
-
- Title   : get_aln
- Usage   : my $aln = $obj->get_aln($id);
- Function: Get the alignment that was used to generate a matrix. Each sequence
-           in the alignment will have an accession_number corresponding to the
-           Transfac site id, and id() based on that but unique within the
-           alignment.
- Returns : Bio::SimpleAlign
- Args    : string - a matrix id ('M...'), optionally true to, when a matrix
-           lists no sequences, search for sequences via the matrix's factors,
-           picking the sites that best match the matrix
-
-=cut
-
-my %VALID_STRAND = map {$_ => 1} qw(-1 0 1);
-
-sub get_aln {
-    my ($self, $id, $via_factors) = @_;
-    $id || return;
-    my $data = $self->{matrix}->{data}->{$id} || $self->throw("matrix '$id' had no data in DB_File");
-    my @data = split(SEPARATOR, $data);
-
-    if (! $data[5] && $via_factors) {
-        # This is a matrix with no site sequences given in matrix.dat.
-        # Find some matching site sequences via factors.
-        
-        # First, check its factors for sites
-        my %site_seqs;
-        my %factor_ids;
-        foreach my $factor_id ($self->get_factor_ids(-matrix => $id)) {
-            $factor_ids{$factor_id} = 1;
-            foreach my $site_id ($self->get_site_ids(-factor => $factor_id)) {
-                next if defined $site_seqs{$site_id};
-                my $seq = $self->get_seq($site_id);
-                
-                # skip sites that have no sequence, or have IUPAC symbols in
-                # their sequence (most probably the 'consensus' sequence itself
-                # that was used to make and exactly corresponds to the matrix)
-                my $seq_str = $seq->seq || next;
-                $seq_str =~ /[MRWSYKVHDB]/ and next;
-                
-                $site_seqs{$site_id} = $seq;
-            }
-        }
-        my @seqs = values %site_seqs;
-        
-        if (@seqs > 1) {
-            # pick the sub-seqs that match to the matrix with the best scores
-            my $matrix = $self->get_matrix($id);
-            my $desired_sequences = $matrix->sites;
-            return if @seqs < $desired_sequences;
-            
-            my $desired_length = $matrix->width;
-            my %best_seqs;
-            foreach my $seq (@seqs) {
-                my $for_str = $seq->seq;
-                next if length($for_str) < $desired_length;
-                my $rev_str = $seq->revcom->seq;
-                
-                my $best_score = 0;
-                my $best_subseq = '';
-                my $best_i = 0;
-                my $best_subseq_caps = 0;
-                my $best_revcom;
-                my $revcom = 0;
-                foreach my $seq_str ($for_str, $rev_str) {
-                    for my $i (0..(length($seq_str) - $desired_length)) {
-                        my $subseq = substr($seq_str, $i, $desired_length);
-                        $subseq =~ s/[^ACGTacgt]//g; # can only score atcg
-                        next unless length($subseq) == $desired_length; # short or 0-length seqs could get the highest scores!
-                        my $score = $matrix->sequence_match_weight($subseq);
-                        
-                        # caps represent the author-chosen bit of a site
-                        # sequence so we would prefer to choose a subseq that
-                        # contains it
-                        my $caps = $subseq =~ tr/ACGT//;
-                        
-                        #*** (don't know why numeric == fails for comparing
-                        #     scores, when the string eq works)
-                        if ($score > $best_score || ("$score" eq "$best_score" && $caps > $best_subseq_caps)) {
-                            $best_score = $score;
-                            $best_subseq_caps = $caps;
-                            $best_subseq = $subseq;
-                            $best_i = $i;
-                            $best_revcom = $revcom;
-                        }
-                    }
-                    $revcom++;
-                }
-                
-                if ($best_score) {
-                    $best_seqs{$seq->accession_number} = [$best_subseq, $seq->accession_number, ($best_i + 1), $revcom ? -1 : 1, $best_score];
-                }
-            }
-            my @sorted = sort { $best_seqs{$b}->[-1] <=> $best_seqs{$a}->[-1] } keys %best_seqs;
-            return if @sorted < $desired_sequences;
-            splice(@sorted, $desired_sequences);
-            my %wanted = map { $_ => 1 } @sorted;
-            
-            my @site_data;
-            foreach my $seq (@seqs) {
-                next unless exists $wanted{$seq->accession_number};
-                my @data = @{$best_seqs{$seq->accession_number}};
-                pop(@data);
-                push(@site_data, join('_', @data));
-            }
-            
-            $data[5] = join(INTERNAL_SEPARATOR, @site_data);
-            $self->{matrix}->{data}->{$id} = join(SEPARATOR, @data);
-        }
-    }
-    $data[5] || return;
-    
-    my @blocks = split(INTERNAL_SEPARATOR, $data[5]);
-    
-    # append gap chars to all sequences to make them the same length
-    # (applies to sequences found via factors, presumably, since we already
-    # do this for matrix alignments in transfac_pro.pm)
-    my $longest = 0;
-    foreach (@blocks) {
-        my ($seq) = split('_', $_);
-        my $length = length($seq);
-        if ($length > $longest) {
-            $longest = $length;
-        }
-    }
-    foreach my $i (0..$#blocks) {
-        my $block = $blocks[$i];
-        my ($seq, $seq_id) = split('_', $block);
-        my $length = length($seq);
-        if ($length < $longest) {
-            my $orig_seq = $seq;
-            $seq .= '-'x($longest - $length);
-            $block =~ s/^${orig_seq}_/${seq}_/;
-            $blocks[$i] = $block;
-        }
-    }
-    
-    # build the alignment
-    my $aln = Bio::SimpleAlign->new(-source => 'transfac_pro');
-    my %done_ids;
-    foreach (@blocks) {
-        my ($seq, $seq_acc, $start, $strand) = split('_', $_);
-        
-        $self->throw("Invalid strand $strand found in block $_")
-            unless exists $VALID_STRAND{$strand};
-        # we can get back multiple different subparts of the same site (sequence),
-        # so $seq_acc isn't unique across this loop. Can't use it as the seq id
-        # of the alignment (ids must be unique in SimpleAlign), so we
-        # uniquify the id and store the original id as the accession_number
-        my $seq_id;
-        $done_ids{$seq_acc}++;
-        if ($done_ids{$seq_acc} > 1) {
-            $seq_id = $seq_acc.'_'.$done_ids{$seq_acc};
-        }
-        else {
-            $seq_id = $seq_acc;
-        }
-        
-        my $gaps = $seq =~ tr/-//;
-        my $length = length($seq) - $gaps;
-        $self->throw("seq '$seq_id' for matrix '$id' had seq '$seq'") unless $length;
-        $aln->add_seq(Bio::LocatableSeq->new(-seq    => $seq,
-                                             -id     => $seq_id,
-                                             -accession_number => $seq_acc,
-                                             -start  => $start,
-                                             -end    => $start + $length - 1,
-                                             -strand => $strand));
-    }
-    $aln->id($id);
-    # could also store score? of?
-    
-    return $aln;
-}
-
-=head2 get_factor
-
- Title   : get_factor
- Usage   : my $factor = $obj->get_factor($id);
- Function: Get the details of a transcription factor.
- Returns : Bio::Map::TranscriptionFactor
- Args    : string - a factor id ('T...')
-
-=cut
-
-sub get_factor {
-    my ($self, $id) = @_;
-    $id || return;
-    return $self->{got_factor}->{$id} if defined $self->{got_factor}->{$id};
-    my $data = $self->{factor}->{data}->{$id} || return;
-    my @data = split(SEPARATOR, $data);
-    
-    # accession = id name species sequence
-    my $tf = Bio::Map::TranscriptionFactor->get(-id => $id,
-                                                -universal_name => $data[1]);
-    #*** not sure what to do with species and sequence, since we don't want to
-    # confuse the idea that a TF is a general thing that could bind to any
-    # species... then again, you might want to model species-specific variants
-    # of a TF with different binding abilities...
-    #*** idea of having inclusion and exclusion species so you can prevent/
-    # ignore a tf that binds to the wrong species (a species that doesn't even
-    # have the tf), and associating sequence with each species/tf combo so you
-    # can see how diverged the tf is and make assumptions about site difference
-    # allowance
-    
-    # place it on all its genemaps
-    foreach my $sid ($self->get_site_ids(-factor => $id)) {
-        my $s_data = $self->{site}->{data}->{$sid} || next;
-        my @s_data = split(SEPARATOR, $s_data);
-        
-        # accession = id gene_id sequence relative_to first_position last_position species_tax_id_or_raw_string
-        $s_data[1] || next; # site isn't relative to a gene, meaningless
-        $s_data[4] || next; # don't know where its supposed to be, can't model it
-        $s_data[5] ||= $s_data[4] + ($s_data[2] ? length($s_data[2]) - 1 : 0);
-        
-        # it is quite deliberate that we deeply recurse to arrive at the
-        # correct answer, which involves pulling in most of the database
-        no warnings "recursion";
-        my $gene_map = $self->get_genemap($s_data[1]) || next;
-        return $self->{got_factor}->{$id} if defined $self->{got_factor}->{$id};
-        
-        #*** not always relative to gene start...
-        #    we need Bio::Map::Gene s to have some default tss and atg positions
-        #    that we can be relative to
-        my $rel = Bio::Map::Relative->new(-element => $gene_map->gene, -description => $s_data[3]);
-        Bio::Map::Position->new(-map => $gene_map, -element => $tf, -start => $s_data[4], -end => $s_data[5], -relative => $rel);
-    }
-    
-    $self->{got_factor}->{$id} = $tf;
-    return $tf;
-}
-
-# since get_factor() is uncertain, just have direct access methods to factor
-# information
-sub get_factor_name {
-    my ($self, $id) = @_;
-    my $details = $self->_get_factor_details($id) || return;
-    return $details->{name};
-}
-sub get_factor_species {
-    my ($self, $id) = @_;
-    my $details = $self->_get_factor_details($id) || return;
-    return $details->{species};
-}
-sub get_factor_sequence {
-    my ($self, $id) = @_;
-    my $details = $self->_get_factor_details($id) || return;
-    return $details->{sequence};
-}
-sub _get_factor_details {
-    my ($self, $id) = @_;
-    $id || return;
-    
-    return $self->{factor_details}->{$id} if defined $self->{factor_details}->{$id};
-    
-    my $data = $self->{factor}->{data}->{$id} || return;
-    my @data = split(SEPARATOR, $data);
-    
-    # accession = id name species sequence
-    
-    my %details = (name => $data[1], species => $data[2], sequence => $data[3]);
-    $self->{factor_details}->{$id} = \%details;
-    
-    return \%details;
-}
-
-=head2 get_reference_ids
-
- Title   : get_reference_ids
- Usage   : my @ids = $obj->get_reference_ids(-key => $value);
- Function: Get all the reference ids that are associated with the supplied
-           args.
- Returns : list of strings (ids)
- Args    : -key => value, where value is a string id, and key is one of:
-           -pubmed -site -gene -matrix -factor
-
-=cut
-
-sub get_reference_ids {
-    my $self = shift;
-    return $self->_get_ids('reference', @_);
-}
-
-# -id -name -species -site -factor -reference
-sub get_gene_ids {
-    my $self = shift;
-    return $self->_get_ids('gene', @_);
-}
-
-=head2 get_site_ids
-
- Title   : get_site_ids
- Usage   : my @ids = $obj->get_site_ids(-key => $value);
- Function: Get all the site ids that are associated with the supplied
-           args.
- Returns : list of strings (ids)
- Args    : -key => value, where value is a string id, and key is one of:
-           -id -species -gene -matrix -factor -reference
-
-=cut
-
-sub get_site_ids {
-    my $self = shift;
-    return $self->_get_ids('site', @_);
-}
-
-=head2 get_matrix_ids
-
- Title   : get_matrix_ids
- Usage   : my @ids = $obj->get_matrix_ids(-key => $value);
- Function: Get all the matrix ids that are associated with the supplied
-           args.
- Returns : list of strings (ids)
- Args    : -key => value, where value is a string id, and key is one of:
-           -id -name -site -factor -reference
-
-=cut
-
-sub get_matrix_ids {
-    my $self = shift;
-    return $self->_get_ids('matrix', @_);
-}
-
-=head2 get_factor_ids
-
- Title   : get_factor_ids
- Usage   : my @ids = $obj->get_factor_ids(-key => $value);
- Function: Get all the factor ids that are associated with the supplied
-           args.
- Returns : list of strings (ids)
- Args    : -key => value, where value is a string id, and key is one of:
-           -id -name -species -interactors -gene -matrix -site -reference
-           NB: -gene only gets factor ids for genes that encode factors
-
-=cut
-
-sub get_factor_ids {
-    my $self = shift;
-    return $self->_get_ids('factor', @_);
-}
-
-=head2 get_fragment_ids
-
- Title   : get_fragment_ids
- Usage   : my @ids = $obj->get_fragment_ids(-key => $value);
- Function: Get all the fragment ids that are associated with the supplied
-           args.
- Returns : list of strings (ids)
- Args    : -key => value, where value is a string id, and key is one of:
-           -id -species -gene -factor -reference
-
-=cut
-
-sub get_fragment_ids {
-    my $self = shift;
-    return $self->_get_ids('fragment', @_);
-}
-
-=head2 Helper methods 
-
-=cut
-
-# internal method which does the indexing
-sub _build_index {
-    my ($self, $dat_dir, $force) = @_;
-    
-    # MLDBM would give us transparent complex data structures with DB_File,
-    # allowing just one index file, but its yet another requirement and we
-    # don't strictly need it
-    
-    my $index_dir = $self->index_directory;
-    my $gene_index      = "$index_dir/gene.dat.index";
-    my $reference_index = "$index_dir/reference.dat.index";
-    my $matrix_index    = "$index_dir/matrix.dat.index";
-    my $factor_index    = "$index_dir/factor.dat.index";
-    my $fragment_index  = "$index_dir/fragment.dat.index";
-    my $site_index      = "$index_dir/site.dat.index";
-    
-    my $reference_dat = "$dat_dir/reference.dat";
-    if (! -e $reference_index || $force) {
-        open my $REF, '<', $reference_dat or $self->throw("Could not read reference file '$reference_dat': $!");
-        
-        my %references;
-        unlink $reference_index;
-        my $ref = tie(%references, 'DB_File', $reference_index, O_RDWR|O_CREAT, 0644, $DB_HASH)
-            or $self->throw("CCould not open file '$reference_index': $!");
-        
-        my %pubmed;
-        my $reference_pubmed = $reference_index.'.pubmed';
-        unlink $reference_pubmed;
-        my $pub = tie(%pubmed, 'DB_File', $reference_pubmed, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$reference_pubmed': $!");
-        
-        my %gene;
-        my $reference_gene = $gene_index.'.reference';
-        unlink $reference_gene;
-        my $gene = tie(%gene, 'DB_File', $reference_gene, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$reference_gene': $!");
-        
-        my %site;
-        my $reference_site = $site_index.'.reference';
-        unlink $reference_site;
-        my $site = tie(%site, 'DB_File', $reference_site, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$reference_site': $!");
-        
-        my %fragment;
-        my $reference_fragment = $fragment_index.'.reference';
-        unlink $reference_fragment;
-        my $fragment = tie(%fragment, 'DB_File', $reference_fragment, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$reference_fragment': $!");
-        
-        my %factor;
-        my $reference_factor = $factor_index.'.reference';
-        unlink $reference_factor;
-        my $factor = tie(%factor, 'DB_File', $reference_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$reference_factor': $!");
-        
-        my %matrix;
-        my $reference_matrix = $matrix_index.'.reference';
-        unlink $reference_matrix;
-        my $matrix = tie(%matrix, 'DB_File', $reference_matrix, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$reference_matrix': $!");
-        
-        # skip the first three header lines
-        <$REF>; <$REF>; <$REF>;
-        
-        my @data;
-        while (<$REF>) {
-            if (/^AC  (\S+)/) {
-                $data[0] = $1;
-            }
-            elsif (/^RX  PUBMED: (\d+)/) {
-                $data[1] = $1;
-                $pub->put("$1", $data[0]);
-            }
-            elsif (/^RA  (.+)\n$/) {
-                $data[2] = $1;
-            }
-            elsif (/^RT  (.+?)\.?\n$/) {
-                $data[3] = $1;
-            }
-            elsif (/^RL  (.+?)\.?\n$/) {
-                $data[4] = $1;
-            }
-            elsif (/^GE  TRANSFAC: (\w\d+)/) {
-                $gene->put($data[0], "$1");
-            }
-            elsif (/^BS  TRANSFAC: (\w\d+)/) {
-                $site->put($data[0], "$1");
-            }
-            elsif (/^FA  TRANSFAC: (\w\d+)/) {
-                $factor->put($data[0], "$1");
-            }
-            elsif (/^FR  TRANSFAC: (FR\d+)/) {
-                $fragment->put($data[0], "$1");
-            }
-            elsif (/^MX  TRANSFAC: (\w\d+)/) {
-                $matrix->put($data[0], "$1");
-            }
-            elsif (/^\/\//) {
-                # end of a record, store previous data and reset
-                
-                # accession = pubmed authors title location
-                $references{$data[0]} = join(SEPARATOR, ($data[1] || '',
-                                                         $data[2] || '',
-                                                         $data[3] || '',
-                                                         $data[4] || ''));
-                
-                @data = ();
-            }
-        }
-        close $REF;
-        
-        $ref = $pub = $gene = $site = $fragment = $factor = $matrix = undef;
-        untie %references;
-        untie %pubmed;
-        untie %gene;
-        untie %site;
-        untie %fragment;
-        untie %factor;
-        untie %matrix;
-    }
-    
-    my $gene_dat = "$dat_dir/gene.dat";
-    if (! -e $gene_index || $force) {
-        open my $GEN, '<', $gene_dat or $self->throw("Could not read gene file '$gene_dat': $!");
-        
-        my %genes;
-        unlink $gene_index;
-        my $gene = tie(%genes, 'DB_File', $gene_index, O_RDWR|O_CREAT, 0644, $DB_HASH)
-            or $self->throw("Could not open file '$gene_index': $!");
-        
-        my %id;
-        my $gene_id = $gene_index.'.id';
-        unlink $gene_id;
-        my $id = tie(%id, 'DB_File', $gene_id, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$gene_id': $!");
-        
-        my %name;
-        my $gene_name = $gene_index.'.name';
-        unlink $gene_name;
-        my $name = tie(%name, 'DB_File', $gene_name, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$gene_name': $!");
-        
-        my %species;
-        my $gene_species = $gene_index.'.species';
-        unlink $gene_species;
-        my $species = tie(%species, 'DB_File', $gene_species, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$gene_species': $!");
-        
-        my %site;
-        my $gene_site = $site_index.'.gene';
-        unlink $gene_site;
-        my $site = tie(%site, 'DB_File', $gene_site, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$gene_site': $!");
-        
-        my %factor;
-        my $gene_factor = $factor_index.'.gene';
-        unlink $gene_factor;
-        my $factor = tie(%factor, 'DB_File', $gene_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$gene_factor': $!");
-        
-        my %fragment;
-        my $gene_fragment = $fragment_index.'.gene';
-        unlink $gene_fragment;
-        my $fragment = tie(%fragment, 'DB_File', $gene_fragment, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$gene_fragment': $!");
-        
-        my %reference;
-        my $gene_reference = $reference_index.'.gene';
-        unlink $gene_reference;
-        my $reference = tie(%reference, 'DB_File', $gene_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$gene_reference': $!");
-        
-        # skip the first three header lines
-        <$GEN>; <$GEN>; <$GEN>;
-        
-        my @data;
-        while (<$GEN>) {
-            if (/^AC  (\S+)/) {
-                $data[0] = $1;
-            }
-            elsif (/^ID  (\S+)/) {
-                $data[1] = $1;
-                $id->put("$1", $data[0]);
-            }
-            elsif (/^SD  (.+)$/) {
-                $data[2] = lc("$1");
-                $name->put(lc("$1"), $data[0]);
-            }
-            elsif (/^SY  (.+)\.$/) {
-                foreach (split('; ', lc("$1"))) {
-                    $name->put($_, $data[0]);
-                }
-            }
-            elsif (/^DE  (.+)$/) {
-                $data[3] = $1;
-            }
-            elsif (/^OS  (.+)$/) {
-                my $raw_species = $1;
-                my $taxid = $self->_species_to_taxid($raw_species);
-                $data[4] = $taxid || $raw_species;
-                $species->put($data[4], $data[0]);
-            }
-            elsif (/^RN  .+?(RE\d+)/) {
-                $reference->put($data[0], "$1");
-            }
-            elsif (/^BS  .+?(R\d+)/) {
-                $site->put($data[0], "$1");
-            }
-            elsif (/^FA  (T\d+)/) {
-                $factor->put($data[0], "$1");
-            }
-            elsif (/^BR  (FR\d+)/) {
-                $fragment->put($data[0], "$1");
-            }
-            elsif (/^\/\//) {
-                # end of a record, store previous data and reset
-                
-                # accession = id name description species_tax_id_or_raw_string
-                $genes{$data[0]} = join(SEPARATOR, ($data[1] || '',
-                                                    $data[2] || '',
-                                                    $data[3] || '',
-                                                    $data[4] || ''));
-                
-                @data = ();
-            }
-        }
-        close $GEN;
-        
-        $gene = $id = $name = $species = $site = $factor = $reference = undef;
-        untie %genes;
-        untie %id;
-        untie %name;
-        untie %species;
-        untie %site;
-        untie %factor;
-        untie %reference;
-    }
-    
-    my $site_dat = "$dat_dir/site.dat";
-    if (! -e $site_index || $force) {
-        open my $SIT, '<', $site_dat or $self->throw("Could not read site file '$site_dat': $!");
-        
-        my %sites;
-        unlink $site_index;
-        my $site = tie(%sites, 'DB_File', $site_index, O_RDWR|O_CREAT, 0644, $DB_HASH)
-            or $self->throw("Could not open file '$site_index': $!");
-        
-        my %id;
-        my $site_id = $site_index.'.id';
-        unlink $site_id;
-        my $id = tie(%id, 'DB_File', $site_id, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$site_id': $!");
-        
-        my %species;
-        my $site_species = $site_index.'.species';
-        unlink $site_species;
-        my $species = tie(%species, 'DB_File', $site_species, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$site_species': $!");
-        
-        my %qualities;
-        my $site_qualities = $site_index.'.qual';
-        unlink $site_qualities;
-        my $quality = tie(%qualities, 'DB_File', $site_qualities, O_RDWR|O_CREAT, 0644, $DB_HASH)
-            or $self->throw("Could not open file '$site_qualities': $!");
-        
-        my %gene;
-        my $site_gene = $gene_index.'.site';
-        unlink $site_gene;
-        my $gene = tie(%gene, 'DB_File', $site_gene, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$site_gene': $!");
-        
-        my %matrix;
-        my $site_matrix = $matrix_index.'.site';
-        unlink $site_matrix;
-        my $matrix = tie(%matrix, 'DB_File', $site_matrix, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$site_matrix': $!");
-        
-        my %factor;
-        my $site_factor = $factor_index.'.site';
-        unlink $site_factor;
-        my $factor = tie(%factor, 'DB_File', $site_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$site_factor': $!");
-        
-        my %reference;
-        my $site_reference = $reference_index.'.site';
-        unlink $site_reference;
-        my $reference = tie(%reference, 'DB_File', $site_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$site_reference': $!");
-        
-        # skip the first three header lines
-        <$SIT>; <$SIT>; <$SIT>;
-        
-        my @data;
-        while (<$SIT>) {
-            if (/^AC  (\S+)/) {
-                $data[0] = $1;
-            }
-            elsif (/^ID  (\S+)/) {
-                $data[1] = $1;
-                $id->put("$1", $data[0]);
-            }
-            elsif (/^TY  (.+)$/) {
-                $data[8] = $1;
-            }
-            elsif (/^DE  .*Gene: (G\d+)/) {
-                $data[2] = $1;
-                $gene->put($data[0], "$1");
-                
-                # if it has no gene it is an artificial sequence, unless it
-                # has a species (OS line), in which case it is unassigned
-                # genomic; either way we won't be able to make a
-                # Bio::Map::PositionI later on, so such sites won't be
-                # on any MapI.
-            }
-            elsif (/^OS  (.+)$/) {
-                # Since not all sites in site.dat with a species have a gene,
-                # (small handful are unassigned 'genomic') can't delegate to
-                # gene.dat and must parse species here (effectively again)
-                my $raw_species = $1;
-                my $taxid = $self->_species_to_taxid($raw_species);
-                $data[7] = $taxid || $raw_species;
-                $species->put($data[7], $data[0]);
-            }
-            elsif (/^SQ  (.+)\.$/) {
-                $data[3] = $1;
-                # there can actually be more than one SQ line, seemingly with
-                # variations of the sequence (not a long sequence split over
-                # two lines); not sure what to do with data; currently we end
-                # up storing only the last variant.
-            }
-            elsif (/^S1  (.+)$/) {
-                $data[4] = $1;
-                # if S1 not present, means transcriptional start
-            }
-            elsif (/^SF  (.+)$/) {
-                $data[5] = $1;
-            }
-            elsif (/^ST  (.+)$/) {
-                $data[6] = $1;
-            }
-            elsif (/^RN  .+?(RE\d+)/) {
-                $reference->put($data[0], "$1");
-            }
-            elsif (/^MX  (M\d+)/) {
-                $matrix->put($data[0], "$1");
-            }
-            elsif (/^BF  (T\d+); .+?; Quality: (\d)/) {
-                $factor->put($data[0], "$1");
-                $qualities{$data[0].SEPARATOR.$1} = $2;
-            }
-            elsif (/^\/\//) {
-                # end of a record, store previous data and reset
-                
-                # accession = id gene_id sequence relative_to first_position last_position species_tax_id_or_raw_string type
-                $sites{$data[0]} = join(SEPARATOR, ($data[1] || '',
-                                                    $data[2] || '',
-                                                    $data[3] || '',
-                                                    $data[4] || 'TSS',
-                                                    $data[5] || '',
-                                                    $data[6] || '',
-                                                    $data[7] || '',
-                                                    $data[8] || ''));
-                
-                @data = ();
-            }
-        }
-        close $SIT;
-        
-        $site = $id = $species = $quality = $gene = $matrix = $factor = $reference = undef;
-        untie %sites;
-        untie %id;
-        untie %species;
-        untie %qualities;
-        untie %gene;
-        untie %matrix;
-        untie %factor;
-        untie %reference;
-    }
-    
-    my $matrix_dat = "$dat_dir/matrix.dat";
-    if (! -e $matrix_index || $force) {
-        open my $MAT, '<', $matrix_dat or $self->throw("Could not read matrix file '$matrix_dat': $!");
-        
-        my %matrices;
-        unlink $matrix_index;
-        my $matrix = tie(%matrices, 'DB_File', $matrix_index, O_RDWR|O_CREAT, 0644, $DB_HASH)
-            or $self->throw("Could not open file '$matrix_index': $!");
-        
-        my %id;
-        my $matrix_id = $matrix_index.'.id';
-        unlink $matrix_id;
-        my $id = tie(%id, 'DB_File', $matrix_id, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$matrix_id': $!");
-        
-        my %name;
-        my $matrix_name = $matrix_index.'.name';
-        unlink $matrix_name;
-        my $name = tie(%name, 'DB_File', $matrix_name, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$matrix_name': $!");
-        
-        my %site;
-        my $matrix_site = $site_index.'.matrix';
-        unlink $matrix_site;
-        my $site = tie(%site, 'DB_File', $matrix_site, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$matrix_site': $!");
-        
-        my %factor;
-        my $matrix_factor = $factor_index.'.matrix';
-        unlink $matrix_factor;
-        my $factor = tie(%factor, 'DB_File', $matrix_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$matrix_factor': $!");
-        
-        my %reference;
-        my $matrix_reference = $reference_index.'.matrix';
-        unlink $matrix_reference;
-        my $reference = tie(%reference, 'DB_File', $matrix_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$matrix_reference': $!");
-        
-        # skip the first three header lines
-        <$MAT>; <$MAT>; <$MAT>;
-        
-        my @data;
-        my @matrix_data;
-        my @site_data;
-        while (<$MAT>) {
-            if (/^AC  (\S+)/) {
-                $data[0] = $1;
-            }
-            elsif (/^ID  (\S+)/) {
-                $data[1] = $1;
-                $id->put("$1", $data[0]);
-            }
-            elsif (/^NA  (.+)$/) {
-                $data[2] = $1;
-                $name->put("$1", $data[0]);
-            }
-            elsif (/^DE  (.+)$/) {
-                $data[3] = $1;
-            }
-            elsif (/^\d\d  \s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
-                # a, c, g, t counts/weights
-                push(@matrix_data, join("\t", ($1, $2, $3, $4)));
-                
-                # Work out the number of sites as the largest number of
-                # sites amongst all positions in the sequences. (The BA
-                # line isn't reliable for telling us the correct number of
-                # sites all the time)
-                my $num = $1 + $2 + $3 + $4;
-                $data[4] ||= 0;
-                if ($num > $data[4]) {
-                    $data[4] = $num;
-                }
-            }
-            elsif (/^BS  ([\sa-zA-Z]+); (.+?); (-?\d+); \d+;.*; ([np])/) {
-                # sequence id start strand
-                push(@site_data, join('_', ($1, $2, $3, $4 eq 'p' ? 1 : -1)));
-                $site->put($data[0], $2);
-            }
-            elsif (/^BF  (T\d+)/) {
-                $factor->put($data[0], "$1");
-            }
-            elsif (/^RN  .+?(RE\d+)/) {
-                $reference->put($data[0], "$1");
-            }
-            elsif (/^\/\//) {
-                # end of a record, store previous data and reset
-                my $matrix_data = join(INTERNAL_SEPARATOR, @matrix_data) || '';
-                
-                # sites of a matrix are pre-aligned but padded with spaces on
-                # the left and no padding on the right; pad with -s both sides
-                my $longest_seq = 0;
-                
-                # For all the work, does anything meaningful actually get passed
-                # on here? Commenting out fixes the latest crashes on trunk.
-                # 5-10-10 cjfields
-                
-                #foreach my $site_seq (map {my ($seq) = split("_", $_ ,2); $seq;} @site_data) {
-                #    $site_seq =~ s/ /-/g;
-                #    my $length = length($site_seq);
-                #    if ($length > $longest_seq) {
-                #        $longest_seq = $length;
-                #    }
-                #}
-                #foreach my $site (@site_data) {
-                #    my ($site_seq) = split("_", $site ,2);
-                #    my $length = length($site_seq);
-                #    if ($length < $longest_seq) {
-                #        $site_seq .= '-' x ($longest_seq - $length);
-                #    }
-                #}
-
-                my $site_data = join(INTERNAL_SEPARATOR, @site_data) || '';
-                
-                # accession = id name description num_of_sites matrix_data site_data
-                $matrices{$data[0]} = join(SEPARATOR, ($data[1] || '',
-                                                       $data[2] || '',
-                                                       $data[3] || '',
-                                                       $data[4],
-                                                       $matrix_data,
-                                                       $site_data));
-                
-                @data = @matrix_data = @site_data = ();
-            }
-        }
-        close $MAT;
-        
-        $matrix = $id = $name = $site = $factor = $reference = undef;
-        untie %matrices;
-        untie %id;
-        untie %name;
-        untie %site;
-        untie %factor;
-        untie %reference;
-    }
-    
-    my $factor_dat = "$dat_dir/factor.dat";
-    if (! -e $factor_index || $force) {
-        open my $FAC, '<', $factor_dat or $self->throw("Could not read factor file '$factor_dat': $!");
-        
-        my %factors;
-        unlink $factor_index;
-        my $factor = tie(%factors, 'DB_File', $factor_index, O_RDWR|O_CREAT, 0644, $DB_HASH)
-            or $self->throw("Could not open file '$factor_index': $!");
-        
-        my %id;
-        my $factor_id = $factor_index.'.id';
-        unlink $factor_id;
-        my $id = tie(%id, 'DB_File', $factor_id, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$factor_id': $!");
-        
-        my %name;
-        my $factor_name = $factor_index.'.name';
-        unlink $factor_name;
-        my $name = tie(%name, 'DB_File', $factor_name, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$factor_name': $!");
-        
-        my %species;
-        my $factor_species = $factor_index.'.species';
-        unlink $factor_species;
-        my $species = tie(%species, 'DB_File', $factor_species, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$factor_species': $!");
-        
-        my %interactors;
-        my $factor_interactors = $factor_index.'.interactors';
-        unlink $factor_interactors;
-        my $interact = tie(%interactors, 'DB_File', $factor_interactors, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$factor_interactors': $!");
-        
-        my %gene;
-        my $factor_gene = $gene_index.'.factor';
-        unlink $factor_gene;
-        my $gene = tie(%gene, 'DB_File', $factor_gene, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$factor_gene': $!");
-        
-        my %matrix;
-        my $factor_matrix = $matrix_index.'.factor';
-        unlink $factor_matrix;
-        my $matrix = tie(%matrix, 'DB_File', $factor_matrix, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$factor_matrix': $!");
-        
-        my %site;
-        my $factor_site = $site_index.'.factor';
-        unlink $factor_site;
-        my $site = tie(%site, 'DB_File', $factor_site, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$factor_site': $!");
-        
-        my %fragment;
-        my $factor_fragment = $fragment_index.'.factor';
-        unlink $factor_fragment;
-        my $fragment = tie(%fragment, 'DB_File', $factor_fragment, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$factor_fragment': $!");
-        
-        my %reference;
-        my $factor_reference = $reference_index.'.factor';
-        unlink $factor_reference;
-        my $reference = tie(%reference, 'DB_File', $factor_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-            or $self->throw("Could not open file '$factor_reference': $!");
-        
-        # skip the first three header lines
-        <$FAC>; <$FAC>; <$FAC>;
-        
-        my @data;
-        my $sequence = '';
-        while (<$FAC>) {
-            if (/^AC  (\S+)/) {
-                $data[0] = $1;
-            }
-            elsif (/^ID  (\S+)/) {
-                # IDs are always the same as AC? Is this needed?
-                $data[1] = $1;
-                $id->put("$1", $data[0]);
-            }
-            elsif (/^FA  (.+)$/) {
-                $data[2] = $1;
-                $name->put("$1", $data[0]);
-            }
-            elsif (/^OS  (.+)$/) {
-                # This is the species the actual factor came from, which may
-                # differ from the species of any sequences it is described as
-                # binding to. Not all factors that have a species have a gene,
-                # so can't delegate species to a gene lookup.
-                my $raw_species = $1;
-                my $taxid = $self->_species_to_taxid($raw_species);
-                $data[3] = $taxid || $raw_species;
-                $species->put($data[3], $data[0]);
-            }
-            elsif (/^GE  (G\d+)/) {
-                $gene->put($data[0], "$1");
-            }
-            elsif (/^SQ  (.+)$/) {
-                $sequence .= $1;
-            }
-            elsif (/^IN  (T\d+)/) {
-                $interact->put($data[0], "$1");
-            }
-            elsif (/^MX  (M\d+)/) {
-                $matrix->put($data[0], "$1");
-            }
-            elsif (/^BS  (R\d+)/) {
-                $site->put($data[0], "$1");
-            }
-            elsif (/^BR  (FR\d+)/) {
-                $fragment->put($data[0], "$1");
-            }
-            elsif (/^RN  .+?(RE\d+)/) {
-                $reference->put($data[0], "$1");
-            }
-            elsif (/^\/\//) {
-                # end of a record, store previous data and reset
-                
-                # accession = id name species sequence
-                $factors{$data[0]} = join(SEPARATOR, ($data[1] || '',
-                                                      $data[2] || '',
-                                                      $data[3] || '',
-                                                      $sequence));
-                
-                @data = ();
-                $sequence = '';
-            }
-        }
-        close $FAC;
-        
-        $factor = $id = $name = $species = $interact = $gene = $matrix = $site = $fragment = $reference = undef;
-        untie %factors;
-        untie %id;
-        untie %name;
-        untie %species;
-        untie %interactors;
-        untie %gene;
-        untie %matrix;
-        untie %site;
-        untie %fragment;
-        untie %reference;
-    }
-    
-    my $fragment_dat = "$dat_dir/fragment.dat";
-    if (! -e $fragment_index || $force) {
-        if (open my $FRA, '<', $fragment_dat) {
-            my %fragments;
-            unlink $fragment_index;
-            my $fragment = tie(%fragments, 'DB_File', $fragment_index, O_RDWR|O_CREAT, 0644, $DB_HASH)
-                or $self->throw("Could not open file '$fragment_index': $!");
-            
-            my %id;
-            my $fragment_id = $fragment_index.'.id';
-            unlink $fragment_id;
-            my $id = tie(%id, 'DB_File', $fragment_id, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-                or $self->throw("Could not open file '$fragment_id': $!");
-            
-            my %qualities;
-            my $fragment_qualities = $fragment_index.'.qual';
-            unlink $fragment_qualities;
-            my $quality = tie(%qualities, 'DB_File', $fragment_qualities, O_RDWR|O_CREAT, 0644, $DB_HASH)
-                or $self->throw("Could not open file '$fragment_qualities': $!");
-            
-            my %species;
-            my $fragment_species = $fragment_index.'.species';
-            unlink $fragment_species;
-            my $species = tie(%species, 'DB_File', $fragment_species, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-                or $self->throw("Could not open file '$fragment_species': $!");
-            
-            my %gene;
-            my $fragment_gene = $gene_index.'.fragment';
-            unlink $fragment_gene;
-            my $gene = tie(%gene, 'DB_File', $fragment_gene, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-                or $self->throw("Could not open file '$fragment_gene': $!");
-            
-            my %factor;
-            my $fragment_factor = $factor_index.'.fragment';
-            unlink $fragment_factor;
-            my $factor = tie(%factor, 'DB_File', $fragment_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-                or $self->throw("Could not open file '$fragment_factor': $!");
-            
-            my %reference;
-            my $fragment_reference = $reference_index.'.fragment';
-            unlink $fragment_reference;
-            my $reference = tie(%reference, 'DB_File', $fragment_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE)
-                or $self->throw("Could not open file '$fragment_reference': $!");
-            
-            # skip the first three header lines
-            <$FRA>; <$FRA>; <$FRA>;
-            
-            my @data;
-            while (<$FRA>) {
-                if (/^AC  (\S+)/) {
-                    $data[0] = $1;
-                }
-                elsif (/^ID  (\S+)/) {
-                    # IDs are always the same as AC? Is this needed?
-                    $data[1] = $1;
-                    $id->put("$1", $data[0]);
-                }
-                elsif (/^DE  Gene: (G\d+)(?:.+Gene: (G\d+))?/) {
-                    my ($gene1, $gene2) = ($1, $2);
-                    $data[2] = $gene1;
-                    $data[3] = $gene2; # could be undef
-                    $gene->put($data[0], $gene1);
-                    $gene->put($data[0], $gene2) if $gene2;
-                }
-                elsif (/^OS  (.+)$/) {
-                    # As per the site.dat parsing
-                    my $raw_species = $1;
-                    my $taxid = $self->_species_to_taxid($raw_species);
-                    $data[4] = $taxid || $raw_species;
-                    $species->put($data[4], $data[0]);
-                }
-                elsif (/^SQ  [atcgn]*([ATCGN]+)[atcgn]*/) {
-                    $data[5] .= $1;
-                    # there can be (usually are) multiple SQ lines with a single
-                    # long seq split over them. The 'real' sequence is in caps
-                }
-                elsif (/^SC  Build (\S+):$/) {
-                    $data[6] = $1;
-                    # maybe parse it out a little more? We have build,
-                    # chromosomal coords and strand, eg.
-                    # SC  Build HSA_May2004: Chr.2 43976692..43978487 (FORWARD).
-                }
-                elsif (/^RN  .+?(RE\d+)/) {
-                    $reference->put($data[0], "$1");
-                }
-                elsif (/^BF  (T\d+); .+?; Quality: (\d)/) {
-                    $factor->put($data[0], "$1");
-                    $qualities{$data[0].SEPARATOR.$1} = $2;
-                }
-                elsif (/^\/\//) {
-                    # end of a record, store previous data and reset
-                    
-                    # accession = id gene_id1 gene_id2 species_tax_id_or_raw_string sequence source
-                    $fragments{$data[0]} = join(SEPARATOR, ($data[1] || '',
-                                                            $data[2] || '',
-                                                            $data[3] || '',
-                                                            $data[4] || '',
-                                                            $data[5] || '',
-                                                            $data[6] || ''));
-                    
-                    @data = ();
-                }
-            }
-            close $FRA;
-            
-            $fragment = $id = $species = $quality = $gene = $factor = $reference = undef;
-            untie %fragments;
-            untie %id;
-            untie %species;
-            untie %qualities;
-            untie %gene;
-            untie %factor;
-            untie %reference;
-        }
-        else {
-            $self->warn("Could not read fragment file '$fragment_dat', assuming you have an old version of Transfac Pro with no fragment.dat file");
-        }
-    }
-}
-
-# connect the internal db handle
-sub _db_connect {
-    my $self = shift;
-    return if $self->{'_initialized'};
-    
-    my $index_dir = $self->index_directory;
-    my $gene_index = "$index_dir/gene.dat.index";
-    my $reference_index = "$index_dir/reference.dat.index";
-    my $matrix_index = "$index_dir/matrix.dat.index";
-    my $factor_index = "$index_dir/factor.dat.index";
-    my $site_index = "$index_dir/site.dat.index";
-    my $fragment_index = "$index_dir/fragment.dat.index";
-    
-    foreach ($gene_index, $reference_index, $matrix_index, $factor_index, $site_index, $fragment_index) {
-        if (! -e $_) {
-            #$self->warn("Index files have not been created");
-            #return 0;
-        }
-    }
-    
-    # reference
-    {
-        $self->{reference}->{data} = {};
-        tie (%{$self->{reference}->{data}}, 'DB_File', $reference_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$reference_index': $!");
-        
-        my $reference_pubmed = $reference_index.'.pubmed';
-        $self->{reference}->{pubmed} = tie (%{$self->{reference}->{pubmed}}, 'DB_File', $reference_pubmed, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$reference_pubmed': $!");
-        
-        my $reference_gene = $gene_index.'.reference';
-        $self->{gene}->{reference} = tie (%{$self->{gene}->{reference}}, 'DB_File', $reference_gene, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$reference_gene': $!");
-        
-        my $reference_site = $site_index.'.reference';
-        $self->{site}->{reference} = tie (%{$self->{site}->{reference}}, 'DB_File', $reference_site, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$reference_site': $!");
-        
-        my $reference_fragment = $fragment_index.'.reference';
-        $self->{fragment}->{reference} = tie (%{$self->{fragment}->{reference}}, 'DB_File', $reference_fragment, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$reference_fragment': $!");
-        
-        my $reference_factor = $factor_index.'.reference';
-        $self->{factor}->{reference} = tie (%{$self->{factor}->{reference}}, 'DB_File', $reference_factor, undef, 0644, $DB_BTREE) || $self->throw("Cannot open file '$reference_factor': $!");
-        
-        my $reference_matrix = $matrix_index.'.reference';
-        $self->{matrix}->{reference} = tie (%{$self->{matrix}->{reference}}, 'DB_File', $reference_matrix, undef, 0644, $DB_BTREE) || $self->throw("Cannot open file '$reference_matrix': $!");
-    }
-    
-    # gene
-    {
-        $self->{gene}->{data} = {};
-        tie (%{$self->{gene}->{data}}, 'DB_File', $gene_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$gene_index': $!");
-        
-        my $gene_id = $gene_index.'.id';
-        $self->{gene}->{id} = tie(%{$self->{gene}->{id}}, 'DB_File', $gene_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_id': $!");
-        
-        my $gene_name = $gene_index.'.name';
-        $self->{gene}->{name} = tie(%{$self->{gene}->{name}}, 'DB_File', $gene_name, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_name': $!");
-        
-        my $gene_species = $gene_index.'.species';
-        $self->{gene}->{species} = tie(%{$self->{gene}->{species}}, 'DB_File', $gene_species, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_species': $!");
-        
-        my $gene_site = $site_index.'.gene';
-        $self->{site}->{gene} = tie(%{$self->{site}->{gene}}, 'DB_File', $gene_site, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_site': $!");
-        
-        my $gene_fragment = $fragment_index.'.gene';
-        $self->{fragment}->{gene} = tie(%{$self->{fragment}->{gene}}, 'DB_File', $gene_fragment, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_fragment': $!");
-        
-        my $gene_factor = $factor_index.'.gene';
-        $self->{factor}->{gene} = tie(%{$self->{factor}->{gene}}, 'DB_File', $gene_factor, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_factor': $!");
-        
-        my $gene_reference = $reference_index.'.gene';
-        $self->{reference}->{gene} = tie(%{$self->{reference}->{gene}}, 'DB_File', $gene_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_reference': $!");
-    }
-    
-    # site
-    {
-        $self->{site}->{data} = {};
-        tie (%{$self->{site}->{data}}, 'DB_File', $site_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$site_index': $!");
-        
-        my $site_id = $site_index.'.id';
-        $self->{site}->{id} = tie(%{$self->{site}->{id}}, 'DB_File', $site_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_id': $!");
-        
-        my $site_species = $site_index.'.species';
-        $self->{site}->{species} = tie(%{$self->{site}->{species}}, 'DB_File', $site_species, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file $site_species': $!");
-        
-        #*** quality not actually used by anything (yet)
-        my $site_qualities = $site_index.'.qual';
-        $self->{quality} = {};
-        tie(%{$self->{quality}}, 'DB_File', $site_qualities, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$site_qualities': $!");
-        
-        my $site_gene = $gene_index.'.site';
-        $self->{gene}->{site} = tie(%{$self->{gene}->{site}}, 'DB_File', $site_gene, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_gene': $!");
-        
-        my $site_matrix = $matrix_index.'.site';
-        $self->{matrix}->{site} = tie(%{$self->{matrix}->{site}}, 'DB_File', $site_matrix, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_matrix': $!");
-        
-        my $site_factor = $factor_index.'.site';
-        $self->{factor}->{site} = tie(%{$self->{factor}->{site}}, 'DB_File', $site_factor, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_factor': $!");
-        
-        my $site_reference = $reference_index.'.site';
-        $self->{reference}->{site} = tie(%{$self->{reference}->{site}}, 'DB_File', $site_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_reference': $!");
-    }
-    
-    # fragment (may not be in older databases)
-    if (-e $fragment_index) {
-        $self->{fragment}->{data} = {};
-        tie (%{$self->{fragment}->{data}}, 'DB_File', $fragment_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$fragment_index': $!");
-        
-        my $fragment_id = $fragment_index.'.id';
-        $self->{fragment}->{id} = tie(%{$self->{fragment}->{id}}, 'DB_File', $fragment_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$fragment_id': $!");
-        
-        my $fragment_species = $fragment_index.'.species';
-        $self->{fragment}->{species} = tie(%{$self->{fragment}->{species}}, 'DB_File', $fragment_species, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file $fragment_species': $!");
-        
-        #*** quality not actually used by anything (yet)
-        my $fragment_qualities = $fragment_index.'.qual';
-        $self->{fragment_quality} = {};
-        tie(%{$self->{fragment_quality}}, 'DB_File', $fragment_qualities, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$fragment_qualities': $!");
-        
-        my $fragment_gene = $gene_index.'.fragment';
-        $self->{gene}->{fragment} = tie(%{$self->{gene}->{fragment}}, 'DB_File', $fragment_gene, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$fragment_gene': $!");
-        
-        my $fragment_factor = $factor_index.'.fragment';
-        $self->{factor}->{fragment} = tie(%{$self->{factor}->{fragment}}, 'DB_File', $fragment_factor, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$fragment_factor': $!");
-        
-        my $fragment_reference = $reference_index.'.fragment';
-        $self->{reference}->{fragment} = tie(%{$self->{reference}->{fragment}}, 'DB_File', $fragment_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$fragment_reference': $!");
-    }
-    else {
-        die "no fragment_index at '$fragment_index'\n";
-    }
-    
-    # matrix
-    {
-        $self->{matrix}->{data} = {};
-        tie (%{$self->{matrix}->{data}}, 'DB_File', $matrix_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$matrix_index': $!");
-        
-        my $matrix_id = $matrix_index.'.id';
-        $self->{matrix}->{id} = tie(%{$self->{matrix}->{id}}, 'DB_File', $matrix_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_id': $!");
-        
-        my $matrix_name = $matrix_index.'.name';
-        $self->{matrix}->{name} = tie(%{$self->{matrix}->{name}}, 'DB_File', $matrix_name, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_name': $!");
-        
-        my $matrix_site = $site_index.'.matrix';
-        $self->{site}->{matrix} = tie(%{$self->{site}->{matrix}}, 'DB_File', $matrix_site, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_site': $!");
-        
-        my $matrix_factor = $factor_index.'.matrix';
-        $self->{factor}->{matrix} = tie(%{$self->{factor}->{matrix}}, 'DB_File', $matrix_factor, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_factor': $!");
-        
-        my $matrix_reference = $reference_index.'.matrix';
-        $self->{reference}->{matrix} = tie(%{$self->{reference}->{matrix}}, 'DB_File', $matrix_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_reference': $!");
-    }
-    
-    # factor
-    {
-        $self->{factor}->{data} = {};
-        tie (%{$self->{factor}->{data}}, 'DB_File', $factor_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$factor_index': $!");
-        
-        my $factor_id = $factor_index.'.id';
-        $self->{factor}->{id} = tie(%{$self->{factor}->{id}}, 'DB_File', $factor_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file 'factor_id': $!");
-        
-        my $factor_name = $factor_index.'.name';
-        $self->{factor}->{name} = tie(%{$self->{factor}->{name}}, 'DB_File', $factor_name, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_name': $!");
-        
-        my $factor_species = $factor_index.'.species';
-        $self->{factor}->{species} = tie(%{$self->{factor}->{species}}, 'DB_File', $factor_species, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_species': $!");
-        
-        my $factor_interactors = $factor_index.'.interactors';
-        $self->{factor}->{interactors} = tie(%{$self->{factor}->{interactors}}, 'DB_File', $factor_interactors, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_interactors': $!");
-        
-        my $factor_gene = $gene_index.'.factor';
-        $self->{gene}->{factor} = tie(%{$self->{gene}->{factor}}, 'DB_File', $factor_gene, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_gene': $!");
-        
-        my $factor_matrix = $matrix_index.'.factor';
-        $self->{matrix}->{factor} = tie(%{$self->{matrix}->{factor}}, 'DB_File', $factor_matrix, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_matrix': $!");
-        
-        my $factor_site = $site_index.'.factor';
-        $self->{site}->{factor} = tie(%{$self->{site}->{factor}}, 'DB_File', $factor_site, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_site': $!");
-        
-        my $factor_fragment = $fragment_index.'.factor';
-        $self->{fragment}->{factor} = tie(%{$self->{fragment}->{factor}}, 'DB_File', $factor_fragment, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_fragment': $!");
-        
-        my $factor_reference = $reference_index.'.factor';
-        $self->{reference}->{factor} = tie(%{$self->{reference}->{factor}}, 'DB_File', $factor_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_reference': $!");
-    }
-    
-    $self->{'_initialized'}  = 1;
-}
-
-=head2 index_directory
-
- Title   : index_directory
- Function : Get/set the location that index files are stored. (this module
-           will index the supplied database)
- Usage   : $obj->index_directory($newval)
- Returns : value of index_directory (a scalar)
- Args    : on set, new value (a scalar or undef, optional)
-
-=cut
-
-sub index_directory {
-    my $self = shift;
-    return $self->{'index_directory'} = shift if @_;
-    return $self->{'index_directory'};
-}
-
-# resolve a transfac species string into an ncbi taxid
-sub _species_to_taxid {
-    my ($self, $raw_species) = @_;
-    $raw_species or return;
-    
-    my $species_string;
-    my @split = split(', ', $raw_species);
-    (@split > 1) ? ($species_string = $split[1]) : ($species_string = $split[0]);
-    
-    my $ncbi_taxid;
-    if ($species_string =~ /^[A-Z]\S+ \S+$/) {
-        SWITCH: for ($species_string) {
-            # some species don't classify so custom handling
-            /^Darnel ryegrass/ && do { $ncbi_taxid = 34176; last; };
-            /^Coix lacryma/ && do { $ncbi_taxid = 4505; last; };
-            /^Rattus spec/ && do { $ncbi_taxid = 10116; last; };
-            /^Mus spec/ && do { $ncbi_taxid = 10090; last; };
-            /^Equus spec/ && do { $ncbi_taxid = 9796; last; };
-            /^Cavia sp/ && do { $ncbi_taxid = 10141; last; };
-            /^Marsh marigold/ && do { $ncbi_taxid = 3449; last; };
-            /^Phalaenopsis sp/ && do { $ncbi_taxid = 36900; last; };
-            /^Anthirrhinum majus/ && do { $ncbi_taxid = 4151; last; };
-            /^Equus spec/ && do { $ncbi_taxid = 9796; last; };
-            /^Lycopodium spec/ && do { $ncbi_taxid = 13840; last; };
-            /^Autographa californica/ && do { $ncbi_taxid = 307456; last; };
-            /^E26 AEV/ && do { $ncbi_taxid = 31920; last; };
-            /^Pseudocentrotus miliaris/ && do { $ncbi_taxid = 7677; last; }; # the genus is 7677 but this species isn't there
-            /^SL3-3 (?:retro)?virus/ && do { $ncbi_taxid = 53454; last; }; # 53454 is unclassified MLV-related, SL3-3 a variant of that?
-            /^Petunia sp/ && do { $ncbi_taxid = 4104; last; };
-        }
-        if (! $ncbi_taxid && defined $self->{_tax_db}) {
-            ($ncbi_taxid) = $self->{_tax_db}->get_taxonids($species_string);
-        }
-    }
-    else {
-        # some species lines are poorly formatted so custom handling
-        SWITCH: for ($raw_species) {
-            # for speed, go by common first letters
-            my $first_letter = substr($raw_species, 0, 1);
-            
-            $first_letter eq 'A' && do {
-                /^Adiantum raddianum/ && do { $ncbi_taxid = 32168; last; };
-                /^Avian sarcoma virus \(strain 17\)/ && do { $ncbi_taxid = 11877; last; };
-                /^AMV/ && do { $ncbi_taxid = 11866; last; };
-                /^AEV/ && do { $ncbi_taxid = 11861; last; };
-                /^AS42|^Avian musculoaponeurotic/ && do { $ncbi_taxid = 11873; last; };
-                /^Avian myelocytomatosis/ && do { $ncbi_taxid = 11869; last; };
-                /^ASV 31/ && do { $ncbi_taxid = 35270; last; };
-                /^A-MuLV/ && do { $ncbi_taxid = 188539; last; };
-                /^Asparagus officinalis/ && do { $ncbi_taxid = 4686; last; };
-                /^Agrobacterium tumefaciens/ && do { $ncbi_taxid = 358; last; };
-                /^ALV/ && do { $ncbi_taxid = 11864; last; };
-                /^AAV/ && do { $ncbi_taxid = 272636; last; };
-                /^AKV MLV/ && do { $ncbi_taxid = 11791; last; };
-                last;
-            };
-            
-            $first_letter eq 'B' && do {
-                /^BPV-1/ && do { $ncbi_taxid = 10559; last; };
-                /^BKV/ && do { $ncbi_taxid = 10629; last; };
-                /^Bolivian squirrel monkey/ && do { $ncbi_taxid = 39432; last; };
-                last;
-            };
-            
-            $first_letter eq 'C' && do {
-                /^Cauliflower/ && do { $ncbi_taxid = 3715; last; };
-                /^Chamek/ && do { $ncbi_taxid = 118643; last; };
-                /^Candida albicans/ && do { $ncbi_taxid = 5476; last; };
-                /^CaMV/ && do { $ncbi_taxid = 10641; last; };
-                last;
-            };
-            
-            $first_letter eq 'E' && do {
-                /^Eucalyptus gunnii/ && do { $ncbi_taxid = 3933; last; };
-                /^EBV, Epstein-Barr virus/ && do { $ncbi_taxid = 10376; last; };
-                /^Eucalyptus globulus subsp. bicostata/ && do { $ncbi_taxid = 71272; last; };
-                /^Eucalyptus globulus subsp. globulus/ && do { $ncbi_taxid = 71271; last; };
-                last;
-            };
-            
-            $first_letter eq 'F' && do {
-                /^FBR MuLV/ && do { $ncbi_taxid = 11806; last; };
-                /^FBJ MuLV/ && do { $ncbi_taxid = 11805; last; };
-                /^FeLV|Feline leukemia/ && do { $ncbi_taxid = 11923; last; };
-                /^Flaveria trinervia/ && do { $ncbi_taxid = 4227; last; };
-                /^FSV/ && do { $ncbi_taxid = 11885; last; };
-                /^F-MuLV/ && do { $ncbi_taxid = 11795; last; };
-                last;
-            };
-            
-            $first_letter eq 'H' && do {
-                /^HSV-1/ && do { $ncbi_taxid = 10298; last; };
-                /^HTLV-I/ && do { $ncbi_taxid = 11908; last; };
-                /^HIV-1/ && do { $ncbi_taxid = 11676; last; };
-                /^HPV-16/ && do { $ncbi_taxid = 333760; last; };
-                /^HBV/ && do { $ncbi_taxid = 10407; last; };
-                /^HBI/ && do { $ncbi_taxid = 11867; last; };
-                /^HPV-8/ && do { $ncbi_taxid = 10579; last; };
-                /^HPV-11/ && do { $ncbi_taxid = 10580; last; };
-                /^HPV-18/ && do { $ncbi_taxid = 333761; last; };
-                /^HCMV/ && do { $ncbi_taxid = 10359; last; };
-                /^HSV/ && do { $ncbi_taxid = 126283; last; };
-                /^HSV-2/ && do { $ncbi_taxid = 10310; last; };
-                /^HCV/ && do { $ncbi_taxid = 11108; last; };
-                /^HIV-2/ && do { $ncbi_taxid = 11709; last; };
-                last;
-            };
-            
-            $first_letter eq 'M' && do {
-                /^MMTV/ && do { $ncbi_taxid = 11757; last; };
-                /^Mo-MuLV/ && do { $ncbi_taxid = 11801; last; };
-                /^MuLV/ && do { $ncbi_taxid = 11786; last; };
-                /^MSV/ && do { $ncbi_taxid = 11802; last; };
-                /^MC29/ && do { $ncbi_taxid = 11868; last; };
-                /^MVM/ && do { $ncbi_taxid = 10794; last; };
-                /^MH2E21/ && do { $ncbi_taxid = 11955; last; }; # 11955 is a species, presumably MH2E21 is the strain
-                last;
-            };
-            
-            $first_letter eq 'R' && do {
-                /^Raphanus sativus/ && do { $ncbi_taxid = 3726; last; };
-                /^REV-T/ && do { $ncbi_taxid = 11636; last; };
-                /^RAV-0/ && do { $ncbi_taxid = 11867; last; }; # should be rous-associated virus 0 variant
-                /^RSV/ && do { $ncbi_taxid = 11886; last; };
-                /^RadLV/ && do { $ncbi_taxid = 31689; last; };
-                /^RTBV/ && do { $ncbi_taxid = 10654; last; };
-                last;
-            };
-            
-            $first_letter eq 'S' && do {
-                /^SV40/ && do { $ncbi_taxid = 10633; last; };
-                /^Sesbania rostrata/ && do { $ncbi_taxid = 3895; last; };
-                /^SIV/ && do { $ncbi_taxid = 11723; last; };
-                /^Spinacia oleracea/ && do { $ncbi_taxid = 3562; last; };
-                /^SCMV/ && do { $ncbi_taxid = 10364; last; }; # supposed to be AGM isolate
-                last;
-            };
-            
-            # and lower case
-            $first_letter eq 'a' && do {
-                /^adenovirus type 5/ && do { $ncbi_taxid = 28285; last; };
-                /^adenovirus type 2/ && do { $ncbi_taxid = 10515; last; };
-                /^adenovirus/ && do { $ncbi_taxid = 189831; last; }; # 189831 ('unclassified Adenoviridae') is the closest I can get, but this has no genus and is not a species
-                last;
-            };
-            
-            $first_letter eq 'b' && do {
-                /^bell pepper/ && do { $ncbi_taxid = 4072; last; };
-                /^baculovirus, Autographa californica/ && do { $ncbi_taxid = 46015; last; };
-                /^broccoli/ && do { $ncbi_taxid = 36774; last; };
-                /^barley/ && do { $ncbi_taxid = 112509; last; };
-                last;
-            };
-            
-            $first_letter eq 'c' && do {
-                /^clawed frog/ && do { $ncbi_taxid = 8355; last; };
-                /^chipmunk/ && do { $ncbi_taxid = 64680; last; };
-                /^common tree shrew/ && do { $ncbi_taxid = 37347; last; };
-                /^cat/ && do { $ncbi_taxid = 9685; last; };
-                last;
-            };
-            
-            # and misc
-            /^NK24/ && do { $ncbi_taxid = 11955; last; };
-            /^OK10/ && do { $ncbi_taxid = 11871; last; };
-            /^Dendrobium grex/ && do { $ncbi_taxid = 84618; last; };
-            /^KSHV/ && do { $ncbi_taxid = 37296; last; };
-            /^Oncidium/ && do { $ncbi_taxid = 96474; last; };
-            /^Japanese quail/ && do { $ncbi_taxid = 93934; last; };
-            /^Nile tilapia/ && do { $ncbi_taxid = 8128; last; };
-            /^GALV/ && do { $ncbi_taxid = 11840; last; };
-            /^JCV/ && do { $ncbi_taxid = 10632; last; };
-            /^LPV/ && do { $ncbi_taxid = 10574; last; };
-            /^Py,/ && do { $ncbi_taxid = 36362; last; };
-            /^DHBV/ && do { $ncbi_taxid = 12639; last; };
-            /^VZV/ && do { $ncbi_taxid = 10335; last; };
-            /^Vicia faba/ && do { $ncbi_taxid = 3906; last; };
-            
-            /^hamster/ && do { $ncbi_taxid = 10029; last; };
-            /^sea urchin/ && do { $ncbi_taxid = 7668; last; };
-            /^fruit fly/ && do { $ncbi_taxid = 7227; last; };
-            /^halibut/ && do { $ncbi_taxid = 8267; last; };
-            /^vaccinia virus/ && do { $ncbi_taxid = 10245; last; };
-            /^taxonomic class Mammalia/ && do { $ncbi_taxid = 40674; last; }; # not a species
-            /^taxonomic class Vertebrata/ && do { $ncbi_taxid = 7742; last; }; # not a species
-            /^dog/ && do { $ncbi_taxid = 9615; last; };
-            /^parsley/ && do { $ncbi_taxid = 4043; last; };
-            /^mouse, Mus domesticus Torino/ && do { $ncbi_taxid = 10092; last; }; # 10092 is domesticus subspecies, but not the Torino strain
-            /^lemur, Eulemur fulvus collaris/ && do { $ncbi_taxid = 47178; last; };
-            /^red sea bream/ && do { $ncbi_taxid = 143350; last; };
-            /^zebra finch/ && do { $ncbi_taxid = 59729; last; };
-            /^mung bean/ && do { $ncbi_taxid = 3916; last; };
-            /^soybean/ && do { $ncbi_taxid = 3847; last; };
-            /^oat/ && do { $ncbi_taxid = 4498; last; };
-            /^pseudorabies virus/ && do { $ncbi_taxid = 10345; last; };
-        }
-    }
-    
-    $self->warn("Didn't know what species '$raw_species' was, unable to classify") unless $ncbi_taxid;
-    return $ncbi_taxid;
-}
-
-sub DESTROY {
-    my $self = shift;
-    # Destroy tied references to close filehandles
-    # and allow proper temporary files deletion
-    undef $self->{_tax_db}->{'_nodes'};
-    undef $self->{_tax_db}->{'_id2name'};
-    undef $self->{_tax_db}->{'_name2id'};
-    undef $self->{_tax_db}->{'_parent2children'};
-    undef $self->{_tax_db}->{'_parentbtree'};
-}
-
-1;
diff --git a/Changes b/Changes
index eb913ec62..5b0995bdf 100644
--- a/Changes
+++ b/Changes
@@ -123,6 +123,9 @@ be removed.
       own distribution named Bio-DB-HIV.  This also drops the bioperl
       dependency on XML-Simple and Term-ReadLine.
 
+    * The entire Bio::DB::TFBS namespace has been moved to its own
+      distribution named after itself.
+
 
 1.7.2 - "Entebbe"
 
diff --git a/t/LocalDB/transfac_pro.t b/t/LocalDB/transfac_pro.t
deleted file mode 100644
index 80e4e3566..000000000
--- a/t/LocalDB/transfac_pro.t
+++ /dev/null
@@ -1,262 +0,0 @@
-use strict;
-use warnings;
-
-BEGIN {
-	use lib '.';
-    use Bio::Root::Test;
-    test_begin(-tests           => 115,
-               -requires_module => 'DB_File');
-    
-    use_ok('Bio::Matrix::PSM::IO');
-    use_ok('Bio::DB::TFBS');
-    use_ok('Bio::DB::Taxonomy');
-}
-
-#*** need to test getting all ids of a certain kind, like $db->get_matrix_ids();
-#    but hard to do without a complete tax dump
-
-my $temp_dir = test_output_dir();
-my $tax_db = Bio::DB::Taxonomy->new(-source => 'flatfile',
-                                    -directory => $temp_dir,
-                                    -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
-                                    -namesfile => test_input_file('taxdump', 'names.dmp'));
-
-# test transfac pro (local flat files)
-{
-    ok my $db = Bio::DB::TFBS->new(-source => 'transfac_pro',
-                                   -index_dir => $temp_dir,
-                                   -dat_dir => test_input_file('transfac_pro'),
-                                   -tax_db => $tax_db,
-                                   -force => 1);
-    
-    # reference.dat
-    {
-        ok my ($ref_id) = $db->get_reference_ids(-pubmed => 16574738);
-        is $ref_id, 'RE0047775';
-        ok my $ref = $db->get_reference($ref_id);
-        isa_ok $ref, 'Bio::Annotation::Reference';
-        is $ref->primary_id, 16574738;
-        is $ref->pubmed, $ref->primary_id;
-        is $ref->database, 'PUBMED';
-        is $ref->authors, '..Bet S . ,.u i rMeK ,,d. vWeWk KaS.ee.nyNk mJMMih. a, i P';
-        is $ref->location, 'Mc (o0o.. 0n)lnir.do 2E:6l';
-        is $ref->title, 'INDD VDGT C1AALEBEI.EIT IYIHLA6ITTE E ANV  ITSL MTRTANYE TM NISP TNBAUTPOIORSL I- NVTOD,MHIRRLINSDX TRPY NO CAELUAOA SNMMNT CED5CTH NII TERTOI2IMTVPEH3DSAI';
-        
-        my @sites = $db->get_site_ids(-reference => $ref_id);
-        is join(' ', sort @sites), 'R19310 R19311 R19312 R19313 R19314 R19315 R19316';
-        my @genes = $db->get_gene_ids(-reference => $ref_id);
-        is "@genes", 'G036757';
-        my @ref_ids = $db->get_reference_ids(-site => 'R19310');
-        is "@ref_ids", $ref_id;
-        @ref_ids = $db->get_reference_ids(-gene => 'G036757');
-        is "@ref_ids", $ref_id;
-        
-        $ref_id = 'RE0047531';
-        my @matrices = $db->get_matrix_ids(-reference => $ref_id);
-        is join(' ', sort @matrices), 'M01123 M01124 M01125';
-        my @factors = $db->get_factor_ids(-reference => $ref_id);
-        like "@factors", qr/T08800/;
-        @ref_ids = $db->get_reference_ids(-matrix => 'M01123');
-        is join(' ', sort @ref_ids), "$ref_id RE0047626";
-        @ref_ids = $db->get_reference_ids(-factor => 'T08800');
-        is join(' ', sort @ref_ids), "$ref_id RE0047634 RE0047637 RE0047645";
-		
-		$ref_id = 'RE0023998';
-		my %fragments = map { $_ => 1 } $db->get_fragment_ids(-reference => $ref_id);
-		ok $fragments{'FR0002267'};
-		@ref_ids = $db->get_reference_ids(-fragment => 'FR0002267');
-		is "@ref_ids", $ref_id;
-    }
-    
-    # gene.dat
-    {
-        ok my ($gene_id) = $db->get_gene_ids(-name => 'P5');
-        is $gene_id, 'G000001';
-		
-		#*** get_genemap with ensembl lookup being fantastically slow
-        #ok defined Bio::Map::Gene->set_from_db; # will try and do ensembl lookups for gene info
-        #ok my $gene_map = $db->get_genemap($gene_id, 1000);
-        #Bio::Tools::Run::Ensembl->_stats;
-        #ok $gene_map->isa('Bio::Map::GeneMap');
-        #ok $gene_map->unique_id, 'G000001';
-        #ok $gene_map->universal_name, 'P5';
-        #ok $gene_map->species->scientific_name, 'Adeno-associated virus';
-        #my @factors = grep { $_->isa("Bio::Map::TranscriptionFactor") } $gene_map->get_elements;
-        #ok @factors, 3;
-        
-        ($gene_id) = $db->get_gene_ids(-id => 'AAV$P5');
-        is $gene_id, 'G000001';
-        my @gene_ids = $db->get_gene_ids(-species => '9606');
-        is @gene_ids, 5;
-        is [sort @gene_ids]->[0], 'G000060'; # in real data this would be G000174, but since our taxdump doesn't have chicken in it, G000060 was changed to human
-        ($gene_id) = $db->get_gene_ids(-site => 'R03174');
-        is $gene_id, 'G000001';
-        ($gene_id) = $db->get_gene_ids(-factor => 'T00267');
-        is $gene_id, 'G000060';
-		my %gene_ids = map { $_ => 1 } $db->get_gene_ids(-fragment => 'FR0002267');
-		ok $gene_ids{'G020751'};
-        # get_gene_ids(-reference => ...) already tested
-        
-        my @site_ids = $db->get_site_ids(-gene => 'G000001');
-        is join(' ', sort @site_ids), 'R03174 R03175 R03176';
-        my @factor_ids = $db->get_factor_ids(-gene => 'G000060');
-        is join(' ', sort @factor_ids), 'T00267 T08293'; # only found for genes that encode factors
-		my %fragment_ids = map { $_ => 1 } $db->get_fragment_ids(-gene => 'G020751');
-		ok $fragment_ids{'FR0002267'};
-        # get_reference_ids(-gene => ...) already tested
-    }
-    
-    # site.dat
-    {
-        ok my ($site_id) = $db->get_site_ids(-id => 'HS$IFI616_01');
-        is $site_id, 'R00001';
-        ok my $seq = $db->get_seq($site_id);
-        isa_ok $seq, 'Bio::Seq';
-        is $seq->id, 'HS$IFI616_01';
-        is $seq->accession_number, 'R00001';
-        is $seq->seq, 'aGAGACATAAGTgA';
-        my $annot = $seq->annotation;
-        is [$annot->get_Annotations('relative_start')]->[0]->value, -172;
-        is [$annot->get_Annotations('relative_end')]->[0]->value, -98;
-        is [$annot->get_Annotations('relative_type')]->[0]->value, 'TSS';
-        is [$annot->get_Annotations('relative_to')]->[0]->value, 'G000176';
-        is $seq->species, 9606;
-        
-        my @site_ids = $db->get_site_ids(-species => '9606');
-        is @site_ids, 14;
-        is [sort @site_ids]->[0], 'R00001';
-        # get_site_ids(-gene => ...) already tested
-        ($site_id) = $db->get_site_ids(-matrix => 'M00972');
-        is $site_id, 'R00001';
-        my %site_ids = map { $_ => 1 } $db->get_site_ids(-factor => 'T00428');
-        ok $site_ids{R00001};
-        # get_site_ids(-reference => ...) already tested
-        
-        # get_gene_ids(-site => ...) already tested
-        my @matrix_ids = $db->get_matrix_ids(-site => 'R00001');
-        is "@matrix_ids", 'M00972';
-        my @factor_ids = $db->get_factor_ids(-site => 'R00001');
-        is "@factor_ids", 'T00428';
-        # get_reference_ids(-site => ...) already tested
-    }
-    
-    # matrix.dat
-    {
-        ok my ($matrix_id) = $db->get_matrix_ids(-id => 'V$E47_01');
-        is $matrix_id, 'M00002';
-        ok my $matrix = $db->get_matrix($matrix_id);
-        isa_ok $matrix, 'Bio::Matrix::PSM::SiteMatrix';
-        
-        # detailed psm tests
-        {
-            # Lets try to compress and uncompress the frequencies, see if
-            # there is no considerable loss of data.
-            my $fA = $matrix->get_compressed_freq('A');
-            my @check = Bio::Matrix::PSM::SiteMatrix::_uncompress_string($fA,1,1);
-            my @A = $matrix->get_array('A');
-            my ($var, $max) = (0, 0);
-            for (my $i = 0; $i < @check; $i++) {
-                my $diff = abs(abs($check[$i]) - abs($A[$i]));
-                $var += $diff;
-                $max = $diff if ($diff > $max);
-            }
-            my $avg = $var / @check;
-            cmp_ok $avg, '<', 0.01; # Loss of data under 1 percent
-            
-            # SiteMatrixI methods
-            is $matrix->id, 'V$E47_01';
-            is $matrix->accession_number, $matrix_id;
-            is $matrix->consensus, 'ATGCATGCATGC';
-            is $matrix->IUPAC, 'NNNNNNNNNNNN';
-            is $matrix->regexp, '\S\S\S\S\S\S\S\S\S\S\S\S';
-            is $matrix->width, 12;
-            is $matrix->sites, 5;
-            ok ! $matrix->IC;
-            ok ! $matrix->e_val;
-        }
-        
-        ok my $aln = $db->get_aln($matrix_id);
-        isa_ok $aln, 'Bio::SimpleAlign';
-        is $aln->length, 12;
-        is $aln->num_residues, 132;
-        ok $aln->is_flush;
-        is $aln->num_sequences, 11;
-        my @ids = qw(R05108 R05109 R05110 R05111 R05112 R05113 R05114 R05115 R05116 R05117 R05118);
-        foreach my $seq ($aln->each_alphabetically) {
-            is $seq->id, shift(@ids);
-        }
-        is @ids, 0;
-        ok ! $db->get_aln('M00001'); # no seqs in db
-        ok $aln = $db->get_aln('M00001', 1); # force to find seqs, store in db
-        ok $aln = $db->get_aln('M00001'); # seqs now in db
-        is $aln->num_sequences, 5;
-		
-        ($matrix_id) = $db->get_matrix_ids(-name => 'MyoD');
-        is $matrix_id, 'M00001';
-        # get_matrix_ids(-site =>  ...) already tested
-        my %matrix_ids = map { $_ => 1 } $db->get_matrix_ids(-factor => 'T00526');
-        ok $matrix_ids{M00001};
-        # get_matrix_ids(-reference => ...) already tested
-        
-        # get_site_ids(-matrix => ...) already tested
-        my @factor_ids = $db->get_factor_ids(-matrix => 'M00001');
-        is join(' ', sort @factor_ids), 'T00526 T09177';
-        # get_reference_ids(-matrix => ...) already tested
-    }
-    
-	# fragment.dat
-	{
-		ok my ($fragment_id) = $db->get_fragment_ids(-id => 'FR0002267');
-        is $fragment_id, 'FR0002267'; # id and accession are the same for fragments
-		ok my $seq = $db->get_fragment($fragment_id);
-		isa_ok $seq, 'Bio::SeqI';
-        is $seq->id, 'FR0002267';
-        is $seq->seq, 'GTCTACAACACTCTTGCGGACGGAGAGCCGAAGAGCAAAGCGTCGCCGGGTAAGACGAACGCTCAAGGGGGTACGAGCAGCGTAACGACGGAAACGGTGACGCCCCGGGATTTGGGGCTCAGCTAGGGTCGCCGAGTAGGGGGCCGCGGGGACAACGGGGGCGACACGCCGCTTTCCCTGCGTCTGTGGAGCCTATGGTACGGCGTAACCGGTTGTGTGATGAACTGTCCAGACCGCACGTAGTCCCAGCGCAAGGTCTATGCCGCCTAGAGGCAAGACGGGCCGTCTCCTACTTAGTAGCCAGCTACGGGGCGTTGGTCCCCTCGGTAGTGCAACTATCCAGCCACGGCGTCCGCCGGGCTGAGCCTCAGCAGAGCTGGGGGGGTATCATTCCGACGCTGTTTAATTCGTCAGCAGGACCCACTACACGCTCTGTCATTCGCCTGAGCAGTTGTAAATTAGCGCGGCGATCTTGCAAGAGACAAGGAGGCGAACCTGGGGTCGGGACGTAAGGACGAACGGCAGTACAGACGCTGGGGGACGCCACGTGCCAGAACCTCTCACGACCGGAGGTTCAACGCTGATTGGGGCGCAACAGAGGGCGGAGCAGCGAGGTGGCGCTGGTGGGATGGGGCGAGACAAACCCAAGCTGACGCCGAAGGGCCCGCGTGGCCGGGCTGGGGCCCGTAGAACGAGGGAATTGTATGCGGCGCCTGAATGGGCGCACCACA';
-		is $seq->species, 9606;
-		
-        # -id -species -gene -factor -reference
-        my @fragment_ids = $db->get_fragment_ids(-species => '9606');
-        is @fragment_ids, 2;
-        is [sort @fragment_ids]->[0], 'FR0000001';
-        my %fragment_ids = map { $_ => 1 } $db->get_fragment_ids(-factor => 'T03828');
-        ok $fragment_ids{'FR0002267'};
-        # get_fragment_ids(-gene => ...) already tested
-        # get_fragment_ids(-reference => ...) already tested
-        
-        my ($factor_id) = $db->get_factor_ids(-fragment => 'FR0002267');
-        is $factor_id, 'T03828';
-        # get_gene_ids(-fragment => ...) already tested
-        # get_reference_ids(-fragment => ...) already tested
-	}
-	
-    # factor.dat
-    {
-        ok my ($factor_id) = $db->get_factor_ids(-id => 'T00001');
-        is $factor_id, 'T00001'; # id and accession are the same for factors
-        ok my $factor = $db->get_factor($factor_id);
-        isa_ok $factor, 'Bio::Map::TranscriptionFactor';
-        is $factor->id, 'T00001';
-        is $factor->universal_name, 'AAF';
-        is $factor->known_maps, 1;
-        my @positions = $factor->get_positions;
-        is @positions, 1;
-        
-        ($factor_id) = $db->get_factor_ids(-name => 'AAF');
-        is $factor_id, 'T00001';
-        my @factor_ids = $db->get_factor_ids(-species => '9606');
-        is @factor_ids, 7;
-        is [sort @factor_ids]->[0], 'T00001';
-        @factor_ids = $db->get_factor_ids(-interactors => 'T03200');
-        is [sort @factor_ids]->[0], 'T00002';
-        # get_factor_ids(-gene => ...) already tested
-        # get_factor_ids(-site => ...) already tested
-        # get_factor_ids(-matrix => ...) already tested
-        # get_factor_ids(-fragment => ...) already tested
-        # get_factor_ids(-reference => ...) already tested
-        
-        # get_*_ids(-factor => ...) already tested
-    }
-}
-
-# how to get something like ok $psmIO->release, '10.2--2006-06-30'; ?
-# or all factors, all sites, all matrices, all genes etc.?
diff --git a/t/data/transfac_pro/factor.dat b/t/data/transfac_pro/factor.dat
deleted file mode 100644
index b4b0eeec8..000000000
--- a/t/data/transfac_pro/factor.dat
+++ /dev/null
@@ -1,342 +0,0 @@
-VV  TRANSFAC FACTOR TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH
-XX
-//
-AC  T00001
-XX
-ID  T00001
-XX
-DT  e)ce.; r1916d69(9. wiaet.0
-CO  Copyright (C), Biobase GmbH.
-XX
-FA  AAF
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-SF   isAi; tlaorFGm
-XX
-FF   ibbhef -n nty25(ntu1licb-dd iad,r0h -ayeiorp'i);n Aee3P
-XX
-BS  R02116; AAF$CONS; Quality: 6.
-BS  R03064; HS$GBP_02; Quality: 6; GBP, G000264; human, Homo sapiens.
-XX
-DR  2 PM0H.03N0AS6:0TOTRA40
-XX
-RN  [1]; RE0000446.
-RX  PUBMED: 1901265.
-RA  rw oltL.k e, J. eJJi.iewrD D.re , M cT.h ,ElnDack.
-RT  tovtAcipaeu,bna aado F   ii -enIrarFl-smGongagifaclyCmdNfgmDAitN-ttn aco
-RL  9 3EMB11(O)-2. 9:9.2J19 70
-RN  [2]; RE0001471.
-RX  PUBMED: 1833631.
-RA  Je rrTel D,elJwcD . ..,. e D.a kLnE
-RT  chobslarroe neyhertin mtarg tlrtittnnfoidutnacftna niidttarpsnenaeouatodsii cw vt ytagh Tgon-eninopieti- tuyiaooisdgppi taebnas tpdn-ln   ceafnwrcen  eo
-RL  l111l.55M 9 -l41lB(. e 9oi.)o7:.3511C
-XX
-//
-AC  T00267
-XX
-ID  T00267
-XX
-DT   1e.a5;)e291i9 0.de1wrt(c.
-DT  pk21;a d2...51(0te0 )oe2du
-CO  Copyright (C), Biobase GmbH.
-XX
-FA  GATA-1
-XX
-SY  EF1; EFgammaa; Eryf-1; Eryf1; GF-1; NF-E1; NF-E1a.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; aves; neornithes; neognathae; galliformes; phasianidae
-XX
-GE  G000060; GATA1.
-XX
-MX  M00203; V$GATA_C.
-MX  M00789; V$GATA_Q6.
-MX  M00126; V$GATA1_02.
-MX  M00127; V$GATA1_03.
-MX  M00128; V$GATA1_04.
-MX  M00346; V$GATA1_05.
-MX  M00347; V$GATA1_06.
-XX
-BS  R08219; CHICK$H5_09; Quality: 4; H5, G000063; human, Homo sapiens.
-BS  R08220; CHICK$H5_10; Quality: 4; H5, G000063; human, Homo sapiens.
-BS  R08221; CHICK$H5_11; Quality: 4; H5, G000063; human, Homo sapiens.
-BS  R08222; CHICK$H5_12; Quality: 4; H5, G000063; human, Homo sapiens.
-BS  R08225; CHICK$H5_15; Quality: 4; H5, G000063; human, Homo sapiens.
-BS  R08229; CHICK$H5_19; Quality: 3; H5, G000063; human, Homo sapiens.
-BS  R04798; GATA1$CONS_02; Quality: 6.
-BS  R04799; GATA1$CONS_03; Quality: 6.
-XX
-//
-AC  T00428
-XX
-ID  T00428
-XX
-DT  e1 ci(50)2r.9.a e9.t;dw1e1
-DT  ;124a0u0 p) (12m..d.t4aedv
-CO  Copyright (C), Biobase GmbH.
-XX
-FA  ISGF-3
-XX
-SY  E factor; factor e; ISGF-3.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-MX  M00972; V$IRF_Q6_01.
-MX  M00258; V$ISRE_01.
-XX
-BS  R07792; AS$ISRE_03; Quality: 6.
-BS  R07793; AS$ISRE_04; Quality: 6.
-BS  R07794; AS$ISRE_05; Quality: 6.
-BS  R07795; AS$ISRE_06; Quality: 6.
-BS  R07796; AS$ISRE_07; Quality: 6.
-BS  R07797; AS$ISRE_08; Quality: 6.
-BS  R07798; AS$ISRE_09; Quality: 6.
-BS  R07799; AS$ISRE_10; Quality: 6.
-BS  R07800; AS$ISRE_11; Quality: 6.
-BS  R07801; AS$ISRE_12; Quality: 6.
-BS  R07802; AS$ISRE_13; Quality: 6.
-BS  R07803; AS$ISRE_14; Quality: 6.
-BS  R07804; AS$ISRE_15; Quality: 6.
-BS  R02402; HS$IFI_03; Quality: 6; IFI-56K, G000305; human, Homo sapiens.
-BS  R00001; HS$IFI616_01; Quality: 6; IFI-6-16, G000176; human, Homo sapiens.
-BS  R00003; HS$IFITM1_01; Quality: 6; IFITM1, G000179; human, Homo sapiens.
-BS  R00947; HS$ISG15_03; Quality: 6; ISG15, G000325; human, Homo sapiens.
-BS  R00949; HS$ISG15_05; Quality: 6; ISG15, G000325; human, Homo sapiens.
-BS  R00950; HS$ISG54_01; Quality: 6; ISG54, G000326; human, Homo sapiens.
-BS  R02188; ISGF3$CONS_01; Quality: 6.
-BS  R04607; ISGF3$CONS_02; Quality: 6.
-XX
-//
-AC  T00526
-XX
-ID  T00526
-XX
-DT  (; w11ie95.e).a.tc9re21 d0
-DT  aa;u.t(ek20d73000. p). 1ud
-CO  Copyright (C), Biobase GmbH.
-XX
-FA  MyoD
-XX
-SY  MEF1; Myf-3 (human); MyoD; MyoD1.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; rodentia; myomorpha; muridae; murinae
-XX
-GE  G000576; MyoD1.
-XX
-MX  M00804; V$E2A_Q2.
-MX  M00973; V$E2A_Q6.
-MX  M01034; V$EBOX_Q6_01.
-MX  M00001; V$MYOD_01.
-MX  M00184; V$MYOD_Q6.
-MX  M00929; V$MYOD_Q6_01.
-XX
-BS  R02452; HS$AAC_20; Quality: 6; ACTC, G000193; human, Homo sapiens.
-BS  R02418; HS$MLC_01; Quality: 2; MLC, G000347; human, Homo sapiens.
-BS  R02419; HS$MLC_02; Quality: 2; MLC, G000347; human, Homo sapiens.
-BS  R00019; MOUSE$ACRD_02; Quality: 6; AChR delta, G000457; human, Homo sapiens.
-BS  R00850; MOUSE$IGH_16; Quality: 6; IgH, G000537; human, Homo sapiens.
-XX
-//
-AC  T03200
-XX
-ID  T03200
-XX
-DT  .o);a. r30mdc(0. 200eeth02
-CO  Copyright (C), Biobase GmbH.
-XX
-FA  ASH1
-XX
-OS  human, Homo sapiens
-OC  Eukaryota; Fungi; Ascomycota; Hemiascomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces.
-XX
-GE  G004277; ASH1.
-XX
-CL  3Cn.;i0 z40
-XX
-SZ  ;a)7 8.Dk . (a 5)(gAAe6l85n cce.
-XX
-SQ  NASAHHSKALRPFMYLSLLTSDSINNNNNLSSYYGSPDSSKSNSDSTIIYPNRLIPLANT
-SQ  STSFLSNGSITLHLLNVANIHNEIPAPSHQKLASSIYKNFLPMSPSDSGISPSNAEPSKP
-SQ  LLRRGLPLSRRVLSLLPGQDNQPFLSQNNNHLSLPSYSGRPSLFSSSELELQLPSPQTPK
-SQ  DKDKLSRTKLNNFVNFNWYARNCNTESFQKTYDPRGYDAVNNSPHIQLASWEPDTTKHIT
-SQ  PKEKIKFPPTHIALTESVKISNLQTSDRSFPNWSRTALSRSPEPESSPSNPTMKFKLDTD
-SQ  NIFVILFLNSKESMSFLKHHKPRQPNDSESKSRGTMQNAEVEVFRGASNNTRPTIKEGQS
-SQ  NAKPTKKVRGSNSLRVRKSSIKKDLVAKVLLQSFNNLNSSNAMSNSYIQKNQLFKASDIA
-SQ  KYRTRPQSPSSYSRSKSISMRSGSPTSPPLSNHSASPSRKPTDPASTASPPRKRFPNVSS
-SQ  YSCNVSSGPKRKKSLQNHHCRSKRWQGSCRLDSSTRLSVGPTTRSRTPHTCPSWRDDCCS
-SQ  YNDNKRPCYERGENERVFIFECPKCLCIGTIGTNSLEIEVKMKINIET
-XX
-IN  T00002; ACE2; human, Homo sapiens.
-IN  T00776; SWI5; human, Homo sapiens.
-XX
-RN  [1]; RE0014319.
-RX  PUBMED: 9219698.
-RA  sn   R H.a nJ igR  z .ne ,. mn ,-ea,o..MnL,.zgthn. ogG,eNMl.PrsaXSIeKy.R
-RT  ytitAenzln ingAwcaiS aolpccl m HNleitetaitan stsmoR1i oietancfMysob ogmyr  d ryh 
-RL  7n37(87 :S7c3c)3.92 i9e-e81
-RN  [2]; RE0014327.
-RX  PUBMED: 10409653.
-RA   tY. cl.,.J ne Hi.i.JMYl dBDr,uS ma 
-RT   a ostwsDcipit ntnrichon cda5foongivAt crnsso  apeigaeicideinera ftfutreqrtnirti o2  fe creec r iaeSs
-RL  )i2e.9J1 h22C:3  o9 0401.729(.9-B1ml.6
-RN  [3]; RE0014330.
-RX  PUBMED: 10212145.
-RA  -n.e  Peu h tr.,RaJa, eS..ocSsunnCMw
-RT  cds o mtsttonMnsd ha eie tpmcrossoypyiVsaR anetrg hs So woleAi4N sidi  ninhegod n eacleA apfieeobl s yssi dnune
-RL  )l(119.95S91Ce 28J i l1 c.-11:15.1
-RN  [4]; RE0014360.
-RX  PUBMED: 10209099.
-RA  IoBsunS.e.  G.,o,mam .A. N zn oo vlhanBe ,s hnUyKozt
-RT  na osyteiir1svitln toan osrcA eruA msyiaath meAlcoslnizltausH scnm nevltlioSaee1ano nldnReNpatteultps i r r 
-RL  1 i.(: r5rC.92)03u9B743o9l3. -
-RN  [5]; RE0014361.
-RX  PUBMED: 10319811.
-RA   ,.mCa,aa K MyTa nkP.mNs hoTts ..a
-RT  let   iodvOceoleenlnettr-loceyo r rlge pydliiamerio l afteltnneo gtardcpoefdathusrpan idracomcr ntmc nmaesd on detrmrrut reac
-RL  ll 3C1)9192-:. 799e91(9
-RN  [6]; RE0015933.
-RX  PUBMED: 8625409.
-RA   s HIzo.wtelk,r AiSi.
-RT  lfocffhmneni dqnc al1yo tyrei-oieaiOpteetdr.laioac iiipce d sr t e raospug elezied afsermcsltenAnIttarefctinios gn Hahemrn,, ity
-RL  e9 .-6172781 l92l(14)C:
-XX
-//
-AC  T03828
-XX
-ID  T03828
-XX
-DT  d(b50ma).0 c.290ert0 ;.1ee
-DT  s.k2;d)ua1.2 02.pa7td( e00
-CO  Copyright (C), Biobase GmbH.
-XX
-FA  HNF-4alpha
-XX
-SY  hepatocyte nuclear factor 4; HNF-4; HNF-4A; HNF4; NR2A1.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-GE  G001926; HNF4A; HGNC: HNF4A.
-XX
-IN  T05295; PGC-1; human, Homo sapiens.
-IN  T05296; PGC-1; human, Homo sapiens.
-IN  T05271; Smad3:Smad4; human, Homo sapiens.
-IN  T04096; Smad3; human, Homo sapiens.
-IN  T04292; Smad4; human, Homo sapiens.
-IN  T01560; SREBP-2; human, Homo sapiens.
-XX
-MX  M00762; V$DR1_Q3.
-MX  M00764; V$HNF4_DR1_Q3.
-MX  M00967; V$HNF4_Q6.
-MX  M01031; V$HNF4_Q6_01.
-MX  M01032; V$HNF4_Q6_02.
-MX  M01033; V$HNF4_Q6_03.
-MX  M00638; V$HNF4ALPHA_Q6.
-XX
-BS  R13037; HS$AKR1C4_01; Quality: 3; AKR1C4, G004794; human, Homo sapiens.
-BS  R15850; HS$AMBP_01; Quality: 3; AMBP, G013496; human, Homo sapiens.
-BS  R15854; HS$AMBP_02; Quality: 3; AMBP, G013496; human, Homo sapiens.
-BS  R20250; HS$AMBP_03; Quality: 3; AMBP, G013496; human, Homo sapiens.
-BS  R15917; HS$CYP27A1_01; Quality: 3; CYP27A1, G013536; human, Homo sapiens.
-BS  R19630; HS$CYP27A1_05; Quality: 3; CYP27A1, G013536; human, Homo sapiens.
-BS  R15907; HS$CYP3A4_06; Quality: 3; CYP3A4, G002572; human, Homo sapiens.
-BS  R15908; HS$CYP3A4_07; Quality: 3; CYP3A4, G002572; human, Homo sapiens.
-BS  R19198; HS$CYP3A4_11; Quality: 1; CYP3A4, G002572; human, Homo sapiens.
-BS  R15916; HS$CYP8B1_01; Quality: 3; CYP8B1, G013537; human, Homo sapiens.
-BS  R13034; HS$F11_01; Quality: 3; F11, G004793; human, Homo sapiens.
-BS  R13059; HS$F7_01; Quality: 3; F7, G004821; human, Homo sapiens.
-BS  R15921; HS$GK_01; Quality: 6; GK, G013544; human, Homo sapiens.
-BS  R15922; HS$HO_07; Quality: 3; HO, G000287; human, Homo sapiens.
-BS  R19255; HS$TCF1_01; Quality: 1; TCF1, G004661; human, Homo sapiens.
-BS  R20585; MOUSE$CYP2A5_01; Quality: 3; Cyp2a5, G021669; human, Homo sapiens.
-BS  R13067; RAT$CPT1A_02; Quality: 1; Cpt1a, G000721; rat, Rattus norvegicus.
-XX
-BR  FR0001534; Quality: 6; Species: human, Homo sapiens.
-BR  FR0001535; Quality: 6; Species: human, Homo sapiens.
-BR  FR0001536; Quality: 6; Species: human, Homo sapiens.
-BR  FR0002267; Quality: 6; Species: human, Homo sapiens.
-XX
-//
-AC  T08800
-XX
-ID  T08800
-XX
-DT  24ac06s0(0 0l;e.er..atd 4)
-DT  a1mvd .0u.)1ea6d202 ;2pt(.
-CO  Copyright (C), Biobase GmbH.
-XX
-FA  Nanog
-XX
-SY  2410002E02Rik; Embryonic stem cell specific homeobox protein; ENK; ES cells cDNA, RIKEN full-length enriched library, clone:2410002E02 product:Nanog homeobox, full insert sequence; Nanog; Nanog homeobox.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; rodentia; myomorpha; muridae; murinae
-XX
-GE  G031512; Nanog.
-XX
-CL  h0e o06omC;.0
-XX
-TY  c.aibs
-XX
-HP  N)8m(. nT9g0;a96o
-XX
-SZ  .3.cD N((5)k 3 AAc . alc2AD0;)4a
-XX
-SQ  EPGATSYGTEMLPPASHSHMASNSMGSESSEGLQLPCPLENADAAPESNSSVLRPCSVFS
-SQ  EQQQKLSSKLPASEEDSSMEARLGKEVFPYTKQKDLVCRKSNQTKAKAQFQDLKGPRPSP
-SQ  QQKSSQSSNGRLSWQSQRLEKVTLLTKNILNYNIEKWWMYKAKLPPVCLQQVIHQKFGQS
-SQ  GTSLWGCASWNWSPSTASTWYNNTSWGSTATSNNTGWLPWQQMQWNWVQNQSTYPQTPST
-SQ  NAHDLHENTLLAQQEQSEEFSPGSHQFLSNFQSFLPLTPPVYLARQEAYFLNAVVWANDT
-SQ  PPEGI
-XX
-SC  A5f:laBnME743otBdrm0t s9Lrae 
-XX
-FT  aon 1p N0 . o e  <T r   2 8 etnt in5s 7  89>gan 1 
-FT  .i l  e1 9 4110 [x H ] 1   5 
-FT  4 1 2   ]3   x  e i23 Hl[1.1 
-FT  33. 1x li     4  [] e H173  5
-XX
-SF  in1mrncn-ta dngni;poai[e o]oemiotoh
-SF  aN-ustnc1o  N 2enle;s rgeeannaerobhns pm eutth5  qagii t
-XX
-CP  ltsencaey le2epd i.x  clsmy ei]il omnbfslptierleiu[ppiarr enscscto
-CN  aosl e,tue netmr, ,,entniysls, s[be y,keme] sc[aevl b ,o gu,yhhrt,viiihpk e,m scpe, e ,inksrveinal gikt3 reoe  milomw,pin r ;oao nl  t s s,e,,rsimt u ,nylih,, anf snyavbatusc ,,i],,,vtdsrude,ktlina soarlm,lued ncereoundcelevsa rirhuue.gysey4oe
-XX
-FF  ld ]crlroe3steaMslnoo et c ar yebi p;siilItnn)  dui tS c emysfmni(ypl(cin l nacc mer)c[aegnEirrnuCl
-FF  tnEemLioaea  ;tl-es  wbiaSItNolihnn[i 3uo e haF  srnt]yh wtfaialtg
-XX
-MX  M01123; V$NANOG_01.
-XX
-DR  T0TMO0N7HPS.708A :A098R
-DR  MumonaH PK.SanNoBg;u: Ls :De
-DR  A 97;5BA44BBML73E. 0:3095
-DR  A;MYB5Y  .1285:782EL99A71
-DR  :WR5TI5MTST8O_.SNQN77 EQS8PUOS; 
-DR  O ZZ4QSM;6.8U06SE_P0WQ:SO4IS TR8
-XX
-RN  [1]; RE0047531.
-RX  PUBMED: 16518401.
-RA  ou L ., L HiL.,. ,huL J QYb   ,eBhLYn.  , .,a DCK, Rohht..  ..iPe ,es. gzmW, e.KV WaRq.X..K,oo.K u,J.    WnL iYCotBGag v. V. nna,P V,givnu.,L uG . hn    WS. .oa..W o, urnL p,LeB n,. Z uH.H. h .i W eehg,,e wCC . esn,etLiN,eo. Wg cg.Z o. .,BorJo.nXCg u.AS
-RT  alcononir r muumrt.g y seinleo tontorNecnet  keaml pc 4esngdsilpehatirbaT  cwOcenn titypsoures
-RL  024 .6-0t..4Nn:a4 8G)(3e1t30 e
-RN  [2]; RE0047634.
-RX  PUBMED: 15743839.
-RA   HK  ditoja, SNa,T  .r,oati,onMd  umsu..kHT  .Y,N,aao,.d.aTr u utSKruH aaa.KmeHTaa. b . i
-RT  toortrNpndl nfaaseie eo ertenr p ireemr ra al tg g xfionesema otdcrueno.leneiag oxscuaniirqsS Ocsn
-RL   M0B2o)5:l(. .5 -C4.i222l58el7 o50l4.
-RN  [3]; RE0047637.
-RX  PUBMED: 12787504.
-RA  mki o.awrei.w aM,Synaia zMk Kk. u.r,,K iau...M t,a,.ud  ea . TKaa m YaaSTa u , gsHaMksaMaa,hmahhIo tYaM,M
-RT  nshintNeefeie upp racrengnnooc uatenSelehsfem lm oriucTendqtymrila  os  nso it.   oipEldaoapiobre 
-RL  0402(-3 1ll6)e3 C1216:3.
-RN  [4]; RE0047645.
-RX  PUBMED: 12787505.
-RA  bhc  o,mei sMo ,eSNywe ,.CAbSsh oaIel..mr,  s.TriSte,e, n DdLJ e.bht ..oCil R
-RT  lloogtria xygninfmmnucsciouletaa o tc etssuncs ltlcri  coeronaFg.eNiinonieoe f rppnypnn bsa,sni  
-RL  1210l: e6)04l36(C353- 5.
-XX
-//
diff --git a/t/data/transfac_pro/fragment.dat b/t/data/transfac_pro/fragment.dat
deleted file mode 100644
index b5133cf49..000000000
--- a/t/data/transfac_pro/fragment.dat
+++ /dev/null
@@ -1,111 +0,0 @@
-VV  TRANSFAC FRAGMENT TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH
-XX
-//
-AC  FR0000001
-XX
-ID  FR0000001
-XX
-DT  5.0.h2el(ct2a)k0. ;055d re
-DT  tp5. )00m0e22 d5d;ul.a(k.5
-CO  Copyright (C), Biobase GmbH.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-SQ  TGGGCTCCTGGCATCGTTTACGCGAGCTGGTTCCAGGAAAGGCACCTCGGTTCATGCGAG
-SQ  GCGCTGATTCTATTGTGTAAACTGTTTCGAGTGACTAGGCCACGAAACCTTTTCATGTAA
-SQ  GGTGAATTGGGGGTTTTCATTTAACGCGGGGTACGGTTCGTTGGTGGGAAGTGGTTTCGG
-SQ  ATGTAACGTGGTACGTCGGATATATTGCATGGATTAATACGAAGACGCTCGAGAAAGTCG
-SQ  AGAGTACAGAAAGTTAAACCGCGGGTTTACGAAAGCATAACTAAAGAAGGTAAATTGATA
-SQ  GATTCACAGAATGCGGTTGTATGAAACCTATTGACAGCGTCCGTTCGTGCGTTAGATGGT
-SQ  GTTCTGCGGGGTTCAATATCATGTGAGCGGTCGTGAGGGCAGTTATCGATACTACATGTG
-SQ  TCGGGGGGCATGCGCTCACCGACTTATCATGGCGGGTACGAACGAACGGGAATCGGATTG
-SQ  AGGAGGAGCAAAGGATTAGAATTAAAGAGCGGACTTACTGATGGTTCAGACCGTCGTCTT
-SQ  ACAACAACATCGTATGCTACTGGGAAGTATCAGCTCATCGGGTGGCGATCACGAGAATCG
-SQ  AAACATACATTGTCTATGTACTGGCGGATCAAAACCCATTCAGTATATACTAGGTTTTTG
-SQ  TTCGGACTAGCTGGCTGTGCGTACGCCATTGGAGACAACAGTCCGTGGGCAATGCTACAT
-SQ  AAGTGGGATTGGCGACTGTGGTCGCCCAGACAACCGGAGCAAGACTGTCCTGTTTCCGGA
-SQ  CTTAGGGGAGTAAGGCGCTACAGCTAGGGATCATTAGGCAGAATAGGCAATAATCAGCAT
-SQ  TTTGACTGGGTTATATCACAACCAAACGGGTGTAGGAGGGGCCGAGATCATATAGTGAGG
-SQ  GAGATTTTCCTTGTATGTAGGCTGCATATTATCATTACGGACGTGACTTATTAAATCCAG
-SQ  TATTCATATAACACGAACCCTACCAGAGTGAGTCGAACCTAGTGTTTTGATAAGTGTAAT
-SQ  TAAATTCTTGCTCGTCGCTTAATTGTGGACCATGTACAGTGATGTACTCATTTAATCGGG
-SQ  TTTGCTCGATGGGCACACTGGCCGCAAAATTAGGGTTACTCCTCAGTAGCCAT
-XX
-SC  uRr. d0.)140H 1R1O6iF.621W40D.6(AS_620C:lh7y0a8MA1 7B 1
-XX
-BF  T00140; c-Myc; Quality: 6; Species: human, Homo sapiens; Cellular source: 0123, Jurkat.
-XX
-MM  h hruCgnded -a-IntiehPothilo re-cysyacginriloo
-XX
-DR  (:94B9L3AM:39F3)8; .5114E.12 80 695
-RN  [1]; RE0035138.
-RX  PUBMED: 14980218.
-RA  o,r.emWCVmaTkog.J,r Tk. kwls a  o,r laWtneSsihB, at ek aPmrG   aSlnRR.m ,l. eA knK Soa..hu   nn.SuiAlHl,nn,  BeEe.PKGi.bA iKviaraCea.l ,e,Mahw taeeJ, ygp..,r . o.nS,a B,aH,a t oWk Y ., .S ipnrS  cDP eg H,vNb..e,..l,ngcJDegma, H  m rahen.i.ceroe n.
-RT  iedgmisnnrNdngcpbor smr2ittlpocasiep.ei ogdanoft gAaoo nt2Uud ndp i iasfpu r taw  mo n ninsnsfngnlaoimtboot2cec  dR er anarn o hsosoiieas1h
-RL  -10.542(69 9C)el0l1: 049
-RN  [2]; RE0035139.
-RX  PUBMED: 11988577.
-RA    k a,rv    eeE.k.G rePaolL,gnJrR,rgvwBKsr,.S na r .ebuoPai..n., a,Fr .. eDsTwo od yStn..ipR oaSCS
-RT  ntcaostccid sir1ntra  aevyo rn-la msago.mlLtr p2o2esehina 2ici
-RL  e01ic0269 2 S6ec:-1992n.9()
-XX
-//
-AC  FR0002267
-XX
-ID  FR0002267
-XX
-DT  )er2.. 0;00aet9cd(0hl5 .k7
-DT  0m00  07.p.2dl;ae)5(9.ktud
-CO  Copyright (C), Biobase GmbH.
-XX
-DE  Gene: G020751; Gene: G020752.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-SQ  gtggcgggcctctataacgtacacggctccgctgggccttcccgctgtaaaccgggaatt
-SQ  acgtcgagaggtgcaaggtgaaaccaccgactgaccccggccggagaccggagcagcgca
-SQ  ctccgtcccaactacatgctgttcctgccacatgcaagcgttccctagtgaatctcgcgg
-SQ  cgggactggcgcattacgcgcactccggtttcgagcgcggtcacaaccttacgttttacc
-SQ  gcgttggttatgaccggtggtatgcggcgccggtgctgggcagatcagggtggtttcgct
-SQ  gtcccattcatgcctccgatttctccactccaacgtctcctcagatgttacctcgtcatg
-SQ  atcgcatacggtctgcgtagtatgggtcaccgatccggaatgcacgcgatgctgagcgtt
-SQ  atgtacgcccgaccgtgcctgcttaccccccgcgcggaagacggcggctgttgcgttctg
-SQ  GTCTACAACtGaCctActGcCtCAAGCCCcGATcCCcCctgGCAAcCGcTcGAaCcTCGt
-SQ  ACTCTTGCGGACGGAGAGCCGAAGAGCAAAGCGTCGCCGGGTAAGACGAACGCTCAAGGG
-SQ  GGTACGAGCAGCGTAACGACGGAAACGGTGACGCCCCGGGATTTGGGGCTCAGCTAGGGT
-SQ  CGCCGAGTAGGGGGCCGCGGGGACAACGGGGGCGACACGCCGCTTTCCCTGCGTCTGTGG
-SQ  AGCCTATGGTACGGCGTAACCGGTTGTGTGATGAACTGTCCAGACCGCACGTAGTCCCAG
-SQ  CGCAAGGTCTATGCCGCCTAGAGGCAAGACGGGCCGTCTCCTACTTAGTAGCCAGCTACG
-SQ  GGGCGTTGGTCCCCTCGGTAGTGCAACTATCCAGCCACGGCGTCCGCCGGGCTGAGCCTC
-SQ  AGCAGAGCTGGGGGGGTATCATTCCGACGCTGTTTAATTCGTCAGCAGGACCCACTACAC
-SQ  GCTCTGTCATTCGCCTGAGCAGTTGTAAATTAGCGCGGCGATCTTGCAAGAGACAAGGAG
-SQ  GCGAACCTGGGGTCGGGACGTAAGGACGAACGGCAGTACAGACGCTGGGGGACGCCACGT
-SQ  GCCAGAACCTCTCACGACCGGAGGTTCAACGCTGATTGGGGCGCAACAGAGGGCGGAGCA
-SQ  GCGAGGTGGCGCTGGTGGGATGGGGCGAGACAAACCCAAGCTGACGCCGAAGGGCCCGCG
-SQ  TGGCCGGGCTGGGGCCCGTAGAACGAGGGAATTGTATGCGGCGCCTGAATGGGCGCACCA
-SQ  tCAgTCTTaGcacTCcCGCATTcTtcATcACaCtcACcCAtctAaggGAAcTaCGCCgGT
-SQ  gctaagcaaggagtcgatcaacggaaaggtgctaggggtaactgggtcttgcgcggcagc
-SQ  cgtgtactcggcttgtaacgacaccgcgactgtaagcgccctacttgcgtatattaacgt
-SQ  aatgttttagtatagttcgttttatctcatctcatgagcttgttttgttgtgtctcgttt
-SQ  tcgttttttttattttgtttttcgtttttttttttctttgtgtctatcgttcattttttt
-SQ  ttatagtggacgcccctgggtagtgcgctcgggcctgtcggagatctcgtccaacgtctc
-SQ  tatcactgctagtgttgccaagcttgctgtaactgaagcggacagtcccacccccactcc
-SQ  ttaagtaagtagacttattccgtttgttttccgttagcgtcccgaagcggtttgaataac
-SQ  ctcgtgcacccgaatactgctatacacatgactaaccaggtcctcccgtgtgcccc
-XX
-SC  30997.0aADHBrM346. d68 .9(y4 iS:W.2lA2F2RC hROu4_77)48
-XX
-BF  T03828; HNF-4alpha; Quality: 6; Species: human, Homo sapiens; Cellular source: 2614, hepatocytes.
-XX
-MM  yCrtea IropnPhr-or-moa
-XX
-DR   M8C081A09:(78:1E 1.6;8L488)B3 
-RN  [1]; RE0023998.
-RX  PUBMED: 14988562.
-RA  rgSM,k .z, .E l.dBopf dn. ..,,f.A..seL  bo .aG. mHKJ l.e.e T  R.tPeR , rlIOAeoGy ,.lG DneZ.l Gc.o B.lJF rNdfe, i,.il Wh rR,erd,,oB.Tro.n ana  li rr. D  YeliL ,erN uVg or. Diuk 
-RT  aF lnn ee tscfH bt  eNarseeorc npsarr prnsCsdnaioooei vonnlit partogycir.xf
-RL  7i3)1S088e 1:n4c0-ce20(3.1 33
-XX
-//
diff --git a/t/data/transfac_pro/gene.dat b/t/data/transfac_pro/gene.dat
deleted file mode 100644
index aea978a19..000000000
--- a/t/data/transfac_pro/gene.dat
+++ /dev/null
@@ -1,349 +0,0 @@
-VV  TRANSFAC GENE TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH
-XX
-//
-AC  G000001
-XX
-ID  AAV$P5
-XX
-DT  )b5oed40(.t9d.;161 ra .9ec
-DT  00e.ad0.mu1(vta.2p9d0 )1; 
-CO  Copyright (C), Biobase GmbH.
-XX
-SD  P5
-XX
-OS  AAV, adeno-associated virus
-OC  viridae; ss-DNA nonenveloped viruses; parvoviridae; dependovirus
-XX
-BC  .6.36
-XX
-BS       -60        0   R03174; AAV$P5_01; Binding factors: YY1 <T00915>.
-BS       -50        0   R03176; AAV$P5_03; Binding factors: 96K-protein <T00453>, factor 2 <T00282>.
-BS         1        0   R03175; AAV$P5_02; Binding factors: YY1 <T00915>.
-XX
-DR  .0H R0GN:P100TTA0SA
-DR  109;L2JX.X 1 E0B:M
-XX
-RN  [1]; RE0006609.
-RX  PUBMED: 8413258.
-RA  ,oSMa   oCo. ,.w.matehKLysCH K.cn  Prirott. hyk ,.Aa
-RT  lnafrtnatYrfettincurvmcipelmmsos A ttet ciittctihaocreYrcndr   s pe N ss1mt ci:te e1 oer tpa npiatioo riDYelsorfaeensane wet loiYia-eironnctaptTAt oe rcctoislanp tpinf rhettr
-RL  18.o -B3l9e .MC2)36ll1(..9166l26 o :i
-RN  [2]; RE0000230.
-RX  PUBMED: 1655281.
-RA  n- h S ..   SCo.ETY,h.SntaS,ge eih.,kL
-RT  fyaAdoh b dcalt1ernre- pirte u ,neTosrruseteEsra  olGYsnas rpibapne Kn-eeatonp lrlm u  ivoeLnyneonpeYfiripr1i,ae rdrsiIopi 
-RL  3713e9:l81C77. 9l8( )-6
-XX
-//
-AC  G000060
-XX
-ID  CHICK$GATA1
-XX
-DT  5a0) .b1 e.r9d.;c9to16(d4e
-DT  1;doa22. 0e50k 1.u)te(pd2.
-CO  Copyright (C), Biobase GmbH.
-XX
-SD  GATA1
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; aves; neornithes; neognathae; galliformes; phasianidae
-XX
-BC  622..4..1
-XX
-BS      -643     -634   R03087; CHICK$GATA1_01; Binding factors: GATA-1 <T08293>.
-BS      -592     -572   R03088; CHICK$GATA1_02.
-BS      -568     -555   R03089; CHICK$GATA1_03.
-BS      -485     -471   R03090; CHICK$GATA1_04.
-BS      -442     -429   R03091; CHICK$GATA1_05.
-BS      -409     -389   R03092; CHICK$GATA1_06.
-BS      -373     -349   R03093; CHICK$GATA1_07; Binding factors: GATA-1 <T08293>.
-BS      -257     -239   R03094; CHICK$GATA1_08.
-BS      -235     -221   R03095; CHICK$GATA1_09.
-BS      -177     -157   R03096; CHICK$GATA1_10.
-BS      -153     -137   R03097; CHICK$GATA1_11.
-BS      -121     -102   R03098; CHICK$GATA1_12.
-BS       -59      -49   R03099; CHICK$GATA1_13.
-XX
-FA  T00267; GATA-1 (isogroup).
-FA  T08293; GATA-1 (basic).
-XX
-DR  0R00A6A0TNS 0GT.:HP
-DR  735 :LG.;AG 99MEG1ABMT
-DR  40935ZE:.EGN TENER6
-DR  T00R .R3D52:
-XX
-RN  [1]; RE0012279.
-RX  PUBMED: 8628290.
-RA  hV..n F.bo.loennafeT.dr TnoMOC..  J ld c.ro. C,eG isna, nG.GirDror ,,enir  .GA nskieMl g e, Lm
-RT  sniieeeraagth ueAoeAAiTogrir1 rtointh cirtno-N1G  t ri--bpiefpb GtfitriAeh h an rqof eny-ciTatiaof stmDe ooornirfds ddthgr iei unvirlAlntAmtezn gcs bymw na
-RL  iB 2Co..1M26l 13-().l:.7  l6o92e842l9
-RN  [2]; RE0008084.
-RX  PUBMED: 1400499.
-RA  Ama,n  .Digih .nJlZ
-RT  dsihioeleym uo1en baece en mtnaa 1e mieigPneTensaeoos in  ex Dg nn titsec- advAelu lifagettnaAns-dPm t bhldehs
-RL  -1m1.:C .2.)7 69hJl1eo29 428B9 i2(.090
-RN  [3]; RE0002473.
-RX  PUBMED: 2014222.
-RA  . H.R sn l Tln ,uva.aoHnednGlEn efFodse,, G.
-RT  ftonc-fteiirronSreoctidAr ugoretorei rarort shocuheAeha ttp tm  p tnef naaiyrvyGdt1T  tc 
-RL   :c8 3d.AA.0occ 011l 9rPt 93.8SiS0N4)..(8aa-0 U
-XX
-//
-AC  G000174
-XX
-ID  HS$4F2H
-XX
-DT  ae06 )r..;1(.o9c4bd 91det5
-DT  ().am1au2ddtep0 .v2.01; 11
-CO  Copyright (C), Biobase GmbH.
-XX
-SD  4F2HC
-XX
-DE  yvheFahn 4 2ica
-XX
-SY  4F2; 4F2HC; 4T2HC; CD98; MDU1; MDV1; NACAE; SLC3A2; solute carrier family 3 (activators of dibasic and neutral amino acid transport), member 2.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-CH  11q13
-XX
-BC  616..
-XX
-BS         0        0   R00005; HS$4F2H_01; Binding factors: NF-4FA <T00569>.
-BS         0        0   R00006; HS$4F2H_02; Binding factors: NF-4FC <T00571>.
-BS         0        0   R00007; HS$4F2H_03; Binding factors: c-Jun <T00133>.
-BS         0        0   R00008; HS$4F2H_04; Binding factors: NF-4FB <T00570>.
-XX
-BR  FR0008041; downstream; Species: human, Homo sapiens.
-BR  FR0008042; downstream; Species: human, Homo sapiens.
-BR  FR0008043; downstream; Species: human, Homo sapiens.
-BR  FR0008047; downstream; Species: human, Homo sapiens.
-XX
-DR   m:HmLunSPH3AD 2LBnK:a; C.uaS
-DR  4P0:.1NAH0T7GR0AT S
-DR  9M4GE2 HF :1L1S8MB.;82H
-DR  10.S:0M00N0GL080BE SENE63
-DR   0F:A_2_2atYEu0_HF9osF IRMYG.F_XFTAs:4c
-DR  _HYFLI2YM33 Ta0.FXF_S2Us9_E F1_t_:RU_240APAG:
-DR  23.ATFF_ 1aGA3_ __FXUMRFt4YAs:2:EY00IH9
-DR  03_G AMs1F__34XUtAHT:F_F.FI9E Y:R22A_2Y0a
-DR  5XRME9:AY09H.FY_tG8AFF TIU F__aA:32
-DR  F AY9FE2MU_39 .X2_v5YF:AIT:H0aFRG_t8A
-DR  F:U2A7 EYG:_I3_ 3Mt_RE9H5_FYFaF.AT2Xs
-DR   _:tU33a9_AATs_R_FE7 GYXFFI3Y.H5E:FM8
-DR  8 _FI:50HMXE_tY_5.F4GAF:R9a_Ts9YF AEU
-DR  _T9XGMaYeLFn_tAeF0M AYRFFIF.4:2E :Hu1
-DR  XaP:1T0a5_5 E_0I_3t1_3F3AYFMAX. FgpFUR44Y:3_
-DR  00B B1M8;101L:E010BAA.8 0
-DR  A. 01F3F50;LE 531BMA0:055
-DR  E;552K5AKM:A 024L .88540B
-DR  995B00A8MEL8;75 .A070KK: 
-DR  2K9.4;A A 2EB60KL4600M:09
-DR  :006 010AP10 1L;P.M0BAE61
-DR  0C100B001;0LM:6.116BB E C
-DR  L 0.0BB: 0C0M0030C3BE000;
-DR  MF;27.2LAEJ :H9B 046S
-DR  2 9LGH:3B0.SF9ME; 2J4
-DR  NJ;T:A.HB A5L L396M0SE
-DR  22:E9S 9. 8MM;GH421LFBH
-DR  SMH:F3GB M2.2L0 4H910;E
-DR  M42B.F 2:19H4SHLM;E1G 0
-DR  B M F;ESGHH2024.M:129L5
-DR   H0M:H4 23G6FE12S9ML.B;
-DR   H4247BG1HE:9;.20S MFML
-DR  T0EN52:6.GREENZ E
-DR  51.8 OM:M70I0
-DR  23CSA N.:HCGL
-DR  06RSF11_62 Q001N.ME:E
-DR  R26106.:0NF E_S012MEQ
-DR  3RES1F162.00_EQ0:6 MN
-DR  F61. EEQ1:R00S2N0M_46
-DR  0MFESN51:2 01Q0.1E3R_
-DR  :0M.SF3_R4 E90QEN2
-DR  .0:ENSQ21E0_P7F061R 9
-DR  F0E06:2ERS_NP08 01.1Q
-DR  111PEEQ0006SFR8:2_ .N
-DR  0E60R08NE21:2_. QPFS1
-DR  ._PE16F0Q1S2NE 3R900:
-DR  Q:32E8P FRSE_5.N00
-DR  :.7sEG6HUNI92.0 N5E
-DR  00:0DR2RT .0
-DR   5_4SRN1:S4TPA.AO_H6R
-DR  .A__HP5RT4 NS2AORS4:6
-DR  R4HORT3.N_A4_SS6 PA5:
-XX
-RN  [1]; RE0001199.
-RX  PUBMED: 2761540.
-RA  edDAahr. Pc LKeea.ngi,Mrk.n el.sH, M. sL..arCJ GY.npi- ,io.,i     B
-RT  eenrr pefrn nnhca-n4nossm oeeul  en ii aetyaeTlnTGhnCiFtttens CieraoePNcnEloaatlBt t ht aI Htrivhidnr iirMhs   2ncsaFop TaluE
-RL  9o -e.oM12(9l.8llli759.29.8C5 :8B)  
-XX
-//
-AC  G000264
-XX
-ID  HS$GBP
-XX
-DT  et b4;10c.a(do.erd.)19596 
-DT  d9.6; 29ba5d1t.4e(0p d)uo.
-CO  Copyright (C), Biobase GmbH.
-XX
-SD  GBP
-XX
-DE  oaa-ginelentiupntybrgndi 
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-BC  165..
-XX
-BS      -216      -76   R02897; HS$GBP_01; Binding factors: IRF-1 <T00423>, ISGF-1 <T00427>.
-BS      -128      -99   R03064; HS$GBP_02; Binding factors: AAF <T00001>, GAF <T00300>.
-XX
-DR  TN.ATGS0264PR HA00:
-DR  D.00:0R 0TR2
-XX
-RN  [1]; RE0000446.
-RX  PUBMED: 1901265.
-RA  D   r,lMLr  ., eieJe.Jwre. kTahcD witoEJlk,D.cn.. 
-RT  nf A,t  ntotrboa-c apscm aNeogi tnCieaaldva mfDFalIiidFcyigrN--utAnGgamo
-RL  9-1E0M .2 .3(7:J19O991)2B 
-RN  [2]; RE0001471.
-RX  PUBMED: 1833631.
-RA  . cel,arn JrDlkeJ..w  eT E.e ,.DD L
-RT  tfcerufn c eoo op enbetgisgetywitdltugoreantrtp inropsd  eytubidotnl otat he- a ntsiaiceasdtcann cnmpnhnonh tai -panititodergriir- aitannniTsl vawaynoef
-RL  l(lo3i9el5 B91l.1)1-M .Co.. 5151741 :
-RN  [3]; RE0001567.
-RX  PUBMED: 1898761.
-RA  h...l ro IkeDl Jrc e .e. LwehTe.JnD,rSDlt,   w,Ea
-RT  it snop dpteneyoagtc nileib O iruloocm oegrgnrnaai nartieetf  ahm-mmb revenotanmt htagaiernsgipra iraplneun enpnllnteedpdtey anidi 
-RL  o-(. ..ll1C:l11 1BM 1o119. 9lie)298
-XX
-//
-AC  G020751
-XX
-ID  HS$ABCG5
-XX
-DT  ;k2c.0are..05d509t 0 )me(l
-CO  Copyright (C), Biobase GmbH.
-XX
-SD  Abcg5
-XX
-DE  imAT rmiese(r as-stbaitb,l )fiP(bcuI5dWeGs  He tnT n),l-menyEo g1
-XX
-SY  Abcg5; ATP-binding cassette, sub-family G (WHITE), member 5 (sterolin 1); sterolin 1.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-CH  2p21
-XX
-BR  FR0002267; upstream; Species: human, Homo sapiens.
-BR  FR0003356; upstream; Species: human, Homo sapiens.
-BR  FR0006822; upstream; Species: human, Homo sapiens.
-XX
-DR  :Dn;mAHu 5L aGPS.KHmunaBC: B
-DR  PA 0S:1T0G72A.TNH5R
-DR  LENS78.3:0E00MB100S 0E5NG
-DR  E_FF3R 2Y3:_F. GucFoY2_AHFs0T:8XAtaMI
-DR  8L_U1.3UP_:32: atGFAFM3RFY2T2_0E_3IFYAXSH _
-DR  F_t03T A GX3:8Y_2U1H3AYFaE.F_R2FIMA3:
-DR  Ua 8X3t1._A:FE23_AH3G_Y23:I_F FM20FTAYR
-DR  7Y3DU_ 9AtA.F 8GFTF_a9XE:F_H5Y2I:MR
-DR  41C2M1;L001:2 42BACE2. A1
-DR  :1 4M0;LCC.A E686187AB047
-DR  E M1:;2.5LFA717133 1AF52B
-DR  BE;A:M33F022 9F02.339 L2A
-DR  10M4L:A6F1BE6;F4 440A. 00
-DR  A; LMB40:007744F4.0 A1F1E
-DR  ML;7K0 9K9 AB.9A910E9719:
-DR  141511 B MLB;1BC4.51CE1:1
-DR  2E:0EZE.44ET GN6NR
-DR  .:201 0OIM5M2
-DR  O.4505I69: MM
-DR  G.H g5c:ANCb
-DR  F 2E3:6M2RSE4_N.Q0
-DR  E7E0P._S8F1N1 :R8Q
-DR  EN3G2:2.Us.IEN99 1H
-DR  _P .R11_:2RN4SSAT7H0OA
-DR  SNO.P1S70:_A2HT 4_R2RA
-DR  7HT4SN31.R0A AS:RP_2O_
-DR  74_PHSTS:4AR2N R0.1AO_
-DR  A T2_A0.R:SN4PHRS1O75_
-DR  A.SNOP0 _6:R_2SAT74H1R
-XX
-//
-AC  G036757
-XX
-ID  HS$TRPV6
-XX
-DT  a0) ect2..(d5 r0e;4e02ch.6
-DT  e0 2(0.)ah2dtde4..pu;6 50c
-CO  Copyright (C), Biobase GmbH.
-XX
-SD  TRPV6
-XX
-DE  psai etc irnba nuV,oaee ebrh af nlrtnolrtc  enmmi,tlse6cpneiayomtt
-XX
-SY  ABP/ZF; CAT1; CATL; ECAC2; HSA277909; LP6728; transient receptor potential cation channel, subfamily V, member 6; TRPV6; ZFAB.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-CH  7q33-q34
-XX
-BS     -5508    -5494   R19310; HS$TRPV6_01; Binding factors: VDR:RXR-alpha <T08716>.
-BS     -4337    -4323   R19311; HS$TRPV6_02; Binding factors: VDR:RXR-alpha <T08716>.
-BS     -4287    -4273   R19312; HS$TRPV6_03; Binding factors: VDR:RXR-alpha <T08716>.
-BS     -3519    -3505   R19313; HS$TRPV6_04; Binding factors: VDR:RXR-alpha <T08716>.
-BS     -2169    -2155   R19314; HS$TRPV6_05; Binding factors: VDR:RXR-alpha <T08716>.
-BS     -2119    -2105   R19315; HS$TRPV6_06; Binding factors: VDR:RXR-alpha <T08716>.
-BS     -1283    -1269   R19316; HS$TRPV6_07; Binding factors: VDR:RXR-alpha <T08716>.
-XX
-DR   nHBaa;muPSK :umH:nDT RPLV.6
-DR  5PTHAT:7 G.R60N3S7A
-DR  2:5 24YM46L1A;612.5AB2Y E
-DR  ES0B: 6GE00S11E05N0M.52LN
-DR  _YFFtcG7sE:X8FH.As2uF :_o6RYF_TI_M a20A
-DR  6XEPRY_1_: :8_2GAHtF3UTL aIA.Fs_M72UYS32_FF0_
-DR  RUYA72MFE AA3I1__6GtFF3F2Y8_0_aH:sT. X:
-DR  FYME 20s_FG: 6F_13Ya.28H_A:XTAI3_U7R_tAF2
-DR  _tAFIXA38:YT5 _FRA69aU6FM:F H0Y_GE.
-DR  2.Yv_3F:G9I  TAa066_ERA8:FHtXFFYU5_AM
-DR  1_eHeXFInM32A8AE LF6YuGUT.:FRF:_t FYa
-DR  A 3FM:L34 066B44E43A03;.F
-DR  9:36.A B3AE6L57;952 7F2FM
-DR  5AM9836L: A.F2F6 35;82B9E
-DR   M5034HJL. :A03024BA25ES;0
-DR   L32;M1:5AA40.BJ1325 H40ES
-DR  92.;L727J007799H S9: MEABA
-DR   484BA7M;AH6E47SJL:9846 9.
-DR  KA13:51BL5.7;0 K00 AE370M
-DR  6 E002MK0:7BL76 AA5.25K;0
-DR   0Y5ML05.0;0B30EYA0:33A 3
-DR  2A4:3;7900YY9MEL4A37.2B  
-DR  3E68U212UH  .6318B;MS:L
-DR  7:E7XALHSBMZF 88 .;B3
-DR  ET5.R5E0E GZENN:35
-DR  6O60I6:8.M 0M
-DR  :CHNT. PR6GV
-DR  N 6E4108S:R6_M.QEF
-DR  S6E1Q_.RF6 1P:E0N1
-DR  XE4121MR:65F0 Q01S_.E
-DR  05.6P F1140_2EQ1S:XER
-DR  2I007HE.NUs:4N 3EG.
-DR  66HGU:NsEIE 8.36N9.
-DR   .46U.IsE7N3N8EH:G0
-DR  O8RN_HA1TP7 A:4_.RSS1
-DR  TRA8SAONS4.:7 2R1_P_H
-DR  178P4 3NH:ATR_R_OSSA.
-DR  A4SO18RT4NSR.A_HP _7:
-DR  O7.:TAN41SSARH8R5__ P
-XX
-RN  [1]; RE0047775.
-RX  PUBMED: 16574738.
-RA  Mh. ,. JeS.emtM,vK.S,ed W yP ,i  an u.e aNKe.krkWBM. .i i 
-RT  TTSDLIN TO16 NETIYE SHANEVIL  HDIPDOTTP-EIUON LNTRIICGVMAD I2 EATSTL  IAEMAIINC NIIDT PT TXYT OLESIADI5.ITC MUDSTRHTCNAMANHBVEPEORIM BR,NAO RLRSNM3 A VEY T
-RL  nrM o00d2 .:c.l6.( noEoi)l
-XX
-//
diff --git a/t/data/transfac_pro/matrix.dat b/t/data/transfac_pro/matrix.dat
deleted file mode 100644
index e49eab7be..000000000
--- a/t/data/transfac_pro/matrix.dat
+++ /dev/null
@@ -1,243 +0,0 @@
-VV  TRANSFAC MATRIX TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH
-XX
-//
-AC  M00001
-XX
-ID  V$MYOD_01
-XX
-DT   (d9.01.w9 a29ee1;)1ciet.r
-DT  dd.p ..b2uo7ae9)2;0 9(1td1
-CO  Copyright (C), Biobase GmbH.
-XX
-NA  MyoD
-XX
-DE  ounedpt tadtiorlieboe aesynrmg ntmc
-XX
-BF  T00526; MyoD; Species: mouse, Mus musculus.
-BF  T09177; MyoD; Species: mouse, Mus musculus.
-XX
-P0      A      C      G      T
-01      5      0      0      0      A
-02      0      0      0      5      T
-03      0      0      5      0      G
-04      0      5      0      0      C
-05      5      0      0      0      A
-06      0      0      0      5      T
-07      0      0      5      0      G
-08      0      5      0      0      C
-09      5      0      0      0      A
-10      0      0      0      5      T
-11      0      0      5      0      G
-12      0      5      0      0      C
-XX
-BA  on3  nfsen  snng5eluaetemii ctle
-XX
-//
-AC  M00002
-XX
-ID  V$E47_01
-XX
-DT  w.e9921td19i1e .e ).;(0car
-DT  7.0di12 e8t0;dep).(uw00 a.
-CO  Copyright (C), Biobase GmbH.
-XX
-NA  4E7
-XX
-DE  7E4
-XX
-BF  T00207; E47; Species: human, Homo sapiens.
-XX
-P0      A      C      G      T
-01      5      0      0      0      A
-02      0      0      0      5      T
-03      0      0      5      0      G
-04      0      5      0      0      C
-05      5      0      0      0      A
-06      0      0      0      5      T
-07      0      0      5      0      G
-08      0      5      0      0      C
-09      5      0      0      0      A
-10      0      0      0      5      T
-11      0      0      5      0      G
-12      0      5      0      0      C
-XX
-BA  fs EMyeignnt1  tkid yarEtawn4oDeoEeD1o) 7fMl1d sre7n 2-(,b+c o, riseoE 1 g4ds2 
-XX
-BS  ATGCATGCATGC; R05108; 3; 15; 17; p.
-BS  ATGCATGCATGC; R05109; 1; 15;; p.
-BS  ATGCATGCATGC; R05110; 5; 15;; p.
-BS  ATGCATGCATGC; R05111; 5; 15;; p.
-BS  ATGCATGCATGC; R05112; 8; 15;; p.
-BS  ATGCATGCATGC; R05113; 9; 15;; p.
-BS  ATGCATGCATGC; R05114; 8; 15;; p.
-BS  ATGCATGCATGC; R05115; 7; 15;; p.
-BS  ATGCATGCATGC; R05116; 11; 15;; p.
-BS  ATGCATGCATGC; R05117; 5; 15;; p.
-BS  ATGCATGCATGC; R05118; 5; 15;; p.
-XX
-CC  iefn bmtetle tc r ne;nudetrntEd errEsNunrxo[(p-i ]srv2aoueri=  hi1eaager pt deiiiIic  )od;tl nt dahu1o mte2tcGisi sctl1o 5sbn e scc cor qgNldyo en
-XX
-RN  [1]; RE0000231.
-RX  PUBMED: 1846322.
-RA  Hlr..neDto. i,-Bam XS u
-RT  te ciot emfAandnenn ri 1irrdt2o oiniEm1stEt h pNAdmrmyora2neut  iisspni iosrg  fthbDvoin odotinrbnoEhr  be cie n12a
-RL  . -0)1:47l9C49(el4165 9
-XX
-//
-AC  M00972
-XX
-ID  V$IRF_Q6_01
-XX
-DT  ..ea24e0dt0;r2 d10tc( c3).
-DT  p.e2tt2c.dd 0 ;)45a6u.d(00
-CO  Copyright (C), Biobase GmbH.
-XX
-NA  RFI
-XX
-P0      A      C      G      T
-01      5      0      0      0      A
-02      0      0      0      5      T
-03      0      0      5      0      G
-04      0      5      0      0      C
-05      5      0      0      0      A
-06      0      0      0      5      T
-07      0      0      5      0      G
-08      0      5      0      0      C
-09      5      0      0      0      A
-10      0      0      0      5      T
-11      0      0      5      0      G
-12      0      5      0      0      C
-XX
-BA  oen mlpoptACNn3TNgccsCdeElidiM16  cRu  isGeIqei e lSsuesmn1O
-XX
-BS  ATGCATGCATGC; R00001; 3; 11;; p.
-XX
-RN  [1]; RE0024406.
-RA  NATSTmF.aR_AeC
-RT  RAnMATFt CnSIATvese N XreoRi Rifsoi
-RL  0RoR0eC 0NA 4)(4r2s A.p0:F0tTS3
-RN  [2]; RE0017571.
-RA  mCNF_aTA.eRSTA
-RT  NR XT nRACMsFStrew NTeIiAeA
-RL  Fops2r)R00S0(T:AC1Rt0. N 20e3 A
-XX
-//
-AC  M01123
-XX
-ID  V$NANOG_01
-XX
-DT  d..6e2cr660(a0a)v.e ;tm 00
-DT  d0aeavt7.)..( u;2 606pm00d
-CO  Copyright (C), Biobase GmbH.
-XX
-NA  aNogn
-XX
-BF  T08800; Nanog; Species: mouse, Mus musculus.
-XX
-P0      A      C      G      T
-01      5      0      0      0      A
-02      0      0      0      5      T
-03      0      0      5      0      G
-04      0      5      0      0      C
-05      5      0      0      0      A
-06      0      0      0      5      T
-07      0      0      5      0      G
-08      0      5      0      0      C
-09      5      0      0      0      A
-10      0      0      0      5      T
-11      0      0      5      0      G
-12      0      5      0      0      C
-XX
-CC   tai1effdthnPo dd(fiemrwN drMoi[ahs i lseo)eoitCEsPiana gANt aCTe ssdIto  - nh gcIiy]iihttm bonmevt 
-XX
-RN  [1]; RE0047531.
-RX  PUBMED: 16518401.
-RA   a..A,egiBm   ,L,...gVh  er Chp.nP .LaeuRWuu.nJot..e Se PDW B,.  YC  .  .K  Ynbtini,vX son vg,   L L.oq.o,ngnYC ,Cn.o,,,nZe  ..hV.sK .zh, Wug e,eah LL.e. VRW.Hc .r,,,w B ua, SioiC e.  Bng .   ,.Leh.LGQtLHH ,Z,N,W,Ju ..GW . .uL W  uioKJgo.oo .h.  oX a K 
-RT  rp4nyoiomes  mnwntsstrncermtn iach on pngrcog.staelei ikcylept uaotilOu neebrdtr sN Tea  clueo
-RL  .3n20aN08t (-.teG4640 1 3:4).e
-RN  [2]; RE0047626.
-RA  _TSNRAm.TeAFaC
-RT  atwmeris.iNtr nxe e
-RL  0oRACAe2St0s)p1RF10.rNe  0 T:(20Rl06
-XX
-//
-AC  M01124
-XX
-ID  V$OCT4_02
-XX
-DT  (6e m.atr0;. 200)6c0avd.6e
-DT  2ap0m0d 760.tdv0(;a).e6u. 
-CO  Copyright (C), Biobase GmbH.
-XX
-NA  OF5tP-4c)U(O1 
-XX
-BF  T00651; POU5F1; Species: mouse, Mus musculus.
-XX
-P0      A      C      G      T
-01      5      0      0      0      A
-02      0      0      0      5      T
-03      0      0      5      0      G
-04      0      5      0      0      C
-05      5      0      0      0      A
-06      0      0      0      5      T
-07      0      0      5      0      G
-08      0      5      0      0      C
-09      5      0      0      0      A
-10      0      0      0      5      T
-11      0      0      5      0      G
-12      0      5      0      0      C
-XX
-CC   sEsmf- edCyhnoT rsdti tcdas caitOMttfiPa won 1he)4ai-Nvtd Cgs itiIeIoAi bo i( h[nme]ttP dflheremio 
-XX
-RN  [1]; RE0047531.
-RX  PUBMED: 16518401.
-RA    C.C.., gg.K eLu.W.urhh,V,ege KC Xue. q   W  QeLsWuuvL hgi  s   P B, .H iJ.,B. ni.   o . Hn,W.  LXB.DLY.Sat e.h ie neue.,o.oe.. .L  ,.oK ,.ou,a R,piGL zG  ow.RgK .,o , NYHoL,,hC   ntn CWnA,Si nW, ..oZ go ahr.mV.g J,V,co,b.,Ln. . uvtnBhLZanJW..a ,PeY  .
-RT    an o c peetetilsois OwlNulr gerdtmitiluhptcmep cnsecunsbeaorre.kneor cntannyyiTa tsngrmo 4 o
-RL  3 0.G3082a:6t4)e40N4. n1e .(t-
-RN  [2]; RE0047626.
-RA  FeTAm_Aa.TNCRS
-RT  rweNa rtx imneis.te
-RL  p)NFtR0l02:Te11oC0 A0 rs 0.e6RAS20(R
-XX
-//
-AC  M01125
-XX
-ID  V$OCT4_01
-XX
-DT  .ce66 tea0 avm;0)r0d(.60.2
-DT  1.pko)e9; 61ad2e0d.t0. (u0
-CO  Copyright (C), Biobase GmbH.
-XX
-NA  tcU5-1O)(P O4F
-XX
-DE  dpl2iotciOreeoisi  ,n4 -mindiotfxctSojtc  
-XX
-BF  T00651; POU5F1; Species: mouse, Mus musculus.
-XX
-P0      A      C      G      T
-01      5      0      0      0      A
-02      0      0      0      5      T
-03      0      0      5      0      G
-04      0      5      0      0      C
-05      5      0      0      0      A
-06      0      0      0      5      T
-07      0      0      5      0      G
-08      0      5      0      0      C
-09      5      0      0      0      A
-10      0      0      0      5      T
-11      0      0      5      0      G
-12      0      5      0      0      C
-XX
-CC   tesinocerost cdfT  wI-)lefiirbtentPt4(heihn a]iPs -ei g h1 eoafe t dmsmadmiyiO dhrttd[ ovdaeoCtEWi s
-XX
-RN  [1]; RE0047531.
-RX  PUBMED: 16518401.
-RA  n BrnC.,  he.BY ,ViB.aKnWL     L W, hV.R Cu.Ze.LHoaD,o Cu B..J.s b  i.g ,,v... ,NnnX.geKL heH. c.ipW .PK,eH   .g, noq r Jag,oi.. Lo..aYWJo e  o,h nG ..hn ootgeXW,o Y L, u  o ,wtK  zL.e .CZ.SL, W R  n  uSV,a L,Lih.WAP,.gs.i.u,em.,e  .n.,h uGveQg,uCt . u 
-RT   tceyntmb aieoT atkeioycsmoo odisa regnupln eu ONepeinit nrgtellssrmsr.wrtr c4cla ce tuh nonnp
-RL  eaN3( 4842e16)G-..0t4tn3:0.  0
-RN  [2]; RE0047626.
-RA  _NaAeT.CFATRmS
-RT  itNeasi  x.wtrrmeen
-RL  NT20oSA(RCe6R00RA:s0p0F  0 .)1lr21et
-XX
-//
diff --git a/t/data/transfac_pro/readme.txt b/t/data/transfac_pro/readme.txt
deleted file mode 100644
index 30f7355d2..000000000
--- a/t/data/transfac_pro/readme.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-The data here is a very cut-down version of Transfac Pro, used without permission.
-However, the only thing these files represent is the file format which isn't
-beleived to be copyrightable. IDs are intact, but the actual useful data is
-randomised or artifically created. At the very least the usage is believed to
-be 'fair' according to copyright law.
diff --git a/t/data/transfac_pro/reference.dat b/t/data/transfac_pro/reference.dat
deleted file mode 100644
index 404cda184..000000000
--- a/t/data/transfac_pro/reference.dat
+++ /dev/null
@@ -1,92 +0,0 @@
-VV  TRANSFAC REFERENCE TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH
-XX
-//
-AC  RE0023998
-RX  PUBMED: 14988562.
-RA  eb .,r.ohG.,lr ka a. e.lLf.   ,lo L  k n fdlP rotRiFpmocfl e el. u oY ,I..VeGZDrS.inl.gl ,e,T RiT,,elr,DyB . or.dHM.NrddrGJir..BOD. i,a e.z.Ee oBn R .   ANsgA,JK .G.  n r e.W,u
-RT  tnissnatr pocaHlnctabp oar  gn t Naersnrdr xocvffi Fireseoynoieerpsn.elC o 
-RL  .830 3(e0 n)48c2S01c:3-137i1e
-FA  TRANSFAC: T08612; HNF-4alpha; taxonomic class Mammalia.
-FR  TRANSFAC: FR0001534; human, Homo sapiens.
-FR  TRANSFAC: FR0002267; human, Homo sapiens.
-FR  TRANSFAC: FR0003841; human, Homo sapiens.
-XX
-//
-AC  RE0047531
-RX  PUBMED: 16518401.
-RA  o on.gLg  WJ  eZeWQ,..n . aC,L Pt Yn.,o Yn   cqg iBu Wz.aK.onL..ogL,, .VSh,a  G,,JW.L , ..n.L.h pXi C.. u,huh hK eg..su..uoo giuh.H ,nh .,,t o  Y.vACaCgS.V,.LeG  R.ZiLn sWJ o.,ee B u, n a.rb HPKeVo,CWe,.o v i L..,rmB L.  Bu . . H eiNn DeR tX ,W w, Ke,  
-RT  pe.s  ra m eTsrttt4nkenceeepgnyniowldlsmle rt aoNitng  eOa cycrip nbcmt onruoislirhseuo tauonc
-RL  tn020e..t ) :G1eN04- 34.8a46(3
-FA  TRANSFAC: T08800; Nanog; mouse, Mus musculus.
-FA  TRANSFAC: T08969; Nanog; mouse, Mus musculus.
-FR  TRANSFAC: FR0012904; mouse, Mus musculus.
-FR  TRANSFAC: FR0016884; mouse, Mus musculus.
-MX  TRANSFAC: M01123; Nanog.
-MX  TRANSFAC: M01124; Oct-4 (POU5F1).
-MX  TRANSFAC: M01125; Oct-4 (POU5F1).
-XX
-//
-AC  RE0047775
-RX  PUBMED: 16574738.
-RA  ..Bet S . ,.u i rMeK ,,d. vWeWk KaS.ee.nyNk mJMMih. a, i P
-RT  INDD VDGT C1AALEBEI.EIT IYIHLA6ITTE E ANV  ITSL MTRTANYE TM NISP TNBAUTPOIORSL I- NVTOD,MHIRRLINSDX TRPY NO CAELUAOA SNMMNT CED5CTH NII TERTOI2IMTVPEH3DSAI
-RL  Mc (o0o.. 0n)lnir.do 2E:6l
-GE  TRANSFAC: G036757; TRPV6; human, Homo sapiens.
-BS  TRANSFAC: R19310;; human, Homo sapiens.
-BS  TRANSFAC: R19311;; human, Homo sapiens.
-BS  TRANSFAC: R19312;; human, Homo sapiens.
-BS  TRANSFAC: R19313;; human, Homo sapiens.
-BS  TRANSFAC: R19314;; human, Homo sapiens.
-BS  TRANSFAC: R19315;; human, Homo sapiens.
-BS  TRANSFAC: R19316;; human, Homo sapiens.
-XX
-//
-AC  RE0047626
-RA  _eCART.mTaANFS
-RT  ixNr e r.setitnmwae
-RL  ) :N0Se0R1rRs2o 0 TR010CFA6A2tp0(le.
-MX  TRANSFAC: M01105; ZBRK1.
-MX  TRANSFAC: M01107; RUSH-1alpha.
-MX  TRANSFAC: M01108; HOXA7.
-MX  TRANSFAC: M01109; SZF1-1.
-MX  TRANSFAC: M01111; RBP-Jkappa.
-MX  TRANSFAC: M01112; RBP-Jkappa.
-MX  TRANSFAC: M01113; CACD.
-MX  TRANSFAC: M01114; E2F.
-MX  TRANSFAC: M01116; CLOCK:BMAL.
-MX  TRANSFAC: M01117; OTX.
-MX  TRANSFAC: M01118; WT1.
-MX  TRANSFAC: M01119; KAISO.
-MX  TRANSFAC: M01122; ZNF219.
-MX  TRANSFAC: M01123; Nanog.
-MX  TRANSFAC: M01124; Oct-4 (POU5F1).
-MX  TRANSFAC: M01125; Oct-4 (POU5F1).
-XX
-//
-AC  RE0047634
-RX  PUBMED: 15743839.
-RA  STi,rNHHKs  aa..Sd .Tdu m,,aj Kaauroo,Ho.ia. M  ,.Kk,,H u atnua   oa t aYdba..mirT TuN.et
-RT  aoeNfnmusiriOorrner. tcec eqdio  aeissnxrxnooa csnmt fa eoesgir el eogltrdatipnanreete rla   ngpSu
-RL  0404eC2:8Ml55. l52()25.o i7ll o. 2-.B
-FA  TRANSFAC: T08800; Nanog; mouse, Mus musculus.
-GE  TRANSFAC: G031512; Nanog; mouse, Mus musculus.
-BS  TRANSFAC: R19260; Nanog (Nanog homeobox); mouse, Mus musculus.
-BS  TRANSFAC: R19510; Nanog (Nanog homeobox); mouse, Mus musculus.
-XX
-//
-AC  RE0047637
-RX  PUBMED: 12787504.
-RA  h  .KeyMarKa  mk.sYiia . ,MHMtMa.aSnzaw,a kaghTakum ,a..oIwd. MMuTaia,  ua ,i,hra.ama, t eaMu   oYS,.akKs
-RT  Smoeitnt T  els iresde  irnoeqcofEgp dm aeuriasho n  hycaasr.onoilenbilicrunnlnemt tNo epfepape uo
-RL  33 02l102 -1.l(C136):46e
-FA  TRANSFAC: T08800; Nanog; mouse, Mus musculus.
-XX
-//
-AC  RE0047645
-RX  PUBMED: 12787505.
-RA  S.eRedtbt i..,Mhb, lowmCnh .iTLro. eD ,S,AsmicC., h   ylbIseo, Naees . roJ Se
-RT  ioei.N fclFoilsngers aeu ypoern  aesinsxen nprcostmottiolc n cc p,llymt nuigau tofacnb niinagsorn
-RL  536:34.3-260051()1 el Cl
-FA  TRANSFAC: T08800; Nanog; mouse, Mus musculus.
-XX
-//
diff --git a/t/data/transfac_pro/site.dat b/t/data/transfac_pro/site.dat
deleted file mode 100644
index 6bad8267c..000000000
--- a/t/data/transfac_pro/site.dat
+++ /dev/null
@@ -1,806 +0,0 @@
-VV  TRANSFAC SITE TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH
-XX
-//
-AC  R00001
-XX
-ID  HS$IFI616_01
-XX
-DT  0r0d.c)i(e.;t1a 90ee 9w.26
-DT  0vpd12.am1)t(ua d1;.1.0 2e
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  IFI-6-16 (interferon-induced gene 6-16); Gene: G000176.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-SQ  aGAGACATAAGTgA.
-XX
-EL  SEIR
-XX
-SF  -172
-ST  -98
-XX
-BF  T00428; ISGF-3; Quality: 6; Species: human, Homo sapiens.
-XX
-MX  M00972; V$IRF_Q6_01
-XX
-SO   lNa3sl;IBro00hFtaB -p+8.i8
-SO  .0i;uDa01d6 
-SO  -p1 lah eH .;N0I1a+F0aL
-SO  .80H1 ;0FF
-SO  u .Ci L0D0M8;d+3a 
-XX
-MM  onerl aorithptre/ogopinephcpinnntf
-MM  ia fhrittecoynerneneetl
-MM  rtdinarotleae g
-MM  atittne meiernorhefcneyl
-MM  aolmtnocptheoir ettnyi
-XX
-CC  -rro  iaFiobnsirFntpFeidutkIamhtn fseh s)lfu abtwHhfaol ina( cN
-XX
-DR  (N6:E.L:HNI 9 0BI0);0242YS8 558MF
-DR  H 9P7_SNP.202IDIE:;E0 
-DR   9HS9TRA__1SN:.1OAP6R
-DR  26 _OTRRA.H:9SA1_NSP9
-XX
-RN  [1]; RE0002135.
-RX  PUBMED: 1707163.
-RA  boyuhB.,CRe  l .eL
-RT  ean Atfhttn cn otnrpinrpatteanescnmif ertlor-f ni ioteeDt:voaa  emieNoseo rrnto-orite ipAoeroHn rpDe lmosNs
-RL  R ie11.is9171Ne l:9A.c 9 )udsc5(c
-RN  [2]; RE0001119.
-RX  PUBMED: 2243138.
-RA   ,jS. R.AaS, d,nnDoo. ZtD ,nM,MBt eiarrars m. g i.  J ,epoh.Tsat eEis .oaz.wnColKne , J. Hir s,. SO aH.ee.Hhtesr
-RT  avl tpbciemennioshhsni ainecotcdao nrdcmris  onitgvlilape intasnl trc ppnodslitneerioduip aosdailiieonayelt riemro caaaotytp oeglsabisunkh rsecep dytrn nuog bhcnnfut- nes reteeme   teifp ll o nrlhe obu etlimornleua efrrsiA
-RL  9 .1...148 Jt6v -Iei6(s097C1n60l 6):n
-RN  [3]; RE0002131.
-RX  PUBMED: 2123539.
-RA  ..k lM e.I . Mi  , l I A, ...rm.c amtKTDS,GeA. Rr  kM .ararAl.,AC
-RT  a redupr ren ooicisrdalnapnem nTasnbmincyst trhcdia aotfag f
-RL  )R8.0. d1 A9s39ucec8N:50ii-(e6s6 15l7 c
-RN  [4]; RE0000307.
-RX  PUBMED: 2721502.
-RA  J.rewl ..rG,Rae.s S. .AR Rtr oMDKnt a roe..eMe,u ,e.r. M  n.LGIAkP .C i  ,, J   .iGl,lCT  ..
-RT  nstoniie  corens-evne iitstafpo DalmdvOtpoiuvtei lAsca d ariticlribnd rn dNtrpida ivgerctuntentnoiufinrsndfs nego 
-RL  91EO3)1.8  :99(3M. J-88B8
-RN  [5]; RE0002261.
-RX  PUBMED: 2919169.
-RA    .I..R .K.r,.etlCma.  rMkMaDe SI.raA, m G T,  
-RT  epeanpncotRbemraioDenoiiiepeat  pytga Afe tdar a t fr l ennnvapsyniric d tt lsbatdl etloraoon ftt.lh hiNnusci e-n
-RL  c.o2.2l8 c1038 .P:9S6. () irNd .1S a79Aa0-AU1ct
-RN  [6]; RE0000306.
-RX  PUBMED: 3359997.
-RA  h ew r C A G lY ICtro.ib . .GkaMeo .rarGj,a.STe.ie,.s,k  K,. ..r,rCR.l.tCre PDtS n  
-RT  n rmol6eenI 6tepgentf eneouftnea-e mn e hsshr1o re
-RL  -8E1)9.9B .:M7 58(JO2 8
-XX
-//
-AC  R00019
-XX
-ID  MOUSE$ACRD_02
-XX
-DT  ceeei0w 2a0()906.91r.d ;.t
-DT  1di;7. wad191)ut 9epe(.12.
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  AChR delta (acetylcholine receptor, delta-subunit); Gene: G000457.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; rodentia; myomorpha; muridae; murinae
-XX
-SQ  gccgcATGCATGCATGCtttaagc.
-XX
-EL  E1
-XX
-SF  -92
-ST  1-5
-XX
-BF  T01786; E12; Quality: 6; Species: human, Homo sapiens.
-BF  T01788; E47; Quality: 6; Species: human, Homo sapiens.
-BF  T00526; MyoD; Quality: 6; Species: human, Homo sapiens.
-BF  T00528; myogenin; Quality: 6; Species: human, Homo sapiens.
-XX
-MX  M00693; V$E12_Q6
-MX  M00804; V$E2A_Q2
-MX  M00973; V$E2A_Q6
-MX  M01034; V$EBOX_Q6_01
-MX  M00929; V$MYOD_Q6_01
-MX  M00712; V$MYOGENIN_Q6
-XX
-SO  2C4sa. s2b0mt0y; lo
-SO  mrec3- )l0oi.;Eocu(03.se
-SO  oyt 8u0se3C;b7m.2 
-XX
-MM  elih esfrgtti dc
-MM  olht rteyeicternnniefea
-MM  n cnynastlfoaausiil
-MM   teigrntraoaedl
-MM  tlhntotfnme reeciraieyen
-XX
-CC  4th]iiMrd m tna oo  gobE1a  s  hr1te[weyehiiodnsni7erd E2enDmey r
-XX
-DR  8HARE )B: 6M4:M6L6.(481A137 M33MCD;
-DR  :169 C385BRLHM71:1M3M4 3 (AX9.E;)
-DR  _3RP.S1N1U_77TA RM2:OM
-DR  _PSM.1URM_O8AN7 2TR6:
-DR  P:2TO_278RN U.MS_6ARM
-XX
-RN  [1]; RE0005844.
-RX  PUBMED: 8355673.
-RA  M neu.SSA.JnrBo.mid ,  . 
-RT  at son seiaecdv eu fticgetorsleos tnsnn e-x rcadnidehinghEtiu tAandtsoganly pee  bm lieni aryiorbpeecouetro me
-RL  31.ei13o551)91l -(3:34.loll..9 C0M B 
-RN  [2]; RE0001768.
-RX  PUBMED: 2797207.
-RA   ,d.J..wn .BSu TJieaBn lr d
-RT  -tlcepmubn ae ancigt neMkeerii nnciir gns-se f perooooelee ydcDed bss1ia ogatcsgllMrlxyti nluey
-RL  r2 t u9416e8(3)7109.1-Na:7
-XX
-//
-AC  R00850
-XX
-ID  MOUSE$IGH_16
-XX
-DT  w .69)i01e.0e(2 ar.;dte9c0
-DT  . de)et1(i60u 05p9d7.;aw9.
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  IgH (immunoglobulin heavy chain); Gene: G000537.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; rodentia; myomorpha; muridae; murinae
-XX
-SQ  ATGCATGCATGC.
-XX
-EL  n,mEurhee nca2
-XX
-SF  378
-ST  913
-XX
-BF  T00207; E47; Quality: 6; Species: human, Homo sapiens.
-BF  T09514; HTF4gamma; Quality: 2; Species: human, Homo sapiens.
-BF  T00406; INSAF; Quality: 6; Species: hamster, Cricetulus sp.
-BF  T00526; MyoD; Quality: 6; Species: human, Homo sapiens.
-BF  T08853; myogenin; Quality: 2; Species: human, Homo sapiens.
-BF  T01799; Tal-1; Quality: 6; Species: human, Homo sapiens.
-BF  T01800; Tal-1; Quality: 6; Species: chick, Gallus gallus.
-BF  T00790; Tal1-PP42; Quality: 6; Species: human, Homo sapiens.
-XX
-MX  M00698; V$HEB_Q6
-MX  M00712; V$MYOGENIN_Q6
-MX  M00993; V$TAL1_Q6
-XX
-SO   I3;N0H30T0-3.
-SO  JA630 B.;-B0
-SO  .0ae00 HL;1
-SO  ; 16S1940.8
-SO  Ucl 462;2 3.0
-SO  .; 42EJ40
-SO  2;0 523.4D
-SO  h (.r0uty3calms)n;r ee3-2
-SO  me.r6;y(ls3ec0r )o- 2tseu
-SO  ais6r);oec0a7.oe-Eu6-b.m (dllcm
-XX
-MM  iechte  sfdlrgit
-MM   ott IieaspnrofNiDgn
-MM  itoo yep a/ioincgnmoct nervtih olinmvet 
-XX
-RN  [1]; RE0003087.
-RX  PUBMED: 1312219.
-RA   ns.H. R ul.o Kt,E.,.i sSE NO-gnon .J
-RT  2 odanoendarEaibsit t h g oi aeucu-ameoF2x p e hirtHgct ci eNo yfdr,igdaoatml tt Tt relAtlEB-n rfleax-oiolyt honDhIpnyabeeiAln l
-RL  oi4(102eM139.10:.l1 ol9-l12.C lB  )2.
-RN  [2]; RE0001629.
-RX  PUBMED: 2038315.
-RA    - CeaC .Js,u nH..nQ- e,B.hh.gTLerH ,.R
-RT  -eog ni2oxe pn tlo Ecs1o/xlton nnrci--aii hhlon eraottievpwi4ey afiithnastE1 d 7e tnh isEhl-tiitbocnoihperac
-RL  92lli..1M7Bo llo34C031:1-0 () 91.3e. 
-RN  [3]; RE0001447.
-RX  PUBMED: 2111447.
-RA  .rMmBtinaD me.AnL ore, e .oadL b r.a,Tnli
-RT  no ohporanillbec circG evvdplpIEne aeheil-mm oem hnyhamoIief   s ecn naeEniuSlRs onynatoeeiet fCgLnHtnugSxedvooin fti-uEmn
-RL  0l092  -5.i6.1.1:oleB3 93Col1M)1. (5l
-RN  [4]; RE0001998.
-RX  PUBMED: 2181401.
-RA   ,C okenoW. M.AW  e lrPiha,Rr.A., D.nA  smrek.a
-RT   iootca ootfsrtbct aelmtip cieeAiico aacnNu   henmtoisua e grApaerr nn  n vgflniDnpttuaescra fnl ecde
-RL  5-1 cdcs i 1ecl(e9 A811.N.1u9:Ri60s)691
-RN  [5]; RE0002608.
-RX  PUBMED: 3917574.
-RA   gr Gw,huaS EtChoee.niplriMG . c.auhAbsW r.,.T ,s 
-RT  meuFunoepictoe iinI runo-n nnofsgmncei Iw a eCb ncoiSnVErliaflctt hre oLlaai lrhcg lvaaiBt
-RL  (n1)2Sc4 8 0e-71c932i5.:e41
-XX
-//
-AC  R02116
-XX
-ID  AAF$CONS
-XX
-DT  (1sr9 tF)de c4;1ee2.0aiaMs.r9ty.e1/
-DT  0 ddb5od 19.p.t(.1)3e6a9u;
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  consensus.
-XX
-SQ  ACCTTTATTTTTCA.
-XX
-BF  T00001; AAF; Quality: 6; Species: human, Homo sapiens.
-XX
-RN  [1]; RE0000446.
-RX  PUBMED: 1901265.
-RA  . ri Dk.Tn   l.l,.w cEer,DhoLr M.DJiJweeJc.ta  k,e
-RT  iNtm ctcFynt,o ad-r GF-Csagrcmiatfl D-moaNgAtieinaIidnua vAebalag on ofp
-RL   MO9.JB21920-:3 E1(.99 )71
-XX
-//
-AC  R02418
-XX
-ID  HS$MLC_01
-XX
-DT  9(1;0 e2ctd1.)2 1.wie.aer9
-DT  2pe.;maua .v00d.d007 6t8)(
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  MLC (myosin light chain 1/3 locus); Gene: G000347.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-SQ  ATGCATGCATGC.
-XX
-EL  hc r'aen3en
-XX
-SF  214
-ST  142
-XX
-BF  T10006; Myf-5; Quality: 2; Species: human, Homo sapiens.
-BF  T00522; Myf-6; Quality: 2; Species: human, Homo sapiens.
-BF  T00526; MyoD; Quality: 2; Species: human, Homo sapiens.
-BF  T09197; MyoD; Quality: 2; Species: human, Homo sapiens.
-BF  T00520; Myogenin; Quality: 2; Species: human, Homo sapiens.
-XX
-SO  3Ece)mrsc0. lei0(-;o.3uo
-SO  E(m36ich;unre.0co0-. a)l
-XX
-MM  lat ogrreditean
-MM   eraeettcltfioynreihmnne
-XX
-CC  4osydfnn]ogb gydfM3 k tta,i,by- Mb Muy  nnrfiy  1-n -nfMetiow6g  l,o-ti[5
-XX
-RN  [1]; RE0002118.
-RX  PUBMED: 2243772.
-RA  r,be.Bhl oueu tMe noBtBr.eH h  nrrg,Ee-w o, ol M.o.un,eidaarNWA.EgdWtn s  Bht .B R,,lHTn.nn . r.B. Do.,
-RT  fiasnndt ityuseome ceen rr eth wh1enor3aredtico rlh ln e eom/sgeolc soyr adenoiicgsC mta n gr  maovttfimatuhcLp ahMAfnul
-RL  91e1dR2cA98036 N. .ies 9uicl (c:)s
-XX
-//
-AC  R02419
-XX
-ID  HS$MLC_02
-XX
-DT   tea2.)w1ce0(.i12de91;.9r 
-DT  2) 8.uapm0d; 06(0ea7.d.0vt
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  MLC (myosin light chain 1/3 locus); Gene: G000347.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-SQ  ATGCATGCATGCATGCATGC.
-XX
-S1  within conserved region
-SF  527
-ST  030
-XX
-BF  T10006; Myf-5; Quality: 2; Species: human, Homo sapiens.
-BF  T00526; MyoD; Quality: 2; Species: human, Homo sapiens.
-BF  T09197; MyoD; Quality: 2; Species: human, Homo sapiens.
-BF  T00520; Myogenin; Quality: 2; Species: human, Homo sapiens.
-XX
-SO  .o3u)(;i0E3msole-re c.0c
-SO  E0ce03cai(hr )6;mn.l-u.o
-XX
-MM  oattgaedierrl n
-MM  hre niertiatnemfoyelntec
-XX
-CC  n) tndiknbfse[6ffn to  Marly n1,-   wa] edn y3o  rybktMg u- nnto 4bei-y oMiag(wvo5Mgd-dat fyyi
-XX
-RN  [1]; RE0002118.
-RX  PUBMED: 2243772.
-RA   ,a r ,gt .D. e,baMBHBlWhnweuR. .dN-EH rrBsn  , r.Brtot,e,eEg .ltTooihnl oe,Mn  dhAB B eorWoun.....nun 
-RT  impidcLosdn crM uan inym o3helaug eec  hAt re nmint vmieh o rineutos tawllsorcag/yare1ftdhah clse trCfanort neso mogtfee
-RL  9l 8AidNss.i(1  6c.9)Ru 9c1c2:30ee
-XX
-//
-AC  R02452
-XX
-ID  HS$AAC_20
-XX
-DT  2)911re.ede(.1.tc9iwa 2 ;3
-DT  0d.itued26)(1 ;.a003n6 dp.
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  ACTC (actin, alpha, cardiac muscle); Gene: G000193.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-RE  promoter
-XX
-SQ  ATGCATGCATGC.
-XX
-EL  xoE-b
-XX
-SF  -65
-ST  -49
-XX
-BF  T00526; MyoD; Quality: 6; Species: human, Homo sapiens.
-BF  T06361; MyoD:E12; Quality: 1; Species: human, Homo sapiens.
-BF  T00528; myogenin; Quality: 3; Species: human, Homo sapiens.
-XX
-MX  M00804; V$E2A_Q2
-MX  M00973; V$E2A_Q6
-MX  M01034; V$EBOX_Q6_01
-MX  M00929; V$MYOD_Q6_01
-XX
-SO   2;C. 2mabs0sy0tl4o
-SO  23C120.04; C
-SO  c3ml.o()ir03cEe-0;eo s.u
-SO  h ci60are.ouE-c3m(n0.)l;
-XX
-MM  asyinncalnfoaul tsi
-MM   adittloergraen
-MM  ottelhmfeigip i cnost
-MM  eiheeeon nrtcifeayttnlrm
-MM  npfiti(dgnbsiie)o tnsab uhryd
-XX
-CC   Demnsym trsaedo lysp0re  icta ap eftnct4phi-u0iet aStufyioqreuMsnsaa cl-ap  scal0nc>fielhr ss2osccfo Ritnco[0unc 1m   oono tu  ob h lie 1doedg ctelituoRcinta]h fnipaerh6nrpi- inetw,rpsa1aRmglcaits  ei<,secucfcvoiSaoF2 d7e le
-XX
-DR  ; 3HM)27.8(E4:ATC48  41BM3SA:C3L
-DR  6T3;SP:E 0P1. CHE3C_DA
-DR  1SNO8_:10STP7ARAH. R
-XX
-RN  [1]; RE0025127.
-RX  PUBMED: 10082523.
-RA  sl,iHB aea.ias arE r KmV,id.,Y. oto mSe.ilre aeLd
-RT  ltosol mesdahpro  thattmpreSlnal-syc aioeipopaagcnountni iepfsdcieni hm  nciova etx -nulmyh ica  cqicle coMpmoteix oebao .1rtnr intrtroniciaterhsnfaauxpprfil ad eaet  p-rticrr oo
-RL  lC5)o  .8. ol:.5.9l l79(49217Bi9-eM21
-RN  [2]; RE0000718.
-RX  PUBMED: 2123467.
-RA    . .,bKK.oltlteaL r,sV.SWei  sederrAe
-RT  ecMsaiie gpuGeisaci-onc csn haDqybp1adrbtc ot-ea rrMCren1onceputl- e  cidsixaeo  reg irhafn,i osfxSlfAn,p d
-RL  .:18 e9.9DG)21v 81e1ne(420s-1 
-XX
-//
-AC  R03064
-XX
-ID  HS$GBP_02
-XX
-DT  e91r).ai;9. e (d23ct10w.e5
-DT  a1 wi.1u).15htid(199d1p. e;
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  GBP (guanylate-binding protein); Gene: G000264.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-SQ  tatcttatagtCTTTTATCTCAATTaaaccc.
-XX
-EL  GAS
-XX
-SF  -821
-ST  -99
-XX
-BF  T00001; AAF; Quality: 6; Species: human, Homo sapiens.
-BF  T00300; GAF; Quality: 6; Species: human, Homo sapiens.
-XX
-SO  I;e LlhF.- N0+10p1aHaa 
-SO  .g5m;H+-  FNaaILme0a4 0
-SO  Ia6+ .4 m-a 0S;2FN0gmF
-SO  - Fs7d4 boat ;fN+ lapd0lsbaioh0iplrI.i
-XX
-MM   diutnaseeecessloxg
-XX
-RN  [1]; RE0000446.
-RX  PUBMED: 1901265.
-RA  aD.r er ETc J i.Jw.  l,eiokce.D.e.n, L lk rwhMtJD,
-RT  Aa-n,ot  FslFctgdo-mvCadioaaD mfnuoaeefGt ANagnaapi iirtymlntcg IrN b-ic
-RL  -E 917.1.MB12 (2O0J 39)99:
-RN  [2]; RE0001471.
-RX  PUBMED: 1833631.
-RA   EJeeD  wJD. L.n, ae .T rDe.k.,clrl
-RT  ttihnonbeg-ifnitoeyaa il ritngtolr sb-t slarci w foa e nasiphotahc dnva deoe enntinponpn-nsaodeietapinrndncafrpo tteusgtuaoctiwtrTti e ruyagce imn dtynt
-RL  olC541-19l.o17.5e Bl5.1:1M 9)l ( .31i
-XX
-//
-AC  R03174
-XX
-ID  AAV$P5_01
-XX
-DT  d)4art.9  3c0w9e6.;ie1.e(1
-DT    89(dpr9ard.1e8.1da.u;t2ee0)
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  P5; Gene: G000001.
-XX
-OS  AAV, adeno-associated virus
-OC  viridae; ss-DNA nonenveloped viruses; parvoviridae; dependovirus
-XX
-SQ  CAGACGCTCTGTTGTACTGTAT.
-XX
-SF  6-0
-XX
-BF  T00915; YY1; Quality: 2; Species: human, Homo sapiens.
-XX
-MX  M00793; V$YY1_Q6
-MX  M01035; V$YY1_Q6_02
-XX
-SO  .0aHe100L ;
-SO  acu);(ire.cn0-El0mh. 36o
-SO  77 P;-0Y.S82
-XX
-MM  rdhf ilgstcti ee
-MM  otNngpIiis enrt aofD
-MM  cetfi ohntlip iogemts
-MM  reymreinienn leatfteothc
-XX
-CC   ian1idi2k]s w[[ebnet  g]
-XX
-DR  19X01212. J270: ;: B3M)6(ELX
-DR   VAE1AP D:2.N7;6V2P0C_AE
-XX
-RN  [1]; RE0006609.
-RX  PUBMED: 8413258.
-RA  oSo,y.twio    My.rKHtmh.C a,cro. aK.enAkh.C   P,asLt
-RT  i1  nttiit srpneaspiw Aftinpocarspottc telrcterersfasYYani mt  cvctltpe etetpoaitmeocif s  oeninlYrfop stnteDa aticN iremrhoi aclrecYurr  rrnoteA: ledtT tne aetahcoiot-smnn1i
-RL  33) ll.l661:9le96(B6C.-. . iM1 1oo228
-RN  [2]; RE0000230.
-RX  PUBMED: 1655281.
-RA  SSnS,e.hat,ko .,L .-nhgC S  YheT. . Ei
-RT  ena fy1laYoso rltneesI-  sroo imn,anr rrp dpphnGcK e ALdrlobYriaen pfrearivpe ntl e -asnnrbi,irpieosydeuaou1 ireeutEse itTp
-RL  98e363-: 77l19(Cl. )871
-XX
-//
-AC  R19310
-XX
-ID  HS$TRPV6_01
-XX
-DT  r  ..6d2e0c;0.2h540()ateec
-DT  0d0;.(a6 upt 3)ee0c25hd..0
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  Gene: G036757.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-RE  distal promoter region
-XX
-SQ  aGTCTAGGGCATAga.
-XX
-EL  V.DER-55 
-XX
-SF  85-05
-ST  445-9
-XX
-BF  T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens.
-XX
-SO  c ;32aC.o92-0
-SO  amei-20tn(bah.r1c;u)ac 6re
-SO  0.S2010;38L 
-XX
-MM  poeiinnaioctmhm)apuoPcC mhIirrt(ni t
-MM  stgh rfeltdeiic 
-MM  syalc uiflnatoasnin
-MM  hegtipicsofimeo tltn 
-XX
-DR  6MA3422-9215).-1:A( 6:;Y 57Y45B55L912 E
-DR  AS OH:P_T7A3S4.R8_NR1
-DR  RA44ST81SOH7. P_NR:_A
-XX
-RN  [1]; RE0047775.
-RX  PUBMED: 16574738.
-RA   te.k.. .,PiuKB.a e.NM e,raS. W, eJ ,dW Mym inMKvk   eSi.h
-RT   NINCTRE2AIML  IT6CYIREHT TP ONA  TTMTIDOLIEANGVTIIMIENLPUPLH1SUN ABR SHTTEVEIXOPNMVTDORY N,ENRIM RA5S .ADI-T  AIOBAOTTETASDIIY EVSLTDET ICC NL3MS INDD A H
-RL  M.lnoE.6 :).n20(lodcor0 i 
-XX
-//
-AC  R19311
-XX
-ID  HS$TRPV6_02
-XX
-DT  c5a.0dr)e0ee2.5;(0.2 c th6
-DT  t. h;52d)06.dp00 ac.(u30ee
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  Gene: G036757.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-RE  distal promoter region
-XX
-SQ  TACCCAGAGTAGCGT.
-XX
-EL   .a43DRVE-
-XX
-SF  37-34
-ST  23-43
-XX
-BF  T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens.
-XX
-SO  C2;a90 .o-2c3
-SO  -cic )m2(0thbeeau1rr;6a.an
-SO  0S; 0L38210.
-XX
-MM  mtih(m)onntpcei irroPiIupotChcianm a
-MM  di etisfertgch l
-MM  tasfisnuanlnycolai 
-MM  itihnoe tcteiosm fglp
-XX
-DR  E5YA42067121511:1AY4:  7L262.;M ()27B
-DR  .R3A_ PR_TO718HS:A4SN
-DR  8:RR_T.SO_47NSH 1P4AA
-XX
-RN  [1]; RE0047775.
-RX  PUBMED: 16574738.
-RA   Ji,NBm y .,iSekM.h.K  , i  tM. ...kee,aue WMvWdKrSe .aP n
-RT  TIAPT.A3IIERA-YIO HEUACHLIMTES2NATMS  XR TADPTEDOL N N OETTN1HNESM IIPT MRV S6NIIAPA5NSCTLVDIS CTGRA BOTMIDE OVNYRIDBDTL  INL INMDCEA,HYLTT  IRTEENT UVI IO
-RL  )0Mro ll:.ooiE  nnc6.2(.0d
-XX
-//
-AC  R19312
-XX
-ID  HS$TRPV6_03
-XX
-DT  c5at( 6e).0crd;0. 05h2ee2.
-DT  2d0da;65 (ec0..u03e0 h)t.p
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  Gene: G036757.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-RE  distal promoter region
-XX
-SQ  gyGACTAGGGGAGTg.
-XX
-EL  R.EDb 4-3V
-XX
-SF  4-287
-ST  327-4
-XX
-BF  T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens.
-XX
-SO  .o0-C23 9ca;2
-SO  6 rmr)n(.1baahcut;ei-c02ae
-SO  810LS 032;.0
-XX
-MM  moci)mrmipoti tipPaa(tCorhhIi necunn
-MM  esli htfti grdce
-MM  cyl usaoalfniatisnn
-MM  itispftcto lei megonh
-XX
-DR  6)EY-2 2512114LAB(5.718 7265-472:MY ;A:
-DR  _8P4 .7TSORS31R:A_AHN
-DR  :H SAR74_AN.R_S81P4TO
-XX
-RN  [1]; RE0047775.
-RX  PUBMED: 16574738.
-RA    MeW.Nd., Bv ie, .MKSri.  ,mu. ,WeMJ yPeate.S  ai Khkn.k.
-RT  DGNLLXNDC HOE2TU YTO3 I5VOCORNN  IIADN EPI AEIVYES TRSTLUATSTCI HEDP ITDA VIADT MTTI,R MT-T CTTE IOHBIAINLVM INILNETMISTSPLA.E EYMRSNINA REA 1RANOTMIBPH6D 
-RL   .lno:0..0oMiE2rl  onc(d6)
-XX
-//
-AC  R19313
-XX
-ID  HS$TRPV6_04
-XX
-DT   0 5002t(ea2h5..c6cree)d;.
-DT  d.t; 0)3h.u20.ac(6ee 5dp00
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  Gene: G036757.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-RE  distal promoter region
-XX
-SQ  gagGCGAAGGGGTCA.
-XX
-EL   5.3-VERD
-XX
-SF  3591-
-ST  5530-
-XX
-BF  T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens.
-XX
-SO  90;Ca o3c.-22
-SO  uretea2r).i b(acha-601mc;n
-SO  L 38;0S1200.
-XX
-MM  riiam pt(paPCrtmhnucm)iihetncoo Inoi
-MM  scftdt rii geehl
-MM  cssiayn iaafnlunlot
-MM  ehfoemsptitcitnogli  
-XX
-DR  7;-1L .2-425 64Y:AM21E8224Y79 9)B:A65(2
-DR  APR TNO4:_H3_1SRA.S87
-DR  4R_S:ST.AA71 ON8PRH4_
-XX
-RN  [1]; RE0047775.
-RX  PUBMED: 16574738.
-RA   ,K Mer  SuyW.eedi M., m., kiP h eti ...BakK.M aeJv.,NS Wn
-RT   RAL5RT NIA1PVIMATBDSEE P-HIAR.SUTERDHIN T   IO,L IPATNNITNTCMSTNTSCRINDATDMIN6 IL  YRTATO HTID E NO UVVLD BNM  GXHOIDLEO TV TTEI32PMLEAYCIMAEASOSCEITINIYE
-RL  or.0 n.n.ol lE(0 :Mdio62)c
-XX
-//
-AC  R19314
-XX
-ID  HS$TRPV6_05
-XX
-DT  t 05)d;r.0c. h.ee5ea2c2(06
-DT  .hp ;c0e.)d0e3t00d2(5u .a6
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  Gene: G036757.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-RE  distal promoter region
-XX
-SQ  GTTTTGAGGCGACGT.
-XX
-EL  21.DER -Va
-XX
-SF  619-2
-ST  52-15
-XX
-BF  T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens.
-XX
-SO  2; c-0C.93oa2
-SO   cie;rbm1ac6n-r0ha.e2u(ta)
-SO  0L0;2.1S83 0
-XX
-MM  pcoitmtuinaho)nChreItriomnc (imiaP p
-MM  eridf ts getlihc
-MM  acsntanulyfl inisoa
-MM  iefh ntmeltioispgo tc
-XX
-DR  462Y9A7 22:M42515B).;A:12 Y762 1LE99(
-DR  S_RA:8S1_HANP73T. 4OR
-DR  47.8HST_AAR4:P_ NSO1R
-XX
-RN  [1]; RE0047775.
-RX  PUBMED: 16574738.
-RA  v  i,,MNk JmW Syan.at.,e PSe. Wh. uKKreB    dMi,e .Mei..k.
-RT  I DXNNVNLT3 DT RSOYBLRNTUANSE ITETIITM  SRMN1SNRTMAILVI PDIMIETEETLP2Y.T IS HAIRIRVCA EHCTTVEAPONN TTIOD NA 5HUHD  NCTOLE DIEAL6IMIAGTPDY AO MS ,TCBIO IAE-
-RL  :lE.c.iono 06.o0 rM) n(ld2
-XX
-//
-AC  R19315
-XX
-ID  HS$TRPV6_06
-XX
-DT  5de.(2 c )a.0eer5;c0t2h.60
-DT  te0.0ap( 2 h;05e0.3d.c)ud6
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  Gene: G036757.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-RE  distal promoter region
-XX
-SQ  CCCCATGCCCGACAA.
-XX
-EL  b-D2VR 1.E
-XX
-SF  -1912
-ST  20-15
-XX
-BF  T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens.
-XX
-SO  3cC0.ao2 92-;
-SO  6riearmah)10e(;a2ccbutn.- 
-SO  01;8L30 2.S0
-XX
-MM  (cnoCrPnmtociihm oaiah pipruIitm)net
-MM  eiscrgh l fttide
-MM  ifnsaslncunaa otyil
-MM  fititepo nhce oigltsm
-XX
-DR  346E9A21.61Y:2A5;B9 43:2 4Y5(2 M721)L
-DR  R _R.AO7N8AP:1TS3_4HS
-DR  8RS_ O1.THP:4RS4AN_A7
-XX
-RN  [1]; RE0047775.
-RX  PUBMED: 16574738.
-RA  u,nek,.mW ,e PMJedre  viKM.a. tBWh .M S N. kSi ae .y Ki,..
-RT  APITALNI  SSI I3 TOTONMA6YAUT1OT2 IAS  H AADNR IVDVTNOPNTRYILT IVIMVRDC PCLDXTAEDAIT-BR M E5CNLSCBS,INEDOTITHS TIINGNRHEHEI.PTODIY T RLIN ENEUM EATM TME EL
-RL  Er0l:o.n )i.ldM6o02con(  .
-XX
-//
-AC  R19316
-XX
-ID  HS$TRPV6_07
-XX
-DT  2)(00eh5t ;e5cr0cd.2.ae. 6
-DT  a 3)p.5 cde062d.(00.0te;hu
-CO  Copyright (C), Biobase GmbH.
-XX
-TY  DNA
-XX
-DE  Gene: G036757.
-XX
-OS  human, Homo sapiens
-OC  eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates
-XX
-RE  distal promoter region
-XX
-SQ  ttTCAGATGACTGAt.
-XX
-EL  E2-V.R 1D
-XX
-SF  -3821
-ST  12-96
-XX
-BF  T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens.
-XX
-SO  co;9-3 22aC.0
-SO  ;cmrn a-(u1.0ea6c2t)bheiar
-SO  0S02 10.;38L
-XX
-MM  (miaiorcp ImiuiitnantChohP rm)tocpne
-MM  hlger ficitsetd 
-MM  nini yasnasalocltfu
-MM  gtihe tpeftn cmisooli
-XX
-DR  :A416Y41E02B( A12Y502.2;:8 61-41 LM)715-6
-DR  _RA3A1S8 TH7_RPSN.O:4
-DR  _P4.H T8A:A1R_S4S7RNO
-XX
-RN  [1]; RE0047775.
-RX  PUBMED: 16574738.
-RA  mn.KWie WeS.  a,M Nt,a ,K.d  B.e.uP.r ,MkkiheJ . My S v.ei
-RT  IMTICNTDL PHIC LHBLIA OMT 6  EN  TAIEV NTS  E25NMT1LMISPGEVTADAIIXEAN REEAATIMNITIV P NRE ISD EO ISIO,TNORDADECO YTYONVBYT.SRRDIPURNTLLCNAMIDTAUTHH I -T3TS
-RL  E.rlond o(nilo):  M.20.c60
-XX
-//
-- 
2.11.4.GIT