From b533f0eb93adb23ed16da217f5d6a3e8093725fa Mon Sep 17 00:00:00 2001 From: =?utf8?q?Carn=C3=AB=20Draug?= Date: Thu, 13 Sep 2018 12:26:17 +0100 Subject: [PATCH] Bio::DB::TFBS namespace has been moved to its own distribution named after itself --- Bio/DB/TFBS.pm | 166 ---- Bio/DB/TFBS/transfac_pro.pm | 1970 ------------------------------------- Changes | 3 + t/LocalDB/transfac_pro.t | 262 ----- t/data/transfac_pro/factor.dat | 342 ------- t/data/transfac_pro/fragment.dat | 111 --- t/data/transfac_pro/gene.dat | 349 ------- t/data/transfac_pro/matrix.dat | 243 ----- t/data/transfac_pro/readme.txt | 5 - t/data/transfac_pro/reference.dat | 92 -- t/data/transfac_pro/site.dat | 806 --------------- 11 files changed, 3 insertions(+), 4346 deletions(-) delete mode 100644 Bio/DB/TFBS.pm delete mode 100644 Bio/DB/TFBS/transfac_pro.pm delete mode 100644 t/LocalDB/transfac_pro.t delete mode 100644 t/data/transfac_pro/factor.dat delete mode 100644 t/data/transfac_pro/fragment.dat delete mode 100644 t/data/transfac_pro/gene.dat delete mode 100644 t/data/transfac_pro/matrix.dat delete mode 100644 t/data/transfac_pro/readme.txt delete mode 100644 t/data/transfac_pro/reference.dat delete mode 100644 t/data/transfac_pro/site.dat diff --git a/Bio/DB/TFBS.pm b/Bio/DB/TFBS.pm deleted file mode 100644 index 2309c5158..000000000 --- a/Bio/DB/TFBS.pm +++ /dev/null @@ -1,166 +0,0 @@ -# $Id: TFBS.pm,v 1.11 2006/08/12 11:00:03 sendu Exp $ -# -# BioPerl module for Bio::DB::TFBS -# -# Please direct questions and support issues to -# -# Cared for by Sendu Bala -# -# Copyright Sendu Bala -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::DB::TFBS - Access to a Transcription Factor Binding Site database - -=head1 SYNOPSIS - - use Bio::DB::TFBS; - - my $db = Bio::DB::TFBS->new(-source => 'transfac'); - my ($factor_id) = $db->get_factor_ids('PPAR-gamma1'); - my ($matrix_id) = $db->get_matrix_ids('PPAR-gamma1'); - - # get a Bio::Map::TranscriptionFactor with all the positions of a given factor - my $factor = $db->get_factor(-factor_id => $factor_id); - - # get a Bio::Map::GeneMap containing all the factors that bind near a given gene - my $gene_map = $db->get_gene_map(-gene_name => 'AQP 7'); - - # get a PSM (Bio::Matrix::PSM) of a given matrix - my $psm = $db->get_matrix(-matrix_id => $matrix_id); - - # get the aligned sequences (Bio::SimpleAlign) that were used to build a given - # matrix - my $align = $db->get_alignment(-matrix_id => $matrix_id); - - # get a specific instance sequence (Bio::LocatableSeq) - my $seq = $db->get_seq($id); - -=head1 DESCRIPTION - -This is a front end module for access to a Transcription Factor Binding Site -database. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via -the web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Sendu Bala - -Email bix@sendu.me.uk - -=head1 CONTRIBUTORS - -Based on Bio::DB::Taxonomy by Jason Stajich - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::DB::TFBS; -use strict; - -use Bio::Root::Root; - -use base qw(Bio::Root::Root); - -our $DefaultSource = 'transfac'; - -=head2 new - - Title : new - Usage : my $obj = Bio::DB::TFBS->new(-source => 'transfac'); - Function: Builds a new Bio::DB::TFBS object. - Returns : an instance of Bio::DB::TFBS - Args : -source => which database source: currently only 'transfac_pro' - -=cut - -sub new { - my ($class, @args) = @_; - - if ($class =~ /Bio::DB::TFBS::(\S+)/) { - my ($self) = $class->SUPER::new(@args); - $self->_initialize(@args); - return $self; - } - else { - my %param = @args; - @param{ map { lc $_ } keys %param } = values %param; # lowercase keys - my $source = $param{'-source'} || $DefaultSource; - - $source = "\L$source"; # normalize capitalization to lower case - - # normalize capitalization - return unless( $class->_load_tax_module($source) ); - return "Bio::DB::TFBS::$source"->new(@args); - } -} - -# empty for now -sub _initialize { } - -=head2 _load_tax_module - - Title : _load_tax_module - Usage : *INTERNAL Bio::DB::TFBS stuff* - Function: Loads up (like use) a module at run time on demand - -=cut - -sub _load_tax_module { - my ($self, $source) = @_; - my $module = "Bio::DB::TFBS::" . $source; - my $ok; - - eval { $ok = $self->_load_module($module) }; - if ( $@ ) { - print STDERR $@; - print STDERR < -# -# Cared for by Sendu Bala -# -# Copyright Sendu Bala -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::DB::TFBS::transfac_pro - An implementation of Bio::DB::TFBS -which uses local flat files for transfac pro - -=head1 SYNOPSIS - - use Bio::DB::Taxonomy; - - my $db = new Bio::DB::Taxonomy(-source => 'transfac_pro' - -dat_dir => $directory); - - # we're interested in the gene P5 - my ($gene_id) = $db->get_gene_ids(-name => 'P5'); # G000001 - - # we want all the transcription factors that bind to our gene - my @factor_ids = $db->get_factor_ids(-gene => $gene_id); - - # get info about those TFs - foreach my $factor_id (@factor_ids) { - my $factor = $db->get_factor($factor_id); - my $name = $factor->universal_name; - # etc. - see Bio::Map::TranscriptionFactor, eg. find out where it binds - } - - # get a matrix - my $matrix = $db->get_matrix('M00001'); - - # get a binding site sequence - my $seq = $db->get_site('R00001'); - -=head1 DESCRIPTION - -This is an implementation which uses local flat files and the DB_File -module RECNO data structures to manage a local copy of the Transfac Pro TFBS -database. - -Required database files require a license which can be obtained via -http://www.biobase-international.com/pages/index.php?id=170 - -Within the linux installation tarball you will find a cgibin tar ball, and -inside that is a data directory containing the .dat files needed by this -module. Point to that data directory with -dat_dir - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via -the web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Sendu Bala - -Email bix@sendu.me.uk - -=head1 CONTRIBUTORS - -Based on Bio::DB::Taxonomy::flatfile by Jason Stajich - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::DB::TFBS::transfac_pro; -use strict; -use Bio::Annotation::Reference; -use Bio::Annotation::SimpleValue; -use Bio::LocatableSeq; -use Bio::SimpleAlign; -use Bio::Matrix::PSM::SiteMatrix; -use Bio::AlignIO; -use Bio::Map::GeneMap; -use Bio::Map::TranscriptionFactor; -use Bio::Map::Position; -use Bio::Map::Relative; -use DB_File; - -use constant SEPARATOR => ':!:'; -use constant INTERNAL_SEPARATOR => '!:!'; - -$DB_BTREE->{'flags'} = R_DUP; # allow duplicate values in DB_File BTREEs - -use base qw(Bio::DB::TFBS); - -=head2 new - - Title : new - Usage : my $obj = new Bio::DB::TFBS::transfac_pro(); - Function: Builds a new Bio::DB::TFBS::transfac_pro object - Returns : an instance of Bio::DB::TTFBS::transfac_pro - Args : -dat_dir => name of directory where Transfac Pro .dat files - (required to initially build indexes) - -tax_db => Bio::DB::Taxonomy object, used when initially building - indexes, gives better results for species information - but not required. - -index_dir => name of directory where index files should be created - or already exist. (defaults to -dat_dir, required if - -dat_dir not supplied) - -force => 1 replace current indexes even if they exist - -=cut - -sub new { - my ($class, @args) = @_; - - my $self = $class->SUPER::new(@args); - - my ($dat_dir, $index_dir, $tax_db, $force) = $self->_rearrange([qw(DAT_DIR INDEX_DIR TAX_DB FORCE)], @args); - $self->throw("At least one of -dat_dir and -index_dir must be supplied") unless ($dat_dir || $index_dir); - - $self->index_directory($index_dir || $dat_dir); - $self->{_tax_db} = $tax_db if $tax_db; - - if ($dat_dir) { - $self->_build_index($dat_dir, $force); - } - - $self->_db_connect; - return $self; -} - -=head2 Bio::DB::TFBS Interface implementation - -=cut - -sub _get_ids { - my ($self, $dat, @args) = @_; - @args % 2 == 0 || $self->throw("Must provide key => value pairs"); - my $hash = $self->{$dat} || $self->throw("Unknown .dat type '$dat'"); - - if (@args) { - # get a subset corresponding to args - my @final; - my %args = @args; - my $multiple = 0; - while (my ($type, $value) = each %args) { - unless ($value) { - $self->warn("Arguement '$type' has no value, ignored"); - next; - } - $type =~ s/-//; - $type = lc($type); - my $converter = $hash->{$type}; - unless ($converter) { - $self->warn("Unknown search type '$type' for .dat type '$dat'"); - next; - } - - my @ids = $converter->get_dup($value); - unless (@ids) { - @ids = $converter->get_dup(lc($value)); - } - - if ($multiple) { - # we can have multiple types given at once, find the ids that - # satisfy all criteria - @final || return; - my %final = map { $_ => 1 } @final; - @final = grep { $final{$_} } @ids; - } - else { - @final = @ids; - $multiple++; - } - } - - return @final; - } - else { - # get them all - my $db_file_hash = $self->{$dat}->{id}; - - my ($key, $prev_key, $value) = ('_!_', '!_!'); - my @ids; - while (1) { - $db_file_hash->seq($key, $value, R_NEXT); - last if $prev_key eq $key; - push(@ids, $value); # confusing? when creating objects we store - # $value as accession and $key as id, but from - # this method we return $value as id given $id! - $prev_key = $key; - } - - return @ids; - } -} - -=head2 get_reference - - Title : get_reference - Usage : my $ref = $obj->get_reference($id); - Function: Get a literature reference. - Returns : Bio::Annotation::Reference - Args : string - a reference id ('RE...') - -=cut - -sub get_reference { - my ($self, $id) = @_; - $id || return; - my $data = $self->{reference}->{data}->{$id} || return; - my @data = split(SEPARATOR, $data); - - return Bio::Annotation::Reference->new(-pubmed => $data[0], - -authors => $data[1], - -title => $data[2], - -location => $data[3] ); -} - -=head2 get_genemap - - Title : get_genemap - Usage : my $map = $obj->get_genemap($id); - Function: Get a GeneMap for a gene. - Returns : Bio::Map::GeneMap - Args : string - a gene id ('G...'), and optionally int (number of bp - upstream) - -=cut - -sub get_genemap { - my ($self, $id, $upstream) = @_; - $id || return; - return $self->{got_map}->{$id} if defined $self->{got_map}->{$id}; - $upstream ||= 1000; - my $data = $self->{gene}->{data}->{$id} || return; - my @data = split(SEPARATOR, $data); - - # accession = id name description species_tax_id_or_raw_string - my $taxon = $self->{_tax_db} ? $self->{_tax_db}->get_taxon($data[3]) || $data[3] : $data[3]; - my $map = Bio::Map::GeneMap->get(-uid => $id, - -gene => $data[1], - -species => $taxon, - -description => $data[2], - -upstream => $upstream); - $self->{got_map}->{$id} = $map; # prevents infinite recurse when we call get_factor below - - # spawn all the factors that belong on this gene map - # get_factor_ids(-gene => ...) only works for genes that encode factors; - # have to go via sites - foreach my $sid ($self->get_site_ids(-gene => $id)) { - foreach my $fid ($self->get_factor_ids(-site => $sid)) { - # it is quite deliberate that we deeply recurse to arrive at the - # correct answer, which involves pulling in most of the database - no warnings "recursion"; - $self->get_factor($fid); - } - } - - return $map; -} - -=head2 get_seq - - Title : get_seq - Usage : my $seq = $obj->get_seq($id); - Function: Get the sequence of a site. The sequence will be annotated with the - the tags 'relative_start', 'relative_end', 'relative_type' and - 'relative_to'. - Returns : Bio::Seq - Args : string - a site id ('R...') - -=cut - -sub get_seq { - my ($self, $id) = @_; - $id || return; - my $data = $self->{site}->{data}->{$id} || return; - my @data = split(SEPARATOR, $data); - - my $seq = Bio::Seq->new(-seq => $data[2], - -accession_number => $id, - -description => $data[6] ? 'Genomic sequence' : 'Consensus or artificial sequence', - -id => $data[0], - -strand => 1, - -alphabet => $data[7] || 'dna', - -species => $data[6]); - - my $annot = $seq->annotation; - my $sv = Bio::Annotation::SimpleValue->new(-tagname => 'relative_start', -value => $data[4] || 1); - $annot->add_Annotation($sv); - $sv = Bio::Annotation::SimpleValue->new(-tagname => 'relative_end', -value => $data[5] || ($data[4] || 1 + length($data[2]) - 1)); - $annot->add_Annotation($sv); - $sv = Bio::Annotation::SimpleValue->new(-tagname => 'relative_type', -value => $data[3] || 'artificial'); - $annot->add_Annotation($sv); - $sv = Bio::Annotation::SimpleValue->new(-tagname => 'relative_to', -value => $data[1]); - $annot->add_Annotation($sv); - - return $seq; -} - -=head2 get_fragment - - Title : get_fragment - Usage : my $seq = $obj->get_fragment($id); - Function: Get the sequence of a fragment. - Returns : Bio::Seq - Args : string - a site id ('FR...') - -=cut - -sub get_fragment { - my ($self, $id) = @_; - $id || return; - my $data = $self->{fragment}->{data}->{$id} || return; - my @data = split(SEPARATOR, $data); - - # accession = id gene_id1 gene_id2 species_tax_id_or_raw_string sequence source - return new Bio::Seq( -seq => $data[4], - -accession_number => $id, - -description => 'Between genes '.$data[1].' and '.$data[2], - -species => $data[3], - -id => $data[0], - -alphabet => 'dna' ); -} - -=head2 get_matrix - - Title : get_matrix - Usage : my $matrix = $obj->get_matrix($id); - Function: Get a matrix that describes a binding site. - Returns : Bio::Matrix::PSM::SiteMatrix - Args : string - a matrix id ('M...'), optionally a sequence string from - which base frequencies will be calculated for the matrix model - (default 0.25 each) - -=cut - -sub get_matrix { - my ($self, $id, $seq) = @_; - $id || return; - $seq ||= 'atgc'; - $seq = lc($seq); - my $data = $self->{matrix}->{data}->{$id} || return; - my @data = split(SEPARATOR, $data); - $data[4] || $self->throw("Matrix data missing for $id"); - - my ($a, $c, $g, $t); - foreach my $position (split(INTERNAL_SEPARATOR, $data[4])) { - my ($a_count, $c_count, $g_count, $t_count) = split("\t", $position); - push(@{$a}, $a_count); - push(@{$c}, $c_count); - push(@{$g}, $g_count); - push(@{$t}, $t_count); - } - - # our psms include a simple background model so we can use - # sequence_match_weight() if desired - my $a_freq = ($seq =~ tr/a//) / length($seq); - my $c_freq = ($seq =~ tr/c//) / length($seq); - my $g_freq = ($seq =~ tr/g//) / length($seq); - my $t_freq = ($seq =~ tr/t//) / length($seq); - - my $psm = Bio::Matrix::PSM::SiteMatrix->new(-pA => $a, - -pC => $c, - -pG => $g, - -pT => $t, - -id => $data[0], - -accession_number => $id, - -sites => $data[3], - -width => scalar(@{$a}), - -correction => 1, - -model => { A => $a_freq, C => $c_freq, G => $g_freq, T => $t_freq } ); - - #*** used to make a Bio::Matrix::PSM::Psm and add references, but it - # didn't seem worth it. You can get references from the database by: - #foreach my $ref_id ($db->get_reference_ids(-matrix => $id)) { - # my $ref = $db->get_reference($ref_id); - #} - - return $psm; -} - -=head2 get_aln - - Title : get_aln - Usage : my $aln = $obj->get_aln($id); - Function: Get the alignment that was used to generate a matrix. Each sequence - in the alignment will have an accession_number corresponding to the - Transfac site id, and id() based on that but unique within the - alignment. - Returns : Bio::SimpleAlign - Args : string - a matrix id ('M...'), optionally true to, when a matrix - lists no sequences, search for sequences via the matrix's factors, - picking the sites that best match the matrix - -=cut - -my %VALID_STRAND = map {$_ => 1} qw(-1 0 1); - -sub get_aln { - my ($self, $id, $via_factors) = @_; - $id || return; - my $data = $self->{matrix}->{data}->{$id} || $self->throw("matrix '$id' had no data in DB_File"); - my @data = split(SEPARATOR, $data); - - if (! $data[5] && $via_factors) { - # This is a matrix with no site sequences given in matrix.dat. - # Find some matching site sequences via factors. - - # First, check its factors for sites - my %site_seqs; - my %factor_ids; - foreach my $factor_id ($self->get_factor_ids(-matrix => $id)) { - $factor_ids{$factor_id} = 1; - foreach my $site_id ($self->get_site_ids(-factor => $factor_id)) { - next if defined $site_seqs{$site_id}; - my $seq = $self->get_seq($site_id); - - # skip sites that have no sequence, or have IUPAC symbols in - # their sequence (most probably the 'consensus' sequence itself - # that was used to make and exactly corresponds to the matrix) - my $seq_str = $seq->seq || next; - $seq_str =~ /[MRWSYKVHDB]/ and next; - - $site_seqs{$site_id} = $seq; - } - } - my @seqs = values %site_seqs; - - if (@seqs > 1) { - # pick the sub-seqs that match to the matrix with the best scores - my $matrix = $self->get_matrix($id); - my $desired_sequences = $matrix->sites; - return if @seqs < $desired_sequences; - - my $desired_length = $matrix->width; - my %best_seqs; - foreach my $seq (@seqs) { - my $for_str = $seq->seq; - next if length($for_str) < $desired_length; - my $rev_str = $seq->revcom->seq; - - my $best_score = 0; - my $best_subseq = ''; - my $best_i = 0; - my $best_subseq_caps = 0; - my $best_revcom; - my $revcom = 0; - foreach my $seq_str ($for_str, $rev_str) { - for my $i (0..(length($seq_str) - $desired_length)) { - my $subseq = substr($seq_str, $i, $desired_length); - $subseq =~ s/[^ACGTacgt]//g; # can only score atcg - next unless length($subseq) == $desired_length; # short or 0-length seqs could get the highest scores! - my $score = $matrix->sequence_match_weight($subseq); - - # caps represent the author-chosen bit of a site - # sequence so we would prefer to choose a subseq that - # contains it - my $caps = $subseq =~ tr/ACGT//; - - #*** (don't know why numeric == fails for comparing - # scores, when the string eq works) - if ($score > $best_score || ("$score" eq "$best_score" && $caps > $best_subseq_caps)) { - $best_score = $score; - $best_subseq_caps = $caps; - $best_subseq = $subseq; - $best_i = $i; - $best_revcom = $revcom; - } - } - $revcom++; - } - - if ($best_score) { - $best_seqs{$seq->accession_number} = [$best_subseq, $seq->accession_number, ($best_i + 1), $revcom ? -1 : 1, $best_score]; - } - } - my @sorted = sort { $best_seqs{$b}->[-1] <=> $best_seqs{$a}->[-1] } keys %best_seqs; - return if @sorted < $desired_sequences; - splice(@sorted, $desired_sequences); - my %wanted = map { $_ => 1 } @sorted; - - my @site_data; - foreach my $seq (@seqs) { - next unless exists $wanted{$seq->accession_number}; - my @data = @{$best_seqs{$seq->accession_number}}; - pop(@data); - push(@site_data, join('_', @data)); - } - - $data[5] = join(INTERNAL_SEPARATOR, @site_data); - $self->{matrix}->{data}->{$id} = join(SEPARATOR, @data); - } - } - $data[5] || return; - - my @blocks = split(INTERNAL_SEPARATOR, $data[5]); - - # append gap chars to all sequences to make them the same length - # (applies to sequences found via factors, presumably, since we already - # do this for matrix alignments in transfac_pro.pm) - my $longest = 0; - foreach (@blocks) { - my ($seq) = split('_', $_); - my $length = length($seq); - if ($length > $longest) { - $longest = $length; - } - } - foreach my $i (0..$#blocks) { - my $block = $blocks[$i]; - my ($seq, $seq_id) = split('_', $block); - my $length = length($seq); - if ($length < $longest) { - my $orig_seq = $seq; - $seq .= '-'x($longest - $length); - $block =~ s/^${orig_seq}_/${seq}_/; - $blocks[$i] = $block; - } - } - - # build the alignment - my $aln = Bio::SimpleAlign->new(-source => 'transfac_pro'); - my %done_ids; - foreach (@blocks) { - my ($seq, $seq_acc, $start, $strand) = split('_', $_); - - $self->throw("Invalid strand $strand found in block $_") - unless exists $VALID_STRAND{$strand}; - # we can get back multiple different subparts of the same site (sequence), - # so $seq_acc isn't unique across this loop. Can't use it as the seq id - # of the alignment (ids must be unique in SimpleAlign), so we - # uniquify the id and store the original id as the accession_number - my $seq_id; - $done_ids{$seq_acc}++; - if ($done_ids{$seq_acc} > 1) { - $seq_id = $seq_acc.'_'.$done_ids{$seq_acc}; - } - else { - $seq_id = $seq_acc; - } - - my $gaps = $seq =~ tr/-//; - my $length = length($seq) - $gaps; - $self->throw("seq '$seq_id' for matrix '$id' had seq '$seq'") unless $length; - $aln->add_seq(Bio::LocatableSeq->new(-seq => $seq, - -id => $seq_id, - -accession_number => $seq_acc, - -start => $start, - -end => $start + $length - 1, - -strand => $strand)); - } - $aln->id($id); - # could also store score? of? - - return $aln; -} - -=head2 get_factor - - Title : get_factor - Usage : my $factor = $obj->get_factor($id); - Function: Get the details of a transcription factor. - Returns : Bio::Map::TranscriptionFactor - Args : string - a factor id ('T...') - -=cut - -sub get_factor { - my ($self, $id) = @_; - $id || return; - return $self->{got_factor}->{$id} if defined $self->{got_factor}->{$id}; - my $data = $self->{factor}->{data}->{$id} || return; - my @data = split(SEPARATOR, $data); - - # accession = id name species sequence - my $tf = Bio::Map::TranscriptionFactor->get(-id => $id, - -universal_name => $data[1]); - #*** not sure what to do with species and sequence, since we don't want to - # confuse the idea that a TF is a general thing that could bind to any - # species... then again, you might want to model species-specific variants - # of a TF with different binding abilities... - #*** idea of having inclusion and exclusion species so you can prevent/ - # ignore a tf that binds to the wrong species (a species that doesn't even - # have the tf), and associating sequence with each species/tf combo so you - # can see how diverged the tf is and make assumptions about site difference - # allowance - - # place it on all its genemaps - foreach my $sid ($self->get_site_ids(-factor => $id)) { - my $s_data = $self->{site}->{data}->{$sid} || next; - my @s_data = split(SEPARATOR, $s_data); - - # accession = id gene_id sequence relative_to first_position last_position species_tax_id_or_raw_string - $s_data[1] || next; # site isn't relative to a gene, meaningless - $s_data[4] || next; # don't know where its supposed to be, can't model it - $s_data[5] ||= $s_data[4] + ($s_data[2] ? length($s_data[2]) - 1 : 0); - - # it is quite deliberate that we deeply recurse to arrive at the - # correct answer, which involves pulling in most of the database - no warnings "recursion"; - my $gene_map = $self->get_genemap($s_data[1]) || next; - return $self->{got_factor}->{$id} if defined $self->{got_factor}->{$id}; - - #*** not always relative to gene start... - # we need Bio::Map::Gene s to have some default tss and atg positions - # that we can be relative to - my $rel = Bio::Map::Relative->new(-element => $gene_map->gene, -description => $s_data[3]); - Bio::Map::Position->new(-map => $gene_map, -element => $tf, -start => $s_data[4], -end => $s_data[5], -relative => $rel); - } - - $self->{got_factor}->{$id} = $tf; - return $tf; -} - -# since get_factor() is uncertain, just have direct access methods to factor -# information -sub get_factor_name { - my ($self, $id) = @_; - my $details = $self->_get_factor_details($id) || return; - return $details->{name}; -} -sub get_factor_species { - my ($self, $id) = @_; - my $details = $self->_get_factor_details($id) || return; - return $details->{species}; -} -sub get_factor_sequence { - my ($self, $id) = @_; - my $details = $self->_get_factor_details($id) || return; - return $details->{sequence}; -} -sub _get_factor_details { - my ($self, $id) = @_; - $id || return; - - return $self->{factor_details}->{$id} if defined $self->{factor_details}->{$id}; - - my $data = $self->{factor}->{data}->{$id} || return; - my @data = split(SEPARATOR, $data); - - # accession = id name species sequence - - my %details = (name => $data[1], species => $data[2], sequence => $data[3]); - $self->{factor_details}->{$id} = \%details; - - return \%details; -} - -=head2 get_reference_ids - - Title : get_reference_ids - Usage : my @ids = $obj->get_reference_ids(-key => $value); - Function: Get all the reference ids that are associated with the supplied - args. - Returns : list of strings (ids) - Args : -key => value, where value is a string id, and key is one of: - -pubmed -site -gene -matrix -factor - -=cut - -sub get_reference_ids { - my $self = shift; - return $self->_get_ids('reference', @_); -} - -# -id -name -species -site -factor -reference -sub get_gene_ids { - my $self = shift; - return $self->_get_ids('gene', @_); -} - -=head2 get_site_ids - - Title : get_site_ids - Usage : my @ids = $obj->get_site_ids(-key => $value); - Function: Get all the site ids that are associated with the supplied - args. - Returns : list of strings (ids) - Args : -key => value, where value is a string id, and key is one of: - -id -species -gene -matrix -factor -reference - -=cut - -sub get_site_ids { - my $self = shift; - return $self->_get_ids('site', @_); -} - -=head2 get_matrix_ids - - Title : get_matrix_ids - Usage : my @ids = $obj->get_matrix_ids(-key => $value); - Function: Get all the matrix ids that are associated with the supplied - args. - Returns : list of strings (ids) - Args : -key => value, where value is a string id, and key is one of: - -id -name -site -factor -reference - -=cut - -sub get_matrix_ids { - my $self = shift; - return $self->_get_ids('matrix', @_); -} - -=head2 get_factor_ids - - Title : get_factor_ids - Usage : my @ids = $obj->get_factor_ids(-key => $value); - Function: Get all the factor ids that are associated with the supplied - args. - Returns : list of strings (ids) - Args : -key => value, where value is a string id, and key is one of: - -id -name -species -interactors -gene -matrix -site -reference - NB: -gene only gets factor ids for genes that encode factors - -=cut - -sub get_factor_ids { - my $self = shift; - return $self->_get_ids('factor', @_); -} - -=head2 get_fragment_ids - - Title : get_fragment_ids - Usage : my @ids = $obj->get_fragment_ids(-key => $value); - Function: Get all the fragment ids that are associated with the supplied - args. - Returns : list of strings (ids) - Args : -key => value, where value is a string id, and key is one of: - -id -species -gene -factor -reference - -=cut - -sub get_fragment_ids { - my $self = shift; - return $self->_get_ids('fragment', @_); -} - -=head2 Helper methods - -=cut - -# internal method which does the indexing -sub _build_index { - my ($self, $dat_dir, $force) = @_; - - # MLDBM would give us transparent complex data structures with DB_File, - # allowing just one index file, but its yet another requirement and we - # don't strictly need it - - my $index_dir = $self->index_directory; - my $gene_index = "$index_dir/gene.dat.index"; - my $reference_index = "$index_dir/reference.dat.index"; - my $matrix_index = "$index_dir/matrix.dat.index"; - my $factor_index = "$index_dir/factor.dat.index"; - my $fragment_index = "$index_dir/fragment.dat.index"; - my $site_index = "$index_dir/site.dat.index"; - - my $reference_dat = "$dat_dir/reference.dat"; - if (! -e $reference_index || $force) { - open my $REF, '<', $reference_dat or $self->throw("Could not read reference file '$reference_dat': $!"); - - my %references; - unlink $reference_index; - my $ref = tie(%references, 'DB_File', $reference_index, O_RDWR|O_CREAT, 0644, $DB_HASH) - or $self->throw("CCould not open file '$reference_index': $!"); - - my %pubmed; - my $reference_pubmed = $reference_index.'.pubmed'; - unlink $reference_pubmed; - my $pub = tie(%pubmed, 'DB_File', $reference_pubmed, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$reference_pubmed': $!"); - - my %gene; - my $reference_gene = $gene_index.'.reference'; - unlink $reference_gene; - my $gene = tie(%gene, 'DB_File', $reference_gene, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$reference_gene': $!"); - - my %site; - my $reference_site = $site_index.'.reference'; - unlink $reference_site; - my $site = tie(%site, 'DB_File', $reference_site, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$reference_site': $!"); - - my %fragment; - my $reference_fragment = $fragment_index.'.reference'; - unlink $reference_fragment; - my $fragment = tie(%fragment, 'DB_File', $reference_fragment, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$reference_fragment': $!"); - - my %factor; - my $reference_factor = $factor_index.'.reference'; - unlink $reference_factor; - my $factor = tie(%factor, 'DB_File', $reference_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$reference_factor': $!"); - - my %matrix; - my $reference_matrix = $matrix_index.'.reference'; - unlink $reference_matrix; - my $matrix = tie(%matrix, 'DB_File', $reference_matrix, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$reference_matrix': $!"); - - # skip the first three header lines - <$REF>; <$REF>; <$REF>; - - my @data; - while (<$REF>) { - if (/^AC (\S+)/) { - $data[0] = $1; - } - elsif (/^RX PUBMED: (\d+)/) { - $data[1] = $1; - $pub->put("$1", $data[0]); - } - elsif (/^RA (.+)\n$/) { - $data[2] = $1; - } - elsif (/^RT (.+?)\.?\n$/) { - $data[3] = $1; - } - elsif (/^RL (.+?)\.?\n$/) { - $data[4] = $1; - } - elsif (/^GE TRANSFAC: (\w\d+)/) { - $gene->put($data[0], "$1"); - } - elsif (/^BS TRANSFAC: (\w\d+)/) { - $site->put($data[0], "$1"); - } - elsif (/^FA TRANSFAC: (\w\d+)/) { - $factor->put($data[0], "$1"); - } - elsif (/^FR TRANSFAC: (FR\d+)/) { - $fragment->put($data[0], "$1"); - } - elsif (/^MX TRANSFAC: (\w\d+)/) { - $matrix->put($data[0], "$1"); - } - elsif (/^\/\//) { - # end of a record, store previous data and reset - - # accession = pubmed authors title location - $references{$data[0]} = join(SEPARATOR, ($data[1] || '', - $data[2] || '', - $data[3] || '', - $data[4] || '')); - - @data = (); - } - } - close $REF; - - $ref = $pub = $gene = $site = $fragment = $factor = $matrix = undef; - untie %references; - untie %pubmed; - untie %gene; - untie %site; - untie %fragment; - untie %factor; - untie %matrix; - } - - my $gene_dat = "$dat_dir/gene.dat"; - if (! -e $gene_index || $force) { - open my $GEN, '<', $gene_dat or $self->throw("Could not read gene file '$gene_dat': $!"); - - my %genes; - unlink $gene_index; - my $gene = tie(%genes, 'DB_File', $gene_index, O_RDWR|O_CREAT, 0644, $DB_HASH) - or $self->throw("Could not open file '$gene_index': $!"); - - my %id; - my $gene_id = $gene_index.'.id'; - unlink $gene_id; - my $id = tie(%id, 'DB_File', $gene_id, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$gene_id': $!"); - - my %name; - my $gene_name = $gene_index.'.name'; - unlink $gene_name; - my $name = tie(%name, 'DB_File', $gene_name, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$gene_name': $!"); - - my %species; - my $gene_species = $gene_index.'.species'; - unlink $gene_species; - my $species = tie(%species, 'DB_File', $gene_species, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$gene_species': $!"); - - my %site; - my $gene_site = $site_index.'.gene'; - unlink $gene_site; - my $site = tie(%site, 'DB_File', $gene_site, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$gene_site': $!"); - - my %factor; - my $gene_factor = $factor_index.'.gene'; - unlink $gene_factor; - my $factor = tie(%factor, 'DB_File', $gene_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$gene_factor': $!"); - - my %fragment; - my $gene_fragment = $fragment_index.'.gene'; - unlink $gene_fragment; - my $fragment = tie(%fragment, 'DB_File', $gene_fragment, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$gene_fragment': $!"); - - my %reference; - my $gene_reference = $reference_index.'.gene'; - unlink $gene_reference; - my $reference = tie(%reference, 'DB_File', $gene_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$gene_reference': $!"); - - # skip the first three header lines - <$GEN>; <$GEN>; <$GEN>; - - my @data; - while (<$GEN>) { - if (/^AC (\S+)/) { - $data[0] = $1; - } - elsif (/^ID (\S+)/) { - $data[1] = $1; - $id->put("$1", $data[0]); - } - elsif (/^SD (.+)$/) { - $data[2] = lc("$1"); - $name->put(lc("$1"), $data[0]); - } - elsif (/^SY (.+)\.$/) { - foreach (split('; ', lc("$1"))) { - $name->put($_, $data[0]); - } - } - elsif (/^DE (.+)$/) { - $data[3] = $1; - } - elsif (/^OS (.+)$/) { - my $raw_species = $1; - my $taxid = $self->_species_to_taxid($raw_species); - $data[4] = $taxid || $raw_species; - $species->put($data[4], $data[0]); - } - elsif (/^RN .+?(RE\d+)/) { - $reference->put($data[0], "$1"); - } - elsif (/^BS .+?(R\d+)/) { - $site->put($data[0], "$1"); - } - elsif (/^FA (T\d+)/) { - $factor->put($data[0], "$1"); - } - elsif (/^BR (FR\d+)/) { - $fragment->put($data[0], "$1"); - } - elsif (/^\/\//) { - # end of a record, store previous data and reset - - # accession = id name description species_tax_id_or_raw_string - $genes{$data[0]} = join(SEPARATOR, ($data[1] || '', - $data[2] || '', - $data[3] || '', - $data[4] || '')); - - @data = (); - } - } - close $GEN; - - $gene = $id = $name = $species = $site = $factor = $reference = undef; - untie %genes; - untie %id; - untie %name; - untie %species; - untie %site; - untie %factor; - untie %reference; - } - - my $site_dat = "$dat_dir/site.dat"; - if (! -e $site_index || $force) { - open my $SIT, '<', $site_dat or $self->throw("Could not read site file '$site_dat': $!"); - - my %sites; - unlink $site_index; - my $site = tie(%sites, 'DB_File', $site_index, O_RDWR|O_CREAT, 0644, $DB_HASH) - or $self->throw("Could not open file '$site_index': $!"); - - my %id; - my $site_id = $site_index.'.id'; - unlink $site_id; - my $id = tie(%id, 'DB_File', $site_id, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$site_id': $!"); - - my %species; - my $site_species = $site_index.'.species'; - unlink $site_species; - my $species = tie(%species, 'DB_File', $site_species, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$site_species': $!"); - - my %qualities; - my $site_qualities = $site_index.'.qual'; - unlink $site_qualities; - my $quality = tie(%qualities, 'DB_File', $site_qualities, O_RDWR|O_CREAT, 0644, $DB_HASH) - or $self->throw("Could not open file '$site_qualities': $!"); - - my %gene; - my $site_gene = $gene_index.'.site'; - unlink $site_gene; - my $gene = tie(%gene, 'DB_File', $site_gene, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$site_gene': $!"); - - my %matrix; - my $site_matrix = $matrix_index.'.site'; - unlink $site_matrix; - my $matrix = tie(%matrix, 'DB_File', $site_matrix, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$site_matrix': $!"); - - my %factor; - my $site_factor = $factor_index.'.site'; - unlink $site_factor; - my $factor = tie(%factor, 'DB_File', $site_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$site_factor': $!"); - - my %reference; - my $site_reference = $reference_index.'.site'; - unlink $site_reference; - my $reference = tie(%reference, 'DB_File', $site_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$site_reference': $!"); - - # skip the first three header lines - <$SIT>; <$SIT>; <$SIT>; - - my @data; - while (<$SIT>) { - if (/^AC (\S+)/) { - $data[0] = $1; - } - elsif (/^ID (\S+)/) { - $data[1] = $1; - $id->put("$1", $data[0]); - } - elsif (/^TY (.+)$/) { - $data[8] = $1; - } - elsif (/^DE .*Gene: (G\d+)/) { - $data[2] = $1; - $gene->put($data[0], "$1"); - - # if it has no gene it is an artificial sequence, unless it - # has a species (OS line), in which case it is unassigned - # genomic; either way we won't be able to make a - # Bio::Map::PositionI later on, so such sites won't be - # on any MapI. - } - elsif (/^OS (.+)$/) { - # Since not all sites in site.dat with a species have a gene, - # (small handful are unassigned 'genomic') can't delegate to - # gene.dat and must parse species here (effectively again) - my $raw_species = $1; - my $taxid = $self->_species_to_taxid($raw_species); - $data[7] = $taxid || $raw_species; - $species->put($data[7], $data[0]); - } - elsif (/^SQ (.+)\.$/) { - $data[3] = $1; - # there can actually be more than one SQ line, seemingly with - # variations of the sequence (not a long sequence split over - # two lines); not sure what to do with data; currently we end - # up storing only the last variant. - } - elsif (/^S1 (.+)$/) { - $data[4] = $1; - # if S1 not present, means transcriptional start - } - elsif (/^SF (.+)$/) { - $data[5] = $1; - } - elsif (/^ST (.+)$/) { - $data[6] = $1; - } - elsif (/^RN .+?(RE\d+)/) { - $reference->put($data[0], "$1"); - } - elsif (/^MX (M\d+)/) { - $matrix->put($data[0], "$1"); - } - elsif (/^BF (T\d+); .+?; Quality: (\d)/) { - $factor->put($data[0], "$1"); - $qualities{$data[0].SEPARATOR.$1} = $2; - } - elsif (/^\/\//) { - # end of a record, store previous data and reset - - # accession = id gene_id sequence relative_to first_position last_position species_tax_id_or_raw_string type - $sites{$data[0]} = join(SEPARATOR, ($data[1] || '', - $data[2] || '', - $data[3] || '', - $data[4] || 'TSS', - $data[5] || '', - $data[6] || '', - $data[7] || '', - $data[8] || '')); - - @data = (); - } - } - close $SIT; - - $site = $id = $species = $quality = $gene = $matrix = $factor = $reference = undef; - untie %sites; - untie %id; - untie %species; - untie %qualities; - untie %gene; - untie %matrix; - untie %factor; - untie %reference; - } - - my $matrix_dat = "$dat_dir/matrix.dat"; - if (! -e $matrix_index || $force) { - open my $MAT, '<', $matrix_dat or $self->throw("Could not read matrix file '$matrix_dat': $!"); - - my %matrices; - unlink $matrix_index; - my $matrix = tie(%matrices, 'DB_File', $matrix_index, O_RDWR|O_CREAT, 0644, $DB_HASH) - or $self->throw("Could not open file '$matrix_index': $!"); - - my %id; - my $matrix_id = $matrix_index.'.id'; - unlink $matrix_id; - my $id = tie(%id, 'DB_File', $matrix_id, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$matrix_id': $!"); - - my %name; - my $matrix_name = $matrix_index.'.name'; - unlink $matrix_name; - my $name = tie(%name, 'DB_File', $matrix_name, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$matrix_name': $!"); - - my %site; - my $matrix_site = $site_index.'.matrix'; - unlink $matrix_site; - my $site = tie(%site, 'DB_File', $matrix_site, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$matrix_site': $!"); - - my %factor; - my $matrix_factor = $factor_index.'.matrix'; - unlink $matrix_factor; - my $factor = tie(%factor, 'DB_File', $matrix_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$matrix_factor': $!"); - - my %reference; - my $matrix_reference = $reference_index.'.matrix'; - unlink $matrix_reference; - my $reference = tie(%reference, 'DB_File', $matrix_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$matrix_reference': $!"); - - # skip the first three header lines - <$MAT>; <$MAT>; <$MAT>; - - my @data; - my @matrix_data; - my @site_data; - while (<$MAT>) { - if (/^AC (\S+)/) { - $data[0] = $1; - } - elsif (/^ID (\S+)/) { - $data[1] = $1; - $id->put("$1", $data[0]); - } - elsif (/^NA (.+)$/) { - $data[2] = $1; - $name->put("$1", $data[0]); - } - elsif (/^DE (.+)$/) { - $data[3] = $1; - } - elsif (/^\d\d \s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) { - # a, c, g, t counts/weights - push(@matrix_data, join("\t", ($1, $2, $3, $4))); - - # Work out the number of sites as the largest number of - # sites amongst all positions in the sequences. (The BA - # line isn't reliable for telling us the correct number of - # sites all the time) - my $num = $1 + $2 + $3 + $4; - $data[4] ||= 0; - if ($num > $data[4]) { - $data[4] = $num; - } - } - elsif (/^BS ([\sa-zA-Z]+); (.+?); (-?\d+); \d+;.*; ([np])/) { - # sequence id start strand - push(@site_data, join('_', ($1, $2, $3, $4 eq 'p' ? 1 : -1))); - $site->put($data[0], $2); - } - elsif (/^BF (T\d+)/) { - $factor->put($data[0], "$1"); - } - elsif (/^RN .+?(RE\d+)/) { - $reference->put($data[0], "$1"); - } - elsif (/^\/\//) { - # end of a record, store previous data and reset - my $matrix_data = join(INTERNAL_SEPARATOR, @matrix_data) || ''; - - # sites of a matrix are pre-aligned but padded with spaces on - # the left and no padding on the right; pad with -s both sides - my $longest_seq = 0; - - # For all the work, does anything meaningful actually get passed - # on here? Commenting out fixes the latest crashes on trunk. - # 5-10-10 cjfields - - #foreach my $site_seq (map {my ($seq) = split("_", $_ ,2); $seq;} @site_data) { - # $site_seq =~ s/ /-/g; - # my $length = length($site_seq); - # if ($length > $longest_seq) { - # $longest_seq = $length; - # } - #} - #foreach my $site (@site_data) { - # my ($site_seq) = split("_", $site ,2); - # my $length = length($site_seq); - # if ($length < $longest_seq) { - # $site_seq .= '-' x ($longest_seq - $length); - # } - #} - - my $site_data = join(INTERNAL_SEPARATOR, @site_data) || ''; - - # accession = id name description num_of_sites matrix_data site_data - $matrices{$data[0]} = join(SEPARATOR, ($data[1] || '', - $data[2] || '', - $data[3] || '', - $data[4], - $matrix_data, - $site_data)); - - @data = @matrix_data = @site_data = (); - } - } - close $MAT; - - $matrix = $id = $name = $site = $factor = $reference = undef; - untie %matrices; - untie %id; - untie %name; - untie %site; - untie %factor; - untie %reference; - } - - my $factor_dat = "$dat_dir/factor.dat"; - if (! -e $factor_index || $force) { - open my $FAC, '<', $factor_dat or $self->throw("Could not read factor file '$factor_dat': $!"); - - my %factors; - unlink $factor_index; - my $factor = tie(%factors, 'DB_File', $factor_index, O_RDWR|O_CREAT, 0644, $DB_HASH) - or $self->throw("Could not open file '$factor_index': $!"); - - my %id; - my $factor_id = $factor_index.'.id'; - unlink $factor_id; - my $id = tie(%id, 'DB_File', $factor_id, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$factor_id': $!"); - - my %name; - my $factor_name = $factor_index.'.name'; - unlink $factor_name; - my $name = tie(%name, 'DB_File', $factor_name, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$factor_name': $!"); - - my %species; - my $factor_species = $factor_index.'.species'; - unlink $factor_species; - my $species = tie(%species, 'DB_File', $factor_species, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$factor_species': $!"); - - my %interactors; - my $factor_interactors = $factor_index.'.interactors'; - unlink $factor_interactors; - my $interact = tie(%interactors, 'DB_File', $factor_interactors, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$factor_interactors': $!"); - - my %gene; - my $factor_gene = $gene_index.'.factor'; - unlink $factor_gene; - my $gene = tie(%gene, 'DB_File', $factor_gene, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$factor_gene': $!"); - - my %matrix; - my $factor_matrix = $matrix_index.'.factor'; - unlink $factor_matrix; - my $matrix = tie(%matrix, 'DB_File', $factor_matrix, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$factor_matrix': $!"); - - my %site; - my $factor_site = $site_index.'.factor'; - unlink $factor_site; - my $site = tie(%site, 'DB_File', $factor_site, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$factor_site': $!"); - - my %fragment; - my $factor_fragment = $fragment_index.'.factor'; - unlink $factor_fragment; - my $fragment = tie(%fragment, 'DB_File', $factor_fragment, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$factor_fragment': $!"); - - my %reference; - my $factor_reference = $reference_index.'.factor'; - unlink $factor_reference; - my $reference = tie(%reference, 'DB_File', $factor_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$factor_reference': $!"); - - # skip the first three header lines - <$FAC>; <$FAC>; <$FAC>; - - my @data; - my $sequence = ''; - while (<$FAC>) { - if (/^AC (\S+)/) { - $data[0] = $1; - } - elsif (/^ID (\S+)/) { - # IDs are always the same as AC? Is this needed? - $data[1] = $1; - $id->put("$1", $data[0]); - } - elsif (/^FA (.+)$/) { - $data[2] = $1; - $name->put("$1", $data[0]); - } - elsif (/^OS (.+)$/) { - # This is the species the actual factor came from, which may - # differ from the species of any sequences it is described as - # binding to. Not all factors that have a species have a gene, - # so can't delegate species to a gene lookup. - my $raw_species = $1; - my $taxid = $self->_species_to_taxid($raw_species); - $data[3] = $taxid || $raw_species; - $species->put($data[3], $data[0]); - } - elsif (/^GE (G\d+)/) { - $gene->put($data[0], "$1"); - } - elsif (/^SQ (.+)$/) { - $sequence .= $1; - } - elsif (/^IN (T\d+)/) { - $interact->put($data[0], "$1"); - } - elsif (/^MX (M\d+)/) { - $matrix->put($data[0], "$1"); - } - elsif (/^BS (R\d+)/) { - $site->put($data[0], "$1"); - } - elsif (/^BR (FR\d+)/) { - $fragment->put($data[0], "$1"); - } - elsif (/^RN .+?(RE\d+)/) { - $reference->put($data[0], "$1"); - } - elsif (/^\/\//) { - # end of a record, store previous data and reset - - # accession = id name species sequence - $factors{$data[0]} = join(SEPARATOR, ($data[1] || '', - $data[2] || '', - $data[3] || '', - $sequence)); - - @data = (); - $sequence = ''; - } - } - close $FAC; - - $factor = $id = $name = $species = $interact = $gene = $matrix = $site = $fragment = $reference = undef; - untie %factors; - untie %id; - untie %name; - untie %species; - untie %interactors; - untie %gene; - untie %matrix; - untie %site; - untie %fragment; - untie %reference; - } - - my $fragment_dat = "$dat_dir/fragment.dat"; - if (! -e $fragment_index || $force) { - if (open my $FRA, '<', $fragment_dat) { - my %fragments; - unlink $fragment_index; - my $fragment = tie(%fragments, 'DB_File', $fragment_index, O_RDWR|O_CREAT, 0644, $DB_HASH) - or $self->throw("Could not open file '$fragment_index': $!"); - - my %id; - my $fragment_id = $fragment_index.'.id'; - unlink $fragment_id; - my $id = tie(%id, 'DB_File', $fragment_id, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$fragment_id': $!"); - - my %qualities; - my $fragment_qualities = $fragment_index.'.qual'; - unlink $fragment_qualities; - my $quality = tie(%qualities, 'DB_File', $fragment_qualities, O_RDWR|O_CREAT, 0644, $DB_HASH) - or $self->throw("Could not open file '$fragment_qualities': $!"); - - my %species; - my $fragment_species = $fragment_index.'.species'; - unlink $fragment_species; - my $species = tie(%species, 'DB_File', $fragment_species, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$fragment_species': $!"); - - my %gene; - my $fragment_gene = $gene_index.'.fragment'; - unlink $fragment_gene; - my $gene = tie(%gene, 'DB_File', $fragment_gene, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$fragment_gene': $!"); - - my %factor; - my $fragment_factor = $factor_index.'.fragment'; - unlink $fragment_factor; - my $factor = tie(%factor, 'DB_File', $fragment_factor, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$fragment_factor': $!"); - - my %reference; - my $fragment_reference = $reference_index.'.fragment'; - unlink $fragment_reference; - my $reference = tie(%reference, 'DB_File', $fragment_reference, O_RDWR|O_CREAT, 0644, $DB_BTREE) - or $self->throw("Could not open file '$fragment_reference': $!"); - - # skip the first three header lines - <$FRA>; <$FRA>; <$FRA>; - - my @data; - while (<$FRA>) { - if (/^AC (\S+)/) { - $data[0] = $1; - } - elsif (/^ID (\S+)/) { - # IDs are always the same as AC? Is this needed? - $data[1] = $1; - $id->put("$1", $data[0]); - } - elsif (/^DE Gene: (G\d+)(?:.+Gene: (G\d+))?/) { - my ($gene1, $gene2) = ($1, $2); - $data[2] = $gene1; - $data[3] = $gene2; # could be undef - $gene->put($data[0], $gene1); - $gene->put($data[0], $gene2) if $gene2; - } - elsif (/^OS (.+)$/) { - # As per the site.dat parsing - my $raw_species = $1; - my $taxid = $self->_species_to_taxid($raw_species); - $data[4] = $taxid || $raw_species; - $species->put($data[4], $data[0]); - } - elsif (/^SQ [atcgn]*([ATCGN]+)[atcgn]*/) { - $data[5] .= $1; - # there can be (usually are) multiple SQ lines with a single - # long seq split over them. The 'real' sequence is in caps - } - elsif (/^SC Build (\S+):$/) { - $data[6] = $1; - # maybe parse it out a little more? We have build, - # chromosomal coords and strand, eg. - # SC Build HSA_May2004: Chr.2 43976692..43978487 (FORWARD). - } - elsif (/^RN .+?(RE\d+)/) { - $reference->put($data[0], "$1"); - } - elsif (/^BF (T\d+); .+?; Quality: (\d)/) { - $factor->put($data[0], "$1"); - $qualities{$data[0].SEPARATOR.$1} = $2; - } - elsif (/^\/\//) { - # end of a record, store previous data and reset - - # accession = id gene_id1 gene_id2 species_tax_id_or_raw_string sequence source - $fragments{$data[0]} = join(SEPARATOR, ($data[1] || '', - $data[2] || '', - $data[3] || '', - $data[4] || '', - $data[5] || '', - $data[6] || '')); - - @data = (); - } - } - close $FRA; - - $fragment = $id = $species = $quality = $gene = $factor = $reference = undef; - untie %fragments; - untie %id; - untie %species; - untie %qualities; - untie %gene; - untie %factor; - untie %reference; - } - else { - $self->warn("Could not read fragment file '$fragment_dat', assuming you have an old version of Transfac Pro with no fragment.dat file"); - } - } -} - -# connect the internal db handle -sub _db_connect { - my $self = shift; - return if $self->{'_initialized'}; - - my $index_dir = $self->index_directory; - my $gene_index = "$index_dir/gene.dat.index"; - my $reference_index = "$index_dir/reference.dat.index"; - my $matrix_index = "$index_dir/matrix.dat.index"; - my $factor_index = "$index_dir/factor.dat.index"; - my $site_index = "$index_dir/site.dat.index"; - my $fragment_index = "$index_dir/fragment.dat.index"; - - foreach ($gene_index, $reference_index, $matrix_index, $factor_index, $site_index, $fragment_index) { - if (! -e $_) { - #$self->warn("Index files have not been created"); - #return 0; - } - } - - # reference - { - $self->{reference}->{data} = {}; - tie (%{$self->{reference}->{data}}, 'DB_File', $reference_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$reference_index': $!"); - - my $reference_pubmed = $reference_index.'.pubmed'; - $self->{reference}->{pubmed} = tie (%{$self->{reference}->{pubmed}}, 'DB_File', $reference_pubmed, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$reference_pubmed': $!"); - - my $reference_gene = $gene_index.'.reference'; - $self->{gene}->{reference} = tie (%{$self->{gene}->{reference}}, 'DB_File', $reference_gene, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$reference_gene': $!"); - - my $reference_site = $site_index.'.reference'; - $self->{site}->{reference} = tie (%{$self->{site}->{reference}}, 'DB_File', $reference_site, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$reference_site': $!"); - - my $reference_fragment = $fragment_index.'.reference'; - $self->{fragment}->{reference} = tie (%{$self->{fragment}->{reference}}, 'DB_File', $reference_fragment, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$reference_fragment': $!"); - - my $reference_factor = $factor_index.'.reference'; - $self->{factor}->{reference} = tie (%{$self->{factor}->{reference}}, 'DB_File', $reference_factor, undef, 0644, $DB_BTREE) || $self->throw("Cannot open file '$reference_factor': $!"); - - my $reference_matrix = $matrix_index.'.reference'; - $self->{matrix}->{reference} = tie (%{$self->{matrix}->{reference}}, 'DB_File', $reference_matrix, undef, 0644, $DB_BTREE) || $self->throw("Cannot open file '$reference_matrix': $!"); - } - - # gene - { - $self->{gene}->{data} = {}; - tie (%{$self->{gene}->{data}}, 'DB_File', $gene_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$gene_index': $!"); - - my $gene_id = $gene_index.'.id'; - $self->{gene}->{id} = tie(%{$self->{gene}->{id}}, 'DB_File', $gene_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_id': $!"); - - my $gene_name = $gene_index.'.name'; - $self->{gene}->{name} = tie(%{$self->{gene}->{name}}, 'DB_File', $gene_name, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_name': $!"); - - my $gene_species = $gene_index.'.species'; - $self->{gene}->{species} = tie(%{$self->{gene}->{species}}, 'DB_File', $gene_species, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_species': $!"); - - my $gene_site = $site_index.'.gene'; - $self->{site}->{gene} = tie(%{$self->{site}->{gene}}, 'DB_File', $gene_site, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_site': $!"); - - my $gene_fragment = $fragment_index.'.gene'; - $self->{fragment}->{gene} = tie(%{$self->{fragment}->{gene}}, 'DB_File', $gene_fragment, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_fragment': $!"); - - my $gene_factor = $factor_index.'.gene'; - $self->{factor}->{gene} = tie(%{$self->{factor}->{gene}}, 'DB_File', $gene_factor, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_factor': $!"); - - my $gene_reference = $reference_index.'.gene'; - $self->{reference}->{gene} = tie(%{$self->{reference}->{gene}}, 'DB_File', $gene_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$gene_reference': $!"); - } - - # site - { - $self->{site}->{data} = {}; - tie (%{$self->{site}->{data}}, 'DB_File', $site_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$site_index': $!"); - - my $site_id = $site_index.'.id'; - $self->{site}->{id} = tie(%{$self->{site}->{id}}, 'DB_File', $site_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_id': $!"); - - my $site_species = $site_index.'.species'; - $self->{site}->{species} = tie(%{$self->{site}->{species}}, 'DB_File', $site_species, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file $site_species': $!"); - - #*** quality not actually used by anything (yet) - my $site_qualities = $site_index.'.qual'; - $self->{quality} = {}; - tie(%{$self->{quality}}, 'DB_File', $site_qualities, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$site_qualities': $!"); - - my $site_gene = $gene_index.'.site'; - $self->{gene}->{site} = tie(%{$self->{gene}->{site}}, 'DB_File', $site_gene, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_gene': $!"); - - my $site_matrix = $matrix_index.'.site'; - $self->{matrix}->{site} = tie(%{$self->{matrix}->{site}}, 'DB_File', $site_matrix, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_matrix': $!"); - - my $site_factor = $factor_index.'.site'; - $self->{factor}->{site} = tie(%{$self->{factor}->{site}}, 'DB_File', $site_factor, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_factor': $!"); - - my $site_reference = $reference_index.'.site'; - $self->{reference}->{site} = tie(%{$self->{reference}->{site}}, 'DB_File', $site_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$site_reference': $!"); - } - - # fragment (may not be in older databases) - if (-e $fragment_index) { - $self->{fragment}->{data} = {}; - tie (%{$self->{fragment}->{data}}, 'DB_File', $fragment_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$fragment_index': $!"); - - my $fragment_id = $fragment_index.'.id'; - $self->{fragment}->{id} = tie(%{$self->{fragment}->{id}}, 'DB_File', $fragment_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$fragment_id': $!"); - - my $fragment_species = $fragment_index.'.species'; - $self->{fragment}->{species} = tie(%{$self->{fragment}->{species}}, 'DB_File', $fragment_species, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file $fragment_species': $!"); - - #*** quality not actually used by anything (yet) - my $fragment_qualities = $fragment_index.'.qual'; - $self->{fragment_quality} = {}; - tie(%{$self->{fragment_quality}}, 'DB_File', $fragment_qualities, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$fragment_qualities': $!"); - - my $fragment_gene = $gene_index.'.fragment'; - $self->{gene}->{fragment} = tie(%{$self->{gene}->{fragment}}, 'DB_File', $fragment_gene, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$fragment_gene': $!"); - - my $fragment_factor = $factor_index.'.fragment'; - $self->{factor}->{fragment} = tie(%{$self->{factor}->{fragment}}, 'DB_File', $fragment_factor, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$fragment_factor': $!"); - - my $fragment_reference = $reference_index.'.fragment'; - $self->{reference}->{fragment} = tie(%{$self->{reference}->{fragment}}, 'DB_File', $fragment_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$fragment_reference': $!"); - } - else { - die "no fragment_index at '$fragment_index'\n"; - } - - # matrix - { - $self->{matrix}->{data} = {}; - tie (%{$self->{matrix}->{data}}, 'DB_File', $matrix_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$matrix_index': $!"); - - my $matrix_id = $matrix_index.'.id'; - $self->{matrix}->{id} = tie(%{$self->{matrix}->{id}}, 'DB_File', $matrix_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_id': $!"); - - my $matrix_name = $matrix_index.'.name'; - $self->{matrix}->{name} = tie(%{$self->{matrix}->{name}}, 'DB_File', $matrix_name, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_name': $!"); - - my $matrix_site = $site_index.'.matrix'; - $self->{site}->{matrix} = tie(%{$self->{site}->{matrix}}, 'DB_File', $matrix_site, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_site': $!"); - - my $matrix_factor = $factor_index.'.matrix'; - $self->{factor}->{matrix} = tie(%{$self->{factor}->{matrix}}, 'DB_File', $matrix_factor, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_factor': $!"); - - my $matrix_reference = $reference_index.'.matrix'; - $self->{reference}->{matrix} = tie(%{$self->{reference}->{matrix}}, 'DB_File', $matrix_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$matrix_reference': $!"); - } - - # factor - { - $self->{factor}->{data} = {}; - tie (%{$self->{factor}->{data}}, 'DB_File', $factor_index, O_RDWR, undef, $DB_HASH) || $self->throw("Cannot open file '$factor_index': $!"); - - my $factor_id = $factor_index.'.id'; - $self->{factor}->{id} = tie(%{$self->{factor}->{id}}, 'DB_File', $factor_id, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file 'factor_id': $!"); - - my $factor_name = $factor_index.'.name'; - $self->{factor}->{name} = tie(%{$self->{factor}->{name}}, 'DB_File', $factor_name, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_name': $!"); - - my $factor_species = $factor_index.'.species'; - $self->{factor}->{species} = tie(%{$self->{factor}->{species}}, 'DB_File', $factor_species, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_species': $!"); - - my $factor_interactors = $factor_index.'.interactors'; - $self->{factor}->{interactors} = tie(%{$self->{factor}->{interactors}}, 'DB_File', $factor_interactors, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_interactors': $!"); - - my $factor_gene = $gene_index.'.factor'; - $self->{gene}->{factor} = tie(%{$self->{gene}->{factor}}, 'DB_File', $factor_gene, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_gene': $!"); - - my $factor_matrix = $matrix_index.'.factor'; - $self->{matrix}->{factor} = tie(%{$self->{matrix}->{factor}}, 'DB_File', $factor_matrix, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_matrix': $!"); - - my $factor_site = $site_index.'.factor'; - $self->{site}->{factor} = tie(%{$self->{site}->{factor}}, 'DB_File', $factor_site, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_site': $!"); - - my $factor_fragment = $fragment_index.'.factor'; - $self->{fragment}->{factor} = tie(%{$self->{fragment}->{factor}}, 'DB_File', $factor_fragment, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_fragment': $!"); - - my $factor_reference = $reference_index.'.factor'; - $self->{reference}->{factor} = tie(%{$self->{reference}->{factor}}, 'DB_File', $factor_reference, O_RDWR, undef, $DB_BTREE) || $self->throw("Cannot open file '$factor_reference': $!"); - } - - $self->{'_initialized'} = 1; -} - -=head2 index_directory - - Title : index_directory - Function : Get/set the location that index files are stored. (this module - will index the supplied database) - Usage : $obj->index_directory($newval) - Returns : value of index_directory (a scalar) - Args : on set, new value (a scalar or undef, optional) - -=cut - -sub index_directory { - my $self = shift; - return $self->{'index_directory'} = shift if @_; - return $self->{'index_directory'}; -} - -# resolve a transfac species string into an ncbi taxid -sub _species_to_taxid { - my ($self, $raw_species) = @_; - $raw_species or return; - - my $species_string; - my @split = split(', ', $raw_species); - (@split > 1) ? ($species_string = $split[1]) : ($species_string = $split[0]); - - my $ncbi_taxid; - if ($species_string =~ /^[A-Z]\S+ \S+$/) { - SWITCH: for ($species_string) { - # some species don't classify so custom handling - /^Darnel ryegrass/ && do { $ncbi_taxid = 34176; last; }; - /^Coix lacryma/ && do { $ncbi_taxid = 4505; last; }; - /^Rattus spec/ && do { $ncbi_taxid = 10116; last; }; - /^Mus spec/ && do { $ncbi_taxid = 10090; last; }; - /^Equus spec/ && do { $ncbi_taxid = 9796; last; }; - /^Cavia sp/ && do { $ncbi_taxid = 10141; last; }; - /^Marsh marigold/ && do { $ncbi_taxid = 3449; last; }; - /^Phalaenopsis sp/ && do { $ncbi_taxid = 36900; last; }; - /^Anthirrhinum majus/ && do { $ncbi_taxid = 4151; last; }; - /^Equus spec/ && do { $ncbi_taxid = 9796; last; }; - /^Lycopodium spec/ && do { $ncbi_taxid = 13840; last; }; - /^Autographa californica/ && do { $ncbi_taxid = 307456; last; }; - /^E26 AEV/ && do { $ncbi_taxid = 31920; last; }; - /^Pseudocentrotus miliaris/ && do { $ncbi_taxid = 7677; last; }; # the genus is 7677 but this species isn't there - /^SL3-3 (?:retro)?virus/ && do { $ncbi_taxid = 53454; last; }; # 53454 is unclassified MLV-related, SL3-3 a variant of that? - /^Petunia sp/ && do { $ncbi_taxid = 4104; last; }; - } - if (! $ncbi_taxid && defined $self->{_tax_db}) { - ($ncbi_taxid) = $self->{_tax_db}->get_taxonids($species_string); - } - } - else { - # some species lines are poorly formatted so custom handling - SWITCH: for ($raw_species) { - # for speed, go by common first letters - my $first_letter = substr($raw_species, 0, 1); - - $first_letter eq 'A' && do { - /^Adiantum raddianum/ && do { $ncbi_taxid = 32168; last; }; - /^Avian sarcoma virus \(strain 17\)/ && do { $ncbi_taxid = 11877; last; }; - /^AMV/ && do { $ncbi_taxid = 11866; last; }; - /^AEV/ && do { $ncbi_taxid = 11861; last; }; - /^AS42|^Avian musculoaponeurotic/ && do { $ncbi_taxid = 11873; last; }; - /^Avian myelocytomatosis/ && do { $ncbi_taxid = 11869; last; }; - /^ASV 31/ && do { $ncbi_taxid = 35270; last; }; - /^A-MuLV/ && do { $ncbi_taxid = 188539; last; }; - /^Asparagus officinalis/ && do { $ncbi_taxid = 4686; last; }; - /^Agrobacterium tumefaciens/ && do { $ncbi_taxid = 358; last; }; - /^ALV/ && do { $ncbi_taxid = 11864; last; }; - /^AAV/ && do { $ncbi_taxid = 272636; last; }; - /^AKV MLV/ && do { $ncbi_taxid = 11791; last; }; - last; - }; - - $first_letter eq 'B' && do { - /^BPV-1/ && do { $ncbi_taxid = 10559; last; }; - /^BKV/ && do { $ncbi_taxid = 10629; last; }; - /^Bolivian squirrel monkey/ && do { $ncbi_taxid = 39432; last; }; - last; - }; - - $first_letter eq 'C' && do { - /^Cauliflower/ && do { $ncbi_taxid = 3715; last; }; - /^Chamek/ && do { $ncbi_taxid = 118643; last; }; - /^Candida albicans/ && do { $ncbi_taxid = 5476; last; }; - /^CaMV/ && do { $ncbi_taxid = 10641; last; }; - last; - }; - - $first_letter eq 'E' && do { - /^Eucalyptus gunnii/ && do { $ncbi_taxid = 3933; last; }; - /^EBV, Epstein-Barr virus/ && do { $ncbi_taxid = 10376; last; }; - /^Eucalyptus globulus subsp. bicostata/ && do { $ncbi_taxid = 71272; last; }; - /^Eucalyptus globulus subsp. globulus/ && do { $ncbi_taxid = 71271; last; }; - last; - }; - - $first_letter eq 'F' && do { - /^FBR MuLV/ && do { $ncbi_taxid = 11806; last; }; - /^FBJ MuLV/ && do { $ncbi_taxid = 11805; last; }; - /^FeLV|Feline leukemia/ && do { $ncbi_taxid = 11923; last; }; - /^Flaveria trinervia/ && do { $ncbi_taxid = 4227; last; }; - /^FSV/ && do { $ncbi_taxid = 11885; last; }; - /^F-MuLV/ && do { $ncbi_taxid = 11795; last; }; - last; - }; - - $first_letter eq 'H' && do { - /^HSV-1/ && do { $ncbi_taxid = 10298; last; }; - /^HTLV-I/ && do { $ncbi_taxid = 11908; last; }; - /^HIV-1/ && do { $ncbi_taxid = 11676; last; }; - /^HPV-16/ && do { $ncbi_taxid = 333760; last; }; - /^HBV/ && do { $ncbi_taxid = 10407; last; }; - /^HBI/ && do { $ncbi_taxid = 11867; last; }; - /^HPV-8/ && do { $ncbi_taxid = 10579; last; }; - /^HPV-11/ && do { $ncbi_taxid = 10580; last; }; - /^HPV-18/ && do { $ncbi_taxid = 333761; last; }; - /^HCMV/ && do { $ncbi_taxid = 10359; last; }; - /^HSV/ && do { $ncbi_taxid = 126283; last; }; - /^HSV-2/ && do { $ncbi_taxid = 10310; last; }; - /^HCV/ && do { $ncbi_taxid = 11108; last; }; - /^HIV-2/ && do { $ncbi_taxid = 11709; last; }; - last; - }; - - $first_letter eq 'M' && do { - /^MMTV/ && do { $ncbi_taxid = 11757; last; }; - /^Mo-MuLV/ && do { $ncbi_taxid = 11801; last; }; - /^MuLV/ && do { $ncbi_taxid = 11786; last; }; - /^MSV/ && do { $ncbi_taxid = 11802; last; }; - /^MC29/ && do { $ncbi_taxid = 11868; last; }; - /^MVM/ && do { $ncbi_taxid = 10794; last; }; - /^MH2E21/ && do { $ncbi_taxid = 11955; last; }; # 11955 is a species, presumably MH2E21 is the strain - last; - }; - - $first_letter eq 'R' && do { - /^Raphanus sativus/ && do { $ncbi_taxid = 3726; last; }; - /^REV-T/ && do { $ncbi_taxid = 11636; last; }; - /^RAV-0/ && do { $ncbi_taxid = 11867; last; }; # should be rous-associated virus 0 variant - /^RSV/ && do { $ncbi_taxid = 11886; last; }; - /^RadLV/ && do { $ncbi_taxid = 31689; last; }; - /^RTBV/ && do { $ncbi_taxid = 10654; last; }; - last; - }; - - $first_letter eq 'S' && do { - /^SV40/ && do { $ncbi_taxid = 10633; last; }; - /^Sesbania rostrata/ && do { $ncbi_taxid = 3895; last; }; - /^SIV/ && do { $ncbi_taxid = 11723; last; }; - /^Spinacia oleracea/ && do { $ncbi_taxid = 3562; last; }; - /^SCMV/ && do { $ncbi_taxid = 10364; last; }; # supposed to be AGM isolate - last; - }; - - # and lower case - $first_letter eq 'a' && do { - /^adenovirus type 5/ && do { $ncbi_taxid = 28285; last; }; - /^adenovirus type 2/ && do { $ncbi_taxid = 10515; last; }; - /^adenovirus/ && do { $ncbi_taxid = 189831; last; }; # 189831 ('unclassified Adenoviridae') is the closest I can get, but this has no genus and is not a species - last; - }; - - $first_letter eq 'b' && do { - /^bell pepper/ && do { $ncbi_taxid = 4072; last; }; - /^baculovirus, Autographa californica/ && do { $ncbi_taxid = 46015; last; }; - /^broccoli/ && do { $ncbi_taxid = 36774; last; }; - /^barley/ && do { $ncbi_taxid = 112509; last; }; - last; - }; - - $first_letter eq 'c' && do { - /^clawed frog/ && do { $ncbi_taxid = 8355; last; }; - /^chipmunk/ && do { $ncbi_taxid = 64680; last; }; - /^common tree shrew/ && do { $ncbi_taxid = 37347; last; }; - /^cat/ && do { $ncbi_taxid = 9685; last; }; - last; - }; - - # and misc - /^NK24/ && do { $ncbi_taxid = 11955; last; }; - /^OK10/ && do { $ncbi_taxid = 11871; last; }; - /^Dendrobium grex/ && do { $ncbi_taxid = 84618; last; }; - /^KSHV/ && do { $ncbi_taxid = 37296; last; }; - /^Oncidium/ && do { $ncbi_taxid = 96474; last; }; - /^Japanese quail/ && do { $ncbi_taxid = 93934; last; }; - /^Nile tilapia/ && do { $ncbi_taxid = 8128; last; }; - /^GALV/ && do { $ncbi_taxid = 11840; last; }; - /^JCV/ && do { $ncbi_taxid = 10632; last; }; - /^LPV/ && do { $ncbi_taxid = 10574; last; }; - /^Py,/ && do { $ncbi_taxid = 36362; last; }; - /^DHBV/ && do { $ncbi_taxid = 12639; last; }; - /^VZV/ && do { $ncbi_taxid = 10335; last; }; - /^Vicia faba/ && do { $ncbi_taxid = 3906; last; }; - - /^hamster/ && do { $ncbi_taxid = 10029; last; }; - /^sea urchin/ && do { $ncbi_taxid = 7668; last; }; - /^fruit fly/ && do { $ncbi_taxid = 7227; last; }; - /^halibut/ && do { $ncbi_taxid = 8267; last; }; - /^vaccinia virus/ && do { $ncbi_taxid = 10245; last; }; - /^taxonomic class Mammalia/ && do { $ncbi_taxid = 40674; last; }; # not a species - /^taxonomic class Vertebrata/ && do { $ncbi_taxid = 7742; last; }; # not a species - /^dog/ && do { $ncbi_taxid = 9615; last; }; - /^parsley/ && do { $ncbi_taxid = 4043; last; }; - /^mouse, Mus domesticus Torino/ && do { $ncbi_taxid = 10092; last; }; # 10092 is domesticus subspecies, but not the Torino strain - /^lemur, Eulemur fulvus collaris/ && do { $ncbi_taxid = 47178; last; }; - /^red sea bream/ && do { $ncbi_taxid = 143350; last; }; - /^zebra finch/ && do { $ncbi_taxid = 59729; last; }; - /^mung bean/ && do { $ncbi_taxid = 3916; last; }; - /^soybean/ && do { $ncbi_taxid = 3847; last; }; - /^oat/ && do { $ncbi_taxid = 4498; last; }; - /^pseudorabies virus/ && do { $ncbi_taxid = 10345; last; }; - } - } - - $self->warn("Didn't know what species '$raw_species' was, unable to classify") unless $ncbi_taxid; - return $ncbi_taxid; -} - -sub DESTROY { - my $self = shift; - # Destroy tied references to close filehandles - # and allow proper temporary files deletion - undef $self->{_tax_db}->{'_nodes'}; - undef $self->{_tax_db}->{'_id2name'}; - undef $self->{_tax_db}->{'_name2id'}; - undef $self->{_tax_db}->{'_parent2children'}; - undef $self->{_tax_db}->{'_parentbtree'}; -} - -1; diff --git a/Changes b/Changes index eb913ec62..5b0995bdf 100644 --- a/Changes +++ b/Changes @@ -123,6 +123,9 @@ be removed. own distribution named Bio-DB-HIV. This also drops the bioperl dependency on XML-Simple and Term-ReadLine. + * The entire Bio::DB::TFBS namespace has been moved to its own + distribution named after itself. + 1.7.2 - "Entebbe" diff --git a/t/LocalDB/transfac_pro.t b/t/LocalDB/transfac_pro.t deleted file mode 100644 index 80e4e3566..000000000 --- a/t/LocalDB/transfac_pro.t +++ /dev/null @@ -1,262 +0,0 @@ -use strict; -use warnings; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - test_begin(-tests => 115, - -requires_module => 'DB_File'); - - use_ok('Bio::Matrix::PSM::IO'); - use_ok('Bio::DB::TFBS'); - use_ok('Bio::DB::Taxonomy'); -} - -#*** need to test getting all ids of a certain kind, like $db->get_matrix_ids(); -# but hard to do without a complete tax dump - -my $temp_dir = test_output_dir(); -my $tax_db = Bio::DB::Taxonomy->new(-source => 'flatfile', - -directory => $temp_dir, - -nodesfile => test_input_file('taxdump', 'nodes.dmp'), - -namesfile => test_input_file('taxdump', 'names.dmp')); - -# test transfac pro (local flat files) -{ - ok my $db = Bio::DB::TFBS->new(-source => 'transfac_pro', - -index_dir => $temp_dir, - -dat_dir => test_input_file('transfac_pro'), - -tax_db => $tax_db, - -force => 1); - - # reference.dat - { - ok my ($ref_id) = $db->get_reference_ids(-pubmed => 16574738); - is $ref_id, 'RE0047775'; - ok my $ref = $db->get_reference($ref_id); - isa_ok $ref, 'Bio::Annotation::Reference'; - is $ref->primary_id, 16574738; - is $ref->pubmed, $ref->primary_id; - is $ref->database, 'PUBMED'; - is $ref->authors, '..Bet S . ,.u i rMeK ,,d. vWeWk KaS.ee.nyNk mJMMih. a, i P'; - is $ref->location, 'Mc (o0o.. 0n)lnir.do 2E:6l'; - is $ref->title, 'INDD VDGT C1AALEBEI.EIT IYIHLA6ITTE E ANV ITSL MTRTANYE TM NISP TNBAUTPOIORSL I- NVTOD,MHIRRLINSDX TRPY NO CAELUAOA SNMMNT CED5CTH NII TERTOI2IMTVPEH3DSAI'; - - my @sites = $db->get_site_ids(-reference => $ref_id); - is join(' ', sort @sites), 'R19310 R19311 R19312 R19313 R19314 R19315 R19316'; - my @genes = $db->get_gene_ids(-reference => $ref_id); - is "@genes", 'G036757'; - my @ref_ids = $db->get_reference_ids(-site => 'R19310'); - is "@ref_ids", $ref_id; - @ref_ids = $db->get_reference_ids(-gene => 'G036757'); - is "@ref_ids", $ref_id; - - $ref_id = 'RE0047531'; - my @matrices = $db->get_matrix_ids(-reference => $ref_id); - is join(' ', sort @matrices), 'M01123 M01124 M01125'; - my @factors = $db->get_factor_ids(-reference => $ref_id); - like "@factors", qr/T08800/; - @ref_ids = $db->get_reference_ids(-matrix => 'M01123'); - is join(' ', sort @ref_ids), "$ref_id RE0047626"; - @ref_ids = $db->get_reference_ids(-factor => 'T08800'); - is join(' ', sort @ref_ids), "$ref_id RE0047634 RE0047637 RE0047645"; - - $ref_id = 'RE0023998'; - my %fragments = map { $_ => 1 } $db->get_fragment_ids(-reference => $ref_id); - ok $fragments{'FR0002267'}; - @ref_ids = $db->get_reference_ids(-fragment => 'FR0002267'); - is "@ref_ids", $ref_id; - } - - # gene.dat - { - ok my ($gene_id) = $db->get_gene_ids(-name => 'P5'); - is $gene_id, 'G000001'; - - #*** get_genemap with ensembl lookup being fantastically slow - #ok defined Bio::Map::Gene->set_from_db; # will try and do ensembl lookups for gene info - #ok my $gene_map = $db->get_genemap($gene_id, 1000); - #Bio::Tools::Run::Ensembl->_stats; - #ok $gene_map->isa('Bio::Map::GeneMap'); - #ok $gene_map->unique_id, 'G000001'; - #ok $gene_map->universal_name, 'P5'; - #ok $gene_map->species->scientific_name, 'Adeno-associated virus'; - #my @factors = grep { $_->isa("Bio::Map::TranscriptionFactor") } $gene_map->get_elements; - #ok @factors, 3; - - ($gene_id) = $db->get_gene_ids(-id => 'AAV$P5'); - is $gene_id, 'G000001'; - my @gene_ids = $db->get_gene_ids(-species => '9606'); - is @gene_ids, 5; - is [sort @gene_ids]->[0], 'G000060'; # in real data this would be G000174, but since our taxdump doesn't have chicken in it, G000060 was changed to human - ($gene_id) = $db->get_gene_ids(-site => 'R03174'); - is $gene_id, 'G000001'; - ($gene_id) = $db->get_gene_ids(-factor => 'T00267'); - is $gene_id, 'G000060'; - my %gene_ids = map { $_ => 1 } $db->get_gene_ids(-fragment => 'FR0002267'); - ok $gene_ids{'G020751'}; - # get_gene_ids(-reference => ...) already tested - - my @site_ids = $db->get_site_ids(-gene => 'G000001'); - is join(' ', sort @site_ids), 'R03174 R03175 R03176'; - my @factor_ids = $db->get_factor_ids(-gene => 'G000060'); - is join(' ', sort @factor_ids), 'T00267 T08293'; # only found for genes that encode factors - my %fragment_ids = map { $_ => 1 } $db->get_fragment_ids(-gene => 'G020751'); - ok $fragment_ids{'FR0002267'}; - # get_reference_ids(-gene => ...) already tested - } - - # site.dat - { - ok my ($site_id) = $db->get_site_ids(-id => 'HS$IFI616_01'); - is $site_id, 'R00001'; - ok my $seq = $db->get_seq($site_id); - isa_ok $seq, 'Bio::Seq'; - is $seq->id, 'HS$IFI616_01'; - is $seq->accession_number, 'R00001'; - is $seq->seq, 'aGAGACATAAGTgA'; - my $annot = $seq->annotation; - is [$annot->get_Annotations('relative_start')]->[0]->value, -172; - is [$annot->get_Annotations('relative_end')]->[0]->value, -98; - is [$annot->get_Annotations('relative_type')]->[0]->value, 'TSS'; - is [$annot->get_Annotations('relative_to')]->[0]->value, 'G000176'; - is $seq->species, 9606; - - my @site_ids = $db->get_site_ids(-species => '9606'); - is @site_ids, 14; - is [sort @site_ids]->[0], 'R00001'; - # get_site_ids(-gene => ...) already tested - ($site_id) = $db->get_site_ids(-matrix => 'M00972'); - is $site_id, 'R00001'; - my %site_ids = map { $_ => 1 } $db->get_site_ids(-factor => 'T00428'); - ok $site_ids{R00001}; - # get_site_ids(-reference => ...) already tested - - # get_gene_ids(-site => ...) already tested - my @matrix_ids = $db->get_matrix_ids(-site => 'R00001'); - is "@matrix_ids", 'M00972'; - my @factor_ids = $db->get_factor_ids(-site => 'R00001'); - is "@factor_ids", 'T00428'; - # get_reference_ids(-site => ...) already tested - } - - # matrix.dat - { - ok my ($matrix_id) = $db->get_matrix_ids(-id => 'V$E47_01'); - is $matrix_id, 'M00002'; - ok my $matrix = $db->get_matrix($matrix_id); - isa_ok $matrix, 'Bio::Matrix::PSM::SiteMatrix'; - - # detailed psm tests - { - # Lets try to compress and uncompress the frequencies, see if - # there is no considerable loss of data. - my $fA = $matrix->get_compressed_freq('A'); - my @check = Bio::Matrix::PSM::SiteMatrix::_uncompress_string($fA,1,1); - my @A = $matrix->get_array('A'); - my ($var, $max) = (0, 0); - for (my $i = 0; $i < @check; $i++) { - my $diff = abs(abs($check[$i]) - abs($A[$i])); - $var += $diff; - $max = $diff if ($diff > $max); - } - my $avg = $var / @check; - cmp_ok $avg, '<', 0.01; # Loss of data under 1 percent - - # SiteMatrixI methods - is $matrix->id, 'V$E47_01'; - is $matrix->accession_number, $matrix_id; - is $matrix->consensus, 'ATGCATGCATGC'; - is $matrix->IUPAC, 'NNNNNNNNNNNN'; - is $matrix->regexp, '\S\S\S\S\S\S\S\S\S\S\S\S'; - is $matrix->width, 12; - is $matrix->sites, 5; - ok ! $matrix->IC; - ok ! $matrix->e_val; - } - - ok my $aln = $db->get_aln($matrix_id); - isa_ok $aln, 'Bio::SimpleAlign'; - is $aln->length, 12; - is $aln->num_residues, 132; - ok $aln->is_flush; - is $aln->num_sequences, 11; - my @ids = qw(R05108 R05109 R05110 R05111 R05112 R05113 R05114 R05115 R05116 R05117 R05118); - foreach my $seq ($aln->each_alphabetically) { - is $seq->id, shift(@ids); - } - is @ids, 0; - ok ! $db->get_aln('M00001'); # no seqs in db - ok $aln = $db->get_aln('M00001', 1); # force to find seqs, store in db - ok $aln = $db->get_aln('M00001'); # seqs now in db - is $aln->num_sequences, 5; - - ($matrix_id) = $db->get_matrix_ids(-name => 'MyoD'); - is $matrix_id, 'M00001'; - # get_matrix_ids(-site => ...) already tested - my %matrix_ids = map { $_ => 1 } $db->get_matrix_ids(-factor => 'T00526'); - ok $matrix_ids{M00001}; - # get_matrix_ids(-reference => ...) already tested - - # get_site_ids(-matrix => ...) already tested - my @factor_ids = $db->get_factor_ids(-matrix => 'M00001'); - is join(' ', sort @factor_ids), 'T00526 T09177'; - # get_reference_ids(-matrix => ...) already tested - } - - # fragment.dat - { - ok my ($fragment_id) = $db->get_fragment_ids(-id => 'FR0002267'); - is $fragment_id, 'FR0002267'; # id and accession are the same for fragments - ok my $seq = $db->get_fragment($fragment_id); - isa_ok $seq, 'Bio::SeqI'; - is $seq->id, 'FR0002267'; - is $seq->seq, 'GTCTACAACACTCTTGCGGACGGAGAGCCGAAGAGCAAAGCGTCGCCGGGTAAGACGAACGCTCAAGGGGGTACGAGCAGCGTAACGACGGAAACGGTGACGCCCCGGGATTTGGGGCTCAGCTAGGGTCGCCGAGTAGGGGGCCGCGGGGACAACGGGGGCGACACGCCGCTTTCCCTGCGTCTGTGGAGCCTATGGTACGGCGTAACCGGTTGTGTGATGAACTGTCCAGACCGCACGTAGTCCCAGCGCAAGGTCTATGCCGCCTAGAGGCAAGACGGGCCGTCTCCTACTTAGTAGCCAGCTACGGGGCGTTGGTCCCCTCGGTAGTGCAACTATCCAGCCACGGCGTCCGCCGGGCTGAGCCTCAGCAGAGCTGGGGGGGTATCATTCCGACGCTGTTTAATTCGTCAGCAGGACCCACTACACGCTCTGTCATTCGCCTGAGCAGTTGTAAATTAGCGCGGCGATCTTGCAAGAGACAAGGAGGCGAACCTGGGGTCGGGACGTAAGGACGAACGGCAGTACAGACGCTGGGGGACGCCACGTGCCAGAACCTCTCACGACCGGAGGTTCAACGCTGATTGGGGCGCAACAGAGGGCGGAGCAGCGAGGTGGCGCTGGTGGGATGGGGCGAGACAAACCCAAGCTGACGCCGAAGGGCCCGCGTGGCCGGGCTGGGGCCCGTAGAACGAGGGAATTGTATGCGGCGCCTGAATGGGCGCACCACA'; - is $seq->species, 9606; - - # -id -species -gene -factor -reference - my @fragment_ids = $db->get_fragment_ids(-species => '9606'); - is @fragment_ids, 2; - is [sort @fragment_ids]->[0], 'FR0000001'; - my %fragment_ids = map { $_ => 1 } $db->get_fragment_ids(-factor => 'T03828'); - ok $fragment_ids{'FR0002267'}; - # get_fragment_ids(-gene => ...) already tested - # get_fragment_ids(-reference => ...) already tested - - my ($factor_id) = $db->get_factor_ids(-fragment => 'FR0002267'); - is $factor_id, 'T03828'; - # get_gene_ids(-fragment => ...) already tested - # get_reference_ids(-fragment => ...) already tested - } - - # factor.dat - { - ok my ($factor_id) = $db->get_factor_ids(-id => 'T00001'); - is $factor_id, 'T00001'; # id and accession are the same for factors - ok my $factor = $db->get_factor($factor_id); - isa_ok $factor, 'Bio::Map::TranscriptionFactor'; - is $factor->id, 'T00001'; - is $factor->universal_name, 'AAF'; - is $factor->known_maps, 1; - my @positions = $factor->get_positions; - is @positions, 1; - - ($factor_id) = $db->get_factor_ids(-name => 'AAF'); - is $factor_id, 'T00001'; - my @factor_ids = $db->get_factor_ids(-species => '9606'); - is @factor_ids, 7; - is [sort @factor_ids]->[0], 'T00001'; - @factor_ids = $db->get_factor_ids(-interactors => 'T03200'); - is [sort @factor_ids]->[0], 'T00002'; - # get_factor_ids(-gene => ...) already tested - # get_factor_ids(-site => ...) already tested - # get_factor_ids(-matrix => ...) already tested - # get_factor_ids(-fragment => ...) already tested - # get_factor_ids(-reference => ...) already tested - - # get_*_ids(-factor => ...) already tested - } -} - -# how to get something like ok $psmIO->release, '10.2--2006-06-30'; ? -# or all factors, all sites, all matrices, all genes etc.? diff --git a/t/data/transfac_pro/factor.dat b/t/data/transfac_pro/factor.dat deleted file mode 100644 index b4b0eeec8..000000000 --- a/t/data/transfac_pro/factor.dat +++ /dev/null @@ -1,342 +0,0 @@ -VV TRANSFAC FACTOR TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH -XX -// -AC T00001 -XX -ID T00001 -XX -DT e)ce.; r1916d69(9. wiaet.0 -CO Copyright (C), Biobase GmbH. -XX -FA AAF -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -SF isAi; tlaorFGm -XX -FF ibbhef -n nty25(ntu1licb-dd iad,r0h -ayeiorp'i);n Aee3P -XX -BS R02116; AAF$CONS; Quality: 6. -BS R03064; HS$GBP_02; Quality: 6; GBP, G000264; human, Homo sapiens. -XX -DR 2 PM0H.03N0AS6:0TOTRA40 -XX -RN [1]; RE0000446. -RX PUBMED: 1901265. -RA rw oltL.k e, J. eJJi.iewrD D.re , M cT.h ,ElnDack. -RT tovtAcipaeu,bna aado F ii -enIrarFl-smGongagifaclyCmdNfgmDAitN-ttn aco -RL 9 3EMB11(O)-2. 9:9.2J19 70 -RN [2]; RE0001471. -RX PUBMED: 1833631. -RA Je rrTel D,elJwcD . ..,. e D.a kLnE -RT chobslarroe neyhertin mtarg tlrtittnnfoidutnacftna niidttarpsnenaeouatodsii cw vt ytagh Tgon-eninopieti- tuyiaooisdgppi taebnas tpdn-ln ceafnwrcen eo -RL l111l.55M 9 -l41lB(. e 9oi.)o7:.3511C -XX -// -AC T00267 -XX -ID T00267 -XX -DT 1e.a5;)e291i9 0.de1wrt(c. -DT pk21;a d2...51(0te0 )oe2du -CO Copyright (C), Biobase GmbH. -XX -FA GATA-1 -XX -SY EF1; EFgammaa; Eryf-1; Eryf1; GF-1; NF-E1; NF-E1a. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; aves; neornithes; neognathae; galliformes; phasianidae -XX -GE G000060; GATA1. -XX -MX M00203; V$GATA_C. -MX M00789; V$GATA_Q6. -MX M00126; V$GATA1_02. -MX M00127; V$GATA1_03. -MX M00128; V$GATA1_04. -MX M00346; V$GATA1_05. -MX M00347; V$GATA1_06. -XX -BS R08219; CHICK$H5_09; Quality: 4; H5, G000063; human, Homo sapiens. -BS R08220; CHICK$H5_10; Quality: 4; H5, G000063; human, Homo sapiens. -BS R08221; CHICK$H5_11; Quality: 4; H5, G000063; human, Homo sapiens. -BS R08222; CHICK$H5_12; Quality: 4; H5, G000063; human, Homo sapiens. -BS R08225; CHICK$H5_15; Quality: 4; H5, G000063; human, Homo sapiens. -BS R08229; CHICK$H5_19; Quality: 3; H5, G000063; human, Homo sapiens. -BS R04798; GATA1$CONS_02; Quality: 6. -BS R04799; GATA1$CONS_03; Quality: 6. -XX -// -AC T00428 -XX -ID T00428 -XX -DT e1 ci(50)2r.9.a e9.t;dw1e1 -DT ;124a0u0 p) (12m..d.t4aedv -CO Copyright (C), Biobase GmbH. -XX -FA ISGF-3 -XX -SY E factor; factor e; ISGF-3. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -MX M00972; V$IRF_Q6_01. -MX M00258; V$ISRE_01. -XX -BS R07792; AS$ISRE_03; Quality: 6. -BS R07793; AS$ISRE_04; Quality: 6. -BS R07794; AS$ISRE_05; Quality: 6. -BS R07795; AS$ISRE_06; Quality: 6. -BS R07796; AS$ISRE_07; Quality: 6. -BS R07797; AS$ISRE_08; Quality: 6. -BS R07798; AS$ISRE_09; Quality: 6. -BS R07799; AS$ISRE_10; Quality: 6. -BS R07800; AS$ISRE_11; Quality: 6. -BS R07801; AS$ISRE_12; Quality: 6. -BS R07802; AS$ISRE_13; Quality: 6. -BS R07803; AS$ISRE_14; Quality: 6. -BS R07804; AS$ISRE_15; Quality: 6. -BS R02402; HS$IFI_03; Quality: 6; IFI-56K, G000305; human, Homo sapiens. -BS R00001; HS$IFI616_01; Quality: 6; IFI-6-16, G000176; human, Homo sapiens. -BS R00003; HS$IFITM1_01; Quality: 6; IFITM1, G000179; human, Homo sapiens. -BS R00947; HS$ISG15_03; Quality: 6; ISG15, G000325; human, Homo sapiens. -BS R00949; HS$ISG15_05; Quality: 6; ISG15, G000325; human, Homo sapiens. -BS R00950; HS$ISG54_01; Quality: 6; ISG54, G000326; human, Homo sapiens. -BS R02188; ISGF3$CONS_01; Quality: 6. -BS R04607; ISGF3$CONS_02; Quality: 6. -XX -// -AC T00526 -XX -ID T00526 -XX -DT (; w11ie95.e).a.tc9re21 d0 -DT aa;u.t(ek20d73000. p). 1ud -CO Copyright (C), Biobase GmbH. -XX -FA MyoD -XX -SY MEF1; Myf-3 (human); MyoD; MyoD1. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; rodentia; myomorpha; muridae; murinae -XX -GE G000576; MyoD1. -XX -MX M00804; V$E2A_Q2. -MX M00973; V$E2A_Q6. -MX M01034; V$EBOX_Q6_01. -MX M00001; V$MYOD_01. -MX M00184; V$MYOD_Q6. -MX M00929; V$MYOD_Q6_01. -XX -BS R02452; HS$AAC_20; Quality: 6; ACTC, G000193; human, Homo sapiens. -BS R02418; HS$MLC_01; Quality: 2; MLC, G000347; human, Homo sapiens. -BS R02419; HS$MLC_02; Quality: 2; MLC, G000347; human, Homo sapiens. -BS R00019; MOUSE$ACRD_02; Quality: 6; AChR delta, G000457; human, Homo sapiens. -BS R00850; MOUSE$IGH_16; Quality: 6; IgH, G000537; human, Homo sapiens. -XX -// -AC T03200 -XX -ID T03200 -XX -DT .o);a. r30mdc(0. 200eeth02 -CO Copyright (C), Biobase GmbH. -XX -FA ASH1 -XX -OS human, Homo sapiens -OC Eukaryota; Fungi; Ascomycota; Hemiascomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces. -XX -GE G004277; ASH1. -XX -CL 3Cn.;i0 z40 -XX -SZ ;a)7 8.Dk . (a 5)(gAAe6l85n cce. -XX -SQ NASAHHSKALRPFMYLSLLTSDSINNNNNLSSYYGSPDSSKSNSDSTIIYPNRLIPLANT -SQ STSFLSNGSITLHLLNVANIHNEIPAPSHQKLASSIYKNFLPMSPSDSGISPSNAEPSKP -SQ LLRRGLPLSRRVLSLLPGQDNQPFLSQNNNHLSLPSYSGRPSLFSSSELELQLPSPQTPK -SQ DKDKLSRTKLNNFVNFNWYARNCNTESFQKTYDPRGYDAVNNSPHIQLASWEPDTTKHIT -SQ PKEKIKFPPTHIALTESVKISNLQTSDRSFPNWSRTALSRSPEPESSPSNPTMKFKLDTD -SQ NIFVILFLNSKESMSFLKHHKPRQPNDSESKSRGTMQNAEVEVFRGASNNTRPTIKEGQS -SQ NAKPTKKVRGSNSLRVRKSSIKKDLVAKVLLQSFNNLNSSNAMSNSYIQKNQLFKASDIA -SQ KYRTRPQSPSSYSRSKSISMRSGSPTSPPLSNHSASPSRKPTDPASTASPPRKRFPNVSS -SQ YSCNVSSGPKRKKSLQNHHCRSKRWQGSCRLDSSTRLSVGPTTRSRTPHTCPSWRDDCCS -SQ YNDNKRPCYERGENERVFIFECPKCLCIGTIGTNSLEIEVKMKINIET -XX -IN T00002; ACE2; human, Homo sapiens. -IN T00776; SWI5; human, Homo sapiens. -XX -RN [1]; RE0014319. -RX PUBMED: 9219698. -RA sn R H.a nJ igR z .ne ,. mn ,-ea,o..MnL,.zgthn. ogG,eNMl.PrsaXSIeKy.R -RT ytitAenzln ingAwcaiS aolpccl m HNleitetaitan stsmoR1i oietancfMysob ogmyr d ryh -RL 7n37(87 :S7c3c)3.92 i9e-e81 -RN [2]; RE0014327. -RX PUBMED: 10409653. -RA tY. cl.,.J ne Hi.i.JMYl dBDr,uS ma -RT a ostwsDcipit ntnrichon cda5foongivAt crnsso apeigaeicideinera ftfutreqrtnirti o2 fe creec r iaeSs -RL )i2e.9J1 h22C:3 o9 0401.729(.9-B1ml.6 -RN [3]; RE0014330. -RX PUBMED: 10212145. -RA -n.e Peu h tr.,RaJa, eS..ocSsunnCMw -RT cds o mtsttonMnsd ha eie tpmcrossoypyiVsaR anetrg hs So woleAi4N sidi ninhegod n eacleA apfieeobl s yssi dnune -RL )l(119.95S91Ce 28J i l1 c.-11:15.1 -RN [4]; RE0014360. -RX PUBMED: 10209099. -RA IoBsunS.e. G.,o,mam .A. N zn oo vlhanBe ,s hnUyKozt -RT na osyteiir1svitln toan osrcA eruA msyiaath meAlcoslnizltausH scnm nevltlioSaee1ano nldnReNpatteultps i r r -RL 1 i.(: r5rC.92)03u9B743o9l3. - -RN [5]; RE0014361. -RX PUBMED: 10319811. -RA ,.mCa,aa K MyTa nkP.mNs hoTts ..a -RT let iodvOceoleenlnettr-loceyo r rlge pydliiamerio l afteltnneo gtardcpoefdathusrpan idracomcr ntmc nmaesd on detrmrrut reac -RL ll 3C1)9192-:. 799e91(9 -RN [6]; RE0015933. -RX PUBMED: 8625409. -RA s HIzo.wtelk,r AiSi. -RT lfocffhmneni dqnc al1yo tyrei-oieaiOpteetdr.laioac iiipce d sr t e raospug elezied afsermcsltenAnIttarefctinios gn Hahemrn,, ity -RL e9 .-6172781 l92l(14)C: -XX -// -AC T03828 -XX -ID T03828 -XX -DT d(b50ma).0 c.290ert0 ;.1ee -DT s.k2;d)ua1.2 02.pa7td( e00 -CO Copyright (C), Biobase GmbH. -XX -FA HNF-4alpha -XX -SY hepatocyte nuclear factor 4; HNF-4; HNF-4A; HNF4; NR2A1. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -GE G001926; HNF4A; HGNC: HNF4A. -XX -IN T05295; PGC-1; human, Homo sapiens. -IN T05296; PGC-1; human, Homo sapiens. -IN T05271; Smad3:Smad4; human, Homo sapiens. -IN T04096; Smad3; human, Homo sapiens. -IN T04292; Smad4; human, Homo sapiens. -IN T01560; SREBP-2; human, Homo sapiens. -XX -MX M00762; V$DR1_Q3. -MX M00764; V$HNF4_DR1_Q3. -MX M00967; V$HNF4_Q6. -MX M01031; V$HNF4_Q6_01. -MX M01032; V$HNF4_Q6_02. -MX M01033; V$HNF4_Q6_03. -MX M00638; V$HNF4ALPHA_Q6. -XX -BS R13037; HS$AKR1C4_01; Quality: 3; AKR1C4, G004794; human, Homo sapiens. -BS R15850; HS$AMBP_01; Quality: 3; AMBP, G013496; human, Homo sapiens. -BS R15854; HS$AMBP_02; Quality: 3; AMBP, G013496; human, Homo sapiens. -BS R20250; HS$AMBP_03; Quality: 3; AMBP, G013496; human, Homo sapiens. -BS R15917; HS$CYP27A1_01; Quality: 3; CYP27A1, G013536; human, Homo sapiens. -BS R19630; HS$CYP27A1_05; Quality: 3; CYP27A1, G013536; human, Homo sapiens. -BS R15907; HS$CYP3A4_06; Quality: 3; CYP3A4, G002572; human, Homo sapiens. -BS R15908; HS$CYP3A4_07; Quality: 3; CYP3A4, G002572; human, Homo sapiens. -BS R19198; HS$CYP3A4_11; Quality: 1; CYP3A4, G002572; human, Homo sapiens. -BS R15916; HS$CYP8B1_01; Quality: 3; CYP8B1, G013537; human, Homo sapiens. -BS R13034; HS$F11_01; Quality: 3; F11, G004793; human, Homo sapiens. -BS R13059; HS$F7_01; Quality: 3; F7, G004821; human, Homo sapiens. -BS R15921; HS$GK_01; Quality: 6; GK, G013544; human, Homo sapiens. -BS R15922; HS$HO_07; Quality: 3; HO, G000287; human, Homo sapiens. -BS R19255; HS$TCF1_01; Quality: 1; TCF1, G004661; human, Homo sapiens. -BS R20585; MOUSE$CYP2A5_01; Quality: 3; Cyp2a5, G021669; human, Homo sapiens. -BS R13067; RAT$CPT1A_02; Quality: 1; Cpt1a, G000721; rat, Rattus norvegicus. -XX -BR FR0001534; Quality: 6; Species: human, Homo sapiens. -BR FR0001535; Quality: 6; Species: human, Homo sapiens. -BR FR0001536; Quality: 6; Species: human, Homo sapiens. -BR FR0002267; Quality: 6; Species: human, Homo sapiens. -XX -// -AC T08800 -XX -ID T08800 -XX -DT 24ac06s0(0 0l;e.er..atd 4) -DT a1mvd .0u.)1ea6d202 ;2pt(. -CO Copyright (C), Biobase GmbH. -XX -FA Nanog -XX -SY 2410002E02Rik; Embryonic stem cell specific homeobox protein; ENK; ES cells cDNA, RIKEN full-length enriched library, clone:2410002E02 product:Nanog homeobox, full insert sequence; Nanog; Nanog homeobox. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; rodentia; myomorpha; muridae; murinae -XX -GE G031512; Nanog. -XX -CL h0e o06omC;.0 -XX -TY c.aibs -XX -HP N)8m(. nT9g0;a96o -XX -SZ .3.cD N((5)k 3 AAc . alc2AD0;)4a -XX -SQ EPGATSYGTEMLPPASHSHMASNSMGSESSEGLQLPCPLENADAAPESNSSVLRPCSVFS -SQ EQQQKLSSKLPASEEDSSMEARLGKEVFPYTKQKDLVCRKSNQTKAKAQFQDLKGPRPSP -SQ QQKSSQSSNGRLSWQSQRLEKVTLLTKNILNYNIEKWWMYKAKLPPVCLQQVIHQKFGQS -SQ GTSLWGCASWNWSPSTASTWYNNTSWGSTATSNNTGWLPWQQMQWNWVQNQSTYPQTPST -SQ NAHDLHENTLLAQQEQSEEFSPGSHQFLSNFQSFLPLTPPVYLARQEAYFLNAVVWANDT -SQ PPEGI -XX -SC A5f:laBnME743otBdrm0t s9Lrae -XX -FT aon 1p N0 . o e gan 1 -FT .i l e1 9 4110 [x H ] 1 5 -FT 4 1 2 ]3 x e i23 Hl[1.1 -FT 33. 1x li 4 [] e H173 5 -XX -SF in1mrncn-ta dngni;poai[e o]oemiotoh -SF aN-ustnc1o N 2enle;s rgeeannaerobhns pm eutth5 qagii t -XX -CP ltsencaey le2epd i.x clsmy ei]il omnbfslptierleiu[ppiarr enscscto -CN aosl e,tue netmr, ,,entniysls, s[be y,keme] sc[aevl b ,o gu,yhhrt,viiihpk e,m scpe, e ,inksrveinal gikt3 reoe milomw,pin r ;oao nl t s s,e,,rsimt u ,nylih,, anf snyavbatusc ,,i],,,vtdsrude,ktlina soarlm,lued ncereoundcelevsa rirhuue.gysey4oe -XX -FF ld ]crlroe3steaMslnoo et c ar yebi p;siilItnn) dui tS c emysfmni(ypl(cin l nacc mer)c[aegnEirrnuCl -FF tnEemLioaea ;tl-es wbiaSItNolihnn[i 3uo e haF srnt]yh wtfaialtg -XX -MX M01123; V$NANOG_01. -XX -DR T0TMO0N7HPS.708A :A098R -DR MumonaH PK.SanNoBg;u: Ls :De -DR A 97;5BA44BBML73E. 0:3095 -DR A;MYB5Y .1285:782EL99A71 -DR :WR5TI5MTST8O_.SNQN77 EQS8PUOS; -DR O ZZ4QSM;6.8U06SE_P0WQ:SO4IS TR8 -XX -RN [1]; RE0047531. -RX PUBMED: 16518401. -RA ou L ., L HiL.,. ,huL J QYb ,eBhLYn. , .,a DCK, Rohht.. ..iPe ,es. gzmW, e.KV WaRq.X..K,oo.K u,J. WnL iYCotBGag v. V. nna,P V,givnu.,L uG . hn WS. .oa..W o, urnL p,LeB n,. Z uH.H. h .i W eehg,,e wCC . esn,etLiN,eo. Wg cg.Z o. .,BorJo.nXCg u.AS -RT alcononir r muumrt.g y seinleo tontorNecnet keaml pc 4esngdsilpehatirbaT cwOcenn titypsoures -RL 024 .6-0t..4Nn:a4 8G)(3e1t30 e -RN [2]; RE0047634. -RX PUBMED: 15743839. -RA HK ditoja, SNa,T .r,oati,onMd umsu..kHT .Y,N,aao,.d.aTr u utSKruH aaa.KmeHTaa. b . i -RT toortrNpndl nfaaseie eo ertenr p ireemr ra al tg g xfionesema otdcrueno.leneiag oxscuaniirqsS Ocsn -RL M0B2o)5:l(. .5 -C4.i222l58el7 o50l4. -RN [3]; RE0047637. -RX PUBMED: 12787504. -RA mki o.awrei.w aM,Synaia zMk Kk. u.r,,K iau...M t,a,.ud ea . TKaa m YaaSTa u , gsHaMksaMaa,hmahhIo tYaM,M -RT nshintNeefeie upp racrengnnooc uatenSelehsfem lm oriucTendqtymrila os nso it. oipEldaoapiobre -RL 0402(-3 1ll6)e3 C1216:3. -RN [4]; RE0047645. -RX PUBMED: 12787505. -RA bhc o,mei sMo ,eSNywe ,.CAbSsh oaIel..mr, s.TriSte,e, n DdLJ e.bht ..oCil R -RT lloogtria xygninfmmnucsciouletaa o tc etssuncs ltlcri coeronaFg.eNiinonieoe f rppnypnn bsa,sni -RL 1210l: e6)04l36(C353- 5. -XX -// diff --git a/t/data/transfac_pro/fragment.dat b/t/data/transfac_pro/fragment.dat deleted file mode 100644 index b5133cf49..000000000 --- a/t/data/transfac_pro/fragment.dat +++ /dev/null @@ -1,111 +0,0 @@ -VV TRANSFAC FRAGMENT TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH -XX -// -AC FR0000001 -XX -ID FR0000001 -XX -DT 5.0.h2el(ct2a)k0. ;055d re -DT tp5. )00m0e22 d5d;ul.a(k.5 -CO Copyright (C), Biobase GmbH. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -SQ TGGGCTCCTGGCATCGTTTACGCGAGCTGGTTCCAGGAAAGGCACCTCGGTTCATGCGAG -SQ GCGCTGATTCTATTGTGTAAACTGTTTCGAGTGACTAGGCCACGAAACCTTTTCATGTAA -SQ GGTGAATTGGGGGTTTTCATTTAACGCGGGGTACGGTTCGTTGGTGGGAAGTGGTTTCGG -SQ ATGTAACGTGGTACGTCGGATATATTGCATGGATTAATACGAAGACGCTCGAGAAAGTCG -SQ AGAGTACAGAAAGTTAAACCGCGGGTTTACGAAAGCATAACTAAAGAAGGTAAATTGATA -SQ GATTCACAGAATGCGGTTGTATGAAACCTATTGACAGCGTCCGTTCGTGCGTTAGATGGT -SQ GTTCTGCGGGGTTCAATATCATGTGAGCGGTCGTGAGGGCAGTTATCGATACTACATGTG -SQ TCGGGGGGCATGCGCTCACCGACTTATCATGGCGGGTACGAACGAACGGGAATCGGATTG -SQ AGGAGGAGCAAAGGATTAGAATTAAAGAGCGGACTTACTGATGGTTCAGACCGTCGTCTT -SQ ACAACAACATCGTATGCTACTGGGAAGTATCAGCTCATCGGGTGGCGATCACGAGAATCG -SQ AAACATACATTGTCTATGTACTGGCGGATCAAAACCCATTCAGTATATACTAGGTTTTTG -SQ TTCGGACTAGCTGGCTGTGCGTACGCCATTGGAGACAACAGTCCGTGGGCAATGCTACAT -SQ AAGTGGGATTGGCGACTGTGGTCGCCCAGACAACCGGAGCAAGACTGTCCTGTTTCCGGA -SQ CTTAGGGGAGTAAGGCGCTACAGCTAGGGATCATTAGGCAGAATAGGCAATAATCAGCAT -SQ TTTGACTGGGTTATATCACAACCAAACGGGTGTAGGAGGGGCCGAGATCATATAGTGAGG -SQ GAGATTTTCCTTGTATGTAGGCTGCATATTATCATTACGGACGTGACTTATTAAATCCAG -SQ TATTCATATAACACGAACCCTACCAGAGTGAGTCGAACCTAGTGTTTTGATAAGTGTAAT -SQ TAAATTCTTGCTCGTCGCTTAATTGTGGACCATGTACAGTGATGTACTCATTTAATCGGG -SQ TTTGCTCGATGGGCACACTGGCCGCAAAATTAGGGTTACTCCTCAGTAGCCAT -XX -SC uRr. d0.)140H 1R1O6iF.621W40D.6(AS_620C:lh7y0a8MA1 7B 1 -XX -BF T00140; c-Myc; Quality: 6; Species: human, Homo sapiens; Cellular source: 0123, Jurkat. -XX -MM h hruCgnded -a-IntiehPothilo re-cysyacginriloo -XX -DR (:94B9L3AM:39F3)8; .5114E.12 80 695 -RN [1]; RE0035138. -RX PUBMED: 14980218. -RA o,r.emWCVmaTkog.J,r Tk. kwls a o,r laWtneSsihB, at ek aPmrG aSlnRR.m ,l. eA knK Soa..hu nn.SuiAlHl,nn, BeEe.PKGi.bA iKviaraCea.l ,e,Mahw taeeJ, ygp..,r . o.nS,a B,aH,a t oWk Y ., .S ipnrS cDP eg H,vNb..e,..l,ngcJDegma, H m rahen.i.ceroe n. -RT iedgmisnnrNdngcpbor smr2ittlpocasiep.ei ogdanoft gAaoo nt2Uud ndp i iasfpu r taw mo n ninsnsfngnlaoimtboot2cec dR er anarn o hsosoiieas1h -RL -10.542(69 9C)el0l1: 049 -RN [2]; RE0035139. -RX PUBMED: 11988577. -RA k a,rv eeE.k.G rePaolL,gnJrR,rgvwBKsr,.S na r .ebuoPai..n., a,Fr .. eDsTwo od yStn..ipR oaSCS -RT ntcaostccid sir1ntra aevyo rn-la msago.mlLtr p2o2esehina 2ici -RL e01ic0269 2 S6ec:-1992n.9() -XX -// -AC FR0002267 -XX -ID FR0002267 -XX -DT )er2.. 0;00aet9cd(0hl5 .k7 -DT 0m00 07.p.2dl;ae)5(9.ktud -CO Copyright (C), Biobase GmbH. -XX -DE Gene: G020751; Gene: G020752. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -SQ gtggcgggcctctataacgtacacggctccgctgggccttcccgctgtaaaccgggaatt -SQ acgtcgagaggtgcaaggtgaaaccaccgactgaccccggccggagaccggagcagcgca -SQ ctccgtcccaactacatgctgttcctgccacatgcaagcgttccctagtgaatctcgcgg -SQ cgggactggcgcattacgcgcactccggtttcgagcgcggtcacaaccttacgttttacc -SQ gcgttggttatgaccggtggtatgcggcgccggtgctgggcagatcagggtggtttcgct -SQ gtcccattcatgcctccgatttctccactccaacgtctcctcagatgttacctcgtcatg -SQ atcgcatacggtctgcgtagtatgggtcaccgatccggaatgcacgcgatgctgagcgtt -SQ atgtacgcccgaccgtgcctgcttaccccccgcgcggaagacggcggctgttgcgttctg -SQ GTCTACAACtGaCctActGcCtCAAGCCCcGATcCCcCctgGCAAcCGcTcGAaCcTCGt -SQ ACTCTTGCGGACGGAGAGCCGAAGAGCAAAGCGTCGCCGGGTAAGACGAACGCTCAAGGG -SQ GGTACGAGCAGCGTAACGACGGAAACGGTGACGCCCCGGGATTTGGGGCTCAGCTAGGGT -SQ CGCCGAGTAGGGGGCCGCGGGGACAACGGGGGCGACACGCCGCTTTCCCTGCGTCTGTGG -SQ AGCCTATGGTACGGCGTAACCGGTTGTGTGATGAACTGTCCAGACCGCACGTAGTCCCAG -SQ CGCAAGGTCTATGCCGCCTAGAGGCAAGACGGGCCGTCTCCTACTTAGTAGCCAGCTACG -SQ GGGCGTTGGTCCCCTCGGTAGTGCAACTATCCAGCCACGGCGTCCGCCGGGCTGAGCCTC -SQ AGCAGAGCTGGGGGGGTATCATTCCGACGCTGTTTAATTCGTCAGCAGGACCCACTACAC -SQ GCTCTGTCATTCGCCTGAGCAGTTGTAAATTAGCGCGGCGATCTTGCAAGAGACAAGGAG -SQ GCGAACCTGGGGTCGGGACGTAAGGACGAACGGCAGTACAGACGCTGGGGGACGCCACGT -SQ GCCAGAACCTCTCACGACCGGAGGTTCAACGCTGATTGGGGCGCAACAGAGGGCGGAGCA -SQ GCGAGGTGGCGCTGGTGGGATGGGGCGAGACAAACCCAAGCTGACGCCGAAGGGCCCGCG -SQ TGGCCGGGCTGGGGCCCGTAGAACGAGGGAATTGTATGCGGCGCCTGAATGGGCGCACCA -SQ tCAgTCTTaGcacTCcCGCATTcTtcATcACaCtcACcCAtctAaggGAAcTaCGCCgGT -SQ gctaagcaaggagtcgatcaacggaaaggtgctaggggtaactgggtcttgcgcggcagc -SQ cgtgtactcggcttgtaacgacaccgcgactgtaagcgccctacttgcgtatattaacgt -SQ aatgttttagtatagttcgttttatctcatctcatgagcttgttttgttgtgtctcgttt -SQ tcgttttttttattttgtttttcgtttttttttttctttgtgtctatcgttcattttttt -SQ ttatagtggacgcccctgggtagtgcgctcgggcctgtcggagatctcgtccaacgtctc -SQ tatcactgctagtgttgccaagcttgctgtaactgaagcggacagtcccacccccactcc -SQ ttaagtaagtagacttattccgtttgttttccgttagcgtcccgaagcggtttgaataac -SQ ctcgtgcacccgaatactgctatacacatgactaaccaggtcctcccgtgtgcccc -XX -SC 30997.0aADHBrM346. d68 .9(y4 iS:W.2lA2F2RC hROu4_77)48 -XX -BF T03828; HNF-4alpha; Quality: 6; Species: human, Homo sapiens; Cellular source: 2614, hepatocytes. -XX -MM yCrtea IropnPhr-or-moa -XX -DR M8C081A09:(78:1E 1.6;8L488)B3 -RN [1]; RE0023998. -RX PUBMED: 14988562. -RA rgSM,k .z, .E l.dBopf dn. ..,,f.A..seL bo .aG. mHKJ l.e.e T R.tPeR , rlIOAeoGy ,.lG DneZ.l Gc.o B.lJF rNdfe, i,.il Wh rR,erd,,oB.Tro.n ana li rr. D YeliL ,erN uVg or. Diuk -RT aF lnn ee tscfH bt eNarseeorc npsarr prnsCsdnaioooei vonnlit partogycir.xf -RL 7i3)1S088e 1:n4c0-ce20(3.1 33 -XX -// diff --git a/t/data/transfac_pro/gene.dat b/t/data/transfac_pro/gene.dat deleted file mode 100644 index aea978a19..000000000 --- a/t/data/transfac_pro/gene.dat +++ /dev/null @@ -1,349 +0,0 @@ -VV TRANSFAC GENE TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH -XX -// -AC G000001 -XX -ID AAV$P5 -XX -DT )b5oed40(.t9d.;161 ra .9ec -DT 00e.ad0.mu1(vta.2p9d0 )1; -CO Copyright (C), Biobase GmbH. -XX -SD P5 -XX -OS AAV, adeno-associated virus -OC viridae; ss-DNA nonenveloped viruses; parvoviridae; dependovirus -XX -BC .6.36 -XX -BS -60 0 R03174; AAV$P5_01; Binding factors: YY1 . -BS -50 0 R03176; AAV$P5_03; Binding factors: 96K-protein , factor 2 . -BS 1 0 R03175; AAV$P5_02; Binding factors: YY1 . -XX -DR .0H R0GN:P100TTA0SA -DR 109;L2JX.X 1 E0B:M -XX -RN [1]; RE0006609. -RX PUBMED: 8413258. -RA ,oSMa oCo. ,.w.matehKLysCH K.cn Prirott. hyk ,.Aa -RT lnafrtnatYrfettincurvmcipelmmsos A ttet ciittctihaocreYrcndr s pe N ss1mt ci:te e1 oer tpa npiatioo riDYelsorfaeensane wet loiYia-eironnctaptTAt oe rcctoislanp tpinf rhettr -RL 18.o -B3l9e .MC2)36ll1(..9166l26 o :i -RN [2]; RE0000230. -RX PUBMED: 1655281. -RA n- h S .. SCo.ETY,h.SntaS,ge eih.,kL -RT fyaAdoh b dcalt1ernre- pirte u ,neTosrruseteEsra olGYsnas rpibapne Kn-eeatonp lrlm u ivoeLnyneonpeYfiripr1i,ae rdrsiIopi -RL 3713e9:l81C77. 9l8( )-6 -XX -// -AC G000060 -XX -ID CHICK$GATA1 -XX -DT 5a0) .b1 e.r9d.;c9to16(d4e -DT 1;doa22. 0e50k 1.u)te(pd2. -CO Copyright (C), Biobase GmbH. -XX -SD GATA1 -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; aves; neornithes; neognathae; galliformes; phasianidae -XX -BC 622..4..1 -XX -BS -643 -634 R03087; CHICK$GATA1_01; Binding factors: GATA-1 . -BS -592 -572 R03088; CHICK$GATA1_02. -BS -568 -555 R03089; CHICK$GATA1_03. -BS -485 -471 R03090; CHICK$GATA1_04. -BS -442 -429 R03091; CHICK$GATA1_05. -BS -409 -389 R03092; CHICK$GATA1_06. -BS -373 -349 R03093; CHICK$GATA1_07; Binding factors: GATA-1 . -BS -257 -239 R03094; CHICK$GATA1_08. -BS -235 -221 R03095; CHICK$GATA1_09. -BS -177 -157 R03096; CHICK$GATA1_10. -BS -153 -137 R03097; CHICK$GATA1_11. -BS -121 -102 R03098; CHICK$GATA1_12. -BS -59 -49 R03099; CHICK$GATA1_13. -XX -FA T00267; GATA-1 (isogroup). -FA T08293; GATA-1 (basic). -XX -DR 0R00A6A0TNS 0GT.:HP -DR 735 :LG.;AG 99MEG1ABMT -DR 40935ZE:.EGN TENER6 -DR T00R .R3D52: -XX -RN [1]; RE0012279. -RX PUBMED: 8628290. -RA hV..n F.bo.loennafeT.dr TnoMOC.. J ld c.ro. C,eG isna, nG.GirDror ,,enir .GA nskieMl g e, Lm -RT sniieeeraagth ueAoeAAiTogrir1 rtointh cirtno-N1G t ri--bpiefpb GtfitriAeh h an rqof eny-ciTatiaof stmDe ooornirfds ddthgr iei unvirlAlntAmtezn gcs bymw na -RL iB 2Co..1M26l 13-().l:.7 l6o92e842l9 -RN [2]; RE0008084. -RX PUBMED: 1400499. -RA Ama,n .Digih .nJlZ -RT dsihioeleym uo1en baece en mtnaa 1e mieigPneTensaeoos in ex Dg nn titsec- advAelu lifagettnaAns-dPm t bhldehs -RL -1m1.:C .2.)7 69hJl1eo29 428B9 i2(.090 -RN [3]; RE0002473. -RX PUBMED: 2014222. -RA . H.R sn l Tln ,uva.aoHnednGlEn efFodse,, G. -RT ftonc-fteiirronSreoctidAr ugoretorei rarort shocuheAeha ttp tm p tnef naaiyrvyGdt1T tc -RL :c8 3d.AA.0occ 011l 9rPt 93.8SiS0N4)..(8aa-0 U -XX -// -AC G000174 -XX -ID HS$4F2H -XX -DT ae06 )r..;1(.o9c4bd 91det5 -DT ().am1au2ddtep0 .v2.01; 11 -CO Copyright (C), Biobase GmbH. -XX -SD 4F2HC -XX -DE yvheFahn 4 2ica -XX -SY 4F2; 4F2HC; 4T2HC; CD98; MDU1; MDV1; NACAE; SLC3A2; solute carrier family 3 (activators of dibasic and neutral amino acid transport), member 2. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -CH 11q13 -XX -BC 616.. -XX -BS 0 0 R00005; HS$4F2H_01; Binding factors: NF-4FA . -BS 0 0 R00006; HS$4F2H_02; Binding factors: NF-4FC . -BS 0 0 R00007; HS$4F2H_03; Binding factors: c-Jun . -BS 0 0 R00008; HS$4F2H_04; Binding factors: NF-4FB . -XX -BR FR0008041; downstream; Species: human, Homo sapiens. -BR FR0008042; downstream; Species: human, Homo sapiens. -BR FR0008043; downstream; Species: human, Homo sapiens. -BR FR0008047; downstream; Species: human, Homo sapiens. -XX -DR m:HmLunSPH3AD 2LBnK:a; C.uaS -DR 4P0:.1NAH0T7GR0AT S -DR 9M4GE2 HF :1L1S8MB.;82H -DR 10.S:0M00N0GL080BE SENE63 -DR 0F:A_2_2atYEu0_HF9osF IRMYG.F_XFTAs:4c -DR _HYFLI2YM33 Ta0.FXF_S2Us9_E F1_t_:RU_240APAG: -DR 23.ATFF_ 1aGA3_ __FXUMRFt4YAs:2:EY00IH9 -DR 03_G AMs1F__34XUtAHT:F_F.FI9E Y:R22A_2Y0a -DR 5XRME9:AY09H.FY_tG8AFF TIU F__aA:32 -DR F AY9FE2MU_39 .X2_v5YF:AIT:H0aFRG_t8A -DR F:U2A7 EYG:_I3_ 3Mt_RE9H5_FYFaF.AT2Xs -DR _:tU33a9_AATs_R_FE7 GYXFFI3Y.H5E:FM8 -DR 8 _FI:50HMXE_tY_5.F4GAF:R9a_Ts9YF AEU -DR _T9XGMaYeLFn_tAeF0M AYRFFIF.4:2E :Hu1 -DR XaP:1T0a5_5 E_0I_3t1_3F3AYFMAX. FgpFUR44Y:3_ -DR 00B B1M8;101L:E010BAA.8 0 -DR A. 01F3F50;LE 531BMA0:055 -DR E;552K5AKM:A 024L .88540B -DR 995B00A8MEL8;75 .A070KK: -DR 2K9.4;A A 2EB60KL4600M:09 -DR :006 010AP10 1L;P.M0BAE61 -DR 0C100B001;0LM:6.116BB E C -DR L 0.0BB: 0C0M0030C3BE000; -DR MF;27.2LAEJ :H9B 046S -DR 2 9LGH:3B0.SF9ME; 2J4 -DR NJ;T:A.HB A5L L396M0SE -DR 22:E9S 9. 8MM;GH421LFBH -DR SMH:F3GB M2.2L0 4H910;E -DR M42B.F 2:19H4SHLM;E1G 0 -DR B M F;ESGHH2024.M:129L5 -DR H0M:H4 23G6FE12S9ML.B; -DR H4247BG1HE:9;.20S MFML -DR T0EN52:6.GREENZ E -DR 51.8 OM:M70I0 -DR 23CSA N.:HCGL -DR 06RSF11_62 Q001N.ME:E -DR R26106.:0NF E_S012MEQ -DR 3RES1F162.00_EQ0:6 MN -DR F61. EEQ1:R00S2N0M_46 -DR 0MFESN51:2 01Q0.1E3R_ -DR :0M.SF3_R4 E90QEN2 -DR .0:ENSQ21E0_P7F061R 9 -DR F0E06:2ERS_NP08 01.1Q -DR 111PEEQ0006SFR8:2_ .N -DR 0E60R08NE21:2_. QPFS1 -DR ._PE16F0Q1S2NE 3R900: -DR Q:32E8P FRSE_5.N00 -DR :.7sEG6HUNI92.0 N5E -DR 00:0DR2RT .0 -DR 5_4SRN1:S4TPA.AO_H6R -DR .A__HP5RT4 NS2AORS4:6 -DR R4HORT3.N_A4_SS6 PA5: -XX -RN [1]; RE0001199. -RX PUBMED: 2761540. -RA edDAahr. Pc LKeea.ngi,Mrk.n el.sH, M. sL..arCJ GY.npi- ,io.,i B -RT eenrr pefrn nnhca-n4nossm oeeul en ii aetyaeTlnTGhnCiFtttens CieraoePNcnEloaatlBt t ht aI Htrivhidnr iirMhs 2ncsaFop TaluE -RL 9o -e.oM12(9l.8llli759.29.8C5 :8B) -XX -// -AC G000264 -XX -ID HS$GBP -XX -DT et b4;10c.a(do.erd.)19596 -DT d9.6; 29ba5d1t.4e(0p d)uo. -CO Copyright (C), Biobase GmbH. -XX -SD GBP -XX -DE oaa-ginelentiupntybrgndi -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -BC 165.. -XX -BS -216 -76 R02897; HS$GBP_01; Binding factors: IRF-1 , ISGF-1 . -BS -128 -99 R03064; HS$GBP_02; Binding factors: AAF , GAF . -XX -DR TN.ATGS0264PR HA00: -DR D.00:0R 0TR2 -XX -RN [1]; RE0000446. -RX PUBMED: 1901265. -RA D r,lMLr ., eieJe.Jwre. kTahcD witoEJlk,D.cn.. -RT nf A,t ntotrboa-c apscm aNeogi tnCieaaldva mfDFalIiidFcyigrN--utAnGgamo -RL 9-1E0M .2 .3(7:J19O991)2B -RN [2]; RE0001471. -RX PUBMED: 1833631. -RA . cel,arn JrDlkeJ..w eT E.e ,.DD L -RT tfcerufn c eoo op enbetgisgetywitdltugoreantrtp inropsd eytubidotnl otat he- a ntsiaiceasdtcann cnmpnhnonh tai -panititodergriir- aitannniTsl vawaynoef -RL l(lo3i9el5 B91l.1)1-M .Co.. 5151741 : -RN [3]; RE0001567. -RX PUBMED: 1898761. -RA h...l ro IkeDl Jrc e .e. LwehTe.JnD,rSDlt, w,Ea -RT it snop dpteneyoagtc nileib O iruloocm oegrgnrnaai nartieetf ahm-mmb revenotanmt htagaiernsgipra iraplneun enpnllnteedpdtey anidi -RL o-(. ..ll1C:l11 1BM 1o119. 9lie)298 -XX -// -AC G020751 -XX -ID HS$ABCG5 -XX -DT ;k2c.0are..05d509t 0 )me(l -CO Copyright (C), Biobase GmbH. -XX -SD Abcg5 -XX -DE imAT rmiese(r as-stbaitb,l )fiP(bcuI5dWeGs He tnT n),l-menyEo g1 -XX -SY Abcg5; ATP-binding cassette, sub-family G (WHITE), member 5 (sterolin 1); sterolin 1. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -CH 2p21 -XX -BR FR0002267; upstream; Species: human, Homo sapiens. -BR FR0003356; upstream; Species: human, Homo sapiens. -BR FR0006822; upstream; Species: human, Homo sapiens. -XX -DR :Dn;mAHu 5L aGPS.KHmunaBC: B -DR PA 0S:1T0G72A.TNH5R -DR LENS78.3:0E00MB100S 0E5NG -DR E_FF3R 2Y3:_F. GucFoY2_AHFs0T:8XAtaMI -DR 8L_U1.3UP_:32: atGFAFM3RFY2T2_0E_3IFYAXSH _ -DR F_t03T A GX3:8Y_2U1H3AYFaE.F_R2FIMA3: -DR Ua 8X3t1._A:FE23_AH3G_Y23:I_F FM20FTAYR -DR 7Y3DU_ 9AtA.F 8GFTF_a9XE:F_H5Y2I:MR -DR 41C2M1;L001:2 42BACE2. A1 -DR :1 4M0;LCC.A E686187AB047 -DR E M1:;2.5LFA717133 1AF52B -DR BE;A:M33F022 9F02.339 L2A -DR 10M4L:A6F1BE6;F4 440A. 00 -DR A; LMB40:007744F4.0 A1F1E -DR ML;7K0 9K9 AB.9A910E9719: -DR 141511 B MLB;1BC4.51CE1:1 -DR 2E:0EZE.44ET GN6NR -DR .:201 0OIM5M2 -DR O.4505I69: MM -DR G.H g5c:ANCb -DR F 2E3:6M2RSE4_N.Q0 -DR E7E0P._S8F1N1 :R8Q -DR EN3G2:2.Us.IEN99 1H -DR _P .R11_:2RN4SSAT7H0OA -DR SNO.P1S70:_A2HT 4_R2RA -DR 7HT4SN31.R0A AS:RP_2O_ -DR 74_PHSTS:4AR2N R0.1AO_ -DR A T2_A0.R:SN4PHRS1O75_ -DR A.SNOP0 _6:R_2SAT74H1R -XX -// -AC G036757 -XX -ID HS$TRPV6 -XX -DT a0) ect2..(d5 r0e;4e02ch.6 -DT e0 2(0.)ah2dtde4..pu;6 50c -CO Copyright (C), Biobase GmbH. -XX -SD TRPV6 -XX -DE psai etc irnba nuV,oaee ebrh af nlrtnolrtc enmmi,tlse6cpneiayomtt -XX -SY ABP/ZF; CAT1; CATL; ECAC2; HSA277909; LP6728; transient receptor potential cation channel, subfamily V, member 6; TRPV6; ZFAB. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -CH 7q33-q34 -XX -BS -5508 -5494 R19310; HS$TRPV6_01; Binding factors: VDR:RXR-alpha . -BS -4337 -4323 R19311; HS$TRPV6_02; Binding factors: VDR:RXR-alpha . -BS -4287 -4273 R19312; HS$TRPV6_03; Binding factors: VDR:RXR-alpha . -BS -3519 -3505 R19313; HS$TRPV6_04; Binding factors: VDR:RXR-alpha . -BS -2169 -2155 R19314; HS$TRPV6_05; Binding factors: VDR:RXR-alpha . -BS -2119 -2105 R19315; HS$TRPV6_06; Binding factors: VDR:RXR-alpha . -BS -1283 -1269 R19316; HS$TRPV6_07; Binding factors: VDR:RXR-alpha . -XX -DR nHBaa;muPSK :umH:nDT RPLV.6 -DR 5PTHAT:7 G.R60N3S7A -DR 2:5 24YM46L1A;612.5AB2Y E -DR ES0B: 6GE00S11E05N0M.52LN -DR _YFFtcG7sE:X8FH.As2uF :_o6RYF_TI_M a20A -DR 6XEPRY_1_: :8_2GAHtF3UTL aIA.Fs_M72UYS32_FF0_ -DR RUYA72MFE AA3I1__6GtFF3F2Y8_0_aH:sT. X: -DR FYME 20s_FG: 6F_13Ya.28H_A:XTAI3_U7R_tAF2 -DR _tAFIXA38:YT5 _FRA69aU6FM:F H0Y_GE. -DR 2.Yv_3F:G9I TAa066_ERA8:FHtXFFYU5_AM -DR 1_eHeXFInM32A8AE LF6YuGUT.:FRF:_t FYa -DR A 3FM:L34 066B44E43A03;.F -DR 9:36.A B3AE6L57;952 7F2FM -DR 5AM9836L: A.F2F6 35;82B9E -DR M5034HJL. :A03024BA25ES;0 -DR L32;M1:5AA40.BJ1325 H40ES -DR 92.;L727J007799H S9: MEABA -DR 484BA7M;AH6E47SJL:9846 9. -DR KA13:51BL5.7;0 K00 AE370M -DR 6 E002MK0:7BL76 AA5.25K;0 -DR 0Y5ML05.0;0B30EYA0:33A 3 -DR 2A4:3;7900YY9MEL4A37.2B -DR 3E68U212UH .6318B;MS:L -DR 7:E7XALHSBMZF 88 .;B3 -DR ET5.R5E0E GZENN:35 -DR 6O60I6:8.M 0M -DR :CHNT. PR6GV -DR N 6E4108S:R6_M.QEF -DR S6E1Q_.RF6 1P:E0N1 -DR XE4121MR:65F0 Q01S_.E -DR 05.6P F1140_2EQ1S:XER -DR 2I007HE.NUs:4N 3EG. -DR 66HGU:NsEIE 8.36N9. -DR .46U.IsE7N3N8EH:G0 -DR O8RN_HA1TP7 A:4_.RSS1 -DR TRA8SAONS4.:7 2R1_P_H -DR 178P4 3NH:ATR_R_OSSA. -DR A4SO18RT4NSR.A_HP _7: -DR O7.:TAN41SSARH8R5__ P -XX -RN [1]; RE0047775. -RX PUBMED: 16574738. -RA Mh. ,. JeS.emtM,vK.S,ed W yP ,i an u.e aNKe.krkWBM. .i i -RT TTSDLIN TO16 NETIYE SHANEVIL HDIPDOTTP-EIUON LNTRIICGVMAD I2 EATSTL IAEMAIINC NIIDT PT TXYT OLESIADI5.ITC MUDSTRHTCNAMANHBVEPEORIM BR,NAO RLRSNM3 A VEY T -RL nrM o00d2 .:c.l6.( noEoi)l -XX -// diff --git a/t/data/transfac_pro/matrix.dat b/t/data/transfac_pro/matrix.dat deleted file mode 100644 index e49eab7be..000000000 --- a/t/data/transfac_pro/matrix.dat +++ /dev/null @@ -1,243 +0,0 @@ -VV TRANSFAC MATRIX TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH -XX -// -AC M00001 -XX -ID V$MYOD_01 -XX -DT (d9.01.w9 a29ee1;)1ciet.r -DT dd.p ..b2uo7ae9)2;0 9(1td1 -CO Copyright (C), Biobase GmbH. -XX -NA MyoD -XX -DE ounedpt tadtiorlieboe aesynrmg ntmc -XX -BF T00526; MyoD; Species: mouse, Mus musculus. -BF T09177; MyoD; Species: mouse, Mus musculus. -XX -P0 A C G T -01 5 0 0 0 A -02 0 0 0 5 T -03 0 0 5 0 G -04 0 5 0 0 C -05 5 0 0 0 A -06 0 0 0 5 T -07 0 0 5 0 G -08 0 5 0 0 C -09 5 0 0 0 A -10 0 0 0 5 T -11 0 0 5 0 G -12 0 5 0 0 C -XX -BA on3 nfsen snng5eluaetemii ctle -XX -// -AC M00002 -XX -ID V$E47_01 -XX -DT w.e9921td19i1e .e ).;(0car -DT 7.0di12 e8t0;dep).(uw00 a. -CO Copyright (C), Biobase GmbH. -XX -NA 4E7 -XX -DE 7E4 -XX -BF T00207; E47; Species: human, Homo sapiens. -XX -P0 A C G T -01 5 0 0 0 A -02 0 0 0 5 T -03 0 0 5 0 G -04 0 5 0 0 C -05 5 0 0 0 A -06 0 0 0 5 T -07 0 0 5 0 G -08 0 5 0 0 C -09 5 0 0 0 A -10 0 0 0 5 T -11 0 0 5 0 G -12 0 5 0 0 C -XX -BA fs EMyeignnt1 tkid yarEtawn4oDeoEeD1o) 7fMl1d sre7n 2-(,b+c o, riseoE 1 g4ds2 -XX -BS ATGCATGCATGC; R05108; 3; 15; 17; p. -BS ATGCATGCATGC; R05109; 1; 15;; p. -BS ATGCATGCATGC; R05110; 5; 15;; p. -BS ATGCATGCATGC; R05111; 5; 15;; p. -BS ATGCATGCATGC; R05112; 8; 15;; p. -BS ATGCATGCATGC; R05113; 9; 15;; p. -BS ATGCATGCATGC; R05114; 8; 15;; p. -BS ATGCATGCATGC; R05115; 7; 15;; p. -BS ATGCATGCATGC; R05116; 11; 15;; p. -BS ATGCATGCATGC; R05117; 5; 15;; p. -BS ATGCATGCATGC; R05118; 5; 15;; p. -XX -CC iefn bmtetle tc r ne;nudetrntEd errEsNunrxo[(p-i ]srv2aoueri= hi1eaager pt deiiiIic )od;tl nt dahu1o mte2tcGisi sctl1o 5sbn e scc cor qgNldyo en -XX -RN [1]; RE0000231. -RX PUBMED: 1846322. -RA Hlr..neDto. i,-Bam XS u -RT te ciot emfAandnenn ri 1irrdt2o oiniEm1stEt h pNAdmrmyora2neut iisspni iosrg fthbDvoin odotinrbnoEhr be cie n12a -RL . -0)1:47l9C49(el4165 9 -XX -// -AC M00972 -XX -ID V$IRF_Q6_01 -XX -DT ..ea24e0dt0;r2 d10tc( c3). -DT p.e2tt2c.dd 0 ;)45a6u.d(00 -CO Copyright (C), Biobase GmbH. -XX -NA RFI -XX -P0 A C G T -01 5 0 0 0 A -02 0 0 0 5 T -03 0 0 5 0 G -04 0 5 0 0 C -05 5 0 0 0 A -06 0 0 0 5 T -07 0 0 5 0 G -08 0 5 0 0 C -09 5 0 0 0 A -10 0 0 0 5 T -11 0 0 5 0 G -12 0 5 0 0 C -XX -BA oen mlpoptACNn3TNgccsCdeElidiM16 cRu isGeIqei e lSsuesmn1O -XX -BS ATGCATGCATGC; R00001; 3; 11;; p. -XX -RN [1]; RE0024406. -RA NATSTmF.aR_AeC -RT RAnMATFt CnSIATvese N XreoRi Rifsoi -RL 0RoR0eC 0NA 4)(4r2s A.p0:F0tTS3 -RN [2]; RE0017571. -RA mCNF_aTA.eRSTA -RT NR XT nRACMsFStrew NTeIiAeA -RL Fops2r)R00S0(T:AC1Rt0. N 20e3 A -XX -// -AC M01123 -XX -ID V$NANOG_01 -XX -DT d..6e2cr660(a0a)v.e ;tm 00 -DT d0aeavt7.)..( u;2 606pm00d -CO Copyright (C), Biobase GmbH. -XX -NA aNogn -XX -BF T08800; Nanog; Species: mouse, Mus musculus. -XX -P0 A C G T -01 5 0 0 0 A -02 0 0 0 5 T -03 0 0 5 0 G -04 0 5 0 0 C -05 5 0 0 0 A -06 0 0 0 5 T -07 0 0 5 0 G -08 0 5 0 0 C -09 5 0 0 0 A -10 0 0 0 5 T -11 0 0 5 0 G -12 0 5 0 0 C -XX -CC tai1effdthnPo dd(fiemrwN drMoi[ahs i lseo)eoitCEsPiana gANt aCTe ssdIto - nh gcIiy]iihttm bonmevt -XX -RN [1]; RE0047531. -RX PUBMED: 16518401. -RA a..A,egiBm ,L,...gVh er Chp.nP .LaeuRWuu.nJot..e Se PDW B,. YC . .K Ynbtini,vX son vg, L L.oq.o,ngnYC ,Cn.o,,,nZe ..hV.sK .zh, Wug e,eah LL.e. VRW.Hc .r,,,w B ua, SioiC e. Bng . ,.Leh.LGQtLHH ,Z,N,W,Ju ..GW . .uL W uioKJgo.oo .h. oX a K -RT rp4nyoiomes mnwntsstrncermtn iach on pngrcog.staelei ikcylept uaotilOu neebrdtr sN Tea clueo -RL .3n20aN08t (-.teG4640 1 3:4).e -RN [2]; RE0047626. -RA _TSNRAm.TeAFaC -RT atwmeris.iNtr nxe e -RL 0oRACAe2St0s)p1RF10.rNe 0 T:(20Rl06 -XX -// -AC M01124 -XX -ID V$OCT4_02 -XX -DT (6e m.atr0;. 200)6c0avd.6e -DT 2ap0m0d 760.tdv0(;a).e6u. -CO Copyright (C), Biobase GmbH. -XX -NA OF5tP-4c)U(O1 -XX -BF T00651; POU5F1; Species: mouse, Mus musculus. -XX -P0 A C G T -01 5 0 0 0 A -02 0 0 0 5 T -03 0 0 5 0 G -04 0 5 0 0 C -05 5 0 0 0 A -06 0 0 0 5 T -07 0 0 5 0 G -08 0 5 0 0 C -09 5 0 0 0 A -10 0 0 0 5 T -11 0 0 5 0 G -12 0 5 0 0 C -XX -CC sEsmf- edCyhnoT rsdti tcdas caitOMttfiPa won 1he)4ai-Nvtd Cgs itiIeIoAi bo i( h[nme]ttP dflheremio -XX -RN [1]; RE0047531. -RX PUBMED: 16518401. -RA C.C.., gg.K eLu.W.urhh,V,ege KC Xue. q W QeLsWuuvL hgi s P B, .H iJ.,B. ni. o . Hn,W. LXB.DLY.Sat e.h ie neue.,o.oe.. .L ,.oK ,.ou,a R,piGL zG ow.RgK .,o , NYHoL,,hC ntn CWnA,Si nW, ..oZ go ahr.mV.g J,V,co,b.,Ln. . uvtnBhLZanJW..a ,PeY . -RT an o c peetetilsois OwlNulr gerdtmitiluhptcmep cnsecunsbeaorre.kneor cntannyyiTa tsngrmo 4 o -RL 3 0.G3082a:6t4)e40N4. n1e .(t- -RN [2]; RE0047626. -RA FeTAm_Aa.TNCRS -RT rweNa rtx imneis.te -RL p)NFtR0l02:Te11oC0 A0 rs 0.e6RAS20(R -XX -// -AC M01125 -XX -ID V$OCT4_01 -XX -DT .ce66 tea0 avm;0)r0d(.60.2 -DT 1.pko)e9; 61ad2e0d.t0. (u0 -CO Copyright (C), Biobase GmbH. -XX -NA tcU5-1O)(P O4F -XX -DE dpl2iotciOreeoisi ,n4 -mindiotfxctSojtc -XX -BF T00651; POU5F1; Species: mouse, Mus musculus. -XX -P0 A C G T -01 5 0 0 0 A -02 0 0 0 5 T -03 0 0 5 0 G -04 0 5 0 0 C -05 5 0 0 0 A -06 0 0 0 5 T -07 0 0 5 0 G -08 0 5 0 0 C -09 5 0 0 0 A -10 0 0 0 5 T -11 0 0 5 0 G -12 0 5 0 0 C -XX -CC tesinocerost cdfT wI-)lefiirbtentPt4(heihn a]iPs -ei g h1 eoafe t dmsmadmiyiO dhrttd[ ovdaeoCtEWi s -XX -RN [1]; RE0047531. -RX PUBMED: 16518401. -RA n BrnC., he.BY ,ViB.aKnWL L W, hV.R Cu.Ze.LHoaD,o Cu B..J.s b i.g ,,v... ,NnnX.geKL heH. c.ipW .PK,eH .g, noq r Jag,oi.. Lo..aYWJo e o,h nG ..hn ootgeXW,o Y L, u o ,wtK zL.e .CZ.SL, W R n uSV,a L,Lih.WAP,.gs.i.u,em.,e .n.,h uGveQg,uCt . u -RT tceyntmb aieoT atkeioycsmoo odisa regnupln eu ONepeinit nrgtellssrmsr.wrtr c4cla ce tuh nonnp -RL eaN3( 4842e16)G-..0t4tn3:0. 0 -RN [2]; RE0047626. -RA _NaAeT.CFATRmS -RT itNeasi x.wtrrmeen -RL NT20oSA(RCe6R00RA:s0p0F 0 .)1lr21et -XX -// diff --git a/t/data/transfac_pro/readme.txt b/t/data/transfac_pro/readme.txt deleted file mode 100644 index 30f7355d2..000000000 --- a/t/data/transfac_pro/readme.txt +++ /dev/null @@ -1,5 +0,0 @@ -The data here is a very cut-down version of Transfac Pro, used without permission. -However, the only thing these files represent is the file format which isn't -beleived to be copyrightable. IDs are intact, but the actual useful data is -randomised or artifically created. At the very least the usage is believed to -be 'fair' according to copyright law. diff --git a/t/data/transfac_pro/reference.dat b/t/data/transfac_pro/reference.dat deleted file mode 100644 index 404cda184..000000000 --- a/t/data/transfac_pro/reference.dat +++ /dev/null @@ -1,92 +0,0 @@ -VV TRANSFAC REFERENCE TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH -XX -// -AC RE0023998 -RX PUBMED: 14988562. -RA eb .,r.ohG.,lr ka a. e.lLf. ,lo L k n fdlP rotRiFpmocfl e el. u oY ,I..VeGZDrS.inl.gl ,e,T RiT,,elr,DyB . or.dHM.NrddrGJir..BOD. i,a e.z.Ee oBn R . ANsgA,JK .G. n r e.W,u -RT tnissnatr pocaHlnctabp oar gn t Naersnrdr xocvffi Fireseoynoieerpsn.elC o -RL .830 3(e0 n)48c2S01c:3-137i1e -FA TRANSFAC: T08612; HNF-4alpha; taxonomic class Mammalia. -FR TRANSFAC: FR0001534; human, Homo sapiens. -FR TRANSFAC: FR0002267; human, Homo sapiens. -FR TRANSFAC: FR0003841; human, Homo sapiens. -XX -// -AC RE0047531 -RX PUBMED: 16518401. -RA o on.gLg WJ eZeWQ,..n . aC,L Pt Yn.,o Yn cqg iBu Wz.aK.onL..ogL,, .VSh,a G,,JW.L , ..n.L.h pXi C.. u,huh hK eg..su..uoo giuh.H ,nh .,,t o Y.vACaCgS.V,.LeG R.ZiLn sWJ o.,ee B u, n a.rb HPKeVo,CWe,.o v i L..,rmB L. Bu . . H eiNn DeR tX ,W w, Ke, -RT pe.s ra m eTsrttt4nkenceeepgnyniowldlsmle rt aoNitng eOa cycrip nbcmt onruoislirhseuo tauonc -RL tn020e..t ) :G1eN04- 34.8a46(3 -FA TRANSFAC: T08800; Nanog; mouse, Mus musculus. -FA TRANSFAC: T08969; Nanog; mouse, Mus musculus. -FR TRANSFAC: FR0012904; mouse, Mus musculus. -FR TRANSFAC: FR0016884; mouse, Mus musculus. -MX TRANSFAC: M01123; Nanog. -MX TRANSFAC: M01124; Oct-4 (POU5F1). -MX TRANSFAC: M01125; Oct-4 (POU5F1). -XX -// -AC RE0047775 -RX PUBMED: 16574738. -RA ..Bet S . ,.u i rMeK ,,d. vWeWk KaS.ee.nyNk mJMMih. a, i P -RT INDD VDGT C1AALEBEI.EIT IYIHLA6ITTE E ANV ITSL MTRTANYE TM NISP TNBAUTPOIORSL I- NVTOD,MHIRRLINSDX TRPY NO CAELUAOA SNMMNT CED5CTH NII TERTOI2IMTVPEH3DSAI -RL Mc (o0o.. 0n)lnir.do 2E:6l -GE TRANSFAC: G036757; TRPV6; human, Homo sapiens. -BS TRANSFAC: R19310;; human, Homo sapiens. -BS TRANSFAC: R19311;; human, Homo sapiens. -BS TRANSFAC: R19312;; human, Homo sapiens. -BS TRANSFAC: R19313;; human, Homo sapiens. -BS TRANSFAC: R19314;; human, Homo sapiens. -BS TRANSFAC: R19315;; human, Homo sapiens. -BS TRANSFAC: R19316;; human, Homo sapiens. -XX -// -AC RE0047626 -RA _eCART.mTaANFS -RT ixNr e r.setitnmwae -RL ) :N0Se0R1rRs2o 0 TR010CFA6A2tp0(le. -MX TRANSFAC: M01105; ZBRK1. -MX TRANSFAC: M01107; RUSH-1alpha. -MX TRANSFAC: M01108; HOXA7. -MX TRANSFAC: M01109; SZF1-1. -MX TRANSFAC: M01111; RBP-Jkappa. -MX TRANSFAC: M01112; RBP-Jkappa. -MX TRANSFAC: M01113; CACD. -MX TRANSFAC: M01114; E2F. -MX TRANSFAC: M01116; CLOCK:BMAL. -MX TRANSFAC: M01117; OTX. -MX TRANSFAC: M01118; WT1. -MX TRANSFAC: M01119; KAISO. -MX TRANSFAC: M01122; ZNF219. -MX TRANSFAC: M01123; Nanog. -MX TRANSFAC: M01124; Oct-4 (POU5F1). -MX TRANSFAC: M01125; Oct-4 (POU5F1). -XX -// -AC RE0047634 -RX PUBMED: 15743839. -RA STi,rNHHKs aa..Sd .Tdu m,,aj Kaauroo,Ho.ia. M ,.Kk,,H u atnua oa t aYdba..mirT TuN.et -RT aoeNfnmusiriOorrner. tcec eqdio aeissnxrxnooa csnmt fa eoesgir el eogltrdatipnanreete rla ngpSu -RL 0404eC2:8Ml55. l52()25.o i7ll o. 2-.B -FA TRANSFAC: T08800; Nanog; mouse, Mus musculus. -GE TRANSFAC: G031512; Nanog; mouse, Mus musculus. -BS TRANSFAC: R19260; Nanog (Nanog homeobox); mouse, Mus musculus. -BS TRANSFAC: R19510; Nanog (Nanog homeobox); mouse, Mus musculus. -XX -// -AC RE0047637 -RX PUBMED: 12787504. -RA h .KeyMarKa mk.sYiia . ,MHMtMa.aSnzaw,a kaghTakum ,a..oIwd. MMuTaia, ua ,i,hra.ama, t eaMu oYS,.akKs -RT Smoeitnt T els iresde irnoeqcofEgp dm aeuriasho n hycaasr.onoilenbilicrunnlnemt tNo epfepape uo -RL 33 02l102 -1.l(C136):46e -FA TRANSFAC: T08800; Nanog; mouse, Mus musculus. -XX -// -AC RE0047645 -RX PUBMED: 12787505. -RA S.eRedtbt i..,Mhb, lowmCnh .iTLro. eD ,S,AsmicC., h ylbIseo, Naees . roJ Se -RT ioei.N fclFoilsngers aeu ypoern aesinsxen nprcostmottiolc n cc p,llymt nuigau tofacnb niinagsorn -RL 536:34.3-260051()1 el Cl -FA TRANSFAC: T08800; Nanog; mouse, Mus musculus. -XX -// diff --git a/t/data/transfac_pro/site.dat b/t/data/transfac_pro/site.dat deleted file mode 100644 index 6bad8267c..000000000 --- a/t/data/transfac_pro/site.dat +++ /dev/null @@ -1,806 +0,0 @@ -VV TRANSFAC SITE TABLE, Release 11.1 - licensed - 2007-03-31, (C) Biobase GmbH -XX -// -AC R00001 -XX -ID HS$IFI616_01 -XX -DT 0r0d.c)i(e.;t1a 90ee 9w.26 -DT 0vpd12.am1)t(ua d1;.1.0 2e -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE IFI-6-16 (interferon-induced gene 6-16); Gene: G000176. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -SQ aGAGACATAAGTgA. -XX -EL SEIR -XX -SF -172 -ST -98 -XX -BF T00428; ISGF-3; Quality: 6; Species: human, Homo sapiens. -XX -MX M00972; V$IRF_Q6_01 -XX -SO lNa3sl;IBro00hFtaB -p+8.i8 -SO .0i;uDa01d6 -SO -p1 lah eH .;N0I1a+F0aL -SO .80H1 ;0FF -SO u .Ci L0D0M8;d+3a -XX -MM onerl aorithptre/ogopinephcpinnntf -MM ia fhrittecoynerneneetl -MM rtdinarotleae g -MM atittne meiernorhefcneyl -MM aolmtnocptheoir ettnyi -XX -CC -rro iaFiobnsirFntpFeidutkIamhtn fseh s)lfu abtwHhfaol ina( cN -XX -DR (N6:E.L:HNI 9 0BI0);0242YS8 558MF -DR H 9P7_SNP.202IDIE:;E0 -DR 9HS9TRA__1SN:.1OAP6R -DR 26 _OTRRA.H:9SA1_NSP9 -XX -RN [1]; RE0002135. -RX PUBMED: 1707163. -RA boyuhB.,CRe l .eL -RT ean Atfhttn cn otnrpinrpatteanescnmif ertlor-f ni ioteeDt:voaa emieNoseo rrnto-orite ipAoeroHn rpDe lmosNs -RL R ie11.is9171Ne l:9A.c 9 )udsc5(c -RN [2]; RE0001119. -RX PUBMED: 2243138. -RA ,jS. R.AaS, d,nnDoo. ZtD ,nM,MBt eiarrars m. g i. J ,epoh.Tsat eEis .oaz.wnColKne , J. Hir s,. SO aH.ee.Hhtesr -RT avl tpbciemennioshhsni ainecotcdao nrdcmris onitgvlilape intasnl trc ppnodslitneerioduip aosdailiieonayelt riemro caaaotytp oeglsabisunkh rsecep dytrn nuog bhcnnfut- nes reteeme teifp ll o nrlhe obu etlimornleua efrrsiA -RL 9 .1...148 Jt6v -Iei6(s097C1n60l 6):n -RN [3]; RE0002131. -RX PUBMED: 2123539. -RA ..k lM e.I . Mi , l I A, ...rm.c amtKTDS,GeA. Rr kM .ararAl.,AC -RT a redupr ren ooicisrdalnapnem nTasnbmincyst trhcdia aotfag f -RL )R8.0. d1 A9s39ucec8N:50ii-(e6s6 15l7 c -RN [4]; RE0000307. -RX PUBMED: 2721502. -RA J.rewl ..rG,Rae.s S. .AR Rtr oMDKnt a roe..eMe,u ,e.r. M n.LGIAkP .C i ,, J .iGl,lCT .. -RT nstoniie corens-evne iitstafpo DalmdvOtpoiuvtei lAsca d ariticlribnd rn dNtrpida ivgerctuntentnoiufinrsndfs nego -RL 91EO3)1.8 :99(3M. J-88B8 -RN [5]; RE0002261. -RX PUBMED: 2919169. -RA .I..R .K.r,.etlCma. rMkMaDe SI.raA, m G T, -RT epeanpncotRbemraioDenoiiiepeat pytga Afe tdar a t fr l ennnvapsyniric d tt lsbatdl etloraoon ftt.lh hiNnusci e-n -RL c.o2.2l8 c1038 .P:9S6. () irNd .1S a79Aa0-AU1ct -RN [6]; RE0000306. -RX PUBMED: 3359997. -RA h ew r C A G lY ICtro.ib . .GkaMeo .rarGj,a.STe.ie,.s,k K,. ..r,rCR.l.tCre PDtS n -RT n rmol6eenI 6tepgentf eneouftnea-e mn e hsshr1o re -RL -8E1)9.9B .:M7 58(JO2 8 -XX -// -AC R00019 -XX -ID MOUSE$ACRD_02 -XX -DT ceeei0w 2a0()906.91r.d ;.t -DT 1di;7. wad191)ut 9epe(.12. -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE AChR delta (acetylcholine receptor, delta-subunit); Gene: G000457. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; rodentia; myomorpha; muridae; murinae -XX -SQ gccgcATGCATGCATGCtttaagc. -XX -EL E1 -XX -SF -92 -ST 1-5 -XX -BF T01786; E12; Quality: 6; Species: human, Homo sapiens. -BF T01788; E47; Quality: 6; Species: human, Homo sapiens. -BF T00526; MyoD; Quality: 6; Species: human, Homo sapiens. -BF T00528; myogenin; Quality: 6; Species: human, Homo sapiens. -XX -MX M00693; V$E12_Q6 -MX M00804; V$E2A_Q2 -MX M00973; V$E2A_Q6 -MX M01034; V$EBOX_Q6_01 -MX M00929; V$MYOD_Q6_01 -MX M00712; V$MYOGENIN_Q6 -XX -SO 2C4sa. s2b0mt0y; lo -SO mrec3- )l0oi.;Eocu(03.se -SO oyt 8u0se3C;b7m.2 -XX -MM elih esfrgtti dc -MM olht rteyeicternnniefea -MM n cnynastlfoaausiil -MM teigrntraoaedl -MM tlhntotfnme reeciraieyen -XX -CC 4th]iiMrd m tna oo gobE1a s hr1te[weyehiiodnsni7erd E2enDmey r -XX -DR 8HARE )B: 6M4:M6L6.(481A137 M33MCD; -DR :169 C385BRLHM71:1M3M4 3 (AX9.E;) -DR _3RP.S1N1U_77TA RM2:OM -DR _PSM.1URM_O8AN7 2TR6: -DR P:2TO_278RN U.MS_6ARM -XX -RN [1]; RE0005844. -RX PUBMED: 8355673. -RA M neu.SSA.JnrBo.mid , . -RT at son seiaecdv eu fticgetorsleos tnsnn e-x rcadnidehinghEtiu tAandtsoganly pee bm lieni aryiorbpeecouetro me -RL 31.ei13o551)91l -(3:34.loll..9 C0M B -RN [2]; RE0001768. -RX PUBMED: 2797207. -RA ,d.J..wn .BSu TJieaBn lr d -RT -tlcepmubn ae ancigt neMkeerii nnciir gns-se f perooooelee ydcDed bss1ia ogatcsgllMrlxyti nluey -RL r2 t u9416e8(3)7109.1-Na:7 -XX -// -AC R00850 -XX -ID MOUSE$IGH_16 -XX -DT w .69)i01e.0e(2 ar.;dte9c0 -DT . de)et1(i60u 05p9d7.;aw9. -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE IgH (immunoglobulin heavy chain); Gene: G000537. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; rodentia; myomorpha; muridae; murinae -XX -SQ ATGCATGCATGC. -XX -EL n,mEurhee nca2 -XX -SF 378 -ST 913 -XX -BF T00207; E47; Quality: 6; Species: human, Homo sapiens. -BF T09514; HTF4gamma; Quality: 2; Species: human, Homo sapiens. -BF T00406; INSAF; Quality: 6; Species: hamster, Cricetulus sp. -BF T00526; MyoD; Quality: 6; Species: human, Homo sapiens. -BF T08853; myogenin; Quality: 2; Species: human, Homo sapiens. -BF T01799; Tal-1; Quality: 6; Species: human, Homo sapiens. -BF T01800; Tal-1; Quality: 6; Species: chick, Gallus gallus. -BF T00790; Tal1-PP42; Quality: 6; Species: human, Homo sapiens. -XX -MX M00698; V$HEB_Q6 -MX M00712; V$MYOGENIN_Q6 -MX M00993; V$TAL1_Q6 -XX -SO I3;N0H30T0-3. -SO JA630 B.;-B0 -SO .0ae00 HL;1 -SO ; 16S1940.8 -SO Ucl 462;2 3.0 -SO .; 42EJ40 -SO 2;0 523.4D -SO h (.r0uty3calms)n;r ee3-2 -SO me.r6;y(ls3ec0r )o- 2tseu -SO ais6r);oec0a7.oe-Eu6-b.m (dllcm -XX -MM iechte sfdlrgit -MM ott IieaspnrofNiDgn -MM itoo yep a/ioincgnmoct nervtih olinmvet -XX -RN [1]; RE0003087. -RX PUBMED: 1312219. -RA ns.H. R ul.o Kt,E.,.i sSE NO-gnon .J -RT 2 odanoendarEaibsit t h g oi aeucu-ameoF2x p e hirtHgct ci eNo yfdr,igdaoatml tt Tt relAtlEB-n rfleax-oiolyt honDhIpnyabeeiAln l -RL oi4(102eM139.10:.l1 ol9-l12.C lB )2. -RN [2]; RE0001629. -RX PUBMED: 2038315. -RA - CeaC .Js,u nH..nQ- e,B.hh.gTLerH ,.R -RT -eog ni2oxe pn tlo Ecs1o/xlton nnrci--aii hhlon eraottievpwi4ey afiithnastE1 d 7e tnh isEhl-tiitbocnoihperac -RL 92lli..1M7Bo llo34C031:1-0 () 91.3e. -RN [3]; RE0001447. -RX PUBMED: 2111447. -RA .rMmBtinaD me.AnL ore, e .oadL b r.a,Tnli -RT no ohporanillbec circG evvdplpIEne aeheil-mm oem hnyhamoIief s ecn naeEniuSlRs onynatoeeiet fCgLnHtnugSxedvooin fti-uEmn -RL 0l092 -5.i6.1.1:oleB3 93Col1M)1. (5l -RN [4]; RE0001998. -RX PUBMED: 2181401. -RA ,C okenoW. M.AW e lrPiha,Rr.A., D.nA smrek.a -RT iootca ootfsrtbct aelmtip cieeAiico aacnNu henmtoisua e grApaerr nn n vgflniDnpttuaescra fnl ecde -RL 5-1 cdcs i 1ecl(e9 A811.N.1u9:Ri60s)691 -RN [5]; RE0002608. -RX PUBMED: 3917574. -RA gr Gw,huaS EtChoee.niplriMG . c.auhAbsW r.,.T ,s -RT meuFunoepictoe iinI runo-n nnofsgmncei Iw a eCb ncoiSnVErliaflctt hre oLlaai lrhcg lvaaiBt -RL (n1)2Sc4 8 0e-71c932i5.:e41 -XX -// -AC R02116 -XX -ID AAF$CONS -XX -DT (1sr9 tF)de c4;1ee2.0aiaMs.r9ty.e1/ -DT 0 ddb5od 19.p.t(.1)3e6a9u; -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE consensus. -XX -SQ ACCTTTATTTTTCA. -XX -BF T00001; AAF; Quality: 6; Species: human, Homo sapiens. -XX -RN [1]; RE0000446. -RX PUBMED: 1901265. -RA . ri Dk.Tn l.l,.w cEer,DhoLr M.DJiJweeJc.ta k,e -RT iNtm ctcFynt,o ad-r GF-Csagrcmiatfl D-moaNgAtieinaIidnua vAebalag on ofp -RL MO9.JB21920-:3 E1(.99 )71 -XX -// -AC R02418 -XX -ID HS$MLC_01 -XX -DT 9(1;0 e2ctd1.)2 1.wie.aer9 -DT 2pe.;maua .v00d.d007 6t8)( -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE MLC (myosin light chain 1/3 locus); Gene: G000347. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -SQ ATGCATGCATGC. -XX -EL hc r'aen3en -XX -SF 214 -ST 142 -XX -BF T10006; Myf-5; Quality: 2; Species: human, Homo sapiens. -BF T00522; Myf-6; Quality: 2; Species: human, Homo sapiens. -BF T00526; MyoD; Quality: 2; Species: human, Homo sapiens. -BF T09197; MyoD; Quality: 2; Species: human, Homo sapiens. -BF T00520; Myogenin; Quality: 2; Species: human, Homo sapiens. -XX -SO 3Ece)mrsc0. lei0(-;o.3uo -SO E(m36ich;unre.0co0-. a)l -XX -MM lat ogrreditean -MM eraeettcltfioynreihmnne -XX -CC 4osydfnn]ogb gydfM3 k tta,i,by- Mb Muy nnrfiy 1-n -nfMetiow6g l,o-ti[5 -XX -RN [1]; RE0002118. -RX PUBMED: 2243772. -RA r,be.Bhl oueu tMe noBtBr.eH h nrrg,Ee-w o, ol M.o.un,eidaarNWA.EgdWtn s Bht .B R,,lHTn.nn . r.B. Do., -RT fiasnndt ityuseome ceen rr eth wh1enor3aredtico rlh ln e eom/sgeolc soyr adenoiicgsC mta n gr maovttfimatuhcLp ahMAfnul -RL 91e1dR2cA98036 N. .ies 9uicl (c:)s -XX -// -AC R02419 -XX -ID HS$MLC_02 -XX -DT tea2.)w1ce0(.i12de91;.9r -DT 2) 8.uapm0d; 06(0ea7.d.0vt -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE MLC (myosin light chain 1/3 locus); Gene: G000347. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -SQ ATGCATGCATGCATGCATGC. -XX -S1 within conserved region -SF 527 -ST 030 -XX -BF T10006; Myf-5; Quality: 2; Species: human, Homo sapiens. -BF T00526; MyoD; Quality: 2; Species: human, Homo sapiens. -BF T09197; MyoD; Quality: 2; Species: human, Homo sapiens. -BF T00520; Myogenin; Quality: 2; Species: human, Homo sapiens. -XX -SO .o3u)(;i0E3msole-re c.0c -SO E0ce03cai(hr )6;mn.l-u.o -XX -MM oattgaedierrl n -MM hre niertiatnemfoyelntec -XX -CC n) tndiknbfse[6ffn to Marly n1,- wa] edn y3o rybktMg u- nnto 4bei-y oMiag(wvo5Mgd-dat fyyi -XX -RN [1]; RE0002118. -RX PUBMED: 2243772. -RA ,a r ,gt .D. e,baMBHBlWhnweuR. .dN-EH rrBsn , r.Brtot,e,eEg .ltTooihnl oe,Mn dhAB B eorWoun.....nun -RT impidcLosdn crM uan inym o3helaug eec hAt re nmint vmieh o rineutos tawllsorcag/yare1ftdhah clse trCfanort neso mogtfee -RL 9l 8AidNss.i(1 6c.9)Ru 9c1c2:30ee -XX -// -AC R02452 -XX -ID HS$AAC_20 -XX -DT 2)911re.ede(.1.tc9iwa 2 ;3 -DT 0d.itued26)(1 ;.a003n6 dp. -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE ACTC (actin, alpha, cardiac muscle); Gene: G000193. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -RE promoter -XX -SQ ATGCATGCATGC. -XX -EL xoE-b -XX -SF -65 -ST -49 -XX -BF T00526; MyoD; Quality: 6; Species: human, Homo sapiens. -BF T06361; MyoD:E12; Quality: 1; Species: human, Homo sapiens. -BF T00528; myogenin; Quality: 3; Species: human, Homo sapiens. -XX -MX M00804; V$E2A_Q2 -MX M00973; V$E2A_Q6 -MX M01034; V$EBOX_Q6_01 -MX M00929; V$MYOD_Q6_01 -XX -SO 2;C. 2mabs0sy0tl4o -SO 23C120.04; C -SO c3ml.o()ir03cEe-0;eo s.u -SO h ci60are.ouE-c3m(n0.)l; -XX -MM asyinncalnfoaul tsi -MM adittloergraen -MM ottelhmfeigip i cnost -MM eiheeeon nrtcifeayttnlrm -MM npfiti(dgnbsiie)o tnsab uhryd -XX -CC Demnsym trsaedo lysp0re icta ap eftnct4phi-u0iet aStufyioqreuMsnsaa cl-ap scal0nc>fielhr ss2osccfo Ritnco[0unc 1m oono tu ob h lie 1doedg ctelituoRcinta]h fnipaerh6nrpi- inetw,rpsa1aRmglcaits ei<,secucfcvoiSaoF2 d7e le -XX -DR ; 3HM)27.8(E4:ATC48 41BM3SA:C3L -DR 6T3;SP:E 0P1. CHE3C_DA -DR 1SNO8_:10STP7ARAH. R -XX -RN [1]; RE0025127. -RX PUBMED: 10082523. -RA sl,iHB aea.ias arE r KmV,id.,Y. oto mSe.ilre aeLd -RT ltosol mesdahpro thattmpreSlnal-syc aioeipopaagcnountni iepfsdcieni hm nciova etx -nulmyh ica cqicle coMpmoteix oebao .1rtnr intrtroniciaterhsnfaauxpprfil ad eaet p-rticrr oo -RL lC5)o .8. ol:.5.9l l79(49217Bi9-eM21 -RN [2]; RE0000718. -RX PUBMED: 2123467. -RA . .,bKK.oltlteaL r,sV.SWei sederrAe -RT ecMsaiie gpuGeisaci-onc csn haDqybp1adrbtc ot-ea rrMCren1onceputl- e cidsixaeo reg irhafn,i osfxSlfAn,p d -RL .:18 e9.9DG)21v 81e1ne(420s-1 -XX -// -AC R03064 -XX -ID HS$GBP_02 -XX -DT e91r).ai;9. e (d23ct10w.e5 -DT a1 wi.1u).15htid(199d1p. e; -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE GBP (guanylate-binding protein); Gene: G000264. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -SQ tatcttatagtCTTTTATCTCAATTaaaccc. -XX -EL GAS -XX -SF -821 -ST -99 -XX -BF T00001; AAF; Quality: 6; Species: human, Homo sapiens. -BF T00300; GAF; Quality: 6; Species: human, Homo sapiens. -XX -SO I;e LlhF.- N0+10p1aHaa -SO .g5m;H+- FNaaILme0a4 0 -SO Ia6+ .4 m-a 0S;2FN0gmF -SO - Fs7d4 boat ;fN+ lapd0lsbaioh0iplrI.i -XX -MM diutnaseeecessloxg -XX -RN [1]; RE0000446. -RX PUBMED: 1901265. -RA aD.r er ETc J i.Jw. l,eiokce.D.e.n, L lk rwhMtJD, -RT Aa-n,ot FslFctgdo-mvCadioaaD mfnuoaeefGt ANagnaapi iirtymlntcg IrN b-ic -RL -E 917.1.MB12 (2O0J 39)99: -RN [2]; RE0001471. -RX PUBMED: 1833631. -RA EJeeD wJD. L.n, ae .T rDe.k.,clrl -RT ttihnonbeg-ifnitoeyaa il ritngtolr sb-t slarci w foa e nasiphotahc dnva deoe enntinponpn-nsaodeietapinrndncafrpo tteusgtuaoctiwtrTti e ruyagce imn dtynt -RL olC541-19l.o17.5e Bl5.1:1M 9)l ( .31i -XX -// -AC R03174 -XX -ID AAV$P5_01 -XX -DT d)4art.9 3c0w9e6.;ie1.e(1 -DT 89(dpr9ard.1e8.1da.u;t2ee0) -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE P5; Gene: G000001. -XX -OS AAV, adeno-associated virus -OC viridae; ss-DNA nonenveloped viruses; parvoviridae; dependovirus -XX -SQ CAGACGCTCTGTTGTACTGTAT. -XX -SF 6-0 -XX -BF T00915; YY1; Quality: 2; Species: human, Homo sapiens. -XX -MX M00793; V$YY1_Q6 -MX M01035; V$YY1_Q6_02 -XX -SO .0aHe100L ; -SO acu);(ire.cn0-El0mh. 36o -SO 77 P;-0Y.S82 -XX -MM rdhf ilgstcti ee -MM otNngpIiis enrt aofD -MM cetfi ohntlip iogemts -MM reymreinienn leatfteothc -XX -CC ian1idi2k]s w[[ebnet g] -XX -DR 19X01212. J270: ;: B3M)6(ELX -DR VAE1AP D:2.N7;6V2P0C_AE -XX -RN [1]; RE0006609. -RX PUBMED: 8413258. -RA oSo,y.twio My.rKHtmh.C a,cro. aK.enAkh.C P,asLt -RT i1 nttiit srpneaspiw Aftinpocarspottc telrcterersfasYYani mt cvctltpe etetpoaitmeocif s oeninlYrfop stnteDa aticN iremrhoi aclrecYurr rrnoteA: ledtT tne aetahcoiot-smnn1i -RL 33) ll.l661:9le96(B6C.-. . iM1 1oo228 -RN [2]; RE0000230. -RX PUBMED: 1655281. -RA SSnS,e.hat,ko .,L .-nhgC S YheT. . Ei -RT ena fy1laYoso rltneesI- sroo imn,anr rrp dpphnGcK e ALdrlobYriaen pfrearivpe ntl e -asnnrbi,irpieosydeuaou1 ireeutEse itTp -RL 98e363-: 77l19(Cl. )871 -XX -// -AC R19310 -XX -ID HS$TRPV6_01 -XX -DT r ..6d2e0c;0.2h540()ateec -DT 0d0;.(a6 upt 3)ee0c25hd..0 -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE Gene: G036757. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -RE distal promoter region -XX -SQ aGTCTAGGGCATAga. -XX -EL V.DER-55 -XX -SF 85-05 -ST 445-9 -XX -BF T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens. -XX -SO c ;32aC.o92-0 -SO amei-20tn(bah.r1c;u)ac 6re -SO 0.S2010;38L -XX -MM poeiinnaioctmhm)apuoPcC mhIirrt(ni t -MM stgh rfeltdeiic -MM syalc uiflnatoasnin -MM hegtipicsofimeo tltn -XX -DR 6MA3422-9215).-1:A( 6:;Y 57Y45B55L912 E -DR AS OH:P_T7A3S4.R8_NR1 -DR RA44ST81SOH7. P_NR:_A -XX -RN [1]; RE0047775. -RX PUBMED: 16574738. -RA te.k.. .,PiuKB.a e.NM e,raS. W, eJ ,dW Mym inMKvk eSi.h -RT NINCTRE2AIML IT6CYIREHT TP ONA TTMTIDOLIEANGVTIIMIENLPUPLH1SUN ABR SHTTEVEIXOPNMVTDORY N,ENRIM RA5S .ADI-T AIOBAOTTETASDIIY EVSLTDET ICC NL3MS INDD A H -RL M.lnoE.6 :).n20(lodcor0 i -XX -// -AC R19311 -XX -ID HS$TRPV6_02 -XX -DT c5a.0dr)e0ee2.5;(0.2 c th6 -DT t. h;52d)06.dp00 ac.(u30ee -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE Gene: G036757. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -RE distal promoter region -XX -SQ TACCCAGAGTAGCGT. -XX -EL .a43DRVE- -XX -SF 37-34 -ST 23-43 -XX -BF T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens. -XX -SO C2;a90 .o-2c3 -SO -cic )m2(0thbeeau1rr;6a.an -SO 0S; 0L38210. -XX -MM mtih(m)onntpcei irroPiIupotChcianm a -MM di etisfertgch l -MM tasfisnuanlnycolai -MM itihnoe tcteiosm fglp -XX -DR E5YA42067121511:1AY4: 7L262.;M ()27B -DR .R3A_ PR_TO718HS:A4SN -DR 8:RR_T.SO_47NSH 1P4AA -XX -RN [1]; RE0047775. -RX PUBMED: 16574738. -RA Ji,NBm y .,iSekM.h.K , i tM. ...kee,aue WMvWdKrSe .aP n -RT TIAPT.A3IIERA-YIO HEUACHLIMTES2NATMS XR TADPTEDOL N N OETTN1HNESM IIPT MRV S6NIIAPA5NSCTLVDIS CTGRA BOTMIDE OVNYRIDBDTL INL INMDCEA,HYLTT IRTEENT UVI IO -RL )0Mro ll:.ooiE nnc6.2(.0d -XX -// -AC R19312 -XX -ID HS$TRPV6_03 -XX -DT c5at( 6e).0crd;0. 05h2ee2. -DT 2d0da;65 (ec0..u03e0 h)t.p -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE Gene: G036757. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -RE distal promoter region -XX -SQ gyGACTAGGGGAGTg. -XX -EL R.EDb 4-3V -XX -SF 4-287 -ST 327-4 -XX -BF T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens. -XX -SO .o0-C23 9ca;2 -SO 6 rmr)n(.1baahcut;ei-c02ae -SO 810LS 032;.0 -XX -MM moci)mrmipoti tipPaa(tCorhhIi necunn -MM esli htfti grdce -MM cyl usaoalfniatisnn -MM itispftcto lei megonh -XX -DR 6)EY-2 2512114LAB(5.718 7265-472:MY ;A: -DR _8P4 .7TSORS31R:A_AHN -DR :H SAR74_AN.R_S81P4TO -XX -RN [1]; RE0047775. -RX PUBMED: 16574738. -RA MeW.Nd., Bv ie, .MKSri. ,mu. ,WeMJ yPeate.S ai Khkn.k. -RT DGNLLXNDC HOE2TU YTO3 I5VOCORNN IIADN EPI AEIVYES TRSTLUATSTCI HEDP ITDA VIADT MTTI,R MT-T CTTE IOHBIAINLVM INILNETMISTSPLA.E EYMRSNINA REA 1RANOTMIBPH6D -RL .lno:0..0oMiE2rl onc(d6) -XX -// -AC R19313 -XX -ID HS$TRPV6_04 -XX -DT 0 5002t(ea2h5..c6cree)d;. -DT d.t; 0)3h.u20.ac(6ee 5dp00 -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE Gene: G036757. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -RE distal promoter region -XX -SQ gagGCGAAGGGGTCA. -XX -EL 5.3-VERD -XX -SF 3591- -ST 5530- -XX -BF T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens. -XX -SO 90;Ca o3c.-22 -SO uretea2r).i b(acha-601mc;n -SO L 38;0S1200. -XX -MM riiam pt(paPCrtmhnucm)iihetncoo Inoi -MM scftdt rii geehl -MM cssiayn iaafnlunlot -MM ehfoemsptitcitnogli -XX -DR 7;-1L .2-425 64Y:AM21E8224Y79 9)B:A65(2 -DR APR TNO4:_H3_1SRA.S87 -DR 4R_S:ST.AA71 ON8PRH4_ -XX -RN [1]; RE0047775. -RX PUBMED: 16574738. -RA ,K Mer SuyW.eedi M., m., kiP h eti ...BakK.M aeJv.,NS Wn -RT RAL5RT NIA1PVIMATBDSEE P-HIAR.SUTERDHIN T IO,L IPATNNITNTCMSTNTSCRINDATDMIN6 IL YRTATO HTID E NO UVVLD BNM GXHOIDLEO TV TTEI32PMLEAYCIMAEASOSCEITINIYE -RL or.0 n.n.ol lE(0 :Mdio62)c -XX -// -AC R19314 -XX -ID HS$TRPV6_05 -XX -DT t 05)d;r.0c. h.ee5ea2c2(06 -DT .hp ;c0e.)d0e3t00d2(5u .a6 -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE Gene: G036757. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -RE distal promoter region -XX -SQ GTTTTGAGGCGACGT. -XX -EL 21.DER -Va -XX -SF 619-2 -ST 52-15 -XX -BF T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens. -XX -SO 2; c-0C.93oa2 -SO cie;rbm1ac6n-r0ha.e2u(ta) -SO 0L0;2.1S83 0 -XX -MM pcoitmtuinaho)nChreItriomnc (imiaP p -MM eridf ts getlihc -MM acsntanulyfl inisoa -MM iefh ntmeltioispgo tc -XX -DR 462Y9A7 22:M42515B).;A:12 Y762 1LE99( -DR S_RA:8S1_HANP73T. 4OR -DR 47.8HST_AAR4:P_ NSO1R -XX -RN [1]; RE0047775. -RX PUBMED: 16574738. -RA v i,,MNk JmW Syan.at.,e PSe. Wh. uKKreB dMi,e .Mei..k. -RT I DXNNVNLT3 DT RSOYBLRNTUANSE ITETIITM SRMN1SNRTMAILVI PDIMIETEETLP2Y.T IS HAIRIRVCA EHCTTVEAPONN TTIOD NA 5HUHD NCTOLE DIEAL6IMIAGTPDY AO MS ,TCBIO IAE- -RL :lE.c.iono 06.o0 rM) n(ld2 -XX -// -AC R19315 -XX -ID HS$TRPV6_06 -XX -DT 5de.(2 c )a.0eer5;c0t2h.60 -DT te0.0ap( 2 h;05e0.3d.c)ud6 -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE Gene: G036757. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -RE distal promoter region -XX -SQ CCCCATGCCCGACAA. -XX -EL b-D2VR 1.E -XX -SF -1912 -ST 20-15 -XX -BF T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens. -XX -SO 3cC0.ao2 92-; -SO 6riearmah)10e(;a2ccbutn.- -SO 01;8L30 2.S0 -XX -MM (cnoCrPnmtociihm oaiah pipruIitm)net -MM eiscrgh l fttide -MM ifnsaslncunaa otyil -MM fititepo nhce oigltsm -XX -DR 346E9A21.61Y:2A5;B9 43:2 4Y5(2 M721)L -DR R _R.AO7N8AP:1TS3_4HS -DR 8RS_ O1.THP:4RS4AN_A7 -XX -RN [1]; RE0047775. -RX PUBMED: 16574738. -RA u,nek,.mW ,e PMJedre viKM.a. tBWh .M S N. kSi ae .y Ki,.. -RT APITALNI SSI I3 TOTONMA6YAUT1OT2 IAS H AADNR IVDVTNOPNTRYILT IVIMVRDC PCLDXTAEDAIT-BR M E5CNLSCBS,INEDOTITHS TIINGNRHEHEI.PTODIY T RLIN ENEUM EATM TME EL -RL Er0l:o.n )i.ldM6o02con( . -XX -// -AC R19316 -XX -ID HS$TRPV6_07 -XX -DT 2)(00eh5t ;e5cr0cd.2.ae. 6 -DT a 3)p.5 cde062d.(00.0te;hu -CO Copyright (C), Biobase GmbH. -XX -TY DNA -XX -DE Gene: G036757. -XX -OS human, Homo sapiens -OC eukaryota; animalia; metazoa; chordata; vertebrata; tetrapoda; mammalia; eutheria; primates -XX -RE distal promoter region -XX -SQ ttTCAGATGACTGAt. -XX -EL E2-V.R 1D -XX -SF -3821 -ST 12-96 -XX -BF T08716; VDR:RXR-alpha; Quality: 3; Species: human, Homo sapiens. -XX -SO co;9-3 22aC.0 -SO ;cmrn a-(u1.0ea6c2t)bheiar -SO 0S02 10.;38L -XX -MM (miaiorcp ImiuiitnantChohP rm)tocpne -MM hlger ficitsetd -MM nini yasnasalocltfu -MM gtihe tpeftn cmisooli -XX -DR :A416Y41E02B( A12Y502.2;:8 61-41 LM)715-6 -DR _RA3A1S8 TH7_RPSN.O:4 -DR _P4.H T8A:A1R_S4S7RNO -XX -RN [1]; RE0047775. -RX PUBMED: 16574738. -RA mn.KWie WeS. a,M Nt,a ,K.d B.e.uP.r ,MkkiheJ . My S v.ei -RT IMTICNTDL PHIC LHBLIA OMT 6 EN TAIEV NTS E25NMT1LMISPGEVTADAIIXEAN REEAATIMNITIV P NRE ISD EO ISIO,TNORDADECO YTYONVBYT.SRRDIPURNTLLCNAMIDTAUTHH I -T3TS -RL E.rlond o(nilo): M.20.c60 -XX -// -- 2.11.4.GIT