From 6be5d5f5c01fbb11c9249fbb633ecc358ae3e0cb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Carn=C3=AB=20Draug?= Date: Thu, 13 Sep 2018 16:11:05 +0100 Subject: [PATCH] Move HMMER related modules, tests, and programs to new distribution. --- Bio/Index/Hmmer.pm | 480 ------- Bio/Search/HSP/HMMERHSP.pm | 398 ------ Bio/Search/HSP/HmmpfamHSP.pm | 361 ------ Bio/Search/Hit/HMMERHit.pm | 309 ----- Bio/Search/Hit/HmmpfamHit.pm | 377 ------ Bio/Search/Hit/hmmer3Hit.pm | 159 --- Bio/Search/Result/HMMERResult.pm | 448 ------- Bio/Search/Result/HmmpfamResult.pm | 393 ------ Bio/Search/Result/hmmer3Result.pm | 191 --- Bio/SearchIO/hmmer.pm | 159 --- Bio/SearchIO/hmmer2.pm | 1105 ---------------- Bio/SearchIO/hmmer3.pm | 1257 ------------------ Bio/SearchIO/hmmer_pull.pm | 283 ----- Bio/Tools/HMMER/Domain.pm | 339 ----- Bio/Tools/HMMER/Results.pm | 976 -------------- Bio/Tools/HMMER/Set.pm | 263 ---- Bio/Tools/Hmmpfam.pm | 228 ---- Changes | 4 + scripts/searchio/bp_hmmer_to_table.pl | 98 -- scripts/searchio/bp_parse_hmmsearch.pl | 206 --- t/LocalDB/Index/Index.t | 18 +- t/SearchIO/hmmer.t | 1712 ------------------------- t/SearchIO/hmmer_pull.t | 219 ---- t/Tools/Hmmer.t | 110 -- t/data/L77119.hmmer | 50 - t/data/cysprot1b.hmmsearch | 177 --- t/data/hmmpfam_HSPdashline.txt | 74 -- t/data/hmmpfam_cs.out | 559 -------- t/data/hmmpfam_fake.out | 55 - t/data/hmmpfam_multiresult.out | 106 -- t/data/hmmscan.out | 202 --- t/data/hmmscan_multi_domain.out | 116 -- t/data/hmmscan_qry_stop.txt | 63 - t/data/hmmscan_sec_struct.out | 140 --- t/data/hmmsearch.out | 2170 -------------------------------- t/data/hmmsearch3.out | 18 - t/data/hmmsearch3_multi.out | 211 ---- t/data/nhmmer-3.1.out | 63 - t/data/pfamOutput-bug3376.out | 38 - t/data/phmmer.out | 183 --- 40 files changed, 6 insertions(+), 14312 deletions(-) delete mode 100644 Bio/Index/Hmmer.pm delete mode 100644 Bio/Search/HSP/HMMERHSP.pm delete mode 100755 Bio/Search/HSP/HmmpfamHSP.pm delete mode 100644 Bio/Search/Hit/HMMERHit.pm delete mode 100755 Bio/Search/Hit/HmmpfamHit.pm delete mode 100644 Bio/Search/Hit/hmmer3Hit.pm delete mode 100644 Bio/Search/Result/HMMERResult.pm delete mode 100755 Bio/Search/Result/HmmpfamResult.pm delete mode 100644 Bio/Search/Result/hmmer3Result.pm delete mode 100644 Bio/SearchIO/hmmer.pm delete mode 100644 Bio/SearchIO/hmmer2.pm delete mode 100644 Bio/SearchIO/hmmer3.pm delete mode 100755 Bio/SearchIO/hmmer_pull.pm delete mode 100644 Bio/Tools/HMMER/Domain.pm delete mode 100644 Bio/Tools/HMMER/Results.pm delete mode 100644 Bio/Tools/HMMER/Set.pm delete mode 100755 Bio/Tools/Hmmpfam.pm delete mode 100644 scripts/searchio/bp_hmmer_to_table.pl delete mode 100644 scripts/searchio/bp_parse_hmmsearch.pl delete mode 100644 t/SearchIO/hmmer.t delete mode 100644 t/SearchIO/hmmer_pull.t delete mode 100644 t/Tools/Hmmer.t delete mode 100644 t/data/L77119.hmmer delete mode 100644 t/data/cysprot1b.hmmsearch delete mode 100644 t/data/hmmpfam_HSPdashline.txt delete mode 100644 t/data/hmmpfam_cs.out delete mode 100755 t/data/hmmpfam_fake.out delete mode 100644 t/data/hmmpfam_multiresult.out delete mode 100644 t/data/hmmscan.out delete mode 100644 t/data/hmmscan_multi_domain.out delete mode 100644 t/data/hmmscan_qry_stop.txt delete mode 100644 t/data/hmmscan_sec_struct.out delete mode 100755 t/data/hmmsearch.out delete mode 100644 t/data/hmmsearch3.out delete mode 100644 t/data/hmmsearch3_multi.out delete mode 100644 t/data/nhmmer-3.1.out delete mode 100644 t/data/pfamOutput-bug3376.out delete mode 100644 t/data/phmmer.out diff --git a/Bio/Index/Hmmer.pm b/Bio/Index/Hmmer.pm deleted file mode 100644 index 8c48f940e..000000000 --- a/Bio/Index/Hmmer.pm +++ /dev/null @@ -1,480 +0,0 @@ -# -# BioPerl module for Bio::Index::Hmmer -# -# Please direct questions and support issues to -# -# Cared for by Josh Lauricha -# -# Copyright Josh Lauricha -# Unless otherwise noted, this was shamelessly ripped from -# Bio::Index::Blast -# -# You may distribute this module under the terms of perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Index::Hmmer - indexes HMMER reports and supports retreival based on query - -=head1 SYNOPSIS - - # Complete Code for indexing a set of report files - #!/usr/bin/perl -w - use strict; - use Bio::Index::Hmmer; - my $indexfile = shift; - my $index = Bio::Index::Hmmer->new( - -filename => $indexfile, - -write_flag => 1 - ); - $index->make_index(@ARGV); - - - # Complete code for fetching a report - use strict; - use Bio::Index::Hmmer; - my $indexfile = shift; - my $index = Bio::Index::Hmmer->new( - -filename => $indexfile, - -write_flag => 0 - ); - - foreach my $id (@ARGV) { - my $report = $index->fetch_report($id); - print "Query: ", $report->query_name(), "\n"; - while( my $hit = $report->next_hit() ) { - print "\tHit Name: ", $hit->name(), "\n"; - while( my $hsp = $hit->next_domain() ) { - print "\t\tE-Value: ", $hsp->evalue(), "\n"; - } - } - } - -=head1 DESCRIPTION - -This object allows one to build an index on a HMMER file (or files) -and provide quick access to the HMMER report for that accession. -For best results 'use strict'. - -You can also set or customize the unique key used to retrieve by -writing your own function and calling the id_parser() method. -For example: - - $inx->id_parser(\&get_id); - # make the index - $inx->make_index($file_name); - - # here is where the retrieval key is specified - sub get_id { - my $line = shift; - $line =~ /^KW\s+([A-Z]+)/i; - $1; - } - - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Josh Lauricha - -Email laurichj@bioinfo.ucr.edu - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Index::Hmmer; -use strict; - -use Bio::SearchIO; -use IO::String; -use Bio::Root::Version; - -use base qw(Bio::Index::Abstract Bio::Root::Root); - -sub _version -{ - return ${Bio::Root::Version::VERSION}; -} - -=head2 new - - Usage : $index = Bio::Index::Hmmer->new( - -filename => $dbm_file, - -write_flag => 0, - -dbm_package => 'DB_File', - -verbose => 0 - ); - Function: Returns a new index object. If filename is - specified, then open_dbm() is immediately called. - Returns : A new index object - Args : -filename The name of the dbm index file. - -write_flag TRUE if write access to the dbm file is - needed. - -dbm_package The Perl dbm module to use for the - index. - -verbose Print debugging output to STDERR if - TRUE. - -=cut - -sub new -{ - my($class, @args) = @_; - my $self = $class->SUPER::new(@args); -} - -=head2 Bio::Index::Hmmer implemented methods - -=cut - -=head2 fetch_report - - Title : fetch_report - Usage : my $report = $idx->fetch_report($id); - Function: Returns a Bio::Search::Result::HMMERResult report object - for a specific HMMER report - Returns : Bio::Search::Result::HMMERResult - Args : valid id - -=cut - -sub fetch_report -{ - my ($self, $id) = @_; - my (@header, @data, $line); - my $fh = $self->get_stream($id); - my $pos = tell($fh); - - seek($fh, 0, 0); # The HMMER SearchIO wants the header, so we fetch it - while($line = <$fh>) { - push @header, $line; - last if $line =~ /Query sequence:/o; - } - seek($fh, $pos, 0); - - # Then the data - while(<$fh>) { - push @data, $_ if defined; - last if m{//}o; - } - - # Then join them and send - my $rfh = IO::String->new(join('', @header, @data)); - my $report = Bio::SearchIO->new( - -noclose => 1, - -format => 'hmmer', - -fh => $rfh - ); - return $report->next_result(); -} - -# shamelessly stolen from Bio::Index::Fasta - -=head2 id_parser - - Title : id_parser - Usage : $index->id_parser( CODE ) - Function: Stores or returns the code used by record_id to - parse the ID for record from a string. Useful - for (for instance) specifying a different - parser for different flavours of blast dbs. - Returns \&default_id_parser (see below) if not - set. If you supply your own id_parser - subroutine, then it should expect a fasta - description line. An entry will be added to - the index for each string in the list returned. - Example : $index->id_parser( \&my_id_parser ) - Returns : ref to CODE if called without arguments - Args : CODE - -=cut - -sub id_parser -{ - my( $self, $code ) =@_; - - if ($code) { - $self->{'_id_parser'} = $code; - } - return $self->{'_id_parser'} || \&default_id_parser; -} - -=head2 default_id_parser - - Title : default_id_parser - Usage : $id = default_id_parser( $header ) - Function: The default Blast Query ID parser for Bio::Index::Blast.pm - Returns $1 from applying the regexp /^>\s*(\S+)/ - to $header. - Returns : ID string - Args : a header line string - -=cut - -sub default_id_parser -{ - if ($_[0] =~ /^\s*(\S+)/) { - return $1; - } else { - return; - } -} - -=head2 Require methods from Bio::Index::Abstract - -=cut - -=head2 _index_file - - Title : _index_file - Usage : $index->_index_file( $file_name, $i ) - Function: Specialist function to index HMMER report file(s). - Is provided with a filename and an integer - by make_index in its SUPER class. - Example : - Returns : - Args : - -=cut - - -sub _index_file { - my($self, $file, $i) = @_; - my($begin); - - open my $HMMER, '<', $file or $self->throw("Could not read file '$file': $!"); - - my $id; - my $indexpoint = 0; - - while(<$HMMER>) { - if( /Query sequence: ([^\s]+)/o ) { - $indexpoint = tell($HMMER); - foreach my $id ($self->id_parser()->($1)) { - print "id is $id, begin is $indexpoint\n" if $self->verbose() > 0; - $self->add_record($id, $i, $indexpoint); - } - } - } - close $HMMER; - return 1; -} - -=head2 Bio::Index::Abstract methods - -=cut - -=head2 filename - - Title : filename - Usage : $value = $self->filename(); - $self->filename($value); - Function: Gets or sets the name of the dbm index file. - Returns : The current value of filename - Args : Value of filename if setting, or none if - getting the value. - -=head2 write_flag - - Title : write_flag - Usage : $value = $self->write_flag(); - $self->write_flag($value); - Function: Gets or sets the value of write_flag, which - is whether the dbm file should be opened with - write access. - Returns : The current value of write_flag (default 0) - Args : Value of write_flag if setting, or none if - getting the value. - -=head2 dbm_package - - Usage : $value = $self->dbm_package(); - $self->dbm_package($value); - - Function: Gets or sets the name of the Perl dbm module used. - If the value is unset, then it returns the value of - the package variable $USE_DBM_TYPE or if that is - unset, then it chooses the best available dbm type, - choosing 'DB_File' in preference to 'SDBM_File'. - Bio::Abstract::Index may work with other dbm file - types. - - Returns : The current value of dbm_package - Args : Value of dbm_package if setting, or none if - getting the value. - - -=head2 get_stream - - Title : get_stream - Usage : $stream = $index->get_stream( $id ); - Function: Returns a file handle with the file pointer - at the approprite place - - This provides for a way to get the actual - file contents and not an object - - WARNING: you must parse the record deliminter - *yourself*. Abstract won't do this for you - So this code - - $fh = $index->get_stream($myid); - while( <$fh> ) { - # do something - } - will parse the entire file if you don't put in - a last statement in, like - - while( <$fh> ) { - /^\/\// && last; # end of record - # do something - } - - Returns : A filehandle object - Args : string represents the accession number - Notes : This method should not be used without forethought - - -=head2 open_dbm - - Usage : $index->open_dbm() - Function: Opens the dbm file associated with the index - object. Write access is only given if explicitly - asked for by calling new(-write => 1) or having set - the write_flag(1) on the index object. The type of - dbm file opened is that returned by dbm_package(). - The name of the file to be is opened is obtained by - calling the filename() method. - - Example : $index->_open_dbm() - Returns : 1 on success - - -=head2 _version - - Title : _version - Usage : $type = $index->_version() - Function: Returns a string which identifes the version of an - index module. Used to permanently identify an index - file as having been created by a particular version - of the index module. Must be provided by the sub class - Example : - Returns : - Args : none - -=head2 _filename - - Title : _filename - Usage : $index->_filename( FILE INT ) - Function: Indexes the file - Example : - Returns : - Args : - -=head2 _file_handle - - Title : _file_handle - Usage : $fh = $index->_file_handle( INT ) - Function: Returns an open filehandle for the file - index INT. On opening a new filehandle it - caches it in the @{$index->_filehandle} array. - If the requested filehandle is already open, - it simply returns it from the array. - Example : $fist_file_indexed = $index->_file_handle( 0 ); - Returns : ref to a filehandle - Args : INT - -=head2 _file_count - - Title : _file_count - Usage : $index->_file_count( INT ) - Function: Used by the index building sub in a sub class to - track the number of files indexed. Sets or gets - the number of files indexed when called with or - without an argument. - Example : - Returns : INT - Args : INT - - -=head2 add_record - - Title : add_record - Usage : $index->add_record( $id, @stuff ); - Function: Calls pack_record on @stuff, and adds the result - of pack_record to the index database under key $id. - If $id is a reference to an array, then a new entry - is added under a key corresponding to each element - of the array. - Example : $index->add_record( $id, $fileNumber, $begin, $end ) - Returns : TRUE on success or FALSE on failure - Args : ID LIST - -=head2 pack_record - - Title : pack_record - Usage : $packed_string = $index->pack_record( LIST ) - Function: Packs an array of scalars into a single string - joined by ASCII 034 (which is unlikely to be used - in any of the strings), and returns it. - Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end ) - Returns : STRING or undef - Args : LIST - -=head2 unpack_record - - Title : unpack_record - Usage : $index->unpack_record( STRING ) - Function: Splits the sting provided into an array, - splitting on ASCII 034. - Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} ) - Returns : A 3 element ARRAY - Args : STRING containing ASCII 034 - -=head2 DESTROY - - Title : DESTROY - Usage : Called automatically when index goes out of scope - Function: Closes connection to database and handles to - sequence files - Returns : NEVER - Args : NONE - - -=cut - -1; diff --git a/Bio/Search/HSP/HMMERHSP.pm b/Bio/Search/HSP/HMMERHSP.pm deleted file mode 100644 index 9eead3472..000000000 --- a/Bio/Search/HSP/HMMERHSP.pm +++ /dev/null @@ -1,398 +0,0 @@ -# -# BioPerl module for Bio::Search::HSP::HMMERHSP -# -# Please direct questions and support issues to -# -# Cared for by Jason Stajich -# -# Copyright Jason Stajich -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Search::HSP::HMMERHSP - A HSP object for HMMER results - -=head1 SYNOPSIS - - use Bio::Search::HSP::HMMERHSP; - # use it just like a Bio::Search::HSP::GenericHSP object - -=head1 DESCRIPTION - -This object is a specialization of L. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Jason Stajich - -Email jason-at-bioperl.org - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Search::HSP::HMMERHSP; -use strict; - -use base qw(Bio::Search::HSP::GenericHSP); - -=head2 new - - Title : new - Usage : my $obj = Bio::Search::HSP::HMMERHSP->new(); - Function: Builds a new Bio::Search::HSP::HMMERHSP object - Returns : Bio::Search::HSP::HMMERHSP - Args : - -Plus Bio::Search::HSP::GenericHSP methods - - -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc) - -evalue => evalue - -pvalue => pvalue - -bits => bit value for HSP - -score => score value for HSP (typically z-score but depends on - analysis) - -hsp_length => Length of the HSP (including gaps) - -identical => # of residues that that matched identically - -conserved => # of residues that matched conservatively - (only protein comparisons - - conserved == identical in nucleotide comparisons) - -hsp_gaps => # of gaps in the HSP - -query_gaps => # of gaps in the query in the alignment - -hit_gaps => # of gaps in the subject in the alignment - -query_name => HSP Query sequence name (if available) - -query_start => HSP Query start (in original query sequence coords) - -query_end => HSP Query end (in original query sequence coords) - -hit_name => HSP Hit sequence name (if available) - -hit_start => HSP Hit start (in original hit sequence coords) - -hit_end => HSP Hit end (in original hit sequence coords) - -hit_length => total length of the hit sequence - -query_length => total length of the query sequence - -query_seq => query sequence portion of the HSP - -hit_seq => hit sequence portion of the HSP - -homology_seq => homology sequence for the HSP - -hit_frame => hit frame (only if hit is translated protein) - -query_frame => query frame (only if query is translated protein) - -=cut - -=head2 Bio::Search::HSP::HSPI methods - -Implementation of Bio::Search::HSP::HSPI methods follow - -=head2 algorithm - - Title : algorithm - Usage : my $r_type = $hsp->algorithm - Function: Obtain the name of the algorithm used to obtain the HSP - Returns : string (e.g., BLASTP) - Args : [optional] scalar string to set value - -=cut - -=head2 pvalue - - Title : pvalue - Usage : my $pvalue = $hsp->pvalue(); - Function: Returns the P-value for this HSP or undef - Returns : float or exponential (2e-10) - P-value is not defined with NCBI Blast2 reports. - Args : [optional] numeric to set value - -=cut - -=head2 evalue - - Title : evalue - Usage : my $evalue = $hsp->evalue(); - Function: Returns the e-value for this HSP - Returns : float or exponential (2e-10) - Args : [optional] numeric to set value - -=cut - -=head2 frac_identical - - Title : frac_identical - Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] ); - Function: Returns the fraction of identitical positions for this HSP - Returns : Float in range 0.0 -> 1.0 - Args : arg 1: 'query' = num identical / length of query seq (without gaps) - 'hit' = num identical / length of hit seq (without gaps) - 'total' = num identical / length of alignment (with gaps) - default = 'total' - arg 2: [optional] frac identical value to set for the type requested - -=cut - -=head2 frac_conserved - - Title : frac_conserved - Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] ); - Function : Returns the fraction of conserved positions for this HSP. - This is the fraction of symbols in the alignment with a - positive score. - Returns : Float in range 0.0 -> 1.0 - Args : arg 1: 'query' = num conserved / length of query seq (without gaps) - 'hit' = num conserved / length of hit seq (without gaps) - 'total' = num conserved / length of alignment (with gaps) - default = 'total' - arg 2: [optional] frac conserved value to set for the type requested - -=cut - -=head2 gaps - - Title : gaps - Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] ); - Function : Get the number of gaps in the query, hit, or total alignment. - Returns : Integer, number of gaps or 0 if none - Args : arg 1: 'query' = num gaps in query seq - 'hit' = num gaps in hit seq - 'total' = num gaps in whole alignment - default = 'total' - arg 2: [optional] integer gap value to set for the type requested - -=cut - -=head2 query_string - - Title : query_string - Usage : my $qseq = $hsp->query_string; - Function: Retrieves the query sequence of this HSP as a string - Returns : string - Args : [optional] string to set for query sequence - - -=cut - -=head2 hit_string - - Title : hit_string - Usage : my $hseq = $hsp->hit_string; - Function: Retrieves the hit sequence of this HSP as a string - Returns : string - Args : [optional] string to set for hit sequence - - -=cut - - -=head2 homology_string - - Title : homology_string - Usage : my $homo_string = $hsp->homology_string; - Function: Retrieves the homology sequence for this HSP as a string. - : The homology sequence is the string of symbols in between the - : query and hit sequences in the alignment indicating the degree - : of conservation (e.g., identical, similar, not similar). - Returns : string - Args : [optional] string to set for homology sequence - -=cut - -=head2 length - - Title : length - Usage : my $len = $hsp->length( ['query'|'hit'|'total'] ); - Function : Returns the length of the query or hit in the alignment - (without gaps) - or the aggregate length of the HSP (including gaps; - this may be greater than either hit or query ) - Returns : integer - Args : arg 1: 'query' = length of query seq (without gaps) - 'hit' = length of hit seq (without gaps) - 'total' = length of alignment (with gaps) - default = 'total' - arg 2: [optional] integer length value to set for specific type - -=cut - -=head2 percent_identity - - Title : percent_identity - Usage : my $percentid = $hsp->percent_identity() - Function: Returns the calculated percent identity for an HSP - Returns : floating point between 0 and 100 - Args : none - - -=cut - - -=head2 frame - - Title : frame - Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe) - Function: Set the Frame for both query and subject and insure that - they agree. - This overrides the frame() method implementation in - FeaturePair. - Returns : array of query and subject frame if return type wants an array - or query frame if defined or subject frame if not defined - Args : 'hit' or 'subject' or 'sbjct' to retrieve the frame of the subject (default) - 'query' to retrieve the query frame - 'list' or 'array' to retrieve both query and hit frames together - Note : Frames are stored in the GFF way (0-2) not 1-3 - as they are in BLAST (negative frames are deduced by checking - the strand of the query or hit) - -=cut - -=head2 get_aln - - Title : get_aln - Usage : my $aln = $hsp->gel_aln - Function: Returns a Bio::SimpleAlign representing the HSP alignment - Returns : Bio::SimpleAlign - Args : none - -=cut - -sub get_aln { - my ($self) = shift; - $self->warn("Inappropriate to build a Bio::SimpleAlign from a HMMER HSP object"); - return; -} - -=head2 num_conserved - - Title : num_conserved - Usage : $obj->num_conserved($newval) - Function: returns the number of conserved residues in the alignment - Returns : inetger - Args : integer (optional) - - -=cut - -=head2 num_identical - - Title : num_identical - Usage : $obj->num_identical($newval) - Function: returns the number of identical residues in the alignment - Returns : integer - Args : integer (optional) - - -=cut - -=head2 seq_inds - - Title : seq_inds - Purpose : Get a list of residue positions (indices) for all identical - : or conserved residues in the query or sbjct sequence. - Example : @s_ind = $hsp->seq_inds('query', 'identical'); - : @h_ind = $hsp->seq_inds('hit', 'conserved'); - : @h_ind = $hsp->seq_inds('hit', 'conserved', 1); - Returns : List of integers - : May include ranges if collapse is true. - Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query) - : ('sbjct' is synonymous with 'hit') - : class = 'identical' or 'conserved' or 'nomatch' or 'gap' - : (default = identical) - : (can be shortened to 'id' or 'cons') - : - : collapse = boolean, if true, consecutive positions are merged - : using a range notation, e.g., "1 2 3 4 5 7 9 10 11" - : collapses to "1-5 7 9-11". This is useful for - : consolidating long lists. Default = no collapse. - Throws : n/a. - Comments : - -See Also : L, -L - -=cut - -=head2 Inherited from Bio::SeqFeature::SimilarityPair - -These methods come from Bio::SeqFeature::SimilarityPair - -=head2 query - - Title : query - Usage : my $query = $hsp->query - Function: Returns a SeqFeature representing the query in the HSP - Returns : Bio::SeqFeature::Similarity - Args : [optional] new value to set - - -=head2 hit - - Title : hit - Usage : my $hit = $hsp->hit - Function: Returns a SeqFeature representing the hit in the HSP - Returns : Bio::SeqFeature::Similarity - Args : [optional] new value to set - - -=head2 significance - - Title : significance - Usage : $evalue = $obj->significance(); - $obj->significance($evalue); - Function: Get/Set the significance value - Returns : numeric - Args : [optional] new value to set - - -=head2 score - - Title : score - Usage : my $score = $hsp->score(); - Function: Returns the score for this HSP or undef - Returns : numeric - Args : [optional] numeric to set value - -=cut - -=head2 bits - - Title : bits - Usage : my $bits = $hsp->bits(); - Function: Returns the bit value for this HSP or undef - Returns : numeric - Args : none - -=cut - -sub bits { return 0; } - -1; diff --git a/Bio/Search/HSP/HmmpfamHSP.pm b/Bio/Search/HSP/HmmpfamHSP.pm deleted file mode 100755 index bab8cb957..000000000 --- a/Bio/Search/HSP/HmmpfamHSP.pm +++ /dev/null @@ -1,361 +0,0 @@ -# -# BioPerl module for Bio::Search::HSP::HmmpfamHSP -# -# Please direct questions and support issues to -# -# Cared for by Sendu Bala -# -# Copyright Sendu Bala -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Search::HSP::HmmpfamHSP - A parser and HSP object for hmmpfam hsps - -=head1 SYNOPSIS - - # generally we use Bio::SearchIO to build these objects - use Bio::SearchIO; - my $in = Bio::SearchIO->new(-format => 'hmmer_pull', - -file => 'result.hmmer'); - - while (my $result = $in->next_result) { - while (my $hit = $result->next_hit) { - print $hit->name, "\n"; - print $hit->score, "\n"; - print $hit->significance, "\n"; - - while (my $hsp = $hit->next_hsp) { - # process HSPI objects - } - } - } - -=head1 DESCRIPTION - -This object implements a parser for hmmpfam hsp output, a program in the HMMER -package. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Sendu Bala - -Email bix@sendu.me.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Search::HSP::HmmpfamHSP; - -use strict; -use base qw(Bio::Search::HSP::PullHSPI); - -=head2 new - - Title : new - Usage : my $obj = Bio::Search::HSP::HmmpfamHSP->new(); - Function: Builds a new Bio::Search::HSP::HmmpfamHSP object. - Returns : Bio::Search::HSP::HmmpfamHSP - Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent) - -parent => Bio::PullParserI object (required if no -chunk) - -hsp_data => array ref with [rank query_start query_end hit_start - hit_end score evalue] - - where the array ref provided to -chunk contains an IO object - for a filehandle to something representing the raw data of the - hsp, and $start and $end define the tell() position within the - filehandle that the hsp data starts and ends (optional; defaults - to start and end of the entire thing described by the filehandle) - -=cut - -sub new { - my ($class, @args) = @_; - my $self = $class->SUPER::new(@args); - - $self->_setup(@args); - - my $fields = $self->_fields; - foreach my $field (qw( alignment )) { - $fields->{$field} = undef; - } - - my $hsp_data = $self->_raw_hsp_data; - if ($hsp_data && ref($hsp_data) eq 'ARRAY') { - my @hsp_data = @{$hsp_data}; # don't alter the reference - foreach my $field (qw(rank query_start query_end hit_start hit_end score evalue)) { - $fields->{$field} = shift(@hsp_data); - } - } - - $self->_dependencies( { ( query_string => 'alignment', - hit_string => 'alignment', - homology_string => 'alignment', - hit_identical_inds => 'seq_inds', - hit_conserved_inds => 'seq_inds', - hit_nomatch_inds => 'seq_inds', - hit_gap_inds => 'seq_inds', - query_identical_inds => 'seq_inds', - query_conserved_inds => 'seq_inds', - query_nomatch_inds => 'seq_inds', - query_gap_inds => 'seq_inds' ) } ); - - return $self; -} - -# -# PullParserI discovery methods so we can answer all HitI questions -# - -sub _discover_alignment { - my $self = shift; - my $alignments_hash = $self->get_field('alignments'); - - my $identifier = $self->get_field('name').'~~~~'.$self->get_field('rank'); - - while (! defined $alignments_hash->{$identifier}) { - last unless $self->parent->parent->_next_alignment; - } - my $alignment = $alignments_hash->{$identifier}; - - if ($alignment) { - # work out query, hit and homology strings, and some stats - # (quicker to do this all at once instead of each method working on - # $alignment string itself) - - my ($query_string, $hit_string, $homology_string); - while ($alignment =~ /\s+(\S+)\n\s+(\S.+)\n\s+\S+\s+\d+\s+(\S+)\s+\d/gm) { - my $hi = $1; - my $ho = $2; - $query_string .= $3; - - $hi =~ s/\*\-\>//; - $ho = ' 'x(length($hi) - length($ho)).$ho; - $hi =~ s/\<\-\*//; - - $hit_string .= $hi; - $homology_string .= $ho; - } - - $self->_fields->{query_string} = $query_string; - $self->_fields->{hit_string} = $hit_string; - $homology_string =~ s/ $//; - $self->_fields->{homology_string} = $homology_string; - - ($self->{_query_gaps}) = $query_string =~ tr/-//; - ($self->{_hit_gaps}) = $hit_string =~ tr/.//; - ($self->{_total_gaps}) = $self->{_query_gaps} + $self->{_hit_gaps}; - } - - $self->_fields->{alignment} = 1; # stop this method being called again -} - -# seq_inds related methods, all just need seq_inds field to have been gotten -sub _discover_seq_inds { - my $self = shift; - my ($seqString, $qseq, $sseq) = ( $self->get_field('homology_string'), - $self->get_field('query_string'), - $self->get_field('hit_string') ); - - # (code largely lifted from GenericHSP) - - # Using hashes to avoid saving duplicate residue numbers. - my %identicalList_query = (); - my %identicalList_sbjct = (); - my %conservedList_query = (); - my %conservedList_sbjct = (); - my @gapList_query = (); - my @gapList_sbjct = (); - my %nomatchList_query = (); - my %nomatchList_sbjct = (); - - my $resCount_query = $self->get_field('query_end'); - my $resCount_sbjct = $self->get_field('hit_end'); - - my ($mchar, $schar, $qchar); - while ($mchar = chop($seqString) ) { - ($qchar, $schar) = (chop($qseq), chop($sseq)); - - if ($mchar eq '+' || $mchar eq '.' || $mchar eq ':') { - $conservedList_query{ $resCount_query } = 1; - $conservedList_sbjct{ $resCount_sbjct } = 1; - } - elsif ($mchar eq ' ') { - $nomatchList_query{ $resCount_query } = 1; - $nomatchList_sbjct{ $resCount_sbjct } = 1; - } - else { - $identicalList_query{ $resCount_query } = 1; - $identicalList_sbjct{ $resCount_sbjct } = 1; - } - - if ($qchar eq '-') { - push(@gapList_query, $resCount_query); - } - else { - $resCount_query -= 1; - } - if ($schar eq '.') { - push(@gapList_sbjct, $resCount_sbjct); - } - else { - $resCount_sbjct -= 1; - } - } - - my $fields = $self->_fields; - $fields->{hit_identical_inds} = [ sort { $a <=> $b } keys %identicalList_sbjct ]; - $fields->{hit_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_sbjct ]; - $fields->{hit_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_sbjct ]; - $fields->{hit_gap_inds} = [ reverse @gapList_sbjct ]; - $fields->{query_identical_inds} = [ sort { $a <=> $b } keys %identicalList_query ]; - $fields->{query_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_query ]; - $fields->{query_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_query ]; - $fields->{query_gap_inds} = [ reverse @gapList_query ]; - - $fields->{seq_inds} = 1; -} - -=head2 query - - Title : query - Usage : my $query = $hsp->query - Function: Returns a SeqFeature representing the query in the HSP - Returns : L - Args : none - -=cut - -sub query { - my $self = shift; - unless ($self->{_created_query}) { - $self->SUPER::query( new Bio::SeqFeature::Similarity - ('-primary' => $self->primary_tag, - '-start' => $self->get_field('query_start'), - '-end' => $self->get_field('query_end'), - '-expect' => $self->get_field('evalue'), - '-score' => $self->get_field('score'), - '-strand' => 1, - '-seq_id' => $self->get_field('query_name'), - #'-seqlength'=> $self->get_field('query_length'), (not known) - '-source' => $self->get_field('algorithm'), - '-seqdesc' => $self->get_field('query_description') - ) ); - $self->{_created_query} = 1; - } - return $self->SUPER::query(@_); -} - -=head2 hit - - Title : hit - Usage : my $hit = $hsp->hit - Function: Returns a SeqFeature representing the hit in the HSP - Returns : L - Args : [optional] new value to set - -=cut - -sub hit { - my $self = shift; - unless ($self->{_created_hit}) { - # the full length isn't always known (given in the report), but don't - # warn about the missing info all the time - my $verbose = $self->parent->parent->parent->verbose; - $self->parent->parent->parent->verbose(-1); - my $seq_length = $self->get_field('length'); - $self->parent->parent->parent->verbose($verbose); - - $self->SUPER::hit( new Bio::SeqFeature::Similarity - ('-primary' => $self->primary_tag, - '-start' => $self->get_field('hit_start'), - '-end' => $self->get_field('hit_end'), - '-expect' => $self->get_field('evalue'), - '-score' => $self->get_field('score'), - '-strand' => 1, - '-seq_id' => $self->get_field('name'), - $seq_length ? ('-seqlength' => $seq_length) : (), - '-source' => $self->get_field('algorithm'), - '-seqdesc' => $self->get_field('description') - ) ); - $self->{_created_hit} = 1; - } - return $self->SUPER::hit(@_); -} - -=head2 gaps - - Title : gaps - Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] ); - Function : Get the number of gaps in the query, hit, or total alignment. - Returns : Integer, number of gaps or 0 if none - Args : 'query' = num conserved / length of query seq (without gaps) - 'hit' = num conserved / length of hit seq (without gaps) - 'total' = num conserved / length of alignment (with gaps) - default = 'total' - -=cut - -sub gaps { - my ($self, $type) = @_; - - $type = lc $type if defined $type; - $type = 'total' if (! defined $type || $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/); - $type = 'hit' if $type =~ /sbjct|subject/; - - $self->get_field('alignment'); # make sure gaps have been calculated - - return $self->{'_'.$type.'_gaps'}; -} - -=head2 pvalue - - Title : pvalue - Usage : my $pvalue = $hsp->pvalue(); - Function: Returns the P-value for this HSP - Returns : undef (Hmmpfam reports do not have p-values) - Args : none - -=cut - -# noop -sub pvalue { } - -1; diff --git a/Bio/Search/Hit/HMMERHit.pm b/Bio/Search/Hit/HMMERHit.pm deleted file mode 100644 index d16e4a784..000000000 --- a/Bio/Search/Hit/HMMERHit.pm +++ /dev/null @@ -1,309 +0,0 @@ -# -# BioPerl module for Bio::Search::Hit::HMMERHit -# -# Please direct questions and support issues to -# -# Cared for by Jason Stajich -# -# Copyright Jason Stajich -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Search::Hit::HMMERHit - A Hit module for HMMER hits - -=head1 SYNOPSIS - - use Bio::Search::Hit::HMMERHit; - my $hit = Bio::Search::Hit::HMMERHit->new(); - # use it in the same way as Bio::Search::Hit::GenericHit - -=head1 DESCRIPTION - -This is a specialization of L. There -are a few news methods L and L. Note that -L and L make no sense for this object and will -return 0. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Jason Stajich - -Email jason@bioperl.org - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Search::Hit::HMMERHit; -use strict; - - -use base qw(Bio::Search::Hit::GenericHit); - -=head2 new - - Title : new - Usage : my $obj = Bio::Search::Hit::HMMERHit->new(); - Function: Builds a new Bio::Search::Hit::HMMERHit object - Returns : Bio::Search::Hit::HMMERHit - Args : - - Plus the Bio::Search::Hit::GenericHit inherited params - -name => Name of Hit (required) - -description => Description (optional) - -accession => Accession number (optional) - -length => Length of the Hit (optional) - -score => Raw Score for the Hit (optional) - -significance => Significance value for the Hit (optional) - -algorithm => Algorithm used (BLASTP, FASTX, etc...) - -hsps => Array ref of HSPs for this Hit. - - -=cut - - -=head2 next_domain - - Title : next_domain - Usage : my $domain = $hit->next_domain(); - Function: An alias for L, this will return the next HSP - Returns : L object - Args : none - - -=cut - -sub next_domain{ shift->next_hsp } - -=head2 domains - - Title : domains - Usage : my @domains = $hit->domains(); - Function: An alias for L, this will return the full list of hsps - Returns : array of L objects - Args : none - - -=cut - -sub domains{ shift->hsps() } - - -=head2 inherited Bio::Search::Hit::GenericHit methods - -=cut - -=head2 add_hsp - - Title : add_hsp - Usage : $hit->add_hsp($hsp) - Function: Add a HSP to the collection of HSPs for a Hit - Returns : number of HSPs in the Hit - Args : Bio::Search::HSP::HSPI object - - -=cut - -=head2 Bio::Search::Hit::HitI methods - -=cut - -=head2 name - - Title : name - Usage : $hit_name = $hit->name(); - Function: returns the name of the Hit sequence - Returns : a scalar string - Args : [optional] scalar string to set the name - -=cut - -=head2 accession - - Title : accession - Usage : $acc = $hit->accession(); - Function: Retrieve the accession (if available) for the hit - Returns : a scalar string (empty string if not set) - Args : none - -=cut - -=head2 description - - Title : description - Usage : $desc = $hit->description(); - Function: Retrieve the description for the hit - Returns : a scalar string - Args : [optional] scalar string to set the description - -=cut - -=head2 length - - Title : length - Usage : my $len = $hit->length - Function: Returns the length of the hit - Returns : integer - Args : [optional] integer to set the length - -=cut - -=head2 algorithm - - Title : algorithm - Usage : $alg = $hit->algorithm(); - Function: Gets the algorithm specification that was used to obtain the hit - For BLAST, the algorithm denotes what type of sequence was aligned - against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated - dna-prt, TBLASTN prt-translated dna, TBLASTX translated - dna-translated dna). - Returns : a scalar string - Args : [optional] scalar string to set the algorithm - -=cut - -=head2 raw_score - - Title : raw_score - Usage : $score = $hit->raw_score(); - Function: Gets the "raw score" generated by the algorithm. What - this score is exactly will vary from algorithm to algorithm, - returning undef if unavailable. - Returns : a scalar value - Args : [optional] scalar value to set the raw score - -=cut - -=head2 significance - - Title : significance - Usage : $significance = $hit->significance(); - Function: Used to obtain the E or P value of a hit, i.e. the probability that - this particular hit was obtained purely by random chance. If - information is not available (nor calculatable from other - information sources), return undef. - Returns : a scalar value or undef if unavailable - Args : [optional] scalar value to set the significance - -=cut - -=head2 bits - - Usage : $hit_object->bits(); - Purpose : Gets the bit score of the best HSP for the current hit. - Example : $bits = $hit_object->bits(); - Returns : Integer or undef if bit score is not set - Argument : n/a - -See Also : L - -=cut - -sub bits { return 0 } - -=head2 next_hsp - - Title : next_hsp - Usage : while( $hsp = $obj->next_hsp()) { ... } - Function : Returns the next available High Scoring Pair - Example : - Returns : Bio::Search::HSP::HSPI object or null if finished - Args : none - -=cut - -=head2 hsps - - Usage : $hit_object->hsps(); - Purpose : Get a list containing all HSP objects. - : Get the numbers of HSPs for the current hit. - Example : @hsps = $hit_object->hsps(); - : $num = $hit_object->hsps(); # alternatively, use num_hsps() - Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects. - : Scalar context: integer (number of HSPs). - : (Equivalent to num_hsps()). - Argument : n/a. Relies on wantarray - Throws : Exception if the HSPs have not been collected. - -See Also : L, L - -=cut - -=head2 num_hsps - - Usage : $hit_object->num_hsps(); - Purpose : Get the number of HSPs for the present Blast hit. - Example : $nhsps = $hit_object->num_hsps(); - Returns : Integer - Argument : n/a - Throws : Exception if the HSPs have not been collected. - -See Also : L - -=cut - -=head2 rewind - - Title : rewind - Usage : $hit->rewind; - Function: Allow one to reset the HSP iteration to the beginning - Since this is an in-memory implementation - Returns : none - Args : none - -=cut - -=head2 iteration - - Title : iteration - Usage : $obj->iteration($newval) - Function: PSI-BLAST iteration - Returns : value of iteration - Args : newvalue (optional) - - -=cut - - -sub iteration { return 0 } - -1; diff --git a/Bio/Search/Hit/HmmpfamHit.pm b/Bio/Search/Hit/HmmpfamHit.pm deleted file mode 100755 index ad76378db..000000000 --- a/Bio/Search/Hit/HmmpfamHit.pm +++ /dev/null @@ -1,377 +0,0 @@ -# -# BioPerl module for Bio::Search::Hit::HmmpfamHit -# -# Please direct questions and support issues to -# -# Cared for by Sendu Bala -# -# Copyright Sendu Bala -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Search::Hit::HmmpfamHit - A parser and hit object for hmmpfam hits - -=head1 SYNOPSIS - - # generally we use Bio::SearchIO to build these objects - use Bio::SearchIO; - my $in = Bio::SearchIO->new(-format => 'hmmer_pull', - -file => 'result.hmmer'); - - while (my $result = $in->next_result) { - while (my $hit = $result->next_hit) { - print $hit->name, "\n"; - print $hit->score, "\n"; - print $hit->significance, "\n"; - - while (my $hsp = $hit->next_hsp) { - # process HSPI objects - } - } - } - -=head1 DESCRIPTION - -This object implements a parser for hmmpfam hit output, a program in the HMMER -package. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Sendu Bala - -Email bix@sendu.me.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Search::Hit::HmmpfamHit; - -use strict; - -use Bio::Search::HSP::HmmpfamHSP; - -use base qw(Bio::Root::Root Bio::Search::Hit::PullHitI); - -=head2 new - - Title : new - Usage : my $obj = Bio::Search::Hit::HmmpfamHit->new(); - Function: Builds a new Bio::Search::Hit::HmmpfamHit object. - Returns : Bio::Search::Hit::HmmpfamHit - Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent) - -parent => Bio::PullParserI object (required if no -chunk) - -hit_data => array ref with [name description score significance - num_hsps rank] - - where the array ref provided to -chunk contains an IO object - for a filehandle to something representing the raw data of the - hit, and $start and $end define the tell() position within the - filehandle that the hit data starts and ends (optional; defaults - to start and end of the entire thing described by the filehandle) - -=cut - -sub new { - my ($class, @args) = @_; - my $self = $class->SUPER::new(@args); - - $self->_setup(@args); - - my $fields = $self->_fields; - foreach my $field (qw( next_domain domains hsp_data )) { - $fields->{$field} = undef; - } - - my $hit_data = $self->_raw_hit_data; - if ($hit_data && ref($hit_data) eq 'ARRAY') { - foreach my $field (qw(name description score significance num_hsps rank)) { - $fields->{$field} = shift(@{$hit_data}); - } - } - $fields->{hit_start} = 1; - - delete $self->_fields->{accession}; - - $self->_dependencies( { ( length => 'hsp_data' ) } ); - - return $self; -} - -# -# PullParserI discovery methods so we can answer all HitI questions -# - -sub _discover_description { - # this should be set when this object is created, but if it was undef as is - # possible, this _discover method will be called: just return and keep the - # return value undef - return; -} - -sub _discover_hsp_data { - my $self = shift; - my $hsp_table = $self->get_field('hsp_table'); - my $hsp_data = $hsp_table->{$self->get_field('name')} || undef; - if ($hsp_data) { - if (defined $hsp_data->{hit_length}) { - $self->_fields->{length} = $hsp_data->{hit_length}; - } - - # rank query_start query_end hit_start hit_end score evalue - $self->_fields->{hsp_data} = $hsp_data->{hsp_data}; - } -} - -sub _discover_query_start { - my $self = shift; - my $hsp_data = $self->get_field('hsp_data') || return; - - my ($this_hsp) = sort { $a->[1] <=> $b->[1] } @{$hsp_data}; - $self->_fields->{query_start} = $this_hsp->[1]; -} - -sub _discover_query_end { - my $self = shift; - my $hsp_data = $self->get_field('hsp_data') || return; - - my ($this_hsp) = sort { $b->[2] <=> $a->[2] } @{$hsp_data}; - $self->_fields->{query_end} = $this_hsp->[2]; -} - -sub _discover_hit_start { - my $self = shift; - my $hsp_data = $self->get_field('hsp_data') || return; - - my ($this_hsp) = sort { $a->[3] <=> $b->[3] } @{$hsp_data}; - $self->_fields->{hit_start} = $this_hsp->[3]; -} - -sub _discover_hit_end { - my $self = shift; - my $hsp_data = $self->get_field('hsp_data') || return; - - my ($this_hsp) = sort { $b->[4] <=> $a->[4] } @{$hsp_data}; - $self->_fields->{hit_end} = $this_hsp->[4]; -} - -sub _discover_next_hsp { - my $self = shift; - my $hsp_data = $self->get_field('hsp_data') || return; - unless (defined $self->{_next_hsp_index}) { - $self->{_next_hsp_index} = 0; - } - return if $self->{_next_hsp_index} == -1; - - $self->_fields->{next_hsp} = Bio::Search::HSP::HmmpfamHSP->new(-parent => $self, - -hsp_data => $hsp_data->[$self->{_next_hsp_index}++]); - - if ($self->{_next_hsp_index} > $#{$hsp_data}) { - $self->{_next_hsp_index} = -1; - } -} - -=head2 next_hsp - - Title : next_hsp - Usage : while( $hsp = $obj->next_hsp()) { ... } - Function : Returns the next available High Scoring Pair - Example : - Returns : L object or null if finished - Args : none - -=cut - -sub next_hsp { - my $self = shift; - my $hsp = $self->get_field('next_hsp'); - undef $self->_fields->{next_hsp}; - return $hsp; -} - -=head2 next_domain - - Title : next_domain - Usage : my $domain = $hit->next_domain(); - Function: An alias for L, this will return the next HSP - Returns : L object - Args : none - -=cut - -*next_domain = \&next_hsp; - -=head2 hsps - - Usage : $hit_object->hsps(); - Purpose : Get a list containing all HSP objects. - Example : @hsps = $hit_object->hsps(); - Returns : list of L objects. - Argument : none - -=cut - -sub hsps { - my $self = shift; - my $old = $self->{_next_hsp_index} || 0; - $self->rewind; - my @hsps; - while (defined(my $hsp = $self->next_hsp)) { - push(@hsps, $hsp); - } - $self->{_next_hsp_index} = @hsps > 0 ? $old : -1; - return @hsps; -} - -=head2 domains - - Title : domains - Usage : my @domains = $hit->domains(); - Function: An alias for L, this will return the full list of hsps - Returns : array of L objects - Args : none - -=cut - -*domains = \&hsps; - -=head2 hsp - - Usage : $hit_object->hsp( [string] ); - Purpose : Get a single HSPI object for the present HitI object. - Example : $hspObj = $hit_object->hsp; # same as 'best' - : $hspObj = $hit_object->hsp('best'); - : $hspObj = $hit_object->hsp('worst'); - Returns : Object reference for a L object. - Argument : String (or no argument). - : No argument (default) = highest scoring HSP (same as 'best'). - : 'best' = highest scoring HSP. - : 'worst' = lowest scoring HSP. - Throws : Exception if an unrecognized argument is used. - -See Also : L, L() - -=cut - -sub hsp { - my ($self, $type) = @_; - $type ||= 'best'; - my $hsp_data = $self->get_field('hsp_data') || return; - - my $sort; - if ($type eq 'best') { - $sort = sub { $a->[6] <=> $b->[6] }; - } - elsif ($type eq 'worst') { - $sort = sub { $b->[6] <=> $a->[6] }; - } - else { - $self->throw("Unknown arg '$type' given to hsp()"); - } - - my ($this_hsp) = sort $sort @{$hsp_data}; - return Bio::Search::HSP::HmmpfamHSP->new(-parent => $self, -hsp_data => $this_hsp); -} - -=head2 rewind - - Title : rewind - Usage : $result->rewind; - Function: Allow one to reset the Hit iterator to the beginning, so that - next_hit() will subsequently return the first hit and so on. - Returns : n/a - Args : none - -=cut - -sub rewind { - my $self = shift; - my $hsp_data = $self->get_field('hsp_data') || return; - $self->{_next_hsp_index} = @{$hsp_data} > 0 ? 0 : -1; -} - -# have p() a synonym of significance() -sub p { - return shift->significance; -} - -=head2 strand - - Usage : $sbjct->strand( [seq_type] ); - Purpose : Gets the strand(s) for the query, sbjct, or both sequences. - : For hmmpfam, the answers are always 1 (forward strand). - Example : $qstrand = $sbjct->strand('query'); - : $sstrand = $sbjct->strand('hit'); - : ($qstrand, $sstrand) = $sbjct->strand(); - Returns : scalar context: integer '1' - : array context without args: list of two strings (1, 1) - : Array context can be "induced" by providing an argument of 'list' - : or 'array'. - Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default - : = 'query') ('sbjct' is synonymous with 'hit') - -=cut - -sub strand { - my ($self, $type) = @_; - $type ||= (wantarray ? 'list' : 'query'); - $type = lc($type); - if ($type eq 'list' || $type eq 'array') { - return (1, 1); - } - return 1; -} - -=head2 frac_aligned_query - - Usage : $hit_object->frac_aligned_query(); - Purpose : Get the fraction of the query sequence which has been aligned - : across all HSPs (not including intervals between non-overlapping - : HSPs). - Example : $frac_alnq = $hit_object->frac_aligned_query(); - Returns : undef (the length of query sequences is unknown in Hmmpfam reports) - Argument : none - -=cut - -# noop -sub frac_aligned_query { } - -1; diff --git a/Bio/Search/Hit/hmmer3Hit.pm b/Bio/Search/Hit/hmmer3Hit.pm deleted file mode 100644 index 3e93c8fad..000000000 --- a/Bio/Search/Hit/hmmer3Hit.pm +++ /dev/null @@ -1,159 +0,0 @@ -# $Id: bioperl.lisp 15559 2009-02-23 12:11:20Z maj $ -# -# BioPerl module for Bio::Search::Hit::hmmer3Hit -# -# Please direct questions and support issues to -# -# Cared for by Thomas Sharpton -# -# Copyright Thomas Sharpton -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Search::Hit::hmmer3Hit - DESCRIPTION of Object - -=head1 SYNOPSIS - -Give standard usage here - -=head1 DESCRIPTION - -Describe the object here - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -L - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via -the web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Thomas Sharpton - -Email thomas.sharpton@gmail.com - -Describe contact details here - -=head1 CONTRIBUTORS - -Additional contributors names and emails here - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Search::Hit::hmmer3Hit; -use strict; - - -use base qw(Bio::Search::Hit::GenericHit); - -=head2 new - - Title : new - Usage : my $obj = Bio::Search::Hit::HMMERHit->new(); - Function: Builds a new Bio::Search::Hit::HMMERHit object - Returns : Bio::Search::Hit::HMMERHit - Args : - - Plus the Bio::Search::Hit::GenericHit inherited params - -name => Name of Hit (required) - -description => Description (optional) - -accession => Accession number (optional) - -length => Length of the Hit (optional) - -score => Raw Score for the Hit (optional) - -significance => Significance value for the Hit (optional) - -algorithm => Algorithm used (BLASTP, FASTX, etc...) - -hsps => Array ref of HSPs for this Hit. - - -=cut - - -=head2 next_domain - - Title : next_domain - Usage : my $domain = $hit->next_domain(); - Function: An alias for L, this will return the next HSP - Returns : L object - Args : none - - -=cut - -sub next_domain{ shift->next_hsp } - -=head2 domains - - Title : domains - Usage : my @domains = $hit->domains(); - Function: An alias for L, this will return the full list of hsps - Returns : array of L objects - Args : none - - -=cut - -sub domains{ shift->hsps() } - -=head2 bits - - Usage : $hit_object->bits(); - Purpose : Gets the bit score of the best HSP for the current hit. - Example : $bits = $hit_object->bits(); - Returns : Integer or undef if bit score is not set - Argument : n/a - -See Also : L - -=cut - -sub bits { return 0 } - -=head2 iteration - - Title : iteration - Usage : $obj->iteration($newval) - Function: PSI-BLAST iteration - Returns : value of iteration - Args : newvalue (optional) - - -=cut - -sub iteration { return 0 } - -1; diff --git a/Bio/Search/Result/HMMERResult.pm b/Bio/Search/Result/HMMERResult.pm deleted file mode 100644 index bd7a95117..000000000 --- a/Bio/Search/Result/HMMERResult.pm +++ /dev/null @@ -1,448 +0,0 @@ -# -# BioPerl module for Bio::Search::Result::HMMERResult -# -# Please direct questions and support issues to -# -# Cared for by Jason Stajich -# -# Copyright Jason Stajich -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Search::Result::HMMERResult - A Result object for HMMER results - -=head1 SYNOPSIS - - use Bio::Search::Result::HMMERResult; - my $result = Bio::Search::Result::HMMERResult->new - ( -hmm_name => 'pfam', - -sequence_file => 'roa1.pep', - -hits => \@hits); - - # generally we use Bio::SearchIO to build these objects - use Bio::SearchIO; - my $in = Bio::SearchIO->new(-format => 'hmmer', - -file => 'result.hmmer'); - while( my $result = $in->next_result ) { - print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n"; - } - -=head1 DESCRIPTION - -This is a specialization of L. -There are a few extra methods, specifically L, -L, L, and L. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Jason Stajich - -Email jason@bioperl.org - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Search::Result::HMMERResult; -use strict; - - - -use base qw(Bio::Search::Result::GenericResult); - -=head2 new - - Title : new - Usage : my $obj = Bio::Search::Result::HMMERResult->new(); - Function: Builds a new Bio::Search::Result::HMMERResult object - Returns : Bio::Search::Result::HMMERResult - Args : -hmm_name => string, name of hmm file - -sequence_file => name of the sequence file - -plus Bio::Search::Result::GenericResult parameters - - -query_name => Name of query Sequence - -query_accession => Query accession number (if available) - -query_description => Description of query sequence - -query_length => Length of query sequence - -database_name => Name of database - -database_letters => Number of residues in database - -database_entries => Number of entries in database - -parameters => hash ref of search parameters (key => value) - -statistics => hash ref of search statistics (key => value) - -algorithm => program name (blastx) - -algorithm_version => version of the algorithm (2.1.2) - -program_reference => literature reference string for this algorithm - -=cut - -sub new { - my($class,@args) = @_; - my $self = $class->SUPER::new(@args); - - my ($hmm,$seqfile) = $self->_rearrange([qw(HMM_NAME SEQUENCE_FILE)], - @args); - - defined( $seqfile) && $self->sequence_file($seqfile); - defined( $hmm) && $self->hmm_name($hmm); - - return $self; -} - - -=head2 hmm_name - - Title : hmm_name - Usage : $obj->hmm_name($newval) - Function: Get/Set the value of hmm_name - Returns : value of hmm_name - Args : newvalue (optional) - - -=cut - -sub hmm_name{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'_hmm_name'} = $value; - } - return $self->{'_hmm_name'}; -} - - -=head2 sequence_file - - Title : sequence_file - Usage : $obj->sequence_file($newval) - Function: Get/Set the value of sequence_file - Returns : value of sequence_file - Args : newvalue (optional) - - -=cut - -sub sequence_file{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'_sequence_file'} = $value; - } - return $self->{'_sequence_file'}; - -} - - -=head2 next_model - - Title : next_model - Usage : my $domain = $result->next_model - Function: Returns the next domain - this - is an alias for next_hit - Returns : L object - Args : none - - -=cut - -sub next_model{ shift->next_hit } - -=head2 models - - Title : models - Usage : my @domains = $result->models; - Function: Returns the list of HMM models seen - this - is an alias for hits() - Returns : Array of L objects - Args : none - - -=cut - -sub models{ shift->hits } - -=head2 Bio::Search::Result::GenericResult inherited methods - -=cut - -=head2 algorithm - - Title : algorithm - Usage : my $r_type = $hsp->algorithm - Function: Obtain the name of the algorithm used to obtain the Result - Returns : string (e.g., BLASTP) - Args : [optional] scalar string to set value - -=cut - -=head2 algorithm_version - - Title : algorithm_version - Usage : my $r_version = $hsp->algorithm_version - Function: Obtain the version of the algorithm used to obtain the Result - Returns : string (e.g., 2.1.2) - Args : [optional] scalar string to set algorithm version value - -=cut - -=head2 Bio::Search::Result::ResultI interface methods - -Bio::Search::Result::ResultI implementation - -=head2 next_hit - - Title : next_hit - Usage : while( $hit = $result->next_hit()) { ... } - Function: Returns the next available Hit object, representing potential - matches between the query and various entities from the database. - Returns : a Bio::Search::Hit::HitI object or undef if there are no more. - Args : none - - -=cut - -=head2 query_name - - Title : query_name - Usage : $id = $result->query_name(); - Function: Get the string identifier of the query used by the - algorithm that performed the search. - Returns : a string. - Args : [optional] new string value for query name - -=cut - -=head2 query_accession - - Title : query_accession - Usage : $id = $result->query_accession(); - Function: Get the accession (if available) for the query sequence - Returns : a string - Args : [optional] new string value for accession - -=cut - -=head2 query_length - - Title : query_length - Usage : $id = $result->query_length(); - Function: Get the length of the query sequence - used in the search. - Returns : a number - Args : [optional] new integer value for query length - -=cut - -=head2 query_description - - Title : query_description - Usage : $id = $result->query_description(); - Function: Get the description of the query sequence - used in the search. - Returns : a string - Args : [optional] new string for the query description - -=cut - -=head2 database_name - - Title : database_name - Usage : $name = $result->database_name() - Function: Used to obtain the name of the database that the query was searched - against by the algorithm. - Returns : a scalar string - Args : [optional] new string for the db name - -=cut - -=head2 database_letters - - Title : database_letters - Usage : $size = $result->database_letters() - Function: Used to obtain the size of database that was searched against. - Returns : a scalar integer (units specific to algorithm, but probably the - total number of residues in the database, if available) or undef if - the information was not available to the Processor object. - Args : [optional] new scalar integer for number of letters in db - - -=cut - -=head2 database_entries - - Title : database_entries - Usage : $num_entries = $result->database_entries() - Function: Used to obtain the number of entries contained in the database. - Returns : a scalar integer representing the number of entities in the database - or undef if the information was not available. - Args : [optional] new integer for the number of sequence entries in the db - - -=cut - -=head2 get_parameter - - Title : get_parameter - Usage : my $gap_ext = $report->get_parameter('gapext') - Function: Returns the value for a specific parameter used - when running this report - Returns : string - Args : name of parameter (string) - -=cut - -=head2 available_parameters - - Title : available_parameters - Usage : my @params = $report->available_paramters - Function: Returns the names of the available parameters - Returns : Return list of available parameters used for this report - Args : none - -=cut - -=head2 get_statistic - - Title : get_statistic - Usage : my $gap_ext = $report->get_statistic('kappa') - Function: Returns the value for a specific statistic available - from this report - Returns : string - Args : name of statistic (string) - -=cut - -=head2 available_statistics - - Title : available_statistics - Usage : my @statnames = $report->available_statistics - Function: Returns the names of the available statistics - Returns : Return list of available statistics used for this report - Args : none - -=cut - -=head2 Bio::Search::Result::GenericResult specific methods - -=cut - -=head2 add_hit - - Title : add_hit - Usage : $report->add_hit($hit) - Function: Adds a HitI to the stored list of hits - Returns : Number of HitI currently stored - Args : Bio::Search::Hit::HitI - -=cut - -=head2 rewind - - Title : rewind - Usage : $result->rewind; - Function: Allow one to reset the Hit iteration to the beginning - Since this is an in-memory implementation - Returns : none - Args : none - -=cut - -sub rewind{ - my ($self) = @_; - $self->{'_hitindex'} = 0; -} - - -=head2 add_parameter - - Title : add_parameter - Usage : $report->add_parameter('gapext', 11); - Function: Adds a parameter - Returns : none - Args : key - key value name for this parama - value - value for this parameter - -=cut - -=head2 add_statistic - - Title : add_statistic - Usage : $report->add_statistic('lambda', 2.3); - Function: Adds a parameter - Returns : none - Args : key - key value name for this parama - value - value for this parameter - -=cut - -=head2 num_hits - - Title : num_hits - Usage : my $hitcount= $result->num_hits - Function: returns the number of hits for this query result - Returns : integer - Args : none - - -=cut - -=head2 hits - - Title : hits - Usage : my @hits = $result->hits - Function: Returns the available hits for this Result - Returns : Array of L objects - Args : none - - -=cut - -=head2 program_reference - - Title : program_reference - Usage : $obj->program_reference($newval) - Function: - Returns : value of the literature reference for the algorithm - Args : newvalue (optional) - - -=cut - -1; diff --git a/Bio/Search/Result/HmmpfamResult.pm b/Bio/Search/Result/HmmpfamResult.pm deleted file mode 100755 index f0f087999..000000000 --- a/Bio/Search/Result/HmmpfamResult.pm +++ /dev/null @@ -1,393 +0,0 @@ -# -# BioPerl module for Bio::Search::Result::HmmpfamResult -# -# Please direct questions and support issues to -# -# Cared for by Sendu Bala -# -# Copyright Sendu Bala -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Search::Result::HmmpfamResult - A parser and result object for hmmpfam - results - -=head1 SYNOPSIS - - # generally we use Bio::SearchIO to build these objects - use Bio::SearchIO; - my $in = Bio::SearchIO->new(-format => 'hmmer_pull', - -file => 'result.hmmer'); - - while (my $result = $in->next_result) { - print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n"; - } - -=head1 DESCRIPTION - -This object implements a parser for hmmpfam result output, a program in the HMMER -package. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Sendu Bala - -Email bix@sendu.me.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Search::Result::HmmpfamResult; - -use strict; - -use Bio::Search::Hit::HmmpfamHit; - -use base qw(Bio::Root::Root Bio::Search::Result::PullResultI); - -=head2 new - - Title : new - Usage : my $obj = Bio::SearchIO::Result::hmmpfam->new(); - Function: Builds a new Bio::SearchIO::Result::hmmpfam object - Returns : Bio::SearchIO::Result::hmmpfam - Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent) - -parent => Bio::PullParserI object (required if no -chunk) - -parameters => hash ref of search parameters (key => value), optional - -statistics => hash ref of search statistics (key => value), optional - - where the array ref provided to -chunk contains an IO object - for a filehandle to something representing the raw data of the - result, and $start and $end define the tell() position within the - filehandle that the result data starts and ends (optional; defaults - to start and end of the entire thing described by the filehandle) - -=cut - -sub new { - my ($class, @args) = @_; - my $self = $class->SUPER::new(@args); - - $self->_setup(@args); - - foreach my $field (qw( header hit_table hsp_table alignments next_model models query_length )) { - $self->_fields->{$field} = undef; - } - - $self->_dependencies( { ( query_name => 'header', - query_accession => 'header', - query_description => 'header', - hit_table => 'header', - num_hits => 'hit_table', - no_hits_found => 'hit_table', - hsp_table => 'hit_table', - next_alignment => 'hsp_table' ) } ); - - return $self; -} - -# -# PullParserI discovery methods so we can answer all ResultI questions -# - -sub _discover_header { - my $self = shift; - $self->_chunk_seek(0); - my $header = $self->_get_chunk_by_end("all domains):\n"); - $self->{_after_header} = $self->_chunk_tell; - - $header || $self->throw("Could not find hmmer header, is file really hmmer format?"); - - ($self->_fields->{query_name}) = $header =~ /^Query(?:\s+sequence)?:\s+(\S+)/m; - ($self->_fields->{query_accession}) = $header =~ /^Accession:\s+(\S+)/m; - ($self->_fields->{query_description}) = $header =~ /^Description:\s+(\S.+)/m; - $self->_fields->{query_accession} ||= ''; - $self->_fields->{query_description} ||= ''; - - $self->_fields->{header} = 1; # stop this method being called again -} - -sub _discover_hit_table { - my $self = shift; - - $self->_chunk_seek($self->{_after_header}); - my $table = $self->_get_chunk_by_end("for domains:\n"); - $self->{_after_hit_table} = $self->_chunk_tell; - - my $evalue_cutoff = $self->get_field('evalue_cutoff'); - undef $evalue_cutoff if $evalue_cutoff eq '[unset]'; - my $score_cutoff = $self->get_field('score_cutoff'); - undef $score_cutoff if $score_cutoff eq '[unset]'; - my $hsps_cutoff = $self->get_field('hsps_cutoff'); - undef $hsps_cutoff if $hsps_cutoff eq '[unset]'; - - my @table; - my $no_hit = 1; - while ($table =~ /^(\S+)\s+(\S.+?)?\s+(\S+)\s+(\S+)\s+(\d+)\n/gm) { - $no_hit = 0; - my $evalue = abs($4); # consistency for tests under Windows - next if ($evalue_cutoff && $evalue > $evalue_cutoff); - next if ($score_cutoff && $3 < $score_cutoff); - next if ($hsps_cutoff && $5 < $hsps_cutoff); - push(@table, [$1, $2, $3, $evalue, $5]); - } - $self->_fields->{hit_table} = \@table; - $self->{_next_hit_index} = @table > 0 ? 0 : -1; - - $self->_fields->{no_hits_found} = $no_hit; - $self->_fields->{num_hits} = @table; -} - -sub _discover_hsp_table { - my $self = shift; - - $self->_chunk_seek($self->{_after_hit_table}); - my $table = $self->_get_chunk_by_end("top-scoring domains:\n"); - $table ||= $self->_get_chunk_by_end("//\n"); # A0 reports - $self->{_after_hsp_table} = $self->_chunk_tell; - - my %table; - # can't save this regex work for the hsp object because the hit object needs - # its length, so may as well just do all the work here - while ($table =~ /^(\S+)\s+(\d+)\/\d+\s+(\d+)\s+(\d+)\s+\S\S\s+(\d+)\s+(\d+)\s+\S(\S)\s+(\S+)\s+(\S+)/gm) { - # rank query_start query_end hit_start hit_end score evalue - my $evalue = abs($9); # consistency for tests under Windows - push(@{$table{$1}->{hsp_data}}, [$2, $3, $4, $5, $6, $8, $evalue]); - if ($7 eq ']') { - $table{$1}->{hit_length} = $6; - } - } - $self->_fields->{hsp_table} = \%table; -} - -sub _discover_alignments { - my $self = shift; - $self->_fields->{alignments} = { }; -} - -sub _next_alignment { - my $self = shift;; - return if $self->{_no_more_alignments}; - - my $aligns = $self->_fields->{alignments}; - - unless (defined $self->{_after_previous_alignment}) { - $self->_chunk_seek($self->{_after_hsp_table}); - my $chunk = $self->_get_chunk_by_end(": domain"); - unless ($chunk) { - $self->{_no_more_alignments} = 1; - return; - } - - $self->{_after_previous_alignment} = $self->_chunk_tell; - $self->{_next_alignment_start_text} = $chunk; - return $self->_next_alignment; - } - - $self->_chunk_seek($self->{_after_previous_alignment}); - my $chunk = $self->_get_chunk_by_end(": domain"); - unless ($chunk) { - $self->_chunk_seek($self->{_after_previous_alignment}); - $chunk = $self->_get_chunk_by_end("//"); - - unless ($chunk) { - $self->{_no_more_alignments} = 1; - return; - } - } - - $self->{_after_previous_alignment} = $self->_chunk_tell; - - if (defined $self->{_next_alignment_start_text}) { - $chunk = $self->{_next_alignment_start_text}.$chunk; - } - - $chunk =~ s/(\S+: domain)$//; - $self->{_next_alignment_start_text} = $1; - - my ($name, $domain) = $chunk =~ /^(\S+): domain (\d+)/; - $aligns->{$name.'~~~~'.$domain} = $chunk; - return 1; -} - -sub _discover_next_hit { - my $self = shift; - my @hit_table = @{$self->get_field('hit_table')}; - return if $self->{_next_hit_index} == -1; - - #[name description score significance num_hsps rank] - my @hit_data = (@{$hit_table[$self->{_next_hit_index}++]}, $self->{_next_hit_index}); - - $self->_fields->{next_hit} = Bio::Search::Hit::HmmpfamHit->new(-parent => $self, - -hit_data => \@hit_data); - - if ($self->{_next_hit_index} > $#hit_table) { - $self->{_next_hit_index} = -1; - } -} - -=head2 next_hit - - Title : next_hit - Usage : while( $hit = $result->next_hit()) { ... } - Function: Returns the next available Hit object, representing potential - matches between the query and various entities from the database. - Returns : a Bio::Search::Hit::HitI object or undef if there are no more. - Args : none - -=cut - -sub next_hit { - my $self = shift; - my $hit = $self->get_field('next_hit'); - undef $self->_fields->{next_hit}; - return $hit; -} - -=head2 next_model - - Title : next_model - Usage : my $domain = $result->next_model - Function: Returns the next domain - this is an alias for next_hit() - Returns : L object - Args : none - -=cut - -*next_model = \&next_hit; - -=head2 hits - - Title : hits - Usage : my @hits = $result->hits - Function: Returns the HitI objects contained within this Result - Returns : Array of Bio::Search::Hit::HitI objects - Args : none - -See Also: L - -=cut - -sub hits { - my $self = shift; - my $old = $self->{_next_hit_index} || 0; - $self->rewind; - my @hits; - while (defined(my $hit = $self->next_hit)) { - push(@hits, $hit); - } - $self->{_next_hit_index} = @hits > 0 ? $old : -1; - return @hits; -} - -=head2 models - - Title : models - Usage : my @domains = $result->models; - Function: Returns the list of HMM models seen - this is an alias for hits() - Returns : Array of L objects - Args : none - -=cut - -*models = \&hits; - -=head2 sort_hits - - Title : sort_hits - Usage : $result->sort_hits('sort_hits( - sub{$Bio::Search::Result::HmmpfamResult::a->[2] - <=> - $Bio::Search::Result::HmmpfamResult::b->[2]}); - NOT $result->sort_hits($a->[2] <=> $b->[2]); - -=cut - -sub sort_hits { - my ($self, $code_ref) = @_; - $code_ref ||= sub { $a->[3] <=> $b->[3] }; - - # avoid creating hit objects just to sort, hence force user to sort on - # the array references in hit table - my $table_ref = $self->get_field('hit_table'); - @{$table_ref} > 1 || return; - - my @sorted = sort $code_ref @{$table_ref}; - @sorted == @{$table_ref} || $self->throw("Your sort routine failed to give back all hits!"); - $self->_fields->{hit_table} = \@sorted; -} - -=head2 rewind - - Title : rewind - Usage : $result->rewind; - Function: Allow one to reset the Hit iterator to the beginning, so that - next_hit() will subsequently return the first hit and so on. - Returns : n/a - Args : none - -=cut - -sub rewind { - my $self = shift; - unless ($self->_fields->{hit_table}) { - $self->get_field('hit_table'); - } - $self->{_next_hit_index} = @{$self->_fields->{hit_table}} > 0 ? 0 : -1; -} - -1; diff --git a/Bio/Search/Result/hmmer3Result.pm b/Bio/Search/Result/hmmer3Result.pm deleted file mode 100644 index 5f5de6cab..000000000 --- a/Bio/Search/Result/hmmer3Result.pm +++ /dev/null @@ -1,191 +0,0 @@ -# $Id: bioperl.lisp 15559 2009-02-23 12:11:20Z maj $ -# -# BioPerl module for Bio::Search::Result::hmmer3Result -# -# Please direct questions and support issues to -# -# Cared for by Thomas Sharpton -# -# Copyright Thomas Sharpton -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Search::Result::hmmer3Result - DESCRIPTION of Object - -=head1 SYNOPSIS - -Give standard usage here - -=head1 DESCRIPTION - -Describe the object here - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -L - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via -the web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Thomas Sharpton - -Email thomas.sharpton@gmail.com - -Describe contact details here - -=head1 CONTRIBUTORS - -Additional contributors names and emails here - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Search::Result::hmmer3Result; -use strict; - -use base qw(Bio::Search::Result::GenericResult); - -=head2 new - - Title : new - Usage : my $obj = new Bio::Search::Result::hmmer3Result.pm(); - Function: Builds a new Bio::Search::Result::hmmer3Result.pm object - Returns : an instance of Bio::Search::Result::hmmer3Result.pm - Args : -hmm_name => string, name of hmm file - -sequence_file => name of the sequence file - -=cut - -sub new { - my($class,@args) = @_; - my $self = $class->SUPER::new(@args); - - my ($hmm,$seqfile) = $self->_rearrange([qw(HMM_NAME SEQUENCE_FILE)], - @args); - defined( $seqfile ) && $self->sequence_file( $seqfile ); - defined( $hmm ) && $self->hmm_name( $hmm ); - - return $self; -} - -=head2 hmm_name - - Title : hmm_name - Usage : $obj->hmm_name($newval) - Function: Get/Set the value of hmm_name - Returns : value of hmm_name - Args : newvalue (optional) - - -=cut - -sub hmm_name{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'_hmm_name'} = $value; - } - return $self->{'_hmm_name'}; -} - -=head2 sequence_file - - Title : sequence_file - Usage : $obj->sequence_file($newval) - Function: Get/Set the value of sequence_file - Returns : value of sequence_file - Args : newvalue (optional) - - -=cut - -sub sequence_file{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'_sequence_file'} = $value; - } - return $self->{'_sequence_file'}; - -} - -=head2 next_model - - Title : next_model - Usage : my $domain = $result->next_model - Function: Returns the next domain - this - is an alias for next_hit - Returns : L object - Args : none - - -=cut - -sub next_model{ shift->next_hit } - -=head2 models - - Title : models - Usage : my @domains = $result->models; - Function: Returns the list of HMM models seen - this - is an alias for hits() - Returns : Array of L objects - Args : none - - -=cut - -sub models{ shift->hits } - -=head2 rewind - - Title : rewind - Usage : $result->rewind; - Function: Allow one to reset the Hit iteration to the beginning - Since this is an in-memory implementation - Returns : none - Args : none - -=cut - -sub rewind{ - my ($self) = @_; - $self->{'_hitindex'} = 0; -} - - - -1; diff --git a/Bio/SearchIO/hmmer.pm b/Bio/SearchIO/hmmer.pm deleted file mode 100644 index 689794732..000000000 --- a/Bio/SearchIO/hmmer.pm +++ /dev/null @@ -1,159 +0,0 @@ -# -# BioPerl module for Bio::SearchIO::hmmer -# -# Please direct questions and support issues to -# -# Cared for by Kai Blin -# -# Copyright Kai Blin -# -# You may distribute this module under the same terms as perl itself -# -# POD documentation - main docs before the code - -=head1 NAME - -Bio::SearchIO::hmmer - A parser for HMMER2 and HMMER3 output (hmmscan, hmmsearch, hmmpfam) - -=head1 SYNOPSIS - - # do not use this class directly it is available through Bio::SearchIO - use Bio::SearchIO; - my $in = Bio::SearchIO->new(-format => 'hmmer', - -file => 't/data/L77119.hmmer'); - while( my $result = $in->next_result ) { - # this is a Bio::Search::Result::HMMERResult object - print $result->query_name(), " for HMM ", $result->hmm_name(), "\n"; - while( my $hit = $result->next_hit ) { - print $hit->name(), "\n"; - while( my $hsp = $hit->next_hsp ) { - print "length is ", $hsp->length(), "\n"; - } - } - } - -=head1 DESCRIPTION - -This object implements a parser for HMMER output. It works with both HMMER2 and HMMER3 - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Kai Blin - -Email kai.blin-at-biotech.uni-tuebingen.de - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::SearchIO::hmmer; - -use strict; - -use Bio::Factory::ObjectFactory; - -use base qw(Bio::SearchIO); - -sub new { - my ( $caller, @args ) = @_; - my $class = ref($caller) || $caller; - - my $self = $class->SUPER::new(@args); - $self->_initialize(@args); - - # Try to guess the hmmer format version if it's not specified. - my $version; - my %param = @args; - - @param{ map { lc $_ } keys %param } = values %param; # lowercase keys - - # If the caller specified a version, go for that - if (defined($param{"-version"})) { - $version = $param{"-version"}; - } else { - - # read second line of the file - my $first_line = $self->_readline; - $_ = $self->_readline; - - if ( m/HMMER\s3/ ) { - $version = "3"; - } else { - $version = "2"; - } - - $self->_pushback($_); - $self->_pushback($first_line); - } - - my $format = "hmmer$version"; - return unless( $class->_load_format_module($format) ); - - bless($self, "Bio::SearchIO::$format"); - - return $self; -} - -sub _initialize { - my ( $self, @args ) = @_; - $self->SUPER::_initialize(@args); - my $handler = $self->_eventHandler; - $handler->register_factory( - 'result', - Bio::Factory::ObjectFactory->new( - -type => 'Bio::Search::Result::HMMERResult', - -interface => 'Bio::Search::Result::ResultI' - ) - ); - - $handler->register_factory( - 'hit', - Bio::Factory::ObjectFactory->new( - -type => 'Bio::Search::Hit::HMMERHit', - -interface => 'Bio::Search::Hit::HitI' - ) - ); - - $handler->register_factory( - 'hsp', - Bio::Factory::ObjectFactory->new( - -type => 'Bio::Search::HSP::HMMERHSP', - -interface => 'Bio::Search::HSP::HSPI' - ) - ); -} - -1; diff --git a/Bio/SearchIO/hmmer2.pm b/Bio/SearchIO/hmmer2.pm deleted file mode 100644 index 85c4d9032..000000000 --- a/Bio/SearchIO/hmmer2.pm +++ /dev/null @@ -1,1105 +0,0 @@ -# -# BioPerl module for Bio::SearchIO::hmmer2 -# -# Please direct questions and support issues to -# -# Cared for by Jason Stajich -# -# Copyright Jason Stajich -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::SearchIO::hmmer2 - A parser for HMMER output (hmmpfam, hmmsearch) - -=head1 SYNOPSIS - - # do not use this class directly it is available through Bio::SearchIO - use Bio::SearchIO; - my $in = Bio::SearchIO->new(-format => 'hmmer2', - -file => 't/data/L77119.hmmer'); - while( my $result = $in->next_result ) { - # this is a Bio::Search::Result::HMMERResult object - print $result->query_name(), " for HMM ", $result->hmm_name(), "\n"; - while( my $hit = $result->next_hit ) { - print $hit->name(), "\n"; - while( my $hsp = $hit->next_hsp ) { - print "length is ", $hsp->length(), "\n"; - } - } - } - -=head1 DESCRIPTION - -This object implements a parser for HMMER output. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Jason Stajich - -Email jason-at-bioperl.org - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::SearchIO::hmmer2; - -use strict; - -use Bio::Factory::ObjectFactory; - -use vars qw(%MAPPING %MODEMAP -); - -use base qw(Bio::SearchIO::hmmer); - -BEGIN { - - # mapping of HMMER items to Bioperl hash keys - %MODEMAP = ( - 'HMMER_Output' => 'result', - 'Hit' => 'hit', - 'Hsp' => 'hsp' - ); - - %MAPPING = ( - 'Hsp_bit-score' => 'HSP-bits', - 'Hsp_score' => 'HSP-score', - 'Hsp_evalue' => 'HSP-evalue', - 'Hsp_query-from' => 'HSP-query_start', - 'Hsp_query-to' => 'HSP-query_end', - 'Hsp_hit-from' => 'HSP-hit_start', - 'Hsp_hit-to' => 'HSP-hit_end', - 'Hsp_positive' => 'HSP-conserved', - 'Hsp_identity' => 'HSP-identical', - 'Hsp_gaps' => 'HSP-hsp_gaps', - 'Hsp_hitgaps' => 'HSP-hit_gaps', - 'Hsp_querygaps' => 'HSP-query_gaps', - 'Hsp_qseq' => 'HSP-query_seq', - 'Hsp_csline' => 'HSP-cs_seq', - 'Hsp_hseq' => 'HSP-hit_seq', - 'Hsp_midline' => 'HSP-homology_seq', - 'Hsp_align-len' => 'HSP-hsp_length', - 'Hsp_query-frame' => 'HSP-query_frame', - 'Hsp_hit-frame' => 'HSP-hit_frame', - - 'Hit_id' => 'HIT-name', - 'Hit_len' => 'HIT-length', - 'Hit_accession' => 'HIT-accession', - 'Hit_desc' => 'HIT-description', - 'Hit_signif' => 'HIT-significance', - 'Hit_score' => 'HIT-score', - - 'HMMER_program' => 'RESULT-algorithm_name', - 'HMMER_version' => 'RESULT-algorithm_version', - 'HMMER_query-def' => 'RESULT-query_name', - 'HMMER_query-len' => 'RESULT-query_length', - 'HMMER_query-acc' => 'RESULT-query_accession', - 'HMMER_querydesc' => 'RESULT-query_description', - 'HMMER_hmm' => 'RESULT-hmm_name', - 'HMMER_seqfile' => 'RESULT-sequence_file', - 'HMMER_db' => 'RESULT-database_name', - ); -} - -=head2 next_result - - Title : next_result - Usage : my $hit = $searchio->next_result; - Function: Returns the next Result from a search - Returns : Bio::Search::Result::ResultI object - Args : none - -=cut - -sub next_result { - my ($self) = @_; - my $seentop = 0; - my $reporttype; - my ( $buffer, $last, @hitinfo, @hspinfo, %hspinfo, %hitinfo ); - local $/ = "\n"; - - my $verbose = $self->verbose; # cache for speed? - $self->start_document(); - - while ( defined( $buffer = $self->_readline ) ) { - my $lineorig = $buffer; - - chomp $buffer; - if ($buffer =~ /^HMMER\s+(\S+)\s+\((.+)\)/o) { - my ( $prog, $version ) = split( /\s+/, $buffer ); - if ($seentop) { - $self->_pushback($buffer); - $self->end_element( { 'Name' => 'HMMER_Output' } ); - return $self->end_document(); - } - $self->{'_hmmidline'} = $buffer; - $self->start_element( { 'Name' => 'HMMER_Output' } ); - $self->{'_result_count'}++; - $seentop = 1; - if ( defined $last ) { - ($reporttype) = split( /\s+/, $last ); - $reporttype = uc($reporttype) if defined $reporttype; - $self->element( - { - 'Name' => 'HMMER_program', - 'Data' => $reporttype - } - ); - } - $self->element( - { - 'Name' => 'HMMER_version', - 'Data' => $version - } - ); - } - elsif ($buffer =~ s/^HMM file:\s+//o) { - $self->{'_hmmfileline'} = $lineorig; - $self->element( - { - 'Name' => 'HMMER_hmm', - 'Data' => $buffer - } - ); - } - elsif ($buffer =~ s/^Sequence\s+(file|database):\s+//o) { - $self->{'_hmmseqline'} = $lineorig; - if ( $1 eq 'database' ) { - $self->element( - { - 'Name' => 'HMMER_db', - 'Data' => $buffer - } - ); - } - $self->element( - { - 'Name' => 'HMMER_seqfile', - 'Data' => $buffer - } - ); - } - elsif ($buffer =~ s/^Query(?:\s+(?:sequence|HMM))?(?:\s+\d+)?:\s+//o) { - if ( !$seentop ) { - - # we're in a multi-query report - $self->_pushback($lineorig); - $self->_pushback( $self->{'_hmmseqline'} ); - $self->_pushback( $self->{'_hmmfileline'} ); - $self->_pushback( $self->{'_hmmidline'} ); - next; - } - $buffer =~ s/\s+$//; - $self->element( - { - 'Name' => 'HMMER_query-def', - 'Data' => $buffer - } - ); - } - elsif ($buffer =~ s/^Accession:\s+//o) { - $buffer =~ s/\s+$//; - $self->element( - { - 'Name' => 'HMMER_query-acc', - 'Data' => $buffer - } - ); - } - elsif ($buffer =~ s/^Description:\s+//o) { - $buffer =~ s/\s+$//; - $self->element( - { - 'Name' => 'HMMER_querydesc', - 'Data' => $buffer - } - ); - } - elsif ( defined $self->{'_reporttype'} - && ( $self->{'_reporttype'} eq 'HMMSEARCH' - || $self->{'_reporttype'} eq 'HMMPFAM' ) - ) { - # PROCESS RESULTS HERE - if ($buffer =~ /^Scores for (?:complete sequences|sequence family)/o) { - while ( defined( $buffer = $self->_readline ) ) { - last if ($buffer =~ /^\s+$/); - next if ( $buffer =~ /^Model\s+Description/o - || $buffer =~ /^Sequence\s+Description/o - || $buffer =~ /^\-\-\-/o ); - - chomp $buffer; - my @line = split( /\s+/, $buffer ); - my ( $name, $domaintotal, $evalue, $score ) = - ( shift @line, pop @line, pop @line, pop @line ); - my $desc = join( ' ', @line ); - push @hitinfo, [ $name, $desc, $score, $evalue, $domaintotal ]; - $hitinfo{$name} = $#hitinfo; - } - } - elsif ($buffer =~ /^Parsed for domains:/o) { - @hspinfo = (); - - while ( defined( $buffer = $self->_readline ) ) { - last if ($buffer =~ /^\s+$/); - if ($buffer =~ m!^//!) { - $self->_pushback($buffer); - last; - } - next if ( $buffer =~ /^(?:Model|Sequence)\s+Domain/ || $buffer =~ /^\-\-\-/ ); - - chomp $buffer; - if ( - my ( $name, $domainct, $domaintotal, - $seq_start, $seq_end, $seq_cov, - $hmm_start, $hmm_end, $hmm_cov, - $score, $evalue ) - = ( $buffer =~ - m!^(\S+)\s+ # domain name - (\d+)/(\d+)\s+ # domain num out of num - (\d+)\s+(\d+)\s+ # seq start, end - (\S+)\s+ # seq coverage - (\d+)\s+(\d+)\s+ # hmm start, end - (\S+)\s+ # hmm coverage - (\S+)\s+ # score - (\S+) # evalue - \s*$!ox - ) - ) { - my $hindex = $hitinfo{$name}; - if ( !defined $hindex ) { - push @hitinfo, - [ $name, '', $score, $evalue, $domaintotal ]; - $hitinfo{$name} = $#hitinfo; - $hindex = $#hitinfo; - } - - my $info = $hitinfo[$hindex]; - if ( !defined $info ) { - if ($self->{'_reporttype'} eq 'HMMSEARCH') { - $self->warn( - "Incomplete Sequence information, can't find $name hitinfo says $hitinfo{$name}" - ); - } - elsif ($self->{'_reporttype'} eq 'HMMPFAM') { - $self->warn( - "Incomplete Domain information, can't find $name hitinfo says $hitinfo{$name}" - ); - } - next; - } - - # Try to get HMM and Sequence lengths from the alignment information - if ($self->{'_reporttype'} eq 'HMMSEARCH') { - # For Hmmsearch, if seq coverage ends in ']' it means that the alignment - # runs until the end. In that case add the END coordinate to @hitinfo - # to use it as Hit Length - if ( $seq_cov =~ m/\]$/ - and scalar @{ $hitinfo[$hindex] } == 5 - ) { - push @{ $hitinfo[$hindex] }, $seq_end ; - } - # For Hmmsearch, if hmm coverage ends in ']', it means that the alignment - # runs until the end. In that case use the END coordinate as Query Length - if ( $hmm_cov =~ m/\]$/ - and not exists $self->{_values}->{'RESULT-query_length'} - ) { - $self->element( - { 'Name' => 'HMMER_query-len', - 'Data' => $hmm_end - } - ); - } - } - elsif ($self->{'_reporttype'} eq 'HMMPFAM') { - # For Hmmpfam, if hmm coverage ends in ']' it means that the alignment - # runs until the end. In that case add the END coordinate to @hitinfo - # to use it as Hit Length - if ( $hmm_cov =~ m/\]$/ - and scalar @{ $hitinfo[$hindex] } == 5 - ) { - push @{ $hitinfo[$hindex] }, $hmm_end ; - } - # For Hmmpfam, if seq coverage ends in ']', it means that the alignment - # runs until the end. In that case use the END coordinate as Query Length - if ( $seq_cov =~ m/\]$/ - and not exists $self->{_values}->{'RESULT-query_length'} - ) { - $self->element( - { 'Name' => 'HMMER_query-len', - 'Data' => $seq_end - } - ); - } - } - - my @vals = ($seq_start, $seq_end, - $hmm_start, $hmm_end, - $score, $evalue); - push @hspinfo, [ $name, @vals ]; - } - } - } - elsif ($buffer =~ /^Alignments of top/o) { - my ( $prelength, $count, $width ); - $count = 0; - my %domaincounter; - my $second_tier = 0; - my $csline = ''; - - while ( defined( $buffer = $self->_readline ) ) { - next if ( $buffer =~ /^Align/o ); - - if ( $buffer =~ m/^Histogram/o - || $buffer =~ m!^//!o - || $buffer =~ m/^Query(?:\s+(?:sequence|HMM))?(?:\s+\d+)?:/o - ) { - if ( $self->in_element('hsp') ) { - $self->end_element( { 'Name' => 'Hsp' } ); - } - if ( $self->within_element('hit') ) { - $self->end_element( { 'Name' => 'Hit' } ); - } - $self->_pushback($buffer); - last; - } - - chomp $buffer; - if ( - my ( $name, $domainct, $domaintotal, - $from, $to ) - = ( $buffer =~ - m/^\s*(.+): - \s+ domain \s+ (\d+) \s+ of \s+ (\d+) , - \s+ from \s+ (\d+) \s+ to \s+ (\d+)/x - ) - ) { - $domaincounter{$name}++; - if ( $self->within_element('hit') ) { - if ( $self->within_element('hsp') ) { - $self->end_element( { 'Name' => 'Hsp' } ); - } - $self->end_element( { 'Name' => 'Hit' } ); - } - - my $info = [ @{ $hitinfo[ $hitinfo{$name} ] } ]; - if ( !defined $info - || $info->[0] ne $name - ) { - $self->warn( - "Somehow the Model table order does not match the order in the domains (got " - . $info->[0] - . ", expected $name). We're back loading this from the alignment information instead" - ); - $info = [ - $name, '', - $buffer =~ /score \s+ ([^,\s]+), \s+E\s+=\s+ (\S+)/ox, - $domaintotal - ]; - push @hitinfo, $info; - $hitinfo{$name} = $#hitinfo; - } - - $self->start_element( { 'Name' => 'Hit' } ); - $self->element( - { - 'Name' => 'Hit_id', - 'Data' => shift @{$info} - } - ); - $self->element( - { - 'Name' => 'Hit_desc', - 'Data' => shift @{$info} - } - ); - $self->element( - { - 'Name' => 'Hit_score', - 'Data' => shift @{$info} - } - ); - $self->element( - { - 'Name' => 'Hit_signif', - 'Data' => shift @{$info} - } - ); - my $dom_total = shift @{$info}; - if (my $hit_end = shift @{$info}) { - $self->element( - { - 'Name' => 'Hit_len', - 'Data' => $hit_end - } - ); - } - - $self->start_element( { 'Name' => 'Hsp' } ); - my $HSPinfo = shift @hspinfo; - my $id = shift @$HSPinfo; - - if ( $id ne $name ) { - $self->throw( - "Somehow the domain list details do not match " - . "the table (got $id, expected $name)" - ); - } - - if ($self->{'_reporttype'} eq 'HMMSEARCH') { - $self->element( - { - 'Name' => 'Hsp_hit-from', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_hit-to', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_query-from', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_query-to', - 'Data' => shift @$HSPinfo - } - ); - } - elsif ($self->{'_reporttype'} eq 'HMMPFAM') { - $self->element( - { - 'Name' => 'Hsp_query-from', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_query-to', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_hit-from', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_hit-to', - 'Data' => shift @$HSPinfo - } - ); - } - $self->element( - { - 'Name' => 'Hsp_score', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_evalue', - 'Data' => shift @$HSPinfo - } - ); - - if ( $domaincounter{$name} == $domaintotal ) { - $hitinfo[ $hitinfo{$name} ] = undef; - } - } - else { - - # Might want to change this so that it - # accumulates all the of the alignment lines into - # three array slots and then tests for the - # end of the line - if ($buffer =~ m/^\s+(?:CS|RF)\s+/o && $count == 0) { - # Buffer the CS line now and process it later at - # midline point, where $prelength and width will be known - $csline = $buffer; - next; - } - elsif ($buffer =~ /^(\s+ \*->) (\S+)/ox) { - # start of domain - $prelength = CORE::length($1); - $width = 0; - - # deal with fact that start and stop is on same line - my $data = $2; - if ($data =~ s/<-?\*?\s*$//) - { - $width = CORE::length($data); - } - - if ($self->{'_reporttype'} eq 'HMMSEARCH') { - $self->element( - { - 'Name' => 'Hsp_qseq', - 'Data' => $data - } - ); - } - elsif ($self->{'_reporttype'} eq 'HMMPFAM') { - $self->element( - { - 'Name' => 'Hsp_hseq', - 'Data' => $data - } - ); - } - $count = 0; - $second_tier = 0; - } - elsif ($buffer =~ /^(\s+) (\S+) <-?\*? \s*$/ox) { - # end of domain - $prelength -= 3 unless ( $second_tier++ ); - if ($self->{'_reporttype'} eq 'HMMSEARCH') { - $self->element( - { - 'Name' => 'Hsp_qseq', - 'Data' => $2 - } - ); - } - elsif ($self->{'_reporttype'} eq 'HMMPFAM') { - $self->element( - { - 'Name' => 'Hsp_hseq', - 'Data' => $2 - } - ); - } - $width = CORE::length($2); - $count = 0; - } - elsif ( ( $count != 1 && $buffer =~ /^\s+$/o ) - || CORE::length($buffer) == 0 - || $buffer =~ /^\s+\-?\*\s*$/ - || $buffer =~ /^\s+\S+\s+\-\s+\-\s*$/ ) - { - next; - } - elsif ( $count == 0 ) { - $prelength -= 3 unless ( $second_tier++ ); - unless ( defined $prelength ) { - - # $self->warn("prelength not set"); - next; - } - if ($self->{'_reporttype'} eq 'HMMSEARCH') { - $self->element( - { - 'Name' => 'Hsp_qseq', - 'Data' => substr( $buffer, $prelength ) - } - ); - } - elsif ($self->{'_reporttype'} eq 'HMMPFAM') { - $self->element( - { - 'Name' => 'Hsp_hseq', - 'Data' => substr( $buffer, $prelength ) - } - ); - } - } - elsif ( $count == 1 ) { - if ( !defined $prelength ) { - $self->warn("prelength not set"); - } - if ($width) { - $self->element( - { - 'Name' => 'Hsp_midline', - 'Data' => substr( $buffer, $prelength, $width ) - } - ); - if ($csline ne '') { - $self->element( - { - 'Name' => 'Hsp_csline', - 'Data' => substr( $csline, $prelength, $width ) - - } - ); - $csline = ''; - } - } - else { - $self->element( - { - 'Name' => 'Hsp_midline', - 'Data' => substr( $buffer, $prelength ) - } - ); - if ($csline ne '') { - $self->element( - { - 'Name' => 'Hsp_csline', - 'Data' => substr( $csline, $prelength ) - } - ); - $csline = ''; - } - } - } - elsif ( $count == 2 ) { - if ( $buffer =~ /^\s+(\S+)\s+(\d+|\-)\s+(\S*)\s+(\d+|\-)/o ) { - if ($self->{'_reporttype'} eq 'HMMSEARCH') { - $self->element( - { - 'Name' => 'Hsp_hseq', - 'Data' => $3 - } - ); - } - elsif ($self->{'_reporttype'} eq 'HMMPFAM') { - $self->element( - { - 'Name' => 'Hsp_qseq', - 'Data' => $3 - } - ); - } - } - else { - $self->warn("unrecognized line ($count): $buffer\n"); - } - } - $count = 0 if $count++ >= 2; - } - } - } - elsif ( $buffer =~ /^Histogram/o || $buffer =~ m!^//!o ) { - my %domaincounter; - - while ( my $HSPinfo = shift @hspinfo ) { - my $id = shift @$HSPinfo; - $domaincounter{$id}++; - - my $info = [ @{ $hitinfo[ $hitinfo{$id} ] } ]; - next unless defined $info; - - $self->start_element( { 'Name' => 'Hit' } ); - $self->element( - { - 'Name' => 'Hit_id', - 'Data' => shift @{$info} - } - ); - $self->element( - { - 'Name' => 'Hit_desc', - 'Data' => shift @{$info} - } - ); - $self->element( - { - 'Name' => 'Hit_score', - 'Data' => shift @{$info} - } - ); - $self->element( - { - 'Name' => 'Hit_signif', - 'Data' => shift @{$info} - } - ); - my $domaintotal = shift @{$info}; - if (my $hit_end = shift @{$info}) { - $self->element( - { - 'Name' => 'Hit_len', - 'Data' => $hit_end - } - ); - } - - # Histogram is exclusive of Hmmsearch, not found in Hmmpfam, - # so just use Hmmsearch start/end order (first hit, then query) - $self->start_element( { 'Name' => 'Hsp' } ); - $self->element( - { - 'Name' => 'Hsp_hit-from', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_hit-to', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_query-from', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_query-to', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_score', - 'Data' => shift @$HSPinfo - } - ); - $self->element( - { - 'Name' => 'Hsp_evalue', - 'Data' => shift @$HSPinfo - } - ); - $self->end_element( { 'Name' => 'Hsp' } ); - $self->end_element( { 'Name' => 'Hit' } ); - - if ( $domaincounter{$id} == $domaintotal ) { - $hitinfo[ $hitinfo{$id} ] = undef; - } - } - @hitinfo = (); - %hitinfo = (); - last; - } - # uncomment to see missed lines with verbose on - #else { - # $self->debug($buffer); - #} - } - $last = $buffer; - } - $self->end_element( { 'Name' => 'HMMER_Output' } ) unless !$seentop; - return $self->end_document(); -} - -=head2 start_element - - Title : start_element - Usage : $eventgenerator->start_element - Function: Handles a start element event - Returns : none - Args : hashref with at least 2 keys 'Data' and 'Name' - - -=cut - -sub start_element { - my ( $self, $data ) = @_; - - # we currently don't care about attributes - my $nm = $data->{'Name'}; - my $type = $MODEMAP{$nm}; - if ($type) { - if ( $self->_eventHandler->will_handle($type) ) { - my $func = sprintf( "start_%s", lc $type ); - $self->_eventHandler->$func( $data->{'Attributes'} ); - } - unshift @{ $self->{'_elements'} }, $type; - } - if ( defined $type - && $type eq 'result' ) - { - $self->{'_values'} = {}; - $self->{'_result'} = undef; - } -} - -=head2 end_element - - Title : start_element - Usage : $eventgenerator->end_element - Function: Handles an end element event - Returns : none - Args : hashref with at least 2 keys 'Data' and 'Name' - - -=cut - -sub end_element { - my ( $self, $data ) = @_; - my $nm = $data->{'Name'}; - my $type = $MODEMAP{$nm}; - my $rc; - - if ( $nm eq 'HMMER_program' ) { - if ( $self->{'_last_data'} =~ /(HMM\S+)/i ) { - $self->{'_reporttype'} = uc $1; - } - } - - # Hsp are sort of weird, in that they end when another - # object begins so have to detect this in end_element for now - if ( $nm eq 'Hsp' ) { - foreach my $line (qw(Hsp_csline Hsp_qseq Hsp_midline Hsp_hseq)) { - my $data = $self->{'_last_hspdata'}->{$line}; - if ($data && $line eq 'Hsp_hseq') { - # replace hmm '.' gap symbol by '-' - $data =~ s/\./-/g; - } - $self->element( - { - 'Name' => $line, - 'Data' => $data - } - ); - # Since HMMER doesn't print some data explicitly, - # calculate it from the homology line (midline) - if ($line eq 'Hsp_midline') { - if ($data) { - my $length = length $data; - my $identical = ($data =~ tr/a-zA-Z//); - my $positive = ($data =~ tr/+//) + $identical; - $self->element( - { - 'Name' => 'Hsp_align-len', - 'Data' => $length - } - ); - $self->element( - { 'Name' => 'Hsp_identity', - 'Data' => $identical - } - ); - $self->element( - { 'Name' => 'Hsp_positive', - 'Data' => $positive - } - ); - } - else { - $self->element( - { 'Name' => 'Hsp_identity', - 'Data' => 0 - } - ); - $self->element( - { 'Name' => 'Hsp_positive', - 'Data' => 0 - } - ); - } - } - } - $self->{'_last_hspdata'} = {}; - } - if ($type) { - if ( $self->_eventHandler->will_handle($type) ) { - my $func = sprintf( "end_%s", lc $type ); - $rc = $self->_eventHandler->$func( $self->{'_reporttype'}, - $self->{'_values'} ); - } - my $lastelem = shift @{ $self->{'_elements'} }; - - # Flush corresponding values from the {_values} buffer - my $name = uc $type; - foreach my $key (keys %{ $self->{_values} }) { - delete $self->{_values}->{$key} if ($key =~ m/^$name-/); - } - } - elsif ( $MAPPING{$nm} ) { - if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { - my $key = ( keys %{ $MAPPING{$nm} } )[0]; - $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } = - $self->{'_last_data'}; - } - else { - $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'}; - } - } - else { - $self->debug("unknown nm $nm, ignoring\n"); - } - $self->{'_last_data'} = ''; # remove read data if we are at - # end of an element - $self->{'_result'} = $rc if ( defined $type && $type eq 'result' ); - return $rc; -} - -=head2 element - - Title : element - Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); - Function: Convience method that calls start_element, characters, end_element - Returns : none - Args : Hash ref with the keys 'Name' and 'Data' - - -=cut - -sub element { - my ( $self, $data ) = @_; - $self->start_element($data); - $self->characters($data); - $self->end_element($data); -} - -=head2 characters - - Title : characters - Usage : $eventgenerator->characters($str) - Function: Send a character events - Returns : none - Args : string - - -=cut - -sub characters { - my ( $self, $data ) = @_; - - if ( $self->in_element('hsp') - && $data->{'Name'} =~ /Hsp\_(?:qseq|hseq|csline|midline)/o - && defined $data->{'Data'} ) - { - $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'}; - } - return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); - - $self->{'_last_data'} = $data->{'Data'}; -} - -=head2 within_element - - Title : within_element - Usage : if( $eventgenerator->within_element($element) ) {} - Function: Test if we are within a particular element - This is different than 'in' because within can be tested - for a whole block. - Returns : boolean - Args : string element name - - -=cut - -sub within_element { - my ( $self, $name ) = @_; - return 0 - if ( !defined $name - || !defined $self->{'_elements'} - || scalar @{ $self->{'_elements'} } == 0 ); - foreach my $element ( @{ $self->{'_elements'} } ) { - return 1 if ( $element eq $name ); - } - return 0; -} - -=head2 in_element - - Title : in_element - Usage : if( $eventgenerator->in_element($element) ) {} - Function: Test if we are in a particular element - This is different than 'within' because 'in' only - tests its immediete parent. - Returns : boolean - Args : string element name - - -=cut - -sub in_element { - my ( $self, $name ) = @_; - return 0 if !defined $self->{'_elements'}->[0]; - return ( $self->{'_elements'}->[0] eq $name ); -} - -=head2 start_document - - Title : start_document - Usage : $eventgenerator->start_document - Function: Handle a start document event - Returns : none - Args : none - - -=cut - -sub start_document { - my ($self) = @_; - $self->{'_lasttype'} = ''; - $self->{'_values'} = {}; - $self->{'_result'} = undef; - $self->{'_elements'} = []; -} - -=head2 end_document - - Title : end_document - Usage : $eventgenerator->end_document - Function: Handles an end document event - Returns : Bio::Search::Result::ResultI object - Args : none - - -=cut - -sub end_document { - my ($self) = @_; - return $self->{'_result'}; -} - -=head2 result_count - - Title : result_count - Usage : my $count = $searchio->result_count - Function: Returns the number of results we have processed - Returns : integer - Args : none - - -=cut - -sub result_count { - my $self = shift; - return $self->{'_result_count'}; -} - -1; diff --git a/Bio/SearchIO/hmmer3.pm b/Bio/SearchIO/hmmer3.pm deleted file mode 100644 index 0ff6b2c12..000000000 --- a/Bio/SearchIO/hmmer3.pm +++ /dev/null @@ -1,1257 +0,0 @@ -# -# BioPerl module for Bio::SearchIO::hmmer3 -# -# Please direct questions and support issues to -# -# Cared for by Thomas Sharpton -# -# Copyright Thomas Sharpton -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::SearchIO::hmmer3 - -=head1 SYNOPSIS - -use Bio::SearchIO; - -my $searchio = Bio::SearchIO->new( - -format => 'hmmer', - -version => 3, - -file => 'hmmsearch.out' -); - -my $result = $searchio->next_result; -my $hit = $result->next_hit; -print $hit->name, $hit->description, $hit->significance, - $hit->score, "\n"; - -my $hsp = $hit->next_hsp; -print $hsp->start('hit'), $hsp->end('hit'), $hsp->start('query'), - $hsp->end('query'), "\n"; - -=head1 DESCRIPTION - -Code to parse output from hmmsearch, hmmscan, phmmer and nhmmer, compatible with -both version 2 and version 3 of the HMMER package from L. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -L - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via -the web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Thomas Sharpton - -Email thomas.sharpton@gmail.com - -Describe contact details here - -=head1 CONTRIBUTORS - -Additional contributors names and emails here - -briano at bioteam.net - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::SearchIO::hmmer3; - -use strict; -use Data::Dumper; -use Bio::Factory::ObjectFactory; -use Bio::Tools::IUPAC; -use vars qw(%MAPPING %MODEMAP); -use base qw(Bio::SearchIO::hmmer); - -BEGIN { - - # mapping of HMMER items to Bioperl hash keys - %MODEMAP = ( - 'HMMER_Output' => 'result', - 'Hit' => 'hit', - 'Hsp' => 'hsp' - ); - - %MAPPING = ( - 'Hsp_bit-score' => 'HSP-bits', - 'Hsp_score' => 'HSP-score', - 'Hsp_evalue' => 'HSP-evalue', - 'Hsp_query-from' => 'HSP-query_start', - 'Hsp_query-to' => 'HSP-query_end', - 'Hsp_query-strand' => 'HSP-query_strand', - 'Hsp_hit-from' => 'HSP-hit_start', - 'Hsp_hit-to' => 'HSP-hit_end', - 'Hsp_hit-strand' => 'HSP-hit_strand', - 'Hsp_positive' => 'HSP-conserved', - 'Hsp_identity' => 'HSP-identical', - 'Hsp_gaps' => 'HSP-hsp_gaps', - 'Hsp_hitgaps' => 'HSP-hit_gaps', - 'Hsp_querygaps' => 'HSP-query_gaps', - 'Hsp_qseq' => 'HSP-query_seq', - 'Hsp_csline' => 'HSP-cs_seq', - 'Hsp_hseq' => 'HSP-hit_seq', - 'Hsp_midline' => 'HSP-homology_seq', - 'Hsp_pline' => 'HSP-pp_seq', - 'Hsp_align-len' => 'HSP-hsp_length', - 'Hsp_query-frame' => 'HSP-query_frame', - 'Hsp_hit-frame' => 'HSP-hit_frame', - - 'Hit_id' => 'HIT-name', - 'Hit_len' => 'HIT-length', - 'Hit_accession' => 'HIT-accession', - 'Hit_desc' => 'HIT-description', - 'Hit_signif' => 'HIT-significance', - 'Hit_score' => 'HIT-score', - - 'HMMER_program' => 'RESULT-algorithm_name', - 'HMMER_version' => 'RESULT-algorithm_version', - 'HMMER_query-def' => 'RESULT-query_name', - 'HMMER_query-len' => 'RESULT-query_length', - 'HMMER_query-acc' => 'RESULT-query_accession', - 'HMMER_querydesc' => 'RESULT-query_description', - 'HMMER_hmm' => 'RESULT-hmm_name', - 'HMMER_seqfile' => 'RESULT-sequence_file', - 'HMMER_db' => 'RESULT-database_name', - ); -} - -=head2 next_result - - Title : next_result - Usage : my $hit = $searchio->next_result; - Function: Returns the next Result from a search - Returns : Bio::Search::Result::ResultI object - Args : none - -=cut - -sub next_result { - my ($self) = @_; - my ( $buffer, $last, @hit_list, @hsp_list, %hspinfo, %hitinfo, %domaincounter ); - local $/ = "\n"; - - my @ambiguous_nt = keys %Bio::Tools::IUPAC::IUB; - my $ambiguous_nt = join '', @ambiguous_nt; - - my $verbose = $self->verbose; # cache for speed? JES's idea in hmmer.pm - $self->start_document(); - - # This is here to ensure that next_result doesn't produce infinite loop - if ( !defined( $buffer = $self->_readline ) ) { - return undef; - } - else { - $self->_pushback($buffer); - } - - my $hit_counter = 0; # helper variable for non-unique hit IDs - - # Regex goes here for HMMER3 - # Start with hmmsearch processing - while ( defined( $buffer = $self->_readline ) ) { - my $lineorig = $buffer; - chomp $buffer; - - # Grab the program name - if ( $buffer =~ m/^\#\s(\S+)\s\:\:\s/ ) { - my $prog = $1; - - # TO DO: customize the above regex to adapt to other - # program types (hmmscan, etc) - $self->start_element( { 'Name' => 'HMMER_Output' } ); - $self->{'_result_count'}++; #Might need to move to another block - $self->element( - { 'Name' => 'HMMER_program', - 'Data' => uc($prog) - } - ); - } - - # Get the HMMER package version and release date - elsif ( $buffer =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/ ) { - my $version = $1; - my $versiondate = $2; - $self->{'_hmmidline'} = $buffer; - $self->element( - { 'Name' => 'HMMER_version', - 'Data' => $version - } - ); - } - - # Get the query info - elsif ( $buffer =~ /^\#\squery (?:\w+ )?file\:\s+(\S+)/ ) { - if ( $self->{'_reporttype'} eq 'HMMSEARCH' - || $self->{'_reporttype'} eq 'PHMMER' - || $self->{'_reporttype'} eq 'NHMMER' ) - { - $self->{'_hmmfileline'} = $lineorig; - $self->element( - { 'Name' => 'HMMER_hmm', - 'Data' => $1 - } - ); - } - elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) { - $self->{'_hmmseqline'} = $lineorig; - $self->element( - { 'Name' => 'HMMER_seqfile', - 'Data' => $1 - } - ); - } - } - - # If this is a report without alignments - elsif ( $buffer =~ m/^\#\sshow\salignments\sin\soutput/ ) { - $self->{'_alnreport'} = 0; - } - - # Get the database info - elsif ( $buffer =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ) { - - if ( $self->{'_reporttype'} eq 'HMMSEARCH' - || $self->{'_reporttype'} eq 'PHMMER' - || $self->{'_reporttype'} eq 'NHMMER' ) - { - $self->{'_hmmseqline'} = $lineorig; - $self->element( - { 'Name' => 'HMMER_seqfile', - 'Data' => $1 - } - ); - } - elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) { - $self->{'_hmmfileline'} = $lineorig; - $self->element( - { 'Name' => 'HMMER_hmm', - 'Data' => $1 - } - ); - } - } - - # Get query data - elsif ( $buffer =~ s/^Query:\s+// ) { - # For multi-query reports - if ( ( not exists $self->{_values}->{"RESULT-algorithm_name"} - or not exists $self->{_values}->{"RESULT-algorithm_version"} - ) - and exists $self->{_hmmidline} - ) { - my ($version, $versiondate) = $self->{_hmmidline} =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/; - $self->element( - { 'Name' => 'HMMER_program', - 'Data' => $self->{_reporttype} - } - ); - $self->element( - { 'Name' => 'HMMER_version', - 'Data' => $version - } - ); - } - if ( ( not exists $self->{_values}->{"RESULT-hmm_name"} - or not exists $self->{_values}->{"RESULT-sequence_file"} - ) - and ( exists $self->{_hmmfileline} - or exists $self->{_hmmseqline} - ) - ) { - if ( $self->{'_reporttype'} eq 'HMMSEARCH' - or $self->{'_reporttype'} eq 'PHMMER' - or $self->{'_reporttype'} eq 'NHMMER' - ) { - my ($qry_file) = $self->{_hmmfileline} =~ m/^\#\squery (?:\w+ )?file\:\s+(\S+)/; - my ($target_file) = $self->{_hmmseqline} =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/; - $self->element( - { 'Name' => 'HMMER_hmm', - 'Data' => $qry_file - } - ); - $self->element( - { 'Name' => 'HMMER_seqfile', - 'Data' => $target_file - } - ); - } - elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) { - my ($qry_file) = $self->{_hmmseqline} =~ m/^\#\squery \w+ file\:\s+(\S+)/; - my ($target_file) = $self->{_hmmfileline} =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/; - $self->element( - { 'Name' => 'HMMER_seqfile', - 'Data' => $qry_file - } - ); - $self->element( - { 'Name' => 'HMMER_hmm', - 'Data' => $target_file - } - ); - } - } - - unless ($buffer =~ s/\s+\[[L|M]\=(\d+)\]$//) { - warn "Error parsing length for query, offending line $buffer\n"; - exit(0); - } - my $querylen = $1; - $self->element( - { 'Name' => 'HMMER_query-len', - 'Data' => $querylen - } - ); - $self->element( - { 'Name' => 'HMMER_query-def', - 'Data' => $buffer - } - ); - } - - # Get Accession data - elsif ( $buffer =~ s/^Accession:\s+// ) { - $buffer =~ s/\s+$//; - $self->element( - { 'Name' => 'HMMER_query-acc', - 'Data' => $buffer - } - ); - } - - # Get description data - elsif ( $buffer =~ s/^Description:\s+// ) { - $buffer =~ s/\s+$//; - $self->element( - { 'Name' => 'HMMER_querydesc', - 'Data' => $buffer - } - ); - } - - # hmmsearch, nhmmer, and hmmscan-specific formatting here - elsif ( - defined $self->{'_reporttype'} - && ( $self->{'_reporttype'} eq 'HMMSEARCH' - || $self->{'_reporttype'} eq 'HMMSCAN' - || $self->{'_reporttype'} eq 'PHMMER' - || $self->{'_reporttype'} eq 'NHMMER' ) - ) - { - # Complete sequence table data above inclusion threshold, - # hmmsearch or hmmscan - if ( $buffer =~ m/Scores for complete sequence/ ) { - while ( defined( $buffer = $self->_readline ) ) { - if ( $buffer =~ m/inclusion threshold/ - || $buffer =~ m/Domain( and alignment)? annotation for each/ - || $buffer =~ m/\[No hits detected/ - || $buffer =~ m!^//! ) - { - $self->_pushback($buffer); - last; - } - elsif ( $buffer =~ m/^\s+E-value\s+score/ - || $buffer =~ m/\-\-\-/ - || $buffer =~ m/^$/ - ) - { - next; - } - - # Grab table data - $hit_counter++; - my ($eval_full, $score_full, $bias_full, $eval_best, - $score_best, $bias_best, $exp, $n, - $hitid, $desc, @hitline - ); - @hitline = split( " ", $buffer ); - $eval_full = shift @hitline; - $score_full = shift @hitline; - $bias_full = shift @hitline; - $eval_best = shift @hitline; - $score_best = shift @hitline; - $bias_best = shift @hitline; - $exp = shift @hitline; - $n = shift @hitline; - $hitid = shift @hitline; - $desc = join " ", @hitline; - - $desc = '' if ( !defined($desc) ); - - push @hit_list, - [ $hitid, $desc, $eval_full, $score_full ]; - $hitinfo{"$hitid.$hit_counter"} = $#hit_list; - } - } - - # nhmmer - elsif ( $buffer =~ /Scores for complete hits/ ) { - while ( defined( $buffer = $self->_readline ) ) { - if ( $buffer =~ /inclusion threshold/ - || $buffer =~ /Annotation for each hit/ - || $buffer =~ /\[No hits detected/ - || $buffer =~ m!^//! ) - { - $self->_pushback($buffer); - last; - } - elsif ( $buffer =~ m/^\s+E-value\s+score/ - || $buffer =~ m/\-\-\-/ - || $buffer =~ m/^$/ - ) - { - next; - } - - # Grab table data - $hit_counter++; - my ($eval, $score, $bias, $hitid, - $start, $end, $desc, @hitline - ); - @hitline = split( " ", $buffer ); - $eval = shift @hitline; - $score = shift @hitline; - $bias = shift @hitline; - $hitid = shift @hitline; - $start = shift @hitline; - $end = shift @hitline; - $desc = join ' ', @hitline; - - $desc = '' if ( !defined($desc) ); - - push @hit_list, [ $hitid, $desc, $eval, $score ]; - $hitinfo{"$hitid.$hit_counter"} = $#hit_list; - } - } - - # Complete sequence table data below inclusion threshold - elsif ( $buffer =~ /inclusion threshold/ ) { - while ( defined( $buffer = $self->_readline ) ) { - if ( $buffer =~ /Domain( and alignment)? annotation for each/ - || $buffer =~ /Internal pipeline statistics summary/ - || $buffer =~ /Annotation for each hit\s+\(and alignments\)/ - ) - { - $self->_pushback($buffer); - last; - } - elsif ( $buffer =~ m/inclusion threshold/ - || $buffer =~ m/^$/ - ) - { - next; - } - - # Grab table data - $hit_counter++; - my ($eval_full, $score_full, $bias_full, $eval_best, - $score_best, $bias_best, $exp, $n, - $hitid, $desc, @hitline - ); - @hitline = split( " ", $buffer ); - $eval_full = shift @hitline; - $score_full = shift @hitline; - $bias_full = shift @hitline; - $eval_best = shift @hitline; - $score_best = shift @hitline; - $bias_best = shift @hitline; - $exp = shift @hitline; - $n = shift @hitline; - $hitid = shift @hitline; - $desc = join " ", @hitline; - - $desc = '' if ( !defined($desc) ); - - push @hit_list, - [ $hitid, $desc, $eval_full, $score_full ]; - $hitinfo{"$hitid.$hit_counter"} = $#hit_list; - } - } - - # Domain annotation for each sequence table data, - # for hmmscan, hmmsearch & nhmmer - elsif ( $buffer =~ /Domain( and alignment)? annotation for each/ - or $buffer =~ /Annotation for each hit\s+\(and alignments\)/ - ) { - @hsp_list = (); # Here for multi-query reports - my $name; - my $annot_counter = 0; - - while ( defined( $buffer = $self->_readline ) ) { - if ( $buffer =~ /\[No targets detected/ - || $buffer =~ /Internal pipeline statistics/ ) - { - $self->_pushback($buffer); - last; - } - - if ( $buffer =~ m/^\>\>\s(\S*)\s+(.*)/ ) { - $name = $1; - my $desc = $2; - $annot_counter++; - $domaincounter{"$name.$annot_counter"} = 0; - - # The Hit Description from the Scores table can be truncated if - # its too long, so use the '>>' line description when its longer - if (length $hit_list[ - $hitinfo{"$name.$annot_counter"} - ] - [1] < length $desc - ) { - $hit_list[ $hitinfo{"$name.$annot_counter"} ][1] = $desc; - } - - while ( defined( $buffer = $self->_readline ) ) { - if ( $buffer =~ m/Internal pipeline statistics/ - || $buffer =~ m/Alignments for each domain/ - || $buffer =~ m/^\s+Alignment:/ - || $buffer =~ m/^\>\>/ ) - { - $self->_pushback($buffer); - last; - } - elsif ( $buffer =~ m/^\s+score\s+bias/ - || $buffer =~ m/^\s+\#\s+score/ - || $buffer =~ m/^\s+------\s+/ - || $buffer =~ m/^\s\-\-\-\s+/ - || $buffer =~ m/^$/ - ) - { - next; - } - - # Grab hsp data from table, push into @hsp; - if ($self->{'_reporttype'} =~ m/(?:HMMSCAN|HMMSEARCH|PHMMER|NHMMER)/) { - my ( $domain_num, $score, $bias, - $ceval, $ieval, - $hmm_start, $hmm_stop, $hmm_cov, - $seq_start, $seq_stop, $seq_cov, - $env_start, $env_stop, $env_cov, - $hitlength, $acc ); - my @vals; - - if ( # HMMSCAN & HMMSEARCH - ( $domain_num, $score, $bias, - $ceval, $ieval, - $hmm_start, $hmm_stop, $hmm_cov, - $seq_start, $seq_stop, $seq_cov, - $env_start, $env_stop, $env_cov, - $acc ) - = ( $buffer =~ - m|^\s+(\d+)\s\!*\?*\s+ # domain number - (\S+)\s+(\S+)\s+ # score, bias - (\S+)\s+(\S+)\s+ # c-eval, i-eval - (\d+)\s+(\d+)\s+(\S+)\s+ # hmm start, stop, coverage - (\d+)\s+(\d+)\s+(\S+)\s+ # seq start, stop, coverage - (\d+)\s+(\d+)\s+(\S+)\s+ # env start, stop, coverage - (\S+) # posterior probability accuracy - \s*$|ox - ) - ) { - # Values assigned when IF succeeded - - # Try to get the Hit length from the alignment information - $hitlength = 0; - if ($self->{'_reporttype'} eq 'HMMSEARCH' || $self->{'_reporttype'} eq 'PHMMER') { - # For Hmmsearch, if seq coverage ends in ']' it means that the alignment - # runs until the end. In that case add the END coordinate to @hitinfo - # to use it as Hit Length - if ( $seq_cov =~ m/\]$/ ) { - $hitlength = $seq_stop; - } - } - elsif ($self->{'_reporttype'} eq 'HMMSCAN') { - # For Hmmscan, if hmm coverage ends in ']' it means that the alignment - # runs until the end. In that case add the END coordinate to @hitinfo - # to use it as Hit Length - if ( $hmm_cov =~ m/\]$/ ) { - $hitlength = $hmm_stop; - } - } - } - elsif ( # NHMMER - ( $score, $bias, $ceval, - $hmm_start, $hmm_stop, $hmm_cov, - $seq_start, $seq_stop, $seq_cov, - $env_start, $env_stop, $env_cov, - $hitlength, $acc ) - = ( $buffer =~ - m|^\s+[!?]\s+ - (\S+)\s+(\S+)\s+(\S+)\s+ # score, bias, evalue - (\d+)\s+(\d+)\s+(\S+)\s+ # hmm start, stop, coverage - (\d+)\s+(\d+)\s+(\S+)\s+ # seq start, stop, coverage - (\d+)\s+(\d+)\s+(\S+)\s+ # env start, stop, coverage - (\d+)\s+(\S+) # target length, pp accuracy - .*$|ox - ) - ) { - # Values assigned when IF succeeded - } - else { - print STDERR "Missed this line: $buffer\n"; - next; - } - - my $info = $hit_list[ $hitinfo{"$name.$annot_counter"} ]; - if ( !defined $info ) { - $self->warn( - "Incomplete information: can't find HSP $name in list of hits\n" - ); - next; - } - - $domaincounter{"$name.$annot_counter"}++; - my $hsp_key - = $name . "_" . $domaincounter{"$name.$annot_counter"}; - - # Keep it simple for now. let's customize later - @vals = ( - $hmm_start, $hmm_stop, - $seq_start, $seq_stop, - $score, $ceval, - $hitlength, '', - '', '', - '', '' - ); - push @hsp_list, [ $name, @vals ]; - $hspinfo{"$hsp_key.$annot_counter"} = $#hsp_list; - } - } - } - elsif ( $buffer =~ /Alignment(?:s for each domain)?:/ ) { - #line counter - my $count = 0; - - # There's an optional block, so we sometimes need to - # count to 3, and sometimes to 4. - my $max_count = 3; - my $lastdomain; - my $hsp; - my ( $csline, $hline, $midline, $qline, $pline ); - - # To avoid deleting whitespaces from the homology line, - # keep track of the position and length of the alignment - # in each individual hline/qline, to take them as reference - # and use them in the homology line - my $align_offset = 0; - my $align_length = 0; - - while ( defined( $buffer = $self->_readline ) ) { - if ( $buffer =~ m/^\>\>/ - || $buffer =~ m/Internal pipeline statistics/ ) - { - $self->_pushback($buffer); - last; - } - elsif ($buffer =~ m/^$/ ) - { - # Reset these scalars on empty lines to help - # distinguish between the consensus structure/reference - # tracks (CS|RF lines) and homology lines ending in - # CS or RF aminoacids - $align_offset = 0; - $align_length = 0; - next; - } - - if ( $buffer =~ /\s\s\=\=\sdomain\s(\d+)\s+/ - or $buffer =~ /\s\sscore:\s\S+\s+/ - ) { - my $domainnum = $1 || 1; - $count = 0; - my $key = $name . "_" . $domainnum; - $hsp = $hsp_list[ $hspinfo{"$key.$annot_counter"} ]; - $csline = $$hsp[-5]; - $hline = $$hsp[-4]; - $midline = $$hsp[-3]; - $qline = $$hsp[-2]; - $pline = $$hsp[-1]; - $lastdomain = $name; - } - # Consensus Structure or Reference track, some reports - # don't have it. Since it appears on top of the alignment, - # the reset of $align_length to 0 between alignment blocks - # avoid confusing homology lines with it. - elsif ( $buffer =~ m/\s+\S+\s(?:CS|RF)$/ and $align_length == 0 ) { - my @data = split( " ", $buffer ); - $csline .= $data[-2]; - $max_count++; - $count++; - next; - } - # Query line and Hit line swaps positions - # depending of the program - elsif ( $count == $max_count - 3 - or $count == $max_count - 1 - ) { - my @data = split( " ", $buffer ); - - my $line_offset = 0; - # Use \Q\E on match to avoid errors on alignments - # that include stop codons (*) - while ($buffer =~ m/\Q$data[-2]\E/g) { - $line_offset = pos $buffer; - } - if ($line_offset != 0) { - $align_length = length $data[-2]; - $align_offset = $line_offset - $align_length; - } - - if ($self->{'_reporttype'} eq 'HMMSCAN') { - # hit sequence - $hline .= $data[-2] if ($count == $max_count - 3); - # query sequence - $qline .= $data[-2] if ($count == $max_count - 1); - } - else { # hmmsearch & nhmmer - # hit sequence - $hline .= $data[-2] if ($count == $max_count - 1); - # query sequence - $qline .= $data[-2] if ($count == $max_count - 3); - } - - $count++; - next; - } - # conservation track - # storage isn't quite right - need to remove - # leading/lagging whitespace while preserving - # gap data (latter isn't done, former is) - elsif ( $count == $max_count - 2 ) { - $midline .= substr $buffer, $align_offset, $align_length; - $count++; - next; - } - # posterior probability track - elsif ( $count == $max_count ) { - my @data = split(" ", $buffer); - $pline .= $data[-2]; - $count = 0; - $max_count = 3; - $$hsp[-5] = $csline; - $$hsp[-4] = $hline; - $$hsp[-3] = $midline; - $$hsp[-2] = $qline; - $$hsp[-1] = $pline; - next; - } - else { - print STDERR "Missed this line: $buffer\n"; - } - } - } - } - } - - # End of report - elsif ( $buffer =~ m/Internal pipeline statistics/ || $buffer =~ m!^//! ) { - # If within hit, hsp close; - if ( $self->within_element('hit') ) { - if ( $self->within_element('hsp') ) { - $self->end_element( { 'Name' => 'Hsp' } ); - } - $self->end_element( { 'Name' => 'Hit' } ); - } - - # Grab summary statistics of run - while ( defined( $buffer = $self->_readline ) ) { - last if ( $buffer =~ m/^\/\/$/ ); - } - - # Do a lot of processing of hits and hsps here - my $index = 0; - while ( my $hit = shift @hit_list ) { - $index++; - my $hit_name = shift @$hit; - my $hit_desc = shift @$hit; - my $hit_signif = shift @$hit; - my $hit_score = shift @$hit; - my $num_domains = $domaincounter{"$hit_name.$index"} || 0; - - $self->start_element( { 'Name' => 'Hit' } ); - $self->element( - { 'Name' => 'Hit_id', - 'Data' => $hit_name - } - ); - $self->element( - { 'Name' => 'Hit_desc', - 'Data' => $hit_desc - } - ); - $self->element( - { 'Name' => 'Hit_signif', - 'Data' => $hit_signif - } - ); - $self->element( - { 'Name' => 'Hit_score', - 'Data' => $hit_score - } - ); - - for my $i ( 1 .. $num_domains ) { - my $key = $hit_name . "_" . $i; - my $hsp = $hsp_list[ $hspinfo{"$key.$index"} ]; - if ( defined $hsp ) { - my $hsp_name = shift @$hsp; - $self->start_element( { 'Name' => 'Hsp' } ); - # Since HMMER doesn't print some data explicitly, - # calculate it from the homology line (midline) - if ($$hsp[-3] ne '') { - my $length = length $$hsp[-3]; - my $identical = ($$hsp[-3] =~ tr/a-zA-Z//); - my $positive = ($$hsp[-3] =~ tr/+//) + $identical; - $self->element( - { - 'Name' => 'Hsp_align-len', - 'Data' => $length - } - ); - $self->element( - { 'Name' => 'Hsp_identity', - 'Data' => $identical - } - ); - $self->element( - { 'Name' => 'Hsp_positive', - 'Data' => $positive - } - ); - } - else { - $self->element( - { 'Name' => 'Hsp_identity', - 'Data' => 0 - } - ); - $self->element( - { 'Name' => 'Hsp_positive', - 'Data' => 0 - } - ); - } - if ( $self->{'_reporttype'} eq 'HMMSCAN' ) { - $self->element( - { 'Name' => 'Hsp_hit-from', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_hit-to', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_query-from', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_query-to', - 'Data' => shift @$hsp - } - ); - } - elsif ( $self->{'_reporttype'} eq 'HMMSEARCH' - or $self->{'_reporttype'} eq 'NHMMER' - ) { - $self->element( - { 'Name' => 'Hsp_query-from', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_query-to', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_hit-from', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_hit-to', - 'Data' => shift @$hsp - } - ); - } - $self->element( - { 'Name' => 'Hsp_score', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_evalue', - 'Data' => shift @$hsp - } - ); - my $hitlength = shift @$hsp; - if ( $hitlength != 0 ) { - $self->element( - { 'Name' => 'Hit_len', - 'Data' => $hitlength - } - ); - } - $self->element( - { 'Name' => 'Hsp_csline', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_hseq', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_midline', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_qseq', - 'Data' => shift @$hsp - } - ); - $self->element( - { 'Name' => 'Hsp_pline', - 'Data' => shift @$hsp - } - ); - - # Only nhmmer output has strand information - if ( $self->{'_reporttype'} eq 'NHMMER' ) { - my $hstart = $self->get_from_element('HSP-hit_start'); - my $hend = $self->get_from_element('HSP-hit_end'); - my $hstrand = ( $hstart < $hend ) ? 1 : -1; - - my $qstart = $self->get_from_element('HSP-query_start'); - my $qend = $self->get_from_element('HSP-query_end'); - my $qstrand = ( $qstart < $qend ) ? 1 : -1; - - $self->element( - { 'Name' => 'Hsp_query-strand', - 'Data' => $qstrand - } - ); - $self->element( - { 'Name' => 'Hsp_hit-strand', - 'Data' => $hstrand - } - ); - } - - $self->end_element( { 'Name' => 'Hsp' } ); - } - } - $self->end_element( { 'Name' => 'Hit' } ); - } - @hit_list = (); - %hitinfo = (); - last; - } - } - else { - print STDERR "Missed this line: $buffer\n"; - $self->debug($buffer); - } - $last = $buffer; - } - $self->end_element( { 'Name' => 'HMMER_Output' } ); - my $result = $self->end_document(); - return $result; -} - -=head2 start_element - - Title : start_element - Usage : $eventgenerator->start_element - Function: Handles a start event - Returns : none - Args : hashref with at least 2 keys 'Data' and 'Name' - -=cut - -sub start_element { - - my ( $self, $data ) = @_; - - # we currently don't care about attributes - my $nm = $data->{'Name'}; - my $type = $MODEMAP{$nm}; - if ($type) { - if ( $self->_eventHandler->will_handle($type) ) { - my $func = sprintf( "start_%s", lc $type ); - $self->_eventHandler->$func( $data->{'Attributes'} ); - } - unshift @{ $self->{'_elements'} }, $type; - } - if ( defined $type - && $type eq 'result' ) - { - $self->{'_values'} = {}; - $self->{'_result'} = undef; - } -} - -=head2 end_element - - Title : end_element - Usage : $eventgeneartor->end_element - Function: Handles and end element event - Returns : none - Args : hashref with at least 2 keys 'Data' and 'Name' - -=cut - -sub end_element { - - my ( $self, $data ) = @_; - my $nm = $data->{'Name'}; - my $type = $MODEMAP{$nm}; - my $rc; - - if ( $nm eq 'HMMER_program' ) { - if ( $self->{'_last_data'} =~ /([NP]?HMM\S+)/i ) { - $self->{'_reporttype'} = uc $1; - } - } - - # Hsp are sort of weird, in that they end when another - # object begins so have to detect this in end_element for now - if ( $nm eq 'Hsp' ) { - foreach my $line (qw(Hsp_csline Hsp_qseq Hsp_midline Hsp_hseq Hsp_pline)) { - my $data = $self->{'_last_hspdata'}->{$line}; - if ( $data && $line eq 'Hsp_hseq' ) { - - # replace hmm '.' gap symbol by '-' - $data =~ s/\./-/g; - } - $self->element( - { 'Name' => $line, - 'Data' => $data - } - ); - } - $self->{'_last_hspdata'} = {}; - } - if ($type) { - if ( $self->_eventHandler->will_handle($type) ) { - my $func = sprintf( "end_%s", lc $type ); - $rc = $self->_eventHandler->$func( $self->{'_reporttype'}, - $self->{'_values'} ); - } - my $lastelem = shift @{ $self->{'_elements'} }; - - # Flush corresponding values from the {_values} buffer - my $name = uc $type; - foreach my $key (keys %{ $self->{_values} }) { - delete $self->{_values}->{$key} if ($key =~ m/^$name-/); - } - } - elsif ( $MAPPING{$nm} ) { - if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { - my $key = ( keys %{ $MAPPING{$nm} } )[0]; - $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } - = $self->{'_last_data'}; - } - else { - $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'}; - - # print "lastdata is " . $self->{'_last_data'} . "\n"; - } - } - else { - $self->debug("unknown nm $nm, ignoring\n"); - } - $self->{'_last_data'} = ''; # remove read data if we are at - # end of an element - $self->{'_result'} = $rc if ( defined $type && $type eq 'result' ); - return $rc; -} - -=head2 element - - Title : element - Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); - Function: Convenience method that calls start_element, characters, end_element - Returns : none - Args : Hash ref with the keys 'Name' and 'Data' - -=cut - -sub element { - my ( $self, $data ) = @_; - $self->start_element($data); - $self->characters($data); - $self->end_element($data); -} - -=head2 get_from_element - - Title : get_from_element - Usage : $self->get_from_element('HSP-hit_start'); - Function: Convenience method to retrieve data from '_values' hash - Returns : string - Args : key - -=cut - -sub get_from_element { - my ($self,$key) = @_; - my $values = $self->{_values}; - $values->{$key}; -} - -=head2 characters - - Title : characters - Usage : $eventgenerator->characters($str) - Function: Send a character events - Returns : none - Args : string - -=cut - -sub characters { - my ( $self, $data ) = @_; - - if ( $self->in_element('hsp') - && $data->{'Name'} =~ /Hsp\_(?:qseq|hseq|csline|pline|midline)/o - && defined $data->{'Data'} ) - { - $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'}; - } - return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); - - $self->{'_last_data'} = $data->{'Data'}; -} - -=head2 within_element - - Title : within_element - Usage : if( $eventgenerator->within_element( $element ) ) {} - Function: Test if we are within a particular element - This is different than 'in' because within can be tested for - a whole block - Returns : boolean - Args : string element name - -=cut - -sub within_element { - my ( $self, $name ) = @_; - return 0 - if ( !defined $name - || !defined $self->{'_elements'} - || scalar @{ $self->{'_elements'} } == 0 ); - foreach my $element ( @{ $self->{'_elements'} } ) { - return 1 if ( $element eq $name ); - } - return 0; -} - -=head2 in_element - - Title : in_element - Usage : if( $eventgenerator->in_element( $element ) ) {} - Function: Test if we are in a particular element - This is different than 'within' because 'in' only - tests its immediate parent - Returns : boolean - Args : string element name - -=cut - -sub in_element { - my ( $self, $name ) = @_; - return 0 if !defined $self->{'_elements'}->[0]; - return ( $self->{'_elements'}->[0] eq $name ); -} - -=head2 start_document - - Title : start_document - Usage : $eventgenerator->start_document - Function: Handle a start document event - Returns : none - Args : none - -=cut - -sub start_document { - my ($self) = @_; - $self->{'_lasttype'} = ''; - $self->{'_values'} = {}; - $self->{'_result'} = undef; - $self->{'_elements'} = []; -} - -=head2 end_document - - Title : end_document - Usage : $eventgenerator->end_document - Function: Handles an end document event - Returns : Bio::Search::Result::ResultI object - Args : none - -=cut - -sub end_document { - my ($self) = @_; - return $self->{'_result'}; -} - -=head2 result_count - - Title : result_count - Usage : my $count = $searchio->result_count - Function: Returns the number of results processed - Returns : integer - Args : none - -=cut - -sub result_count { - my $self = shift; - return $self->{'_result_count'}; -} - -1; diff --git a/Bio/SearchIO/hmmer_pull.pm b/Bio/SearchIO/hmmer_pull.pm deleted file mode 100755 index 812243094..000000000 --- a/Bio/SearchIO/hmmer_pull.pm +++ /dev/null @@ -1,283 +0,0 @@ -# -# BioPerl module for Bio::SearchIO::hmmer_pull -# -# Please direct questions and support issues to -# -# Cared for by Sendu Bala -# -# Copyright Sendu Bala -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::SearchIO::hmmer_pull - A parser for HMMER output - -=head1 SYNOPSIS - - # do not use this class directly it is available through Bio::SearchIO - use Bio::SearchIO; - my $in = Bio::SearchIO->new(-format => 'hmmer_pull', - -file => 't/data/hmmpfam.bigout'); - while (my $result = $in->next_result) { - # this is a Bio::Search::Result::HmmpfamResult object - print $result->query_name(), " for HMM ", $result->hmm_name(), "\n"; - while (my $hit = $result->next_hit) { - print $hit->name(), "\n"; - while (my $hsp = $hit->next_hsp) { - print "length is ", $hsp->length(), "\n"; - } - } - } - -=head1 DESCRIPTION - -This object implements a pull-parser for HMMER output. It is fast since it -only does work on request (hence 'pull'). - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Sendu Bala - -Email bix@sendu.me.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::SearchIO::hmmer_pull; - -use strict; - - -use base qw(Bio::SearchIO Bio::PullParserI); - -=head2 new - - Title : new - Usage : my $obj = Bio::SearchIO::hmmer_pull->new(); - Function: Builds a new Bio::SearchIO::hmmer_pull object - Returns : Bio::SearchIO::hmmer_pull - Args : -fh/-file => HMMER output filename - -format => 'hmmer_pull' - -evalue => float or scientific notation number to be used - as an evalue cutoff for hits - -score => integer or scientific notation number to be used - as a score value cutoff for hits - -hsps => integer minimum number of hsps (domains) a hit must have - -piped_behaviour => 'temp_file'|'memory'|'sequential_read' - - -piped_behaviour defines what the parser should do if the input is - an unseekable filehandle (eg. piped input), see - Bio::PullParserI::chunk for details. Default is 'sequential_read'. - -=cut - -sub _initialize { - my ($self, @args) = @_; - - # don't do normal SearchIO initialization - - my ($writer, $file, $fh, $piped_behaviour, $evalue, $score, $hsps) = - $self->_rearrange([qw(WRITER - FILE FH - PIPED_BEHAVIOUR - EVALUE - SCORE - HSPS)], @args); - $self->writer($writer) if $writer; - - $self->_fields( { ( header => undef, - algorithm => undef, - algorithm_version => undef, - algorithm_reference => '', - hmm_file => undef, - hmm_name => undef, - sequence_file => undef, - sequence_database => undef, - database_name => undef, - database_letters => undef, - database_entries => undef, - next_result => undef, - evalue_cutoff => '[unset]', - score_cutoff => '[unset]', - hsps_cutoff => '[unset]' ) } ); - - $self->_fields->{evalue_cutoff} = $evalue if $evalue; - $self->_fields->{score_cutoff} = $score if $score; - $self->_fields->{hsps_cutoff} = $hsps if $hsps; - - $self->_dependencies( { ( algorithm => 'header', - algorithm_version => 'header', - hmm_file => 'header', - hmm_name => 'header', - sequence_file => 'header', - sequence_database => 'header' ) } ); - - $self->chunk($file || $fh || $self->throw("-file or -fh must be supplied"), - -piped_behaviour => $piped_behaviour || 'sequential_read'); -} - -sub _discover_header { - my $self = shift; - $self->_chunk_seek(0); - my $header = $self->_get_chunk_by_nol(8); - $self->{_after_header} = $self->_chunk_tell; - - my ($algo) = $header =~ /^(hmm\S+) - search/m; - $self->_fields->{algorithm} = uc $algo; - - ($self->_fields->{algorithm_version}) = $header =~ /^HMMER\s+?(\S+)/m; - - ($self->_fields->{hmm_file}) = $header =~ /^HMM file:\s.+?(\S+)$/m; - $self->_fields->{hmm_name} = $self->_fields->{hmm_file}; - - ($self->_fields->{sequence_file}) = $header =~ /^Sequence (?:file|database):\s.+?(\S+)$/m; - $self->_fields->{sequence_database} = $self->_fields->{sequence_file}; - - $self->_fields->{header} = 1; -} - -sub _discover_database_name { - my $self = shift; - my $type = $self->get_field('algorithm'); - - if ($type eq 'HMMPFAM') { - $self->_fields->{database_name} = $self->get_field('hmm_file'); - } - elsif ($type eq 'HMMSEARCH') { - $self->_fields->{database_name} = $self->get_field('sequence_file'); - } -} - -sub _discover_next_result { - my $self = shift; - my $type = $self->get_field('algorithm'); # also sets _after_header if not set - - if ($type eq 'HMMPFAM') { - use Bio::Search::Result::HmmpfamResult; - - unless ($self->_sequential) { - $self->_chunk_seek($self->{_end_of_previous_result} || $self->{_after_header}); - - my ($start, $end) = $self->_find_chunk_by_end("//\n"); - return if $start == $end; - $self->_fields->{next_result} = Bio::Search::Result::HmmpfamResult->new(-chunk => [($self->chunk, $start, $end)], - -parent => $self); - - $self->{_end_of_previous_result} = $end; - } - else { - # deliberatly don't cache these, which means rewind won't work; - # if we cached we may as well have used 'memory' option to - # -piped_behaviour - my $chunk = $self->_get_chunk_by_end("//\n"); - $chunk || return; - $self->_fields->{next_result} = Bio::Search::Result::HmmpfamResult->new(-chunk => [$chunk], - -parent => $self); - } - } - elsif ($type eq 'HMMSEARCH') { - $self->throw("Can't handle hmmsearch yet\n"); - } - else { - $self->throw("Unknown report type"); - } -} - -=head2 next_result - - Title : next_result - Usage : my $hit = $searchio->next_result; - Function: Returns the next Result from a search - Returns : Bio::Search::Result::ResultI object - Args : none - -=cut - -sub next_result { - my $self = shift; - my $result = $self->get_field('next_result') || return; - - undef $self->_fields->{next_result}; - - $self->{'_result_count'}++; - return $result; -} - -=head2 result_count - - Title : result_count - Usage : my $count = $searchio->result_count - Function: Returns the number of results we have processed. - Returns : integer - Args : none - -=cut - -sub result_count { - my $self = shift; - return $self->{'_result_count'}; -} - -=head2 rewind - - Title : rewind - Usage : $searchio->rewind; - Function: Allow one to reset the Result iterator to the beginning, so that - next_result() will subsequently return the first result and so on. - - NB: result objects are not cached, so you will get new result objects - each time you rewind. Also, note that result_count() counts the - number of times you have called next_result(), so will not be able - tell you how many results there were in the file if you use rewind(). - - Returns : n/a - Args : none - -=cut - -sub rewind { - my $self = shift; - if ($self->_sequential) { - $self->warn("rewind has no effect on piped input when you have chosen 'sequential_read' mode"); - } - delete $self->{_end_of_previous_result}; -} - -1; diff --git a/Bio/Tools/HMMER/Domain.pm b/Bio/Tools/HMMER/Domain.pm deleted file mode 100644 index 0eb9e83a6..000000000 --- a/Bio/Tools/HMMER/Domain.pm +++ /dev/null @@ -1,339 +0,0 @@ -# -# BioPerl module for Bio::Tools::HMMER::Domain -# -# Please direct questions and support issues to -# -# Cared for by Ewan Birney -# -# Copyright Ewan Birney -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Tools::HMMER::Domain - One particular domain hit from HMMER - -=head1 SYNOPSIS - -Read the Bio::Tools::HMMER::Results docs - -=head1 DESCRIPTION - -A particular domain score. We reuse the Homol SeqFeature system -here, so this inherits off Homol SeqFeature. As this code -originally came from a separate project, there are some backward -compatibility stuff provided to keep this working with old code. - -Don't forget this inherits off Bio::SeqFeature, so all your usual -nice start/end/score stuff is ready for use. - -=head1 CONTACT - -Ewan Birney, birney@ebi.ac.uk - -=head1 CONTRIBUTORS - -Jason Stajich, jason@bioperl.org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -#' -package Bio::Tools::HMMER::Domain; - -use Bio::SeqFeature::Generic; -use strict; - - -use base qw(Bio::SeqFeature::FeaturePair); - -sub new { - my($class,@args) = @_; - my $self = $class->SUPER::new(@args); - - $self->{'alignlines'} = []; - - my $hmmf1 = Bio::SeqFeature::Generic->new(@args); - my $hmmf2 = Bio::SeqFeature::Generic->new(@args); - - $self->feature1($hmmf1); - $self->feature2($hmmf2); - - return $self; -} - -=head2 add_alignment_line - - Title : add_alignment_line - Usage : $domain->add_alignment_line($line_from_hmmer_output); - Function: add an alignment line to this Domain object - Returns : Nothing - Args : scalar - - Adds an alignment line, mainly for storing the HMMER alignments -as flat text which can be reguritated. You're right. This is *not -nice* and not the right way to do it. C'est la vie. - -=cut - -sub add_alignment_line { - my $self = shift; - my $line = shift; - push(@{$self->{'alignlines'}},$line); -} - -=head2 each_alignment_line - - Title : each_alignment_line - Usage : foreach $line ( $domain->each_alignment_line ) - Function: reguritates the alignment lines as they were fed in. - only useful realistically for printing. - Example : - Returns : - Args : None - - -=cut - -sub each_alignment_line { - my $self = shift; - return @{$self->{'alignlines'}}; -} - -=head2 get_nse - - Title : get_nse - Usage : $domain->get_nse() - Function: Provides a seqname/start-end format, useful - for unique keys. nse stands for name-start-end - It is used a lot in Pfam - Example : - Returns : A string - Args : Optional separator 1 and separator 2 (default / and -) - - -=cut - - - -sub get_nse { - my $self = shift; - my $sep1 = shift; - my $sep2 = shift; - - if( !defined $sep2 ) { - $sep2 = "-"; - } - if( !defined $sep1 ) { - $sep1 = "/"; - } - - return sprintf("%s%s%d%s%d",$self->seq_id,$sep1,$self->start,$sep2,$self->end); -} - - -# =head2 start_seq - -# Title : start_seq -# Usage : Backward compatibility with old HMMER modules. -# should use $domain->start -# Function: -# Example : -# Returns : -# Args : - -# =cut - -sub start_seq { - my $self = shift; - my $start = shift; - - $self->warn("Using old domain->start_seq. Should use domain->start"); - return $self->start($start); -} - -# =head2 end_seq - -# Title : end_seq -# Usage : Backward compatibility with old HMMER modules. -# should use $domain->end -# Function: -# Example : -# Returns : -# Args : - -# =cut - -sub end_seq { - my $self = shift; - my $end = shift; - - $self->warn("Using old domain->end_seq. Should use domain->end"); - return $self->end($end); -} - -# =head2 start_hmm - -# Title : start_hmm -# Usage : Backward compatibility with old HMMER modules, and -# for convience. Equivalent to $self->homol_SeqFeature->start -# Function: -# Example : -# Returns : -# Args : - -# =cut - -sub start_hmm { - my $self = shift; - my $start = shift; - $self->warn("Using old domain->start_hmm. Should use domain->hstart"); - return $self->hstart($start); -} - -# =head2 end_hmm - -# Title : end_hmm -# Usage : Backward compatibility with old HMMER modules, and -# for convience. Equivalent to $self->homol_SeqFeature->start -# Function: -# Example : -# Returns : -# Args : - -# =cut - -sub end_hmm { - my $self = shift; - my $end = shift; - - $self->warn("Using old domain->end_hmm. Should use domain->hend"); - return $self->hend($end); -} - -=head2 hmmacc - - Title : hmmacc - Usage : $domain->hmmacc($newacc) - Function: set get for HMM accession number. This is placed in the homol - feature of the HMM - Example : - Returns : - Args : - - -=cut - -sub hmmacc{ - my ($self,$acc) = @_; - if( defined $acc ) { - $self->feature2->add_tag_value('accession',$acc); - } - my @vals = $self->feature2->each_tag_value('accession'); - return shift @vals; -} - -=head2 hmmname - - Title : hmmname - Usage : $domain->hmmname($newname) - Function: set get for HMM accession number. This is placed in the homol - feature of the HMM - Example : - Returns : - Args : - -=cut - -sub hmmname { - return shift->hseq_id(@_); -} - -=head2 bits - - Title : bits - Usage : - Function: backward compatibility. Same as score - Example : - Returns : - Args : - -=cut - -sub bits{ - return shift->score(@_); -} - -=head2 evalue - - Title : evalue - Usage : - Function: $domain->evalue($value); - Example : - Returns : - Args : - -=cut - -sub evalue{ - return shift->_tag_value('evalue',@_); -} - -=head2 seqbits - - Title : seqbits - Usage : - Function: $domain->seqbits($value); - Example : - Returns : - Args : - -=cut - -sub seqbits { - return shift->_tag_value('seqbits',@_); -} - -=head2 seq_range - - Title : seq_range - Usage : - Function: Throws an exception to catch scripts which need to upgrade - Example : - Returns : - Args : - -=cut - -sub seq_range{ - my ($self,@args) = @_; - - $self->throw("You have accessed an old method. Please recode your script to the new bioperl HMMER module"); -} - -=head2 hmm_range - - Title : hmm_range - Usage : - Function: Throws an exception to catch scripts which need to upgrade - Example : - Returns : - Args : - - -=cut - -sub hmm_range{ - my ($self,@args) = @_; - - $self->throw("You have accessed an old method. Please recode your script to the new bioperl HMMER module"); -} - -1; # says use was ok -__END__ diff --git a/Bio/Tools/HMMER/Results.pm b/Bio/Tools/HMMER/Results.pm deleted file mode 100644 index 7beb1b363..000000000 --- a/Bio/Tools/HMMER/Results.pm +++ /dev/null @@ -1,976 +0,0 @@ -# -# Perl Module for HMMResults -# -# Please direct questions and support issues to -# -# Cared for by Ewan Birney -# -#Copyright Genome Research Limited (1997). - -=head1 NAME - -Bio::Tools::HMMER::Results - Object representing HMMER output results - -=head1 SYNOPSIS - - # parse a hmmsearch file (can also parse a hmmpfam file) - $res = Bio::Tools::HMMER::Results->new( -file => 'output.hmm' , - -type => 'hmmsearch'); - - # print out the results for each sequence - foreach $seq ( $res->each_Set ) { - print "Sequence bit score is",$seq->bits,"\n"; - foreach $domain ( $seq->each_Domain ) { - print " Domain start ",$domain->start," end ",$domain->end, - " score ",$domain->bits,"\n"; - } - } - - # new result object on a sequence/domain cutoff of - # 25 bits sequence, 15 bits domain - $newresult = $res->filter_on_cutoff(25,15); - - # alternative way of getting out all domains directly - foreach $domain ( $res->each_Domain ) { - print "Domain on ",$domain->seq_id," with score ", - $domain->bits," evalue ",$domain->evalue,"\n"; - } - -=head1 DESCRIPTION - -This object represents HMMER output, either from hmmsearch or -hmmpfam. For hmmsearch, a series of HMMER::Set objects are made, one -for each sequence, which have the the bits score for the object. For -hmmpfam searches, only one Set object is made. - - -These objects come from the original HMMResults modules used -internally in Pfam, written by Ewan Birney. Ewan then converted them to -BioPerl objects in 1999. That conversion is meant to be backwardly -compatible, but may not be (caveat emptor). - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to one -of the Bioperl mailing lists. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Ewan Birney - -Email birney@ebi.ac.uk - -=head1 CONTRIBUTORS - -Jason Stajich, jason-at-bioperl.org - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -package Bio::Tools::HMMER::Results; - -use strict; - -use Bio::Tools::HMMER::Domain; -use Bio::Tools::HMMER::Set; -use Symbol; - -use base qw(Bio::Root::Root Bio::Root::IO Bio::SeqAnalysisParserI); - -sub new { - my($class,@args) = @_; - - my $self = $class->SUPER::new(@args); - - $self->{'domain'} = []; # array of HMMUnits - $self->{'seq'} = {}; - - my ($parsetype) = $self->_rearrange([qw(TYPE)],@args); - $self->_initialize_io(@args); - if( !defined $parsetype ) { - $self->throw("No parse type provided. should be hmmsearch or hmmpfam"); - } - $self->parsetype($parsetype); - if( defined $self->_fh() ) { - if( $parsetype eq 'hmmsearch' ) { - $self->_parse_hmmsearch($self->_fh()); - } elsif ( $parsetype eq 'hmmpfam' ) { - $self->_parse_hmmpfam($self->_fh()); - } else { - $self->throw("Did not recoginise type $parsetype"); - } - } - - return $self; # success - we hope! -} - - -=head2 next_feature - - Title : next_feature - Usage : while( my $feat = $res->next_feature ) { # do something } - Function: SeqAnalysisParserI implementing function - Example : - Returns : A Bio::SeqFeatureI compliant object, in this case, - each DomainUnit object, ie, flattening the Sequence - aspect of this. - Args : None - - -=cut - -sub next_feature{ - my ($self) = @_; - - if( $self->{'_started_next_feature'} == 1 ) { - return shift @{$self->{'_next_feature_array'}}; - } else { - $self->{'_started_next_feature'} = 1; - my @array; - foreach my $seq ( $self->each_Set() ) { - foreach my $unit ( $seq->each_Domain() ) { - push(@array,$unit); - } - } - my $res = shift @array; - $self->{'_next_feature_array'} = \@array; - return $res; - } - - $self->throw("Should not reach here! Error!"); -} - - -=head2 number - - Title : number - Usage : print "There are ",$res->number," domains hit\n"; - Function: provides the number of domains in the HMMER report - -=cut - -sub number { - my $self = shift; - my @val; - my $ref; - $ref = $self->{'domain'}; - - - @val = @{$self->{'domain'}}; - return scalar @val; -} - -=head2 seqfile - - Title : seqfile - Usage : $obj->seqfile($newval) - Function: - Example : - Returns : value of seqfile - Args : newvalue (optional) - - -=cut - -sub seqfile{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'seqfile'} = $value; - } - return $self->{'seqfile'}; - -} - -=head2 hmmfile - - Title : hmmfile - Usage : $obj->hmmfile($newval) - Function: - Example : - Returns : value of hmmfile - Args : newvalue (optional) - - -=cut - -sub hmmfile{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'hmmfile'} = $value; - } - return $self->{'hmmfile'}; - -} - -=head2 add_Domain - - Title : add_Domain - Usage : $res->add_Domain($unit) - Function: adds a domain to the results array. Mainly used internally. - Args : A Bio::Tools::HMMER::Domain - - -=cut - -sub add_Domain { - my $self = shift; - my $unit = shift; - my $name; - - $name = $unit->seq_id(); - - if( ! exists $self->{'seq'}->{$name} ) { - $self->warn("Adding a domain of $name but with no HMMSequence. Will be kept in domain array but not added to a HMMSequence"); - } else { - $self->{'seq'}->{$name}->add_Domain($unit); - } - push(@{$self->{'domain'}},$unit); -} - - -=head2 each_Domain - - Title : each_Domain - Usage : foreach $domain ( $res->each_Domain() ) - Function: array of Domain units which are held in this report - Returns : array - Args : none - - -=cut - -sub each_Domain { - my $self = shift; - my (@arr,$u); - - foreach $u ( @{$self->{'domain'}} ) { - push(@arr,$u); - } - - return @arr; -} - - -=head2 domain_bits_cutoff_from_evalue - - Title : domain_bits_cutoff_from_evalue - Usage : $cutoff = domain_bits_cutoff_from_evalue(0.01); - Function: return a bits cutoff from an evalue using the - scores here. Somewhat interesting logic: - Find the two bit score which straddle the evalue - if( 25 is between these two points) return 25 - else return the midpoint. - - This logic tries to ensure that with large signal to - noise separation one still has sensible 25 bit cutoff - Returns : - Args : - -=cut - -sub domain_bits_cutoff_from_evalue { - my $self = shift; - my $eval = shift; - my ($dom,$prev,@doms,$cutoff,$sep,$seen); - - @doms = $self->each_Domain; - - - @doms = map { $_->[0] } - sort { $b->[1] <=> $a->[1] } - map { [ $_, $_->bits] } @doms; - $seen = 0; - foreach $_ ( @doms ) { - if( $_->evalue > $eval ) { - $seen = 1; - $dom = $_; - last; - } - $prev = $_; - } - - if( ! defined $prev || $seen == 0) { - $self->throw("Evalue is either above or below the list..."); - return; - } - - $sep = $prev->bits - $dom->bits ; - - if( $sep < 1 ) { - return $prev->bits(); - } - if( $dom->bits < 25 && $prev->bits > 25 ) { - return 25; - } - - return int( $dom->bits + $sep/2 ) ; - -} - - -sub dictate_hmm_acc { - my $self = shift; - my $acc = shift; - my ($unit); - - - foreach $unit ( $self->eachHMMUnit() ) { - $unit->hmmacc($acc); - } -} - -=head2 write_FT_output - - Title : write_FT_output - Usage : $res->write_FT_output(\*STDOUT,'DOMAIN') - Function: writes feature table output ala swissprot - Returns : - Args : - - -=cut - -sub write_FT_output { - my $self = shift; - my $file = shift; - my $idt = shift; - my ($seq,$unit); - - if( !defined $idt ) { - $idt = "DOMAIN"; - } - - foreach $seq ( $self->each_Set() ) { - print $file sprintf("ID %s\n",$seq->name()); - foreach $unit ( $seq->each_Domain() ) { - print $file sprintf("FT %s %d %d %s\n",$idt, - $unit->start,$unit->end,$unit->hmmname); - } - print $file "//\n"; - } -} - -=head2 filter_on_cutoff - - Title : filter_on_cutoff - Usage : $newresults = $results->filter_on_cutoff(25,15); - Function: Produces a new HMMER::Results module which has - been trimmed at the cutoff. - Returns : a Bio::Tools::HMMER::Results module - Args : sequence cutoff and domain cutoff. in bits score - if you want one cutoff, simply use same number both places - -=cut - -sub filter_on_cutoff { - my $self = shift; - my $seqthr = shift; - my $domthr = shift; - my ($new,$seq,$unit,@array,@narray); - - if( !defined $domthr ) { - $self->throw("hmmresults filter on cutoff needs two arguments"); - } - - $new = Bio::Tools::HMMER::Results->new(-type => $self->parsetype); - - foreach $seq ( $self->each_Set()) { - next if( $seq->bits() < $seqthr ); - $new->add_Set($seq); - foreach $unit ( $seq->each_Domain() ) { - next if( $unit->bits() < $domthr ); - $new->add_Domain($unit); - } - } - $new; -} - -=head2 write_ascii_out - - Title : write_ascii_out - Usage : $res->write_ascii_out(\*STDOUT) - Function: writes as - seq seq_start seq_end model-acc model_start model_end model_name - Returns : - Args : - - FIXME: Now that we have no modelacc, this is probably a bad thing. - -=cut - -# writes as seq sstart send modelacc hstart hend modelname - -sub write_ascii_out { - my $self = shift; - my $fh = shift; - my ($unit,$seq); - - if( !defined $fh) { - $fh = \*STDOUT; - } - - - foreach $seq ( $self->each_Set()) { - foreach $unit ( $seq->each_Domain()) { - print $fh sprintf("%s %4d %4d %s %4d %4d %4.2f %4.2g %s\n", - $unit->seq_id(),$unit->start(),$unit->end(), - $unit->hmmacc,$unit->hstart,$unit->hend, - $unit->bits,$unit->evalue,$unit->hmmname); - } - } - -} - -=head2 write_GDF_bits - - Title : write_GDF_bits - Usage : $res->write_GDF_bits(25,15,\*STDOUT) - Function: writes GDF format with a sequence,domain threshold - Returns : - Args : - -=cut - -sub write_GDF_bits { - my $self = shift; - my $seqt = shift; - my $domt = shift; - my $file = shift; - my $seq; - my $unit; - my (@array,@narray); - - if( !defined $file ) { - $self->throw("Attempting to use write_GDF_bits without passing in correct arguments!"); - return; - } - - foreach $seq ( $self->each_Set()) { - - if( $seq->bits() < $seqt ) { - next; - } - - foreach $unit ( $seq->each_Domain() ) { - if( $unit->bits() < $domt ) { - next; - } - push(@array,$unit); - } - - } - - @narray = sort { my ($aa,$bb,$st_a,$st_b); - $aa = $a->seq_id(); - $bb = $b->seq_id(); - if ( $aa eq $bb) { - $st_a = $a->start(); - $st_b = $b->start(); - return $st_a <=> $st_b; - } - else { - return $aa cmp $bb; - } } @array; - - foreach $unit ( @narray ) { - print $file sprintf("%-24s\t%6d\t%6d\t%15s\t%.1f\t%g\n",$unit->get_nse(),$unit->start(),$unit->end(),$unit->seq_id(),$unit->bits(),$unit->evalue); - } - -} - -sub write_scores_bits { - my $self = shift; - my $seqt = shift; - my $domt = shift; - my $file = shift; - my $seq; - my $unit; - my (@array,@narray); - - if( !defined $file ) { - $self->warn("Attempting to use write_scores_bits without passing in correct arguments!"); - return; - } - - foreach $seq ( $self->eachHMMSequence()) { - - if( $seq->bits() < $seqt ) { - next; - } - - foreach $unit ( $seq->eachHMMUnit() ) { - if( $unit->bits() < $domt ) { - next; - } - push(@array,$unit); - } - - } - - @narray = sort { my ($aa,$bb,$st_a,$st_b); - $aa = $a->bits(); - $bb = $b->bits(); - return $aa <=> $bb; - } @array; - - foreach $unit ( @narray ) { - print $file sprintf("%4.2f %s\n",$unit->bits(),$unit->get_nse()); - } - -} - -sub write_GDF { - my $self = shift; - my $file = shift; - my $unit; - - if( !defined $file ) { - $file = \*STDOUT; - } - - - foreach $unit ( $self->eachHMMUnit() ) { - print $file sprintf("%-24s\t%6d\t%6d\t%15s\t%.1f\t%g\n",$unit->get_nse(),$unit->start(),$unit->end(),$unit->seq_id(),$unit->bits(),$unit->evalue); - } - -} - -sub highest_noise { - my $self = shift; - my $seqt = shift; - my $domt = shift; - my ($seq,$unit,$hseq,$hdom,$noiseseq,$noisedom); - - $hseq = $hdom = -100000; - - foreach $seq ( $self->eachHMMSequence()) { - if( $seq->bits() < $seqt && $seq->bits() > $hseq ) { - $hseq = $seq->bits(); - $noiseseq = $seq; - } - foreach $unit ( $seq->eachHMMUnit() ) { - if( (($seq->bits() < $seqt) || ($seq->bits() > $seqt && $unit->bits < $domt)) && $unit->bits() > $hdom ) { - $hdom = $unit->bits(); - $noisedom = $unit; - } - } - } - - - return ($noiseseq,$noisedom); - -} - - -sub lowest_true { - my $self = shift; - my $seqt = shift; - my $domt = shift; - my ($seq,$unit,$lowseq,$lowdom,$trueseq,$truedom); - - if( ! defined $domt ) { - $self->warn("lowest true needs at least a domain threshold cut-off"); - return (0,0); - } - - $lowseq = $lowdom = 100000; - - foreach $seq ( $self->eachHMMSequence()) { - - if( $seq->bits() >= $seqt && $seq->bits() < $lowseq ) { - $lowseq = $seq->bits(); - $trueseq = $seq; - } - if( $seq->bits() < $seqt ) { - next; - } - - foreach $unit ( $seq->eachHMMUnit() ) { - if( $unit->bits() >= $domt && $unit->bits() < $lowdom ) { - $lowdom = $unit->bits(); - $truedom = $unit; - } - } - } - - - return ($trueseq,$truedom); - -} - - - -=head2 add_Set - - Title : add_Set - Usage : Mainly internal function - Function: - Returns : - Args : - - -=cut - -sub add_Set { - my $self = shift; - my $seq = shift; - my $name; - - $name = $seq->name(); - - if( exists $self->{'seq'}->{$name} ) { - $self->throw("You alredy have $name in HMMResults!"); - } - $self->{'seq'}->{$name} = $seq; -} - - -=head2 each_Set - - Title : each_Set - Usage : - Function: - Returns : - Args : - - -=cut - -sub each_Set { - my $self = shift; - my (@array,$name); - - - foreach $name ( keys %{$self->{'seq'}} ) { - push(@array,$self->{'seq'}->{$name}); - } - return @array; -} - - -=head2 get_Set - - Title : get_Set - Usage : $set = $res->get_Set('sequence-name'); - Function: returns the Set for a particular sequence - Returns : a HMMER::Set object - Args : name of the sequence - - -=cut - -sub get_Set { - my $self = shift; - my $name = shift; - - return $self->{'seq'}->{$name}; -} - - -=head2 _parse_hmmpfam - - Title : _parse_hmmpfam - Usage : $res->_parse_hmmpfam($filehandle) - Function: - Returns : - Args : - - -=cut - -sub _parse_hmmpfam { - my $self = shift; - my $file = shift; - - my ($id,$sqfrom,$sqto,$hmmf,$hmmt,$sc,$ev, - $unit,$nd,$seq,$name,$seqname,$from, - $to,%hash,%acc,$acc); - my $count = 0; - - while(<$file>) { - if( /^HMM file:\s+(\S+)/ ) { $self->hmmfile($1); next; } - elsif( /^Sequence file:\s+(\S+)/ ) { $self->seqfile($1); next } - elsif( /^Query(\s+sequence)?:\s+(\S+)/ ) { - - $seqname = $2; - - $seq = Bio::Tools::HMMER::Set->new(); - - $seq ->name($seqname); - $self->add_Set($seq); - %hash = (); - - while(<$file>){ - - if( /Accession:\s+(\S+)/ ) { $seq->accession($1); next } - elsif( s/^Description:\s+// ) { chomp; $seq->desc($_); next } - /^Parsed for domains/ && last; - - # This is to parse out the accession numbers in old Pfam format. - # now not support due to changes in HMMER. - - if( (($id,$acc, $sc, $ev, $nd) = /^\s*(\S+)\s+(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/)) { - $hash{$id} = $sc; # we need this for the sequence - # core of the domains below! - $acc {$id} = $acc; - - # this is the more common parsing routine - } elsif ( (($id,$sc, $ev, $nd) = - /^\s*(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/) ) { - - $hash{$id} = $sc; # we need this for the - # sequence score of hte domains below! - - } - } - - while(<$file>) { - /^Align/ && last; - m{^//} && last; - # this is meant to match - - #Sequence Domain seq-f seq-t hmm-f hmm-t score E-value - #-------- ------- ----- ----- ----- ----- ----- ------- - #PF00621 1/1 198 372 .. 1 207 [] 281.6 1e-80 - - if( (($id, $sqfrom, $sqto, $hmmf,$hmmt,$sc, $ev) = - /(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/)) { - $unit = Bio::Tools::HMMER::Domain->new(); - $unit->seq_id ($seqname); - $unit->hmmname ($id); - $unit->start ($sqfrom); - $unit->end ($sqto); - $unit->hstart($hmmf); - $unit->hend ($hmmt); - $unit->bits ($sc); - $unit->evalue ($ev); - - if( !exists($hash{$id}) ) { - $self->throw("HMMResults parsing error in hmmpfam for $id - can't find sequecne score"); - } - - $unit->seqbits($hash{$id}); - - if( defined $acc{$id} ) { - $unit->hmmacc($acc{$id}); - } - - # this should find it's own sequence! - $self->add_Domain($unit); - } - } - if( m{^//} ) { next; } - - $_ = <$file>; - # parses alignment lines. Icky as we have to break on the same line - # that we need to read to place the alignment lines with the unit. - - while(1) { - (!defined $_ || m{^//}) && last; - - # matches: - # PF00621: domain 1 of 1, from 198 to 372 - if( /^\s*(\S+):.*from\s+(\d+)\s+to\s+(\d+)/ ) { - - $name = $1; - $from = $2; - $to = $3; - - # find the HMMUnit which this alignment is from - - $unit = $self->get_unit_nse($seqname,$name,$from,$to); - if( !defined $unit ) { - $self->warn("Could not find $name $from $to unit even though I am reading it in. ugh!"); - $_ = <$file>; - next; - } - while(<$file>) { - m{^//} && last; - /^\s*\S+:.*from\s+\d+\s+to\s+\d+/ && last; - $unit->add_alignment_line($_); - } - } else { - $_ = <$file>; - } - } - - # back to main 'Query:' loop - } - } -} - -# mainly internal function - -sub get_unit_nse { - my $self = shift; - my $seqname = shift; - my $domname = shift; - my $start = shift; - my $end = shift; - - my($seq,$unit); - - $seq = $self->get_Set($seqname); - - if( !defined $seq ) { - $self->throw("Could not get sequence name $seqname - so can't get its unit"); - } - - foreach $unit ( $seq->each_Domain() ) { - if( $unit->hmmname() eq $domname && $unit->start() == $start && $unit->end() == $end ) { - return $unit; - } - } - - return; -} - - -=head2 _parse_hmmsearch - - Title : _parse_hmmsearch - Usage : $res->_parse_hmmsearch($filehandle) - Function: - Returns : - Args : - - -=cut - -sub _parse_hmmsearch { - my $self = shift; - my $file = shift; - my ($id,$sqfrom,$sqto,$sc,$ev,$unit,$nd,$seq,$hmmf,$hmmt, - $hmmfname,$hmmacc, $hmmid, %seqh); - my $count = 0; - - while(<$file>) { - /^HMM file:\s+(\S+)/ and do { $self->hmmfile($1); $hmmfname = $1 }; - /^Accession:\s+(\S+)/ and do { $hmmacc = $1 }; - /^Query HMM:\s+(\S+)/ and do { $hmmid = $1 }; - /^Sequence database:\s+(\S+)/ and do { $self->seqfile($1) }; - /^Scores for complete sequences/ && last; - } - - $hmmfname = "given" if not $hmmfname; - - while(<$file>) { - /^Parsed for domains/ && last; - if( (($id, $sc, $ev, $nd) = /(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/)) { - $seq = Bio::Tools::HMMER::Set->new(); - $seq->name($id); - $seq->bits($sc); - $seqh{$id} = $sc; - $seq->evalue($ev); - $self->add_Set($seq); - $seq->accession($hmmacc); - } - } - - while(<$file>) { - /^Alignments of top-scoring domains/ && last; - if( (($id, $sqfrom, $sqto, $hmmf, $hmmt, $sc, $ev) = /(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/)) { - $unit = Bio::Tools::HMMER::Domain->new(); - - $unit->seq_id($id); - $unit->hmmname($hmmfname); - $unit->start($sqfrom); - $unit->end($sqto); - $unit->bits($sc); - $unit->hstart($hmmf); - $unit->hend($hmmt); - $unit->evalue($ev); - $unit->seqbits($seqh{$id}); - $self->add_Domain($unit); - $count++; - } - } - - $_ = <$file>; - - ## Recognize and store domain alignments - - while(1) { - if( !defined $_ ) { - last; - } - /^Histogram of all scores/ && last; - - # matches: - # PF00621: domain 1 of 1, from 198 to 372 - if( /^\s*(\S+):.*from\s+(\d+)\s+to\s+(\d+)/ ) { - my $name = $1; - my $from = $2; - my $to = $3; - - # find the HMMUnit which this alignment is from - $unit = $self->get_unit_nse($name,$hmmfname,$from,$to); - - if( !defined $unit ) { - $self->warn("Could not find $name $from $to unit even though I am reading it in. ugh!"); - next; - } - while(<$file>) { - /^Histogram of all scores/ && last; - /^\s*\S+:.*from\s+\d+\s+to\s+\d+/ && last; - $unit->add_alignment_line($_); - } - } - else { - $_ = <$file>; - } - } - - return $count; -} - -=head2 parsetype - - Title : parsetype - Usage : $obj->parsetype($newval) - Function: - Returns : value of parsetype - Args : newvalue (optional) - - -=cut - -sub parsetype{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'_parsetype'} = $value; - } - return $self->{'_parsetype'}; -} - -1; # says use was ok -__END__ - - diff --git a/Bio/Tools/HMMER/Set.pm b/Bio/Tools/HMMER/Set.pm deleted file mode 100644 index 10416652e..000000000 --- a/Bio/Tools/HMMER/Set.pm +++ /dev/null @@ -1,263 +0,0 @@ -# -# BioPerl module for Bio::Tools::HMMER::Set -# -# Please direct questions and support issues to -# -# Cared for by Ewan Birney -# -# Copyright Ewan Birney -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Tools::HMMER::Set - Set of identical domains from HMMER matches - -=head1 SYNOPSIS - - # get a Set object probably from the results object - print "Bits score over set ",$set->bits," evalue ",$set->evalue,"\n"; - - foreach $domain ( $set->each_Domain ) { - print "Domain start ",$domain->start," end ",$domain->end,"\n"; - } - -=head1 DESCRIPTION - -Represents a set of HMMER domains hitting one sequence. HMMER reports two -different scores, a per sequence total score (and evalue) and a per -domain score and evalue. This object represents a collection of the same -domain with the sequence bits score and evalue. (these attributes are also -on the per domain scores, which you can get there). - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to one -of the Bioperl mailing lists. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution.Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Ewan Birney - -Email birney-at-ebi.ac.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Tools::HMMER::Set; -use strict; - -use Bio::Tools::HMMER::Domain; - -use base qw(Bio::Root::Root); - -sub new { - my($class,@args) = @_; - my $self = $class->SUPER::new(@args); - my ($name,$acc,$desc) = $self->_rearrange([qw(NAME ACCESSION DESC)], - @args); - $name && $self->name($name); - $acc && $self->accession($acc); - $desc && $self->desc($desc); - - - $self->{'domains'} = []; - $self->{'domainnames'} = {}; - return $self; -} - -=head2 add_Domain - - Title : add_Domain - Usage : $set->add_Domain($domain) - Function: adds the domain to the list - Returns : nothing - Args : A Bio::Tools::HMMER::Domain object - -=cut - -sub add_Domain{ - my ($self,$domain) = @_; - - - if( ! defined $domain || ! $domain->isa("Bio::Tools::HMMER::Domain") ) { - $self->throw("[$domain] is not a Bio::Tools::HMMER::Domain. aborting"); - } - return if $self->{'domainnames'}->{$domain->get_nse}++; - push(@{$self->{'domains'}},$domain); - -} - -=head2 each_Domain - - Title : each_Domain - Usage : foreach $domain ( $set->each_Domain() ) - Function: returns an array of domain objects in this set - Returns : array - Args : none - - -=cut - -sub each_Domain{ - my ($self,@args) = @_; - - return @{$self->{'domains'}}; -} - -=head2 name - - Title : name - Usage : $obj->name($newval) - Function: - Example : - Returns : value of name - Args : newvalue (optional) - - -=cut - -sub name{ - my ($obj,$value) = @_; - if( defined $value) { - $obj->{'name'} = $value; - } - return $obj->{'name'}; - -} - -=head2 desc - - Title : desc - Usage : $obj->desc($newval) - Function: - Example : - Returns : value of desc - Args : newvalue (optional) - -=cut - -sub desc{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'desc'} = $value; - } - return $self->{'desc'}; - -} - -=head2 accession - - Title : accession - Usage : $obj->accession($newval) - Function: - Example : - Returns : value of accession - Args : newvalue (optional) - - -=cut - -sub accession{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'accession'} = $value; - } - return $self->{'accession'}; -} - - -=head2 bits - - Title : bits - Usage : $obj->bits($newval) - Function: - Example : - Returns : value of bits - Args : newvalue (optional) - - -=cut - -sub bits{ - my ($obj,$value) = @_; - - if( defined $value) { - $obj->{'bits'} = $value; - } - return $obj->{'bits'}; - -} - -=head2 evalue - - Title : evalue - Usage : $obj->evalue($newval) - Function: - Example : - Returns : value of evalue - Args : newvalue (optional) - - -=cut - -sub evalue{ - my ($obj,$value) = @_; - if( defined $value) { - $obj->{'evalue'} = $value; - } - return $obj->{'evalue'}; - -} - - -sub addHMMUnit { - my $self = shift; - my $unit = shift; - - $self->warn("Using old addHMMUnit call on Bio::Tools::HMMER::Set. Should replace with add_Domain"); - return $self->add_Domain($unit); -} - -sub eachHMMUnit { - my $self = shift; - $self->warn("Using old eachHMMUnit call on Bio::Tools::HMMER::Set. Should replace with each_Domain"); - return $self->each_Domain(); -} - -1; # says use was ok -__END__ - diff --git a/Bio/Tools/Hmmpfam.pm b/Bio/Tools/Hmmpfam.pm deleted file mode 100755 index cf76c655a..000000000 --- a/Bio/Tools/Hmmpfam.pm +++ /dev/null @@ -1,228 +0,0 @@ -# BioPerl module for Bio::Tools::Hmmpfam -# -# Please direct questions and support issues to -# -# Cared for by Balamurugan Kumarasamy -# -# You may distribute this module under the same terms as perl itself -# POD documentation - main docs before the code -# - -=head1 NAME - -Bio::Tools::Hmmpfam - Parser for Hmmpfam program - -=head1 SYNOPSIS - - use Bio::Tools::Hmmpfam; - my @hmmpfam_feat; - my $hmmpfam_parser = Bio::Tools::Hmmpfam->new(-fh =>$filehandle ); - while( my $hmmpfam_feat = $hmmpfam_parser->next_result ) { - push @hmmpfam_feat, $hmmpfam_feat; - } - -=head1 DESCRIPTION - -Parser for Hmmpfam program. See also L. - -=head1 FEEDBACK - -=head2 Mailing Lists - - User feedback is an integral part of the evolution of this and other - Bioperl modules. Send your comments and suggestions preferably to - the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Balamurugan Kumarasamy - - Email: fugui@worf.fugu-sg.org - -=head1 APPENDIX - - The rest of the documentation details each of the object methods. - Internal methods are usually preceded with a _ - - -=cut - -package Bio::Tools::Hmmpfam; -use strict; - -use Bio::SeqFeature::FeaturePair; -use Bio::SeqFeature::Generic; -use base qw(Bio::Root::Root Bio::Root::IO); - - - -=head2 new - - Title : new - Usage : my $obj = Bio::Tools::Hmmpfam->new(-fh=>$filehandle); - Function: Builds a new Bio::Tools::Hmmpfam object - Returns : Bio::Tools::Hmmpfam - Args : -filename - -fh (filehandle) - -=cut - -sub new { - my($class,@args) = @_; - - my $self = $class->SUPER::new(@args); - $self->_initialize_io(@args); - - return $self; -} - - -=head2 next_result - - Title : next_result - Usage : my $feat = $hmmpfam_parser->next_result - Function: Get the next result set from parser data - Returns : L - Args : none - -=cut - -sub next_result { - my ($self) = @_; - my $filehandle; - - my $line; - - my $id; - while ($_=$self->_readline()) { - $line = $_; - chomp $line; - - if ( $line=~m/^Alignments of top-scoring domains/ ) { - while( my $rest = $self->_readline() ) { last if $rest =~ m!^//! } - } - - next if ($line=~m/^Model/ || /^\-/ || /^$/); - - if ($line=~m/^Query sequence:\s+(\S+)/) { - $id = $1; - $self->seqname($id); - } - - if (my ($hid, $start, $end, $hstart, $hend, $score, $evalue) = $line=~m/^(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+\S+\s+(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)/) { - my %feature; - - ($feature{name}) = $self->seqname; - $feature{raw_score} = $score; - $feature{p_value} = sprintf ("%.3e", $evalue); - $feature{score} = $feature{p_value}; - $feature{start} = $start; - $feature{end} = $end; - $feature{hname} = $hid; - $feature{hstart} = $hstart; - $feature{hend} = $hend; - ($feature{source}) = 'pfam'; - $feature{primary} = $hid; - ($feature{program}) = 'pfam'; - ($feature{db}) = 'db1'; - ($feature{logic_name}) = 'hmmpfam'; - my $new_feat = $self->create_feature (\%feature); - return $new_feat - - } - next; - - } - return; -} - -=head2 create_feature - - Title : create_feature - Usage : my $feat=$hmmpfam_parser->create_feature($feature,$seqname) - Function: creates a SeqFeature Generic object - Returns : L - Args : - - -=cut - -sub create_feature { - my ($self, $feat) = @_; - - - - my $feature1= Bio::SeqFeature::Generic->new( -seq_id =>$feat->{name}, - -start =>$feat->{start}, - -end =>$feat->{end}, - -score =>$feat->{score}, - -source =>$feat->{source}, - -primary =>$feat->{primary}, - ); - - - - my $feature2= Bio::SeqFeature::Generic->new( - -start =>$feat->{hstart}, - -end =>$feat->{hend}, - ); - - - - - my $featurepair = Bio::SeqFeature::FeaturePair->new; - $featurepair->feature1 ($feature1); - $featurepair->feature2 ($feature2); - - $featurepair->add_tag_value('evalue',$feat->{p_value}); - $featurepair->add_tag_value('percent_id','NULL'); - $featurepair->add_tag_value("hid",$feat->{primary}); - return $featurepair; - -} - -=head2 seqname - - Title : seqname - Usage : obj->seqname($seqname) - Function: Internal(not to be used directly) - Returns : - Args : seqname - -=cut - -sub seqname{ - my($self,$seqname)=@_; - - if(defined($seqname)) - { - $self->{'seqname'}=$seqname; - } - - return $self->{'seqname'}; - -} - -1; - - diff --git a/Changes b/Changes index 5b0995bdf..964eb7fa1 100644 --- a/Changes +++ b/Changes @@ -126,6 +126,10 @@ be removed. * The entire Bio::DB::TFBS namespace has been moved to its own distribution named after itself. + * All modules to handle HMMER programs output have been moved to their + own distribution named Bio-SearchIO-hmmer. This also includes the + programs bp_hmmer_to_table and bp_parse_hmmsearch. + 1.7.2 - "Entebbe" diff --git a/scripts/searchio/bp_hmmer_to_table.pl b/scripts/searchio/bp_hmmer_to_table.pl deleted file mode 100644 index f53890451..000000000 --- a/scripts/searchio/bp_hmmer_to_table.pl +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; - -=head1 NAME - -bp_hmmer_to_table - turn HMMER output into tabular format - -=head1 SYNOPSIS - - bp_hmmer_to_table [-e evaluefilter] [-b bitscorefilter] [--header] [-o outfile] inputfile1 inputfile2 ... - -=head1 DESCRIPTION - -Command line options: - -e/--evalue evalue -- filter by evalue - -b/--bitscore bitscore -- filter by bitscore - --header -- boolean flag to print column header - -o/--out -- optional outputfile to write data, - otherwise will write to STDOUT - -h/--help -- show this documentation - -Not technically a SearchIO script as this doesn't use any Bioperl -components but is a useful and fast. The output is tabular output. - - query sequence/domain (these are flip-flopped for hmmsearch / hmmpfam) - query start - query end - domain/sequence name or PFAM accession - hit start - hit end - score - e-value - domain/sequence name (these are flip-flopped for hmmsearch / hmmpfam) - -=head1 AUTHOR - Jason Stajich - -Jason Stajich jason_at_bioperl-dot-org - -=cut - -use Getopt::Long; - -my ($evalue,$bitscore,$header,$outfile); -GetOptions( - 'b|bitscore|bits:f' => \$bitscore, - 'e|evalue:f' => \$evalue, - 'header' => \$header, - 'o|out|outfile:s' => \$outfile, - 'h|help' => sub { exec('perldoc',$0); exit; } - ); - -my $outfh; -if( $outfile ) { - open $outfh, '>', $outfile or die "Could not write file '$outfile': $!\n"; -} else { - $outfh = \*STDOUT; -} - -my @fields = qw(QNAME QSTART QEND HACCESSION HSTART HEND SCORE EVALUE HNAME); -if( $header ) { - print $outfh join("\t", @fields), "\n"; -} -my %dat; -while(<>) { - if( s/^Query(\s+(sequence|HMM))?:\s+// ) { - s/\s+$//; - $dat{'Query'} = $_; - } elsif( /^Parsed for domains:/ ) { - my $ready = 0; - while(<>) { - if(/^Model|Sequence\s+Domain/ ) { $ready = 1; } - elsif( $ready && /^\-\-/) { $ready = 2; } - elsif( /^Alignments of/ ) { undef %dat; last; } - elsif( $ready == 2 ) { - if( my ($n,$domainnum,$domainct, @vals) = - (m!^(\S+)\s+ # domain name - (\d+)\/(\d+)\s+ # num/num (ie 1 of 2) - (\d+)\s+(\d+).+? # sequence start and end - (\d+)\s+(\d+)\s+ # hmm start and end - \S+\s+ # [] - (\S+)\s+ # score - (\S+) # evalue - \s*$!ox) ) { - next if( defined $bitscore && $vals[4] < $bitscore ); - next if (defined $evalue && $vals[5] > $evalue); - print $outfh join("\t", - $dat{'Query'}, - $vals[0], $vals[1], - $n, - $vals[2],$vals[3], - $vals[4],$vals[5], - $n),"\n"; - } - } - } - } -} diff --git a/scripts/searchio/bp_parse_hmmsearch.pl b/scripts/searchio/bp_parse_hmmsearch.pl deleted file mode 100644 index 3d93499ac..000000000 --- a/scripts/searchio/bp_parse_hmmsearch.pl +++ /dev/null @@ -1,206 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -=head1 NAME - -bp_parse_hmmsearch - parse single/multiple HMMSEARCH results file(s) with - different output options - -=head1 SYNOPSIS - -bp_parse_hmmsearch [--po] [--ps] -s hmmsearch_file - -bp_parse_hmmsearch [--po] [--ps] -m index_file - -=head1 DESCRIPTION - -=head2 Mandatory Options: - - -s HMMSEARCH file to parse. - -m INDEX file that contains a list of HMMSEARCH files for multiple - parsing. - -=head2 Special Options: - - --po Print only the hits that have positive scores. - --ps Print the total of positive scores found. - --help Show this documentation. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to the -Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - - Mauricio Herrera Cuadra - -=cut - -# Modules, pragmas and variables to use -use Bio::SearchIO; -use Getopt::Long; -use vars qw($opt_s $opt_m $opt_po $opt_ps $opt_help); - -# Gets options from the command line -GetOptions qw(-s:s -m:s --po --ps --help); - -# Print documentation if help switch was given -exec('perldoc', $0) and exit() if $opt_help; - -# If no mandatory options are given prints an error and exits -if (!$opt_s && !$opt_m) { - print "ERROR: No HMMSEARCH or INDEX file has been specified.\n Use -'--help' switch for documentation.\n" and exit(); -} elsif ($opt_s && $opt_m) { - print "ERROR: You must select only one option (-s or -m) for input.\n -Use '--help' switch for documentation.\n" and exit(); -} - -# Initializes a counter for the domain positive scores if the option -# was given -my $pos_scores = 0 if $opt_ps; - -# If single file mode was selected -if ($opt_s) { - parse_hmmsearch($opt_s); - - # Prints the total domain positive scores if the option was given - if ($opt_ps) { - print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - -\n"; - print "Total domain positive scores: $pos_scores\n"; - } - -# If multiple files mode was selected -} elsif ($opt_m) { - - # Opens the INDEX file sent as input - open my $FH, '<', $opt_m or die "Could not read INDEX file '$opt_m': $!\n"; - - # Cycle that extracts one line for every loop until finding the - # end of file - while (my $line = <$FH>) { - - # Deletes the new line characters from the line - chomp $line; - - # Parses the result file in turn - parse_hmmsearch($line); - print "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -= = = =\n"; - } - - # Prints the total domain positive scores if the option was given - print "Total domain positive scores: $pos_scores\n" if $opt_ps; - - # Closes INDEX files - close $FH; -} - -# Exits the program -exit(); - -# Subroutine that parses a HMMSEARCH results file -sub parse_hmmsearch { - - # Gets the parameters sent to the function - my ($file) = @_; - - # Creates a new Bio::SearchIO object - my $in = new Bio::SearchIO( - -format => 'hmmer', - -file => $file, - ); - - # Loops through the results file - while (my $result = $in->next_result()) { - - # Prints program name and version (these are values from - # Bio::Search::Result::GenericResult methods) - print $result->algorithm(), " ", $result->algorithm_version(), "\n"; - print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - -\n"; - - # Prints HMM file and sequence database (these are values from - # Bio::Search::Result::HMMERResult methods) - print "HMM file:\t\t\t", $result->hmm_name(), "\n"; - print "Sequence database:\t\t", $result->sequence_file(), "\n"; - print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --\n"; - - # Prints some values from Bio::Search::Result::GenericResult - # methods - print "Query HMM:\t\t\t", $result->query_name(), "\n"; - print "Accession:\t\t\t", $result->query_accession(), "\n"; - print "Description:\t\t\t", $result->query_description(), "\n"; - print "Total hits:\t\t\t", $result->num_hits(), "\n"; - - # Loops through the sequence in turn - while (my $hit = $result->next_hit()) { - - # If only positive scores option was given and the score - # in turn is greater than zero - if ($opt_po) { - printHits($hit) if ($hit->score() >= 0); - - # Prints all hits otherwise - } else { - printHits($hit); - } - } - } -} - -# Subroutine that prints the values from a Bio::Search::Hit::HitI -# object -sub printHits { - - # Gets the parameters sent to the function - my ($hit) = @_; - - # Prints some values from Bio::Search::Hit::HitI methods - print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n"; - print "Hit ", $hit->rank(), "\n"; - print "Sequence:\t\t\t", $hit->name(), "\n"; - print "Description:\t\t\t", $hit->description(), "\n"; - print "Score:\t\t\t\t", $hit->score(), "\n"; - print "E-value:\t\t\t", $hit->significance(), "\n"; - print "Number of domains:\t\t", $hit->num_hsps(), "\n"; - - # Loops through the domain in turn - while (my $hsp = $hit->next_hsp()) { - - # Prints some values from Bio::Search::HSP::HSPI methods - print " - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n"; - print " Domain:\t\t\t", $hsp->rank(), " of ", $hit->num_hsps(), "\n"; - print " seq-f:\t\t\t", $hsp->start('hit'), "\n"; - print " seq-t:\t\t\t", $hsp->end('hit'), "\n"; - print " hmm-f:\t\t\t", $hsp->start(), "\n"; - print " hmm-t:\t\t\t", $hsp->end(), "\n"; - print " score:\t\t\t", $hsp->score(), "\n"; - $pos_scores++ if ($hsp->score() >= 0) && $opt_ps; - print " E-value:\t\t\t", $hsp->evalue(), "\n"; - my $hmm_string = $hsp->query_string(); - $hmm_string =~ s/<-\*$//; - print " hmm string:\t\t\t", $hmm_string, "\n"; - print " homology string:\t\t", $hsp->homology_string(), "\n"; - print " hit string:\t\t\t", $hsp->hit_string(), "\n"; - } -} diff --git a/t/LocalDB/Index/Index.t b/t/LocalDB/Index/Index.t index 1eec2ad5f..d5bb6175d 100644 --- a/t/LocalDB/Index/Index.t +++ b/t/LocalDB/Index/Index.t @@ -7,7 +7,7 @@ BEGIN { use lib '.'; use Bio::Root::Test; - test_begin(-tests => 73, + test_begin(-tests => 69, -requires_modules => [qw(DB_File Storable Fcntl)]); @@ -19,7 +19,6 @@ BEGIN { use_ok('Bio::Index::GenBank'); use_ok('Bio::Index::Stockholm'); use_ok('Bio::Index::Swissprot'); - use_ok('Bio::Index::Hmmer'); use_ok('Bio::DB::InMemoryCache'); use_ok('Bio::DB::InMemoryCache'); } @@ -192,19 +191,6 @@ ok ( -e "Wibbl6" ); my $aln = $st_ind->fetch_aln('PF00244'); isa_ok($aln,'Bio::SimpleAlign'); -# test Hmmer -my $hmmer_ind = Bio::Index::Hmmer->new(-filename => 'Wibbl7', - -write_flag => 1, - -verbose => 0); -isa_ok $hmmer_ind, 'Bio::Index::Hmmer'; -$hmmer_ind->make_index(test_input_file('hmmpfam_multiresult.out')); -ok ( -e "Wibbl7" ); -my $hmm_result = $hmmer_ind->fetch_report('lcl|gi|340783625|Plus1'); -is ($hmm_result->query_description, 'megaplasmid, complete sequence [UNKNOWN]'); - - - - sub get_id { my $line = shift; return $1 if ($line =~ /product="([^"]+)"/); @@ -216,7 +202,7 @@ END { } sub cleanup { - for my $root ( qw( Wibbl Wibbl2 Wibbl3 Wibbl4 Wibbl5 Wibbl6 Wibbl7 + for my $root ( qw( Wibbl Wibbl2 Wibbl3 Wibbl4 Wibbl5 Wibbl6 multifa_index multifa_qual_index ) ) { unlink $root if( -e $root ); unlink "$root.pag" if( -e "$root.pag"); diff --git a/t/SearchIO/hmmer.t b/t/SearchIO/hmmer.t deleted file mode 100644 index 399b7e9c9..000000000 --- a/t/SearchIO/hmmer.t +++ /dev/null @@ -1,1712 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl - -use strict; -use warnings; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin( -tests => 824 ); - - use_ok('Bio::SearchIO'); -} - -my $searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmpfam.out') -); -my $result; - -while ( $result = $searchio->next_result ) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMPFAM', 'Check algorithm' ); - is( $result->algorithm_version, '2.1.1', 'Check algorithm version' ); - is( $result->hmm_name, 'pfam', 'Check hmm_name' ); - is( $result->sequence_file, - '/home/birney/src/wise2/example/road.pep', - 'Check sequence_file' - ); - - is( $result->query_name, 'roa1_drome', 'Check query_name' ); - is( $result->query_length, 0, 'Check query_length absence' ); - is( $result->query_description, '', 'Check query_description' ); - is( $result->num_hits(), 2, 'Check num_hits' ); - my ( $hsp, $hit ); - - if ( defined( $hit = $result->next_model ) ) { - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'SEED', 'Check hit name' ); - is( $hit->description, - '', - 'Check for hit description' - ); - is( $hit->raw_score, 146.1, 'Check hit raw_score' ); - is( $hit->bits, 0, 'Check hit bits (0)' ); - float_is( $hit->significance, 6.3e-40, 'Check hit significance' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - - # Query and Hit lengths are usually unknown in HMMER, - # but sometimes they can be deduced from domain data '[]' - is( $hit->length, 77, 'Check hit length' ); - is( $hit->frac_aligned_query, undef ); - is( $hit->frac_aligned_hit, '1.00' ); - - is( $hit->matches('cons'), 55, 'Check hit total conserved residues' ); - is( $hit->matches('id'), 22, 'Check hit total identical residues' ); - is( sprintf( "%.3f", $hit->frac_identical('query') ), '0.310' ); - is( sprintf( "%.3f", $hit->frac_identical('hit') ), 0.286 ); - is( sprintf( "%.3f", $hit->frac_identical('total') ), 0.282 ); - is( sprintf( "%.3f", $hit->frac_conserved('query') ), 0.775 ); - is( sprintf( "%.3f", $hit->frac_conserved('hit') ), 0.714 ); - is( sprintf( "%.3f", $hit->frac_conserved('total') ), 0.705 ); - - if ( defined( $hsp = $hit->next_domain ) ) { - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->query->seq_id(), 'roa1_drome', 'Check for query seq_id' ); - is( $hsp->hit->seq_id(), 'SEED', 'Check for hit seq_id' ); - - is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 77, 'Check for hit hmm to value' ); - is( $hsp->query->start, 33, 'Check for query alifrom value' ); - is( $hsp->query->end, 103, 'Check for query ali to value' ); - is( $hsp->score, 71.2, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 2.2e-17, 'Check for hsp c-Evalue' ); - - is( $hsp->length('query'), 71, 'Check for hsp query length' ); - is( $hsp->length('hit'), 77, 'Check for hsp hit length' ); - is( $hsp->length('total'), 78, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 7, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 1, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 8, 'Check for hsp total gaps' ); - - ($hit->length == 0) ? - is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' ) - : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' ); - ($result->query_length == 0) ? - is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' ) - : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' ); - - is( $hsp->num_conserved, 55 ); - is( $hsp->num_identical, 22 ); - is( sprintf( "%.2f", $hsp->percent_identity ), 28.21 ); - is( sprintf( "%.3f", $hsp->frac_identical('query') ), '0.310' ); - is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.286 ); - is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.282 ); - is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.775 ); - is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.714 ); - is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.705 ); - - is( $hsp->query_string, - 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP', - 'Check for query string' - ); - is( $hsp->hit_string, - 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG-kelggrklrv', - 'Check for hit string' - ); - is( $hsp->homology_string, - 'lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ', - 'Check for homology string' - ); - is( length( $hsp->homology_string ), - length( $hsp->hit_string ), - 'Check if homology string and hit string have an equal length' - ); - is( length( $hsp->query_string ), - length( $hsp->homology_string ), - 'Check if query string and homology string have an equal length' - ); - # This Hmmpfam don't have PP or CS strings, these are tests to check for side effects - is( $hsp->posterior_string, '' ); - is( $hsp->consensus_string, '' ); - } - } - if ( defined( $hit = $result->next_model ) ) { - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'SEED', 'Check hit name' ); - is( $hit->description, '', 'Check for hit description' ); - is( $hit->raw_score, 146.1, 'Check hit raw_score' ); - is( $hit->bits, 0, 'Check hit bits (0)' ); - float_is( $hit->significance, 6.3e-040, 'Check hit significance' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - - # Query and Hit lengths are usually unknown in HMMER, - # but sometimes they can be deduced from domain data '[]' - is( $hit->length, 77, 'Check hit length' ); - is( $hit->frac_aligned_query, undef ); - is( $hit->frac_aligned_hit, '1.00' ); - - is( $hit->matches('cons'), 56, 'Check hit total conserved residues' ); - is( $hit->matches('id'), 33, 'Check hit total identical residues' ); - is( sprintf( "%.3f", $hit->frac_identical('query') ), 0.471 ); - is( sprintf( "%.3f", $hit->frac_identical('hit') ), 0.429 ); - is( sprintf( "%.3f", $hit->frac_identical('total') ), 0.429 ); - is( sprintf( "%.3f", $hit->frac_conserved('query') ), '0.800' ); - is( sprintf( "%.3f", $hit->frac_conserved('hit') ), 0.727 ); - is( sprintf( "%.3f", $hit->frac_conserved('total') ), 0.727 ); - - if ( defined( $hsp = $hit->next_domain ) ) { - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->query->seq_id(), 'roa1_drome', 'Check for query seq_id' ); - is( $hsp->hit->seq_id(), 'SEED', 'Check for hit seq_id' ); - - is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 77, 'Check for hit hmm to value' ); - is( $hsp->query->start, 124, 'Check for query alifrom value' ); - is( $hsp->query->end, 193, 'Check for query ali to value' ); - is( $hsp->score, 75.5, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 1.1e-18, 'Check for hsp c-Evalue' ); - - is( $hsp->length('query'), 70, 'Check for hsp query length' ); - is( $hsp->length('hit'), 77, 'Check for hsp hit length' ); - is( $hsp->length('total'), 77, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 7, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 7, 'Check for hsp total gaps' ); - - ($hit->length == 0) ? - is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' ) - : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' ); - ($result->query_length == 0) ? - is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' ) - : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' ); - - is( $hsp->num_conserved, 56 ); - is( $hsp->num_identical, 33 ); - is( sprintf( "%.2f", $hsp->percent_identity ), 42.86 ); - is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.471 ); - is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.429 ); - is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.429 ); - is( sprintf( "%.3f", $hsp->frac_conserved('query') ), '0.800' ); - is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.727 ); - is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.727); - - is( $hsp->query_string, - 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL--KQHQLNGKMVDV', - 'Check for query string' - ); - is( $hsp->hit_string, - 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv', - 'Check for hit string' - ); - is( $hsp->homology_string, - 'lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v', - 'Check for homology string' - ); - is( length( $hsp->homology_string ), - length( $hsp->hit_string ), - 'Check if homology string and hit string have an equal length' - ); - is( length( $hsp->query_string ), - length( $hsp->homology_string ), - 'Check if query string and homology string have an equal length' - ); - } - last; - } -} - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmsearch.out') -); -while ( $result = $searchio->next_result ) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' ); - is( $result->algorithm_version, '2.0', 'Check algorithm version' ); - is( $result->hmm_name, 'HMM [SEED]', 'Check hmm_name' ); - is( $result->sequence_file, 'HMM.dbtemp.29591', 'Check sequence_file' ); - is( $result->database_name, 'HMM.dbtemp.29591', 'Check database_name' ); - - is( $result->query_name, 'SEED', 'Check query_name' ); - is( $result->query_length, 77, 'Check query_length' ); - is( $result->query_description, '', 'Check query_description' ); - is( $result->num_hits(), 1215, 'Check num_hits' ); - - my $hit = $result->next_model; - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'Q91581', 'Check hit name' ); - is( $hit->description, - 'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN', - 'Check for hit description' - ); - is( $hit->raw_score, 119.7, 'Check hit raw_score' ); - is( $hit->bits, 0, 'Check hit bits (0)' ); - float_is( $hit->significance, 2e-31, 'Check hit significance' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - is( $hit->length, 0, 'Check hit length' ); - - my $hsp = $hit->next_domain; - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->query->seq_id(), 'SEED', 'Check for query seq_id' ); - is( $hsp->hit->seq_id(), 'Q91581', 'Check for hit seq_id' ); - - is( $hsp->hit->start, 18, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 89, 'Check for hit hmm to value' ); - is( $hsp->query->start, 1, 'Check for query alifrom value' ); - is( $hsp->query->end, 77, 'Check for query ali to value' ); - is( $hsp->score, 119.7, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 2e-31, 'Check for hsp c-Evalue' ); - - is( $hsp->length('query'), 77, 'Check for hsp query length' ); - is( $hsp->length('hit'), 72, 'Check for hsp hit length' ); - is( $hsp->length('total'), 0, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' ); - - my $example_counter = 0; - while ($hit = $result->next_model) { - if ($hit->name eq 'Q61954') { - $example_counter++; - if ($example_counter == 1) { - # Query and Hit lengths are usually unknown in HMMER, - # but sometimes they can be deduced from domain data '[]' - is( $hit->length, 153, 'Check hit length' ); - is( $hit->frac_aligned_query, '1.00' ); - is( $hit->frac_aligned_hit, 0.42 ); - - $hsp = $hit->next_domain; - is( $hsp->query->seq_id(), 'SEED', 'Check for query seq_id' ); - is( $hsp->hit->seq_id(), 'Q61954', 'Check for hit seq_id' ); - - is( $hsp->hit->start, 26, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 89, 'Check for hit hmm to value' ); - is( $hsp->query->start, 1, 'Check for query alifrom value' ); - is( $hsp->query->end, 77, 'Check for query ali to value' ); - is( $hsp->score, 72.9, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 2.4e-17, 'Check for hsp c-Evalue' ); - - is( $hsp->length('query'), 77, 'Check for hsp query length' ); - is( $hsp->length('hit'), 64, 'Check for hsp hit length' ); - is( $hsp->length('total'), 0, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' ); - } - elsif ($example_counter == 2) { - # Query and Hit lengths are usually unknown in HMMER, - # but sometimes they can be deduced from domain data '[]' - is( $hit->length, 153, 'Check hit length' ); - is( $hit->frac_aligned_query, '1.00' ); - is( $hit->frac_aligned_hit, 0.34 ); - - $hsp = $hit->next_domain; - is( $hsp->query->seq_id(), 'SEED', 'Check for query seq_id' ); - is( $hsp->hit->seq_id(), 'Q61954', 'Check for hit seq_id' ); - - is( $hsp->hit->start, 102, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 153, 'Check for hit hmm to value' ); - is( $hsp->query->start, 1, 'Check for query alifrom value' ); - is( $hsp->query->end, 77, 'Check for query ali to value' ); - is( $hsp->score, 3.3, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 1.9, 'Check for hsp c-Evalue' ); - - is( $hsp->length('query'), 77, 'Check for hsp query length' ); - is( $hsp->length('hit'), 52, 'Check for hsp hit length' ); - is( $hsp->length('total'), 0, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' ); - - last; - } - } - } -} - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('L77119.hmmer') -); - -while ( $result = $searchio->next_result ) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMPFAM', 'Check algorithm' ); - is( $result->algorithm_version, '2.2g', 'Check algorithm version' ); - is( $result->hmm_name, 'Pfam', 'Check hmm_name' ); - is( $result->sequence_file, 'L77119.faa', 'Check sequence_file' ); - - is( $result->query_name, - 'gi|1522636|gb|AAC37060.1|', - 'Check query_name' - ); - is( $result->query_length, 0, 'Check query_length absence' ); - is( $result->query_description, - 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]', - 'Check query_description' - ); - is( $result->num_hits(), 1, 'Check num_hits' ); - - my $hit = $result->next_hit; - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'Methylase_M', 'Check hit name' ); - is( $hit->description, - 'Type I restriction modification system, M', - 'Check for hit description' - ); - is( $hit->raw_score, -105.2, 'Check hit raw_score' ); - is( $hit->bits, 0, 'Check hit bits (0)' ); - float_is( $hit->significance, 0.0022, 'Check hit significance' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - - # Query and Hit lengths are usually unknown in HMMER, - # but sometimes they can be deduced from domain data '[]' - is( $hit->length, 279, 'Check hit length' ); - is( $hit->frac_aligned_query, undef ); - is( $hit->frac_aligned_hit, '1.00' ); - - is( $hit->matches('cons'), 133, 'Check hit total conserved residues' ); - is( $hit->matches('id'), 48, 'Check hit total identical residues' ); - is( sprintf( "%.3f", $hit->frac_identical('query') ), 0.238 ); - is( sprintf( "%.3f", $hit->frac_identical('hit') ), 0.172 ); - is( sprintf( "%.3f", $hit->frac_identical('total') ), 0.171 ); - is( sprintf( "%.3f", $hit->frac_conserved('query') ), 0.658 ); - is( sprintf( "%.3f", $hit->frac_conserved('hit') ), 0.477 ); - is( sprintf( "%.3f", $hit->frac_conserved('total') ), 0.475 ); - - my $hsp = $hit->next_hsp; - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->query->seq_id(), 'gi|1522636|gb|AAC37060.1|', 'Check for query seq_id' ); - is( $hsp->hit->seq_id(), 'Methylase_M', 'Check for hit seq_id' ); - - is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 279, 'Check for hit hmm to value' ); - is( $hsp->query->start, 280, 'Check for query alifrom value' ); - is( $hsp->query->end, 481, 'Check for query ali to value' ); - is( $hsp->score, -105.2, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 0.0022, 'Check for hsp evalue' ); - - is( $hsp->length('query'), 202, 'Check for hsp query length' ); - is( $hsp->length('hit'), 279, 'Check for hsp hit length' ); - is( $hsp->length('total'), 280, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 78, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 1, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 79, 'Check for hsp total gaps' ); - - ($hit->length == 0) ? - is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' ) - : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' ); - ($result->query_length == 0) ? - is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' ) - : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' ); - - is( $hsp->num_conserved, 133 ); - is( $hsp->num_identical, 48 ); - is( sprintf( "%.2f", $hsp->percent_identity ), 17.14 ); - is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.238 ); - is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.172 ); - is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.171 ); - is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.658 ); - is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.477 ); - is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.475 ); - - is (length($hsp->homology_string), length($hsp->query_string)); - - is( $hsp->hit_string, - 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG-dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn', - 'Check for hiy string' - ); - is( $hsp->query_string, - 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST', - 'Check for query string' - ); - is( $hsp->homology_string, - ' ++EL+++ av+ R L+F K++ dk +i+ p + + +++y ++ ++ ++y ++ + lF++++ e ++ ++++ + + ++ + + Glf ++++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ +++ ++ k+n+i + s+', - 'Check for homology string' - ); - is( join( ' ', $hsp->seq_inds( 'query', 'nomatch', 1 ) ), - '280 288 289 293-295 300 304 311 313-315 317 324-326 332 335 337 344-346 348 355 358-361 364-366 372 379 383-385 389 396 400 404-408 412 416 417 422 426 429-431 434-436 439 441 446 450 451 455 459 460 463 464 468 471 472 478', - 'Check for nomatch indices in query' - ); - is( join( ' ', $hsp->seq_inds( 'hit', 'nomatch', 1 ) ), - '1 9 10 14-16 18-31 35 39 42-47 51-59 61 63-65 67 72-74 77-79 82 86 89-94 96 103-105 107 110 111 116 118 120-123 126-131 133 135-141 145 150 151 154 158-160 164 171 175 179-183 187 191-193 198 202 205-207 210-212 215 217 222 226 227 231 233 236 237 240-257 261 264-267 273 275-278', - 'Check for nomatch indices in hit' - ); - is( join( ' ', $hsp->seq_inds( 'query', 'gap', 1 ) ), - '296 306 309 321 328 334 335 350 356 366-368 376 417 456 463 470 479', - 'Check for gap indices in query' - ); - is( join( ' ', $hsp->seq_inds( 'hit', 'gap', 1 ) ), - '', 'Check for gap indices in hit' ); -} - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('cysprot1b.hmmsearch') -); - -while ( $result = $searchio->next_result ) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' ); - is( $result->algorithm_version, '2.2g', 'Check algorithm version' ); - is( $result->hmm_name, - 'Peptidase_C1.hmm [Peptidase_C1]', - 'Check hmm_name' - ); - is( $result->database_name, 'cysprot1b.fa', 'Check database_name' ); - is( $result->sequence_file, 'cysprot1b.fa', 'Check sequence_file' ); - - is( $result->query_name, 'Peptidase_C1', 'Check query_name' ); - is( $result->query_length, 337, 'Check query_length' ); - is( $result->query_accession, 'PF00112', 'Check query_accession' ); - is( $result->query_description, - 'Papain family cysteine protease', - 'Check query_description' - ); - is( $result->num_hits(), 4, 'Check num_hits' ); - - my $hit = $result->next_hit; - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'CATL_RAT', 'Check hit name' ); - is( $hit->description, - '', - 'Check for hit description' - ); - is( $hit->raw_score, 449.4, 'Check hit raw_score' ); - is( $hit->bits, 0, 'Check hit bits (0)' ); - float_is( $hit->significance, 2e-135, 'Check hit significance' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - - # Query and Hit lengths are usually unknown in HMMER, - # but sometimes they can be deduced from domain data '[]' - is( $hit->length, 0, 'Check hit length absence' ); - is( $hit->frac_aligned_query, '1.00' ); - is( $hit->frac_aligned_hit, undef ); - - is( $hit->matches('cons'), 204, 'Check hit total conserved residues' ); - is( $hit->matches('id'), 131, 'Check hit total identical residues' ); - is( sprintf( "%.3f", $hit->frac_identical('query') ), 0.389 ); - is( sprintf( "%.3f", $hit->frac_identical('hit') ), 0.598 ); - is( sprintf( "%.3f", $hit->frac_identical('total') ), 0.389 ); - is( sprintf( "%.3f", $hit->frac_conserved('query') ), 0.605 ); - is( sprintf( "%.3f", $hit->frac_conserved('hit') ), 0.932 ); - is( sprintf( "%.3f", $hit->frac_conserved('total') ), 0.605 ); - - my $hsp = $hit->next_hsp; - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->query->seq_id(), 'Peptidase_C1', 'Check for query seq_id' ); - is( $hsp->hit->seq_id(), 'CATL_RAT', 'Check for hit seq_id' ); - - is( $hsp->hit->start, 114, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 332, 'Check for hit hmm to value' ); - is( $hsp->query->start, 1, 'Check for query alifrom value' ); - is( $hsp->query->end, 337, 'Check for query ali to value' ); - is( $hsp->score, 449.4, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 2e-135, 'Check for hsp evalue' ); - - is( $hsp->length('query'), 337, 'Check for hsp query length' ); - is( $hsp->length('hit'), 219, 'Check for hsp hit length' ); - is( $hsp->length('total'), 337, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 118, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 118, 'Check for hsp total gaps' ); - - ($hit->length == 0) ? - is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' ) - : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' ); - ($result->query_length == 0) ? - is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' ) - : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' ); - - is( $hsp->num_conserved, 204 ); - is( $hsp->num_identical, 131 ); - is( sprintf( "%.2f", $hsp->percent_identity ), 38.87 ); - is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.389 ); - is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.598 ); - is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.389 ); - is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.605 ); - is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.932 ); - is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.605 ); - - is (length($hsp->homology_string), length($hsp->query_string)); - - is( $hsp->hit_string, - 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI', - 'Check for hiy string' - ); - is( $hsp->homology_string, - '+P+++DWRe kg VtpVK+QG qCGSCWAFSa g lEg+ ++kt gkl+sLSEQ+LvDC++ d gn+ GCnG Glmd Af+Yik+ NgGl++E++Y PY+ +kd g+Cky+ + ++ a+++g++d+p++ E+al+ka+a++GP+sVa+das+ s q+Y+sG +Y+++ C+++ +LdH+Vl+VGYG e+ ++++ +YW+VKNSWG++WG++GY++ia+++n n+CG+a+ asypi', - 'Check for homology string' - ); - is( $hsp->query_string, - 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi', - 'Check for query string' - ); - # Hmmsearch2 don't have PP or CS strings, these are tests to check for side effects - is( $hsp->posterior_string, '' ); - is( $hsp->consensus_string, '' ); - - $hit = $result->next_hit; - is( $hit->name, 'CATL_HUMAN', 'Check hit name' ); - is( $hit->description, '', 'Check for hit description' ); - is( $hit->raw_score, 444.5, 'Check hit raw_score' ); - is( $hit->bits, 0, 'Check hit bits (0)' ); - float_is( $hit->significance, 6.1e-134, 'Check hit significance' ); -} - -# test for bug 2632 - CS lines are captured without breaking the parser -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmpfam_cs.out') -); -if (defined ($result = $searchio->next_result) ) { - my $hit = $result->next_hit; - my $hsp = $hit->next_hsp; - - is ($hsp->seq_str, $hsp->query_string); - is (length($hsp->seq_str), length($hsp->query_string)); - is (length($hsp->homology_string), length($hsp->query_string)); - is (length($hsp->consensus_string), length($hsp->query_string)); - - is( $hsp->consensus_string, - 'EEEEEEEEETSSHSBHHHHHHHHHHHHHGGGGSSCSTTSSCECEEEEEEECTCCCHHHHHHHCT----S GC-EEEEEEE-SSHHHHHHHHHHHHHHHHHTT-EEEEEEE--B-GGGS-HHHHHC--EEEEEEEE-TT--HHHHHHCEEEEECHSCHHHHTHHH. BEEEEEESSEEEEEECC-GGGHHHHBHGGGSTTEEBSEEEEEECESSSSSCTGGGSSCEEECCCTTCEEEEEEEEETTTHHHHHHHHHHTSCCCSSTTCGHHHHCC-SSS-TTSCHHHHHHHHHHHHHHTT--HHHHHHHHS----TT-GGGTST-HHHHHHHHHHHHHHCCHCCEEEEEEETSSEEEEEEETTTSCESEEEEEEEEEE.TTEEEEEESSC', - 'Check for consensus structure string' - ); - is( $hsp->seq_str, - 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES', - 'Check for hsp seq_str' - ); - is( $hsp->query_string, - 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES', - 'Check for query string' - ); - is( $hsp->hit_string, - 'CGvlGfiAhikgkpshkivedaleaLerLeHRGavgADgktGDGAGIltqiPdgFFrevakelGieLpe-gqYAVGmvFLPqdelaraearkifEkiaeeeGLeVLGWReVPvnnsvLGetAlatePvIeQvFvgapsgdgedfErrLyviRkrieksivaenvn----fYiCSLSsrTIVYKGMLtseQLgqFYpDLqderfeSalAivHsRFSTNTfPsWplAQPfRVnslwgggivlAHNGEINTlrgNrnwMraRegvlksplFgddldkLkPIvneggSDSaalDnvlEllvraGRslpeAlMMlIPEAWqnnpdmdkdrpekraFYeylsglmEPWDGPAalvftDGryavgAtLDRNGLTRPaRygiTrdldkDglvvvaSEa', - 'Check for hit string' - ); - is( $hsp->homology_string, - 'CGv GfiA+ ++ ++hkiv +aleaL+++eHRGa++AD ++GDGAGI t+iP+++F++ ++++i++ ++ +VGm+FLP l+ + i+E +++ee+Le++GWR VP+ +vLG++A + P++eQvF+ +++ +++ +E++L+++Rk+iek+i+ + + ++fYiCSLS++TIVYKGM++s++LgqFY+DL++++++S++Ai+H+RFSTNT+P+WplAQP+R ++ HNGEINTl gN nwM++Re +l+s++++d++++LkPI n+++SDSa+lD ++Ell+++GRs++eAlM+l+PEA+qn+pd +++e+ +FYey+sgl+EPWDGPA++vft+G++ +gAtLDRNGL RPaRy+iT kD+lv+v+SE+', - 'Check for homology string' - ); -} - -# Tests for hmmer3 output here -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmscan.out'), - -verbose => 1 -); -is( ref($searchio), 'Bio::SearchIO::hmmer3', - 'Check if correct searchio object is returned' ); -my $counter = 0; -while ( $result = $searchio->next_result ) { - $counter++; - if ($counter == 1) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSCAN', 'Check algorithm' ); - is( $result->algorithm_version, '3.0', 'Check algorithm version' ); - is( $result->hmm_name, - '/data/biodata/HMMerDB/Pfam.hmm', - 'Check hmm_name' - ); - is( $result->sequence_file, - 'BA000019.orf1.fasta', - 'Check sequence_file' - ); - is( $result->query_name, 'BA000019.orf1', 'Check query_name' ); - is( $result->query_length, 198, 'Check query_length' ); - is( $result->query_accession, '', 'Check query_accession' ); - is( $result->query_description, '', 'Check query_description' ); - # 1 hit above and 6 below inclusion threshold - is( $result->num_hits(), 7, 'Check num_hits' ); - - my ( $hsp, $hit ); - if ( $hit = $result->next_model ) { - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'Peripla_BP_2', 'Check hit name' ); - is( $hit->description, - 'Periplasmic binding protein', - 'Check for hit description' - ); - is( $hit->raw_score, 105.2, 'Check hit raw_score' ); - is( $hit->bits, 0, 'Check hit bits (0)' ); - float_is( $hit->significance, 6e-30, 'Check hit significance' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - - # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER. - # When is not known, sometimes it can be deduced from domain data '[]' - is( $hit->length, 0, 'Check hit length absence' ); - is( $hit->frac_aligned_query, 0.87 ); - is( $hit->frac_aligned_hit, undef ); - - if ( defined( $hsp = $hit->next_domain ) ) { - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->hit->seq_id(), 'Peripla_BP_2', 'Check for hit seq_id' ); - is( $hsp->query->seq_id(), 'BA000019.orf1', 'Check for query seq_id' ); - - is( $hsp->hit->start, 59, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 236, 'Check for hit hmm to value' ); - is( $hsp->query->start, 2, 'Check for query alifrom value' ); - is( $hsp->query->end, 173, 'Check for query ali to value' ); - is( $hsp->score, '105.0', 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 1.5e-33, 'Check for hsp c-Evalue' ); - - is( $hsp->length('query'), 172, 'Check for hsp query length' ); - is( $hsp->length('hit'), 178, 'Check for hsp hit length' ); - is( $hsp->length('total'), 180, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 8, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 2, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 10, 'Check for hsp total gaps' ); - - ($hit->length == 0) ? - is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' ) - : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' ); - ($result->query_length == 0) ? - is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' ) - : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' ); - - is( $hsp->num_conserved, 140 ); - is( $hsp->num_identical, 50 ); - is( sprintf( "%.2f", $hsp->percent_identity ), 27.78 ); - is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.291 ); - is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.281 ); - is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.278 ); - is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.814 ); - is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.787 ); - is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.778 ); - - is (length($hsp->homology_string), length($hsp->query_string)); - - is( $hsp->query_string, - 'LKPDLIIGREYQ---KNIYNQLSNFAPTVLVDWGSF-TSFQDNFRYIAQVLNEEEQGKLVLQQYQKRIRDLQDRMGERlQKIEVSVIGFSGQSIKSLNR-DAVFNQVLDDAGIKRIsIQKNQQERYLEISIENLNKYDADVLFVINE---SKEQLYPDLKNPLWHHLRAVKKQQVYVVNQ', - 'Check for query string' - ); - is( $hsp->hit_string, - 'lkPDlvivsafgalvseieellelgipvvavessstaeslleqirllgellgeedeaeelvaelesridavkaridsl-kpktvlvfgyadegikvvfgsgswvgdlldaaggeni-iaeakgseseeisaEqilaadpdviivsgrgedtktgveelkenplwaelpAvkngrvyllds', - 'Check for hit string' - ); - is( $hsp->homology_string, - 'lkPDl+i+ +++ ++i+++l++ +p+v v+ s+ s+++ +r ++++l+ee++++ + +++++ri+++++r + ++ +v+v+g+++ +ik+++ + ++++ld+ag++ i i++++++ + eis+E+++++d+dv++v k+ + ++nplw +l+Avk+++vy++++', - 'Check for homology string' - ); - is( $hsp->posterior_string, - '8***********...********************9.*****************************************999999999999997777776.5678999999****99777777*************************...77777777899***************9976', - 'Check for posterior probability string' - ); - } - } - } - # Check for errors in HSP caused by the existence of 2 hits with the same ID - elsif ($counter == 2) { - is( $result->algorithm, 'HMMSCAN', 'Check algorithm' ); - is( $result->algorithm_version, '3.0', 'Check algorithm version' ); - is( $result->hmm_name, - '/data/biodata/HMMerDB/Pfam.hmm', - 'Check hmm_name' - ); - is( $result->sequence_file, - 'BA000019.orf1.fasta', - 'Check sequence_file' - ); - is( $result->query_name, 'lcl|Test_ID.1|P1', 'Check query_name' ); - is( $result->query_length, 463, 'Check query_length' ); - is( $result->query_description, '281521..282909', 'Check query_description' ); - is( $result->num_hits(), 2, 'Check num_hits' ); - - my ( $hsp, $hit ); - my $hit_counter = 0; - while ( $hit = $result->next_model ) { - $hit_counter++; - if ($hit_counter == 1) { - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'IS4.original', 'Check hit name' ); - is( $hit->description, '', 'Check for hit description' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - if ( defined( $hsp = $hit->next_domain ) ) { - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->hit->seq_id(), 'IS4.original', 'Check for hit seq_id' ); - is( $hsp->query->seq_id(), 'lcl|Test_ID.1|P1', 'Check for query seq_id' ); - - is( $hsp->hit->start, 315, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 353, 'Check for hit hmm to value' ); - is( $hsp->query->start, 335, 'Check for query alifrom value' ); - is( $hsp->query->end, 369, 'Check for query ali to value' ); - is( $hsp->score, 18.9, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 8.9e-08, 'Check for hsp c-Evalue' ); - } - } - elsif ($hit_counter == 2) { - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'IS4.original', 'Check hit name' ); - is( $hit->description, '', 'Check for hit description' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - if ( defined( $hsp = $hit->next_domain ) ) { - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->hit->seq_id(), 'IS4.original', 'Check for hit seq_id' ); - is( $hsp->query->seq_id(), 'lcl|Test_ID.1|P1', 'Check for query seq_id' ); - - is( $hsp->hit->start, 315, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 353, 'Check for hit hmm to value' ); - is( $hsp->query->start, 335, 'Check for query alifrom value' ); - is( $hsp->query->end, 369, 'Check for query ali to value' ); - is( $hsp->score, 18.8, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 9e-08, 'Check for hsp c-Evalue' ); - } - } - } - } -} - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmsearch3.out'), - -verbose => 1 -); -while ( $result = $searchio->next_result ) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' ); - is( $result->algorithm_version, '3.0', 'Check algorithm version' ); - is( $result->hmm_name, 'Kv9.hmm', 'Check hmm_name' ); - is( $result->sequence_file, - '/home/pboutet/Desktop/databases/nr_May26', - 'Check sequence_file' - ); - is( $result->query_name, 'Kv9', 'Check query_name' ); - is( $result->query_length, '481', 'Check query_length' ); - is( $result->query_description, '', 'Check query_description' ); - is( $result->num_hits(), 2, 'Check num_hits' ); - - while ( my $hit = $result->next_model ) { - } -} - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmsearch3_multi.out'), - -verbose => 1 -); -is( ref($searchio), 'Bio::SearchIO::hmmer3', - 'Check if correct searchio object is returned' ); -$counter = 0; -while ( $result = $searchio->next_result ) { - $counter++; - if ($counter == 1) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' ); - is( $result->algorithm_version, '3.0', 'Check algorithm version' ); - is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' ); - is( $result->sequence_file, 'test_seqs.seq_raw.txt', 'Check sequence_file' ); - - is( $result->query_name, '1-cysPrx_C', 'Check query_name' ); - is( $result->query_length, 40, 'Check query_length' ); - is( $result->query_accession, 'PF10417.4', 'Check query_accession' ); - is( $result->query_description, - 'C-terminal domain of 1-Cys peroxiredoxin', - 'Check query_description' - ); - is( $result->num_hits(), 0, 'Check num_hits' ); - } - elsif ($counter == 2) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' ); - is( $result->algorithm_version, '3.0', 'Check algorithm version' ); - is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' ); - is( $result->sequence_file, 'test_seqs.seq_raw.txt', 'Check sequence_file' ); - - is( $result->query_name, 'DUF4229', 'Check query_name' ); - is( $result->query_length, 69, 'Check query_length' ); - is( $result->query_accession, 'PF14012.1', 'Check query_accession' ); - is( $result->query_description, - 'Protein of unknown function (DUF4229)', - 'Check query_description' - ); - is( $result->num_hits(), 1, 'Check num_hits' ); - - my ( $hsp, $hit ); - if ( $hit = $result->next_model ) { - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'lcl|Protein_ID1.3|M3', 'Check hit name' ); - is( $hit->description, - 'complement(48376..51420)', - 'Check for hit description' - ); - is( $hit->raw_score, -17.8, 'Check hit raw_score' ); - is( $hit->bits, 0, 'Check hit bits (0)' ); - float_is( $hit->significance, 3, 'Check hit significance' ); - is( $hit->num_hsps, 5, 'Check num_hsps' ); - - # Check first HSP - if ( defined( $hsp = $hit->next_domain ) ) { - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->hit->seq_id(), 'lcl|Protein_ID1.3|M3', 'Check for hit seq_id' ); - is( $hsp->query->seq_id(), 'DUF4229', 'Check for query seq_id' ); - - is( $hsp->hit->start, 305, 'Check for hit alifrom value' ); - is( $hsp->hit->end, 311, 'Check for hit ali to value' ); - is( $hsp->query->start, 34, 'Check for query hmmfrom value' ); - is( $hsp->query->end, 40, 'Check for query hmm to value' ); - is( $hsp->score, -4.3, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 1, 'Check for hsp c-Evalue' ); - - is( $hsp->length('query'), 7, 'Check for hsp query length' ); - is( $hsp->length('hit'), 7, 'Check for hsp hit length' ); - is( $hsp->length('total'), 7, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' ); - - ($hit->length == 0) ? - is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' ) - : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' ); - ($result->query_length == 0) ? - is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' ) - : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' ); - - is( $hsp->num_conserved, 6 ); - is( $hsp->num_identical, 4 ); - is( sprintf( "%.2f", $hsp->percent_identity ), 57.14 ); - is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.571 ); - is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.571 ); - is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.571 ); - is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.857 ); - is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.857 ); - is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.857 ); - - is (length($hsp->homology_string), length($hsp->query_string)); - - is( $hsp->consensus_string, - '', - 'Check for consensus structure string' - ); - is( $hsp->query_string, - 'laallAl', - 'Check for query string' - ); - is( $hsp->hit_string, - 'LAILSAI', - 'Check for hit string' - ); - is( $hsp->homology_string, - 'la+l A+', - 'Check for homology string' - ); - is( $hsp->posterior_string, - '3333332', - 'Check for posterior probability string' - ); - } - } - } - elsif ($counter == 3) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' ); - is( $result->algorithm_version, '3.0', 'Check algorithm version' ); - is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' ); - is( $result->sequence_file, 'test_seqs.seq_raw.txt', 'Check sequence_file' ); - - is( $result->query_name, 'ACR_tran', 'Check query_name' ); - is( $result->query_length, 1021, 'Check query_length' ); - is( $result->query_accession, 'PF00873.14', 'Check query_accession' ); - is( $result->query_description, - 'AcrB/AcrD/AcrF family', - 'Check query_description' - ); - is( $result->num_hits(), 1, 'Check num_hits' ); - - my ( $hsp, $hit ); - if ( $hit = $result->next_model ) { - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'lcl|Protein_ID1.3|M3', 'Check hit name' ); - is( $hit->description, - 'complement(48376..51420)', - 'Check for hit description' - ); - is( $hit->raw_score, 616.9, 'Check hit raw_score' ); - is( $hit->bits, 0, 'Check hit bits (0)' ); - float_is( $hit->significance, 9.3e-189, 'Check hit significance' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - - # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER. - # When is not known, sometimes it can be deduced from domain data '[]' - is( $hit->length, 0, 'Check hit length absence' ); - is( $hit->frac_aligned_query, 0.93 ); - is( $hit->frac_aligned_hit, undef ); - - if ( defined( $hsp = $hit->next_domain ) ) { - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->hit->seq_id(), 'lcl|Protein_ID1.3|M3', 'Check for hit seq_id' ); - is( $hsp->query->seq_id(), 'ACR_tran', 'Check for query seq_id' ); - - is( $hsp->hit->start, 11, 'Check for hit alifrom value' ); - is( $hsp->hit->end, 1000, 'Check for hit ali to value' ); - is( $hsp->query->start, 71, 'Check for query hmmfrom value' ); - is( $hsp->query->end, 1021, 'Check for query hmm to value' ); - is( $hsp->score, 616.6, 'Check for hsp score' ); - is( $hsp->bits, 0, 'Check for hsp bits (0)' ); - float_is( $hsp->evalue, 3.9e-189, 'Check for hsp c-Evalue' ); - - is( $hsp->length('query'), 951, 'Check for hsp query length' ); - is( $hsp->length('hit'), 990, 'Check for hsp hit length' ); - is( $hsp->length('total'), 1003, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 52, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 13, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 65, 'Check for hsp total gaps' ); - - ($hit->length == 0) ? - is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' ) - : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' ); - ($result->query_length == 0) ? - is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' ) - : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' ); - - is( $hsp->num_conserved, 690 ); - is( $hsp->num_identical, 262 ); - is( sprintf( "%.2f", $hsp->percent_identity ), 26.12 ); - is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.275 ); - is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.265 ); - is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.261 ); - is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.726 ); - is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.697 ); - is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.688 ); - - is (length($hsp->homology_string), length($hsp->query_string)); - - is( $hsp->consensus_string, - 'S-TTEEEEEEEETTSEEEEEEEESTTS-HHHHHHHHHHHHHHHGGGS-HHHHHH-EEEEEEECCECEEEEEEESSSTS-HHHHHHHHHHCTHHHHHTSTTEEEEEESS.--EEEEEEE-HHHHHCTT--HHHHHHHHHHHSSB-EEEECTT-SB-EEEE-SB---SCCHHCT-EEEETTSEEEEHHHCEEEEEEESSSS-EEEETTCEEEEEEEEEETTSBHHHHHHHHHHHHHCCGGGSSTTEEEEEEEESHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSSHCCCHHHHHHHHHHHHHHHHHHHHTT--EEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCSS-HHHHHHHHHHHHCCHHHHHHHHHHHHCCGGGGSBHHHHHHHHHHHHHHHHHHHHHHHHHHCCHHHHHHHCS----TT-CC..............................CHHHHHHHHHHHHHHHHHHHHHHHHHSCHHHHHHHHHHHHH.HHHHHCCS-BESS----TSEEEEEEE-STTC-HHHHHHHHHHHHHHHH...TTTTEEEEEEEESESSSS..E........CTTEEEEEEEE--CTTS-SCCCSHHHHHHHHHHHC.CTSTSSEEEEEE-SSSCCCSSSSSEEEEEEE.TSSSCHHHHHHHHHHHHHHHCCSTTEECEEESS-S-EEEEEEEE-HHHHHHCTB-HHHHHHHHHHHHT-..EEEEEEEETTE...EEEEEEEE-GGGSSSGGGGCC-EEEETTSE.EEECGGCEEEEEEEE-SEEEEETTCEEEEEEEEESTTS...-HHHHHHHHHHCCTT..SSTTEEEEEECHHHHHHHHCCCHHHHHHHHHHHHHHHHHHHCTSSSTCHHHHTTHHHHHHHHHHHHHHTT--BSHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCTTTBHHHHHHHHHHHHCHHHHHHHHHHHHHCCHHHHTT-STTHHHHHHHHHHHHHHHHHHHHCHHHHHHHHHHHHH', - 'Check for consensus structure string' - ); - is( $hsp->query_string, - 'gldglkyvsSqSseglssitvtFedgtdidiArqqvqnrlqeaknkLPeevqepgiskiktssseilvlavtskdgsltktdlrdlaesnikdqlsrveGVgdvqliGgsekavriwldpqklaklgltltdvvsalkeqnvqvaaGqlegqqeelliraqgrlqsaediekiivksqdgskvrlrDvAkvelgaeeeriaatlngkpavllavkklpganaievvkavkekleelketlPegveivvvydttefvrasieeVvktlleaivLvvlvlflFLqnlratlipaiavPlsllgtfavlkalglsiNlltlfgLvlAiGlvvDdAiVvvEnverkleeegekpleaalksmkeiegalvaialvllavfvPilflgGveGklfrqfaltivlaillsvlvaltltPalcallLkarkeekek..............................gffrefnrlfdalerrYekllekvlrhravvllvalllvvg.slllfvripkeflPeedegvlvtsvqlppgvsleqtekvlkqvekilk...ekpevesvfavtGfafagdta........gqnsakvfisLkpekerkeeektvealierlrkel.ekikganvellapiqlreletlsgvrlelqvklfgddleaLseareqllaalkqlpeladvrseqqedepqlqvkidrekaaalGvsiadinetlstalgg..syvndfieegr...vvkvvvqleedlrsspedlkklyvrnkkgk.mvplsavakieeekgpnsierenglrsveisgevaegd...slgeaeeavekiakqvklPagvgiewtglseqeqeagnsllllvalalllvflvLaalyeslsdpllvlltvPlalvGallalllrglelsviaqvGlilliGlavkNailivefakelrekeglsleeAileaaklRLrPiLMTalaailGvlPLalstGaGselqqplgivvlGGlvtstvLtlllvPvlYvlva', - 'Check for query string' - ); - is( $hsp->hit_string, - 'TVNDIEHIESQSLFGYGIVKIFFQPDVDIRTANAQVTAISQTVLKQMPPGITPPLILNYNAATVPILQLALSSK--VLSEDRIFDLGQNFIRPQLATVRGSAVPSPYGGKVRQIQIDLDPQAMQSKRVSPDDVARALSQQNLVLSPGTEKIGSFEYNVKINDSPDEFTLLNNLPIKNVGGVTIFIHDVAHVRDGFPPQINVVRDDGRRSVLMTILKNGATSTLDIIQGTKELIPKLKETLPNNLVLKVVGDQSIFVKSAISGVVREGTIAGILTSVMILLFLGSWRSTIIISMSIPLAILSAIIFLSLTGNTLNVMTLGGLALAVGMLVDDATVVIENINHHLEM-GKPTTKAIIDAARQIIQPALVSTLSICIVFVPMFSLTGVPRYLFIPMAEAVIFGMLSSFVLSQTFVPTVANKLLKYQTQHFKHehhtdahrpehdpnfkvhrsvkasifqffiNIQQGFEKRFTKVRLVYRSILHFALDHRKKFITLFLGFVIVsCVTLFPLLGKNFFPEVDSGDMKIHIRVQVGTRIEETAKQFDLIENTIRrlvPQNELDTIVDNIGLSVSGINTaysstgtiGPQDGDILIHLNEN------HHPTKEYMKKLRETLpRAFPGVS-FAFLPADITSQILNFGVPAPIDIRVDGPNHDNNLKFVRAILKDIRNVPGIADLRVQQATNYPQFNVDIDRSQAKNYGLTEGDITNSLVATLAGtsQVAPTFWLNNKngvSYPIVIQMPQYKINSLADLANIPITTKESSsMQVLGGLGSIERDQSDSVISHYNIKPSFDIFASLQGRDlgsISGDIETIIQHHHQE--LPKGVSVKLQGQVPIMQDSYRGLSLGLVASIILIYFLVVVNFESWLDPFVIITALPAALAGIVWMLYLTGTTLSVPALTGAIMCMGVATANSILVISFARERLA-IVKDSTQAALEAGYTRFRPVLMTASAMLIGMIPMALGLGDGGEQNAPLGRAVIGGLLLATIATLIFVPVVFSVVH', - 'Check for hit string' - ); - is( $hsp->homology_string, - ' ++ +++++SqS g + + F+ + di A+ qv++ q + +++P ++++p i +++ +il+la++sk l++ + dl ++ i++ql+ v G + +Gg+ ++++i ldpq++++ +++++dv++al++qn + G+ + + e+++++++ + ++++ +k+ g + ++DvA+v +g + ++++ +g vl+++ k ++++++ ke +++lketlP+++ ++vv d++ fv+++i+ Vv + +a +L ++++lFL+++r+t+i+ +++Pl++l ++++l++ g ++N++tl+gL+lA+G++vDdA Vv+En+ +le+ g+ +a++ ++++i + + ++l++++vfvP+++l+Gv lf ++a ++++ +l s +++ t++P ++ lLk + ++ ++ ++ + f++ f ++ Y+ +l++ l hr+ ++++l +v++ ++ lf+ ++k+f+Pe d g++ ++++++ g+ +e+t+k + +e++++ ++e + ++ G + +g + g++ +++ i+L ++ ++ ++ +++lr+ l ++++g++ +++ p +++ + gv + ++ + g ++++ + ++++l+ ++++p++ad+r++q ++ pq++v+idr +a+++G++ di + l + l g +++ +f +++ + +v+q+++ + +s+ dl+++++++k++ m l+ + +ie+ ++ + i+++n ++s+ i ++++ +d ++g++e+++++ +++ lP+gv+++ +g+ q ++ l+l ++++++l++++ + +es++dp+++++ +P al+G + l+l+g++lsv a+ G i+ +G+a N il+++fa+e + ++ +A+lea+ +R+rP+LMTa a+++G++P+al+ G+G e plg +v+GGl+++t+ tl +vPv++ +v+', - 'Check for homology string' - ); - is( $hsp->posterior_string, - '578899********************************************************************..*****************************************************************************************************************************************************************************************************************************************************************************.***************************************************************************8776544446799********************9655555578*************************999999887775899******************************************8875446889999999999888774331111111134445555555444......45688999999999945678887.7888999*999************************************************************************8877666655434556776544422279***********************998764889*******************************8876222578999999999888..********************************************************************************************************.888899*****************************************************************9997', - 'Check for posterior probability string' - ); - } - } - } -} - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmscan_multi_domain.out'), - -verbose => 1 -); - -my @multi_hits = ( - [ 'PPC', - 'Bacterial pre-peptidase C-terminal domain', - '111.0', 3.1e-32, 6, - [ [ 4, 59, 117, 183, 0.5, 0.16 ], - [ 12, 58, 347, 388, -0.6, 0.36 ], - [ 1, 69, 470, 549, 71.3, 1.3e-23 ], - [ 15, 25, 582, 603, -3.2, 2 ], - [ 13, 36, 987, 1019, -1.1, 0.5 ], - [ 1, 69, 1087, 1168, 54.4, 2.4e-18 ] - ] - ], - [ 'HemolysinCabind', - 'Hemolysin-type calcium-binding repeat (2 copies)', - '47.9', 4.7e-13, 3, - [ [ 2, 13, 1214, 1225, 5.9, 0.0026 ], - [ 1, 18, 1231, 1248, 10.8, 6.8e-5 ], - [ 4, 18, 1243, 1257, 11.4, 4.3e-05 ] - ] - ] -); - -while ( $result = $searchio->next_result ) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSCAN', 'Check algorithm' ); - is( $result->algorithm_version, '3.0', 'Check algorithm version' ); - is( $result->hmm_name, - '/data/biodata/HMMerDB/Pfam-A.hmm', - 'Check hmm_name' - ); - is( $result->sequence_file, 'BA000019.orf37.fasta', - 'Check sequence_file' ); - is( $result->query_name, 'BA000019.orf37', 'Check query_name' ); - is( $result->query_length, '1418', 'Check query_length' ); - is( $result->query_description, '', 'Check query_description' ); - is( $result->num_hits(), 2, 'Check num_hits' ); - my ( $hsp, $hit ); - - while ( $hit = $result->next_model ) { - if ($hit->name eq 'HemolysinCabind') { - # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER. - # When is not known, sometimes it can be deduced from domain data '[]' - is( $hit->length, 18, 'Check hit length' ); - is( $hit->frac_aligned_query, 0.03 ); - is( $hit->frac_aligned_hit, '1.00' ); - } - my @expected = @{ shift @multi_hits }; - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, shift @expected, 'Check hit name' ); - is( $hit->description, shift @expected, 'Check for hit description' ); - is( $hit->raw_score, shift @expected, 'Check hit raw_score' ); - float_is( - $hit->significance, - shift @expected, - 'Check hit significance' - ); - is( $hit->num_hsps, shift @expected, 'Check num_hsps' ); - my @hsp_list = @{ shift @expected }; - - while ( defined( $hsp = $hit->next_domain ) ) { - my @hsp_exp = @{ shift @hsp_list }; - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->hit->start, - shift @hsp_exp, - 'Check for hit envfrom value' - ); - is( $hsp->hit->end, shift @hsp_exp, - 'Check for hit env to value' ); - is( $hsp->query->start, - shift @hsp_exp, - 'Check for query hmmfrom value' - ); - is( $hsp->query->end, - shift @hsp_exp, - 'Check for query hmm to value' - ); - is( $hsp->score, shift @hsp_exp, 'Check for hsp score' ); - float_is( $hsp->evalue, shift @hsp_exp, - 'Check for hsp c-Evalue' ); - } - } -} - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmscan_sec_struct.out'), - -verbose => 1 -); - -@multi_hits = ( - [ 'HTH_AraC', - 'Bacterial regulatory helix-turn-helix proteins, AraC family', - '41.3', 6.7e-11, 2, - [ [ 'siadiAeevgfSpsyfsrlFkkytGvt', 'SLMELSRQVGLNDCTLKRGFRLVFDTT' ], - [ 'nwsiadiAeevgf-SpsyfsrlFkkytGvtPsqyr', - 'EINISQAARRVGFsSRSYFATAFRKKFGINPKEFL' - ] - ] - ], - [ 'PKSI-KS_m3', - '', '38.2', 3.8e-12, 2, - [ [ 'GPSvtVDTACSSSLvA', 'GPSVTVDTLCSSSLVA' ], - [ 'GPSvtVDTACSSSLv', 'GPNLVIDSACSSALV' ] - ] - ], - [ 'DUF746', - 'Domain of Unknown Function (DUF746)', - '13.9', 0.023, 2, - [ [ 'rllIrlLsqplslaeaadqlgtdegiiak', - 'EILIRNLENPPSLMELSRQVGLNDCTLKR' - ], - [ 'plslaeaadqlgtdeg', 'EINISQAARRVGFSSR' ] - ] - ] -); - -my $result_counter = 0; -while ( $result = $searchio->next_result ) { - $result_counter++; - if ($result_counter == 1) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSCAN', 'Check algorithm' ); - is( $result->algorithm_version, '3.0', 'Check algorithm version' ); - is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' ); - is( $result->sequence_file, 'BA000019.orf8.fasta', 'Check sequence_file' ); - is( $result->query_name, 'BA000019.orf8', 'Check query_name' ); - is( $result->query_length, 348, 'Check query_length' ); - is( $result->query_description, '', 'Check query_description' ); - is( $result->num_hits(), 3, 'Check num_hits' ); - my ( $hsp, $hit ); - - while ( $hit = $result->next_model ) { - if ($hit->name eq 'PKSI-KS_m3') { - # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER. - # When is not known, sometimes it can be deduced from domain data '[]' - is( $hit->length, 16, 'Check hit length' ); - is( $hit->frac_aligned_query, 0.09 ); - is( $hit->frac_aligned_hit, '1.00' ); - } - my @expected = @{ shift @multi_hits }; - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, shift @expected, 'Check hit name' ); - is( $hit->description, shift @expected, 'Check for hit description' ); - is( $hit->raw_score, shift @expected, 'Check hit raw_score' ); - float_is( - $hit->significance, - shift @expected, - 'Check hit significance' - ); - is( $hit->num_hsps, shift @expected, 'Check num_hsps' ); - my @hsp_list = @{ shift @expected }; - - while ( defined( $hsp = $hit->next_domain ) ) { - my @hsp_exp = @{ shift @hsp_list }; - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->hit_string, shift @hsp_exp, 'Check hit sequence' ); - is( $hsp->query_string, shift @hsp_exp, 'Check query sequence' ); - } - } - } - elsif ($result_counter == 2) { - is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' - ); - is( $result->algorithm, 'HMMSCAN', 'Check algorithm' ); - is( $result->algorithm_version, '3.0', 'Check algorithm version' ); - is( $result->query_name, 'lcl|aorf_00010|P1', 'Check query_name' ); - is( $result->query_length, 132, 'Check query_length' ); - is( $result->query_description, 'IS481.original transposase', 'Check query_description' ); - is( $result->num_hits(), 1, 'Check num_hits' ); - my ( $hsp, $hit ); - - while ( $hit = $result->next_model ) { - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'IS481.original.hmm', 'Check hit name' ); - is( $hit->description, '', 'Check for hit description' ); - is( $hit->raw_score, '130.0', 'Check hit raw_score' ); - float_is( $hit->significance, 3.4e-040, 'Check hit significance' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - - while ( defined( $hsp = $hit->next_domain ) ) { - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->query_string, - 'GEIETAHPSYLGSQDTFYVGNITGAGR----------------------------IYQQTFVDTYSKWDSTKLYTTKTPITAADLLNDRVLSFFA-EQGMGIIRLLTDRSTEYCSKA--ETQDYELCLALNDIEHTKTKVYHPQTNDICRRFHKA', - 'Check for query string' - ); - is( $hsp->hit_string, - 'kRYErdhPgeLvhmDvkklgripdgGgvkighRwrgrtrgrgkrtnqsrnrglgkayvitaiDDhSRfayaeilsdettttaadfllraaayfygkigeeiitrvlTDnGaayrskkrsakhdFqealaelGIkhilTrprsPqTNGKiERFhrT', - 'Check for hit string' - ); - is( $hsp->homology_string, - '+++E++hP +L+++D++++g+i + G+ +y++t++D++S+ +++++++t++taad l++ ++ f+ ++++i r lTD+ ++y+sk ++ d+ +la ++I+h++T++++PqTN ++ RFh+ ', - 'Check for homology string' - ); - is( $hsp->posterior_string, - '579*******************88888............................****************************************.********************8..**********************************95', - 'Check for posterior probability string' - ); - } - } - } -} - -# Make sure that you can also directly call the hmmer2 and hmmer3 subclasses -$searchio = Bio::SearchIO->new( - -format => 'hmmer2', - -file => test_input_file('hmmpfam.out') -); -is( ref($searchio), 'Bio::SearchIO::hmmer2', - 'Check if loading hmmpfam output via the hmm2 parser directly works' ); -is( ref( $searchio->next_result ), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' -); - -$searchio = Bio::SearchIO->new( - -format => 'hmmer2', - -file => test_input_file('hmmsearch.out') -); -is( ref($searchio), 'Bio::SearchIO::hmmer2', - 'Check if loading hmmsearch2 output via the hmm2 parser directly works' ); -is( ref( $searchio->next_result ), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' -); - -$searchio = Bio::SearchIO->new( - -format => 'hmmer3', - -file => test_input_file('hmmscan.out') -); -is( ref($searchio), 'Bio::SearchIO::hmmer3', - 'Check if loading hmmscan output via the hmm3 parser directly works' ); -is( ref( $searchio->next_result ), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' -); - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmsearch3.out') -); -is( ref($searchio), 'Bio::SearchIO::hmmer3', - 'Check if loading hmmsearch3 output via the hmm3 parser directly works' ); -is( ref( $searchio->next_result ), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' -); - -# Make sure that you can also specify the -version parameter directly -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmpfam.out'), - -version => 2 -); -is( ref($searchio), 'Bio::SearchIO::hmmer2', - 'Check if selecting the correct hmmpfam parser using -version works' ); -is( ref( $searchio->next_result ), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' -); - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmsearch.out'), - -version => 2 -); -is( ref($searchio), 'Bio::SearchIO::hmmer2', - 'Check if selecting the correct hmmsearch2 parser using -version works' ); -is( ref( $searchio->next_result ), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' -); - -$searchio = Bio::SearchIO->new( - -format => 'hmmer3', - -file => test_input_file('hmmscan.out'), - -version => 3 -); -is( ref($searchio), 'Bio::SearchIO::hmmer3', - 'Check if selecting the correct hmmscan parser using -version works' ); -is( ref( $searchio->next_result ), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' -); - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmsearch3.out'), - -version => 3 -); -is( ref($searchio), 'Bio::SearchIO::hmmer3', - 'Check if selecting the correct hmmsearch3 parser using -version works' ); -is( ref( $searchio->next_result ), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' -); - -my $cat_command = ($^O =~ m/mswin/i) ? 'type' : 'cat'; -my $pipestr = "$cat_command " . test_input_file('hmmpfam.out') . " |"; -open( my $pipefh, $pipestr ); - -$searchio = Bio::SearchIO->new( - -format => 'hmmer', - -fh => $pipefh -); -is( ref($searchio), 'Bio::SearchIO::hmmer2', - 'Check if reading from a pipe works' ); -$result = $searchio->next_result; -is( ref($result), - 'Bio::Search::Result::HMMERResult', - 'Check for the correct result reference type' -); -is( $result->num_hits(), 2, 'Check num_hits' ); - -# bug 3376 -{ - my $in = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('pfamOutput-bug3376.out') - ); - my $result = $in->next_result; - my $hit = $result->next_hit; - my $hsp = $hit->next_hsp; - is( $result->query_length, 97, 'Check query_length' ); - is( $hit->length, 95, 'Check hit length' ); - is( $hsp->hit_string, - 'svfqqqqssksttgstvtAiAiAigYRYRYRAvtWnsGsLssGvnDnDnDqqsdgLYtiYYsvtvpssslpsqtviHHHaHkasstkiiikiePr', - 'bug3376' - ); -} -# end bug 3376 - -# bug 3421 - making sure a full line of dashes in an HSP is parsed correctly -{ - my $in = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmpfam_HSPdashline.txt') - ); - my $result = $in->next_result; - my $hit = $result->next_hit; - my $hsp = $hit->next_hsp; - is( $hsp->length, '561', - 'bug3421 - Check if can correctly parse an HSP with line full of dashes' - ); -} -# end bug 3421 - -# bug 3302 -{ - my $in = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('hmmpfam_multiresult.out') - ); - my $result = $in->next_result; - $result = $in->next_result; - my $hit = $result->next_hit; - is( $hit->name, 'IS66_ORF3.uniq', 'bug3302 - Check if can parse multiresult hmmer' ); -} -# end bug 3302 - -# HMMER 3.1 nhmmer output -{ - my $in = Bio::SearchIO->new( - -format => 'hmmer', - -version => 3, - -file => test_input_file('nhmmer-3.1.out') - ); - my $result = $in->next_result; - is( $result->algorithm, 'NHMMER', 'Check algorithm' ); - is( $result->algorithm_version, '3.1b1', 'Check nhmmer algorithm version' ); - is( $result->hmm_name, - '../HMMs/A_HA_H7_CDS_nucleotide.hmm', - 'Check hmm_name' - ); - is( $result->sequence_file, - 'tmp.fa', - 'Check sequence_file' - ); - is( $result->query_name, 'A_HA_H7_CDS_nucleotide', 'Check query_name' ); - is( $result->query_length, 1683, 'Check query_length' ); - is( $result->query_accession, '', 'Check query_accession' ); - is( $result->query_description, '', 'Check query_description' ); - is( $result->num_hits(), 2, 'Check num_hits' ); - - my $hit = $result->next_hit; - is( ref($hit), 'Bio::Search::Hit::HMMERHit', - 'Check for the correct hit reference type' ); - is( $hit->name, 'seq1', 'Check nhmmer hit name' ); - is( $hit->description, 'Description of seq1', 'Check nhmmer hit description' ); - is( $hit->score, 148.2, 'Check nhmmer hit score' ); - is( $hit->bits, 0, 'Check nhmmer hit bits (0)' ); - float_is( $hit->significance, 3.2e-48, 'Check nhmmer hit significance' ); - is( $hit->num_hsps, 1, 'Check num_hsps' ); - - # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER. - # When is not known, sometimes it can be deduced from domain data '[]' - is( $hit->length, 151, 'Check nhmmer hit length' ); - is( $hit->frac_aligned_query, 0.09 ); - is( $hit->frac_aligned_hit, '1.00' ); - - my $hsp = $hit->next_hsp; - is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', - 'Check for correct hsp reference type' ); - is( $hsp->hit->seq_id(), 'seq1', 'Check for nhmmer hit seq_id' ); - is( $hsp->query->seq_id(), 'A_HA_H7_CDS_nucleotide', 'Check for nhmmer query seq_id' ); - - is( $hsp->start('hit'), 1, 'Check nhmmer hsp hit start' ); - is( $hsp->end('hit'), 151, 'Check nhmmer hsp hit end' ); - is( $hsp->start('query'), 258, 'Check nhmmer hsp query start' ); - is( $hsp->end('query'), 411, 'Check nhmmer hsp query end' ); - is( $hsp->strand('hit'), 1, 'Check nhmmer hsp hit strand' ); - is( $hsp->strand('query'), 1, 'Check nhmmer hsp query strand' ); - is( $hsp->score, 148.2, 'Check nhmmer hsp score' ); - is( $hsp->bits, 0, 'Check nhmmer hsp bits (0)' ); - float_is( $hsp->significance, 3.2e-48, 'Check nhmmer hsp evalue' ); - - is( $hsp->length('query'), 154, 'Check for hsp query length' ); - is( $hsp->length('hit'), 151, 'Check for hsp hit length' ); - is( $hsp->length('total'), 154, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 3, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 3, 'Check for hsp total gaps' ); - - ($hit->length == 0) ? - is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' ) - : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' ); - ($result->query_length == 0) ? - is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' ) - : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' ); - - is( $hsp->num_conserved, 151 ); - is( $hsp->num_identical, 146 ); - is( sprintf( "%.2f", $hsp->percent_identity ), 94.81 ); - is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.948 ); - is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.967 ); - is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.948 ); - is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.981 ); - is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), '1.000' ); - is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.981 ); - - is( $hsp->consensus_string, - '', - 'Check for consensus structure string' - ); - is( $hsp->query_string, - 'attcctagaattttcagctgatttaattattgagaggcgagaaggaagtaatgatgtctgttatcctgggaaattcgtaaatgaagaagctctgaggcaaattctcagggggtcaggcggaattgacaaggagacaatgggattcacatatagc', - 'Check for nhmmer query string' - ); - is( $hsp->homology_string, - 'attcctagaattttcagc+gatttaattattgagaggcgagaaggaagt gatgtctgttatcctgggaaattcgt+aatgaagaagctctgaggcaaattctcaggg+gtcaggcggaattgacaaggagacaatgggattcac+ta+agc', - 'Check for nhmmer homology string' - ); - is( $hsp->hit_string, - 'ATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGT---GATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACCTACAGC', - 'Check for nhmmer hit string' - ); - is( $hsp->posterior_string, - '689*******************************************777...***************************************************************************************************986', - 'Check for nhmmer posterior probability string' - ); - is( length( $hsp->homology_string ), - length( $hsp->hit_string ), - 'Check if nhmmer homology string and hit string have an equal length' - ); - is( length( $hsp->query_string ), - length( $hsp->homology_string ), - 'Check if nhmmer query string and homology string have an equal length' - ); - - $hit = $result->next_hit; - is( $hit->name, 'seq2', 'Check nhmmer hit name' ); - is( $hit->description, 'Description of seq2', 'Check nhmmer hit description' ); - is( $hit->score, 38.6, 'Check nhmmer hit score' ); - is( $hit->bits, 0, 'Check nhmmer hit bits (0)' ); - float_is( $hit->significance, 3.9e-15, 'Check nhmmer hit significance' ); - is( $hit->length, 60, 'Check nhmmer hit length' ); - - $hsp = $hit->next_hsp; - is( $hsp->hit->seq_id(), 'seq2', 'Check for nhmmer hit seq_id' ); - is( $hsp->query->seq_id(), 'A_HA_H7_CDS_nucleotide', 'Check for nhmmer query seq_id' ); - - is( $hsp->start('query'), 34, 'Check nhmmer hsp query start' ); - is( $hsp->end('query'), 92, 'Check nhmmer hsp query end' ); - is( $hsp->start('hit'), 1, 'Check nhmmer hsp hit start' ); - is( $hsp->end('hit'), 59, 'Check nhmmer hsp hit end' ); - is( $hsp->strand('hit'), -1, 'Check nhmmer hsp hit strand' ); - is( $hsp->strand('query'), 1, 'Check nhmmer hsp query strand' ); - is( $hsp->score, 38.6, 'Check nhmmer hsp score' ); - is( $hsp->bits, 0, 'Check nhmmer hsp bits (0)' ); - float_is( $hsp->significance, 3.9e-15, 'Check nhmmer hsp evalue' ); - - is( $hsp->length('query'), 59, 'Check for hsp query length' ); - is( $hsp->length('hit'), 59, 'Check for hsp hit length' ); - is( $hsp->length('total'), 59, 'Check for hsp total length' ); - is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' ); - is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' ); - is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' ); - - ($hit->length == 0) ? - is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' ) - : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' ); - ($result->query_length == 0) ? - is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' ) - : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' ); - - is (length($hsp->homology_string), length($hsp->query_string)); - - is( $hsp->consensus_string, - '', - 'Check for consensus structure string' - ); - is( $hsp->query_string, - 'gtgatgattgcaacaaatgcagacaaaatctgccttgggcaccatgctgtgtcaaacgg', - 'Check for nhmmer query string' - ); - is( $hsp->homology_string, - 'g+gat+att+c+acaaatgcagacaa atctgccttgggca+catgc+gtgtcaaacgg', - 'Check for nhmmer homology string' - ); - is( $hsp->hit_string, - 'GCGATCATTCCGACAAATGCAGACAAGATCTGCCTTGGGCATCATGCCGTGTCAAACGG', - 'Check for nhmmer hit string' - ); - is( $hsp->posterior_string, - '6899****************************************************986', - 'Check for nhmmer posterior probability string' ); - is( length( $hsp->homology_string ), - length( $hsp->hit_string ), - 'Check if nhmmer homology string and hit string have an equal length' - ); - is( length( $hsp->query_string ), - length( $hsp->homology_string ), - 'Check if nhmmer query string and homology string have an equal length' - ); -} -# end HMMER 3.1 nhmmer output - -# Test HIT filtering by SIGNIFICANCE -$searchio = Bio::SearchIO->new( - '-format' => 'hmmer', - '-file' => test_input_file('hmmpfam_cs.out'), - '-signif' => 1e-100 -); -# NOTE: For Hmmer2, if a single model pass the HIT filter -# but it shows 2 domains, it counts as 2 hits (Glu_synthase) -my @valid = qw( GATase_2 - Glu_syn_central - Glu_synthase - Glu_synthase - GXGXG ); -$result = $searchio->next_result; -is( $result->num_hits(), 5, 'Check Significance filtered num_hits' ); -while ( my $hit = $result->next_hit ) { - is( $hit->name, shift @valid, 'Check Significance filtered hit ID' ); -} -is( @valid, 0 ); - -# Test HIT filtering by SCORE -$searchio = Bio::SearchIO->new( - '-format' => 'hmmer', - '-file' => test_input_file('hmmsearch.out'), - '-score' => 390 -); -# NOTE: This Hmmer2 report top hit (score 393.8) have 4 domains, -# so it count as 4 hits (PAB2_ARATH) -@valid = qw( PAB2_ARATH - PAB2_ARATH - PAB2_ARATH - PAB2_ARATH ); -$result = $searchio->next_result; -is( $result->num_hits(), 4, 'Check Score filtered num_hits' ); -while ( my $hit = $result->next_hit ) { - is( $hit->name, shift @valid, 'Check Score filtered hit ID' ); -} -is( @valid, 0 ); - -# Test HIT filtering by BITS -$searchio = Bio::SearchIO->new( - '-format' => 'hmmer', - '-file' => test_input_file('hmmsearch3_multi.out'), - '-bits' => 10 -); -# NOTE: No HMMER report use Bits, so this will filter out everything -$result = $searchio->next_result; -is( $result->num_hits(), 0, 'Check Bits filtered num_hits' ); -$result = $searchio->next_result; -is( $result->num_hits(), 0, 'Check Bits filtered num_hits' ); -$result = $searchio->next_result; -is( $result->num_hits(), 0, 'Check Bits filtered num_hits' ); - -# Test HIT filtering by HIT_FILTER -my $filt_func = sub { - my $hit = shift; - $hit->frac_aligned_query >= 0.20; -}; -$searchio = Bio::SearchIO->new( - '-format' => 'hmmer', - '-file' => test_input_file('hmmscan_multi_domain.out'), - '-hit_filter' => $filt_func -); -# NOTE: In Hmmer3 reports, the multiple domains of a model are treated -# as HSPs instead of Hits (like it is in Hmmer2 reports) -@valid = qw( PPC ); -$result = $searchio->next_result; -is( $result->num_hits(), 1, 'Check Hit_filter filtered num_hits' ); -while ( my $hit = $result->next_hit ) { - is( $hit->name, shift @valid, 'Check Hit_filter filtered hits ID' ); -} -is( @valid, 0 ); - -# Test for correct parsing of results from query sequences containing stops. -# Without the patch, parsing dies with "Quantifier follows nothing in regex;" error -$searchio = Bio::SearchIO->new( - '-format' => 'hmmer', - '-file' => test_input_file('hmmscan_qry_stop.txt'), -); -eval { $searchio->next_result; }; -is( $@, '', 'Correct parsing of alignments with stops' ); - - -# Test for correct parsing of phmmer results -# Without the patch, parsing skips all lines from phmmer output -{ - my $searchio = Bio::SearchIO->new( - -format => 'hmmer', - -file => test_input_file('phmmer.out') - ); - - my $result = $searchio->next_result; - if ( defined $result ) { - - is( $result->algorithm, 'PHMMER', 'Check algorithm' ); - is( $result->query_name, 'A0R3R7', 'Check query_name' ); - is( $result->query_length, 762, 'Check query_length absence' ); - is( $result->query_description, '', 'Check query_description' ); - is( $result->num_hits(), 8, 'Check num_hits' ); - - my $hit = $result->next_model; - if ( defined $hit ) { - is( $hit->name, 'cath|4_0_0|1vs0A03/639-759', 'query name okay' ); - is( $hit->num_hsps(), 1, 'Check num_hsps' ); - } - } - -} diff --git a/t/SearchIO/hmmer_pull.t b/t/SearchIO/hmmer_pull.t deleted file mode 100644 index 00b9f7d4d..000000000 --- a/t/SearchIO/hmmer_pull.t +++ /dev/null @@ -1,219 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id: SearchIO_hmmer_pull.t 14984 2008-11-11 18:39:20Z sendu $ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 290); - - use_ok('Bio::SearchIO'); -} - -my $searchio = Bio::SearchIO->new(-format => 'hmmer_pull', -file => test_input_file('hmmpfam_fake.out'), -verbose => -1); -my @data = ([qw(roa1_drome roa2_drome)], [2, 1], [1, 2], [2, 1]); -while (my $result = $searchio->next_result) { - is ref($result), 'Bio::Search::Result::HmmpfamResult'; - is $result->algorithm, 'HMMPFAM'; - is $result->algorithm_version, '2.1.1'; - is $result->hmm_name, 'pfam'; - is $result->hmm_file, $result->hmm_name; - is $result->database_name, $result->hmm_name; - is $result->sequence_file, '/home/birney/src/wise2/example/road.pep'; - is $result->sequence_database, $result->sequence_file; - is $result->query_name, shift @{$data[0]}; - is $result->num_hits(), shift @{$data[1]}; - is $result->no_hits_found, 0; - - is $result->query_accession, ''; - is $result->query_description, ''; - ok ! $result->query_length; - ok ! $result->database_letters; - ok ! $result->database_entries; - is $result->algorithm_reference, ''; - is $result->get_parameter('test'), undef; - is $result->available_parameters, undef; - is $result->get_statistic('test'), undef; - is $result->available_statistics, undef; - - my @orig_order = $result->hits; - is @orig_order, shift @{$data[3]}; - if (@orig_order > 1) { - isnt $orig_order[0]->name, $orig_order[1]->name; - $result->sort_hits(sub{$Bio::Search::Result::HmmpfamResult::a->[2] - <=> - $Bio::Search::Result::HmmpfamResult::b->[2]}); - my @hits = $result->hits; - is @hits, @orig_order; - is $hits[0]->name, $orig_order[1]->name; - $result->sort_hits(sub{$Bio::Search::Result::HmmpfamResult::b->[4] - <=> - $Bio::Search::Result::HmmpfamResult::a->[4]}); - } - - my @hit_data = ([qw(SEED TEST)], [146.1, "5.0"], [6.3e-40, 7.2], [2, 1], [77, undef], [2, 0], [1, 2], - ["33 34 36 38 43 45 47 48 51 53 55 57 58 65 68 71 73 74 76 88 98 99 124 125 126 127 129 132 135 140 142 145 146 148 149 151 153 154 156 157 158 159 160 161 164 165 166 167 168 169 170 178 187 189 194", ''], - ["1 2 3 4 6 9 11 12 13 15 16 17 19 21 22 23 25 26 28 30 31 33 39 40 41 42 43 44 46 47 48 49 50 51 52 60 61 70 72 73 77", ''], - ["1-6 8-13 15-23 25-33 39-56 58-63 67-77", '']); - while (defined(my $hit = $result->next_model)) { - is ref($hit), 'Bio::Search::Hit::HmmpfamHit'; - is $hit->name, shift @{$hit_data[0]}; - is $hit->raw_score, shift @{$hit_data[1]}; - is $hit->score, $hit->raw_score; - float_is $hit->significance, shift @{$hit_data[2]}; - float_is $hit->p, $hit->significance; - is $hit->num_hsps, shift @{$hit_data[3]}; - is $hit->n, $hit->num_hsps; - is $hit->algorithm, $result->algorithm; - is $hit->overlap, 0; - is $hit->rank, shift @{$hit_data[6]}; - is $hit->tiled_hsps, 0; - is $hit->strand('query'), 1; - is $hit->strand('hit'), 1; - my @strands = $hit->strand; - is "@strands", "1 1"; - - is $hit->description, undef; - is $hit->accession, undef; - ok ! $hit->locus; - ok ! $hit->bits; - ok ! $result->logical_length('query'); - ok ! $result->frame; - is $hit->each_accession_number, undef; - - is $hit->length, shift @{$hit_data[4]}; - is $hit->logical_length('hit'), $hit->length; - - if ($result->query_name eq 'roa1_drome') { - my @inds = $hit->seq_inds('query', 'identical'); - is "@inds", shift @{$hit_data[7]}; - @inds = $hit->seq_inds('hit', 'identical'); - is "@inds", shift @{$hit_data[8]}; - @inds = $hit->seq_inds('hit', 'conserved', 1); - is "@inds", shift @{$hit_data[9]}; - } - - if ($hit->name eq 'SEED') { - my $best = $hit->hsp('best'); - float_is($best->evalue, 1.1e-18); - my $worst = $hit->hsp('worst'); - float_is($worst->evalue, 2.2e-17); - is $hit->start('query'), 33; - is $hit->start('hit'), 1; - is $hit->end('query'), 194; - is $hit->end('hit'), 77; - my @range = $hit->range('query'); - is "@range", '33 194'; - @range = $hit->range('hit'); - is "@range", '1 77'; - - if ($hit->query_name eq 'roa1_drome') { - is $hit->length_aln('query'),142; - is $hit->length_aln('hit'), 77; - is $hit->gaps('total'), 14; - is $hit->gaps('query'), 13; - is $hit->gaps('hit'), 1; - is $hit->matches('id'), 41; - is $hit->matches('cons'), 24; - is $hit->frac_identical, 0.387; - is $hit->frac_conserved, 0.169; - ok ! $hit->frac_aligned_query; - is $hit->frac_aligned_hit, '1.00'; - is $hit->num_unaligned_hit, 1; - is $hit->num_unaligned_query, 13; - } - } - - my @hsps = $hit->hsps; - is @hsps, shift @{$hit_data[5]}; - - my @hsp_data = ([1, 1], [77, 77], [33, 124], [103, 194], [71.2, 75.5], [2.2e-17, 1.1e-18], - ['LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP', - 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV'], - [7, 6], - ['lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv', - 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv'], - ['lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ', - 'lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v'], - [1, 0], [8, 6], [1, 2], ['33 103', '124 194'], [78, 77], [22, 33], [33, 23], - ['0.3099', '0.4648'], ['0.2857', '0.4286'], ['0.2821', '0.4286']); - - while (defined(my $hsp = $hit->next_domain)) { - is ref($hsp), 'Bio::Search::HSP::HmmpfamHSP'; - is $hsp->hit->start, shift @{$hsp_data[0]}; - is $hsp->hit->end, shift @{$hsp_data[1]}; - is $hsp->query->start, shift @{$hsp_data[2]}; - is $hsp->query->end, shift @{$hsp_data[3]}; - is $hsp->start('hit'), $hsp->hit->start; - is $hsp->end('hit'),$hsp->hit->end; - is $hsp->start('query'), $hsp->query->start; - is $hsp->end('query'), $hsp->query->end; - is $hsp->strand('hit'), 1; - is $hsp->strand('query'), 1; - is $hsp->score, shift @{$hsp_data[4]}; - ok ! $hsp->bits; - float_is($hsp->evalue, shift @{$hsp_data[5]}); - ok ! $hsp->pvalue; - float_is($hsp->significance, $hsp->evalue); - is $hsp->algorithm, $result->algorithm; - is $hsp->rank, shift @{$hsp_data[12]}; - my @range = $hsp->range; - is "@range", shift @{$hsp_data[13]}; - is $hsp->n, $hit->num_hsps; - is $hsp->length('query'), 71; - is $hsp->length('hit'), 77; - my $locseq = $hsp->seq('hit'); - - if ($result->query_name eq 'roa1_drome') { - is ref($locseq), 'Bio::LocatableSeq'; - my $aln = $hsp->get_aln('hit'); - is ref($aln), 'Bio::SimpleAlign'; - is $hsp->query_string, shift @{$hsp_data[6]}; - is $hsp->gaps('query'), shift @{$hsp_data[7]}; - is $hsp->gaps('hit'), shift @{$hsp_data[10]}; - is $hsp->gaps('total'), shift @{$hsp_data[11]}; - is $hsp->hit_string, shift @{$hsp_data[8]}; - is $hsp->homology_string, shift @{$hsp_data[9]}; - is $hsp->seq_str('hit'), $hsp->hit_string; - is $hsp->seq_str('query'), $hsp->query_string; - is $hsp->seq_str('homology'), $hsp->homology_string; - is length($hsp->homology_string), length($hsp->hit_string); - is length($hsp->query_string), length($hsp->homology_string); - is $hsp->length('total'), shift @{$hsp_data[14]}; - is $hsp->hsp_length, $hsp->length('total'); - is $hsp->num_identical, shift @{$hsp_data[15]}; - is $hsp->num_conserved, shift @{$hsp_data[16]}; - is $hsp->frac_identical('query'), shift @{$hsp_data[17]}; - is $hsp->frac_identical('hit'), shift @{$hsp_data[18]}; - is $hsp->frac_identical('total'), shift @{$hsp_data[19]}; - } - } - } -} - -is $searchio->result_count, 2; - -# bug revealed by bug 2632 - CS lines were already ignored, but we couldn't -# parse alignments when HSPs weren't in simple order!! -$searchio = Bio::SearchIO->new(-format => 'hmmer_pull', -file => test_input_file('hmmpfam_cs.out'), -verbose => 1); -my $result = $searchio->next_result; -my $hit = $result->next_hit; -my $hsp = $hit->next_hsp; -is $hsp->seq_str, "IPPLLAVGAVHHHLINKGLRQEASILV"; - -# and another bug revealed: we don't always know the hit length, and -# shouldn't complain about that with a warning -is $hsp->hit->seqlength, 412; - -my $count = 0; -while (my $hit = $result->next_hit) { - $count++; - next if $count < 6; - last if $count > 6; - my $hsp = $hit->next_hsp; - ok ! $hsp->hit->seqlength; - #*** not sure how to test for the lack of a warning though... - # Maybe run an eval with verbose set to 2, then make sure $@ is undef? --cjfields -} diff --git a/t/Tools/Hmmer.t b/t/Tools/Hmmer.t deleted file mode 100644 index d6520261d..000000000 --- a/t/Tools/Hmmer.t +++ /dev/null @@ -1,110 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id: Hmmer.t 14989 2008-11-11 19:52:02Z cjfields $ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 29); - - use_ok('Bio::Tools::HMMER::Domain'); - use_ok('Bio::Tools::HMMER::Set'); - use_ok('Bio::Tools::HMMER::Results'); -} - -my ($domain,$set,$homol,$rev,$res,$dom,@doms); -$domain = Bio::Tools::HMMER::Domain->new(-verbose=>1); - -is ref($domain), 'Bio::Tools::HMMER::Domain'; - -$domain->start(50); -$domain->end(200); -$domain->hstart(10); -$domain->hend(100); -$domain->seqbits(50); -$domain->bits(20); -$domain->evalue(0.0001); -$domain->seq_id('silly'); - - -# test that we can get out forward and reverse homol_SeqFeatures -$homol = $domain->feature2(); -is $homol->start(), 10; - -$rev = $domain; - -is $rev->start(), 50; - -$set = Bio::Tools::HMMER::Set->new(); -$set->add_Domain($domain); - -@doms = $set->each_Domain(); -$dom = shift @doms; - -is $dom->start(), 50; - -$set->bits(300); -$set->evalue(0.0001); -$set->name('sillyname'); -$set->desc('a desc'); -$set->accession('fakeaccesssion'); -is $set->bits(), 300; -is $set->evalue(), 0.0001; -is $set->name(), 'sillyname'; -is $set->desc, 'a desc'; -is $set->accession, 'fakeaccesssion'; - -$res = Bio::Tools::HMMER::Results->new( -file => test_input_file('hmmsearch.out') , -type => 'hmmsearch'); -my $seen =0; -is $res->hmmfile, "HMM"; -is $res->seqfile, "HMM.dbtemp.29591"; - -my $first = 0; -foreach $set ( $res->each_Set) { - foreach $domain ( $set->each_Domain ) { - #print STDERR "Got domain ",$domain->seq_id," start ",$domain->start," end ",$domain->end,"\n"; - # do nothing for the moment - $seen = 1; - } -} -is $seen, 1; - -is $res->number, 1215; - -$res = Bio::Tools::HMMER::Results->new( -file => test_input_file('hmmpfam.out') , - -type => 'hmmpfam'); - -is ($res->number, 2); - -# parse HMM 2.2 files - -$res = Bio::Tools::HMMER::Results->new( -file => test_input_file('L77119.hmmer'), - -type => 'hmmpfam'); -$seen =0; -is $res->hmmfile, 'Pfam'; -is $res->seqfile, 'L77119.faa'; -foreach $set ( $res->each_Set) { - # only one set anyways - - is($set->name, 'gi|1522636|gb|AAC37060.1|'); - is($set->desc, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]'); - is($set->accession, '[none]'); - foreach $domain ( $set->each_Domain ) { - #print STDERR "Got domain ",$domain->seq_id," start ",$domain->start," end ",$domain->end,"\n"; - # do nothing for the moment - is($domain->start, 280); - is($domain->end, 481); - is($domain->bits, -105.2); - is($domain->evalue, 0.0022 ); - } -} -is ($res->number, 1); - -# test for bugs #(1189,1034,1172) -$res = Bio::Tools::HMMER::Results->new( -file => test_input_file('hmmsearch.out') , - -type => 'hmmsearch'); -my $res2 = $res->filter_on_cutoff(100,50); -ok($res2); -is($res2->number, 604); diff --git a/t/data/L77119.hmmer b/t/data/L77119.hmmer deleted file mode 100644 index 490e122fe..000000000 --- a/t/data/L77119.hmmer +++ /dev/null @@ -1,50 +0,0 @@ -hmmpfam - search one or more sequences against HMM database -HMMER 2.2g (August 2001) -Copyright (C) 1992-2001 HHMI/Washington University School of Medicine -Freely distributed under the GNU General Public License (GPL) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HMM file: Pfam -Sequence file: L77119.faa -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query sequence: gi|1522636|gb|AAC37060.1| -Accession: [none] -Description: M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii] - -Scores for sequence family classification (score includes all domains): -Model Description Score E-value N --------- ----------- ----- ------- --- -Methylase_M Type I restriction modification system, M -105.2 0.0022 1 - -Parsed for domains: -Model Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -Methylase_M 1/1 280 481 .. 1 279 [] -105.2 0.0022 - -Alignments of top-scoring domains: -Methylase_M: domain 1 of 1, from 280 to 481: score -105.2, E = 0.0022 - *->lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerri - ++EL+++ av+ R L+F K++ dk - gi|1522636 280 NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------ 306 - - eieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsql - +i+ p + + +++y ++ ++ ++y ++ + l - gi|1522636 307 GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPL 338 - - FwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdl - F++++ e ++ ++++ + + ++ + + Glf ++ - gi|1522636 339 FYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSN 374 - - dfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfG - ++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G - gi|1522636 375 NV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILG 421 - - DaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDP - +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ - gi|1522636 422 YVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE------- 463 - - AcGSGSLllqaskflgehdgkrnaisyYGQEsn<-* - +++ ++ k+n+i + s+ - gi|1522636 464 ---------RFKEIIK--NWKINDINF----ST 481 - -// diff --git a/t/data/cysprot1b.hmmsearch b/t/data/cysprot1b.hmmsearch deleted file mode 100644 index 09c39c67f..000000000 --- a/t/data/cysprot1b.hmmsearch +++ /dev/null @@ -1,177 +0,0 @@ -hmmsearch - search a sequence database with a profile HMM -HMMER 2.2g (August 2001) -Copyright (C) 1992-2001 HHMI/Washington University School of Medicine -Freely distributed under the GNU General Public License (GPL) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HMM file: Peptidase_C1.hmm [Peptidase_C1] -Sequence database: cysprot1b.fa -per-sequence score cutoff: [none] -per-domain score cutoff: [none] -per-sequence Eval cutoff: <= 10 -per-domain Eval cutoff: [none] -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query HMM: Peptidase_C1 -Accession: PF00112 -Description: Papain family cysteine protease - [HMM has been calibrated; E-values are empirical estimates] - -Scores for complete sequences (score includes all domains): -Sequence Description Score E-value N --------- ----------- ----- ------- --- -CATL_RAT 449.4 2e-135 1 -CATL_HUMAN 444.5 6.1e-134 1 -CATH_RAT 381.8 4.8e-115 1 -PAPA_CARPA 337.7 9e-102 1 - -Parsed for domains: -Sequence Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -CATL_RAT 1/1 114 332 .. 1 337 [] 449.4 2e-135 -CATL_HUMAN 1/1 114 332 .. 1 337 [] 444.5 6.1e-134 -CATH_RAT 1/1 114 330 .. 1 337 [] 381.8 4.8e-115 -PAPA_CARPA 1/1 134 343 .. 1 337 [] 337.7 9e-102 - -Alignments of top-scoring domains: -CATL_RAT: domain 1 of 1, from 114 to 332: score 449.4, E = 2e-135 - *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt - +P+++DWRe kg VtpVK+QG qCGSCWAFSa g lEg+ ++kt - CATL_RAT 114 IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT-- 155 - - kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke - gkl+sLSEQ+LvDC++ d gn+ GCnG Glmd Af+Yik+ - CATL_RAT 156 ----GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE- 192 - - qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt - NgGl++E++Y PY+ +kd g+ - CATL_RAT 193 ----NGGLDSEESY-----PYE----AKD-------------------GS 210 - - CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai - Cky+ + ++ a+++g++d+p++ E+al+ka+a++GP+sVa+ - CATL_RAT 211 CKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAM 249 - - dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY - das+ s q+Y+sG +Y+++ C+++ +LdH+Vl+VGY - CATL_RAT 250 DASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGY 283 - - GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW - G e+ ++++ +YW - CATL_RAT 284 GYEG-T------------------------------------DSNKDKYW 296 - - IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-* - +VKNSWG++WG++GY++ia+++n n+CG+a+ asypi - CATL_RAT 297 LVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI 332 - -CATL_HUMAN: domain 1 of 1, from 114 to 332: score 444.5, E = 6.1e-134 - *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt - +P s+DWRe kg +VtpVK+QG qCGSCWAFSa+galEg+ ++kt - CATL_HUMAN 114 APRSVDWRE-KG-YVTPVKNQG-QCGSCWAFSATGALEGQMFRKT-- 155 - - kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke - g l+sLSEQ+LvDC+g + gn+ GCnG Glmd+Af+Y+++ - CATL_HUMAN 156 ----GRLISLSEQNLVDCSG-PQGNE------GCNG-GLMDYAFQYVQD- 192 - - qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt - NgGl++E++Y PY+ +++ + - CATL_HUMAN 193 ----NGGLDSEESY-----PYE----ATE-------------------ES 210 - - CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai - Ckyn +k s+ a+++g++d+p + E+al+ka+a++GP+sVai - CATL_HUMAN 211 CKYN-PKYSV-----ANDTGFVDIPKQ-----EKALMKAVATVGPISVAI 249 - - dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY - da++ s F +Yk G +Y ++ +C+++ + dH+Vl+VGY - CATL_HUMAN 250 DAGHES---FLFYKEG-------IYFEP---DCSSE---DMDHGVLVVGY 283 - - GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW - G e+ e+++ +YW - CATL_HUMAN 284 GFES-T------------------------------------ESDNNKYW 296 - - IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-* - +VKNSWG++WG+ GY+++a+++ n+CGIas asyp+ - CATL_HUMAN 297 LVKNSWGEEWGMGGYVKMAKDRR----NHCGIASAASYPT 332 - -CATH_RAT: domain 1 of 1, from 114 to 330: score 381.8, E = 4.8e-115 - *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt - P s+DWR+ kg V+pVK+QG CGSCW FS++galE++ +i++ - CATH_RAT 114 YPSSMDWRK-KGNVVSPVKNQG-ACGSCWTFSTTGALESAVAIAS-- 156 - - kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke - gk L EQqLvDC +++n+ GC+G Gl+++AfeYi++ - CATH_RAT 157 ----GKMMTLAEQQLVDCAQ-NFNNH------GCQG-GLPSQAFEYILY- 193 - - qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt - N+G++ E++Y PY gk+ g+ - CATH_RAT 194 ----NKGIMGEDSY-----PYI----GKN-------------------GQ 211 - - CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai - Ck+n +++++ a++k+ ++++ n dE+a+ +a+a + Pvs a+ - CATH_RAT 212 CKFN-PEKAV-----AFVKNVVNITLN----DEAAMVEAVALYNPVSFAF 251 - - dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY - +++e DF++YksG vY++ +C +tp + ++HAVl+VGY - CATH_RAT 252 EVTE----DFMMYKSG-------VYSSN---SCHKTP-DKVNHAVLAVGY 286 - - GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW - G +n g YW - CATH_RAT 287 GEQN-GLL----------------------------------------YW 295 - - IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-* - IVKNSWG++WG nGYf i+Rgkn +CG+a +asypi - CATH_RAT 296 IVKNSWGSNWGNNGYFLIERGKN-----MCGLAACASYPI 330 - -PAPA_CARPA: domain 1 of 1, from 134 to 343: score 337.7, E = 9e-102 - *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt - +Pe +DWR+ kg aVtpVK+QG +CGSCWAFSav ++Eg+++i+t - PAPA_CARPA 134 IPEYVDWRQ-KG-AVTPVKNQG-SCGSCWAFSAVVTIEGIIKIRT-- 175 - - kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke - g+l +SEQ+L+DCd+ ++ GCnG G+++ A++ + + - PAPA_CARPA 176 ----GNLNEYSEQELLDCDR---RSY------GCNG-GYPWSALQLVAQ- 210 - - qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt - G++ Y PY+ g++ - PAPA_CARPA 211 -----YGIHYRNTY-----PYE----GVQ-------------------RY 227 - - CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai - C+++ +k+ + +ak +g ++v+++ +E al + +a+ +PvsV - PAPA_CARPA 228 CRSR-EKGPY----AAKTDGVRQVQPY----NEGALLYSIAN-QPVSVVL 267 - - dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY - +a + DFqlY++G ++++ +Cg+ +dHAV++VGY - PAPA_CARPA 268 EAAGK---DFQLYRGG-------IFVG----PCGN----KVDHAVAAVGY 299 - - GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW - G +Y - PAPA_CARPA 300 G---------------------------------------------PNYI 304 - - IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-* - ++KNSWGt WGEnGY+ri+Rg+++s ++ CG+ ++ yp+ - PAPA_CARPA 305 LIKNSWGTGWGENGYIRIKRGTGNS-YGVCGLYTSSFYPV 343 - - -Histogram of all scores: -score obs exp (one = represents 1 sequences) ------ --- --- -> 337 4 -|==== - - -% Statistical details of theoretical EVD fit: - mu = -195.8384 - lambda = 0.1423 -chi-sq statistic = 0.0000 - P(chi-square) = 0 - -Total sequences searched: 4 - -Whole sequence top hits: -tophits_s report: - Total hits: 4 - Satisfying E cutoff: 4 - Total memory: 16K - -Domain top hits: -tophits_s report: - Total hits: 4 - Satisfying E cutoff: 4 - Total memory: 20K diff --git a/t/data/hmmpfam_HSPdashline.txt b/t/data/hmmpfam_HSPdashline.txt deleted file mode 100644 index 58827762a..000000000 --- a/t/data/hmmpfam_HSPdashline.txt +++ /dev/null @@ -1,74 +0,0 @@ -hmmpfam - search one or more sequences against HMM database -HMMER 2.3.2 (Oct 2003) -Copyright (C) 1992-2003 HHMI/Washington University School of Medicine -Freely distributed under the GNU General Public License (GPL) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HMM file: trans_db -Sequence file: megaplasmid.fasta -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query sequence: lcl|gi|340783625|Plus1 -Accession: [none] -Description: megaplasmid, complete sequence [UNKNOWN] - -Scores for sequence family classification (score includes all domains): -Model Description Score E-value N --------- ----------- ----- ------- --- -IS66_ORF3.uniq -30.0 1.7e-15 1 - -Parsed for domains: -Model Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -IS66_ORF3.uniq 1/1 60792 61184 .. 1 558 [] -30.0 1.7e-15 - -Alignments of top-scoring domains: -IS66_ORF3.uniq: domain 1 of 1, from 60792 to 61184: score -30.0, E = 1.7e-15 - *->mSSPLDLSLFPNLmtevvkmnaalpddlslLkAalqkqQavvea..l - L S FP + + aa+p+ l + l + v + + - lcl|gi|340 60792 SCTGLWDSEFPPVFPVNGSQRAATPF-LPVGPDELSSPPSSVLXscY 60837 - - dakianleelIEkLka.qldkLRRmkFGkrSEKkRhkLeldiaQiekale - d + l +L ++q LR ++r - lcl|gi|340 60838 DFPLCQSLSLCFRLGFpQALALRVRQRAPRGCH----------------- 60870 - - dLEddLnetqaeiaeaekkieaPAssppkeaPiaskPesPRqerkRkPLP - + +a+ + A+ +p++ + + LP - lcl|gi|340 60871 ------AHRRARGLFFTDTPHPVAVPRPQR----------DLTGSLVTLP 60904 - - eeLPReerRlePestvCPcggGqLkriGEDvsEqLDlvpaaFeVIqTvRp - + +P e P + + P + + D vp+ + - lcl|gi|340 60905 APMPCSET---PVESQHPGH-----------TGRSDAVPTPNTMKTS--- 60937 - - KyACrqCdtiVQAPaPakpIErGiptaGLLArvlVSKyaEHlPLYRQsEI - - lcl|gi|340 - -------------------------------------------------- - - - yaRqGVeiaRstLadWVgrtgarLaPLvdALaeyVLkeGklHADeTPVqV - +i s L V++ + r - lcl|gi|340 60938 ------AIMISGLIHGVSARCLRF-------------------------- 60955 - - LaPGnkKTKTGyLWAYvRDDRnagsSlppaVvfayspdRkGEHaqvhLae - k TG R D Slp+ + + R G e - lcl|gi|340 60956 -----KRSVTG-----PRQD-----SLPADGL---RLCREG-------VE 60980 - - ysGkLqaDAyaGYnalyesgRVGGDpikEAgCwAHaRRKifDlhvrnpSe - sG + + + ++ wAH RR ++D+ pS+ - lcl|gi|340 60981 PSGSQXKVSVTSILLSRTXP---------VASWAHVRRHFYDVNQGAPSP 61021 - - lteEALerIaaLYeIEaeIRGspaEdRlavRqelSvPlvkslseWLtaqr - ++ AL rI +LYeIEaeI p+E+ Rq+++ Pl+ s+ WL r - lcl|gi|340 61022 VAQTALLRIHELYEIEAEIKDDPPEQKMLARQQRAAPLLESFWTWLNDTR 61071 - - KtlsrgseLAKALaYlLnrWdALtryldDGqVpIDNNivENAiRrvAlGR - + +s+ AKA Y+L+rW ALt+yl+ G+ IDNN vE A+R vA+GR - lcl|gi|340 61072 AQVAPKSAIAKAIGYALKRWKALTLYLQEGHLGIDNNPVERALRGVAIGR 61121 - - kNyLFaGSDrGGeraAliySLIgTCKmNgVePyaYLrdVltrLadwplnk - kN+LFaG D GGeraA++y LI TCK+NgVeP aYL+dVl++L++wp + - lcl|gi|340 61122 KNFLFAGNDAGGERAAAFYTLIETCKLNGVEPFAYLCDVLEKLPTWPNKR 61171 - - DideLLPWayasai<-* - +eLLPW++ - lcl|gi|340 61172 -LHELLPWNWKNLT 61184 - -// diff --git a/t/data/hmmpfam_cs.out b/t/data/hmmpfam_cs.out deleted file mode 100644 index 28e8a7281..000000000 --- a/t/data/hmmpfam_cs.out +++ /dev/null @@ -1,559 +0,0 @@ -hmmpfam - search one or more sequences against HMM database -HMMER 2.3.2 (Oct 2003) -Copyright (C) 1992-2003 HHMI/Washington University School of Medicine -Freely distributed under the GNU General Public License (GPL) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HMM file: ../Shared/Pfam_fs -Sequence file: single_porphyra_AA.fa -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query sequence: gi|90819130|dbj|BAE92499.1| -Accession: [none] -Description: glutamate synthase [Porphyra yezoensis] - -Scores for sequence family classification (score includes all domains): -Model Description Score E-value N --------- ----------- ----- ------- --- -Glu_synthase Conserved region in glutamate synthas 858.6 3.6e-255 2 -GATase_2 Glutamine amidotransferases class-II 731.8 3.9e-226 1 -Glu_syn_central Glutamate synthase central domain 649.1 7.9e-213 1 -GXGXG GXGXG motif 367.3 2.7e-107 1 -HdeA hns-dependent expression protein A (H 9.6 0.015 1 -GDC-P Glycine cleavage system P-protein 7.1 0.086 1 -Cache_1 Cache domain 7.0 0.14 1 -IBN_N Importin-beta N-terminal domain 8.2 0.17 1 -DUF1200 Protein of unknown function (DUF1200) 6.7 0.42 1 -cobW CobW/HypB/UreG, nucleotide-binding do 5.1 0.45 1 -PUF Pumilio-family RNA binding repeat 6.5 0.47 1 -Arch_flagellin Archaebacterial flagellin 4.1 0.66 1 -FMN_dh FMN-dependent dehydrogenase 3.2 0.89 1 -RNA_pol_Rpb2_4 RNA polymerase Rpb2, domain 4 4.6 1.4 1 -DUF477 Domain of unknown function (DUF477) 3.8 1.7 1 -FRG1 FRG1-like family 0.2 1.7 1 -DUF1393 Protein of unknown function (DUF1393) 3.1 2 1 -tRNA_anti OB-fold nucleic acid binding domain 4.9 2 1 -SelT Selenoprotein T 3.1 2.2 1 -RNase_PH_C 3' exoribonuclease family, domain 2 4.2 2.3 1 -Pencillinase_R Penicillinase repressor 3.9 2.5 1 -Hormone_4 Neurohypophysial hormones, N-terminal 4.4 2.5 1 -DSRB Dextransucrase DSRB 2.7 2.7 1 -FtsK_SpoIIIE FtsK/SpoIIIE family 2.6 3.1 1 -UBA UBA/TS-N domain 4.2 3.1 1 -DUF1981 Domain of unknown function (DUF1981) 3.6 3.3 1 -Gla Vitamin K-dependent carboxylation/gam 4.0 3.5 1 -Scm3 Centromere protein Scm3 2.2 3.5 1 -Ribosomal_S6 Ribosomal protein S6 3.3 3.7 1 -Cystatin Cystatin domain 2.4 3.9 1 -Phage_prot_Gp6 Phage portal protein, SPP1 Gp6-like 1.0 4 1 -DUF1976 Domain of unknown function (DUF1976) -1.5 4.3 1 -DUF37 Domain of unknown function DUF37 3.0 4.5 1 -Flavodoxin_NdrI NrdI Flavodoxin like 2.1 4.6 1 -Bac_rhodopsin Bacteriorhodopsin 0.9 4.9 1 -Nitro_FeMo-Co Dinitrogenase iron-molybdenum cofacto 2.1 5.3 1 -MoCF_biosynth Probable molybdopterin binding domain 1.3 5.6 1 -PaaA_PaaC Phenylacetic acid catabolic protein 0.4 5.6 1 -Albicidin_res Albicidin resistance domain 1.7 5.7 1 -DUF1514 Protein of unknown function (DUF1514) 3.5 5.7 1 -T5orf172 T5orf172 domain 2.0 6.1 1 -Nup133_N Nup133 N terminal like -0.6 6.5 1 -BicD Microtubule-associated protein Bicaud -1.6 6.8 1 -Sel1 Sel1 repeat 2.5 7 1 -CAP_C DE Adenylate cyclase associated (CA 1.3 7.4 1 -Colicin Colicin pore forming domain 1.4 7.5 1 -MADF_DNA_bdg Alcohol dehydrogenase transcription f 1.8 8.2 1 -DUF258 Protein of unknown function, DUF258 0.3 8.3 1 -PspB Phage shock protein B 0.4 8.4 1 -GspM General secretion pathway, M protein 1.0 8.6 1 -Coq4 Coenzyme Q (ubiquinone) biosynthesis -0.3 9.1 1 -P22_AR_N P22_AR N-terminal domain -0.2 9.5 1 -C1_2 C1 domain 1.1 9.6 1 -Phage_Mu_P Bacteriophage Mu P protein -0.4 10 1 - -Parsed for domains: -Model Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -GATase_2 1/1 34 404 .. 1 385 [] 731.8 3.9e-226 -FRG1 1/1 88 107 .. 151 173 .. 0.2 1.7 -C1_2 1/1 191 210 .. 9 27 .. 1.1 9.6 -MADF_DNA_bdg 1/1 235 261 .. 57 95 .] 1.8 8.2 -PaaA_PaaC 1/1 258 269 .. 1 13 [. 0.4 5.6 -Albicidin_res 1/1 274 289 .. 50 65 .. 1.7 5.7 -UBA 1/1 311 331 .. 18 38 .] 4.2 3.1 -Gla 1/1 342 357 .. 27 42 .] 4.0 3.5 -RNA_pol_Rpb2_4 1/1 369 381 .. 1 13 [. 4.6 1.4 -MoCF_biosynth 1/1 371 396 .. 23 49 .. 1.3 5.6 -DUF1200 1/1 389 401 .. 1 13 [. 6.7 0.42 -Nup133_N 1/1 397 419 .. 475 498 .] -0.6 6.5 -DUF1976 1/1 428 448 .. 1296 1319 .] -1.5 4.3 -Bac_rhodopsin 1/1 445 472 .. 219 250 .] 0.9 4.9 -Coq4 1/1 459 481 .. 60 82 .. -0.3 9.1 -Glu_syn_central 1/1 478 773 .. 1 301 [] 649.1 7.9e-213 -Flavodoxin_NdrI 1/1 488 497 .. 122 131 .] 2.1 4.6 -P22_AR_N 1/1 524 541 .. 110 126 .] -0.2 9.5 -Cache_1 1/1 537 557 .. 1 23 [. 7.0 0.14 -Glu_synthase 1/2 650 676 .. 297 323 .. 1.3 3 -HdeA 1/1 727 749 .. 58 79 .] 9.6 0.015 -Sel1 1/1 729 745 .. 32 49 .] 2.5 7 -DUF1981 1/1 765 787 .. 62 88 .] 3.6 3.3 -tRNA_anti 1/1 818 839 .. 54 85 .] 4.9 2 -Cystatin 1/1 826 859 .. 1 38 [. 2.4 3.9 -RNase_PH_C 1/1 827 846 .. 64 84 .] 4.2 2.3 -Glu_synthase 2/2 830 1216 .. 1 412 [] 857.3 9e-255 -DUF258 1/1 839 860 .. 282 305 .] 0.3 8.3 -Pencillinase_R 1/1 856 894 .. 84 118 .] 3.9 2.5 -SelT 1/1 872 885 .. 96 111 .] 3.1 2.2 -Nitro_FeMo-Co 1/1 879 897 .. 87 105 .] 2.1 5.3 -DUF37 1/1 927 934 .. 61 68 .] 3.0 4.5 -Scm3 1/1 953 963 .. 103 113 .] 2.2 3.5 -cobW 1/1 1038 1058 .. 202 222 .] 5.1 0.45 -Arch_flagellin 1/1 1050 1072 .. 197 219 .] 4.1 0.66 -DUF1393 1/1 1055 1068 .. 1 14 [. 3.1 2 -FtsK_SpoIIIE 1/1 1107 1143 .. 163 198 .. 2.6 3.1 -FMN_dh 1/1 1109 1148 .. 291 330 .. 3.2 0.89 -DSRB 1/1 1120 1134 .. 1 16 [. 2.7 2.7 -Phage_Mu_P 1/1 1122 1131 .. 1 10 [. -0.4 10 -Hormone_4 1/1 1168 1176 .. 1 9 [] 4.4 2.5 -GDC-P 1/1 1205 1225 .. 10 30 .. 7.1 0.086 -PspB 1/1 1268 1276 .. 1 9 [. 0.4 8.4 -T5orf172 1/1 1271 1293 .. 35 58 .. 2.0 6.1 -CAP_C 1/1 1283 1292 .. 161 170 .] 1.3 7.4 -GXGXG 1/1 1290 1485 .. 1 228 [] 367.3 2.7e-107 -DUF1514 1/1 1453 1469 .. 50 66 .] 3.5 5.7 -Colicin 1/1 1456 1467 .. 192 203 .] 1.4 7.5 -Ribosomal_S6 1/1 1461 1481 .. 16 36 .. 3.3 3.7 -BicD 1/1 1465 1481 .. 1 17 [. -1.6 6.8 -PUF 1/1 1470 1486 .. 19 35 .] 6.5 0.47 -DUF477 1/1 1472 1495 .. 1 24 [. 3.8 1.7 -Phage_prot_Gp6 1/1 1479 1492 .. 1 14 [. 1.0 4 -IBN_N 1/1 1498 1516 .. 1 20 [. 8.2 0.17 -GspM 1/1 1506 1520 .. 1 15 [. 1.0 8.6 - -Alignments of top-scoring domains: -GATase_2: domain 1 of 1, from 34 to 404: score 731.8, E = 3.9e-226 - CS EEEEEEEEETSSHSBHHHHHHHHHHHHHGGGGSSCSTTSSCECEEEE - *->CGvlGfiAhikgkpshkivedaleaLerLeHRGavgADgktGDGAGI - CGv GfiA+ ++ ++hkiv +aleaL+++eHRGa++AD ++GDGAGI - gi|9081913 34 CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGI 79 - - CS EEECTCCCHHHHHHHCT----S GC-EEEEEEE-SSHHHHHHHHHHHHHH - ltqiPdgFFrevakelGieLpe.gqYAVGmvFLPqdelaraearkifEki - t+iP+++F++ ++++i++ ++ +VGm+FLP l+ + i+E + - gi|9081913 80 TTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETV 127 - - CS HHHTT-EEEEEEE--B-GGGS-HHHHHC--EEEEEEEE-TT--HHHHHHC - aeeeGLeVLGWReVPvnnsvLGetAlatePvIeQvFvgapsgdgedfErr - ++ee+Le++GWR VP+ +vLG++A + P++eQvF+ +++ +++ +E++ - gi|9081913 128 LKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQ 177 - - CS EEEEECHSCHHHHTHHH. BEEEEEESSEEEEEECC-GGGHHHHBHG - LyviRkrieksivaenvn....fYiCSLSsrTIVYKGMLtseQLgqFYpD - L+++Rk+iek+i+ + + ++fYiCSLS++TIVYKGM++s++LgqFY+D - gi|9081913 178 LFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQD 227 - - CS GGSTTEEBSEEEEEECESSSSSCTGGGSSCEEECCCTTCEEEEEEEEETT - LqderfeSalAivHsRFSTNTfPsWplAQPfRVnslwgggivlAHNGEIN - L++++++S++Ai+H+RFSTNT+P+WplAQP+R ++ HNGEIN - gi|9081913 228 LYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEIN 268 - - CS THHHHHHHHHHTSCCCSSTTCGHHHHCC-SSS-TTSCHHHHHHHHHHHHH - TlrgNrnwMraRegvlksplFgddldkLkPIvneggSDSaalDnvlEllv - Tl gN nwM++Re +l+s++++d++++LkPI n+++SDSa+lD ++Ell+ - gi|9081913 269 TLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLI 318 - - CS HTT--HHHHHHHHS----TT-GGGTST-HHHHHHHHHHHHHHCCHCCEEE - raGRslpeAlMMlIPEAWqnnpdmdkdrpekraFYeylsglmEPWDGPAa - ++GRs++eAlM+l+PEA+qn+pd +++e+ +FYey+sgl+EPWDGPA+ - gi|9081913 319 ASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPAL 367 - - CS EEEETSSEEEEEEETTTSCESEEEEEEEEEE.TTEEEEEESSC - lvftDGryavgAtLDRNGLTRPaRygiTrdldkDglvvvaSEa<-* - +vft+G++ +gAtLDRNGL RPaRy+iT kD+lv+v+SE+ - gi|9081913 368 VVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES 404 - -FRG1: domain 1 of 1, from 88 to 107: score 0.2, E = 1.7 - *->FQkfKvDLqdrklrinekDkkel<-* - FQk+ Lq+ + +++D+ ++ - gi|9081913 88 FQKS---LQNQNIKFEQNDSVGV 107 - -C1_2: domain 1 of 1, from 191 to 210: score 1.1, E = 9.6 - *->idgfyg...fYsCkkccddftl<-* - i+g+++ ++fY C+ c +t+ - gi|9081913 191 INGKDWaheFYICSLSC--YTI 210 - -MADF_DNA_bdg: domain 1 of 1, from 235 to 261: score 1.8, E = 8.2 - *->drYrrelrkirqgnsegsstgsgesykskWryyeelsFL<-* - +++ ++r+ ++ +kW+++ ++F - gi|9081913 235 SSFAIYHRRFS------------TNTMPKWPLAQPMRFV 261 - -PaaA_PaaC: domain 1 of 1, from 258 to 269: score 0.4, E = 5.6 - CS X............ - *->MYnFvEHGGvint<-* - M Fv H G int - gi|9081913 258 M-RFVSHNGEINT 269 - -Albicidin_res: domain 1 of 1, from 274 to 289: score 1.7, E = 5.7 - *->LrlmharEPsLrkgtG<-* - L+ m+ rEP L+ +++ - gi|9081913 274 LNWMQSREPLLQSKVW 289 - -UBA: domain 1 of 1, from 311 to 331: score 4.2, E = 3.1 - CS HHHHHHHHHTTT-HHHHHHHH - *->eeakkALeatngnverAvewL<-* - ++a++ L a++ ++e+A+++L - gi|9081913 311 DAAVELLIASGRSPEEALMIL 331 - -Gla: domain 1 of 1, from 342 to 357: score 4.0, E = 3.5 - CS CSSHHHHHHHHHHCTC - *->fednegtkefwrkYfg<-* - f++n+++ f++ Y g - gi|9081913 342 FANNTEISDFYEYYSG 357 - -RNA_pol_Rpb2_4: domain 1 of 1, from 369 to 381: score 4.6, E = 1.4 - CS EEETTEEEEEESS - *->VYvNGklvGthrn<-* - V+ NGk++G + + - gi|9081913 369 VFTNGKVIGATLD 381 - -MoCF_biosynth: domain 1 of 1, from 371 to 396: score 1.3, E = 5.6 - CS CHHHHHHHHHHHTTTCEEEEEEEE-SS - *->tNgpmLaalLresaGaevirygiVpDd<-* - tNg+ + a L + G ++ry+i +D+ - gi|9081913 371 TNGKVIGATLDR-NGLRPARYVITKDN 396 - -DUF1200: domain 1 of 1, from 389 to 401: score 6.7, E = 0.42 - *->kYvltedtLlIks<-* - +Yv+t+d L+I+s - gi|9081913 389 RYVITKDNLVIVS 401 - -Nup133_N: domain 1 of 1, from 397 to 419: score -0.6, E = 6.5 - *->lylltrnsGvvrIeHaleedstne<-* - l++ + +sGvv++e + + s + - gi|9081913 397 LVIVSSESGVVQVE-PGNVKSKGR 419 - -DUF1976: domain 1 of 1, from 428 to 448: score -1.5, E = 4.3 - *->VsvYiyFkevtdnksLsEysVtyk<-* - V++++ ++++nk ++ sVt k - gi|9081913 428 VDIFS--HKILNNKEIK-TSVTTK 448 - -Bac_rhodopsin: domain 1 of 1, from 445 to 472: score 0.9, E = 4.9 - CS HHHHHHHHHHHHHHHHHCHHHTC--------- - *->vvAKVgFgfilLrsravlertvavgsalaage<-* - v++K+++g +l ++r++le + + l+++ - gi|9081913 445 VTTKIPYGELLTDARQILE--HK--PFLSDQQ 472 - -Coq4: domain 1 of 1, from 459 to 481: score -0.3, E = 9.1 - *->rrILkEkPRissetldlkkLrkL<-* - r+IL kP s ++d kkL +L - gi|9081913 459 RQILEHKPFLSDQQVDIKKLMQL 481 - -Glu_syn_central: domain 1 of 1, from 478 to 773: score 649.1, E = 7.9e-213 - CS HHHHHHCTT--HHHHHCTCHHHHHHSS--EE-S---S--CCC-SS-- - *->llrrQkAFGYTyEdvelvllPMAetGkEalGSMGdDtPLAVLSekpr - l+++Q+AFGYT+Edvelv+++MA+++kE++++MGdD+PL +LSek++ - gi|9081913 478 LMQLQTAFGYTNEDVELVIEHMASQAKEPTFCMGDDIPLSILSEKSH 524 - - CS -GGGCEEE----SSS----TTTTGGG-B--EEES--S-TTS-SGGGC-CE - lLYdYFKQlFAQVTNPPIDPIREelVMSLetylGpegNlLeptpeqarrl - +LYdYFKQ+FAQVTNP+IDP+RE+lVMSL+ ++G+++NlL+ p+ a+++ - gi|9081913 525 ILYDYFKQRFAQVTNPAIDPLRESLVMSLAIQIGHKSNLLDDQPTLAKHI 574 - - CS EESSSB--HHHHHH.HHHH....CCCCEEEEESEEESTTSTTCHHHHHHH - kLesPILsnselekmlknidairegfkaatIditFdveeGvdgLeaaLdr - kLesP+++++el++ + + +++++ I+++F e+G++ ++ + + - gi|9081913 575 KLESPVINEGELNA-IFE-----SKLSCIRINTLFQLEDGPKNFKQQIQQ 618 - - CS HHHHHHHHHHCT-SEEEEESTCG--CTTEEE--HHHHHHHHHHHHHCTT- - lceeAeeAirsGaniivLSDRndildeervaIPaLLAvGAVHhHLIrkgL - lce A++Ai +G ni+vLSD+n+ ld+e+v+IP+LLAvGAVHhHLI kgL - gi|9081913 619 LCENASQAILDGNNILVLSDKNNSLDSEKVSIPPLLAVGAVHHHLINKGL 668 - - CS CCC-EEEEEESS--SHHHHHHHHCTT-SEEEEHCCHHHHHHHHCCCCCCC - RtkvslvVETGEaREvHHFAvLiGYGAsAInPYLAyETirdWWlirrGll - R+ +s+ VET++++++HHFA+LiGYGAsAI+PYLA+ET r+WW + ++++ - gi|9081913 669 RQEASILVETAQCWSTHHFACLIGYGASAICPYLAFETARHWWSNPKTKM 718 - - CS CHTTTS- T--HHHHHHHHHHHHHHHHHHHHHCTT--BHHHHCCS--EEE - lmskGkl.elsleeavkNYrkAiekGlLKIMSKMGISTlqSYrGAQIFEA - lmskG+l++++++ea++NY+kA+e+GlLKI+SKMGIS+l+SY+GAQIFE+ - gi|9081913 719 LMSKGRLpACNIQEAQANYKKAVEAGLLKILSKMGISLLSSYHGAQIFEI 768 - - CS SSB-H - vGLsk<-* - +GL++ - gi|9081913 769 LGLGS 773 - -Flavodoxin_NdrI: domain 1 of 1, from 488 to 497: score 2.1, E = 4.6 - CS -HHHHHHHHH - *->TneDVerVrk<-* - TneDVe V + - gi|9081913 488 TNEDVELVIE 497 - -P22_AR_N: domain 1 of 1, from 524 to 541: score -0.2, E = 9.5 - *->dVLydYWtrkGkAv..NPR<-* - ++LydY+ + +A +NP+ - gi|9081913 524 HILYDYFK-QRFAQvtNPA 541 - -Cache_1: domain 1 of 1, from 537 to 557: score 7.0, E = 0.14 - *->wTePYvdaalktgdlViTiaqPv<-* - +T+P++d + +++lV ++a+++ - gi|9081913 537 VTNPAIDPL--RESLVMSLAIQI 557 - -Glu_synthase: domain 1 of 2, from 650 to 676: score 1.3, E = 3 - CS --HHHHHHHHHHHHHCTT-CCCSEEEE - *->lPwelgLaevhqtLvengLRdrVsLia<-* - +P l++ +vh L++ gLR + s+ + - gi|9081913 650 IPPLLAVGAVHHHLINKGLRQEASILV 676 - -HdeA: domain 1 of 1, from 727 to 749: score 9.6, E = 0.015 - *->ACk.QdkkAsFkdKvkaEldKvk<-* - AC Q+ +A++k+ v+a l K+ - gi|9081913 727 ACNiQEAQANYKKAVEAGLLKIL 749 - -Sel1: domain 1 of 1, from 729 to 745: score 2.5, E = 7 - CS .HHH.HHHHHHHHHHTT- - *->DyekeAlkwyekAAeqGn<-* - ++++ A + y+kA e+G - gi|9081913 729 NIQE-AQANYKKAVEAGL 745 - -DUF1981: domain 1 of 1, from 765 to 787: score 3.6, E = 3.3 - *->iFgvltlaakeesesivklAfqiid.qi<-* - iF++l+l++ v+lAf+ +++qi - gi|9081913 765 IFEILGLGSEV-----VNLAFKGTTsQI 787 - -tRNA_anti: domain 1 of 1, from 818 to 839: score 4.9, E = 2 - CS EEEEEEETTSSTSTCTCTT..EEEEEEEEEEE - *->tGkvkkrpggeqNnlkTGeKAlelvveeievl<-* - +G v+ rpgge ++++ +e+ - gi|9081913 818 YGFVQYRPGGE----------YHINNPEMSKA 839 - -Cystatin: domain 1 of 1, from 826 to 859: score 2.4, E = 3.9 - CS ECEEEEET.STSHHHHHHHHHHHHHHHHHSSSSEEEEE - *->GglspvdpNendpevqealdfAlakyNeksndnylfel<-* - Gg +++ pe +al+ A+ yN + +ny++ l - gi|9081913 826 GGEYHINN----PEMSKALHQAVRGYNPEYYNNYQSLL 859 - -RNase_PH_C: domain 1 of 1, from 827 to 846: score 4.2, E = 2.3 - CS SSSS.B.HHHHHHHHHHHHHH - *->GkgnglteelleealelAkeg<-* - G +++++ +++ +al++A+ g - gi|9081913 827 G-EYHINNPEMSKALHQAVRG 846 - -Glu_synthase: domain 2 of 2, from 830 to 1216: score 857.3, E = 9e-255 - CS -SS-HHHHHHHHHHHHC--T-HHHHHHHHHHHHTS.-S-SGGGGEEE - *->hrnepeviktlqkavqvpveskpsydkYreplnertpigalrdlLef - h n+pe++k l++av+ + y +Y+ +l +r p++alrdlL++ - gi|9081913 830 HINNPEMSKALHQAVRG--YNPEYYNNYQSLLQNR-PPTALRDLLKL 873 - - CS --SS--......--GGGS--HHHHHTTEEEEEB-CTTC-HHHHHHHHHHH - kyaeepldtdkiipieevepaleikkrfctgaMSyGALSeeAheALAiAm - ++++p i+i+eve+++ i + fctg+MS+GALS+e+he+LAiAm - gi|9081913 874 QSNRAP------ISIDEVESIEDILQKFCTGGMSLGALSRETHETLAIAM 917 - - CS HHCT-EEEETTT---GGGCSB-TTS-T S BTTSTT--S--TT-B---SE - nriGtksNtGEGGedperlkpaadlds.G.SpTlpHLkGLqnednarSAI - nriG+ksN+GEGGedp r+k + d++s+G+Sp lpHLkGL+n+d+a+SAI - gi|9081913 918 NRIGGKSNSGEGGEDPVRFKILNDVNSsGtSPLLPHLKGLKNGDTASSAI 967 - - CS EEE-TT-TT--............HHHHCC-SEEEEE---TTSTTT--EE- - kQvASGRFGVtkRnGefWeefkRseYLvnAdalEIKiAQGAKPGeGGhLP - kQ+ASGRFGVt +eYL+nA++lEIKiAQGAKPGeGG+LP - gi|9081913 968 KQIASGRFGVT------------PEYLMNAKQLEIKIAQGAKPGEGGQLP 1005 - - CS GGG--HHHHHHHTS-TT--EE--SS-TT-SSHHHHHHHHHHHHHH-.TTS - GeKVspeIAriRnstPGvgliSPpPHHDIysiEDLaqLIydLkeindpkA - G+K+sp+IA +R ++PGv liSPpPHHDIysiEDL+qLI+dL++in pkA - gi|9081913 1006 GKKISPYIATLRKCKPGVPLISPPPHHDIYSIEDLSQLIFDLHQIN-PKA 1054 - - CS EEEEEEE-STTHHHHHHH...HHHTT-SEEEEE-TT---SSEECCHHHHC - pisVKLVsehgvgtiaaGhmqvakAnADiIlIdGhdGGTGASpktsikha - +isVKLVse g+gtiaaG vak+nADiI+I+GhdGGTGASp++sikha - gi|9081913 1055 KISVKLVSEIGIGTIAAG---VAKGNADIIQISGHDGGTGASPLSSIKHA 1101 - - CS ---HHHHHHHHHHHHHCTT-CCCSEEEEESS--SHHHHHHHHHCT-SEEE - GlPwelgLaevhqtLvengLRdrVsLiadGGLrTGaDVakAaaLGAdavg - G PwelgL+evhq+L en+LRdrV+L++dGGLrTG D+++Aa++GA+++g - gi|9081913 1102 GSPWELGLSEVHQLLAENQLRDRVTLRVDGGLRTGSDIVLAAIMGAEEFG 1151 - - CS -SHHHHHHCT--S---CCCT--TTSSS---CCHH..CT----HHHHHHHH - iGTaaLiAlGCimaRvCHtntCPvGvATQDPeLrKrlkfegaperVvNyf - +GT+a+iA+GCimaR+CHtn+CPvGvATQ++eLr +f g+pe +vN+f - gi|9081913 1152 FGTVAMIATGCIMARICHTNKCPVGVATQREELR--ARFSGVPEALVNFF 1199 - - CS HHHHHHHHHHHHHHT-S - iflaeEvrellaqlGfr<-* - +f+ Evre+la+lG++ - gi|9081913 1200 LFIGNEVREILASLGYK 1216 - -DUF258: domain 1 of 1, from 839 to 860: score 0.3, E = 8.3 - CS HHHHHHHCTSS-HHHHHHHHHHHH - *->AVkaAveeGeIseeRYesYlklle<-* - A+ +Av +++e Y++Y+ ll+ - gi|9081913 839 ALHQAVR--GYNPEYYNNYQSLLQ 860 - -Pencillinase_R: domain 1 of 1, from 856 to 894: score 3.9, E = 2.5 - CS XXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXX - *->drlfggsvgalvanfleee....klSeddieeLrelLde<-* - + l++++++ ++ ++l+ ++++ ++S d++e ++++L++ - gi|9081913 856 QSLLQNRPPTALRDLLKLQsnraPISIDEVESIEDILQK 894 - -SelT: domain 1 of 1, from 872 to 885: score 3.1, E = 2.2 - *->KLqtGrvYAPPtpqEL<-* - KLq++r P++++E+ - gi|9081913 872 KLQSNRA--PISIDEV 885 - -Nitro_FeMo-Co: domain 1 of 1, from 879 to 897: score 2.1, E = 5.3 - CS EEE-TTSSBHHHHHHHHHC - *->pikagegetieeaiealqe<-* - pi e e+ie+ + ++ + - gi|9081913 879 PISIDEVESIEDILQKFCT 897 - -DUF37: domain 1 of 1, from 927 to 934: score 3.0, E = 4.5 - *->hpGGyDPV<-* - ++GG DPV - gi|9081913 927 GEGGEDPV 934 - -Scm3: domain 1 of 1, from 953 to 963: score 2.2, E = 3.5 - *->HLraLeteddi<-* - HL++L+++d++ - gi|9081913 953 HLKGLKNGDTA 963 - -cobW: domain 1 of 1, from 1038 to 1058: score 5.1, E = 0.45 - CS ...HHHHHHHHHH-SSS-EEE - *->adlekleadlrrlnpeapiip<-* - +dl++l+ dl+++np+a+i - gi|9081913 1038 EDLSQLIFDLHQINPKAKISV 1058 - -Arch_flagellin: domain 1 of 1, from 1050 to 1072: score 4.1, E = 0.66 - *->inpstkvrgeVvpenGapgtief<-* - inp k+++++v+e+G+ ++ - gi|9081913 1050 INPKAKISVKLVSEIGIGTIAAG 1072 - -DUF1393: domain 1 of 1, from 1055 to 1068: score 3.1, E = 2 - *->klSvKtVVAiGIGA<-* - k+SvK V iGIG+ - gi|9081913 1055 KISVKLVSEIGIGT 1068 - -FtsK_SpoIIIE: domain 1 of 1, from 1107 to 1143: score 2.6, E = 3.1 - *->lviDnydeLaeenlL.ervtsLknqGlsygvhvmata<-* - l++ + ++L +en+L++rvt+ + +Gl +g +++++a - gi|9081913 1107 LGLSEVHQLLAENQLrDRVTLRVDGGLRTGSDIVLAA 1143 - -FMN_dh: domain 1 of 1, from 1109 to 1148: score 3.2, E = 0.89 - CS HHHHHHHHHCHHTTTSSEEEEESS-SSHHHHHHHHHHTSS - *->LpeVvPIlkeaAvkgdieVllDgGvRRGtDVlKALALGAr<-* - L eV +l e + +++ +DgG R+G+D++ A +GA+ - gi|9081913 1109 LSEVHQLLAENQLRDRVTLRVDGGLRTGSDIVLAAIMGAE 1148 - -DSRB: domain 1 of 1, from 1120 to 1134: score 2.7, E = 2.7 - *->mKvndrvtvKtDGgpR<-* - ++ drvt + DGg R - gi|9081913 1120 -QLRDRVTLRVDGGLR 1134 - -Phage_Mu_P: domain 1 of 1, from 1122 to 1131: score -0.4, E = 10 - *->sntVtLrvgG<-* - ++VtLrv+G - gi|9081913 1122 RDRVTLRVDG 1131 - -Hormone_4: domain 1 of 1, from 1168 to 1176: score 4.4, E = 2.5 - CS X-TT--TT- - *->CyirnCPrG<-* - C + CP+G - gi|9081913 1168 CHTNKCPVG 1176 - -GDC-P: domain 1 of 1, from 1205 to 1225: score 7.1, E = 0.086 - *->eqqeMLstiGlssLddLidat<-* - e++e+L+++G++sLdd ++++ - gi|9081913 1205 EVREILASLGYKSLDDITGQN 1225 - -PspB: domain 1 of 1, from 1268 to 1276: score 0.4, E = 8.4 - *->MsaffLagP<-* - M+ ++La+P - gi|9081913 1268 MDDDILAIP 1276 - -T5orf172: domain 1 of 1, from 1271 to 1293: score 2.0, E = 6.1 - *->dvvalievedaraklEklLHkrFk<-* - d+ a+ ev++a klE+++ k+Fk - gi|9081913 1271 DILAIPEVSNAI-KLETEITKHFK 1293 - -CAP_C: domain 1 of 1, from 1283 to 1292: score 1.3, E = 7.4 - CS EEEEEE---- - *->KLvTevveha<-* - KL+Te++ h - gi|9081913 1283 KLETEITKHF 1292 - -GXGXG: domain 1 of 1, from 1290 to 1485: score 367.3, E = 2.7e-107 - CS EEEEE-TT--STTHHHHHHHHHHCTTTS.S-TTCEEEEEEEEE-TTT - *->keeaiiNtdrlvgtrlsgeiakkygeegalpkdtgkivfnGsAGqsf - k+++i Nt+r+vgtrlsg iak yg+ g + k+ +k++f+GsAGqsf - gi|9081913 1290 KHFKIANTNRTVGTRLSGIIAKNYGNTG-F-KGLIKLNFYGSAGQSF 1334 - - CS TTT-BTTEEEEEEEEE-S.TTTTT-ECCEEEEE--TT-.......SS-GG - GafmagGvtLeleGdAnddyvGkgmsGGeIvikgnagdpvGnnMdageyv - Gaf+a+G++L l+G+And yvGkgm+GG+Ivi+++ag +e + - gi|9081913 1335 GAFLASGINLKLMGEAND-YVGKGMNGGSIVIVPPAGT-------IYEDN 1376 - - CS GSEEC-SSTTTT--CEEEEESSEE-TTTTTT-.....CCEEEEESEB.-S - gnviaGNtclyGatGGkifiaGdAGerfgvrnkayKdsgatiVveGvaGd - ++vi+GNtclyGatGG++f++G+AGerf+vrn s a+ VveGv Gd - gi|9081913 1377 NQVIIGNTCLYGATGGYLFAQGQAGERFAVRN-----SLAESVVEGV-GD 1420 - - CS STTTT-EEEEEEESS-B-SSBTTT--CCEEEEE-TTS.......THHHHB - hggEYMtGGtivVlGdaGrnvGagMtGGiaYvlgeiedfsyMiatlpgkv - h++EYMtGG+ivVlG+aGrnvGagMtGG+aY+l+e+e + ++v - gi|9081913 1421 HACEYMTGGVIVVLGKAGRNVGAGMTGGLAYFLDEDE-------NFIDRV 1463 - - CS -CCCEEEE...ES-S......CCHHHHHHHH - nleiVeledlkrievkrkklLpegekqlkel<-* - n+eiV+ + r+ + ++ge+qlk+l - gi|9081913 1464 NSEIVKIQ---RVIT------KAGEEQLKNL 1485 - -DUF1514: domain 1 of 1, from 1453 to 1469: score 3.5, E = 5.7 - *->LeeyrieveRikkevkk<-* - L e+++ ++R++ e+ k - gi|9081913 1453 LDEDENFIDRVNSEIVK 1469 - -Colicin: domain 1 of 1, from 1456 to 1467: score 1.4, E = 7.5 - CS SHHHHHHHHHCH - *->DdkfveklNkli<-* - D++f++ +N +i - gi|9081913 1456 DENFIDRVNSEI 1467 - -Ribosomal_S6: domain 1 of 1, from 1461 to 1481: score 3.3, E = 3.7 - CS CCHHHHHHHHHHHHHCTT-EE - *->EqvkqeiekYqkvLtnngAei<-* - ++v++ei k+q+v+t++g+e+ - gi|9081913 1461 DRVNSEIVKIQRVITKAGEEQ 1481 - -BicD: domain 1 of 1, from 1465 to 1481: score -1.6, E = 6.8 - *->gqaysnqrkvAkdGeer<-* - + +++qr+ +k Gee+ - gi|9081913 1465 SEIVKIQRVITKAGEEQ 1481 - -PUF: domain 1 of 1, from 1470 to 1486: score 6.5, E = 0.47 - *->lQkllevateeqkqlil<-* - +Q+++++a+eeq ++++ - gi|9081913 1470 IQRVITKAGEEQLKNLI 1486 - -DUF477: domain 1 of 1, from 1472 to 1495: score 3.8, E = 1.7 - *->gtLspserarLeqalaalEqktga<-* - ++++++ ++L ++ ++ktg+ - gi|9081913 1472 RVITKAGEEQLKNLIENHAAKTGS 1495 - -Phage_prot_Gp6: domain 1 of 1, from 1479 to 1492: score 1.0, E = 4 - *->eEmikkFidkHklr<-* - eE +k++i+ H+++ - gi|9081913 1479 EEQLKNLIENHAAK 1492 - -IBN_N: domain 1 of 1, from 1498 to 1516: score 8.2, E = 0.17 - CS HHHHHHHHHCCTHHCHHHHH - *->AEkqLeqlekqklPgfllaL<-* - A++ Le+++++ lP+f++ + - gi|9081913 1498 AHTILEKWNSY-LPQFWQVV 1516 - -GspM: domain 1 of 1, from 1506 to 1520: score 1.0, E = 8.6 - CS XXXXXXXXXXXXXXX - *->mneLqawWqgrspRE<-* - ++ L ++Wq ++p+E - gi|9081913 1506 NSYLPQFWQVVPPSE 1520 - -// diff --git a/t/data/hmmpfam_fake.out b/t/data/hmmpfam_fake.out deleted file mode 100755 index 5a499aed7..000000000 --- a/t/data/hmmpfam_fake.out +++ /dev/null @@ -1,55 +0,0 @@ -hmmpfam - search a single seq against HMM database -HMMER 2.1.1 (Dec 1998) -Copyright (C) 1992-1998 Washington University School of Medicine -HMMER is freely distributed under the GNU General Public License (GPL). -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HMM file: pfam -Sequence file: /home/birney/src/wise2/example/road.pep -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query: roa1_drome - -Scores for sequence family classification (score includes all domains): -Model Description Score E-value N --------- ----------- ----- ------- --- -SEED 146.1 6.3e-40 2 -TEST 5.0 7.2 1 - -Parsed for domains: -Model Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -SEED 1/2 33 103 .. 1 77 [] 71.2 2.2e-17 -SEED 2/2 124 194 .. 1 77 [] 75.5 1.1e-18 - -Alignments of top-scoring domains: -SEED: domain 1 of 2, from 33 to 103: score 71.2, E = 2.2e-17 - *->lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGf - lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf - roa1_drome 33 LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGF 74 - - aFVeFeseedAekAlealnG.kelggrklrv<-* - +F+++ ++ + A + +++++gr+++ - roa1_drome 75 GFITYSHSSMIDEAQK--SRpHKIDGRVVEP 103 - -SEED: domain 2 of 2, from 124 to 194: score 75.5, E = 1.1e-18 - *->lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGf - lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +Gf - roa1_drome 124 LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGF 165 - - aFVeFeseedAekAlealnGkelggrklrv<-* - aFVeF++++ ++k + ++l+g+ + v - roa1_drome 166 AFVEFDDYDPVDKVVL-QKQHQLNGKMVDV 194 - -// -Query: roa2_drome - -Scores for sequence family classification (score includes all domains): -Model Description Score E-value N --------- ----------- ----- ------- --- -SEED 146.1 6.3e-40 2 - -Parsed for domains: -Model Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -SEED 1/2 33 103 .. 1 77 [] 71.2 2.2e-17 -SEED 2/2 124 194 .. 1 77 [] 75.5 1.1e-18 -// \ No newline at end of file diff --git a/t/data/hmmpfam_multiresult.out b/t/data/hmmpfam_multiresult.out deleted file mode 100644 index 7a94e4a35..000000000 --- a/t/data/hmmpfam_multiresult.out +++ /dev/null @@ -1,106 +0,0 @@ -hmmpfam - search one or more sequences against HMM database -HMMER 2.3.2 (Oct 2003) -Copyright (C) 1992-2003 HHMI/Washington University School of Medicine -Freely distributed under the GNU General Public License (GPL) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HMM file: testInput.hmm -Sequence file: testInput.fasta -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query sequence: Test -Accession: [none] -Description: [none] - -Scores for sequence family classification (score includes all domains): -Model Description Score E-value N --------- ----------- ----- ------- --- -TEST 184.7 2.5e-56 1 - -Parsed for domains: -Model Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -TEST 1/1 8 97 .] 1 95 [] 184.7 2.5e-56 - -Alignments of top-scoring domains: -TEST: domain 1 of 1, from 8 to 97: score 184.7, E = 2.5e-56 - *->svfqqqqssksttgstvtAiAiAigYRYRYRAvtWnsGsLssGvnDn - sv+qqqq+ + +vtAiAiAigYRYRYRAv Wn GsLs G nDn - Test 8 SVYQQQQGGSA----MVTAIAIAIGYRYRYRAVVWNKGSLSTGTNDN 50 - - DnDqqsdgLYtiYYsvtvpssslpsqtviHHHaHkasstkiiikiePr<- - DnDq +d LYtiYYsvtv +ss+p q+v+HHHaH+asstkiiiki P - Test 51 DNDQAAD-LYTIYYSVTVSASSWPGQSVTHHHAHPASSTKIIIKIAPS 97 - - * - - Test - - - -// -Sequence 0 - -Query sequence: lcl|gi|340783625|Plus1 -Accession: [none] -Description: megaplasmid, complete sequence [UNKNOWN] - -Scores for sequence family classification (score includes all domains): -Model Description Score E-value N --------- ----------- ----- ------- --- -IS66_ORF3.uniq -30.0 1.7e-15 1 - -Parsed for domains: -Model Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -IS66_ORF3.uniq 1/1 60792 61184 .. 1 558 [] -30.0 1.7e-15 - -Alignments of top-scoring domains: -IS66_ORF3.uniq: domain 1 of 1, from 60792 to 61184: score -30.0, E = 1.7e-15 - *->mSSPLDLSLFPNLmtevvkmnaalpddlslLkAalqkqQavvea..l - L S FP + + aa+p+ l + l + v + + - lcl|gi|340 60792 SCTGLWDSEFPPVFPVNGSQRAATPF-LPVGPDELSSPPSSVLXscY 60837 - - dakianleelIEkLka.qldkLRRmkFGkrSEKkRhkLeldiaQiekale - d + l +L ++q LR ++r - lcl|gi|340 60838 DFPLCQSLSLCFRLGFpQALALRVRQRAPRGCH----------------- 60870 - - dLEddLnetqaeiaeaekkieaPAssppkeaPiaskPesPRqerkRkPLP - + +a+ + A+ +p++ + + LP - lcl|gi|340 60871 ------AHRRARGLFFTDTPHPVAVPRPQR----------DLTGSLVTLP 60904 - - eeLPReerRlePestvCPcggGqLkriGEDvsEqLDlvpaaFeVIqTvRp - + +P e P + + P + + D vp+ + - lcl|gi|340 60905 APMPCSET---PVESQHPGH-----------TGRSDAVPTPNTMKTS--- 60937 - - KyACrqCdtiVQAPaPakpIErGiptaGLLArvlVSKyaEHlPLYRQsEI - - lcl|gi|340 - -------------------------------------------------- - - - yaRqGVeiaRstLadWVgrtgarLaPLvdALaeyVLkeGklHADeTPVqV - +i s L V++ + r - lcl|gi|340 60938 ------AIMISGLIHGVSARCLRF-------------------------- 60955 - - LaPGnkKTKTGyLWAYvRDDRnagsSlppaVvfayspdRkGEHaqvhLae - k TG R D Slp+ + + R G e - lcl|gi|340 60956 -----KRSVTG-----PRQD-----SLPADGL---RLCREG-------VE 60980 - - ysGkLqaDAyaGYnalyesgRVGGDpikEAgCwAHaRRKifDlhvrnpSe - sG + + + ++ wAH RR ++D+ pS+ - lcl|gi|340 60981 PSGSQXKVSVTSILLSRTXP---------VASWAHVRRHFYDVNQGAPSP 61021 - - lteEALerIaaLYeIEaeIRGspaEdRlavRqelSvPlvkslseWLtaqr - ++ AL rI +LYeIEaeI p+E+ Rq+++ Pl+ s+ WL r - lcl|gi|340 61022 VAQTALLRIHELYEIEAEIKDDPPEQKMLARQQRAAPLLESFWTWLNDTR 61071 - - KtlsrgseLAKALaYlLnrWdALtryldDGqVpIDNNivENAiRrvAlGR - + +s+ AKA Y+L+rW ALt+yl+ G+ IDNN vE A+R vA+GR - lcl|gi|340 61072 AQVAPKSAIAKAIGYALKRWKALTLYLQEGHLGIDNNPVERALRGVAIGR 61121 - - kNyLFaGSDrGGeraAliySLIgTCKmNgVePyaYLrdVltrLadwplnk - kN+LFaG D GGeraA++y LI TCK+NgVeP aYL+dVl++L++wp + - lcl|gi|340 61122 KNFLFAGNDAGGERAAAFYTLIETCKLNGVEPFAYLCDVLEKLPTWPNKR 61171 - - DideLLPWayasai<-* - +eLLPW++ - lcl|gi|340 61172 -LHELLPWNWKNLT 61184 - -// -Sequence 1 diff --git a/t/data/hmmscan.out b/t/data/hmmscan.out deleted file mode 100644 index 65edacc6e..000000000 --- a/t/data/hmmscan.out +++ /dev/null @@ -1,202 +0,0 @@ -# hmmscan :: search sequence(s) against a profile database -# HMMER 3.0 (March 2010); http://hmmer.org/ -# Copyright (C) 2010 Howard Hughes Medical Institute. -# Freely distributed under the GNU General Public License (GPLv3). -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# query sequence file: BA000019.orf1.fasta -# target HMM database: /data/biodata/HMMerDB/Pfam.hmm -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query: BA000019.orf1 [L=198] -Scores for complete sequence (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Model Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - 6e-30 105.2 0.3 6.7e-30 105.0 0.2 1.0 1 Peripla_BP_2 Periplasmic binding protein - ------ inclusion threshold ------ - 0.036 14.7 0.0 0.09 13.4 0.0 1.7 1 DUF2726 Protein of unknown function (DUF2726) - 0.039 14.1 0.3 0.049 13.8 0.2 1.1 1 Pfam-B_1590 - 0.22 12.5 0.0 0.33 12.0 0.0 1.3 1 Pfam-B_6580 - 0.25 12.1 0.2 0.37 11.5 0.2 1.2 1 Calpain_III Calpain large subunit, domain III - 0.42 11.5 1.2 1.7 9.5 0.1 2.3 3 MHCassoc_trimer Class II MHC-associated invariant chain trim - 1.2 9.2 2.2 1.3 9.1 1.5 1.1 1 DUF3498 Domain of unknown function (DUF3498) - - -Domain annotation for each model (and alignments): ->> Peripla_BP_2 Periplasmic binding protein - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 105.0 0.2 1.5e-33 6.7e-30 59 236 .. 2 173 .. 1 175 [. 0.93 - - Alignments for each domain: - == domain 1 score: 105.0 bits; conditional E-value: 1.5e-33 - Peripla_BP_2 59 lkPDlvivsafgalvseieellelgipvvavessstaeslleqirllgellgeedeaeelvaelesridavkaridsl.kpktvlvfgyadegikv 153 - lkPDl+i+ +++ ++i+++l++ +p+v v+ s+ s+++ +r ++++l+ee++++ + +++++ri+++++r + ++ +v+v+g+++ +ik+ - BA000019.orf1 2 LKPDLIIGREYQ---KNIYNQLSNFAPTVLVDWGSF-TSFQDNFRYIAQVLNEEEQGKLVLQQYQKRIRDLQDRMGERlQKIEVSVIGFSGQSIKS 93 - 8***********...********************9.*****************************************999999999999997777 PP - - Peripla_BP_2 154 vfgsgswvgdlldaaggeni.iaeakgseseeisaEqilaadpdviivsgrgedtktgveelkenplwaelpAvkngrvyllds 236 - ++ + ++++ld+ag++ i i++++++ + eis+E+++++d+dv++v k+ + ++nplw +l+Avk+++vy++++ - BA000019.orf1 94 LNR-DAVFNQVLDDAGIKRIsIQKNQQERYLEISIENLNKYDADVLFVINE---SKEQLYPDLKNPLWHHLRAVKKQQVYVVNQ 173 - 776.5678999999****99777777*************************...77777777899***************9976 PP - ->> DUF2726 Protein of unknown function (DUF2726) - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? 13.4 0.0 2e-05 0.09 79 112 .. 86 119 .. 79 130 .. 0.75 - - Alignments for each domain: - == domain 1 score: 13.4 bits; conditional E-value: 2e-05 - DUF2726 79 ashkqgkaekrDalkeealekAgipllrvkakks 112 - +s ++ k +rDa+++++l+ Agi+ + +++ ++ - BA000019.orf1 86 FSGQSIKSLNRDAVFNQVLDDAGIKRISIQKNQQ 119 - 4555777999****************99954433 PP - ->> Pfam-B_1590 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? 13.8 0.2 1.1e-05 0.049 6 102 .. 78 169 .. 74 195 .. 0.75 - - Alignments for each domain: - == domain 1 score: 13.8 bits; conditional E-value: 1.1e-05 - Pfam-B_1590 6 risafinlfiGqsvsrktivnail.slleqkGflkrswnkinlpctyvnlsieriskfdflpilskikgkeisyklyGeteWqtidklLlnvnkhk 100 - +i + f Gqs++ + +a++ +l++ G + s +k n y+++sie+++k+d ++ + ke +y + + W+++ v+k++ - BA000019.orf1 78 KIEVSVIGFSGQSIKSLNR-DAVFnQVLDDAGIKRISIQK-NQQERYLEISIENLNKYDADVLFVINESKEQLYPDLKNPLWHHLR----AVKKQQ 167 - 5666666789999876554.34441578999999999988.56789*************98888888899****999999997654....444444 PP - - Pfam-B_1590 101 af 102 - ++ - BA000019.orf1 168 VY 169 - 44 PP - ->> Pfam-B_6580 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? 12.0 0.0 7.2e-05 0.33 36 89 .. 70 124 .. 61 139 .. 0.83 - - Alignments for each domain: - == domain 1 score: 12.0 bits; conditional E-value: 7.2e-05 - Pfam-B_6580 36 ewLedrlataaaAalvldalqseelpr.alqrelLeaigakavvlrkdqtrrlla 89 - +++ +rl+++++ + + + l+r a+ +++L+ +g+k + ++k+q+ r l - BA000019.orf1 70 DRMGERLQKIEVSVIGFSGQSIKSLNRdAVFNQVLDDAGIKRISIQKNQQERYLE 124 - 5677889999999999999988888862699******************999764 PP - ->> Calpain_III Calpain large subunit, domain III - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? 11.5 0.2 8.1e-05 0.37 25 91 .. 35 102 .. 31 129 .. 0.75 - - Alignments for each domain: - == domain 1 score: 11.5 bits; conditional E-value: 8.1e-05 - Calpain_III 25 etfltNPqfrlslkepddedctvliaLmqknrrkkrkk.geenltigfavykv.kkkekeldkeffkkn 91 - +f +N + ++ +++++ +++++ +qk+ r ++ ge++ i+++v+ +++ k+l+++++ ++ - BA000019.orf1 35 TSFQDNFRYIAQVLNEEEQ-GKLVLQQYQKRIRDLQDRmGERLQKIEVSVIGFsGQSIKSLNRDAVFNQ 102 - 48999**********9987.7999999999988776655888888999998887777788888766555 PP - ->> MHCassoc_trimer Class II MHC-associated invariant chain trimerisation domain - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? -3.5 0.0 4.3 2e+04 41 49 .. 29 37 .. 27 41 .. 0.78 - 2 ? 0.1 0.0 0.35 1.6e+03 4 36 .. 51 81 .. 49 96 .. 0.74 - 3 ? 9.5 0.1 0.00038 1.7 9 43 .. 139 176 .. 131 178 .. 0.73 - - Alignments for each domain: - == domain 1 score: -3.5 bits; conditional E-value: 4.3 - HHHHHHHHH CS - MHCassoc_trimer 41 vdWksfEsW 49 - vdW sf s+ - BA000019.orf1 29 VDWGSFTSF 37 - 689999886 PP - - == domain 2 score: 0.1 bits; conditional E-value: 0.35 - HHHHHHHHHHH-TT---------HHHHHHHHHH CS - MHCassoc_trimer 4 edqvkhLllksdPkkvfPqlketlleNLksLKk 36 - e+q k l++ + k +l++ + e L+++ - BA000019.orf1 51 EEQGKLVLQQYQ--KRIRDLQDRMGERLQKIEV 81 - 556666677777..7789999999999998765 PP - - == domain 3 score: 9.5 bits; conditional E-value: 0.00038 - HHHHHH-TT---------HHHHHHHHHHH....S-HHHH CS - MHCassoc_trimer 9 hLllksdPkkvfPqlketlleNLksLKkt....mdevdW 43 - + +s+ ++++P+lk+ l + L++ Kk+ ++++dW - BA000019.orf1 139 FVINESK-EQLYPDLKNPLWHHLRAVKKQqvyvVNQSDW 176 - 5556666.899****************973443456666 PP - ->> DUF3498 Domain of unknown function (DUF3498) - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? 9.1 1.5 0.00029 1.3 395 425 .. 43 73 .. 35 137 .. 0.81 - - Alignments for each domain: - == domain 1 score: 9.1 bits; conditional E-value: 0.00029 - ------------------------------- CS - DUF3498 395 yErRLlsQeeQaqklLleYqaRLedseeRLR 425 - y + l eeQ + +L++Yq+R+ d ++R+ - BA000019.orf1 43 YIAQVLNEEEQGKLVLQQYQKRIRDLQDRMG 73 - 66788999*****************999984 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query sequence(s): 1 (198 residues) -Target model(s): 31912 (6698792 nodes) -Passed MSV filter: 1891 (0.0592567); expected 638.2 (0.02) -Passed bias filter: 1237 (0.0387628); expected 638.2 (0.02) -Passed Vit filter: 100 (0.00313362); expected 31.9 (0.001) -Passed Fwd filter: 7 (0.000219353); expected 0.3 (1e-05) -Initial search space (Z): 31912 [actual number of targets] -Domain search space (domZ): 7 [number of targets reported over threshold] -# CPU time: 0.74u 0.55s 00:00:01.29 Elapsed: 00:00:07.01 -# Mc/sec: 189.21 -// -Query: lcl|Test_ID.1|P1 [L=463] -Description: 281521..282909 -Scores for complete sequence (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Model Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - 2.2e-06 19.9 1.5 4.3e-06 18.9 1.0 1.4 1 IS4.original - 2.3e-06 19.7 1.5 4.3e-06 18.8 1.0 1.4 1 IS4.original - - -Domain annotation for each model (and alignments): ->> IS4.original - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 18.9 1.0 8.9e-08 4.3e-06 315 353 .. 335 369 .. 318 391 .. 0.75 - - Alignments for each domain: - == domain 1 score: 18.9 bits; conditional E-value: 8.9e-08 - IS4.original 315 filatnlddeldaediaelYrlRwqiElfFrelKsllgl 353 - lat+ +el+ae+i+ lY +Rw+iE +F+ lK ++ - lcl|Test_ID.1|P1 335 LLLATE--TELSAEEILRLYARRWGIEPLFHNLKR--WW 369 - 345555..**************************7..44 PP - ->> IS4.original - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 18.8 1.0 9e-08 4.3e-06 315 353 .. 335 369 .. 318 391 .. 0.75 - - Alignments for each domain: - == domain 1 score: 18.8 bits; conditional E-value: 9e-08 - IS4.original 315 filatnlddeldaediaelYrlRwqiElfFrelKsllgl 353 - lat+ +el+ae+i+ lY +Rw+iE +F+ lK ++ - lcl|Test_ID.1|P1 335 LLLATE--TELSAEEILRLYARRWGIEPLFHNLKR--WW 369 - 345555..**************************7..44 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query sequence(s): 1 (463 residues) -Target model(s): 96 (37031 nodes) -Passed MSV filter: 16 (0.166667); expected 1.9 (0.02) -Passed bias filter: 16 (0.166667); expected 1.9 (0.02) -Passed Vit filter: 9 (0.09375); expected 0.1 (0.001) -Passed Fwd filter: 2 (0.0208333); expected 0.0 (1e-05) -Initial search space (Z): 96 [actual number of targets] -Domain search space (domZ): 2 [number of targets reported over threshold] -# CPU time: 0.03u 0.02s 00:00:00.04 Elapsed: 00:00:00.06 -# Mc/sec: 276.54 -// diff --git a/t/data/hmmscan_multi_domain.out b/t/data/hmmscan_multi_domain.out deleted file mode 100644 index d686ddc57..000000000 --- a/t/data/hmmscan_multi_domain.out +++ /dev/null @@ -1,116 +0,0 @@ -# hmmscan :: search sequence(s) against a profile database -# HMMER 3.0 (March 2010); http://hmmer.org/ -# Copyright (C) 2010 Howard Hughes Medical Institute. -# Freely distributed under the GNU General Public License (GPLv3). -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# query sequence file: BA000019.orf37.fasta -# target HMM database: /data/biodata/HMMerDB/Pfam-A.hmm -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query: BA000019.orf37 [L=1418] -Scores for complete sequence (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Model Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - 3.1e-32 111.0 11.8 7.6e-20 71.3 2.3 6.4 6 PPC Bacterial pre-peptidase C-terminal domain - 4.7e-13 47.9 92.9 0.0022 17.8 3.3 10.0 3 HemolysinCabind Hemolysin-type calcium-binding repeat (2 cop - - -Domain annotation for each model (and alignments): ->> PPC Bacterial pre-peptidase C-terminal domain - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? 0.5 0.2 0.16 9.3e+02 4 59 .. 117 183 .. 84 192 .. 0.58 - 2 ? -0.6 0.0 0.36 2.1e+03 12 58 .. 347 388 .. 311 397 .. 0.46 - 3 ! 71.3 2.3 1.3e-23 7.6e-20 1 69 [. 470 549 .. 470 550 .. 0.95 - 4 ? -3.2 0.1 2 1.2e+04 15 25 .. 582 603 .. 567 626 .. 0.51 - 5 ? -1.1 0.6 0.5 3e+03 13 36 .. 987 1019 .. 974 1072 .. 0.49 - 6 ! 54.4 1.0 2.4e-18 1.4e-14 1 69 [. 1087 1168 .. 1087 1169 .. 0.85 - - Alignments for each domain: - == domain 1 score: 0.5 bits; conditional E-value: 0.16 - EEEE--S----EEEEEECTSS...EEEEE.EEEES.E.SSSSTTC--...-B--SEEEEEESS..... CS - PPC 4 vykfevpaggsltididggsg...dldLy.lldgngp.slaaydans...apsgndeyiefta....p 59 - + +f+++ag+ ++i d++s ++L +++++ + ++ + n+ + gn+++++ft p - BA000019.orf37 117 TATFTLEAGDDYSIG-DSSSEvtfYASLEeVPTPSVTpEISLSVENAtisEAEGNTTTLRFTLseppP 183 - 446667777766665.4444222222244455555322222222222233455566777777755544 PP - - == domain 2 score: -0.6 bits; conditional E-value: 0.36 - ---EEEEEECTSSEEEEEEEEES.ESSSSTTC---B--SEEEEEESS CS - PPC 12 ggsltididggsgdldLylldgngpslaaydansapsgndeyiefta 58 - g +++ + g+ dl+++ + + ++d + +de++ f+ - BA000019.orf37 347 GEIVAVR-SDGF-DLKITAKSATIQLPIQADGVA---ESDEQVVFSL 388 - 1112222.2233.444444444444444433333...2556666665 PP - - == domain 3 score: 71.3 bits; conditional E-value: 1.3e-23 - -EEEEEE--S----EEEEEECTSS......EEEEEEEEES.ESSSSTTC--.....-B--SEEEEEESS.----EEEEEE CS - PPC 1 dvDvykfevpaggsltididggsg......dldLylldgngpslaaydans.....apsgndeyieftapqaGtYyvaVs 69 - dvD+yk+e++ag++++id+d++++ d++L+l+d +g+ la++d+ + + sg ++yieftap++G+Yyv+V+ - BA000019.orf37 470 DVDFYKVELKAGDTIKIDTDSNQFadgrkvDTWLRLFDVSGTELASNDDGAapnevFDSGFQSYIEFTAPSDGVYYVGVT 549 - 8******************999999***99999******************8888877789*****************96 PP - - == domain 4 score: -3.2 bits; conditional E-value: 2 - EEEEEECTSS...........E CS - PPC 15 ltididggsg...........d 25 - l+i++++ +g + - BA000019.orf37 582 LNISLNNPTGfvagateippgN 603 - 5555555544333333333331 PP - - == domain 5 score: -1.1 bits; conditional E-value: 0.5 - --.EEEEEECTSS.....EEEEEEEEES....E CS - PPC 13 gs.ltididggsg.....dldLylldgng...p 36 - + l+++i + ++ ++ +ll g+g + - BA000019.orf37 987 TAsLSVAIANDNIaegveTATVTLLAGDGyqiN 1019 - 444777775555555665444555555542221 PP - - == domain 6 score: 54.4 bits; conditional E-value: 2.4e-18 - -EEEEEE--S----EEEEEECTSSEEE.....EEEEEES.E.....SSSSTTC--...-B--SEEEEEESS.----EEEEEE CS - PPC 1 dvDvykfevpaggsltididggsgdld.....Lylldgngp.....slaaydans...apsgndeyieftapqaGtYyvaVs 69 - dvD+yk+++++g +l+i++d +d++ L+++d++g+ + ++++a++ ++ nd+y+efta ++GtYyv++s - BA000019.orf37 1087 DVDLYKVNLKVGEKLSINVDAAEIDSKllyaqLRVFDAEGNelaktDFDDFQAAPdevFSAFNDPYLEFTAETTGTYYVGIS 1168 - 8***********************9999788888*****96222225555777777777777******************98 PP - ->> HemolysinCabind Hemolysin-type calcium-binding repeat (2 copies) - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 5.9 0.5 0.0026 16 2 13 .. 1214 1225 .. 1213 1225 .. 0.89 - 2 ! 10.8 3.1 6.8e-05 0.41 1 18 [] 1231 1248 .. 1231 1248 .. 0.95 - 3 ! 11.4 2.4 4.3e-05 0.26 4 18 .] 1243 1257 .. 1240 1257 .. 0.91 - - Alignments for each domain: - == domain 1 score: 5.9 bits; conditional E-value: 0.0026 - ---S-EEEE--- CS - HemolysinCabind 2 GgaGnDtLyGga 13 - G++G+DtL G++ - BA000019.orf37 1214 GTSGDDTLIGTD 1225 - 99*******986 PP - - == domain 2 score: 10.8 bits; conditional E-value: 6.8e-05 - E---S-EEEE---S-EEE CS - HemolysinCabind 1 yGgaGnDtLyGgaGnDtl 18 - +G++GnD+Ly +G+D+l - BA000019.orf37 1231 FGNGGNDILYARGGDDKL 1248 - 69**************87 PP - - == domain 3 score: 11.4 bits; conditional E-value: 4.3e-05 - -S-EEEE---S-EEE CS - HemolysinCabind 4 aGnDtLyGgaGnDtl 18 - +G+D L+GgaG+D l - BA000019.orf37 1243 GGDDKLFGGAGDDLL 1257 - 6************87 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query sequence(s): 1 (1418 residues) -Target model(s): 11912 (2158902 nodes) -Passed MSV filter: 231 (0.0193922); expected 238.2 (0.02) -Passed bias filter: 133 (0.0111652); expected 238.2 (0.02) -Passed Vit filter: 15 (0.00125923); expected 11.9 (0.001) -Passed Fwd filter: 2 (0.000167898); expected 0.1 (1e-05) -Initial search space (Z): 11912 [actual number of targets] -Domain search space (domZ): 2 [number of targets reported over threshold] -# CPU time: 0.57u 0.17s 00:00:00.74 Elapsed: 00:00:00.39 -# Mc/sec: 7849.55 -// diff --git a/t/data/hmmscan_qry_stop.txt b/t/data/hmmscan_qry_stop.txt deleted file mode 100644 index 0e202c9e1..000000000 --- a/t/data/hmmscan_qry_stop.txt +++ /dev/null @@ -1,63 +0,0 @@ -# hmmscan :: search sequence(s) against a profile database -# HMMER 3.0 (March 2010); http://hmmer.org/ -# Copyright (C) 2010 Howard Hughes Medical Institute. -# Freely distributed under the GNU General Public License (GPLv3). -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# query sequence file: FQ377874.faa -# target HMM database: HMM_Profiles.hmm -# output directed to file: FQ377874.txt -# number of worker threads: 8 -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query: gi|328801650|gb|CBW53803.1| [L=243] -Description: IS1296 A transposase protein B [Mycoplasma mycoides subsp. capri LC str. 95010] -Scores for complete sequence (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Model Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - 2.3e-88 289.2 12.7 3.8e-88 288.5 8.8 1.3 1 IS3_IS150_ORF2.curated - - -Domain annotation for each model (and alignments): ->> IS3_IS150_ORF2.curated - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 288.5 8.8 7.6e-89 3.8e-88 145 386 .. 4 236 .. 1 238 [. 0.94 - - Alignments for each domain: - == domain 1 score: 288.5 bits; conditional E-value: 7.6e-89 - IS3_IS150_ORF2.curated 145 eiyhrhrgrYGYRRitlaLrlSEkrGkqiNhKrvqrLMkelgLkSv.iRvKk.kyrsyrgevgivadNlLqRqFkadkPnek 224 - i+ ++ + YGYRR++++L G +NhK++ rL kelg R K +y sy+g+vg+ adN+L R+F + n k - gi|328801650|gb|CBW53803.1| 4 DIFSKSFETYGYRRLKITLK---SKGYIVNHKKILRLTKELGVQCIkFRTKNgRYSSYKGTVGKIADNVLKRNFHSLQAN-K 81 - 69999***************...*******************87642344434**********************99998.5 PP - - IS3_IS150_ORF2.curated 225 W.vTDVTEFkvgggkKlYLSpIlDLFNrEIisyslserpdaklVkktLerAlkklgpdevpvlHSPRLLSDqGwqYqskay. 304 - +TDVTEFkv+ g+KlYLSpI+DL+N EIisys + p+++l+ +L++Alkk+ ++ +++ HS DqG++Yq+ + - gi|328801650|gb|CBW53803.1| 82 L*CTDVTEFKVN-GQKLYLSPIIDLYNDEIISYSIQTNPNLNLTNSMLDKALKKVKNTNGLLIHS-----DQGFHYQH--Is 155 - 788*********.****************************************************.....******98..75 PP - - IS3_IS150_ORF2.curated 305 .remLeeqGirqSMSRKGnclDNavmEsffgtLKsEifygnlkkFeslddLeraItdYIey.YNheRIhlKLkgltPveYRt 384 - + +Lee i qSMSRKGnclDNa +E+ffg LK Ei+y +k++s+++L++ I+ YI y YN+ RI++KLkgl+Pv+ R - gi|328801650|gb|CBW53803.1| 156 *AKKLEENNITQSMSRKGNCLDNAIIENFFGLLKQEIYY--GEKYNSVEELTKRIHKYI-Y*YNNIRIKEKLKGLSPVQFRK 234 - 6689***********************************..67****************.*********************9 PP - - IS3_IS150_ORF2.curated 385 qr 386 - q+ - gi|328801650|gb|CBW53803.1| 235 QS 236 - 97 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query sequence(s): 1 (243 residues) -Target model(s): 116 (57162 nodes) -Passed MSV filter: 38 (0.327586); expected 2.3 (0.02) -Passed bias filter: 38 (0.327586); expected 2.3 (0.02) -Passed Vit filter: 31 (0.267241); expected 0.1 (0.001) -Passed Fwd filter: 1 (0.198276); expected 0.0 (1e-05) -Initial search space (Z): 116 [actual number of targets] -Domain search space (domZ): 1 [number of targets reported over threshold] -# CPU time: 0.91u 0.30s 00:00:01.20 Elapsed: 00:00:01.71 -# Mc/sec: 8.09 -// diff --git a/t/data/hmmscan_sec_struct.out b/t/data/hmmscan_sec_struct.out deleted file mode 100644 index d922f7ff2..000000000 --- a/t/data/hmmscan_sec_struct.out +++ /dev/null @@ -1,140 +0,0 @@ -# hmmscan :: search sequence(s) against a profile database -# HMMER 3.0 (March 2010); http://hmmer.org/ -# Copyright (C) 2010 Howard Hughes Medical Institute. -# Freely distributed under the GNU General Public License (GPLv3). -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# query sequence file: BA000019.orf8.fasta -# target HMM database: Pfam-A.hmm -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query: BA000019.orf8 [L=348] -Scores for complete sequence (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Model Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - 6.7e-11 41.3 0.0 1.5e-09 37.0 0.0 2.5 2 HTH_AraC Bacterial regulatory helix-turn-helix proteins, Ara - 3.8e-12 38.2 11.9 5.9e-08 25.0 2.5 2.9 2 PKSI-KS_m3 - 0.023 13.9 0.0 0.54 9.5 0.0 2.3 2 DUF746 Domain of Unknown Function (DUF746) - -Domain annotation for each model (and alignments): ->> HTH_AraC Bacterial regulatory helix-turn-helix proteins, AraC family - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? 1.5 0.0 0.038 2.3e+02 10 36 .. 251 277 .. 242 278 .. 0.80 - 2 ! 37.0 0.0 2.6e-13 1.5e-09 8 41 .. 298 332 .. 295 333 .. 0.94 - - Alignments for each domain: - == domain 1 score: 1.5 bits; conditional E-value: 0.038 - -HHHHHHHHTS-HHHHHHHHHHH---- CS - HTH_AraC 10 siadiAeevgfSpsyfsrlFkkytGvt 36 - s+ +++++vg++ +++r F+ ++ t - BA000019.orf8 251 SLMELSRQVGLNDCTLKRGFRLVFDTT 277 - 66689999999999*******999877 PP - - == domain 2 score: 37.0 bits; conditional E-value: 2.6e-13 - .--HHHHHHHHTS.-HHHHHHHHHHH----HHHHH CS - HTH_AraC 8 nwsiadiAeevgf.SpsyfsrlFkkytGvtPsqyr 41 - +++i++ A++vgf S+syf+++F+k++G++P++++ - BA000019.orf8 298 EINISQAARRVGFsSRSYFATAFRKKFGINPKEFL 332 - 5789*****************************97 PP - ->> PKSI-KS_m3 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 25.0 2.5 3.6e-09 5.9e-08 1 16 [] 538 553 .. 538 553 .. 0.99 - 2 ! 17.4 0.9 9e-07 1.5e-05 1 15 [. 1672 1686 .. 1672 1687 .. 0.96 - - Alignments for each domain: - == domain 1 score: 25.0 bits; conditional E-value: 3.6e-09 - PKSI-KS_m3 1 GPSvtVDTACSSSLvA 16 - GPSvtVDT CSSSLvA - AM746336.orf22 538 GPSVTVDTLCSSSLVA 553 - 9**************9 PP - - == domain 2 score: 17.4 bits; conditional E-value: 9e-07 - PKSI-KS_m3 1 GPSvtVDTACSSSLv 15 - GP +++D ACSS Lv - AM746336.orf22 1672 GPNLVIDSACSSALV 1686 - 9*************9 PP - ->> DUF746 Domain of Unknown Function (DUF746) - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? 9.5 0.0 9.1e-05 0.54 4 32 .. 240 268 .. 237 275 .. 0.85 - 2 ? 1.8 0.0 0.022 1.3e+02 13 28 .. 298 313 .. 293 319 .. 0.87 - - Alignments for each domain: - == domain 1 score: 9.5 bits; conditional E-value: 9.1e-05 - DUF746 4 rllIrlLsqplslaeaadqlgtdegiiak 32 - + lIr L p sl+e+++q+g+ + ++ - BA000019.orf8 240 EILIRNLENPPSLMELSRQVGLNDCTLKR 268 - 579******************98766655 PP - - == domain 2 score: 1.8 bits; conditional E-value: 0.022 - DUF746 13 plslaeaadqlgtdeg 28 - ++ + +aa+++g +++ - BA000019.orf8 298 EINISQAARRVGFSSR 313 - 6899********9876 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query sequence(s): 1 (348 residues) -Target model(s): 11912 (2158902 nodes) -Passed MSV filter: 248 (0.0208193); expected 238.2 (0.02) -Passed bias filter: 220 (0.0184688); expected 238.2 (0.02) -Passed Vit filter: 19 (0.00159503); expected 11.9 (0.001) -Passed Fwd filter: 2 (0.000167898); expected 0.1 (1e-05) -Initial search space (Z): 11912 [actual number of targets] -Domain search space (domZ): 2 [number of targets reported over threshold] -# CPU time: 0.24u 0.15s 00:00:00.39 Elapsed: 00:00:00.27 -# Mc/sec: 2782.58 -// -Query: lcl|aorf_00010|P1 [L=132] -Description: IS481.original transposase -Scores for complete sequence (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Model Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - 3.4e-40 130.0 0.4 3.8e-40 129.9 0.3 1.0 1 IS481.original.hmm - - -Domain annotation for each model (and alignments): ->> IS481.original.hmm - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 129.9 0.3 6.6e-42 3.8e-40 127 281 .. 7 130 .. 2 132 .] 0.97 - - Alignments for each domain: - == domain 1 score: 129.9 bits; conditional E-value: 6.6e-42 - IS481.original.hmm 127 kRYErdhPgeLvhmDvkklgripdgGgvkighRwrgrtrgrgkrtnqsrnrglgkayvitaiDDhSRfayaeilsd 202 - +++E++hP +L+++D++++g+i + G+ +y++t++D++S+ ++++++ - lcl|aorf_00010|P1 7 GEIETAHPSYLGSQDTFYVGNITGAGR----------------------------IYQQTFVDTYSKWDSTKLYTT 54 - 579*******************88888............................********************* PP - - IS481.original.hmm 203 ettttaadfllraaayfygkigeeiitrvlTDnGaayrskkrsakhdFqealaelGIkhilTrprsPqTNGKiERF 278 - +t++taad l++ ++ f+ ++++i r lTD+ ++y+sk ++ d+ +la ++I+h++T++++PqTN ++ RF - lcl|aorf_00010|P1 55 KTPITAADLLNDRVLSFFA-EQGMGIIRLLTDRSTEYCSKA--ETQDYELCLALNDIEHTKTKVYHPQTNDICRRF 127 - *******************.********************8..********************************* PP - - IS481.original.hmm 279 hrT 281 - h+ - lcl|aorf_00010|P1 128 HKA 130 - *95 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query sequence(s): 1 (132 residues) -Target model(s): 116 (57162 nodes) -Passed MSV filter: 4 (0.0344828); expected 2.3 (0.02) -Passed bias filter: 4 (0.0344828); expected 2.3 (0.02) -Passed Vit filter: 3 (0.0258621); expected 0.1 (0.001) -Passed Fwd filter: 1 (0.0172414); expected 0.0 (1e-05) -Initial search space (Z): 116 [actual number of targets] -Domain search space (domZ): 1 [number of targets reported over threshold] -# CPU time: 0.06u 0.00s 00:00:00.06 Elapsed: 00:00:00.06 -# Mc/sec: 117.90 -// diff --git a/t/data/hmmsearch.out b/t/data/hmmsearch.out deleted file mode 100755 index 570f1f4b9..000000000 --- a/t/data/hmmsearch.out +++ /dev/null @@ -1,2170 +0,0 @@ -hmmsearch - search a sequence database with a profile HMM -HMMER 2.0 (June 1998) -Copyright (C) 1992-1998 Washington University School of Medicine -HMMER is freely distributed under the GNU General Public License (GPL). -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HMM file: HMM [SEED] -Sequence database: HMM.dbtemp.29591 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query HMM: SEED - [HMM has been calibrated; E-values are empirical estimates] - -Scores for complete sequences (score includes all domains): -Sequence Description Score E-value N --------- ----------- ----- ------- --- -PAB2_ARATH P42731 POLYADENYLATE-BINDING PROTEIN 2 (PO 393.8 6.1e-114 4 -Q13310 Q13310 INDUCIBLE POLY(A)-BINDING PROTEIN. 385.6 1.8e-111 4 -Q93004 Q93004 POLY(A)-BINDING PROTEIN. 384.2 4.6e-111 4 -PABP_MOUSE P29341 POLYADENYLATE-BINDING PROTEIN (POLY 383.5 7.5e-111 4 -O22173 O22173 PUTATIVE POLY(A)-BINDING PROTEIN. 371.0 4.3e-107 4 -P87135 P87135 POLYADENYLATE-BINDING PROTEIN PABPP 370.6 5.7e-107 4 -PABP_SCHPO P31209 POLYADENYLATE-BINDING PROTEIN (POLY 370.6 5.7e-107 4 -PABP_YEAST P04147 POLYADENYLATE-BINDING PROTEIN, CYTO 364.7 3.5e-105 4 -PABP_HUMAN P11940 POLYADENYLATE-BINDING PROTEIN (POLY 364.2 4.7e-105 4 -PABP_XENLA P20965 POLYADENYLATE-BINDING PROTEIN (POLY 362.6 1.5e-104 4 -P93616 P93616 POLY(A)-BINDING PROTEIN. 362.1 2.1e-104 4 -Q62029 Q62029 POLY A BINDING PROTEIN 2 (POLYA BIN 356.6 9.3e-103 4 -PAB5_ARATH Q05196 POLYADENYLATE-BINDING PROTEIN 5 (PO 354.4 4.4e-102 4 -Q39953 Q39953 POLY(A)-MRNA BINDING PROTEIN. 348.3 3e-100 4 -PABP_DROME P21187 POLYADENYLATE-BINDING PROTEIN (POLY 342.4 1.8e-98 4 -Q15097 Q15097 POLYADENYLATE BINDING PROTEIN II. 333.6 7.8e-96 4 -Q19581 Q19581 F18H3.3B. 329.1 1.8e-94 4 -Q19579 Q19579 F18H3.3A. 329.1 1.8e-94 4 -Q17350 Q17350 POLYADENYLATE-BINDING PROTEIN. 328.4 3e-94 4 -Q92227 Q92227 PUTATIVE POLY(A)-BINDING PROTEIN FA 324.6 3.9e-93 4 -O04319 O04319 POLY(A)-BINDING PROTEIN ISOLOG. 292.2 2.3e-83 4 -TIA1_MOUSE P52912 NUCLEOLYSIN TIA-1 (RNA BINDING PROT 288.1 3.9e-82 3 -TIA1_HUMAN P31483 NUCLEOLYSIN TIA-1. 285.7 2e-81 3 -Q27335 Q27335 POLY(A) BINDING PROTEIN. 280.8 6.3e-80 4 -Q12926 Q12926 ELAV-LIKE NEURONAL PROTEIN 1. 271.3 4.5e-77 3 -Q13235 Q13235 ELAV-LIKE NEURONAL PROTEIN 2 HEL-N2 271.3 4.5e-77 3 -NUCL_CHICK P15771 NUCLEOLIN (PROTEIN C23). 270.9 6e-77 4 -Q60899 Q60899 HU-ANTIGEN D (NERVOUS SYSTEM-SPECIF 270.3 8.9e-77 3 -Q24474 Q24474 RNA-BINDING PROTEIN. 268.9 2.3e-76 3 -HUD_MOUSE Q61701 PARANEOPLASTIC ENCEPHALOMYELITIS AN 268.7 2.7e-76 3 -HUD_HUMAN P26378 PARANEOPLASTIC ENCEPHALOMYELITIS AN 268.7 2.7e-76 3 -HUD_RAT O09032 PARANEOPLASTIC ENCEPHALOMYELITIS AN 268.7 2.7e-76 3 -O13620 O13620 HYPOTHETICAL 93.7 KD PROTEIN. 267.3 7.1e-76 5 -Q91585 Q91585 RIBONUCLEOPROTEIN. 267.3 7.2e-76 3 -Q60900 Q60900 HU-ANTIGEN C (MHUC-L). 262.0 2.8e-74 3 -Q91583 Q91583 RIBONUCLEOPROTEIN. 261.9 3e-74 3 -TIAR_HUMAN Q01085 NUCLEOLYSIN TIAR (TIA-1 RELATED PRO 261.8 3.2e-74 3 -Q91584 Q91584 RIBONUCLEOPROTEIN. 261.5 3.9e-74 3 -Q90409 Q90409 RIBONUCLEOPROTEIN. 260.6 7.4e-74 3 -P79736 P79736 ELAV/HUC HOMOLOG. 260.0 1.1e-73 3 -Q91903 Q91903 XEL-1. 259.2 1.9e-73 3 -Q06106 Q06106 SIMILAR TO POLYADENYLATE-BINDING PR 258.2 4e-73 5 -Q24473 Q24473 RNA-BINDING PROTEIN. 256.8 1.1e-72 3 -Q26293 Q26293 RRM9 (FRAGMENT). 255.9 2e-72 3 -Q14576 Q14576 (HUC). 255.2 3.1e-72 3 -Q16135 Q16135 NEURON-SPECIFIC RNA RECOGNITION MOT 254.2 6.2e-72 3 -GBP2_YEAST P25555 SINGLE-STRAND TELOMERIC DNA-BINDING 253.6 9.6e-72 3 -Q91582 Q91582 RIBONUCLEOPROTEIN. 253.2 1.3e-71 3 -Q15717 Q15717 HUR RNA BINDING PROTEIN. 250.3 9.7e-71 3 -Q20084 Q20084 F35H8.5. 247.9 4.9e-70 3 -PUB1_YEAST P32588 NUCLEAR AND CYTOPLASMIC POLYADENYLA 247.8 5.3e-70 3 -TIAR_MOUSE P70318 NUCLEOLYSIN TIAR (TIA-1 RELATED PRO 243.5 1.1e-68 3 -Q06459 Q06459 NUCLEOLIN. 242.4 2.3e-68 4 -ELAV_DROVI P23241 ELAV PROTEIN. 241.8 3.3e-68 3 -ELAV_DROME P16914 ELAV PROTEIN (EMBRYONIC LETHAL ABNO 241.8 3.3e-68 3 -NUCL_XENLA P20397 NUCLEOLIN (PROTEIN C23). 240.9 6.4e-68 4 -P70372 P70372 ELAV G HOMOLOG. 238.8 2.8e-67 3 -HRB1_YEAST P38922 HRB1 PROTEIN (TOM34 PROTEIN). 228.9 2.5e-64 3 -NUCL_MESAU P08199 NUCLEOLIN (PROTEIN C23). 225.1 3.5e-63 4 -ROM_HUMAN P52272 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 224.6 5.3e-63 3 -NUCL_MOUSE P09405 NUCLEOLIN (PROTEIN C23). 223.4 1.2e-62 4 -NUCL_RAT P13383 NUCLEOLIN (PROTEIN C23). 223.1 1.5e-62 4 -Q09959 Q09959 HYPOTHETICAL 49.8 KD PROTEIN C18A3. 221.0 6.4e-62 3 -Q08212 Q08212 NUCLEOLYSIN TIAR HOMOLOG (TIA-1 REL 220.3 1e-61 3 -Q91579 Q91579 RIBONUCLEOPROTEIN. 213.1 1.5e-59 3 -P93843 P93843 DNA BINDING PROTEIN ACBF. 208.7 3e-58 3 -NUCL_HUMAN P19338 NUCLEOLIN (PROTEIN C23). 207.1 9.4e-58 4 -Q40270 Q40270 RNA-BINDING PROTEIN PRECURSOR. 207.0 1e-57 2 -RO31_NICSY P19683 CHLOROPLAST 31 KD RIBONUCLEOPROTEIN 206.3 1.7e-57 2 -RO28_SPIOL P28644 CHLOROPLAST 28 KD RIBONUCLEOPROTEIN 205.8 2.4e-57 2 -PES4_YEAST P39684 PES4 PROTEIN (DNA POLYMERASE EPSILO 203.3 1.3e-56 4 -P92871 P92871 RNA-BINDING PROTEIN 2 (RNA-BINDING 202.5 2.4e-56 2 -Q39209 Q39209 RNA BINDING PROTEIN (FRAGMENT). 202.5 2.4e-56 2 -Q43350 Q43350 CP31 PRECURSOR. 202.5 2.4e-56 2 -RO31_ARATH Q04836 CHLOROPLAST 31 KD RIBONUCLEOPROTEIN 202.5 2.4e-56 2 -RO28_NICSY P19682 CHLOROPLAST 28 KD RIBONUCLEOPROTEIN 200.0 1.3e-55 2 -O18409 O18409 TESTIS-SPECIFIC RNP-TYPE RNA BINDIN 199.8 1.5e-55 3 -O02374 O02374 BRUNO. 199.8 1.5e-55 3 -Q41834 Q41834 NUCLEIC ACID-BINDING PROTEIN PRECUR 199.7 1.6e-55 2 -Q15164 Q15164 POLYADENYLATE BINDING PROTEIN II (F 199.7 1.6e-55 2 -NSR1_YEAST P27476 NUCLEAR LOCALIZATION SEQUENCE BINDI 199.0 2.6e-55 2 -Q08935 Q08935 CHLOROPLAST 29 KD RIBONUCLEOPROTEIN 198.0 5.3e-55 2 -O24306 O24306 RIBONUCLEOPROTEIN. 197.0 1.1e-54 2 -O23798 O23798 PS16 PROTEIN. 195.8 2.3e-54 2 -NOP4_YEAST P37838 NUCLEOLAR PROTEIN NOP4 (NUCLEOLAR P 194.4 6.3e-54 4 -Q39062 Q39062 CHLOROPLAST RNA-BINDING PROTEIN CP3 190.4 1e-52 2 -Q39061 Q39061 CHLOROPLAST RNA-BINDING PROTEIN CP3 190.4 1e-52 2 -RO30_NICPL P49313 CHLOROPLAST 30 KD RIBONUCLEOPROTEIN 189.6 1.8e-52 2 -Q08948 Q08948 CHLOROPLAST 33 KD RIBONUCLEOPROTEIN 189.5 1.9e-52 2 -RO33_NICSY P19684 CHLOROPLAST 33 KD RIBONUCLEOPROTEIN 188.9 2.9e-52 2 -Q99628 Q99628 SIAH BINDING PROTEIN 1 (FRAGMENT). 188.9 3e-52 3 -Q43349 Q43349 CP29. 188.8 3e-52 2 -Q41367 Q41367 24 KDA RNA BINDING PROTEIN (FRAGMEN 188.8 3.1e-52 2 -GAR2_SCHPO P41891 GAR2 PROTEIN. 188.0 5.4e-52 2 -O13707 O13707 PRE-RIBOPSOMAL PARTICLE ASSEMBLY PR 188.0 5.4e-52 2 -RO31_NICPL P49314 CHLOROPLAST 31 KD RIBONUCLEOPROTEIN 187.3 9e-52 2 -Q08937 Q08937 CHLOROPLAST 29 KD RIBONUCLEOPROTEIN 186.9 1.1e-51 2 -Q01491 Q01491 COLONY 1. 184.8 4.8e-51 4 -Q17385 Q17385 ELAV-TYPE RIBONUCLEOPROTEIN. 183.7 1.1e-50 3 -MODU_DROME P13469 DNA-BINDING PROTEIN MODULO. 182.8 2e-50 4 -O17310 O17310 SEX-LETHAL PROTEIN. 182.3 2.7e-50 2 -O17309 O17309 SEX-LETHAL PROTEIN (FRAGMENT). 182.3 2.7e-50 2 -YHH5_YEAST P38760 HYPOTHETICAL 75.9 KD PROTEIN IN SPO 181.3 5.4e-50 3 -NAM8_YEAST Q00539 NAM8 PROTEIN. 181.3 5.5e-50 3 -P70055 P70055 RNA BINDING PROTEIN ETR-3. 180.3 1.1e-49 3 -Q92879 Q92879 CUG-BP/HNAB50. 177.5 7.6e-49 3 -ROA1_RAT P04256 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 177.3 8.9e-49 2 -ROA1_MOUSE P49312 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 177.3 8.9e-49 2 -ROA1_HUMAN P09651 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 177.3 8.9e-49 2 -ROA1_BOVIN P09867 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 177.3 8.9e-49 2 -Q99141 Q99141 SEX-LETHAL PROTEIN, ALTERNATIVELY S 177.0 1.1e-48 2 -SXLF_DROME P19339 SEX-LETHAL PROTEIN, FEMALE-SPECIFIC 177.0 1.1e-48 2 -Q24668 Q24668 SEX-LETHAL GENE. 177.0 1.1e-48 2 -O13845 O13845 HYPOTHETICAL 69.4 KD PROTEIN. 176.6 1.4e-48 3 -Q92950 Q92950 ETR-3. 176.6 1.5e-48 3 -Q60668 Q60668 AU-RICH ELEMENT RNA-BINDING PROTEIN 175.3 3.5e-48 2 -SP49_HUMAN Q15427 SPLICEOSOME ASSOCIATED PROTEIN 49 ( 173.8 1e-47 2 -SQD_DROME Q08473 RNA-BINDING PROTEIN SQUID (HETEROGE 173.5 1.2e-47 2 -O15187 O15187 T-CLUSTER BINDING PROTEIN. 173.4 1.3e-47 2 -Q12771 Q12771 P37 AUF1 RNA-BINDING PROTEIN. 172.9 1.9e-47 2 -Q14103 Q14103 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 172.9 1.9e-47 2 -Q14100 Q14100 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 172.9 1.9e-47 2 -Q14102 Q14102 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 172.9 1.9e-47 2 -Q14101 Q14101 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 172.9 1.9e-47 2 -Q39568 Q39568 GBP1P. 172.8 2.1e-47 2 -PR24_YEAST P49960 U4/U6 SNRNA-ASSOCIATED SPLICING FAC 172.6 2.3e-47 3 -CABA_MOUSE Q99020 CARG-BINDING FACTOR-A (CBF-A). 171.7 4.4e-47 2 -ROA1_MACMU Q28521 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 171.1 6.4e-47 2 -Q93194 Q93194 C07A4.1. 170.8 8.3e-47 2 -O01671 O01671 SEX-LETHAL PROTEIN. 169.9 1.5e-46 2 -Q14498 Q14498 SPLICING FACTOR. 169.5 2e-46 3 -Q14499 Q14499 SPLICING FACTOR. 169.5 2e-46 3 -Q01858 Q01858 EII BINDING PROTEIN (HETEROGENEOUS 169.3 2.2e-46 2 -Q63568 Q63568 POLYPYRIMIDINE TRACT BINDING PROTEI 169.1 2.6e-46 4 -PTB_RAT Q00438 POLYPYRIMIDINE TRACT-BINDING PROTEI 169.1 2.6e-46 4 -Q08940 Q08940 PUTATIVE CHLOROPLAST 33 KD RIBONUCL 167.7 6.9e-46 2 -Q90602 Q90602 SINGLE STRANDED D BOX BINDING FACTO 167.4 8.2e-46 2 -NGR1_YEAST P32831 NEGATIVE GROWTH REGULATORY PROTEIN 165.5 3.1e-45 3 -ROA1_XENLA P17130 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 165.1 4.2e-45 2 -Q60901 Q60901 HU-ANTIGEN C (MHUC-S) (FRAGMENT). 164.8 5.1e-45 2 -PTB_HUMAN P26599 POLYPYRIMIDINE TRACT-BINDING PROTEI 163.6 1.1e-44 4 -Q39675 Q39675 (CEBP-1). 163.1 1.7e-44 2 -O14979 O14979 A+U-RICH ELEMENT RNA BINDING FACTOR 160.3 1.2e-43 2 -Q99729 Q99729 ABBP-1. 160.1 1.3e-43 2 -Q04150 Q04150 HETEROGENEOUS RIBONUCLEOPROTEIN C ( 160.1 1.3e-43 2 -Q00880 Q00880 CUTINASE NEGATIVE ACTING PROTEIN. 160.0 1.4e-43 2 -Q17352 Q17352 RRM-TYPE RNA BINDING PROTEIN. 159.2 2.5e-43 2 -YP85_CAEEL Q09442 HYPOTHETICAL 40.9 KD PROTEIN C08B11 159.2 2.5e-43 2 -Q41124 Q41124 CHLOROPLAST RNA BINDING PROTEIN PRE 158.9 3e-43 2 -NAB4_YEAST Q99383 NUCLEAR POLYADENYLATED RNA-BINDING 158.8 3.3e-43 2 -O22791 O22791 PUTATIVE RIBONUCLEOPROTEIN. 158.4 4.2e-43 2 -ROA2_HUMAN P22626 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 157.2 9.9e-43 2 -RO32_XENLA P51992 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 156.1 2.1e-42 2 -Q90626 Q90626 RIBONUCLEOPROTEIN. 156.1 2.1e-42 2 -O23093 O23093 SIMILAR TO NUCLEOLIN PROTEIN. 155.1 4.3e-42 3 -Q23795 Q23795 HNRNP PROTEIN. 154.6 6.2e-42 2 -RB27_DROME P48809 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 154.4 6.7e-42 2 -Q17201 Q17201 BMSQD-2. 154.4 7.2e-42 2 -Q17200 Q17200 BMSQD-1. 154.4 7.2e-42 2 -PTB_PIG Q29099 POLYPYRIMIDINE TRACT-BINDING PROTEI 154.3 7.3e-42 4 -Q90407 Q90407 RIBONUCLEOPROTEIN (FRAGMENT). 153.8 1e-41 2 -Q15584 Q15584 HTGR 1 MRNA. 152.8 2.1e-41 2 -RO31_XENLA P51968 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 151.9 3.8e-41 2 -O04240 O04240 RNA- OR SSDNA-BINDING PROTEIN (FRAG 151.9 3.8e-41 2 -RO22_XENLA P51990 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 151.9 3.9e-41 2 -Q61474 Q61474 MUSASHI-1 HOMOLOG (RNA-BINDING PROT 151.6 4.9e-41 2 -RO21_XENLA P51989 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 151.1 6.6e-41 2 -ROA3_HUMAN P51991 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 150.5 1e-40 2 -Q60690 Q60690 MYELIN BASIC PROTEIN EXPRESSION FAC 149.1 2.7e-40 2 -Q91807 Q91807 NERVOUS SYSTEM-SPECIFIC RNA-BINDING 148.8 3.4e-40 2 -Q91920 Q91920 RIBONUCLEOPROTEIN. 148.8 3.4e-40 2 -O08752 O08752 MLARK. 148.8 3.4e-40 2 -Q22037 Q22037 HNRNP LIKE PROTEIN. 148.8 3.4e-40 2 -Q91808 Q91808 NERVOUS SYSTEM-SPECIFIC RNA-BINDING 148.7 3.5e-40 2 -ROA1_SCHAM P21522 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 147.5 8.3e-40 2 -O02916 O02916 HLARK. 147.4 9e-40 2 -Q24486 Q24486 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 147.2 1e-39 2 -RB87_DROME P48810 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 147.2 1e-39 2 -Q24847 Q24847 SURFACE ANTIGEN. 146.6 1.5e-39 2 -ROA1_DROME P07909 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 146.1 2.2e-39 2 -Q99361 Q99361 HETEROGENEOUS RIBONUCLEOPROTEIN A1 146.1 2.2e-39 2 -Q24360 Q24360 NUCLEAR RIBONUCLEOPROTEIN. 146.1 2.2e-39 2 -Q24359 Q24359 NUCLEAR RIBONUCLEOPROTEIN. 146.1 2.2e-39 2 -O22855 O22855 HYPOTHETICAL PROTEIN. 145.2 4e-39 3 -Q24409 Q24409 MUSASHI. 145.2 4.1e-39 2 -Q13151 Q13151 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 145.1 4.4e-39 2 -Q21911 Q21911 R10E9.1. 144.8 5.3e-39 2 -SR55_DROME P26686 SERINE-ARGININE PROTEIN 55 (SRP55) 144.8 5.6e-39 2 -Q24252 Q24252 52-KD BRACKETING PROTEIN. 144.8 5.6e-39 2 -Q23796 Q23796 HNRNP PROTEIN. 144.2 7.9e-39 2 -SP33_HUMAN Q07955 PRE-MRNA SPLICING FACTOR SF2, P33 S 142.6 2.5e-38 2 -O04425 O04425 FLOWERING TIME CONTROL PROTEIN FCA. 140.4 1.1e-37 2 -O23475 O23475 FCA GAMMA. 140.4 1.1e-37 2 -RB97_DROME Q02926 RIBONUCLEOPROTEIN RB97D. 139.7 1.9e-37 2 -Q41042 Q41042 PROTEIN LOCALIZED IN THE NUCLEOLI. 139.4 2.2e-37 2 -SR75_HUMAN Q08170 PRE-MRNA SPLICING FACTOR SRP75. 138.8 3.5e-37 2 -U2AF_HUMAN P26368 SPLICING FACTOR U2AF 65 KD SUBUNIT 138.0 5.8e-37 3 -U2AF_MOUSE P26369 SPLICING FACTOR U2AF 65 KD SUBUNIT 138.0 5.8e-37 3 -Q13809 Q13809 ALTERNATIVE SPLICING FACTOR. 137.7 7.4e-37 2 -YG5B_YEAST P53316 HYPOTHETICAL 89.5 KD PROTEIN IN MGA 136.4 1.8e-36 3 -Q15020 Q15020 ORF. 136.3 2e-36 2 -Q13242 Q13242 SPLICING FACTOR, ARGININE/SERINE RI 136.3 2e-36 2 -P92966 P92966 SPLICING FACTOR. 133.6 1.3e-35 2 -O23189 O23189 RNA-BINDING PROTEIN HOMOLOG. 132.7 2.4e-35 3 -Q26692 Q26692 TBRRM1. 132.7 2.5e-35 3 -Q13245 Q13245 SRP55-3 PRE-MRNA SPLICING FACTOR (F 132.6 2.6e-35 2 -Q13247 Q13247 SRP55-1 PRE-MRNA SPLICING FACTOR. 132.6 2.6e-35 2 -MSSP_HUMAN P29558 SINGLE-STRANDED DNA-BINDING PROTEIN 131.9 4e-35 2 -O23212 O23212 SPLICING FACTOR HOMOLOG. 129.4 2.4e-34 2 -CL4_RAT Q09167 INSULIN-INDUCED GROWTH RESPONSE PRO 128.8 3.6e-34 2 -Q13243 Q13243 SRP40-1 PRE-MRNA SPLICING FACTOR. 128.8 3.6e-34 2 -P92965 P92965 ARGININE/SERINE-RICH SPLICING FACTO 128.2 5.5e-34 2 -RU1A_HUMAN P09012 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN 127.6 7.9e-34 2 -Q27199 Q27199 NUCLEOLAR PHOSPHOPROTEIN. 127.2 1.1e-33 2 -Q14869 Q14869 MSSP-2 MRNA. 126.6 1.6e-33 2 -Q15433 Q15433 SCR2. 126.6 1.6e-33 2 -Q26658 Q26658 ACTIVATOR PROTEIN. 126.2 2.1e-33 2 -Q19335 Q19335 HYPOTHETICAL PROTEIN F11A10.2. 125.5 3.6e-33 2 -NONA_DROME Q04047 NO-ON-TRANSIENT A PROTEIN. 125.4 3.7e-33 2 -RU1A_XENLA P45429 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN 125.4 3.8e-33 2 -PSF_HUMAN P23246 PTB-ASSOCIATED SPLICING FACTOR (PSF 124.6 6.3e-33 2 -Q62189 Q62189 SMALL NUCLEAR RNA. 124.4 7.3e-33 2 -PTB_MOUSE P17225 POLYPYRIMIDINE TRACT-BINDING PROTEI 124.3 7.9e-33 4 -P92964 P92964 SPLICING FACTOR. 124.2 8.8e-33 2 -O00201 O00201 NUCLEAR MATRIX PROTEIN 55. 123.7 1.3e-32 2 -Q12786 Q12786 54 KDA PROTEIN. 123.7 1.3e-32 2 -O35737 O35737 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 123.6 1.3e-32 3 -ROH1_HUMAN P31943 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 123.6 1.3e-32 3 -P90727 P90727 SPLICING FACTOR U2AF65. 122.5 2.8e-32 3 -Q24024 Q24024 TESTIS-SPECIFIC-RRM-PROTEIN. 122.3 3.1e-32 2 -ROH2_HUMAN P55795 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 121.6 5.2e-32 3 -Q24534 Q24534 SPLICEOSOMAL PROTEIN. 120.7 9.9e-32 2 -P70333 P70333 MURINE HOMOLOG OF HUMAN FTP-3. 120.6 1e-31 3 -P78814 P78814 FISSION YEAST (FRAGMENT). 120.5 1.1e-31 2 -Q24261 Q24261 BJ6 PROTEIN. 120.2 1.4e-31 2 -Q15434 Q15434 SCR3. 120.2 1.4e-31 2 -O13759 O13759 RNA BINDING POST-TRANSCRIPTIONAL RE 120.0 1.5e-31 2 -Q94901 Q94901 RNA-BINDING PROTEIN LARK. 120.0 1.6e-31 2 -Q91581 Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN 119.7 2e-31 1 -CST2_HUMAN P33240 CLEAVAGE STIMULATION FACTOR, 64 KD 119.7 2e-31 1 -NOP3_YEAST Q01560 NUCLEOLAR PROTEIN 3 (MITOCHONDRIAL 119.5 2.2e-31 2 -Q24562 Q24562 RNA BINDING PROTEIN. 118.7 4e-31 3 -Q40363 Q40363 NUM1 PROTEIN. 118.5 4.6e-31 2 -Q63887 Q63887 NONO. 117.2 1.1e-30 2 -RU2B_HUMAN P08579 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN 116.7 1.5e-30 2 -Q24113 Q24113 NO-ON TRANSIENT A-LIKE PROTEIN (FRA 116.3 2e-30 2 -Q17430 Q17430 B0035.12. 114.6 6.5e-30 2 -RU1A_DROME P43332 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN 114.3 8.1e-30 2 -Q61413 Q61413 COLD INDUCIBLE RNA-BINDING PROTEIN 114.2 8.9e-30 1 -Q14011 Q14011 GLYCINE-RICH RNA BINDING PROTEIN CI 114.2 9.1e-30 1 -P93486 P93486 GLYCINE-RICH RNA-BINDING PROTEIN PS 114.1 9.6e-30 1 -YHC4_YEAST P38741 HYPOTHETICAL 80.1 KD PROTEIN IN SNF 113.6 1.4e-29 2 -GRP_DAUCA Q03878 GLYCINE-RICH RNA-BINDING PROTEIN. 113.4 1.6e-29 1 -GR10_BRANA Q05966 GLYCINE-RICH RNA-BINDING PROTEIN 10 113.0 2e-29 1 -ROG_HUMAN P38159 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 113.0 2e-29 1 -O23288 O23288 RIBONUCLEOPROTEIN HOMOLOG. 111.4 6e-29 2 -O24106 O24106 RNA-BINDING PROTEIN. 110.9 8.9e-29 1 -Q40426 Q40426 RNA-BINDING GLYCINE-RICH PROTEIN-1 110.4 1.2e-28 1 -O04070 O04070 SGRP-1 PROTEIN. 110.3 1.3e-28 1 -Q41518 Q41518 SINGLE-STRANDED NUCLEIC ACID BINDIN 110.3 1.3e-28 1 -O24601 O24601 GLYCINE-RICH RNA BINDING PROTEIN 2. 110.2 1.4e-28 1 -O24188 O24188 OSGRP2. 110.2 1.4e-28 1 -Q39105 Q39105 GLYCINE-RICH RNA-BINDING PROTEIN (F 109.8 1.8e-28 1 -RNPL_HUMAN P98179 PUTATIVE RNA-BINDING PROTEIN RNPL. 108.8 3.6e-28 1 -GRP1_SINAL P49310 GLYCINE-RICH RNA-BINDING PROTEIN GR 108.6 4.4e-28 1 -Q40052 Q40052 GLYCINE RICH PROTEIN, RNA BINDING P 108.6 4.4e-28 1 -O23793 O23793 RNA BINDING PROTEIN. 108.1 6.1e-28 1 -Q40437 Q40437 RGP-3 (FRAGMENT). 108.1 6.1e-28 1 -GRP8_ARATH Q03251 GLYCINE-RICH RNA-BINDING PROTEIN 8 107.9 7.1e-28 1 -GRP2_SINAL P49311 GLYCINE-RICH RNA-BINDING PROTEIN GR 107.8 7.3e-28 1 -O22314 O22314 ASF/SF2 HOMOLOG. 107.6 8.4e-28 2 -O22315 O22315 ASF/SF2 HOMOLOG. 107.6 8.4e-28 2 -Q40425 Q40425 RNA-BINDING GRICINE-RICH PROTEIN-1 106.7 1.6e-27 1 -GRP2_SORVU Q99070 GLYCINE-RICH RNA-BINDING PROTEIN 2. 106.5 1.8e-27 1 -RT19_ARATH P39697 MITOCHONDRIAL 40S RIBOSOMAL PROTEIN 106.4 1.9e-27 1 -O35326 O35326 HRS. 106.2 2.2e-27 2 -O35479 O35479 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 106.2 2.2e-27 1 -Q40427 Q40427 RNA-BINDING GLYCINE-RICH PROTEIN-1 106.0 2.6e-27 1 -Q41453 Q41453 PUTATIVE GLYCINE RICH RNA BINDING P 105.6 3.4e-27 1 -YA2B_SCHPO Q09702 HYPOTHETICAL 57.8 KD PROTEIN C2F7.1 105.4 3.8e-27 4 -U2AF_SCHPO P36629 SPLICING FACTOR U2AF LARGE SUBUNIT. 105.3 4.3e-27 2 -O24187 O24187 OSGRP1. 105.2 4.4e-27 1 -O22390 O22390 GLYCINE-RICH PROTEIN. 105.2 4.6e-27 1 -GRP7_ARATH Q03250 GLYCINE-RICH RNA-BINDING PROTEIN 7. 105.2 4.6e-27 1 -HS49_YEAST Q99181 HSH49 PROTEIN. 104.7 6.2e-27 2 -Q40436 Q40436 RNA-BINDING GLYCINE RICH PROTEIN (R 103.7 1.3e-26 1 -GRPA_MAIZE P10979 GLYCINE-RICH RNA-BINDING, ABSCISIC 103.7 1.3e-26 1 -Q21900 Q21900 R10E4.2. 103.7 1.3e-26 2 -O22385 O22385 GLYCINE-RICH PROTEIN. 103.6 1.3e-26 1 -P90978 P90978 U2AF65. 103.3 1.6e-26 3 -Q42412 Q42412 RNA-BINDING PROTEIN RZ-1. 102.8 2.3e-26 1 -YIS1_YEAST P40561 HYPOTHETICAL 29.0 KD PROTEIN IN BET 102.7 2.5e-26 1 -O22653 O22653 GLYCINE-RICH RNA-BINDING PROTEIN. 102.4 3.2e-26 1 -P90699 P90699 PUTATIVE RNA BINDING PROTEIN. 102.1 3.9e-26 1 -Q64283 Q64283 SILICA-INDUCED PROTEIN 41 (SIG41). 101.9 4.3e-26 1 -Q15815 Q15815 HTRA2-BETA. 101.9 4.3e-26 1 -O22703 O22703 PUTATIVE RNA-BINDING PROTEIN. 101.6 5.6e-26 1 -O22384 O22384 GLYCINE-RICH PROTEIN. 101.4 6.1e-26 1 -Q43472 Q43472 LOW TEMPERATURE-RESPONSIVE RNA-BIND 101.4 6.4e-26 1 -GRF1_HUMAN Q12849 G-RICH SEQUENCE FACTOR-1 (GRSF-1). 100.6 1.1e-25 3 -Q15376 Q15376 Y-CHROMOSOME RNA RECOGNITION MOTIF 100.2 1.4e-25 1 -Q15414 Q15414 RNA BINDING MOTIF PROTEIN 1, RELATE 100.2 1.4e-25 1 -Q09542 Q09542 HYPOTHETICAL 60.3 KD PROTEIN F25B5. 100.0 1.7e-25 2 -O24184 O24184 GLYCINE-RICH RNA-BINDING PROTEIN. 99.9 1.8e-25 1 -YNR5_YEAST P53883 HYPOTHETICAL 45.7 KD PROTEIN IN RPS 99.7 2e-25 2 -Q39244 Q39244 U1SNRNP-SPECIFIC PROTEIN. 99.6 2.2e-25 2 -Q62093 Q62093 PR264/SC35. 99.6 2.2e-25 1 -SC35_CHICK P30352 SPLICING FACTOR SC35 (SC-35) (SPLIC 99.6 2.2e-25 1 -Q44555 Q44555 RNA-BINDING PROTEIN. 99.5 2.3e-25 1 -Q15415 Q15415 YRRM2. 99.3 2.6e-25 1 -RN15_YEAST P25299 MRNA 3'-END PROCESSING PROTEIN RNA1 98.9 3.7e-25 1 -O25501 O25501 SS-DNA BINDING PROTEIN 12RNP2 PRECU 95.0 5.3e-24 1 -Q19706 Q19706 F22B5.2. 95.0 5.3e-24 1 -Q39201 Q39201 RIBONUCLEOPROTEIN. 94.9 5.7e-24 2 -ROF_HUMAN P52597 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 94.8 5.9e-24 3 -Q23120 Q23120 W02B12.2. 94.7 6.7e-24 2 -O22905 O22905 FCA GAMMA ISOLOG. 94.5 7.3e-24 2 -Q44560 Q44560 RBPA1 PROTEIN. 94.5 7.7e-24 1 -Q90408 Q90408 RIBONUCLEOPROTEIN (FRAGMENT). 93.9 1.2e-23 1 -Q08374 Q08374 RNA-BINDING PROTEIN RBPA. 93.7 1.3e-23 1 -Q13148 Q13148 TAR DNA-BINDING PROTEIN-43. 93.7 1.3e-23 2 -Q13595 Q13595 TRANSFORMER-2 ALPHA. 93.5 1.5e-23 1 -Q60990 Q60990 RBM. 92.9 2.3e-23 1 -TRA2_DROME P19018 TRANSFORMER-2 SEX-DETERMINING PROTE 92.8 2.4e-23 1 -O13741 O13741 HYPOTHETICAL 49.4 KD PROTEIN. 92.4 3.3e-23 2 -P70807 P70807 RNA-BINDING PROTEIN. 92.3 3.4e-23 1 -P73557 P73557 RNA-BINDING PROTEIN. 91.8 5e-23 1 -Q23121 Q23121 W02B12.3. 91.5 6e-23 2 -O35698 O35698 RNA-BINDING PROTEIN. 90.8 1e-22 1 -O15414 O15414 CAGH4. 90.5 1.2e-22 1 -Q44556 Q44556 RNA-BINDING PROTEIN. 90.3 1.4e-22 1 -Q21323 Q21323 SIMILAR TO U1 SMALL NUCLEAR RIBONUC 90.3 1.4e-22 2 -RN24_SCHPO Q09100 RNA-BINDING PROTEIN RNP24. 90.1 1.6e-22 2 -O02008 O02008 TRANSFORMER-2 PROTEIN ISOFORM 272. 90.0 1.7e-22 1 -O02009 O02009 TRANSFORMER-2 PROTEIN ISOFORM 225. 90.0 1.7e-22 1 -YDC1_SCHPO Q10422 HYPOTHETICAL 33.6 KD PROTEIN C25G10 89.7 2.1e-22 1 -Q44554 Q44554 RNA-BINDING PROTEIN RBPB. 89.3 2.8e-22 1 -SC35_HUMAN Q01130 SPLICING FACTOR SC35 (SC-35) (SPLIC 89.1 3.1e-22 1 -YIS5_YEAST P40565 HYPOTHETICAL 17.1 KD PROTEIN IN BET 88.9 3.6e-22 1 -Q41498 Q41498 U1SNRNP-SPECIFIC PROTEIN, U1A. 88.4 5e-22 2 -Q41810 Q41810 GLYCINE-RICH PROTEIN. 88.2 6.1e-22 1 -Q46349 Q46349 RNA-BINDING PROTEIN. 87.8 8e-22 1 -O22922 O22922 SPLICEOSOMAL PROTEIN U2B ISOLOG. 87.7 8.1e-22 2 -Q53322 Q53322 SS-DNA BINDING PROTEIN 12RNP2. 87.2 1.1e-21 1 -Q55343 Q55343 12KDA RNA-BINDING. 86.9 1.4e-21 1 -RU17_HUMAN P08621 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN 86.9 1.5e-21 1 -Q99377 Q99377 SMALL NUCLEAR RIBONUCLEOPROTEIN (U1 86.9 1.5e-21 1 -Q62376 Q62376 U1RNA-ASSOCIATED 70-KDA PROTEIN (FR 86.9 1.5e-21 1 -P78493 P78493 68 KDA (U1) RIBONUCLEOPROTEIN (U1). 86.9 1.5e-21 1 -Q57014 Q57014 HYPOTHETICAL 11.0 KD PROTEIN. 86.1 2.5e-21 1 -Q41499 Q41499 SPLICEOSOMAL PROTEIN. 86.0 2.7e-21 2 -RU17_DROME P17133 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN 85.8 3.1e-21 1 -Q62019 Q62019 16 KDA PROTEIN. 84.8 6.3e-21 2 -Q18999 Q18999 HYPOTHETICAL PROTEIN D2089.4. 84.0 1.1e-20 4 -O22851 O22851 SMALL NUCLEAR RIBONUCLEOPROTEIN ISO 83.9 1.2e-20 1 -O00320 O00320 F25451_2. 83.9 1.2e-20 1 -RU17_XENLA P09406 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN 83.4 1.7e-20 1 -Q94467 Q94467 SSRNA-BINDING PROTEIN. 83.3 1.7e-20 1 -O08831 O08831 SRP20 GENE. 82.8 2.5e-20 1 -X16_HUMAN P23152 PRE-MRNA SPLICING FACTOR SRP20 (X16 82.8 2.5e-20 1 -P91414 P91414 SIMILARITY TO RNA RECOGNITION MOTIF 82.5 3e-20 1 -Q42404 Q42404 U1 SNRNP 70K PROTEIN. 82.4 3.2e-20 1 -ROC_RAT P17132 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 81.7 5.2e-20 1 -Q55345 Q55345 RNA-BINDING PROTEIN. 81.7 5.4e-20 1 -YDB2_SCHPO Q10355 HYPOTHETICAL 24.4 KD PROTEIN C22E12 81.7 5.4e-20 1 -O18352 O18352 RNA BINDING PROTEIN. 81.5 6.2e-20 1 -Q18318 Q18318 SIMILAR TO RNA-BINDING PROTEIN. 81.4 6.7e-20 1 -P93396 P93396 TRANSFORMER-SR RIBONUCLEOPROTEIN (F 81.4 6.8e-20 1 -Q09511 Q09511 PROBABLE SPLICING FACTOR SC35 (PR26 80.8 9.8e-20 1 -Q15351 Q15351 SEB4B (FRAGMENT). 80.8 1e-19 1 -Q15350 Q15350 SEB4D (FRAGMENT). 80.8 1e-19 1 -YD3D_SCHPO Q10277 HYPOTHETICAL 59.1 KD PROTEIN C13G7. 80.7 1.1e-19 2 -O23866 O23866 MEI2-LIKE PROTEIN. 80.5 1.2e-19 2 -Q60399 Q60399 C23 NUCLEOLIN, GLYCINE RICH REGION 80.5 1.3e-19 1 -Q55342 Q55342 RNA-BINDING PROTEIN. 80.3 1.4e-19 1 -Q62176 Q62176 SEB4. 80.2 1.5e-19 1 -Q21832 Q21832 R07E5.14. 80.2 1.5e-19 1 -Q38915 Q38915 RNA-BINDING PROTEIN. 79.7 2.1e-19 2 -Q16629 Q16629 SPLICING FACTOR, ARGININE/SERINE-RI 79.6 2.4e-19 1 -RNP1_YEAST P32385 RIBONUCLEOPROTEIN-1. 79.0 3.5e-19 1 -YQOC_CAEEL Q09301 HYPOTHETICAL 21.6 KD PROTEIN EEED8. 79.0 3.6e-19 1 -Q16662 Q16662 SRP40-2. 78.8 3.9e-19 1 -P78795 P78795 FISSION YEAST (FRAGMENT). 78.8 4e-19 1 -YQO4_CAEEL Q09295 HYPOTHETICAL 26.2 KD PROTEIN EEED8. 78.5 5e-19 1 -Q55341 Q55341 21KDA RNA-BINDING PROTEIN, 12RNP1. 78.4 5.4e-19 1 -Q53321 Q53321 SS-DNA BINDING PROTEIN 12RNP1. 78.4 5.4e-19 1 -O14801 O14801 EUKARYOTIC TRANSLATION INITIATION F 78.0 7e-19 1 -EWS_MOUSE Q61545 RNA-BINDING PROTEIN EWS. 77.9 7.2e-19 1 -Q18409 Q18409 SIMILAR TO PRE-MRNA SPLICING FACTOR 77.9 7.7e-19 1 -O14875 O14875 RNA BINDING PROTEIN. 77.5 9.9e-19 1 -EWS_HUMAN Q01844 RNA-BINDING PROTEIN EWS. 77.4 1e-18 1 -O14327 O14327 RNA BINDING PROTEIN. 77.4 1.1e-18 1 -O13829 O13829 PUTATIVE SMALL NUCLEAR RIBONUCLEOPR 77.4 1.1e-18 1 -O00425 O00425 PUTATIVE RNA BINDING PROTEIN KOC (K 76.4 2.1e-18 2 -Q61954 Q61954 NEOSIN (FRAGMENT). 76.3 2.3e-18 2 -YNL0_YEAST P53927 HYPOTHETICAL 25.4 KD PROTEIN IN CYB 75.8 3.2e-18 1 -SSB1_YEAST P10080 SINGLE-STRANDED NUCLEIC ACID-BINDIN 75.6 3.6e-18 2 -O35935 O35935 POLY(A) BINDING PROTEIN II. 75.5 4e-18 1 -Q28165 Q28165 POLYA BINDING PROTEIN II. 75.5 4e-18 1 -Q99730 Q99730 TAT-SF1. 75.2 5e-18 2 -Q27926 Q27926 RNA BINDING PROTEIN. 75.1 5.3e-18 1 -Q16560 Q16560 U1-SNRNP BINDING PROTEIN HOMOLOG. 74.8 6.3e-18 1 -Q55765 Q55765 HYPOTHETICAL 16.6 KD PROTEIN. 74.8 6.5e-18 1 -O23146 O23146 HNRNP-LIKE PROTEIN. 74.1 1e-17 1 -O35335 O35335 RNA BINDING PROTEIN. 74.1 1e-17 1 -Q22030 Q22030 R74.5. 73.0 2.3e-17 1 -Q10572 Q10572 44.4 KD RNA-BINDING PROTEIN IN FOX- 72.7 2.8e-17 1 -Q22304 Q22304 SIMILAR TO C. ELEGANS PROTEIN R74.5 72.7 2.8e-17 1 -Q21155 Q21155 SIMILAR TO RNA BINDING PROTEINS. 72.6 2.9e-17 1 -Q22318 Q22318 T07F10.3. 72.1 4.2e-17 2 -Q93233 Q93233 C17E4.5. 72.0 4.3e-17 1 -Q02427 Q02427 RNA BINDING PROTEIN 1. 71.9 4.7e-17 1 -Q13244 Q13244 SRP55-2 PRE-MRNA SPLICING FACTOR. 71.8 5e-17 1 -Q15287 Q15287 RNA-BINDING PROTEIN. 71.4 6.7e-17 1 -Q13344 Q13344 FUS-LIKE PROTEIN (FRAGMENT). 71.3 7.4e-17 1 -FUS_HUMAN P35637 RNA-BINDING PROTEIN FUS/TLS. 71.3 7.4e-17 1 -FUS_BOVIN Q28009 RNA-BINDING PROTEIN FUS/TLS (NUCLEA 71.3 7.4e-17 1 -Q62826 Q62826 M4 PROTEIN HOMOLOG. 71.3 7.5e-17 1 -Q24491 Q24491 RNA BINDING PROTEIN. 70.8 1.1e-16 1 -O04432 O04432 GLYCINE-RICH PROTEIN. 70.3 1.5e-16 1 -Q08208 Q08208 CHROMOSOME XV READING FRAME ORF YOL 70.0 1.8e-16 1 -Q27294 Q27294 RNA BINDING PROTEIN CABEZA. 69.6 2.3e-16 1 -O14369 O14369 PUTATIVE RNA-BINDING PROTEIN. 68.5 5.2e-16 1 -YSO5_CAEEL Q10130 HYPOTHETICAL 98.0 KD PROTEIN F56D1. 66.9 1.5e-15 1 -ROC_HUMAN P07910 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 66.4 2.1e-15 1 -Q92751 Q92751 HTAFII68. 66.2 2.5e-15 1 -Q92804 Q92804 PUTATIVE RNA BINDING PROTEIN RBP56. 66.2 2.5e-15 1 -WHI3_YEAST P34761 WHI3 PROTEIN. 65.4 4.4e-15 1 -CB20_XENLA P52299 20 KD NUCLEAR CAP BINDING PROTEIN ( 65.3 4.5e-15 1 -CB20_HUMAN P52298 20 KD NUCLEAR CAP BINDING PROTEIN ( 64.1 1e-14 1 -O23131 O23131 CONTAINS PROCITE 'RNP1' PUTATIVE RN 64.0 1.1e-14 1 -Q21322 Q21322 SIMILAR TO U1 SMALL NUCLEAR RIBONUC 64.0 1.2e-14 2 -O42254 O42254 ZIPCODE-BINDING PROTEIN. 63.8 1.3e-14 2 -Q92909 Q92909 DAZLA. 63.6 1.5e-14 1 -Q92904 Q92904 RNA BINDING PROTEIN. 63.6 1.5e-14 1 -Q95192 Q95192 RNA-BINDING PROTEIN. 63.6 1.5e-14 1 -IF4B_HUMAN P23588 EUKARYOTIC TRANSLATION INITIATION F 63.5 1.6e-14 1 -Q93594 Q93594 F26A3.2. 63.4 1.8e-14 1 -Q19018 Q19018 MEC-8 GENE. 63.2 2e-14 2 -Q22039 Q22039 MEC-8 PROTEIN. 63.2 2e-14 2 -ROC_XENLA P19600 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 63.0 2.2e-14 1 -O15396 O15396 DAZLA PROTEIN. 62.1 4.4e-14 1 -O13674 O13674 HYPOTHETICAL 73.9 KD PROTEIN. 61.8 5.2e-14 3 -O23646 O23646 RSZP22 PROTEIN. 61.3 7.5e-14 1 -Q23287 Q23287 SIMILARITY TO 2 RNA RECOGNITION MOT 61.2 7.9e-14 1 -P90871 P90871 HYPOTHETICAL PROTEIN F39H2.2 IN CHR 61.1 8.7e-14 1 -O15042 O15042 KIAA0332 (FRAGMENT). 60.7 1.1e-13 1 -YSX2_CAEEL Q10021 HYPOTHETICAL 24.0 KD PROTEIN T28D9. 60.7 1.1e-13 1 -Q64368 Q64368 DAZ-LIKE AUTOSOMAL (RNA RECOGNITION 60.3 1.5e-13 1 -Q09331 Q09331 CSX1+ (FRAGMENT). 60.1 1.7e-13 1 -Q14151 Q14151 KIAA0138 PROTEIN. 60.1 1.7e-13 1 -Q22412 Q22412 T11G6.8. 60.0 1.8e-13 1 -Q09335 Q09335 CSX1+ (FRAGMENT). 59.9 2e-13 1 -Q64012 Q64012 MERC=RNA-BINDING PROTEIN {ALTERNATI 59.7 2.2e-13 1 -Q15056 Q15056 MRNA (KIAA0038) FOR ORF, PARTIAL CD 59.5 2.6e-13 1 -Q42215 Q42215 NAM8 PROTEIN (FRAGMENT). 59.5 2.6e-13 1 -Q09584 Q09584 HYPOTHETICAL 36.0 KD PROTEIN K04G7. 59.4 2.7e-13 1 -Q14924 Q14924 NCBP INTERACTING PROTEIN 1. 59.3 3e-13 1 -Q20414 Q20414 F44G4.4. 59.0 3.6e-13 2 -MLO3_SCHPO Q09330 MLO3 PROTEIN. 59.0 3.6e-13 1 -GRP1_SORVU Q99069 GLYCINE-RICH RNA-BINDING PROTEIN 1 58.9 3.9e-13 1 -RU17_YEAST Q00916 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN 58.9 4e-13 1 -O08583 O08583 TRANSCRIPTIONAL COACTIVATOR ALY (AL 58.7 4.6e-13 1 -Q14730 Q14730 LA 4.1 PROTEIN (FRAGMENT). 58.4 5.4e-13 1 -Q91017 Q91017 GIZZARD PTB-ASSOCIATED SPLICING FAC 58.2 6.2e-13 1 -LA_HUMAN P05455 LUPUS LA PROTEIN (SJOGREN SYNDROME 58.2 6.5e-13 1 -Q15367 Q15367 RIBONUCLEOPROTEIN (LA) (FRAGMENT). 58.2 6.5e-13 1 -Q08920 Q08920 CHROMOSOME XVI READING FRAME ORF YP 57.2 1.3e-12 1 -Q22135 Q22135 T04A8.6. 56.1 2.7e-12 1 -O23645 O23645 RSZP21 PROTEIN. 55.9 3.1e-12 1 -ARP2_PLAFA P13824 CLUSTERED-ASPARAGINE-RICH PROTEIN ( 55.6 3.9e-12 2 -P97855 P97855 RAS-GTPASE-ACTIVATING PROTEIN SH3-D 55.5 4e-12 1 -Q24207 Q24207 BOULE PROTEIN. 55.5 4.2e-12 1 -O14797 O14797 HRS (FRAGMENT). 55.4 4.5e-12 2 -Q62150 Q62150 RIBONUCLEIC ACID BINDING PROTEIN S1 55.2 4.9e-12 1 -Q13283 Q13283 GAP SH3 BINDING PROTEIN. 54.2 1e-11 1 -Q22708 Q22708 ZK1067.6 (FRAGMENT). 54.1 1.1e-11 2 -Q14136 Q14136 KIAA0122 PROTEIN (FRAGMENT). 54.1 1.1e-11 2 -Q04067 Q04067 D9461.16P. 54.0 1.2e-11 1 -Q62379 Q62379 U2-SNRNP B'' (PRNP31) (FRAGMENT). 53.7 1.5e-11 1 -Q18724 Q18724 HYPOTHETICAL PROTEIN C50B8.1. 53.3 1.9e-11 1 -ROL_HUMAN P14866 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 52.7 2.8e-11 3 -Q15424 Q15424 SCAFFOLD ATTACHMENT FACTOR (FRAGMEN 52.3 3.7e-11 1 -P97379 P97379 RAS-GTPASE-ACTIVATING PROTEIN SH3-D 52.2 4e-11 1 -SRP1_SCHPO Q10193 SRP1 PROTEIN. 51.1 8.9e-11 1 -RDP_MOUSE P19426 RD PROTEIN (WL623). 50.2 1.7e-10 1 -RDP_HUMAN P18615 RD PROTEIN. 50.2 1.7e-10 1 -LA_BOVIN P10881 LUPUS LA PROTEIN HOMOLOG (SJOGREN S 49.8 2.1e-10 1 -LA_RAT P38656 LUPUS LA PROTEIN HOMOLOG (SJOGREN S 49.8 2.2e-10 1 -LU15_HUMAN P52756 PUTATIVE TUMOR SUPPRESSOR LUCA15. 49.5 2.7e-10 2 -O15237 O15237 MSSP-2 (FRAGMENT). 49.4 2.8e-10 1 -O15236 O15236 MSSP-2 (FRAGMENT). 49.4 2.8e-10 1 -O14102 O14102 SPLICOSOME ASSOIATED PROTEIN (FRAGM 49.3 3.1e-10 1 -Q62378 Q62378 U2-SNRNP B'' (PRNP11) (FRAGMENT). 49.1 3.4e-10 1 -Q15380 Q15380 Y-CHROMOSOME RNA RECOGNITION MOTIF 48.9 3.9e-10 1 -U2AG_HUMAN Q01081 SPLICING FACTOR U2AF 35 KD SUBUNIT 48.7 4.6e-10 1 -YFK2_YEAST P43607 HYPOTHETICAL 31.9 KD PROTEIN IN RPL 48.5 5.4e-10 1 -IF32_YEAST P06103 EUKARYOTIC TRANSLATION INITIATION F 48.0 7.6e-10 1 -LA_MOUSE P32067 LUPUS LA PROTEIN HOMOLOG (SJOGREN S 47.6 9.8e-10 1 -CPO_DROME Q01617 PUTATIVE COUCH POTATO PROTEIN. 47.3 1.2e-09 1 -Q17175 Q17175 50KDA LECTIN. 46.6 2e-09 1 -Q13117 Q13117 PUTATIVE RNA BINDING DAZ PROTEIN. 46.1 2.8e-09 1 -O13801 O13801 HYPOTHETICAL 66.4 KD PROTEIN. 45.5 4.3e-09 1 -RU1A_YEAST P32605 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN 45.3 4.9e-09 1 -Q12159 Q12159 RNA ANNEALING PROTEIN YRA1P. 45.2 5.2e-09 1 -NAB3_YEAST P38996 NUCLEAR POLYADENYLATED RNA-BINDING 44.7 7.5e-09 1 -Q08925 Q08925 CHROMOSOME XVI READING FRAME ORF YP 44.1 1.1e-08 4 -Q18601 Q18601 SIMILAR TO HETEROGENEOUS RIBONUCLEO 44.0 1.2e-08 2 -Q63627 Q63627 CTD-BINDING SR-LIKE PROTEIN RA4 (FR 44.0 1.2e-08 1 -O15758 O15758 RNA BINDING PROTEIN. 43.9 1.3e-08 2 -MEI2_SCHPO P08965 MEI2 PROTEIN. 43.8 1.4e-08 1 -Q23161 Q23161 W04D2.6 (FRAGMENT). 43.3 2e-08 1 -O13649 O13649 SPLICEOSOMAL PROTEIN. 43.0 2.4e-08 1 -P90797 P90797 HYPOTHETICAL PROTEIN D2089.1 (FRAGM 42.9 2.5e-08 1 -Q07655 Q07655 CHROMOSOME IV READING FRAME ORF YDL 42.8 2.7e-08 1 -O15759 O15759 RNA BINDING PROTEIN. 42.8 2.7e-08 2 -RN12_YEAST P32843 RNA12 PROTEIN. 42.8 2.8e-08 1 -Q07034 Q07034 RNA BINDING PROTEIN. 42.3 3.9e-08 1 -Q24375 Q24375 LA RIBONUCLEOPROTEIN. 40.7 1.1e-07 1 -O18219 O18219 Y57G11A.5. 40.6 1.2e-07 1 -Q15686 Q15686 HU1-70K-LIKE PROTEIN (216 AA) (FRAG 40.4 1.5e-07 1 -Q15364 Q15364 RIBONUCLEOPROTEIN ANTIGEN. 40.4 1.5e-07 1 -D111_ARATH P42698 DNA-DAMAGE-REPAIR/TOLERATION PROTEI 40.4 1.5e-07 1 -LAB_XENLA P28049 LUPUS LA PROTEIN HOMOLOG B. 39.8 2.2e-07 1 -LA_DROME P40796 LA PROTEIN HOMOLOG. 38.3 6e-07 1 -YAG3_SCHPO Q09868 HYPOTHETICAL 62.1 KD PROTEIN C12G12 38.2 6.8e-07 1 -LAA_XENLA P28048 LUPUS LA PROTEIN HOMOLOG A. 37.7 9.1e-07 1 -Q92516 Q92516 WS-1/TYPE2. 37.5 1.1e-06 1 -Q92517 Q92517 WS-1/TYPE3. 37.5 1.1e-06 1 -Q93062 Q93062 WS-1/TYPE4. 37.5 1.1e-06 1 -Q63623 Q63623 CTD-BINDING SR-LIKE PROTEIN RA8. 37.2 1.3e-06 1 -Q10667 Q10667 RNA-BINDING PROTEIN RNP-1. 36.7 1.9e-06 1 -YN26_YEAST P53830 HYPOTHETICAL 32.3 KD PROTEIN IN SEC 36.0 3.1e-06 2 -P92204 P92204 ANON-66DA PROTEIN. 35.8 3.5e-06 1 -Q93733 Q93733 C17E4.11 (FRAGMENT). 35.5 4.5e-06 1 -MAT3_RAT P43244 MATRIN 3. 35.4 4.7e-06 2 -O35833 O35833 MATRIN 3. 35.4 4.7e-06 2 -P87216 P87216 VIP1 PROTEIN. 35.2 5.2e-06 1 -Q16630 Q16630 HPBRII-4 MRNA. 34.8 7.2e-06 1 -P87126 P87126 HYPOTHETICAL 46.4 KD PROTEIN. 34.4 8.9e-06 1 -Q18265 Q18265 SIMILAR TO NUCLEOLIN. 34.4 9.2e-06 1 -YAS9_SCHPO Q10145 HYPOTHETICAL 82.4 KD PROTEIN C3H8.0 34.2 1.1e-05 1 -YQOA_CAEEL Q09299 HYPOTHETICAL 76.5 KD PROTEIN EEED8. 33.1 2.4e-05 1 -O01806 O01806 SIMILARITY TO AN RNA RECOGNITION MO 32.9 2.5e-05 1 -YIS9_YEAST P40567 HYPOTHETICAL 12.8 KD PROTEIN IN PRI 32.9 2.7e-05 1 -Q18220 Q18220 COSMID C26E6. 32.3 4e-05 1 -Q18219 Q18219 COSMID C26E6. 32.3 4e-05 1 -ROU2_HUMAN P07029 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 31.8 5.6e-05 1 -Q60745 Q60745 RIBONUCLEOPROTEIN (FRAGMENT). 31.5 6.9e-05 2 -IF32_HUMAN P55884 EUKARYOTIC TRANSLATION INITIATION F 31.4 7.4e-05 1 -Q41988 Q41988 GLYCINE-RICH RNA-BINDING PROTEIN (F 31.0 9.7e-05 1 -YBF1_YEAST P34217 HYPOTHETICAL 73.8 KD PROTEIN IN SAS 30.9 0.0001 1 -U2AG_DROME Q94535 SPLICING FACTOR U2AF 35 KD SUBUNIT 29.0 0.00038 1 -U2AG_SCHPO Q09176 SPLICING FACTOR U2AF 23 KD SUBUNIT 28.9 0.00043 1 -IF32_SCHPO Q10425 PROBABLE EUKARYOTIC TRANSLATION INI 28.3 0.00063 1 -PRT1_PICAN P12806 PUTATIVE PRT1 PROTEIN. 28.2 0.00068 1 -P70501 P70501 S1-1 PROTEIN. 27.6 0.0011 2 -P87058 P87058 SLA1P. 27.4 0.0012 1 -ARP_YEAST P32770 ARP PROTEIN. 27.3 0.0012 1 -O13362 O13362 LA AUTOANTIGEN HOMOLOG. 27.3 0.0013 1 -MAT3_HUMAN P43243 MATRIN 3 (FRAGMENT). 27.0 0.0015 1 -O04554 O04554 T7N9.7. 26.9 0.0016 1 -Q26457 Q26457 LA AUTOANTIGEN HOMOLOG. 26.2 0.0026 1 -O15047 O15047 KIAA0339. 25.8 0.0035 1 -NOT4_YEAST P34909 GENERAL NEGATIVE REGULATOR OF TRANS 25.7 0.0039 1 -JSN1_YEAST P47135 JSN1 PROTEIN. 24.6 0.0081 1 -YAC4_SCHPO Q09818 PUTATIVE GENERAL NEGATIVE REGULATOR 24.6 0.0083 1 -P97343 P97343 PROTEIN KINASE. 24.4 0.0094 1 -Q63285 Q63285 KIS PROTEIN (PAM COOH-TERMINAL INTE 24.4 0.0094 1 -O18254 O18254 Y57G11C.9. 24.3 0.0099 1 -YLF1_CAEEL Q03571 HYPOTHETICAL 42.4 KD PROTEIN C40H1. 24.3 0.01 2 -Q18717 Q18717 SIMILAR TO S. CEREVISIAE GENERAL NE 23.6 0.016 1 -YKV4_YEAST P36036 HYPOTHETICAL 23.8 KD PROTEIN IN URA 23.0 0.02 1 -Q08287 Q08287 CHROMOSOME XV READING FRAME ORF YOL 22.5 0.022 1 -IF4B_YEAST P34167 EUKARYOTIC TRANSLATION INITIATION F 22.4 0.023 1 -Q05519 Q05519 ARGININE-RICH 54 KD NUCLEAR PROTEIN 22.3 0.023 1 -Q26273 Q26273 RNA RECOGNITION MOTIF-TYPE RNA-BIND 22.3 0.023 1 -Q14966 Q14966 NUCLEAR PROTEIN, NP220. 22.2 0.024 1 -P87143 P87143 HYPOTHETICAL 64.4 KD PROTEIN. 21.9 0.025 2 -Q20966 Q20966 F58B3.7. 21.4 0.028 1 -Q24433 Q24433 OVARIAN PROTEIN. 21.3 0.029 1 -P91156 P91156 SIMILARITY TO HUMAN HETEROGENEOUS N 21.1 0.03 1 -Q04142 Q04142 HYPOTHETICAL PROTEIN (FRAGMENT). 20.6 0.035 1 -Q12221 Q12221 HYPOTHETICAL 119.5 KD PROTEIN YPR03 20.6 0.035 1 -Q93021 Q93021 PUTATIVE TUMOR SUPPRESSOR. 19.5 0.044 1 -NRD1_YEAST P53617 NRD1 PROTEIN. 18.9 0.051 1 -Q93237 Q93237 C17E4.11 (FRAGMENT). 18.4 0.057 1 -O01159 O01159 D2089.1 (FRAGMENT). 16.8 0.083 1 -YAX9_SCHPO Q10200 HYPOTHETICAL 57.1 KD PROTEIN C13F4. 16.7 0.084 1 -Q61464 Q61464 NUCLEAR PROTEIN, NP220. 16.1 0.098 2 -Q21351 Q21351 K08F4.2. 15.9 0.1 1 -LAH1_YEAST P33399 LA PROTEIN HOMOLOG. 15.8 0.1 1 -Q06477 Q06477 INTERFERON RESPONSE ELEMENT-BINDING 14.5 0.14 1 -U2R1_HUMAN Q15695 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN 14.1 0.15 1 -U2R2_HUMAN Q15696 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN 14.1 0.15 1 -U2R2_MOUSE Q62377 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN 13.4 0.18 1 -YN8T_YEAST P53741 HYPOTHETICAL 57.7 KD PROTEIN IN LYS 13.2 0.19 1 -O35404 O35404 SYNAPTOJANIN 2 (FRAGMENT). 13.1 0.19 1 -U2R1_MOUSE Q64707 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN 13.1 0.2 1 -Q23391 Q23391 ZK1067.6 (FRAGMENT). 12.9 0.2 1 -Q10458 Q10458 SLA1 (FRAGMENT). 12.6 0.22 1 -P78332 P78332 G16 PROTEIN (FRAGMENT). 12.1 0.24 1 -Q18937 Q18937 HYPOTHETICAL PROTEIN D1046.1. 12.0 0.25 1 -O23612 O23612 HYPOTHETICAL 34.5 KD PROTEIN. 11.5 0.28 1 -Q23953 Q23953 D34 IMMUNODOMINANT ANTIGEN. 11.5 0.28 1 -Q26548 Q26548 CYCLOPHYLIN-LIKE PROTEIN TRANS-SPLI 11.2 0.3 1 -O15056 O15056 KIAA0348. 11.1 0.31 1 -P70166 P70166 CYTOPLASMIC POLYADENYLATION ELEMENT 10.6 0.35 1 -BF41_MOUSE P28659 BRAIN PROTEIN F41. 10.6 0.35 1 -Q24527 Q24527 MRNA SMOOTH FOR POLYPEPTIDE (HOMOLO 10.5 0.35 1 -O18964 O18964 SYNAPTOJANIN. 10.2 0.38 1 -Q62504 Q62504 COCHLEAR MRNA (CLONE 28D2) (FRAGMEN 9.2 0.48 1 -Q91572 Q91572 CYTOPLASMIC POLYADENYLATION ELEMENT 9.1 0.49 2 -YQO1_CAEEL Q09293 HYPOTHETICAL 69.9 KD PROTEIN EEED8. 8.8 0.53 1 -Q17561 Q17561 C01F6.5. 8.7 0.54 1 -YHS7_YEAST P38833 HYPOTHETICAL 27.1 KD PROTEIN IN NDT 8.6 0.55 1 -Q14206 Q14206 ZAKI-4 MRNA IN HUMAN SKIN FIBROBLAS 8.6 0.55 1 -O04526 O04526 F20P5.8. 7.9 0.65 1 -Q17860 Q17860 SIMILAR TO DIACYLGLYCEROL KINASE. 7.3 0.75 1 -Q92615 Q92615 MYELOBLAST KIAA0217 (FRAGMENT). 7.3 0.75 1 -O22794 O22794 PUTATIVE SPLICING FACTOR U2AF LARGE 6.6 0.87 2 -YMC7_CAEEL P53806 HYPOTHETICAL 26.6 KD PROTEIN F54E7. 5.6 1.1 1 -Q12046 Q12046 CHROMOSOME IV READING FRAME ORF YDL 5.1 1.2 1 -Q07623 Q07623 CHROMOSOME IV READING FRAME ORF YDL 5.1 1.2 1 -Q10954 Q10954 HYPOTHETICAL 78.8 KD PROTEIN B0336. 4.9 1.3 1 -Q26276 Q26276 RNA RECOGNITION MOTIF-TYPE RNA-BIND 4.4 1.5 1 -O01835 O01835 SIMILARITY TO XENOPUS CYTOPLASMIC P 4.2 1.5 1 -O01691 O01691 SIMILAR TO A HUMAN PUTATIVE TUMOR S 4.0 1.6 1 -Q23452 Q23452 F07A11.6 (FRAGMENT). 3.9 1.6 1 -ROAB_ARTSA P80350 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO 3.8 1.7 1 -YN8V_YEAST P53743 HYPOTHETICAL 36.4 KD PROTEIN IN POP 3.2 1.9 1 -Q26279 Q26279 RNA RECOGNITION MOTIF-TYPE RNA-BIND 2.9 2.1 1 -Q18317 Q18317 SIMILAR TO C. ELEGANS PROTEIN C40H1 2.6 2.2 1 -MUD2_YEAST P36084 SPLICING FACTOR MUD2. 1.9 2.6 1 -Q21559 Q21559 M18.7. 1.2 3.1 1 -O29092 O29092 ACYL-COA DEHYDROGENASE, SHORT CHAIN 0.9 3.3 1 -YG3Q_YEAST P39927 HYPOTHETICAL 47.0 KD PROTEIN IN CYS -0.2 4.3 1 -P70221 P70221 ORF2 PRODUCT (FRAGMENT). -0.3 4.3 1 -Y051_NPVAC P41455 HYPOTHETICAL 37.5 KD PROTEIN IN LEF -0.9 5 1 -YHR9_YEAST P38827 HYPOTHETICAL 123.9 KD PROTEIN IN OR -1.2 5.4 1 -O35847 O35847 ADAPT78. -2.7 7.6 1 -HIPO_CAMJE P45493 HIPPURATE HYDROLASE (EC 3.5.1.32) ( -3.7 9.4 1 -Q60701 Q60701 SPLICING FACTOR, ARGININE/SERINE-RI -3.7 9.5 1 -Q24424 Q24424 RNA BINDING PROTEIN (FRAGMENT). -3.9 10 1 -BLSA_HUMAN Q02832 B-LYMPHOCYTE ANTIGEN PRECURSOR (B-L -4.2 11 1 -XE7_HUMAN Q02040 PROTEIN XE7. -4.2 11 1 -YAQ2_SCHPO Q10103 HYPOTHETICAL 108.7 KD PROTEIN C18G6 -6.5 18 1 -O00583 O00583 DOWN SYNDROME CRITICAL REGION 1 PRO -6.7 19 1 -O13838 O13838 HYPOTHETICAL 40.3 KD PROTEIN. -7.5 23 1 -O05954 O05954 UDP-MURNAC-TRIPEPTIDE SYNTHETASE. -7.6 23 1 -YM28_YEAST Q03790 HYPOTHETICAL 52.6 KD PROTEIN IN IMP -8.1 26 1 -Q19944 Q19944 F31F6.3. -8.7 30 1 -Q26274 Q26274 RNA RECOGNITION MOTIF-TYPE RNA-BIND -8.9 32 1 -O01886 O01886 SIMILARITY TO THE PEPTIDASE FAMILY -9.2 34 1 -Q19164 Q19164 HYPOTHETICAL PROTEIN F07D3.3. -9.2 34 1 -ASM4_YEAST Q05166 ASM4 PROTEIN. -9.5 37 1 -Y117_HUMAN P42696 HYPOTHETICAL PROTEIN KIAA0117 (HAL8 -10.0 41 1 -O00582 O00582 DOWN SYNDROME CRITICAL REGION 1 PRO -10.0 41 1 -O28580 O28580 PHOSPHORIBOSYLFORMYLGLYCINAMIDINE C -10.9 50 1 -O35309 O35309 NMI. -11.2 54 1 -O29837 O29837 SIGNAL-TRANSDUCING HISTIDINE KINASE -11.7 61 1 -Q47952 Q47952 PRE-HGBA PRECURSOR. -12.8 78 1 -Q08646 Q08646 CHROMOSOME XV READING FRAME ORF YOR -13.0 82 1 -Q47957 Q47957 HEMOGLOBIN-BINDING PROTEIN. -13.1 84 1 -Q58954 Q58954 HYPOTHETICAL 21.3 KD PROTEIN 1559. -13.2 86 1 -Q26278 Q26278 RNA RECOGNITION MOTIF-TYPE RNA-BIND -13.5 92 1 -Q13287 Q13287 HOU. -14.9 1.3e+02 1 -TKTC_METJA Q58092 PUTATIVE TRANSKETOLASE C-TERMINAL S -15.0 1.3e+02 1 -PR06_YEAST P19735 PRE-MRNA SPLICING FACTOR PRP6. -15.1 1.3e+02 1 -KHK_HUMAN P50053 KETOHEXOKINASE (EC 2.7.1.3) (HEPATI -15.5 1.5e+02 1 -PUR5_METJA Q57656 PROBABLE PHOSPHORIBOSYLFORMYLGLYCIN -15.8 1.6e+02 1 -O31824 O31824 YNGD PROTEIN. -15.9 1.6e+02 1 -Q92518 Q92518 WS-1/TYPE5 (FRAGMENT). -16.0 1.6e+02 1 -YD33_SCHPO Q10267 HYPOTHETICAL 30.9 KD PROTEIN C13G7. -16.1 1.7e+02 1 -Q09135 Q09135 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO -16.1 1.7e+02 1 -O30057 O30057 HYPOTHETICAL 32.2 KD PROTEIN. -16.3 1.8e+02 1 -Q42378 Q42378 U1 SNRNP 70K TRUNCATED PROTEIN. -16.7 1.9e+02 1 -Q42482 Q42482 X P.DELTOIDES HYBRID WOUND RESPONSI -16.8 2e+02 1 -O35002 O35002 PUTATIVE PROTEASE. -17.0 2.1e+02 1 -PGDS_RAT P20786 ALPHA PLATELET-DERIVED GROWTH FACTO -17.1 2.1e+02 1 -YY08_METJA Q60307 HYPOTHETICAL PROTEIN MJECS08. -17.2 2.2e+02 1 -Q48827 Q48827 MAJOR OUTER MEMBRANE PROTEIN PRECUR -17.8 2.5e+02 1 -Q23637 Q23637 ZK856.3. -17.9 2.5e+02 1 -Q48639 Q48639 BGLR. -17.9 2.5e+02 1 -VJ01_VACCC P21032 PROTEIN J1. -18.3 2.8e+02 1 -HBA_ARAAR P01996 HEMOGLOBIN ALPHA-A CHAIN. -18.4 2.8e+02 1 -YAB9_SCHPO Q09809 HYPOTHETICAL 90.9 KD PROTEIN C2G11. -18.4 2.8e+02 1 -P94393 P94393 HOMOLOGUE OF HYPOTHETICAL PROTEIN H -18.7 3.1e+02 1 -Q26271 Q26271 RNA RECOGNITION MOTIF-TYPE RNA-BIND -18.9 3.2e+02 1 -Q96423 Q96423 CYTOCHROME P450. -18.9 3.2e+02 1 -O00373 O00373 L1 ELEMENT L1.24 P40. -19.3 3.5e+02 1 -YHB0_YEAST P38748 HYPOTHETICAL 67.5 KD PROTEIN IN PRP -19.4 3.6e+02 1 -VJ01_VACCV P07616 PROTEIN J1 (PROTEIN F7). -19.4 3.6e+02 1 -SYH_METJA Q58406 HISTIDYL-TRNA SYNTHETASE (EC 6.1.1. -19.8 3.9e+02 1 -Q12452 Q12452 ORF YLR100W. -20.0 4.1e+02 1 -Q46102 Q46102 CDTC. -20.2 4.3e+02 1 -O17002 O17002 T23B12.7 PROTEIN. -20.3 4.4e+02 1 -Q19942 Q19942 F31F6.1. -20.3 4.5e+02 1 -PHYA_SOLTU P30733 PHYTOCHROME A. -20.4 4.5e+02 1 -P75023 P75023 CARBOXYL-TERMINAL PROTEASE. -20.4 4.5e+02 1 -SYF_METJA Q57911 PROBABLE PHENYLALANYL-TRNA SYNTHETA -20.4 4.6e+02 1 -Y447_METJA Q57889 HYPOTHETICAL PROTEIN MJ0447. -20.6 4.7e+02 1 -Y383_METJA Q57828 HYPOTHETICAL PROTEIN MJ0383. -20.6 4.8e+02 1 -Q94172 Q94172 SIMILAR TO EF-HAND CALCIUM BINDING -20.8 5e+02 1 -CHLL_CHLRE Q00469 PROTOCHLOROPHYLLIDE REDUCTASE IRON- -20.9 5e+02 1 -Q61283 Q61283 ALPHA-1 ANTITRYPSIN 1-2 PRECURSOR ( -20.9 5.1e+02 1 -A1A2_MOUSE P22599 ALPHA-1 ANTITRYPSIN 2 PRECURSOR (AL -20.9 5.1e+02 1 -Q00898 Q00898 ALPHA-1 ANTITRYPSIN 1-5 PRECURSOR ( -20.9 5.1e+02 1 -Q00897 Q00897 ALPHA-1 ANTITRYPSIN 1-4 PRECURSOR ( -20.9 5.1e+02 1 -Q85381 Q85381 HOMOLOG OF VACCINIA VIRUS CDS J1R. -21.0 5.2e+02 1 -VJ01_VARV P33004 PROTEIN J1. -21.0 5.2e+02 1 -O34784 O34784 DNA-BINDING PROTEIN. -21.0 5.2e+02 1 -MENE_HAEIN P44565 O-SUCCINYLBENZOIC ACID--COA LIGASE -21.1 5.3e+02 1 -T2C2_CHVP1 P31117 TYPE II RESTRICTION ENZYME CVIAII ( -21.2 5.4e+02 1 -CARA_BACSU P25993 CARBAMOYL-PHOSPHATE SYNTHASE, PYRIM -21.2 5.4e+02 1 -SFCA_ECOLI P26616 PROBABLE MALATE OXIDOREDUCTASE (NAD -21.2 5.5e+02 1 -Q23267 Q23267 PROBABLE CARBOXYLESTERASE ZC376.3 I -21.5 5.8e+02 1 -DDLA_ECOLI P23844 D-ALANINE--D-ALANINE LIGASE A (EC 6 -21.5 5.8e+02 1 -KLP1_CHLRE P46870 KINESIN-LIKE PROTEIN KLP1. -21.5 5.8e+02 1 -Q26272 Q26272 RNA RECOGNITION MOTIF-TYPE RNA-BIND -21.7 6.1e+02 1 -CPC3_RABIT P00182 CYTOCHROME P450 IIC3 (EC 1.14.14.1) -22.0 6.6e+02 1 -O29409 O29409 CONSERVED HYPOTHETICAL PROTEIN. -22.0 6.6e+02 1 -O34925 O34925 PURINE NUCLEOSIDE PHOSPHORYLASE. -22.2 6.9e+02 1 -Q26281 Q26281 RNA RECOGNITION MOTIF-TYPE RNA-BIND -22.4 7.2e+02 1 -Q25988 Q25988 (CLONE PNM5) ORF (FRAGMENT). -22.4 7.2e+02 1 -META_ECOLI P07623 HOMOSERINE O-SUCCINYLTRANSFERASE (E -22.5 7.3e+02 1 -O04614 O04614 SIMILARITY TO NEBULIN. -22.8 7.8e+02 1 -Q51350 Q51350 PVDS. -22.8 7.9e+02 1 -P95425 P95425 PVDS. -22.8 7.9e+02 1 -P75431 P75431 TYPE 1 RESTRICTION ENZYME. -22.9 8e+02 1 -O00364 O00364 L1 ELEMENT L1.14 P40. -22.9 8e+02 1 -PRE1_STAAU P03857 PLASMID RECOMBINATION ENZYME (MOBIL -22.9 8.1e+02 1 -Q96515 Q96515 AR192. -23.0 8.2e+02 1 -O27761 O27761 PHOSPHATE-BINDING PROTEIN PSTS HOMO -23.1 8.4e+02 1 -Q51783 Q51783 IRON REGULATED TRANSCRIPTION ACTIVA -23.2 8.6e+02 1 -Q00896 Q00896 ALPHA-1 ANTITRYPSIN 1-3 PRECURSOR ( -23.4 9e+02 1 -A1A1_MOUSE P07758 ALPHA-1 ANTITRYPSIN 1 PRECURSOR (AL -23.4 9e+02 1 -Q20445 Q20445 F46B6.3. -23.5 9.3e+02 1 -YNC8_CAEEL P34541 HYPOTHETICAL 15.3 KD PROTEIN R05D3. -23.5 9.3e+02 1 -Q57721 Q57721 HYPOTHETICAL 12.0 KD PROTEIN 0273. -23.5 9.3e+02 1 -Y556_METJA Q57976 HYPOTHETICAL PROTEIN MJ0556. -23.7 9.7e+02 1 -O28372 O28372 LSU RIBOSOMAL PROTEIN L19E (RPL19E) -23.8 9.8e+02 1 -O00376 O00376 L1 ELEMENT L1.33 P40. -23.8 9.9e+02 1 -O00374 O00374 L1 ELEMENT L1.25 P40 AND PUTATIVE P -23.8 9.9e+02 1 -O00371 O00371 L1 ELEMENT L1.21 P40 AND PUTATIVE P -23.8 9.9e+02 1 -O00361 O00361 L1 ELEMENT L1.8 P40 AND PUTATIVE P1 -23.8 9.9e+02 1 -Q15605 Q15605 ORF1 CODES FOR A 40 KDA PRODUCT. -23.8 9.9e+02 1 -O00377 O00377 L1 ELEMENT L1.39 P40 AND PUTATIVE P -23.8 9.9e+02 1 -O00365 O00365 L1 ELEMENT L1.15 P40 AND PUTATIVE P -23.8 9.9e+02 1 -Q12880 Q12880 RETROTRANSPOSABLE L1 ELEMENT LRE2 F -23.8 9.9e+02 1 -O00369 O00369 L1 ELEMENT L1.20 P40 AND PUTATIVE P -23.8 9.9e+02 1 - -Parsed for domains: -Sequence Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -Q91581 1/1 18 89 .. 1 77 [] 119.7 2e-31 -CST2_HUMAN 1/1 18 89 .. 1 77 [] 119.7 2e-31 -Q61413 1/1 8 79 .. 1 77 [] 114.2 8.9e-30 -Q14011 1/1 8 79 .. 1 77 [] 114.2 9.1e-30 -P93486 1/1 38 109 .. 1 77 [] 114.1 9.6e-30 -GRP_DAUCA 1/1 8 79 .. 1 77 [] 113.4 1.6e-29 -Q14498 2/3 252 323 .. 1 77 [] 113.3 1.7e-29 -Q14499 2/3 252 323 .. 1 77 [] 113.3 1.7e-29 -GR10_BRANA 1/1 8 79 .. 1 77 [] 113.0 2e-29 -ROG_HUMAN 1/1 10 81 .. 1 77 [] 113.0 2e-29 -Q15097 3/4 168 238 .. 1 77 [] 111.8 4.5e-29 -Q93004 3/4 193 263 .. 1 77 [] 111.8 4.5e-29 -PABP_MOUSE 3/4 193 263 .. 1 77 [] 111.2 7.2e-29 -O24106 1/1 8 79 .. 1 77 [] 110.9 8.9e-29 -Q40426 1/1 8 79 .. 1 77 [] 110.4 1.2e-28 -O04070 1/1 9 80 .. 1 77 [] 110.3 1.3e-28 -Q41518 1/1 8 79 .. 1 77 [] 110.3 1.3e-28 -O24601 1/1 10 81 .. 1 77 [] 110.2 1.4e-28 -O24188 1/1 39 110 .. 1 77 [] 110.2 1.4e-28 -Q39105 1/1 18 89 .. 1 77 [] 109.8 1.8e-28 -RO28_SPIOL 2/2 151 222 .. 1 77 [] 109.5 2.2e-28 -O22173 3/4 227 297 .. 1 77 [] 109.4 2.5e-28 -PAB2_ARATH 3/4 217 287 .. 1 77 [] 109.1 3e-28 -RNPL_HUMAN 1/1 8 79 .. 1 77 [] 108.8 3.6e-28 -GRP1_SINAL 1/1 10 81 .. 1 77 [] 108.6 4.4e-28 -Q40052 1/1 8 79 .. 1 77 [] 108.6 4.4e-28 -Q40270 2/2 207 278 .. 1 77 [] 108.1 5.9e-28 -Q15164 1/2 4 74 .. 1 77 [] 108.1 5.9e-28 -Q13310 3/4 193 263 .. 1 77 [] 108.1 5.9e-28 -O23793 1/1 40 111 .. 1 77 [] 108.1 6.1e-28 -Q40437 1/1 40 111 .. 1 77 [] 108.1 6.1e-28 -Q41834 2/2 221 292 .. 1 77 [] 108.0 6.3e-28 -GRP8_ARATH 1/1 8 79 .. 1 77 [] 107.9 7.1e-28 -GRP2_SINAL 1/1 10 81 .. 1 77 [] 107.8 7.3e-28 -TIA1_MOUSE 2/3 108 179 .. 1 77 [] 107.7 8.2e-28 -O24306 2/2 208 279 .. 1 77 [] 107.5 9.4e-28 -RO31_NICSY 2/2 232 303 .. 1 77 [] 107.5 9.4e-28 -P92871 2/2 232 303 .. 1 77 [] 107.4 9.7e-28 -Q39209 2/2 227 298 .. 1 77 [] 107.4 9.7e-28 -Q43350 2/2 231 302 .. 1 77 [] 107.4 9.7e-28 -RO31_ARATH 2/2 246 317 .. 1 77 [] 107.4 9.7e-28 -P93616 3/4 213 283 .. 1 77 [] 106.9 1.4e-27 -Q40425 1/1 8 79 .. 1 77 [] 106.7 1.6e-27 -GRP2_SORVU 1/1 10 81 .. 1 77 [] 106.5 1.8e-27 -P87135 3/4 263 333 .. 1 77 [] 106.5 1.9e-27 -PABP_SCHPO 3/4 249 319 .. 1 77 [] 106.5 1.9e-27 -RT19_ARATH 1/1 33 104 .. 1 77 [] 106.4 1.9e-27 -O35479 1/1 10 81 .. 1 77 [] 106.2 2.2e-27 -Q40427 1/1 8 79 .. 1 77 [] 106.0 2.6e-27 -Q41453 1/1 8 79 .. 1 77 [] 105.6 3.4e-27 -O24187 1/1 10 81 .. 1 77 [] 105.2 4.4e-27 -O22390 1/1 10 81 .. 1 77 [] 105.2 4.6e-27 -GRP7_ARATH 1/1 10 81 .. 1 77 [] 105.2 4.6e-27 -TIA1_HUMAN 2/3 97 168 .. 1 77 [] 104.9 5.5e-27 -Q08935 2/2 190 261 .. 1 77 [] 104.7 6.3e-27 -RO30_NICPL 2/2 196 267 .. 1 77 [] 104.7 6.3e-27 -Q39953 3/4 206 276 .. 1 77 [] 104.0 1e-26 -Q39061 1/2 118 189 .. 1 77 [] 103.7 1.3e-26 -Q39062 1/2 110 181 .. 1 77 [] 103.7 1.3e-26 -Q40436 1/1 42 113 .. 1 77 [] 103.7 1.3e-26 -GRPA_MAIZE 1/1 10 81 .. 1 77 [] 103.7 1.3e-26 -O22385 1/1 10 81 .. 1 77 [] 103.6 1.3e-26 -RO28_NICSY 2/2 193 264 .. 1 77 [] 103.6 1.4e-26 -GAR2_SCHPO 2/2 368 438 .. 1 77 [] 103.2 1.8e-26 -O13707 2/2 368 438 .. 1 77 [] 103.2 1.8e-26 -O23798 2/2 211 282 .. 1 77 [] 102.9 2.1e-26 -Q42412 1/1 8 79 .. 1 77 [] 102.8 2.3e-26 -YIS1_YEAST 1/1 66 136 .. 1 77 [] 102.7 2.5e-26 -O22653 1/1 9 80 .. 1 77 [] 102.4 3.2e-26 -P90699 1/1 5 76 .. 1 77 [] 102.1 3.9e-26 -Q15815 1/1 120 191 .. 1 77 [] 101.9 4.3e-26 -Q64283 1/1 120 191 .. 1 77 [] 101.9 4.3e-26 -RO33_NICSY 1/2 116 187 .. 1 77 [] 101.8 4.8e-26 -Q08948 1/2 111 182 .. 1 77 [] 101.8 4.8e-26 -Q08940 1/2 105 176 .. 1 77 [] 101.8 4.8e-26 -O22703 1/1 14 85 .. 1 77 [] 101.6 5.6e-26 -O22384 1/1 10 81 .. 1 77 [] 101.4 6.1e-26 -Q43472 1/1 8 79 .. 1 77 [] 101.4 6.4e-26 -Q62029 3/4 193 263 .. 1 77 [] 100.9 8.7e-26 -Q09959 2/3 177 248 .. 1 77 [] 100.8 9.7e-26 -Q15376 1/1 10 80 .. 1 77 [] 100.2 1.4e-25 -Q15414 1/1 10 80 .. 1 77 [] 100.2 1.4e-25 -Q24474 1/3 112 183 .. 1 77 [] 100.1 1.5e-25 -Q26293 1/3 112 183 .. 1 77 [] 100.1 1.5e-25 -Q24473 1/3 112 183 .. 1 77 [] 100.1 1.5e-25 -O24184 1/1 10 81 .. 1 77 [] 99.9 1.8e-25 -PABP_YEAST 2/4 127 197 .. 1 77 [] 99.8 1.9e-25 -Q92227 3/4 225 295 .. 1 77 [] 99.6 2.2e-25 -SC35_CHICK 1/1 16 87 .. 1 77 [] 99.6 2.2e-25 -Q62093 1/1 16 87 .. 1 77 [] 99.6 2.2e-25 -NSR1_YEAST 1/2 170 241 .. 1 77 [] 99.5 2.3e-25 -NSR1_YEAST 2/2 269 340 .. 1 77 [] 99.5 2.3e-25 -Q44555 1/1 3 74 .. 1 77 [] 99.5 2.3e-25 -PABP_HUMAN 1/4 13 84 .. 1 77 [] 99.4 2.6e-25 -Q93004 1/4 13 84 .. 1 77 [] 99.4 2.6e-25 -PABP_MOUSE 1/4 13 84 .. 1 77 [] 99.4 2.6e-25 -Q15415 1/1 10 80 .. 1 77 [] 99.3 2.6e-25 -TIA1_HUMAN 1/3 9 78 .. 1 77 [] 99.2 2.8e-25 -TIA1_MOUSE 1/3 9 78 .. 1 77 [] 99.2 2.8e-25 -Q13235 1/3 41 112 .. 1 77 [] 99.2 2.8e-25 -Q12926 1/3 41 112 .. 1 77 [] 99.2 2.8e-25 -Q60899 1/3 41 112 .. 1 77 [] 99.2 2.8e-25 -RO31_NICPL 2/2 210 281 .. 1 77 [] 98.9 3.5e-25 -RN15_YEAST 1/1 20 91 .. 1 77 [] 98.9 3.7e-25 -RO31_NICSY 1/2 138 209 .. 1 77 [] 98.8 3.8e-25 -Q40270 1/2 113 184 .. 1 77 [] 98.7 4e-25 -O17309 1/2 97 168 .. 1 77 [] 98.6 4.3e-25 -O17310 1/2 104 175 .. 1 77 [] 98.6 4.3e-25 -PABP_XENLA 1/4 13 84 .. 1 77 [] 98.5 4.7e-25 -TIAR_HUMAN 2/3 99 170 .. 1 77 [] 98.5 4.8e-25 -O15187 1/2 60 131 .. 1 77 [] 98.5 4.8e-25 -TIAR_MOUSE 2/3 116 187 .. 1 77 [] 98.5 4.8e-25 -PAB2_ARATH 2/4 126 196 .. 1 77 [] 98.3 5.4e-25 -Q08937 2/2 209 280 .. 1 77 [] 98.3 5.4e-25 -Q13310 1/4 13 84 .. 1 77 [] 98.3 5.5e-25 -Q91584 1/3 36 107 .. 1 77 [] 98.1 6.1e-25 -P79736 1/3 40 111 .. 1 77 [] 98.1 6.1e-25 -PES4_YEAST 1/4 93 164 .. 1 77 [] 97.8 7.8e-25 -ELAV_DROME 3/3 404 475 .. 1 77 [] 97.5 9.4e-25 -ELAV_DROVI 3/3 440 511 .. 1 77 [] 97.5 9.4e-25 -PAB2_ARATH 4/4 320 390 .. 1 77 [] 97.5 9.6e-25 -Q60900 1/3 41 112 .. 1 77 [] 97.3 1e-24 -Q16135 1/3 32 103 .. 1 77 [] 97.3 1e-24 -Q41367 1/2 40 111 .. 1 77 [] 96.5 1.9e-24 -RO28_NICSY 1/2 99 170 .. 1 77 [] 96.4 2e-24 -SQD_DROME 1/2 58 128 .. 1 77 [] 96.3 2.1e-24 -RO28_SPIOL 1/2 57 128 .. 1 77 [] 96.2 2.3e-24 -HUD_HUMAN 1/3 48 119 .. 1 77 [] 96.2 2.3e-24 -HUD_MOUSE 1/3 53 124 .. 1 77 [] 96.2 2.3e-24 -HUD_RAT 1/3 41 112 .. 1 77 [] 96.2 2.3e-24 -Q91585 1/3 48 119 .. 1 77 [] 96.2 2.3e-24 -Q90409 1/3 46 117 .. 1 77 [] 96.2 2.3e-24 -PUB1_YEAST 2/3 163 234 .. 1 77 [] 95.7 3.2e-24 -P93616 4/4 316 386 .. 1 77 [] 95.6 3.6e-24 -P92871 1/2 138 209 .. 1 77 [] 95.4 4.1e-24 -RO31_ARATH 1/2 152 223 .. 1 77 [] 95.4 4.1e-24 -Q39209 1/2 133 204 .. 1 77 [] 95.4 4.1e-24 -Q43350 1/2 137 208 .. 1 77 [] 95.4 4.1e-24 -PABP_XENLA 3/4 193 263 .. 1 77 [] 95.3 4.2e-24 -PABP_DROME 3/4 183 254 .. 1 77 [] 95.3 4.4e-24 -Q24668 1/2 121 192 .. 1 77 [] 95.2 4.5e-24 -SXLF_DROME 1/2 127 198 .. 1 77 [] 95.2 4.5e-24 -Q99141 1/2 119 190 .. 1 77 [] 95.2 4.5e-24 -Q39568 1/2 13 83 .. 1 77 [] 95.1 4.9e-24 -O25501 1/1 4 74 .. 1 77 [] 95.0 5.3e-24 -Q19706 1/1 178 249 .. 1 77 [] 95.0 5.3e-24 -Q13235 3/3 265 336 .. 1 77 [] 94.8 6e-24 -Q12926 3/3 278 349 .. 1 77 [] 94.8 6e-24 -PABP_YEAST 3/4 220 290 .. 1 77 [] 94.8 6.2e-24 -Q43349 2/2 251 322 .. 1 77 [] 94.6 7e-24 -Q43349 1/2 101 172 .. 1 77 [] 94.5 7.6e-24 -Q44560 1/1 3 74 .. 1 77 [] 94.5 7.7e-24 -Q13310 4/4 296 365 .. 1 77 [] 94.2 9.5e-24 -Q15164 2/2 107 176 .. 1 77 [] 94.2 9.5e-24 -Q14100 1/2 11 81 .. 1 77 [] 93.9 1.1e-23 -Q12771 1/2 78 148 .. 1 77 [] 93.9 1.1e-23 -Q01858 1/2 78 148 .. 1 77 [] 93.9 1.1e-23 -Q14101 1/2 99 169 .. 1 77 [] 93.9 1.1e-23 -Q60668 1/2 68 138 .. 1 77 [] 93.9 1.1e-23 -Q14103 1/2 99 169 .. 1 77 [] 93.9 1.1e-23 -Q14102 1/2 8 78 .. 1 77 [] 93.9 1.1e-23 -Q91584 3/3 267 338 .. 1 77 [] 93.9 1.2e-23 -P79736 3/3 264 335 .. 1 77 [] 93.9 1.2e-23 -Q90408 1/1 102 173 .. 1 77 [] 93.9 1.2e-23 -Q91583 3/3 308 379 .. 1 77 [] 93.8 1.2e-23 -Q60899 3/3 279 350 .. 1 77 [] 93.8 1.2e-23 -Q91903 3/3 308 379 .. 1 77 [] 93.8 1.2e-23 -Q08374 1/1 3 74 .. 1 77 [] 93.7 1.3e-23 -Q08935 1/2 89 160 .. 1 77 [] 93.7 1.3e-23 -Q08212 2/3 97 168 .. 1 77 [] 93.7 1.3e-23 -Q20084 1/3 44 115 .. 1 77 [] 93.6 1.4e-23 -Q13595 1/1 121 192 .. 1 77 [] 93.5 1.5e-23 -O22173 2/4 136 206 .. 1 77 [] 93.4 1.6e-23 -Q41367 2/2 137 208 .. 1 77 [] 93.3 1.7e-23 -Q93194 1/2 29 100 .. 1 77 [] 93.1 2e-23 -Q91585 3/3 285 356 .. 1 77 [] 93.1 2e-23 -Q90409 3/3 286 357 .. 1 77 [] 93.1 2e-23 -HUD_RAT 3/3 292 363 .. 1 77 [] 93.1 2e-23 -HUD_HUMAN 3/3 299 370 .. 1 77 [] 93.1 2e-23 -HUD_MOUSE 3/3 304 375 .. 1 77 [] 93.1 2e-23 -PAB5_ARATH 3/4 227 297 .. 1 77 [] 93.0 2.1e-23 -O01671 1/2 66 137 .. 1 77 [] 93.0 2.1e-23 -Q60990 1/1 10 81 .. 1 77 [] 92.9 2.3e-23 -O23798 1/2 117 188 .. 1 77 [] 92.9 2.3e-23 -TRA2_DROME 1/1 99 170 .. 1 77 [] 92.8 2.4e-23 -Q14576 1/3 41 112 .. 1 77 [] 92.8 2.4e-23 -Q41124 2/2 206 277 .. 1 77 [] 92.7 2.5e-23 -PABP_DROME 1/4 4 75 .. 1 77 [] 92.7 2.5e-23 -Q19579 4/4 345 415 .. 1 77 [] 92.6 2.9e-23 -Q19581 4/4 345 415 .. 1 77 [] 92.6 2.9e-23 -Q13310 2/4 101 170 .. 1 77 [] 92.5 3.1e-23 -NOP4_YEAST 1/4 28 98 .. 1 77 [] 92.4 3.2e-23 -P70807 1/1 3 74 .. 1 77 [] 92.3 3.4e-23 -Q24473 3/3 363 434 .. 1 77 [] 92.2 3.6e-23 -Q26293 3/3 363 434 .. 1 77 [] 92.2 3.6e-23 -Q24474 3/3 358 429 .. 1 77 [] 92.2 3.6e-23 -Q60901 2/2 194 265 .. 1 77 [] 92.2 3.7e-23 -Q60900 3/3 286 357 .. 1 77 [] 92.2 3.7e-23 -SP49_HUMAN 2/2 102 174 .. 1 77 [] 92.1 3.8e-23 -PAB2_ARATH 1/4 38 109 .. 1 77 [] 92.1 3.9e-23 -PABP_YEAST 4/4 323 393 .. 1 77 [] 92.0 4.2e-23 -ROA1_MOUSE 2/2 106 176 .. 1 77 [] 92.0 4.2e-23 -ROA1_BOVIN 2/2 106 176 .. 1 77 [] 92.0 4.2e-23 -ROA1_HUMAN 2/2 106 176 .. 1 77 [] 92.0 4.2e-23 -ROA1_RAT 2/2 106 176 .. 1 77 [] 92.0 4.2e-23 -P70372 1/3 22 93 .. 1 77 [] 91.9 4.7e-23 -PABP_XENLA 2/4 101 170 .. 1 77 [] 91.8 4.8e-23 -Q15097 2/4 76 145 .. 1 77 [] 91.8 5e-23 -PABP_MOUSE 2/4 101 170 .. 1 77 [] 91.8 5e-23 -PABP_HUMAN 2/4 101 170 .. 1 77 [] 91.8 5e-23 -Q93004 2/4 101 170 .. 1 77 [] 91.8 5e-23 -P73557 1/1 3 74 .. 1 77 [] 91.8 5e-23 -P87135 1/4 82 153 .. 1 77 [] 91.7 5.2e-23 -PABP_SCHPO 1/4 68 139 .. 1 77 [] 91.7 5.2e-23 -HRB1_YEAST 1/3 138 207 .. 1 77 [] 91.7 5.3e-23 -Q41834 1/2 127 198 .. 1 77 [] 91.7 5.4e-23 -Q62029 1/4 13 84 .. 1 77 [] 91.6 5.5e-23 -Q15717 1/3 22 93 .. 1 77 [] 91.5 6.1e-23 -Q99628 1/3 113 184 .. 1 77 [] 91.2 7.5e-23 -PABP_HUMAN 3/4 193 260 .. 1 77 [] 91.2 7.6e-23 -CABA_MOUSE 1/2 77 147 .. 1 77 [] 90.8 9.4e-23 -O35698 1/1 10 81 .. 1 77 [] 90.8 1e-22 -O22173 4/4 330 400 .. 1 77 [] 90.7 1e-22 -Q91582 3/3 246 317 .. 1 77 [] 90.6 1.1e-22 -O15414 1/1 275 346 .. 1 77 [] 90.5 1.2e-22 -Q91582 1/3 22 93 .. 1 77 [] 90.3 1.3e-22 -Q44556 1/1 3 74 .. 1 77 [] 90.3 1.4e-22 -PAB5_ARATH 4/4 330 400 .. 1 77 [] 90.3 1.4e-22 -Q14576 3/3 278 349 .. 1 77 [] 90.0 1.6e-22 -O02008 1/1 107 178 .. 1 77 [] 90.0 1.7e-22 -O02009 1/1 60 131 .. 1 77 [] 90.0 1.7e-22 -Q39675 1/2 114 185 .. 1 77 [] 89.9 1.8e-22 -Q62029 2/4 101 170 .. 1 77 [] 89.8 1.9e-22 -Q08937 1/2 89 160 .. 1 77 [] 89.8 2e-22 -Q23795 1/2 42 112 .. 1 77 [] 89.8 2e-22 -YDC1_SCHPO 1/1 103 174 .. 1 77 [] 89.7 2.1e-22 -O24306 1/2 111 182 .. 1 77 [] 89.7 2.1e-22 -PABP_SCHPO 2/4 156 226 .. 1 77 [] 89.6 2.2e-22 -P87135 2/4 170 240 .. 1 77 [] 89.6 2.2e-22 -RO31_NICPL 1/2 90 161 .. 1 77 [] 89.6 2.3e-22 -ROA1_MACMU 2/2 106 176 .. 1 77 [] 89.6 2.3e-22 -PABP_SCHPO 4/4 352 422 .. 1 77 [] 89.5 2.4e-22 -P87135 4/4 366 436 .. 1 77 [] 89.5 2.4e-22 -Q99729 1/2 70 140 .. 1 77 [] 89.4 2.6e-22 -Q04150 1/2 70 140 .. 1 77 [] 89.4 2.6e-22 -Q44554 1/1 3 74 .. 1 77 [] 89.3 2.8e-22 -NAM8_YEAST 2/3 165 237 .. 1 77 [] 89.3 2.8e-22 -SC35_HUMAN 1/1 16 87 .. 1 77 [] 89.1 3.1e-22 -YIS5_YEAST 1/1 33 104 .. 1 77 [] 88.9 3.6e-22 -PABP_DROME 2/4 92 162 .. 1 77 [] 88.7 4.2e-22 -Q17350 4/4 319 389 .. 1 77 [] 88.5 4.6e-22 -GBP2_YEAST 1/3 124 193 .. 1 77 [] 88.5 4.7e-22 -TIAR_HUMAN 1/3 11 80 .. 1 77 [] 88.4 5e-22 -Q41810 1/1 10 81 .. 1 77 [] 88.2 6.1e-22 -Q91583 1/3 68 140 .. 1 77 [] 88.1 6.5e-22 -ROA1_XENLA 2/2 107 177 .. 1 77 [] 87.8 8e-22 -Q46349 1/1 3 74 .. 1 77 [] 87.8 8e-22 -P93616 1/4 34 105 .. 1 77 [] 87.7 8.4e-22 -Q08948 2/2 214 285 .. 1 77 [] 87.6 8.7e-22 -Q15097 4/4 271 340 .. 1 77 [] 87.5 9.7e-22 -Q93004 4/4 296 365 .. 1 77 [] 87.5 9.7e-22 -PABP_MOUSE 4/4 296 365 .. 1 77 [] 87.5 9.7e-22 -PABP_HUMAN 4/4 293 362 .. 1 77 [] 87.5 9.7e-22 -Q91579 3/3 379 450 .. 1 77 [] 87.4 1e-21 -PAB5_ARATH 2/4 134 204 .. 1 77 [] 87.4 1e-21 -Q53322 1/1 2 73 .. 1 77 [] 87.2 1.1e-21 -Q15717 3/3 246 317 .. 1 77 [] 87.2 1.2e-21 -RO33_NICSY 2/2 219 290 .. 1 77 [] 87.0 1.3e-21 -Q55343 1/1 3 74 .. 1 77 [] 86.9 1.4e-21 -Q99377 1/1 105 176 .. 1 77 [] 86.9 1.5e-21 -Q62376 1/1 35 106 .. 1 77 [] 86.9 1.5e-21 -P78493 1/1 105 176 .. 1 77 [] 86.9 1.5e-21 -RU17_HUMAN 1/1 282 353 .. 1 77 [] 86.9 1.5e-21 -Q39062 2/2 213 284 .. 1 77 [] 86.7 1.6e-21 -Q39061 2/2 221 292 .. 1 77 [] 86.7 1.6e-21 -O13620 2/5 325 396 .. 1 77 [] 86.7 1.7e-21 -Q15584 2/2 495 564 .. 1 77 [] 86.5 1.9e-21 -ROM_HUMAN 3/3 655 724 .. 1 77 [] 86.5 1.9e-21 -GBP2_YEAST 2/3 221 291 .. 1 77 [] 86.5 1.9e-21 -Q91920 1/2 23 93 .. 1 77 [] 86.5 2e-21 -Q57014 1/1 3 74 .. 1 77 [] 86.1 2.5e-21 -Q39953 1/4 28 99 .. 1 77 [] 86.0 2.7e-21 -RU17_DROME 1/1 104 175 .. 1 77 [] 85.8 3.1e-21 -Q39953 2/4 116 186 .. 1 77 [] 85.8 3.2e-21 -RO30_NICPL 1/2 89 160 .. 1 77 [] 85.5 3.9e-21 -Q91903 1/3 68 140 .. 1 77 [] 85.5 3.9e-21 -SR55_DROME 1/2 5 68 .. 1 77 [] 85.5 4e-21 -Q24252 1/2 6 69 .. 1 77 [] 85.5 4e-21 -Q24534 2/2 121 193 .. 1 77 [] 85.4 4e-21 -PABP_YEAST 1/4 39 110 .. 1 77 [] 85.4 4e-21 -Q23796 1/2 6 69 .. 1 77 [] 85.4 4.1e-21 -PUB1_YEAST 1/3 76 146 .. 1 77 [] 85.4 4.1e-21 -NAB4_YEAST 1/2 161 230 .. 1 77 [] 85.4 4.2e-21 -ROA1_RAT 1/2 15 85 .. 1 77 [] 85.1 4.9e-21 -ROA1_BOVIN 1/2 15 85 .. 1 77 [] 85.1 4.9e-21 -ROA1_MOUSE 1/2 15 85 .. 1 77 [] 85.1 4.9e-21 -ROA1_HUMAN 1/2 15 85 .. 1 77 [] 85.1 4.9e-21 -Q90602 1/2 92 162 .. 1 77 [] 85.1 5e-21 -PAB5_ARATH 1/4 47 117 .. 1 77 [] 85.0 5.6e-21 -O13707 1/2 265 336 .. 1 77 [] 84.7 6.5e-21 -GAR2_SCHPO 1/2 265 336 .. 1 77 [] 84.7 6.5e-21 -Q13242 1/2 16 84 .. 1 77 [] 84.6 7.3e-21 -Q90407 2/2 165 236 .. 1 77 [] 84.5 7.7e-21 -Q90602 2/2 176 246 .. 1 77 [] 84.4 8e-21 -O04240 2/2 210 281 .. 1 77 [] 84.3 8.7e-21 -Q60668 2/2 153 225 .. 1 77 [] 84.3 9e-21 -Q92879 3/3 399 470 .. 1 77 [] 84.1 1e-20 -Q17350 2/4 122 192 .. 1 77 [] 84.0 1e-20 -O22851 1/1 60 131 .. 1 77 [] 83.9 1.2e-20 -O17310 2/2 190 263 .. 1 77 [] 83.9 1.2e-20 -O17309 2/2 183 256 .. 1 77 [] 83.9 1.2e-20 -O00320 1/1 242 313 .. 1 77 [] 83.9 1.2e-20 -YP85_CAEEL 2/2 102 174 .. 1 77 [] 83.8 1.2e-20 -Q17352 2/2 112 184 .. 1 77 [] 83.8 1.2e-20 -P93843 3/3 297 362 .. 1 77 [] 83.7 1.3e-20 -P70372 3/3 246 317 .. 1 77 [] 83.7 1.4e-20 -O22314 1/2 9 77 .. 1 77 [] 83.6 1.4e-20 -Q39201 1/2 9 77 .. 1 77 [] 83.6 1.4e-20 -O22315 1/2 9 77 .. 1 77 [] 83.6 1.4e-20 -Q61474 1/2 22 92 .. 1 77 [] 83.6 1.5e-20 -NGR1_YEAST 2/3 194 266 .. 1 77 [] 83.5 1.5e-20 -RO21_XENLA 2/2 102 172 .. 1 77 [] 83.5 1.6e-20 -RU17_XENLA 1/1 105 179 .. 1 77 [] 83.4 1.7e-20 -Q94467 1/1 202 272 .. 1 77 [] 83.3 1.7e-20 -CABA_MOUSE 2/2 161 231 .. 1 77 [] 83.3 1.7e-20 -Q90626 2/2 159 229 .. 1 77 [] 83.1 2e-20 -Q17350 1/4 34 105 .. 1 77 [] 83.0 2.2e-20 -PABP_XENLA 4/4 296 365 .. 1 77 [] 82.9 2.2e-20 -O22173 1/4 48 119 .. 1 77 [] 82.9 2.2e-20 -O08831 1/1 12 78 .. 1 77 [] 82.8 2.5e-20 -X16_HUMAN 1/1 12 78 .. 1 77 [] 82.8 2.5e-20 -Q62029 4/4 296 365 .. 1 77 [] 82.6 2.8e-20 -RO22_XENLA 2/2 102 172 .. 1 77 [] 82.6 2.8e-20 -NOP3_YEAST 1/2 127 190 .. 1 77 [] 82.6 2.9e-20 -U2AF_SCHPO 1/2 312 383 .. 1 77 [] 82.6 3e-20 -Q17385 3/3 501 572 .. 1 77 [] 82.5 3e-20 -P91414 1/1 335 406 .. 1 77 [] 82.5 3e-20 -MSSP_HUMAN 1/2 31 102 .. 1 77 [] 82.5 3e-20 -Q42404 1/1 140 211 .. 1 77 [] 82.4 3.2e-20 -O23288 2/2 107 177 .. 1 77 [] 82.1 4.1e-20 -Q91808 1/2 22 92 .. 1 77 [] 82.0 4.3e-20 -Q91807 1/2 22 92 .. 1 77 [] 82.0 4.3e-20 -O18409 3/3 726 797 .. 1 77 [] 82.0 4.3e-20 -O02374 3/3 522 593 .. 1 77 [] 82.0 4.3e-20 -P70055 3/3 455 526 .. 1 77 [] 82.0 4.4e-20 -O14979 1/2 1 71 [. 1 77 [] 81.9 4.8e-20 -Q24668 2/2 207 280 .. 1 77 [] 81.8 5e-20 -SXLF_DROME 2/2 213 286 .. 1 77 [] 81.8 5e-20 -Q99141 2/2 205 278 .. 1 77 [] 81.8 5e-20 -Q14100 2/2 96 168 .. 1 77 [] 81.7 5.2e-20 -Q14103 2/2 184 256 .. 1 77 [] 81.7 5.2e-20 -Q12771 2/2 163 235 .. 1 77 [] 81.7 5.2e-20 -ROC_RAT 1/1 36 108 .. 1 77 [] 81.7 5.2e-20 -Q14102 2/2 93 165 .. 1 77 [] 81.7 5.2e-20 -Q14101 2/2 184 256 .. 1 77 [] 81.7 5.2e-20 -Q55345 1/1 3 74 .. 1 77 [] 81.7 5.4e-20 -YDB2_SCHPO 1/1 32 102 .. 1 77 [] 81.7 5.4e-20 -SP49_HUMAN 1/2 15 86 .. 1 77 [] 81.7 5.5e-20 -Q19579 1/4 59 130 .. 1 77 [] 81.7 5.5e-20 -Q19581 1/4 59 130 .. 1 77 [] 81.7 5.5e-20 -Q60690 2/2 360 429 .. 1 77 [] 81.6 5.6e-20 -TIA1_HUMAN 3/3 205 270 .. 1 77 [] 81.6 5.8e-20 -ROA2_HUMAN 2/2 114 184 .. 1 77 [] 81.6 5.9e-20 -O18352 1/1 728 798 .. 1 77 [] 81.5 6.2e-20 -ROA1_MACMU 1/2 15 85 .. 1 77 [] 81.4 6.4e-20 -Q26658 2/2 101 171 .. 1 77 [] 81.4 6.5e-20 -Q18318 1/1 40 111 .. 1 77 [] 81.4 6.7e-20 -Q16135 3/3 269 340 .. 1 77 [] 81.4 6.7e-20 -Q19581 3/4 240 310 .. 1 77 [] 81.4 6.8e-20 -Q19579 3/4 240 310 .. 1 77 [] 81.4 6.8e-20 -P93396 1/1 68 139 .. 1 77 [] 81.4 6.8e-20 -O22791 2/2 112 182 .. 1 77 [] 81.3 7.2e-20 -TIA1_MOUSE 3/3 216 281 .. 1 77 [] 81.2 7.4e-20 -Q00880 2/2 358 429 .. 1 77 [] 81.2 7.5e-20 -O23212 2/2 358 429 .. 1 77 [] 80.9 9.4e-20 -Q92227 1/4 44 115 .. 1 77 [] 80.9 9.6e-20 -Q09511 1/1 21 92 .. 1 77 [] 80.8 9.8e-20 -Q15351 1/1 27 97 .. 1 77 [] 80.8 1e-19 -Q15350 1/1 26 96 .. 1 77 [] 80.8 1e-19 -U2AF_MOUSE 2/3 261 332 .. 1 77 [] 80.6 1.1e-19 -U2AF_HUMAN 2/3 261 332 .. 1 77 [] 80.6 1.1e-19 -Q99628 2/3 210 281 .. 1 77 [] 80.6 1.2e-19 -Q60399 1/1 52 120 .. 1 77 [] 80.5 1.3e-19 -NUCL_MOUSE 4/4 570 638 .. 1 77 [] 80.5 1.3e-19 -NUCL_MESAU 4/4 573 641 .. 1 77 [] 80.5 1.3e-19 -NUCL_RAT 4/4 576 644 .. 1 77 [] 80.5 1.3e-19 -Q17430 2/2 685 755 .. 1 77 [] 80.5 1.3e-19 -Q55342 1/1 3 74 .. 1 77 [] 80.3 1.4e-19 -Q91583 2/3 155 226 .. 1 77 [] 80.3 1.4e-19 -Q91903 2/3 155 226 .. 1 77 [] 80.3 1.4e-19 -Q62176 1/1 34 104 .. 1 77 [] 80.2 1.5e-19 -Q21832 1/1 57 128 .. 1 77 [] 80.2 1.5e-19 -YHH5_YEAST 3/3 315 384 .. 1 77 [] 80.1 1.6e-19 -HUD_MOUSE 2/3 139 210 .. 1 77 [] 79.9 1.9e-19 -HUD_RAT 2/3 127 198 .. 1 77 [] 79.9 1.9e-19 -HUD_HUMAN 2/3 134 205 .. 1 77 [] 79.9 1.9e-19 -RO32_XENLA 2/2 120 190 .. 1 77 [] 79.7 2.1e-19 -Q20084 3/3 376 446 .. 1 77 [] 79.7 2.2e-19 -Q41042 1/2 357 426 .. 1 77 [] 79.6 2.3e-19 -Q16629 1/1 13 79 .. 1 77 [] 79.6 2.4e-19 -Q17350 3/4 215 284 .. 1 77 [] 79.6 2.4e-19 -NUCL_CHICK 4/4 555 623 .. 1 77 [] 79.5 2.4e-19 -PES4_YEAST 3/4 305 374 .. 1 77 [] 79.4 2.6e-19 -Q19335 2/2 477 548 .. 1 77 [] 79.4 2.6e-19 -GBP2_YEAST 3/3 351 421 .. 1 77 [] 79.4 2.7e-19 -Q00880 1/2 253 324 .. 1 77 [] 79.3 2.8e-19 -RB27_DROME 1/2 9 79 .. 1 77 [] 79.3 2.8e-19 -RNP1_YEAST 1/1 37 109 .. 1 77 [] 79.0 3.5e-19 -YQOC_CAEEL 1/1 63 133 .. 1 77 [] 79.0 3.6e-19 -Q92950 3/3 407 478 .. 1 77 [] 79.0 3.6e-19 -SP33_HUMAN 1/2 17 85 .. 1 77 [] 78.9 3.7e-19 -Q13809 1/2 18 86 .. 1 77 [] 78.9 3.7e-19 -SQD_DROME 2/2 138 208 .. 1 77 [] 78.8 3.9e-19 -Q13243 1/2 6 69 .. 1 77 [] 78.8 3.9e-19 -O35326 1/2 6 69 .. 1 77 [] 78.8 3.9e-19 -CL4_RAT 1/2 6 69 .. 1 77 [] 78.8 3.9e-19 -Q16662 1/1 6 69 .. 1 77 [] 78.8 3.9e-19 -O14979 2/2 86 158 .. 1 77 [] 78.8 4e-19 -P78795 1/1 51 122 .. 1 77 [] 78.8 4e-19 -HRB1_YEAST 3/3 353 423 .. 1 77 [] 78.8 4.1e-19 -Q13151 2/2 100 170 .. 1 77 [] 78.7 4.3e-19 -Q17201 2/2 125 195 .. 1 77 [] 78.6 4.4e-19 -Q17200 2/2 125 195 .. 1 77 [] 78.6 4.4e-19 -YQO4_CAEEL 1/1 103 173 .. 1 77 [] 78.5 5e-19 -Q91585 2/3 134 205 .. 1 77 [] 78.4 5.1e-19 -Q53321 1/1 2 73 .. 1 77 [] 78.4 5.4e-19 -Q55341 1/1 3 74 .. 1 77 [] 78.4 5.4e-19 -Q93194 2/2 131 196 .. 1 77 [] 78.1 6.5e-19 -Q27335 1/4 11 81 .. 1 77 [] 78.0 6.9e-19 -O14801 1/1 241 312 .. 1 77 [] 78.0 7e-19 -NGR1_YEAST 3/3 362 427 .. 1 77 [] 78.0 7.1e-19 -EWS_MOUSE 1/1 362 441 .. 1 77 [] 77.9 7.2e-19 -Q18409 1/1 5 71 .. 1 77 [] 77.9 7.7e-19 -Q01858 2/2 163 235 .. 1 77 [] 77.7 8.7e-19 -Q39568 2/2 145 215 .. 1 77 [] 77.6 8.9e-19 -P93616 2/4 122 192 .. 1 77 [] 77.6 9.5e-19 -Q19579 2/4 147 217 .. 1 77 [] 77.5 9.5e-19 -Q19581 2/4 147 217 .. 1 77 [] 77.5 9.5e-19 -O14875 1/1 10 80 .. 1 77 [] 77.5 9.9e-19 -O04319 3/4 204 274 .. 1 77 [] 77.5 1e-18 -Q17201 1/2 45 115 .. 1 77 [] 77.4 1e-18 -Q17200 1/2 45 115 .. 1 77 [] 77.4 1e-18 -O22791 1/2 8 77 .. 1 77 [] 77.4 1e-18 -Q12926 2/3 127 198 .. 1 77 [] 77.4 1e-18 -Q13235 2/3 127 198 .. 1 77 [] 77.4 1e-18 -Q60899 2/3 127 198 .. 1 77 [] 77.4 1e-18 -EWS_HUMAN 1/1 363 442 .. 1 77 [] 77.4 1e-18 -O14327 1/1 57 127 .. 1 77 [] 77.4 1.1e-18 -O13829 1/1 102 173 .. 1 77 [] 77.4 1.1e-18 -ROA3_HUMAN 2/2 128 198 .. 1 77 [] 77.3 1.1e-18 -Q22037 1/2 25 95 .. 1 77 [] 77.2 1.2e-18 -Q14869 1/2 31 102 .. 1 77 [] 77.2 1.2e-18 -Q15433 1/2 64 135 .. 1 77 [] 77.2 1.2e-18 -Q92227 2/4 132 202 .. 1 77 [] 77.2 1.2e-18 -ROA1_XENLA 1/2 16 86 .. 1 77 [] 77.0 1.4e-18 -O01671 2/2 152 224 .. 1 77 [] 77.0 1.4e-18 -ROA1_SCHAM 1/2 19 89 .. 1 77 [] 76.9 1.5e-18 -Q24409 2/2 265 335 .. 1 77 [] 76.9 1.5e-18 -Q24474 2/3 198 269 .. 1 77 [] 76.7 1.7e-18 -Q24847 2/2 188 259 .. 1 77 [] 76.5 1.9e-18 -Q39953 4/4 309 379 .. 1 77 [] 76.5 2e-18 -Q21911 1/2 47 117 .. 1 77 [] 76.4 2.1e-18 -Q15020 2/2 803 873 .. 1 77 [] 76.3 2.2e-18 -O04319 2/4 114 183 .. 1 77 [] 76.2 2.4e-18 -RO31_XENLA 1/2 29 99 .. 1 77 [] 76.0 2.8e-18 -RO32_XENLA 1/2 29 99 .. 1 77 [] 76.0 2.8e-18 -O08752 2/2 80 143 .. 1 77 [] 76.0 2.8e-18 -YNL0_YEAST 1/1 93 164 .. 1 77 [] 75.8 3.2e-18 -RO31_XENLA 2/2 120 190 .. 1 77 [] 75.8 3.3e-18 -YP85_CAEEL 1/2 15 86 .. 1 77 [] 75.6 3.6e-18 -Q17352 1/2 25 96 .. 1 77 [] 75.6 3.6e-18 -Q16135 2/3 118 190 .. 1 77 [] 75.6 3.8e-18 -Q24360 2/2 119 189 .. 1 77 [] 75.5 3.8e-18 -ROA1_DROME 2/2 124 194 .. 1 77 [] 75.5 3.8e-18 -Q24359 2/2 120 190 .. 1 77 [] 75.5 3.8e-18 -Q99361 2/2 123 193 .. 1 77 [] 75.5 3.8e-18 -Q24486 2/2 117 187 .. 1 77 [] 75.5 3.9e-18 -RB87_DROME 2/2 117 187 .. 1 77 [] 75.5 3.9e-18 -O35935 1/1 170 240 .. 1 77 [] 75.5 4e-18 -Q28165 1/1 174 244 .. 1 77 [] 75.5 4e-18 -O04319 4/4 306 376 .. 1 77 [] 75.3 4.4e-18 -RB27_DROME 2/2 98 168 .. 1 77 [] 75.3 4.5e-18 -SR75_HUMAN 1/2 4 67 .. 1 77 [] 75.3 4.5e-18 -ROA2_HUMAN 1/2 23 93 .. 1 77 [] 75.3 4.6e-18 -NONA_DROME 1/2 304 369 .. 1 77 [] 75.2 4.8e-18 -Q27926 1/1 98 168 .. 1 77 [] 75.1 5.3e-18 -YHH5_YEAST 1/3 113 184 .. 1 77 [] 75.0 5.6e-18 -ELAV_DROME 1/3 151 235 .. 1 77 [] 75.0 5.7e-18 -ELAV_DROVI 1/3 187 271 .. 1 77 [] 75.0 5.7e-18 -O15187 2/2 168 233 .. 1 77 [] 74.9 5.9e-18 -TIAR_MOUSE 3/3 224 289 .. 1 77 [] 74.9 5.9e-18 -TIAR_HUMAN 3/3 207 272 .. 1 77 [] 74.9 5.9e-18 -Q12786 1/2 76 141 .. 1 77 [] 74.9 6e-18 -O00201 1/2 76 141 .. 1 77 [] 74.9 6e-18 -Q90626 1/2 75 145 .. 1 77 [] 74.8 6.3e-18 -Q16560 1/1 53 124 .. 1 77 [] 74.8 6.3e-18 -Q55765 1/1 5 77 .. 1 77 [] 74.8 6.5e-18 -PABP_DROME 4/4 287 357 .. 1 77 [] 74.8 6.6e-18 -Q20084 2/3 130 201 .. 1 77 [] 74.6 7.1e-18 -O02916 2/2 80 143 .. 1 77 [] 74.6 7.2e-18 -O13845 2/3 342 412 .. 1 77 [] 74.6 7.3e-18 -NAB4_YEAST 2/2 245 315 .. 1 77 [] 74.4 8.6e-18 -RB97_DROME 1/2 34 104 .. 1 77 [] 74.3 9.2e-18 -O23146 1/1 51 121 .. 1 77 [] 74.1 1e-17 -O35335 1/1 6 69 .. 1 77 [] 74.1 1e-17 -O13845 1/3 242 312 .. 1 77 [] 73.6 1.5e-17 -NAM8_YEAST 3/3 315 380 .. 1 77 [] 73.4 1.6e-17 -Q92227 4/4 328 449 .. 1 77 [] 73.3 1.8e-17 -ROA3_HUMAN 1/2 37 107 .. 1 77 [] 73.3 1.8e-17 -Q04150 2/2 154 225 .. 1 77 [] 73.1 2.1e-17 -Q99729 2/2 154 225 .. 1 77 [] 73.1 2.1e-17 -Q39675 2/2 210 281 .. 1 77 [] 73.1 2.1e-17 -Q22030 1/1 102 171 .. 1 77 [] 73.0 2.3e-17 -Q61954 1/2 26 89 .. 1 77 [] 72.9 2.4e-17 -O02916 1/2 4 67 .. 1 77 [] 72.9 2.4e-17 -O08752 1/2 4 67 .. 1 77 [] 72.9 2.4e-17 -Q22304 1/1 194 263 .. 1 77 [] 72.7 2.8e-17 -Q10572 1/1 155 224 .. 1 77 [] 72.7 2.8e-17 -Q24847 1/2 4 74 .. 1 77 [] 72.7 2.8e-17 -Q14576 2/3 127 198 .. 1 77 [] 72.7 2.8e-17 -Q21155 1/1 150 222 .. 1 77 [] 72.6 2.9e-17 -Q60900 2/3 127 198 .. 1 77 [] 72.6 2.9e-17 -Q60901 1/2 42 113 .. 1 77 [] 72.6 2.9e-17 -RB87_DROME 1/2 26 96 .. 1 77 [] 72.5 3.1e-17 -Q24486 1/2 26 96 .. 1 77 [] 72.5 3.1e-17 -RU1A_XENLA 1/2 12 84 .. 1 77 [] 72.5 3.2e-17 -Q91582 2/3 108 179 .. 1 77 [] 72.3 3.6e-17 -ROM_HUMAN 1/3 73 144 .. 1 77 [] 72.1 4.1e-17 -Q06459 3/4 468 534 .. 1 77 [] 72.1 4.3e-17 -Q93233 1/1 80 150 .. 1 77 [] 72.0 4.3e-17 -P78814 1/2 2 65 .. 1 77 [] 72.0 4.4e-17 -O23189 1/3 65 136 .. 1 77 [] 72.0 4.5e-17 -NUCL_XENLA 4/4 504 572 .. 1 77 [] 71.9 4.6e-17 -Q02427 1/1 13 79 .. 1 77 [] 71.9 4.7e-17 -Q13247 1/2 4 67 .. 1 77 [] 71.8 5e-17 -Q13244 1/1 4 67 .. 1 77 [] 71.8 5e-17 -Q13245 1/2 4 67 .. 1 77 [] 71.8 5e-17 -Q22037 2/2 116 186 .. 1 77 [] 71.7 5.5e-17 -Q15717 2/3 108 179 .. 1 77 [] 71.7 5.6e-17 -Q90409 2/3 132 206 .. 1 77 [] 71.5 6.1e-17 -Q63887 1/2 78 143 .. 1 77 [] 71.5 6.5e-17 -RU1A_HUMAN 1/2 12 84 .. 1 77 [] 71.4 6.6e-17 -Q15287 1/1 163 235 .. 1 77 [] 71.4 6.7e-17 -O23475 2/2 213 284 .. 1 77 [] 71.4 7e-17 -O04425 2/2 213 284 .. 1 77 [] 71.4 7e-17 -FUS_BOVIN 1/1 273 352 .. 1 77 [] 71.3 7.4e-17 -FUS_HUMAN 1/1 287 366 .. 1 77 [] 71.3 7.4e-17 -Q13344 1/1 290 369 .. 1 77 [] 71.3 7.4e-17 -Q62826 1/1 12 83 .. 1 77 [] 71.3 7.5e-17 -Q99361 1/2 32 102 .. 1 77 [] 71.2 7.9e-17 -Q24359 1/2 29 99 .. 1 77 [] 71.2 7.9e-17 -ROA1_DROME 1/2 33 103 .. 1 77 [] 71.2 7.9e-17 -Q24360 1/2 28 98 .. 1 77 [] 71.2 7.9e-17 -YHC4_YEAST 2/2 348 415 .. 1 77 [] 71.2 8e-17 -Q62019 2/2 80 143 .. 1 77 [] 71.1 8.1e-17 -Q23121 1/2 5 68 .. 1 77 [] 70.9 9.3e-17 -ROA1_SCHAM 2/2 110 180 .. 1 77 [] 70.9 9.6e-17 -Q27199 2/2 357 425 .. 1 77 [] 70.8 9.9e-17 -Q24491 1/1 9 75 .. 1 77 [] 70.8 1.1e-16 -O13741 2/2 272 343 .. 1 77 [] 70.7 1.1e-16 -NUCL_XENLA 3/4 416 482 .. 1 77 [] 70.6 1.2e-16 -Q15434 1/2 58 129 .. 1 77 [] 70.4 1.3e-16 -Q27335 4/4 290 362 .. 1 77 [] 70.4 1.3e-16 -Q06459 4/4 556 624 .. 1 77 [] 70.3 1.5e-16 -O04432 1/1 10 117 .. 1 77 [] 70.3 1.5e-16 -P92966 1/2 4 69 .. 1 77 [] 70.2 1.5e-16 -Q06106 5/5 765 835 .. 1 77 [] 70.2 1.6e-16 -Q24562 2/3 209 280 .. 1 77 [] 70.2 1.6e-16 -TIAR_MOUSE 1/3 11 97 .. 1 77 [] 70.1 1.7e-16 -Q24261 1/2 304 369 .. 1 77 [] 70.0 1.8e-16 -Q08208 1/1 281 351 .. 1 77 [] 70.0 1.8e-16 -Q91584 2/3 122 194 .. 1 77 [] 69.9 1.9e-16 -NUCL_CHICK 3/4 463 530 .. 1 77 [] 69.9 1.9e-16 -P92965 1/2 4 69 .. 1 77 [] 69.7 2.3e-16 -Q27294 1/1 121 200 .. 1 77 [] 69.6 2.3e-16 -Q06106 2/5 347 418 .. 1 77 [] 69.6 2.4e-16 -Q09542 1/2 118 182 .. 1 77 [] 69.4 2.6e-16 -ELAV_DROME 2/3 250 322 .. 1 77 [] 69.4 2.7e-16 -ELAV_DROVI 2/3 286 358 .. 1 77 [] 69.4 2.7e-16 -Q90407 1/2 27 99 .. 1 77 [] 69.4 2.8e-16 -O23475 1/2 122 194 .. 1 77 [] 69.0 3.5e-16 -O04425 1/2 122 194 .. 1 77 [] 69.0 3.5e-16 -RO22_XENLA 1/2 11 81 .. 1 77 [] 69.0 3.6e-16 -Q24409 1/2 177 247 .. 1 77 [] 68.7 4.5e-16 -Q21911 2/2 136 206 .. 1 77 [] 68.6 4.6e-16 -O14369 1/1 96 164 .. 1 77 [] 68.5 5.2e-16 -P79736 2/3 126 197 .. 1 77 [] 68.4 5.2e-16 -Q62189 1/2 18 90 .. 1 77 [] 68.3 5.7e-16 -YG5B_YEAST 1/3 197 268 .. 1 77 [] 68.2 6.3e-16 -Q61474 2/2 111 181 .. 1 77 [] 68.0 7.1e-16 -NUCL_CHICK 2/4 373 440 .. 1 77 [] 68.0 7.3e-16 -P93843 2/3 179 251 .. 1 77 [] 67.9 7.5e-16 -Q60690 1/2 87 157 .. 1 77 [] 67.8 8e-16 -O02374 1/3 153 225 .. 1 77 [] 67.8 8e-16 -O18409 1/3 357 429 .. 1 77 [] 67.8 8e-16 -Q27335 2/4 98 168 .. 1 77 [] 67.7 8.6e-16 -Q08212 1/3 9 77 .. 1 77 [] 67.7 8.9e-16 -O04240 1/2 119 190 .. 1 77 [] 67.7 8.9e-16 -NUCL_MOUSE 3/4 488 555 .. 1 77 [] 67.6 9.3e-16 -Q40363 1/2 378 447 .. 1 77 [] 67.5 1e-15 -NUCL_HUMAN 3/4 487 554 .. 1 77 [] 67.4 1.1e-15 -RO21_XENLA 1/2 11 81 .. 1 77 [] 67.3 1.1e-15 -Q24113 1/2 279 344 .. 1 77 [] 67.3 1.1e-15 -YNR5_YEAST 2/2 241 312 .. 1 77 [] 67.1 1.4e-15 -NUCL_RAT 3/4 490 557 .. 1 77 [] 66.9 1.5e-15 -YSO5_CAEEL 1/1 440 511 .. 1 77 [] 66.9 1.5e-15 -PUB1_YEAST 3/3 342 407 .. 1 77 [] 66.6 1.8e-15 -Q13151 1/2 9 79 .. 1 77 [] 66.5 2e-15 -NUCL_MESAU 3/4 487 554 .. 1 77 [] 66.5 2e-15 -RB97_DROME 2/2 125 196 .. 1 77 [] 66.4 2.1e-15 -NUCL_XENLA 2/4 326 393 .. 1 77 [] 66.4 2.1e-15 -ROC_HUMAN 1/1 18 82 .. 1 77 [] 66.4 2.1e-15 -Q91807 2/2 111 181 .. 1 77 [] 66.4 2.1e-15 -Q15584 1/2 46 116 .. 1 77 [] 66.4 2.1e-15 -ROM_HUMAN 2/3 206 276 .. 1 77 [] 66.4 2.1e-15 -Q91808 2/2 111 181 .. 1 77 [] 66.3 2.3e-15 -Q41124 1/2 115 186 .. 1 77 [] 66.2 2.5e-15 -Q92804 1/1 236 315 .. 1 77 [] 66.2 2.5e-15 -Q92751 1/1 233 312 .. 1 77 [] 66.2 2.5e-15 -Q21900 1/2 83 154 .. 1 77 [] 66.0 2.8e-15 -Q23795 2/2 122 192 .. 1 77 [] 65.9 3.1e-15 -Q08940 2/2 208 280 .. 1 77 [] 65.8 3.2e-15 -WHI3_YEAST 1/1 540 614 .. 1 77 [] 65.4 4.4e-15 -CB20_XENLA 1/1 34 105 .. 1 77 [] 65.3 4.5e-15 -P90727 2/3 284 354 .. 1 77 [] 65.1 5.2e-15 -PSF_HUMAN 1/2 299 364 .. 1 77 [] 65.1 5.3e-15 -Q01491 1/4 312 380 .. 1 77 [] 65.1 5.4e-15 -NUCL_HUMAN 4/4 573 639 .. 1 77 [] 65.1 5.5e-15 -O23093 3/3 320 389 .. 1 77 [] 65.0 5.7e-15 -P92964 2/2 95 159 .. 1 77 [] 64.8 6.4e-15 -Q09959 3/3 283 347 .. 1 77 [] 64.6 7.3e-15 -PR24_YEAST 3/3 212 284 .. 1 77 [] 64.6 7.3e-15 -Q27335 3/4 186 260 .. 1 77 [] 64.6 7.3e-15 -Q24473 2/3 198 274 .. 1 77 [] 64.4 8.4e-15 -O22855 2/3 252 316 .. 1 77 [] 64.3 9.2e-15 -P92966 2/2 98 162 .. 1 77 [] 64.3 9.5e-15 -CB20_HUMAN 1/1 42 113 .. 1 77 [] 64.1 1e-14 -Q91579 1/3 9 83 .. 1 77 [] 64.1 1.1e-14 -O23131 1/1 30 100 .. 1 77 [] 64.0 1.1e-14 -SP33_HUMAN 2/2 122 186 .. 1 77 [] 63.6 1.5e-14 -Q92904 1/1 42 110 .. 1 77 [] 63.6 1.5e-14 -Q92909 1/1 42 110 .. 1 77 [] 63.6 1.5e-14 -Q95192 1/1 42 110 .. 1 77 [] 63.6 1.5e-14 -Q26293 2/3 198 274 .. 1 77 [] 63.5 1.6e-14 -IF4B_HUMAN 1/1 98 168 .. 1 77 [] 63.5 1.6e-14 -Q06459 2/4 378 445 .. 1 77 [] 63.5 1.6e-14 -SR75_HUMAN 2/2 106 172 .. 1 77 [] 63.4 1.8e-14 -Q93594 1/1 39 110 .. 1 77 [] 63.4 1.8e-14 -P70372 2/3 108 179 .. 1 77 [] 63.3 1.9e-14 -O04319 1/4 23 94 .. 1 77 [] 63.2 2e-14 -RU2B_HUMAN 1/2 9 81 .. 1 77 [] 63.1 2.1e-14 -ROC_XENLA 1/1 19 83 .. 1 77 [] 63.0 2.2e-14 -P92964 1/2 4 69 .. 1 77 [] 62.6 3e-14 -O15396 1/1 42 110 .. 1 77 [] 62.1 4.4e-14 -Q01491 3/4 479 549 .. 1 77 [] 62.0 4.5e-14 -Q94901 1/2 9 72 .. 1 77 [] 61.7 5.4e-14 -Q17385 2/3 144 215 .. 1 77 [] 61.7 5.6e-14 -Q91920 2/2 112 182 .. 1 77 [] 61.6 6.1e-14 -P92965 2/2 99 163 .. 1 77 [] 61.5 6.4e-14 -Q24024 2/2 138 208 .. 1 77 [] 61.5 6.5e-14 -Q91579 2/3 97 168 .. 1 77 [] 61.5 6.6e-14 -NUCL_CHICK 1/4 283 352 .. 1 77 [] 61.4 6.9e-14 -O23646 1/1 4 66 .. 1 77 [] 61.3 7.5e-14 -HS49_YEAST 1/2 11 83 .. 1 77 [] 61.3 7.6e-14 -Q23287 1/1 53 122 .. 1 77 [] 61.2 7.9e-14 -P90871 1/1 173 244 .. 1 77 [] 61.1 8.7e-14 -Q24024 1/2 34 104 .. 1 77 [] 60.9 9.9e-14 -Q94901 2/2 88 151 .. 1 77 [] 60.7 1.1e-13 -O15042 1/1 275 349 .. 1 77 [] 60.7 1.1e-13 -YSX2_CAEEL 1/1 4 67 .. 1 77 [] 60.7 1.1e-13 -RU1A_DROME 1/2 9 81 .. 1 77 [] 60.6 1.2e-13 -O13620 4/5 621 697 .. 1 77 [] 60.6 1.2e-13 -Q13245 2/2 112 178 .. 1 77 [] 60.5 1.2e-13 -Q13247 2/2 112 178 .. 1 77 [] 60.5 1.2e-13 -SSB1_YEAST 1/2 39 114 .. 1 77 [] 60.4 1.3e-13 -Q64368 1/1 42 110 .. 1 77 [] 60.3 1.5e-13 -HRB1_YEAST 2/3 238 308 .. 1 77 [] 60.3 1.5e-13 -Q13148 1/2 106 175 .. 1 77 [] 60.2 1.5e-13 -Q27199 1/2 265 335 .. 1 77 [] 60.2 1.6e-13 -O13759 1/2 184 256 .. 1 77 [] 60.1 1.7e-13 -Q09331 1/1 24 96 .. 1 77 [] 60.1 1.7e-13 -Q14151 1/1 409 480 .. 1 77 [] 60.1 1.7e-13 -Q41042 2/2 453 527 .. 1 77 [] 60.0 1.8e-13 -Q22412 1/1 237 307 .. 1 77 [] 60.0 1.8e-13 -Q15020 1/2 706 777 .. 1 77 [] 60.0 1.8e-13 -O13759 2/2 299 364 .. 1 77 [] 59.9 2e-13 -Q09335 1/1 31 96 .. 1 77 [] 59.9 2e-13 -Q64012 1/1 23 87 .. 1 77 [] 59.7 2.2e-13 -Q23120 1/2 4 67 .. 1 77 [] 59.7 2.2e-13 -O22855 1/3 20 85 .. 1 77 [] 59.7 2.3e-13 -PSF_HUMAN 2/2 373 443 .. 1 77 [] 59.6 2.4e-13 -Q15056 1/1 46 115 .. 1 77 [] 59.5 2.6e-13 -Q42215 1/1 5 77 .. 1 77 [] 59.5 2.6e-13 -Q09584 1/1 105 172 .. 1 77 [] 59.4 2.7e-13 -NUCL_HUMAN 2/4 394 460 .. 1 77 [] 59.3 2.9e-13 -Q14924 1/1 42 113 .. 1 77 [] 59.3 3e-13 -MLO3_SCHPO 1/1 57 129 .. 1 77 [] 59.0 3.6e-13 -Q08212 3/3 223 289 .. 1 77 [] 58.9 3.8e-13 -GRP1_SORVU 1/1 1 60 [. 1 77 [] 58.9 3.9e-13 -ROH1_HUMAN 2/3 113 183 .. 1 77 [] 58.9 4e-13 -O35737 2/3 113 183 .. 1 77 [] 58.9 4e-13 -RU17_YEAST 1/1 109 183 .. 1 77 [] 58.9 4e-13 -Q13809 2/2 123 187 .. 1 77 [] 58.7 4.4e-13 -O08583 1/1 107 177 .. 1 77 [] 58.7 4.6e-13 -Q23796 2/2 118 184 .. 1 77 [] 58.5 5.1e-13 -NOP4_YEAST 3/4 292 378 .. 1 77 [] 58.5 5.3e-13 -Q06106 4/5 665 741 .. 1 77 [] 58.4 5.4e-13 -Q24252 2/2 122 188 .. 1 77 [] 58.4 5.4e-13 -SR55_DROME 2/2 116 182 .. 1 77 [] 58.4 5.4e-13 -Q14730 1/1 1 70 [. 1 77 [] 58.4 5.4e-13 -Q91017 1/1 26 96 .. 1 77 [] 58.2 6.2e-13 -LA_HUMAN 1/1 113 182 .. 1 77 [] 58.2 6.5e-13 -Q15367 1/1 60 129 .. 1 77 [] 58.2 6.5e-13 -MODU_DROME 3/4 342 410 .. 1 77 [] 58.1 6.7e-13 -RN24_SCHPO 2/2 230 295 .. 1 77 [] 57.8 8.2e-13 -Q26692 3/3 207 271 .. 1 77 [] 57.8 8.3e-13 -O13620 5/5 723 793 .. 1 77 [] 57.6 9.4e-13 -O23189 3/3 341 410 .. 1 77 [] 57.6 9.6e-13 -O23093 1/3 118 215 .. 1 77 [] 57.5 1.1e-12 -ROH2_HUMAN 2/3 113 183 .. 1 77 [] 57.4 1.1e-12 -P70333 2/3 113 183 .. 1 77 [] 57.4 1.1e-12 -Q08920 1/1 48 119 .. 1 77 [] 57.2 1.3e-12 -PR24_YEAST 2/3 119 190 .. 1 77 [] 57.1 1.3e-12 -P93843 1/3 86 157 .. 1 77 [] 57.1 1.4e-12 -Q92950 2/3 110 181 .. 1 77 [] 56.6 1.9e-12 -P70055 2/3 152 223 .. 1 77 [] 56.6 1.9e-12 -RU1A_HUMAN 2/2 210 277 .. 1 77 [] 56.6 1.9e-12 -Q62189 2/2 215 282 .. 1 77 [] 56.6 1.9e-12 -Q22135 1/1 34 105 .. 1 77 [] 56.1 2.7e-12 -O23645 1/1 4 68 .. 1 77 [] 55.9 3.1e-12 -Q09959 1/3 48 156 .. 1 77 [] 55.7 3.6e-12 -P97855 1/1 340 400 .. 1 77 [] 55.5 4e-12 -Q24207 1/1 35 105 .. 1 77 [] 55.5 4.2e-12 -Q62150 1/1 162 234 .. 1 77 [] 55.2 4.9e-12 -O14797 1/2 6 66 .. 1 77 [] 55.0 6e-12 -ARP2_PLAFA 2/2 364 438 .. 1 77 [] 55.0 6e-12 -Q13283 1/1 342 402 .. 1 77 [] 54.2 1e-11 -Q39244 1/2 20 92 .. 1 77 [] 54.1 1.1e-11 -Q04067 1/1 193 265 .. 1 77 [] 54.0 1.2e-11 -RU1A_DROME 2/2 144 211 .. 1 77 [] 53.8 1.4e-11 -Q62379 1/1 51 118 .. 1 77 [] 53.7 1.5e-11 -RU2B_HUMAN 2/2 153 220 .. 1 77 [] 53.7 1.5e-11 -NUCL_RAT 2/4 398 464 .. 1 77 [] 53.5 1.6e-11 -Q18724 1/1 27 97 .. 1 77 [] 53.3 1.9e-11 -ROF_HUMAN 2/3 113 183 .. 1 77 [] 53.2 2e-11 -RU1A_XENLA 2/2 210 277 .. 1 77 [] 53.1 2.2e-11 -MODU_DROME 2/4 260 326 .. 1 77 [] 52.8 2.6e-11 -Q15424 1/1 357 428 .. 1 77 [] 52.3 3.7e-11 -Q26692 2/3 127 191 .. 1 77 [] 52.3 3.9e-11 -Q41498 1/2 25 97 .. 1 77 [] 52.2 3.9e-11 -P97379 1/1 300 371 .. 1 77 [] 52.2 4e-11 -NUCL_MESAU 2/4 395 461 .. 1 77 [] 52.1 4.3e-11 -NUCL_MOUSE 2/4 396 462 .. 1 77 [] 52.0 4.6e-11 -Q92879 2/3 110 181 .. 1 77 [] 51.8 5.5e-11 -YG5B_YEAST 3/3 542 633 .. 1 77 [] 51.8 5.5e-11 -Q40363 2/2 477 552 .. 1 77 [] 51.6 6e-11 -Q13242 2/2 113 177 .. 1 77 [] 51.6 6.3e-11 -GRF1_HUMAN 2/3 196 265 .. 1 77 [] 51.5 6.7e-11 -Q38915 1/2 152 220 .. 1 77 [] 51.3 7.4e-11 -SRP1_SCHPO 1/1 9 81 .. 1 77 [] 51.1 8.9e-11 -PR24_YEAST 1/3 43 111 .. 1 77 [] 50.8 1.1e-10 -Q13243 2/2 110 176 .. 1 77 [] 50.7 1.1e-10 -CL4_RAT 2/2 110 176 .. 1 77 [] 50.7 1.1e-10 -NONA_DROME 2/2 378 448 .. 1 77 [] 50.2 1.6e-10 -Q24261 2/2 378 448 .. 1 77 [] 50.2 1.6e-10 -RDP_MOUSE 1/1 268 331 .. 1 77 [] 50.2 1.7e-10 -RDP_HUMAN 1/1 264 327 .. 1 77 [] 50.2 1.7e-10 -O02374 2/3 240 306 .. 1 77 [] 50.1 1.8e-10 -O18409 2/3 444 510 .. 1 77 [] 50.1 1.8e-10 -Q15097 1/4 1 59 [. 1 77 [] 49.9 1.9e-10 -PTB_MOUSE 2/4 185 252 .. 1 77 [] 49.9 2.1e-10 -PTB_PIG 2/4 186 253 .. 1 77 [] 49.9 2.1e-10 -PTB_HUMAN 2/4 186 253 .. 1 77 [] 49.9 2.1e-10 -PTB_RAT 2/4 185 252 .. 1 77 [] 49.9 2.1e-10 -Q63568 2/4 185 252 .. 1 77 [] 49.9 2.1e-10 -LA_BOVIN 1/1 113 182 .. 1 77 [] 49.8 2.1e-10 -LA_RAT 1/1 113 182 .. 1 77 [] 49.8 2.2e-10 -Q15434 2/2 137 207 .. 1 77 [] 49.7 2.2e-10 -PTB_HUMAN 3/4 339 406 .. 1 77 [] 49.6 2.5e-10 -O15236 1/1 9 79 .. 1 77 [] 49.4 2.8e-10 -Q15433 2/2 143 213 .. 1 77 [] 49.4 2.8e-10 -O15237 1/1 9 79 .. 1 77 [] 49.4 2.8e-10 -MSSP_HUMAN 2/2 110 180 .. 1 77 [] 49.4 2.8e-10 -Q14869 2/2 110 180 .. 1 77 [] 49.4 2.8e-10 -Q21323 1/2 10 82 .. 1 77 [] 49.3 3e-10 -O14102 1/1 1 66 [. 1 77 [] 49.3 3.1e-10 -Q62378 1/1 25 92 .. 1 77 [] 49.1 3.4e-10 -Q24113 2/2 352 422 .. 1 77 [] 49.1 3.5e-10 -Q14499 1/3 155 225 .. 1 77 [] 49.0 3.6e-10 -Q14498 1/3 155 225 .. 1 77 [] 49.0 3.6e-10 -Q15380 1/1 10 75 .. 1 77 [] 48.9 3.9e-10 -O00201 2/2 150 220 .. 1 77 [] 48.9 4e-10 -Q12786 2/2 150 220 .. 1 77 [] 48.9 4e-10 -O23212 1/2 238 314 .. 1 77 [] 48.9 4e-10 -U2AG_HUMAN 1/1 67 142 .. 1 77 [] 48.7 4.6e-10 -YFK2_YEAST 1/1 20 100 .. 1 77 [] 48.5 5.4e-10 -P78814 2/2 96 162 .. 1 77 [] 48.4 5.7e-10 -PTB_RAT 3/4 363 430 .. 1 77 [] 48.2 6.3e-10 -Q63568 3/4 364 431 .. 1 77 [] 48.2 6.3e-10 -O22905 2/2 213 284 .. 1 77 [] 48.0 7.3e-10 -U2AF_HUMAN 1/3 151 226 .. 1 77 [] 48.0 7.3e-10 -U2AF_MOUSE 1/3 151 226 .. 1 77 [] 48.0 7.3e-10 -IF32_YEAST 1/1 79 157 .. 1 77 [] 48.0 7.6e-10 -P90978 2/3 292 362 .. 1 77 [] 47.9 7.9e-10 -LA_MOUSE 1/1 113 182 .. 1 77 [] 47.6 9.8e-10 -Q19335 1/2 374 461 .. 1 77 [] 47.6 1e-09 -CPO_DROME 1/1 453 526 .. 1 77 [] 47.3 1.2e-09 -Q06459 1/4 286 355 .. 1 77 [] 47.2 1.3e-09 -Q17175 1/1 7 79 .. 1 77 [] 46.6 2e-09 -O22905 1/2 112 184 .. 1 77 [] 46.5 2.1e-09 -Q26658 1/2 8 79 .. 1 77 [] 46.3 2.4e-09 -HS49_YEAST 2/2 110 180 .. 1 77 [] 46.1 2.8e-09 -Q13117 1/1 42 110 .. 1 77 [] 46.1 2.8e-09 -Q63887 2/2 152 218 .. 1 77 [] 46.1 2.9e-09 -Q39244 2/2 179 245 .. 1 77 [] 45.5 4.2e-09 -O13801 1/1 246 317 .. 1 77 [] 45.5 4.3e-09 -O13674 1/3 301 356 .. 1 77 [] 45.4 4.4e-09 -RU1A_YEAST 1/1 229 294 .. 1 77 [] 45.3 4.9e-09 -Q41499 2/2 159 226 .. 1 77 [] 45.3 5e-09 -Q12159 1/1 80 151 .. 1 77 [] 45.2 5.2e-09 -O22922 2/2 160 227 .. 1 77 [] 45.1 5.5e-09 -NUCL_XENLA 1/4 234 303 .. 1 77 [] 45.1 5.6e-09 -YD3D_SCHPO 1/2 81 153 .. 1 77 [] 45.0 5.8e-09 -NAB3_YEAST 1/1 332 396 .. 1 77 [] 44.7 7.5e-09 -Q63627 1/1 424 491 .. 1 77 [] 44.0 1.2e-08 -MEI2_SCHPO 1/1 197 265 .. 1 77 [] 43.8 1.4e-08 -Q24562 1/3 95 170 .. 1 77 [] 43.6 1.6e-08 -Q06106 1/5 4 89 .. 1 77 [] 43.6 1.6e-08 -Q23161 1/1 59 129 .. 1 77 [] 43.3 2e-08 -O13649 1/1 6 78 .. 1 77 [] 43.0 2.4e-08 -P90797 1/1 66 136 .. 1 77 [] 42.9 2.5e-08 -Q07655 1/1 535 612 .. 1 77 [] 42.8 2.7e-08 -RN12_YEAST 1/1 200 267 .. 1 77 [] 42.8 2.8e-08 -O22922 1/2 12 84 .. 1 77 [] 42.6 3.1e-08 -YHC4_YEAST 1/2 95 167 .. 1 77 [] 42.4 3.6e-08 -Q07034 1/1 332 396 .. 1 77 [] 42.3 3.9e-08 -Q22318 2/2 179 249 .. 1 77 [] 41.8 5.4e-08 -Q92879 1/3 18 92 .. 1 77 [] 41.7 6e-08 -P70055 1/3 60 134 .. 1 77 [] 41.6 6.1e-08 -O13620 1/5 4 78 .. 1 77 [] 41.4 7.3e-08 -Q22708 1/2 16 85 .. 1 77 [] 41.3 7.7e-08 -MODU_DROME 4/4 422 484 .. 1 77 [] 41.2 8.5e-08 -P90978 1/3 185 260 .. 1 77 [] 41.0 9.5e-08 -Q92950 1/3 18 92 .. 1 77 [] 41.0 9.7e-08 -Q21323 2/2 145 212 .. 1 77 [] 40.9 1e-07 -P90727 1/3 177 252 .. 1 77 [] 40.9 1e-07 -O23866 1/2 188 254 .. 1 77 [] 40.9 1.1e-07 -Q41499 1/2 13 85 .. 1 77 [] 40.8 1.1e-07 -Q24375 1/1 151 225 .. 1 77 [] 40.7 1.1e-07 -O18219 1/1 19 96 .. 1 77 [] 40.6 1.2e-07 -Q15686 1/1 155 215 .. 1 77 [] 40.4 1.5e-07 -Q15364 1/1 105 165 .. 1 77 [] 40.4 1.5e-07 -D111_ARATH 1/1 281 360 .. 1 77 [] 40.4 1.5e-07 -Q21322 2/2 129 196 .. 1 77 [] 39.9 2e-07 -LAB_XENLA 1/1 112 183 .. 1 77 [] 39.8 2.2e-07 -O23866 2/2 273 339 .. 1 77 [] 39.7 2.4e-07 -PTB_MOUSE 1/4 60 127 .. 1 77 [] 39.5 2.7e-07 -Q17385 1/3 57 129 .. 1 77 [] 39.4 2.9e-07 -O00425 1/2 4 70 .. 1 77 [] 39.2 3.3e-07 -PTB_RAT 1/4 60 127 .. 1 77 [] 39.2 3.4e-07 -Q63568 1/4 60 127 .. 1 77 [] 39.2 3.4e-07 -PTB_PIG 1/4 61 128 .. 1 77 [] 38.9 4e-07 -NUCL_MESAU 1/4 309 378 .. 1 77 [] 38.5 5.3e-07 -LA_DROME 1/1 151 225 .. 1 77 [] 38.3 6e-07 -Q01491 2/4 388 458 .. 1 77 [] 38.3 6.3e-07 -YA2B_SCHPO 2/4 208 274 .. 1 77 [] 38.3 6.4e-07 -Q99730 1/2 135 213 .. 1 77 [] 38.2 6.6e-07 -PTB_PIG 3/4 365 432 .. 1 77 [] 38.2 6.7e-07 -YAG3_SCHPO 1/1 234 305 .. 1 77 [] 38.2 6.8e-07 -LAA_XENLA 1/1 113 182 .. 1 77 [] 37.7 9.1e-07 -Q21900 2/2 164 236 .. 1 77 [] 37.6 9.7e-07 -NOP4_YEAST 2/4 149 220 .. 1 77 [] 37.6 9.8e-07 -Q93062 1/1 26 93 .. 1 77 [] 37.5 1.1e-06 -Q92516 1/1 26 93 .. 1 77 [] 37.5 1.1e-06 -Q92517 1/1 26 93 .. 1 77 [] 37.5 1.1e-06 -ROH1_HUMAN 3/3 291 359 .. 1 77 [] 37.3 1.2e-06 -O35737 3/3 291 359 .. 1 77 [] 37.3 1.2e-06 -O00425 2/2 83 151 .. 1 77 [] 37.2 1.3e-06 -Q63623 1/1 479 546 .. 1 77 [] 37.2 1.3e-06 -PTB_HUMAN 1/4 61 128 .. 1 77 [] 37.1 1.4e-06 -MODU_DROME 1/4 177 246 .. 1 77 [] 37.0 1.5e-06 -Q99730 2/2 266 344 .. 1 77 [] 36.9 1.6e-06 -NOP3_YEAST 2/2 202 270 .. 1 77 [] 36.8 1.7e-06 -Q10667 1/1 5 67 .. 1 77 [] 36.7 1.9e-06 -O23093 2/3 224 299 .. 1 77 [] 36.3 2.4e-06 -YA2B_SCHPO 4/4 416 482 .. 1 77 [] 36.3 2.5e-06 -Q41498 2/2 181 248 .. 1 77 [] 36.2 2.7e-06 -P92204 1/1 168 231 .. 1 77 [] 35.8 3.5e-06 -NUCL_MOUSE 1/4 310 379 .. 1 77 [] 35.8 3.5e-06 -YD3D_SCHPO 2/2 367 436 .. 1 77 [] 35.7 3.8e-06 -Q22039 1/2 33 105 .. 1 77 [] 35.5 4.5e-06 -Q93733 1/1 33 105 .. 1 77 [] 35.5 4.5e-06 -Q19018 1/2 33 105 .. 1 77 [] 35.5 4.5e-06 -P87216 1/1 5 71 .. 1 77 [] 35.2 5.2e-06 -Q24534 1/2 15 105 .. 1 77 [] 35.2 5.3e-06 -Q23120 2/2 114 181 .. 1 77 [] 34.9 6.7e-06 -PES4_YEAST 2/4 181 249 .. 1 77 [] 34.8 6.8e-06 -Q16630 1/1 83 156 .. 1 77 [] 34.8 7.2e-06 -Q17430 1/2 595 663 .. 1 77 [] 34.7 7.4e-06 -Q20414 1/2 175 244 .. 1 77 [] 34.6 7.9e-06 -ROF_HUMAN 3/3 291 359 .. 1 77 [] 34.5 8.4e-06 -P87126 1/1 194 261 .. 1 77 [] 34.4 8.9e-06 -Q18265 1/1 300 379 .. 1 77 [] 34.4 9.2e-06 -YAS9_SCHPO 1/1 365 429 .. 1 77 [] 34.2 1.1e-05 -NUCL_RAT 1/4 312 381 .. 1 77 [] 34.0 1.2e-05 -P70333 3/3 291 359 .. 1 77 [] 34.0 1.2e-05 -ROH2_HUMAN 3/3 291 359 .. 1 77 [] 34.0 1.2e-05 -Q13148 2/2 193 257 .. 1 77 [] 33.4 1.9e-05 -YNR5_YEAST 1/2 127 214 .. 1 77 [] 33.1 2.2e-05 -YQOA_CAEEL 1/1 114 196 .. 1 77 [] 33.1 2.4e-05 -O42254 2/2 83 151 .. 1 77 [] 33.0 2.4e-05 -O01806 1/1 112 182 .. 1 77 [] 32.9 2.5e-05 -YIS9_YEAST 1/1 30 101 .. 1 77 [] 32.9 2.7e-05 -Q18999 3/4 392 459 .. 1 77 [] 32.7 3.1e-05 -RN24_SCHPO 1/2 107 196 .. 1 77 [] 32.3 3.9e-05 -Q18220 1/1 420 491 .. 1 77 [] 32.3 4e-05 -Q18219 1/1 420 491 .. 1 77 [] 32.3 4e-05 -YHH5_YEAST 2/3 201 269 .. 1 77 [] 31.9 5.2e-05 -PTB_MOUSE 4/4 452 520 .. 1 77 [] 31.8 5.5e-05 -PTB_RAT 4/4 480 548 .. 1 77 [] 31.8 5.5e-05 -Q63568 4/4 481 549 .. 1 77 [] 31.8 5.5e-05 -ROU2_HUMAN 1/1 174 224 .. 1 77 [] 31.8 5.6e-05 -IF32_HUMAN 1/1 195 271 .. 1 77 [] 31.4 7.4e-05 -Q41988 1/1 18 66 .] 1 77 [] 31.0 9.7e-05 -Q06106 3/5 534 599 .. 1 77 [] 30.9 0.0001 -YBF1_YEAST 1/1 87 158 .. 1 77 [] 30.9 0.0001 -O42254 1/2 4 70 .. 1 77 [] 30.7 0.00012 -Q09542 2/2 191 261 .. 1 77 [] 30.6 0.00013 -Q22318 1/2 91 161 .. 1 77 [] 30.3 0.00016 -ROH2_HUMAN 1/3 13 85 .. 1 77 [] 30.2 0.00017 -LU15_HUMAN 1/2 100 173 .. 1 77 [] 29.9 0.0002 -O23288 1/2 8 72 .. 1 77 [] 29.6 0.00026 -Q18601 1/2 37 97 .. 1 77 [] 29.4 0.0003 -P70333 1/3 13 85 .. 1 77 [] 29.3 0.00032 -U2AG_DROME 1/1 51 144 .. 1 77 [] 29.0 0.00038 -YN26_YEAST 1/2 47 125 .. 1 77 [] 29.0 0.00039 -U2AG_SCHPO 1/1 73 136 .. 1 77 [] 28.9 0.00043 -GRF1_HUMAN 1/3 96 168 .. 1 77 [] 28.7 0.00048 -O13845 3/3 509 586 .. 1 77 [] 28.7 0.00049 -Q14136 1/2 212 285 .. 1 77 [] 28.4 0.00058 -Q38915 2/2 249 318 .. 1 77 [] 28.4 0.0006 -IF32_SCHPO 1/1 41 124 .. 1 77 [] 28.3 0.00063 -NUCL_HUMAN 1/4 308 377 .. 1 77 [] 28.2 0.00066 -PRT1_PICAN 1/1 39 115 .. 1 77 [] 28.2 0.00068 -ROL_HUMAN 1/3 73 140 .. 1 77 [] 27.8 0.0009 -Q19018 2/2 230 298 .. 1 77 [] 27.7 0.00096 -Q22039 2/2 230 298 .. 1 77 [] 27.7 0.00096 -O35737 1/3 13 85 .. 1 77 [] 27.5 0.0011 -ROH1_HUMAN 1/3 13 85 .. 1 77 [] 27.5 0.0011 -P87058 1/1 156 238 .. 1 77 [] 27.4 0.0012 -PTB_PIG 4/4 482 550 .. 1 77 [] 27.3 0.0012 -ARP_YEAST 1/1 228 317 .. 1 77 [] 27.3 0.0012 -O13362 1/1 156 238 .. 1 77 [] 27.3 0.0013 -O35326 2/2 110 177 .. 1 77 [] 27.2 0.0013 -PTB_HUMAN 4/4 456 524 .. 1 77 [] 27.0 0.0015 -MAT3_HUMAN 1/1 70 138 .. 1 77 [] 27.0 0.0015 -O35833 2/2 498 566 .. 1 77 [] 27.0 0.0015 -MAT3_RAT 2/2 498 566 .. 1 77 [] 27.0 0.0015 -O04554 1/1 353 421 .. 1 77 [] 26.9 0.0016 -Q26457 1/1 143 219 .. 1 77 [] 26.2 0.0026 -Q20414 2/2 261 333 .. 1 77 [] 26.1 0.0029 -O15047 1/1 96 167 .. 1 77 [] 25.8 0.0035 -NOT4_YEAST 1/1 139 227 .. 1 77 [] 25.7 0.0039 -Q14136 2/2 383 459 .. 1 77 [] 25.6 0.0041 -YA2B_SCHPO 1/4 117 183 .. 1 77 [] 25.5 0.0045 -P70501 2/2 225 302 .. 1 77 [] 25.0 0.0064 -JSN1_YEAST 1/1 342 421 .. 1 77 [] 24.6 0.0081 -YAC4_SCHPO 1/1 118 197 .. 1 77 [] 24.6 0.0083 -P97343 1/1 344 400 .. 1 77 [] 24.4 0.0094 -Q63285 1/1 345 401 .. 1 77 [] 24.4 0.0094 -O18254 1/1 52 118 .. 1 77 [] 24.3 0.0099 -Q08925 4/4 524 599 .. 1 77 [] 24.3 0.01 -Q21322 1/2 10 77 .. 1 77 [] 24.1 0.012 -Q26692 1/3 6 69 .. 1 77 [] 24.1 0.012 -O22314 2/2 121 192 .. 1 77 [] 24.0 0.013 -O22315 2/2 121 192 .. 1 77 [] 24.0 0.013 -Q18717 1/1 110 188 .. 1 77 [] 23.6 0.016 -O15759 1/2 77 140 .. 1 77 [] 23.3 0.019 -O15758 1/2 59 122 .. 1 77 [] 23.3 0.019 -O13620 3/5 508 573 .. 1 77 [] 23.1 0.019 -Q60745 1/2 1 61 [. 1 77 [] 23.1 0.019 -YKV4_YEAST 1/1 66 133 .. 1 77 [] 23.0 0.02 -U2AF_SCHPO 2/2 440 504 .. 1 77 [] 22.8 0.021 -Q08287 1/1 9 78 .. 1 77 [] 22.5 0.022 -IF4B_YEAST 1/1 103 178 .. 1 77 [] 22.4 0.023 -Q05519 1/1 35 107 .. 1 77 [] 22.3 0.023 -Q26273 1/1 1 44 [] 1 77 [] 22.3 0.023 -O13741 1/2 166 257 .. 1 77 [] 22.2 0.023 -Q14966 1/1 678 746 .. 1 77 [] 22.2 0.024 -ROL_HUMAN 2/3 163 232 .. 1 77 [] 22.2 0.024 -O15758 2/2 140 204 .. 1 77 [] 21.5 0.028 -Q20966 1/1 276 358 .. 1 77 [] 21.4 0.028 -Q24433 1/1 578 654 .. 1 77 [] 21.3 0.029 -O22855 3/3 363 431 .. 1 77 [] 21.2 0.03 -P91156 1/1 275 342 .. 1 77 [] 21.1 0.03 -Q12221 1/1 318 397 .. 1 77 [] 20.6 0.035 -Q04142 1/1 318 397 .. 1 77 [] 20.6 0.035 -GRF1_HUMAN 3/3 347 415 .. 1 77 [] 20.4 0.036 -O15759 2/2 158 222 .. 1 77 [] 20.4 0.036 -Q23121 2/2 131 197 .. 1 77 [] 20.1 0.038 -LU15_HUMAN 2/2 233 310 .. 1 77 [] 19.5 0.044 -Q93021 1/1 117 194 .. 1 77 [] 19.5 0.044 -Q01491 4/4 595 665 .. 1 77 [] 19.3 0.047 -NRD1_YEAST 1/1 341 404 .. 1 77 [] 18.9 0.051 -SSB1_YEAST 2/2 188 269 .. 1 77 [] 18.8 0.053 -Q18999 1/4 120 193 .. 1 77 [] 18.7 0.053 -NAM8_YEAST 1/3 56 140 .. 1 77 [] 18.5 0.055 -Q93237 1/1 1 68 [. 1 77 [] 18.4 0.057 -Q18999 4/4 509 577 .. 1 77 [] 18.3 0.058 -YG5B_YEAST 2/3 297 413 .. 1 77 [] 17.7 0.068 -Q99628 3/3 446 526 .. 1 77 [] 17.2 0.075 -O01159 1/1 12 85 .. 1 77 [] 16.8 0.083 -YAX9_SCHPO 1/1 137 205 .. 1 77 [] 16.7 0.084 -P90727 3/3 391 474 .. 1 77 [] 16.5 0.089 -O13674 3/3 502 568 .. 1 77 [] 15.9 0.1 -Q21351 1/1 392 472 .. 1 77 [] 15.9 0.1 -LAH1_YEAST 1/1 125 211 .. 1 77 [] 15.8 0.1 -Q18601 2/2 128 195 .. 1 77 [] 14.6 0.14 -Q06477 1/1 62 128 .. 1 77 [] 14.5 0.14 -P90978 3/3 399 482 .. 1 77 [] 14.4 0.14 -Q18999 2/4 203 270 .. 1 77 [] 14.3 0.15 -U2R2_HUMAN 1/1 240 299 .. 1 77 [] 14.1 0.15 -U2R1_HUMAN 1/1 245 304 .. 1 77 [] 14.1 0.15 -P87143 1/2 73 146 .. 1 77 [] 13.7 0.17 -Q62019 1/2 3 67 .. 1 77 [] 13.6 0.17 -U2R2_MOUSE 1/1 236 303 .. 1 77 [] 13.4 0.18 -YN8T_YEAST 1/1 420 482 .. 1 77 [] 13.2 0.19 -O35404 1/1 877 948 .. 1 77 [] 13.1 0.19 -YLF1_CAEEL 2/2 180 244 .. 1 77 [] 13.1 0.2 -U2R1_MOUSE 1/1 215 290 .. 1 77 [] 13.1 0.2 -Q23391 1/1 179 249 .. 1 77 [] 12.9 0.2 -Q08925 3/4 433 499 .. 1 77 [] 12.9 0.21 -Q22708 2/2 124 199 .. 1 77 [] 12.8 0.21 -Q10458 1/1 3 77 .. 1 77 [] 12.6 0.22 -P78332 1/1 386 459 .. 1 77 [] 12.1 0.24 -Q61464 1/2 678 746 .. 1 77 [] 12.0 0.25 -Q18937 1/1 86 161 .. 1 77 [] 12.0 0.25 -O23612 1/1 4 82 .. 1 77 [] 11.5 0.28 -Q23953 1/1 250 313 .. 1 77 [] 11.5 0.28 -Q26548 1/1 1 43 [. 1 77 [] 11.2 0.3 -Q39201 2/2 121 192 .. 1 77 [] 11.2 0.3 -YLF1_CAEEL 1/2 60 144 .. 1 77 [] 11.2 0.3 -O15056 1/1 509 580 .. 1 77 [] 11.1 0.31 -U2AF_MOUSE 3/3 400 461 .. 1 77 [] 10.8 0.33 -U2AF_HUMAN 3/3 400 461 .. 1 77 [] 10.8 0.33 -P70166 1/1 312 402 .. 1 77 [] 10.6 0.35 -BF41_MOUSE 1/1 1 55 [. 1 77 [] 10.6 0.35 -Q24527 1/1 82 151 .. 1 77 [] 10.5 0.35 -O18964 1/1 907 966 .. 1 77 [] 10.2 0.38 -Q62504 1/1 124 193 .. 1 77 [] 9.2 0.48 -NOP4_YEAST 4/4 464 585 .. 1 77 [] 9.1 0.49 -Q91572 1/2 315 392 .. 1 77 [] 9.1 0.49 -YQO1_CAEEL 1/1 269 341 .. 1 77 [] 8.8 0.53 -Q17561 1/1 74 142 .. 1 77 [] 8.7 0.54 -Q60745 2/2 128 174 .] 1 77 [] 8.6 0.55 -YHS7_YEAST 1/1 159 231 .. 1 77 [] 8.6 0.55 -Q14206 1/1 13 78 .. 1 77 [] 8.6 0.55 -O35833 1/2 400 468 .. 1 77 [] 8.3 0.59 -MAT3_RAT 1/2 400 468 .. 1 77 [] 8.3 0.59 -P87143 2/2 211 288 .. 1 77 [] 8.2 0.6 -O04526 1/1 189 266 .. 1 77 [] 7.9 0.65 -Q08925 1/4 203 269 .. 1 77 [] 7.4 0.73 -Q17860 1/1 458 529 .. 1 77 [] 7.3 0.75 -Q92615 1/1 173 241 .. 1 77 [] 7.3 0.75 -Q14498 3/3 441 497 .. 1 77 [] 7.2 0.77 -Q14499 3/3 447 503 .. 1 77 [] 7.2 0.77 -YN26_YEAST 2/2 188 260 .. 1 77 [] 7.1 0.77 -ROF_HUMAN 1/3 13 85 .. 1 77 [] 7.0 0.79 -YA2B_SCHPO 3/4 325 391 .. 1 77 [] 6.5 0.91 -O22794 1/2 212 277 .. 1 77 [] 6.4 0.91 -Q08925 2/4 294 366 .. 1 77 [] 5.9 1 -Q24562 3/3 341 403 .. 1 77 [] 5.8 1.1 -YMC7_CAEEL 1/1 32 102 .. 1 77 [] 5.6 1.1 -Q12046 1/1 137 214 .. 1 77 [] 5.1 1.2 -Q07623 1/1 80 150 .. 1 77 [] 5.1 1.2 -Q10954 1/1 368 435 .. 1 77 [] 4.9 1.3 -Q26276 1/1 1 44 [] 1 77 [] 4.4 1.5 -O01835 1/1 297 390 .. 1 77 [] 4.2 1.5 -PES4_YEAST 4/4 395 466 .. 1 77 [] 4.2 1.5 -Q61464 2/2 904 971 .. 1 77 [] 4.1 1.6 -O01691 1/1 109 179 .. 1 77 [] 4.0 1.6 -NGR1_YEAST 1/3 36 159 .. 1 77 [] 4.0 1.6 -Q23452 1/1 213 280 .. 1 77 [] 3.9 1.6 -ARP2_PLAFA 1/2 26 107 .. 1 77 [] 3.9 1.6 -ROAB_ARTSA 1/1 36 95 .. 1 77 [] 3.8 1.7 -Q61954 2/2 102 153 .] 1 77 [] 3.3 1.9 -YN8V_YEAST 1/1 116 199 .. 1 77 [] 3.2 1.9 -PTB_MOUSE 3/4 337 413 .. 1 77 [] 3.1 2 -O23189 2/3 155 254 .. 1 77 [] 3.1 2 -Q26279 1/1 1 44 [] 1 77 [] 2.9 2.1 -ROL_HUMAN 3/3 353 420 .. 1 77 [] 2.9 2.1 -Q18317 1/1 277 347 .. 1 77 [] 2.6 2.2 -P70501 1/2 60 127 .. 1 77 [] 2.6 2.2 -MUD2_YEAST 1/1 425 510 .. 1 77 [] 1.9 2.6 -Q21559 1/1 76 144 .. 1 77 [] 1.2 3.1 -O29092 1/1 362 435 .. 1 77 [] 0.9 3.3 -O14797 2/2 107 173 .. 1 77 [] 0.6 3.5 -O13674 2/3 411 477 .. 1 77 [] 0.6 3.5 -O22794 2/2 375 460 .. 1 77 [] 0.2 3.9 -Q91572 2/2 434 504 .. 1 77 [] 0.0 4 -YG3Q_YEAST 1/1 22 90 .. 1 77 [] -0.2 4.3 -P70221 1/1 128 194 .. 1 77 [] -0.3 4.3 -Y051_NPVAC 1/1 225 284 .. 1 77 [] -0.9 5 -YHR9_YEAST 1/1 250 335 .. 1 77 [] -1.2 5.4 -O35847 1/1 17 83 .. 1 77 [] -2.7 7.6 -HIPO_CAMJE 1/1 254 322 .. 1 77 [] -3.7 9.4 -Q60701 1/1 85 143 .. 1 77 [] -3.7 9.5 -Q24424 1/1 2 43 .] 1 77 [] -3.9 10 -BLSA_HUMAN 1/1 149 248 .. 1 77 [] -4.2 11 -XE7_HUMAN 1/1 149 248 .. 1 77 [] -4.2 11 -YAQ2_SCHPO 1/1 313 388 .. 1 77 [] -6.5 18 -O00583 1/1 22 83 .. 1 77 [] -6.7 19 -O13838 1/1 172 248 .. 1 77 [] -7.5 23 -O05954 1/1 285 364 .. 1 77 [] -7.6 23 -YM28_YEAST 1/1 253 342 .. 1 77 [] -8.1 26 -Q19944 1/1 131 197 .. 1 77 [] -8.7 30 -Q26274 1/1 1 44 [] 1 77 [] -8.9 32 -O01886 1/1 37 121 .. 1 77 [] -9.2 34 -Q19164 1/1 14 84 .. 1 77 [] -9.2 34 -ASM4_YEAST 1/1 271 384 .. 1 77 [] -9.5 37 -Y117_HUMAN 1/1 189 227 .] 1 77 [] -10.0 41 -O00582 1/1 22 83 .. 1 77 [] -10.0 41 -O28580 1/1 244 322 .. 1 77 [] -10.9 50 -O35309 1/1 160 233 .. 1 77 [] -11.2 54 -O29837 1/1 267 359 .. 1 77 [] -11.7 61 -Q47952 1/1 101 196 .. 1 77 [] -12.8 78 -Q08646 1/1 284 354 .. 1 77 [] -13.0 82 -Q47957 1/1 101 188 .. 1 77 [] -13.1 84 -Q58954 1/1 21 96 .. 1 77 [] -13.2 86 -Q26278 1/1 1 44 [] 1 77 [] -13.5 92 -Q13287 1/1 159 233 .. 1 77 [] -14.9 1.3e+02 -TKTC_METJA 1/1 139 200 .. 1 77 [] -15.0 1.3e+02 -PR06_YEAST 1/1 599 668 .. 1 77 [] -15.1 1.3e+02 -KHK_HUMAN 1/1 166 223 .. 1 77 [] -15.5 1.5e+02 -PUR5_METJA 1/1 258 322 .. 1 77 [] -15.8 1.6e+02 -O31824 1/1 57 130 .. 1 77 [] -15.9 1.6e+02 -Q92518 1/1 2 48 .. 1 77 [] -16.0 1.6e+02 -YD33_SCHPO 1/1 14 90 .. 1 77 [] -16.1 1.7e+02 -Q09135 1/1 44 93 .. 1 77 [] -16.1 1.7e+02 -O30057 1/1 159 244 .. 1 77 [] -16.3 1.8e+02 -Q42378 1/1 140 193 .. 1 77 [] -16.7 1.9e+02 -Q42482 1/1 20 76 .. 1 77 [] -16.8 2e+02 -O35002 1/1 186 249 .. 1 77 [] -17.0 2.1e+02 -PGDS_RAT 1/1 447 515 .. 1 77 [] -17.1 2.1e+02 -YY08_METJA 1/1 879 966 .. 1 77 [] -17.2 2.2e+02 -Q48827 1/1 113 175 .. 1 77 [] -17.8 2.5e+02 -Q23637 1/1 311 382 .. 1 77 [] -17.9 2.5e+02 -Q48639 1/1 46 120 .. 1 77 [] -17.9 2.5e+02 -VJ01_VACCC 1/1 70 126 .. 1 77 [] -18.3 2.8e+02 -HBA_ARAAR 1/1 13 93 .. 1 77 [] -18.4 2.8e+02 -YAB9_SCHPO 1/1 241 300 .. 1 77 [] -18.4 2.8e+02 -P94393 1/1 303 365 .. 1 77 [] -18.7 3.1e+02 -Q26271 1/1 1 39 [] 1 77 [] -18.9 3.2e+02 -Q96423 1/1 42 105 .. 1 77 [] -18.9 3.2e+02 -O00373 1/1 181 248 .. 1 77 [] -19.3 3.5e+02 -YHB0_YEAST 1/1 115 191 .. 1 77 [] -19.4 3.6e+02 -VJ01_VACCV 1/1 70 126 .. 1 77 [] -19.4 3.6e+02 -SYH_METJA 1/1 329 397 .. 1 77 [] -19.8 3.9e+02 -Q12452 1/1 50 101 .. 1 77 [] -20.0 4.1e+02 -Q46102 1/1 39 109 .. 1 77 [] -20.2 4.3e+02 -O17002 1/1 192 242 .. 1 77 [] -20.3 4.4e+02 -Q19942 1/1 125 197 .. 1 77 [] -20.3 4.5e+02 -PHYA_SOLTU 1/1 822 898 .. 1 77 [] -20.4 4.5e+02 -P75023 1/1 228 280 .. 1 77 [] -20.4 4.5e+02 -SYF_METJA 1/1 84 188 .. 1 77 [] -20.4 4.6e+02 -Y447_METJA 1/1 76 149 .. 1 77 [] -20.6 4.7e+02 -Y383_METJA 1/1 182 261 .. 1 77 [] -20.6 4.8e+02 -Q94172 1/1 32 112 .. 1 77 [] -20.8 5e+02 -CHLL_CHLRE 1/1 172 245 .. 1 77 [] -20.9 5e+02 -Q00898 1/1 136 190 .. 1 77 [] -20.9 5.1e+02 -Q00897 1/1 136 190 .. 1 77 [] -20.9 5.1e+02 -A1A2_MOUSE 1/1 136 190 .. 1 77 [] -20.9 5.1e+02 -Q61283 1/1 125 179 .. 1 77 [] -20.9 5.1e+02 -Q85381 1/1 70 137 .. 1 77 [] -21.0 5.2e+02 -VJ01_VARV 1/1 70 137 .. 1 77 [] -21.0 5.2e+02 -O34784 1/1 466 555 .. 1 77 [] -21.0 5.2e+02 -MENE_HAEIN 1/1 340 416 .. 1 77 [] -21.1 5.3e+02 -T2C2_CHVP1 1/1 220 286 .. 1 77 [] -21.2 5.4e+02 -CARA_BACSU 1/1 100 156 .. 1 77 [] -21.2 5.4e+02 -SFCA_ECOLI 1/1 84 158 .. 1 77 [] -21.2 5.5e+02 -Q23267 1/1 351 414 .. 1 77 [] -21.5 5.8e+02 -DDLA_ECOLI 1/1 120 200 .. 1 77 [] -21.5 5.8e+02 -KLP1_CHLRE 1/1 171 249 .. 1 77 [] -21.5 5.8e+02 -Q26272 1/1 1 43 [] 1 77 [] -21.7 6.1e+02 -CPC3_RABIT 1/1 38 101 .. 1 77 [] -22.0 6.6e+02 -O29409 1/1 236 296 .. 1 77 [] -22.0 6.6e+02 -O34925 1/1 71 150 .. 1 77 [] -22.2 6.9e+02 -Q26281 1/1 1 39 [. 1 77 [] -22.4 7.2e+02 -Q25988 1/1 2 73 .. 1 77 [] -22.4 7.2e+02 -META_ECOLI 1/1 38 112 .. 1 77 [] -22.5 7.3e+02 -O04614 1/1 39 117 .. 1 77 [] -22.8 7.8e+02 -P95425 1/1 70 137 .. 1 77 [] -22.8 7.9e+02 -Q51350 1/1 70 137 .. 1 77 [] -22.8 7.9e+02 -P75431 1/1 1 78 [. 1 77 [] -22.9 8e+02 -O00364 1/1 181 249 .. 1 77 [] -22.9 8e+02 -PRE1_STAAU 1/1 89 166 .. 1 77 [] -22.9 8.1e+02 -Q96515 1/1 181 253 .. 1 77 [] -23.0 8.2e+02 -O27761 1/1 110 225 .. 1 77 [] -23.1 8.4e+02 -Q51783 1/1 70 137 .. 1 77 [] -23.2 8.6e+02 -A1A1_MOUSE 1/1 136 190 .. 1 77 [] -23.4 9e+02 -Q00896 1/1 136 190 .. 1 77 [] -23.4 9e+02 -Q20445 1/1 11 106 .. 1 77 [] -23.5 9.3e+02 -YNC8_CAEEL 1/1 3 52 .. 1 77 [] -23.5 9.3e+02 -Q57721 1/1 1 62 [. 1 77 [] -23.5 9.3e+02 -Y556_METJA 1/1 101 181 .. 1 77 [] -23.7 9.7e+02 -O28372 1/1 29 98 .. 1 77 [] -23.8 9.8e+02 -O00376 1/1 181 249 .. 1 77 [] -23.8 9.9e+02 -O00361 1/1 181 249 .. 1 77 [] -23.8 9.9e+02 -O00371 1/1 181 249 .. 1 77 [] -23.8 9.9e+02 -O00377 1/1 181 249 .. 1 77 [] -23.8 9.9e+02 -Q15605 1/1 181 249 .. 1 77 [] -23.8 9.9e+02 -Q12880 1/1 181 249 .. 1 77 [] -23.8 9.9e+02 -O00369 1/1 181 249 .. 1 77 [] -23.8 9.9e+02 -O00374 1/1 181 249 .. 1 77 [] -23.8 9.9e+02 -O00365 1/1 181 249 .. 1 77 [] -23.8 9.9e+02 - -Histogram of all scores: -score obs exp (one = represents 6 sequences) ------ --- --- - -67 2 0|= - -66 0 0| - -65 0 0| - -64 1 0|= - -63 0 0| - -62 4 0|= - -61 2 0|= - -60 5 0|= - -59 12 0|== - -58 3 0|= - -57 5 0|= - -56 7 3|*= - -55 16 12|=*= - -54 16 30|=== * - -53 20 61|==== * - -52 37 103|======= * - -51 42 148|======= * - -50 48 188|======== * - -49 42 216|======= * - -48 71 231|============ * - -47 61 232|=========== * - -46 64 222|=========== * - -45 96 204|================ * - -44 96 183|================ * - -43 100 159|================= * - -42 92 136|================ * - -41 97 114|================= * - -40 112 95|===============*=== - -39 109 78|============*====== - -38 95 64|==========*===== - -37 108 52|========*========= - -36 86 42|======*======== - -35 92 33|=====*========== - -34 89 27|====*========== - -33 81 21|===*========== - -32 61 17|==*======== - -31 64 13|==*======== - -30 34 11|=*==== - -29 25 8|=*=== - -28 33 7|=*==== - -27 14 5|*== - -26 30 4|*==== - -25 25 3|*==== - -24 22 2|*=== - -23 13 2|*== - -22 9 1|*= - -21 16 1|*== - -20 5 1|* - -19 6 0|= - -18 5 0|= - -17 6 0|= - -16 6 0|= - -15 1 0|= - -14 4 0|= - -13 1 0|= - -12 2 0|= - -11 2 0|= - -10 4 0|= - -9 3 0|= - -8 2 0|= - -7 2 0|= - -6 0 0| - -5 2 0|= - -4 3 0|= - -3 1 0|= - -2 1 0|= - -1 3 0|= - 0 1 0|= - 1 2 0|= - 2 2 0|= - 3 3 0|= - 4 4 0|= - 5 3 0|= - 6 1 0|= - 7 3 0|= - 8 4 0|= - 9 2 0|= - 10 4 0|= - 11 5 0|= - 12 3 0|= - 13 4 0|= - 14 3 0|= - 15 2 0|= - 16 3 0|= - 17 0 0| - 18 2 0|= - 19 1 0|= - 20 2 0|= - 21 4 0|= - 22 6 0|= - 23 1 0|= - 24 6 0|= - 25 2 0|= - 26 2 0|= - 27 5 0|= - 28 3 0|= - 29 1 0|= - 30 1 0|= - 31 4 0|= - 32 4 0|= - 33 1 0|= - 34 4 0|= - 35 6 0|= - 36 1 0|= - 37 5 0|= - 38 2 0|= - 39 1 0|= - 40 5 0|= - 41 0 0| - 42 6 0|= - 43 5 0|= - 44 2 0|= - 45 3 0|= - 46 2 0|= - 47 3 0|= - 48 3 0|= - 49 7 0|== - 50 2 0|= - 51 1 0|= - 52 3 0|= - 53 2 0|= - 54 4 0|= - 55 6 0|= - 56 1 0|= - 57 1 0|= - 58 7 0|== - 59 8 0|== - 60 6 0|= - 61 4 0|= - 62 1 0|= - 63 10 0|== - 64 2 0|= - 65 2 0|= - 66 4 0|= - 67 0 0| - 68 1 0|= - 69 2 0|= - 70 2 0|= - 71 7 0|== - 72 6 0|= - 73 0 0| - 74 4 0|= - 75 6 0|= - 76 2 0|= - 77 7 0|== - 78 6 0|= - 79 3 0|= - 80 9 0|== - 81 6 0|= - 82 4 0|= - 83 5 0|= - 84 1 0|= - 85 1 0|= - 86 7 0|== - 87 3 0|= - 88 3 0|= - 89 5 0|= - 90 5 0|= - 91 2 0|= - 92 4 0|= - 93 4 0|= -> 94 318 -|===================================================== - - -% Statistical details of theoretical EVD fit: - mu = -46.9346 - lambda = 0.2314 -chi-sq statistic = 1898.6975 - P(chi-square) = 0 - -Whole sequence top hits: -tophits_s report: - Total hits: 751 - Satisfying E cutoff: 751 - Total memory: 117K - -Domain top hits: -tophits_s report: - Total hits: 1215 - Satisfying E cutoff: 1215 - Total memory: 618K diff --git a/t/data/hmmsearch3.out b/t/data/hmmsearch3.out deleted file mode 100644 index 20589e467..000000000 --- a/t/data/hmmsearch3.out +++ /dev/null @@ -1,18 +0,0 @@ -# hmmsearch :: search profile(s) against a sequence database -# HMMER 3.0 (March 2010); http://hmmer.org/ -# Copyright (C) 2010 Howard Hughes Medical Institute. -# Freely distributed under the GNU General Public License (GPLv3). -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# query HMM file: Kv9.hmm -# target sequence database: /home/pboutet/Desktop/databases/nr_May26 -# max ASCII text line length: unlimited -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query: Kv9 [M=481] -Scores for complete sequences (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Sequence Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - 1.1e-260 874.4 2.2 1.2e-260 874.3 1.5 1.0 1 gi|281344220|gb|EFB19804.1| hypothetical protein PANDA_002980 [Ailuropoda melanoleuca] - 2.2e-259 870.1 2.8 2.5e-259 869.9 1.9 1.0 1 gi|296224438|ref|XP_002758060.1| PREDICTED: potassium voltage-gated channel subfamily S member 3 [Callithrix jacchus] -// diff --git a/t/data/hmmsearch3_multi.out b/t/data/hmmsearch3_multi.out deleted file mode 100644 index 1eb27d36c..000000000 --- a/t/data/hmmsearch3_multi.out +++ /dev/null @@ -1,211 +0,0 @@ -# hmmsearch :: search profile(s) against a sequence database -# HMMER 3.0 (March 2010); http://hmmer.org/ -# Copyright (C) 2010 Howard Hughes Medical Institute. -# Freely distributed under the GNU General Public License (GPLv3). -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# query HMM file: Pfam-A.hmm -# target sequence database: test_seqs.seq_raw.txt -# output directed to file: hmmsearch.output.txt -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query: 1-cysPrx_C [M=40] -Accession: PF10417.4 -Description: C-terminal domain of 1-Cys peroxiredoxin -Scores for complete sequences (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Sequence Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - - [No hits detected that satisfy reporting thresholds] - - -Domain annotation for each sequence (and alignments): - - [No targets detected that satisfy reporting thresholds] - - -Internal pipeline statistics summary: -------------------------------------- -Query model(s): 1 (40 nodes) -Target sequences: 3 (2141 residues) -Passed MSV filter: 0 (0); expected 0.1 (0.02) -Passed bias filter: 0 (0); expected 0.1 (0.02) -Passed Vit filter: 0 (0); expected 0.0 (0.001) -Passed Fwd filter: 0 (0); expected 0.0 (1e-05) -Initial search space (Z): 3 [actual number of targets] -Domain search space (domZ): 0 [number of targets reported over threshold] -# CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00.00 -# Mc/sec: 14.27 -// -Query: DUF4229 [M=69] -Accession: PF14012.1 -Description: Protein of unknown function (DUF4229) -Scores for complete sequences (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Sequence Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - ------ inclusion threshold ------ - 3 -17.8 37.0 0.019 3.0 2.5 4.3 5 lcl|Protein_ID1.3|M3 complement(48376..51420) - - -Domain annotation for each sequence (and alignments): ->> lcl|Protein_ID1.3|M3 complement(48376..51420) - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ? -4.3 1.7 1 3 34 40 .. 305 311 .. 277 337 .. 0.55 - 2 ? 2.6 2.1 0.0084 0.025 12 50 .. 382 418 .. 374 422 .. 0.69 - 3 ? -1.1 0.6 0.12 0.36 4 36 .. 841 874 .. 841 882 .. 0.52 - 4 ? 3.0 2.5 0.0064 0.019 7 59 .. 872 925 .. 870 929 .. 0.90 - 5 ? -0.7 0.9 0.087 0.26 31 50 .. 972 991 .. 937 1001 .. 0.56 - - Alignments for each domain: - == domain 1 score: -4.3 bits; conditional E-value: 1 - DUF4229 34 laallAl 40 - la+l A+ - lcl|Protein_ID1.3|M3 305 LAILSAI 311 - 3333332 PP - - == domain 2 score: 2.6 bits; conditional E-value: 0.0084 - DUF4229 12 fvvllavlvllgvligllvplllaallAlvvalplSfll 50 - + v ++ l gv l +p+ a++++++ +++lS + - lcl|Protein_ID1.3|M3 382 VFVPMF--SLTGVPRYLFIPMAEAVIFGMLSSFVLSQTF 418 - 333333..34455555449*****************776 PP - - == domain 3 score: -1.1 bits; conditional E-value: 0.12 - DUF4229 4 YtalRlglfvvllav..lvllgvligllvplllaa 36 - Y +l lgl++ + ++ lv+++ + +l+ p++++ - lcl|Protein_ID1.3|M3 841 YRGLSLGLVASIILIyfLVVVNFESWLD-PFVIIT 874 - 5566688888777774344444444444.554443 PP - - == domain 4 score: 3.0 bits; conditional E-value: 0.0064 - DUF4229 7 lRlglfvvllavlvllgvligll.vplllaallAlvvalplSflllrklRrrat 59 - + +l ++l++++++l + + l vp l a++ + va + S+l+ + R+r++ - lcl|Protein_ID1.3|M3 872 IITALPAALAGIVWMLYLTGTTLsVPALTGAIMCMGVATANSILVISFARERLA 925 - 56789999******9999999889*********************999999875 PP - - == domain 5 score: -0.7 bits; conditional E-value: 0.087 - DUF4229 31 plllaallAlvvalplSfll 50 - pl a++ +l++a + +++ - lcl|Protein_ID1.3|M3 972 PLGRAVIGGLLLATIATLIF 991 - 55555555555554444333 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query model(s): 1 (69 nodes) -Target sequences: 3 (2141 residues) -Passed MSV filter: 1 (0.333333); expected 0.1 (0.02) -Passed bias filter: 1 (0.333333); expected 0.1 (0.02) -Passed Vit filter: 1 (0.333333); expected 0.0 (0.001) -Passed Fwd filter: 1 (0.333333); expected 0.0 (1e-05) -Initial search space (Z): 3 [actual number of targets] -Domain search space (domZ): 1 [number of targets reported over threshold] -# CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00.00 -# Mc/sec: 16.41 -// -Query: ACR_tran [M=1021] -Accession: PF00873.14 -Description: AcrB/AcrD/AcrF family -Scores for complete sequences (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Sequence Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - 9.3e-189 616.9 10.5 1.2e-188 616.6 7.2 1.0 1 lcl|Protein_ID1.3|M3 complement(48376..51420) - - -Domain annotation for each sequence (and alignments): ->> lcl|Protein_ID1.3|M3 complement(48376..51420) - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 616.6 7.2 3.9e-189 1.2e-188 71 1021 .] 11 1000 .. 2 1000 .. 0.93 - - Alignments for each domain: - == domain 1 score: 616.6 bits; conditional E-value: 3.9e-189 - S-TTEEEEEEEETTSEEEEEEEESTTS-HHHHHHHHHHHHHHHGGGS-HHHHHH-EEEEEEECCECEEEEEEESSSTS-HHHHHHHH CS - ACR_tran 71 gldglkyvsSqSseglssitvtFedgtdidiArqqvqnrlqeaknkLPeevqepgiskiktssseilvlavtskdgsltktdlrdla 157 - ++ +++++SqS g + + F+ + di A+ qv++ q + +++P ++++p i +++ +il+la++sk l++ + dl - lcl|Protein_ID1.3|M3 11 TVNDIEHIESQSLFGYGIVKIFFQPDVDIRTANAQVTAISQTVLKQMPPGITPPLILNYNAATVPILQLALSSK--VLSEDRIFDLG 95 - 578899********************************************************************..*********** PP - - HHCTHHHHHTSTTEEEEEESS.--EEEEEEE-HHHHHCTT--HHHHHHHHHHHSSB-EEEECTT-SB-EEEE-SB---SCCHHCT-E CS - ACR_tran 158 esnikdqlsrveGVgdvqliGgsekavriwldpqklaklgltltdvvsalkeqnvqvaaGqlegqqeelliraqgrlqsaediekii 244 - ++ i++ql+ v G + +Gg+ ++++i ldpq++++ +++++dv++al++qn + G+ + + e+++++++ + ++++ - lcl|Protein_ID1.3|M3 96 QNFIRPQLATVRGSAVPSPYGGKVRQIQIDLDPQAMQSKRVSPDDVARALSQQNLVLSPGTEKIGSFEYNVKINDSPDEFTLLNNLP 182 - *************************************************************************************** PP - - EEETTSEEEEHHHCEEEEEEESSSS-EEEETTCEEEEEEEEEETTSBHHHHHHHHHHHHHCCGGGSSTTEEEEEEEESHHHHHHHHH CS - ACR_tran 245 vksqdgskvrlrDvAkvelgaeeeriaatlngkpavllavkklpganaievvkavkekleelketlPegveivvvydttefvrasie 331 - +k+ g + ++DvA+v +g + ++++ +g vl+++ k ++++++ ke +++lketlP+++ ++vv d++ fv+++i+ - lcl|Protein_ID1.3|M3 183 IKNVGGVTIFIHDVAHVRDGFPPQINVVRDDGRRSVLMTILKNGATSTLDIIQGTKELIPKLKETLPNNLVLKVVGDQSIFVKSAIS 269 - *************************************************************************************** PP - - HHHHHHHHHHHHHHHHHHHHHSSHCCCHHHHHHHHHHHHHHHHHHHHTT--EEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCSS CS - ACR_tran 332 eVvktlleaivLvvlvlflFLqnlratlipaiavPlsllgtfavlkalglsiNlltlfgLvlAiGlvvDdAiVvvEnverkleeege 418 - Vv + +a +L ++++lFL+++r+t+i+ +++Pl++l ++++l++ g ++N++tl+gL+lA+G++vDdA Vv+En+ +le+ g+ - lcl|Protein_ID1.3|M3 270 GVVREGTIAGILTSVMILLFLGSWRSTIIISMSIPLAILSAIIFLSLTGNTLNVMTLGGLALAVGMLVDDATVVIENINHHLEM-GK 355 - ************************************************************************************.** PP - - -HHHHHHHHHHHHCCHHHHHHHHHHHHCCGGGGSBHHHHHHHHHHHHHHHHHHHHHHHHHHCCHHHHHHHCS----TT-CC...... CS - ACR_tran 419 kpleaalksmkeiegalvaialvllavfvPilflgGveGklfrqfaltivlaillsvlvaltltPalcallLkarkeekek...... 499 - +a++ ++++i + + ++l++++vfvP+++l+Gv lf ++a ++++ +l s +++ t++P ++ lLk + ++ ++ - lcl|Protein_ID1.3|M3 356 PTTKAIIDAARQIIQPALVSTLSICIVFVPMFSLTGVPRYLFIPMAEAVIFGMLSSFVLSQTFVPTVANKLLKYQTQHFKHehhtda 442 - *************************************************************************8776544446799* PP - - ........................CHHHHHHHHHHHHHHHHHHHHHHHHHSCHHHHHHHHHHHHH.HHHHHCCS-BESS----TSEE CS - ACR_tran 500 ........................gffrefnrlfdalerrYekllekvlrhravvllvalllvvg.slllfvripkeflPeedegvl 561 - ++ + f++ f ++ Y+ +l++ l hr+ ++++l +v++ ++ lf+ ++k+f+Pe d g++ - lcl|Protein_ID1.3|M3 443 hrpehdpnfkvhrsvkasifqffiNIQQGFEKRFTKVRLVYRSILHFALDHRKKFITLFLGFVIVsCVTLFPLLGKNFFPEVDSGDM 529 - *******************9655555578*************************999999887775899****************** PP - - EEEEE-STTC-HHHHHHHHHHHHHHHH...TTTTEEEEEEEESESSSS..E........CTTEEEEEEEE--CTTS-SCCCSHHHHH CS - ACR_tran 562 vtsvqlppgvsleqtekvlkqvekilk...ekpevesvfavtGfafagdta........gqnsakvfisLkpekerkeeektveali 637 - ++++++ g+ +e+t+k + +e++++ ++e + ++ G + +g + g++ +++ i+L ++ ++ ++ + - lcl|Protein_ID1.3|M3 530 KIHIRVQVGTRIEETAKQFDLIENTIRrlvPQNELDTIVDNIGLSVSGINTaysstgtiGPQDGDILIHLNEN------HHPTKEYM 610 - ************************8875446889999999999888774331111111134445555555444......45688999 PP - - HHHHHHC.CTSTSSEEEEEE-SSSCCCSSSSSEEEEEEE.TSSSCHHHHHHHHHHHHHHHCCSTTEECEEESS-S-EEEEEEEE-HH CS - ACR_tran 638 erlrkel.ekikganvellapiqlreletlsgvrlelqvklfgddleaLseareqllaalkqlpeladvrseqqedepqlqvkidre 723 - ++lr+ l ++++g++ +++ p +++ + gv + ++ + g ++++ + ++++l+ ++++p++ad+r++q ++ pq++v+idr - lcl|Protein_ID1.3|M3 611 KKLRETLpRAFPGVS-FAFLPADITSQILNFGVPAPIDIRVDGPNHDNNLKFVRAILKDIRNVPGIADLRVQQATNYPQFNVDIDRS 696 - 999999945678887.7888999*999************************************************************ PP - - HHHHCTB-HHHHHHHHHHHHT-..EEEEEEEETTE...EEEEEEEE-GGGSSSGGGGCC-EEEETTSE.EEECGGCEEEEEEEE-SE CS - ACR_tran 724 kaaalGvsiadinetlstalgg..syvndfieegr...vvkvvvqleedlrsspedlkklyvrnkkgk.mvplsavakieeekgpns 804 - +a+++G++ di + l + l g +++ +f +++ + +v+q+++ + +s+ dl+++++++k++ m l+ + +ie+ ++ + - lcl|Protein_ID1.3|M3 697 QAKNYGLTEGDITNSLVATLAGtsQVAPTFWLNNKngvSYPIVIQMPQYKINSLADLANIPITTKESSsMQVLGGLGSIERDQSDSV 783 - ************8877666655434556776544422279***********************998764889*************** PP - - EEEETTCEEEEEEEEESTTS...-HHHHHHHHHHCCTT..SSTTEEEEEECHHHHHHHHCCCHHHHHHHHHHHHHHHHHHHCTSSST CS - ACR_tran 805 ierenglrsveisgevaegd...slgeaeeavekiakqvklPagvgiewtglseqeqeagnsllllvalalllvflvLaalyeslsd 888 - i+++n ++s+ i ++++ +d ++g++e+++++ +++ lP+gv+++ +g+ q ++ l+l ++++++l++++ + +es++d - lcl|Protein_ID1.3|M3 784 ISHYNIKPSFDIFASLQGRDlgsISGDIETIIQHHHQE--LPKGVSVKLQGQVPIMQDSYRGLSLGLVASIILIYFLVVVNFESWLD 868 - ****************8876222578999999999888..*********************************************** PP - - CHHHHTTHHHHHHHHHHHHHHTT--BSHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCTTTBHHHHHHHHHHHHCHHHHHHHHHHHHH CS - ACR_tran 889 pllvlltvPlalvGallalllrglelsviaqvGlilliGlavkNailivefakelrekeglsleeAileaaklRLrPiLMTalaail 975 - p+++++ +P al+G + l+l+g++lsv a+ G i+ +G+a N il+++fa+e + ++ +A+lea+ +R+rP+LMTa a+++ - lcl|Protein_ID1.3|M3 869 PFVIITALPAALAGIVWMLYLTGTTLSVPALTGAIMCMGVATANSILVISFARERLA-IVKDSTQAALEAGYTRFRPVLMTASAMLI 954 - *********************************************************.888899*********************** PP - - CCHHHHTT-STTHHHHHHHHHHHHHHHHHHHHCHHHHHHHHHHHHH CS - ACR_tran 976 GvlPLalstGaGselqqplgivvlGGlvtstvLtlllvPvlYvlva 1021 - G++P+al+ G+G e plg +v+GGl+++t+ tl +vPv++ +v+ - lcl|Protein_ID1.3|M3 955 GMIPMALGLGDGGEQNAPLGRAVIGGLLLATIATLIFVPVVFSVVH 1000 - ******************************************9997 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query model(s): 1 (1021 nodes) -Target sequences: 3 (2141 residues) -Passed MSV filter: 1 (0.333333); expected 0.1 (0.02) -Passed bias filter: 1 (0.333333); expected 0.1 (0.02) -Passed Vit filter: 1 (0.333333); expected 0.0 (0.001) -Passed Fwd filter: 1 (0.333333); expected 0.0 (1e-05) -Initial search space (Z): 3 [actual number of targets] -Domain search space (domZ): 1 [number of targets reported over threshold] -# CPU time: 0.17u 0.03s 00:00:00.20 Elapsed: 00:00:00.19 -# Mc/sec: 11.27 -// diff --git a/t/data/nhmmer-3.1.out b/t/data/nhmmer-3.1.out deleted file mode 100644 index b799943ed..000000000 --- a/t/data/nhmmer-3.1.out +++ /dev/null @@ -1,63 +0,0 @@ -# nhmmer :: search a DNA model or alignment against a DNA database -# HMMER 3.1b1 (May 2013); http://hmmer.org/ -# Copyright (C) 2013 Howard Hughes Medical Institute. -# Freely distributed under the GNU General Public License (GPLv3). -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# query file: ../HMMs/A_HA_H7_CDS_nucleotide.hmm -# target sequence database: tmp.fa -# number of worker threads: 4 -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query: A_HA_H7_CDS_nucleotide [M=1683] -Scores for complete hits: - E-value score bias Sequence start end Description - ------- ------ ----- -------- ----- ----- ----------- - 3.2e-48 148.2 5.7 seq1 1 151 Description of seq1 - 3.9e-15 38.6 0.4 seq2 59 1 Description of seq2 - - -Annotation for each hit (and alignments): ->> seq1 Description of seq1 - score bias Evalue hmmfrom hmm to alifrom ali to envfrom env to sq len acc - ------ ----- --------- ------- ------- --------- --------- --------- --------- --------- ---- - ! 148.2 5.7 3.2e-48 258 411 .. 1 151 [] 1 151 [] 151 0.98 - - Alignment: - score: 148.2 bits - A_HA_H7_CDS_nucleotide 258 attcctagaattttcagctgatttaattattgagaggcgagaaggaagtaatgatgtctgttatcctgggaaattcgtaaatgaaga 344 - attcctagaattttcagc+gatttaattattgagaggcgagaaggaagt gatgtctgttatcctgggaaattcgt+aatgaaga - seq1 1 ATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGT---GATGTCTGTTATCCTGGGAAATTCGTGAATGAAGA 84 - 689*******************************************777...*********************************** PP - - A_HA_H7_CDS_nucleotide 345 agctctgaggcaaattctcagggggtcaggcggaattgacaaggagacaatgggattcacatatagc 411 - agctctgaggcaaattctcaggg+gtcaggcggaattgacaaggagacaatgggattcac+ta+agc - seq1 85 AGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACCTACAGC 151 - ****************************************************************986 PP - ->> seq2 Description of seq2 - score bias Evalue hmmfrom hmm to alifrom ali to envfrom env to sq len acc - ------ ----- --------- ------- ------- --------- --------- --------- --------- --------- ---- - ! 38.6 0.4 3.9e-15 34 92 .. 59 1 .. 60 1 .. 60 0.97 - - Alignment: - score: 38.6 bits - A_HA_H7_CDS_nucleotide 34 gtgatgattgcaacaaatgcagacaaaatctgccttgggcaccatgctgtgtcaaacgg 92 - g+gat+att+c+acaaatgcagacaa atctgccttgggca+catgc+gtgtcaaacgg - seq2 59 GCGATCATTCCGACAAATGCAGACAAGATCTGCCTTGGGCATCATGCCGTGTCAAACGG 1 - 6899****************************************************986 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query model(s): 1 (1683 nodes) -Target sequences: 2 (422 residues searched) -Residues passing SSV filter: 211 (0.5); expected (0.02) -Residues passing bias filter: 211 (0.5); expected (0.02) -Residues passing Vit filter: 211 (0.5); expected (0.003) -Residues passing Fwd filter: 211 (0.5); expected (3e-05) -Total number of hits: 2 (0.498) -# CPU time: 0.02u 0.01s 00:00:00.03 Elapsed: 00:00:00.30 -# Mc/sec: 2.37 -// -[ok] diff --git a/t/data/pfamOutput-bug3376.out b/t/data/pfamOutput-bug3376.out deleted file mode 100644 index 39a84f90b..000000000 --- a/t/data/pfamOutput-bug3376.out +++ /dev/null @@ -1,38 +0,0 @@ -hmmpfam - search one or more sequences against HMM database -HMMER 2.3.2 (Oct 2003) -Copyright (C) 1992-2003 HHMI/Washington University School of Medicine -Freely distributed under the GNU General Public License (GPL) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HMM file: testInput.hmm -Sequence file: testInput.fasta -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query sequence: Test -Accession: [none] -Description: [none] - -Scores for sequence family classification (score includes all domains): -Model Description Score E-value N --------- ----------- ----- ------- --- -TEST 184.7 2.5e-56 1 - -Parsed for domains: -Model Domain seq-f seq-t hmm-f hmm-t score E-value --------- ------- ----- ----- ----- ----- ----- ------- -TEST 1/1 8 97 .] 1 95 [] 184.7 2.5e-56 - -Alignments of top-scoring domains: -TEST: domain 1 of 1, from 8 to 97: score 184.7, E = 2.5e-56 - *->svfqqqqssksttgstvtAiAiAigYRYRYRAvtWnsGsLssGvnDn - sv+qqqq+ + +vtAiAiAigYRYRYRAv Wn GsLs G nDn - Test 8 SVYQQQQGGSA----MVTAIAIAIGYRYRYRAVVWNKGSLSTGTNDN 50 - - DnDqqsdgLYtiYYsvtvpssslpsqtviHHHaHkasstkiiikiePr<- - DnDq +d LYtiYYsvtv +ss+p q+v+HHHaH+asstkiiiki P - Test 51 DNDQAAD-LYTIYYSVTVSASSWPGQSVTHHHAHPASSTKIIIKIAPS 97 - - * - - Test - - - -// diff --git a/t/data/phmmer.out b/t/data/phmmer.out deleted file mode 100644 index c07f86050..000000000 --- a/t/data/phmmer.out +++ /dev/null @@ -1,183 +0,0 @@ -# phmmer :: search a protein sequence against a protein database -# HMMER 3.1b1 (May 2013); http://hmmer.org/ -# Copyright (C) 2013 Howard Hughes Medical Institute. -# Freely distributed under the GNU General Public License (GPLv3). -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# query sequence file: /tmp/task-hmmsearch-TCHn9R/ZRkI2xwebU -# target sequence database: /cath/data/v4_0_0/release_data/CathDomainSeqs.COMBS -# output directed to file: /tmp/phmmer.out -# sequence reporting threshold: E-value <= 0.01 -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Query: A0R3R7 [L=762] -Scores for complete sequences (score includes all domains): - --- full sequence --- --- best 1 domain --- -#dom- - E-value score bias E-value score bias exp N Sequence Description - ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- - 2.1e-55 191.8 3.7 2.3e-55 191.7 3.7 1.0 1 cath|4_0_0|1vs0A03/639-759 - 2.1e-55 191.8 3.7 2.3e-55 191.7 3.7 1.0 1 cath|4_0_0|1vs0B03/639-759 - 1.9e-46 162.2 0.2 2.1e-46 162.1 0.2 1.0 1 cath|4_0_0|1vs0A02/484-593 - 1.9e-46 162.2 0.2 2.1e-46 162.1 0.2 1.0 1 cath|4_0_0|1vs0B02/484-593 - 5.8e-17 64.7 1.4 7.1e-10 41.3 0.1 2.0 2 cath|4_0_0|1vs0A01/453-483_594-638 - 5.8e-17 64.7 1.4 7.1e-10 41.3 0.1 2.0 2 cath|4_0_0|1vs0B01/453-483_594-638 - 6.4e-07 31.5 0.1 8.1e-07 31.2 0.1 1.0 1 cath|4_0_0|2cfmA03/422-561 - 0.00065 21.6 0.0 0.00069 21.5 0.0 1.0 1 cath|4_0_0|1x9nA03/571-696 - - -Domain annotation for each sequence (and alignments): ->> cath|4_0_0|1vs0A03/639-759 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 191.7 3.7 7.9e-60 2.3e-55 642 761 .. 1 120 [. 1 121 [] 0.99 - - Alignments for each domain: - == domain 1 score: 191.7 bits; conditional E-value: 7.9e-60 - A0R3R7 642 wntqevviggwrqgeggrssgigalvlgipgpeglqfvgrvgtgftekelsklkdmlkplhtdespfnaplpkvdargvtfvr 724 - wntqevviggwr geggrssg+g+l+ gipgp glqf grvgtg++e+el+ lk+ l plhtdespf+ plp da+g+t+v+ - cath|4_0_0|1vs0A03/639-759 1 WNTQEVVIGGWRAGEGGRSSGVGSLLXGIPGPGGLQFAGRVGTGLSERELANLKEXLAPLHTDESPFDVPLPARDAKGITYVK 83 - *********************************************************************************** PP - - A0R3R7 725 pelvgevrysertsdgrlrqpswrglrpdktpdevvw 761 - p lv+evryse t +grlrq swrglrpdk p evv - cath|4_0_0|1vs0A03/639-759 84 PALVAEVRYSEWTPEGRLRQSSWRGLRPDKKPSEVVR 120 - ***********************************96 PP - ->> cath|4_0_0|1vs0B03/639-759 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 191.7 3.7 7.9e-60 2.3e-55 642 761 .. 1 120 [. 1 121 [] 0.99 - - Alignments for each domain: - == domain 1 score: 191.7 bits; conditional E-value: 7.9e-60 - A0R3R7 642 wntqevviggwrqgeggrssgigalvlgipgpeglqfvgrvgtgftekelsklkdmlkplhtdespfnaplpkvdargvtfvr 724 - wntqevviggwr geggrssg+g+l+ gipgp glqf grvgtg++e+el+ lk+ l plhtdespf+ plp da+g+t+v+ - cath|4_0_0|1vs0B03/639-759 1 WNTQEVVIGGWRAGEGGRSSGVGSLLXGIPGPGGLQFAGRVGTGLSERELANLKEXLAPLHTDESPFDVPLPARDAKGITYVK 83 - *********************************************************************************** PP - - A0R3R7 725 pelvgevrysertsdgrlrqpswrglrpdktpdevvw 761 - p lv+evryse t +grlrq swrglrpdk p evv - cath|4_0_0|1vs0B03/639-759 84 PALVAEVRYSEWTPEGRLRQSSWRGLRPDKKPSEVVR 120 - ***********************************96 PP - ->> cath|4_0_0|1vs0A02/484-593 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 162.1 0.2 7e-51 2.1e-46 487 596 .. 1 110 [] 1 110 [] 0.99 - - Alignments for each domain: - == domain 1 score: 162.1 bits; conditional E-value: 7e-51 - A0R3R7 487 gyrviidadhgqlqirsrtgrevtgeypqfkalaadlaehhvvldgeavaldesgvpsfgqmqnrarstrvefwafdilwldg 569 - gyr++++adhg +++rsr+gr+vt+eypq++ala dla+hhvvldgeav ld sgvpsf+q qnr r trvefwafd+l+ldg - cath|4_0_0|1vs0A02/484-593 1 GYRLLVEADHGAVRLRSRSGRDVTAEYPQLRALAEDLADHHVVLDGEAVVLDSSGVPSFSQXQNRGRDTRVEFWAFDLLYLDG 83 - 8********************************************************************************** PP - - A0R3R7 570 rsllrakysdrrkilealadggglivp 596 - r+ll +y drrk+le la++ +l vp - cath|4_0_0|1vs0A02/484-593 84 RALLGTRYQDRRKLLETLANATSLTVP 110 - *********************999998 PP - ->> cath|4_0_0|1vs0B02/484-593 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 162.1 0.2 7e-51 2.1e-46 487 596 .. 1 110 [] 1 110 [] 0.99 - - Alignments for each domain: - == domain 1 score: 162.1 bits; conditional E-value: 7e-51 - A0R3R7 487 gyrviidadhgqlqirsrtgrevtgeypqfkalaadlaehhvvldgeavaldesgvpsfgqmqnrarstrvefwafdilwldg 569 - gyr++++adhg +++rsr+gr+vt+eypq++ala dla+hhvvldgeav ld sgvpsf+q qnr r trvefwafd+l+ldg - cath|4_0_0|1vs0B02/484-593 1 GYRLLVEADHGAVRLRSRSGRDVTAEYPQLRALAEDLADHHVVLDGEAVVLDSSGVPSFSQXQNRGRDTRVEFWAFDLLYLDG 83 - 8********************************************************************************** PP - - A0R3R7 570 rsllrakysdrrkilealadggglivp 596 - r+ll +y drrk+le la++ +l vp - cath|4_0_0|1vs0B02/484-593 84 RALLGTRYQDRRKLLETLANATSLTVP 110 - *********************999998 PP - ->> cath|4_0_0|1vs0A01/453-483_594-638 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 24.0 0.1 4.2e-09 0.00012 459 487 .. 7 35 .. 4 37 .. 0.93 - 2 ! 41.3 0.1 2.4e-14 7.1e-10 598 640 .. 36 78 .. 34 79 .] 0.96 - - Alignments for each domain: - == domain 1 score: 24.0 bits; conditional E-value: 4.2e-09 - A0R3R7 459 edfapmlategsvakykakqwafegkwdg 487 - +++ap lat g+va ka qwafeg wd - cath|4_0_0|1vs0A01/453-483_594-638 7 DNLAPXLATHGTVAGLKASQWAFEGXWDE 35 - 578************************95 PP - - == domain 2 score: 41.3 bits; conditional E-value: 2.4e-14 - A0R3R7 598 qlpgdgpeamehvrkkrfegvvakkwdstyqpgrrssswikdk 640 - lpgdg +a+ rk +egv+ak+ ds yqpgrr +sw+kdk - cath|4_0_0|1vs0A01/453-483_594-638 36 LLPGDGAQAFACSRKHGWEGVIAKRRDSRYQPGRRCASWVKDK 78 - 69****************************************8 PP - ->> cath|4_0_0|1vs0B01/453-483_594-638 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 24.0 0.1 4.2e-09 0.00012 459 487 .. 7 35 .. 4 37 .. 0.93 - 2 ! 41.3 0.1 2.4e-14 7.1e-10 598 640 .. 36 78 .. 34 79 .] 0.96 - - Alignments for each domain: - == domain 1 score: 24.0 bits; conditional E-value: 4.2e-09 - A0R3R7 459 edfapmlategsvakykakqwafegkwdg 487 - +++ap lat g+va ka qwafeg wd - cath|4_0_0|1vs0B01/453-483_594-638 7 DNLAPXLATHGTVAGLKASQWAFEGXWDE 35 - 578************************95 PP - - == domain 2 score: 41.3 bits; conditional E-value: 2.4e-14 - A0R3R7 598 qlpgdgpeamehvrkkrfegvvakkwdstyqpgrrssswikdk 640 - lpgdg +a+ rk +egv+ak+ ds yqpgrr +sw+kdk - cath|4_0_0|1vs0B01/453-483_594-638 36 LLPGDGAQAFACSRKHGWEGVIAKRRDSRYQPGRRCASWVKDK 78 - 69****************************************8 PP - ->> cath|4_0_0|2cfmA03/422-561 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 31.2 0.1 2.7e-11 8.1e-07 646 758 .. 8 117 .. 2 120 .. 0.80 - - Alignments for each domain: - == domain 1 score: 31.2 bits; conditional E-value: 2.7e-11 - A0R3R7 646 evviggwrqgeggrssgigalvlgipgpeglqf..vgrvgtgftekelsklkdmlkplhtdespfnapl.pkvdargvtfvrp 725 - ++vi g + geg r+ +g+++lg pe +f vg+vg+gft+ +l ++ lkpl e l pkv vt+ - cath|4_0_0|2cfmA03/422-561 8 DLVIIGAEWGEGRRAHLFGSFILGAYDPETGEFleVGKVGSGFTDDDLVEFTKXLKPLIIKEEGKRVWLqPKVV-IEVTYQ-- 87 - 456667789********************9888667**********************9988877776636664.446663.. PP - - A0R3R7 726 elvgevrysertsdgrlrqpswrglrpdktpde 758 - e+ +y s lr p + +lr dk p++ - cath|4_0_0|2cfmA03/422-561 88 EIQKSPKY---RSGFALRFPRFVALRDDKGPED 117 - 44444444...355689************9987 PP - ->> cath|4_0_0|1x9nA03/571-696 - # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc - --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- - 1 ! 21.5 0.0 2.3e-08 0.00069 494 585 .. 9 113 .. 1 124 [. 0.74 - - Alignments for each domain: - == domain 1 score: 21.5 bits; conditional E-value: 2.3e-08 - A0R3R7 494 adhgqlqirsrtgrevtgeypqfkalaadlae...hhvvldgeavalde..sgvpsfgqmqnrars........trvefwafd 563 - + g+++i sr ++ tg+yp + + + +ld eava d + f + r r +v ++afd - cath|4_0_0|1x9nA03/571-696 9 LEGGEVKIFSRNQEDNTGKYPDIISRIPKIKLpsvTSFILDTEAVAWDRekKQIQPFQVLTTRKRKevdaseiqVQVCLYAFD 91 - 36799*****************98776665430114679*******9962256889998888877422222211345589*** PP - - A0R3R7 564 ilwldgrsllrakysdrrkile 585 - +++l+g+sl+r s rr++l - cath|4_0_0|1x9nA03/571-696 92 LIYLNGESLVREPLSRRRQLLR 113 - ******************9985 PP - - - -Internal pipeline statistics summary: -------------------------------------- -Query model(s): 1 (762 nodes) -Target sequences: 235858 (38973128 residues searched) -Passed MSV filter: 6997 (0.0296662); expected 4717.2 (0.02) -Passed bias filter: 6066 (0.0257189); expected 4717.2 (0.02) -Passed Vit filter: 317 (0.00134403); expected 235.9 (0.001) -Passed Fwd filter: 9 (3.81586e-05); expected 2.4 (1e-05) -Initial search space (Z): 235858 [actual number of targets] -Domain search space (domZ): 8 [number of targets reported over threshold] -# CPU time: 4.10u 0.03s 00:00:04.13 Elapsed: 00:00:00.62 -# Mc/sec: 47899.23 -// -[ok] -- 2.11.4.GIT