1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin( -tests => 824 );
12 use_ok('Bio::SearchIO');
15 my $searchio = Bio::SearchIO->new(
17 -file => test_input_file('hmmpfam.out')
21 while ( $result = $searchio->next_result ) {
23 'Bio::Search::Result::HMMERResult',
24 'Check for the correct result reference type'
26 is( $result->algorithm, 'HMMPFAM', 'Check algorithm' );
27 is( $result->algorithm_version, '2.1.1', 'Check algorithm version' );
28 is( $result->hmm_name, 'pfam', 'Check hmm_name' );
29 is( $result->sequence_file,
30 '/home/birney/src/wise2/example/road.pep',
34 is( $result->query_name, 'roa1_drome', 'Check query_name' );
35 is( $result->query_length, 0, 'Check query_length absence' );
36 is( $result->query_description, '', 'Check query_description' );
37 is( $result->num_hits(), 2, 'Check num_hits' );
40 if ( defined( $hit = $result->next_model ) ) {
41 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
42 'Check for the correct hit reference type' );
43 is( $hit->name, 'SEED', 'Check hit name' );
44 is( $hit->description,
46 'Check for hit description'
48 is( $hit->raw_score, 146.1, 'Check hit raw_score' );
49 is( $hit->bits, 0, 'Check hit bits (0)' );
50 float_is( $hit->significance, 6.3e-40, 'Check hit significance' );
51 is( $hit->num_hsps, 1, 'Check num_hsps' );
53 # Query and Hit lengths are usually unknown in HMMER,
54 # but sometimes they can be deduced from domain data '[]'
55 is( $hit->length, 77, 'Check hit length' );
56 is( $hit->frac_aligned_query, undef );
57 is( $hit->frac_aligned_hit, '1.00' );
59 is( $hit->matches('cons'), 55, 'Check hit total conserved residues' );
60 is( $hit->matches('id'), 22, 'Check hit total identical residues' );
61 is( sprintf( "%.3f", $hit->frac_identical('query') ), '0.310' );
62 is( sprintf( "%.3f", $hit->frac_identical('hit') ), 0.286 );
63 is( sprintf( "%.3f", $hit->frac_identical('total') ), 0.282 );
64 is( sprintf( "%.3f", $hit->frac_conserved('query') ), 0.775 );
65 is( sprintf( "%.3f", $hit->frac_conserved('hit') ), 0.714 );
66 is( sprintf( "%.3f", $hit->frac_conserved('total') ), 0.705 );
68 if ( defined( $hsp = $hit->next_domain ) ) {
69 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
70 'Check for correct hsp reference type' );
71 is( $hsp->query->seq_id(), 'roa1_drome', 'Check for query seq_id' );
72 is( $hsp->hit->seq_id(), 'SEED', 'Check for hit seq_id' );
74 is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' );
75 is( $hsp->hit->end, 77, 'Check for hit hmm to value' );
76 is( $hsp->query->start, 33, 'Check for query alifrom value' );
77 is( $hsp->query->end, 103, 'Check for query ali to value' );
78 is( $hsp->score, 71.2, 'Check for hsp score' );
79 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
80 float_is( $hsp->evalue, 2.2e-17, 'Check for hsp c-Evalue' );
82 is( $hsp->length('query'), 71, 'Check for hsp query length' );
83 is( $hsp->length('hit'), 77, 'Check for hsp hit length' );
84 is( $hsp->length('total'), 78, 'Check for hsp total length' );
85 is( $hsp->gaps('query'), 7, 'Check for hsp query gaps' );
86 is( $hsp->gaps('hit'), 1, 'Check for hsp hit gaps' );
87 is( $hsp->gaps('total'), 8, 'Check for hsp total gaps' );
90 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
91 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
92 ($result->query_length == 0) ?
93 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
94 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
96 is( $hsp->num_conserved, 55 );
97 is( $hsp->num_identical, 22 );
98 is( sprintf( "%.2f", $hsp->percent_identity ), 28.21 );
99 is( sprintf( "%.3f", $hsp->frac_identical('query') ), '0.310' );
100 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.286 );
101 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.282 );
102 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.775 );
103 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.714 );
104 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.705 );
106 is( $hsp->query_string,
107 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP',
108 'Check for query string'
110 is( $hsp->hit_string,
111 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG-kelggrklrv',
112 'Check for hit string'
114 is( $hsp->homology_string,
115 'lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ',
116 'Check for homology string'
118 is( length( $hsp->homology_string ),
119 length( $hsp->hit_string ),
120 'Check if homology string and hit string have an equal length'
122 is( length( $hsp->query_string ),
123 length( $hsp->homology_string ),
124 'Check if query string and homology string have an equal length'
126 # This Hmmpfam don't have PP or CS strings, these are tests to check for side effects
127 is( $hsp->posterior_string, '' );
128 is( $hsp->consensus_string, '' );
131 if ( defined( $hit = $result->next_model ) ) {
132 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
133 'Check for the correct hit reference type' );
134 is( $hit->name, 'SEED', 'Check hit name' );
135 is( $hit->description, '', 'Check for hit description' );
136 is( $hit->raw_score, 146.1, 'Check hit raw_score' );
137 is( $hit->bits, 0, 'Check hit bits (0)' );
138 float_is( $hit->significance, 6.3e-040, 'Check hit significance' );
139 is( $hit->num_hsps, 1, 'Check num_hsps' );
141 # Query and Hit lengths are usually unknown in HMMER,
142 # but sometimes they can be deduced from domain data '[]'
143 is( $hit->length, 77, 'Check hit length' );
144 is( $hit->frac_aligned_query, undef );
145 is( $hit->frac_aligned_hit, '1.00' );
147 is( $hit->matches('cons'), 56, 'Check hit total conserved residues' );
148 is( $hit->matches('id'), 33, 'Check hit total identical residues' );
149 is( sprintf( "%.3f", $hit->frac_identical('query') ), 0.471 );
150 is( sprintf( "%.3f", $hit->frac_identical('hit') ), 0.429 );
151 is( sprintf( "%.3f", $hit->frac_identical('total') ), 0.429 );
152 is( sprintf( "%.3f", $hit->frac_conserved('query') ), '0.800' );
153 is( sprintf( "%.3f", $hit->frac_conserved('hit') ), 0.727 );
154 is( sprintf( "%.3f", $hit->frac_conserved('total') ), 0.727 );
156 if ( defined( $hsp = $hit->next_domain ) ) {
157 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
158 'Check for correct hsp reference type' );
159 is( $hsp->query->seq_id(), 'roa1_drome', 'Check for query seq_id' );
160 is( $hsp->hit->seq_id(), 'SEED', 'Check for hit seq_id' );
162 is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' );
163 is( $hsp->hit->end, 77, 'Check for hit hmm to value' );
164 is( $hsp->query->start, 124, 'Check for query alifrom value' );
165 is( $hsp->query->end, 193, 'Check for query ali to value' );
166 is( $hsp->score, 75.5, 'Check for hsp score' );
167 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
168 float_is( $hsp->evalue, 1.1e-18, 'Check for hsp c-Evalue' );
170 is( $hsp->length('query'), 70, 'Check for hsp query length' );
171 is( $hsp->length('hit'), 77, 'Check for hsp hit length' );
172 is( $hsp->length('total'), 77, 'Check for hsp total length' );
173 is( $hsp->gaps('query'), 7, 'Check for hsp query gaps' );
174 is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' );
175 is( $hsp->gaps('total'), 7, 'Check for hsp total gaps' );
177 ($hit->length == 0) ?
178 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
179 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
180 ($result->query_length == 0) ?
181 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
182 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
184 is( $hsp->num_conserved, 56 );
185 is( $hsp->num_identical, 33 );
186 is( sprintf( "%.2f", $hsp->percent_identity ), 42.86 );
187 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.471 );
188 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.429 );
189 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.429 );
190 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), '0.800' );
191 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.727 );
192 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.727);
194 is( $hsp->query_string,
195 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL--KQHQLNGKMVDV',
196 'Check for query string'
198 is( $hsp->hit_string,
199 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv',
200 'Check for hit string'
202 is( $hsp->homology_string,
203 'lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v',
204 'Check for homology string'
206 is( length( $hsp->homology_string ),
207 length( $hsp->hit_string ),
208 'Check if homology string and hit string have an equal length'
210 is( length( $hsp->query_string ),
211 length( $hsp->homology_string ),
212 'Check if query string and homology string have an equal length'
219 $searchio = Bio::SearchIO->new(
221 -file => test_input_file('hmmsearch.out')
223 while ( $result = $searchio->next_result ) {
225 'Bio::Search::Result::HMMERResult',
226 'Check for the correct result reference type'
228 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
229 is( $result->algorithm_version, '2.0', 'Check algorithm version' );
230 is( $result->hmm_name, 'HMM [SEED]', 'Check hmm_name' );
231 is( $result->sequence_file, 'HMM.dbtemp.29591', 'Check sequence_file' );
232 is( $result->database_name, 'HMM.dbtemp.29591', 'Check database_name' );
234 is( $result->query_name, 'SEED', 'Check query_name' );
235 is( $result->query_length, 77, 'Check query_length' );
236 is( $result->query_description, '', 'Check query_description' );
237 is( $result->num_hits(), 1215, 'Check num_hits' );
239 my $hit = $result->next_model;
240 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
241 'Check for the correct hit reference type' );
242 is( $hit->name, 'Q91581', 'Check hit name' );
243 is( $hit->description,
244 'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN',
245 'Check for hit description'
247 is( $hit->raw_score, 119.7, 'Check hit raw_score' );
248 is( $hit->bits, 0, 'Check hit bits (0)' );
249 float_is( $hit->significance, 2e-31, 'Check hit significance' );
250 is( $hit->num_hsps, 1, 'Check num_hsps' );
251 is( $hit->length, 0, 'Check hit length' );
253 my $hsp = $hit->next_domain;
254 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
255 'Check for correct hsp reference type' );
256 is( $hsp->query->seq_id(), 'SEED', 'Check for query seq_id' );
257 is( $hsp->hit->seq_id(), 'Q91581', 'Check for hit seq_id' );
259 is( $hsp->hit->start, 18, 'Check for hit hmmfrom value' );
260 is( $hsp->hit->end, 89, 'Check for hit hmm to value' );
261 is( $hsp->query->start, 1, 'Check for query alifrom value' );
262 is( $hsp->query->end, 77, 'Check for query ali to value' );
263 is( $hsp->score, 119.7, 'Check for hsp score' );
264 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
265 float_is( $hsp->evalue, 2e-31, 'Check for hsp c-Evalue' );
267 is( $hsp->length('query'), 77, 'Check for hsp query length' );
268 is( $hsp->length('hit'), 72, 'Check for hsp hit length' );
269 is( $hsp->length('total'), 0, 'Check for hsp total length' );
270 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
271 is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' );
272 is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' );
274 my $example_counter = 0;
275 while ($hit = $result->next_model) {
276 if ($hit->name eq 'Q61954') {
278 if ($example_counter == 1) {
279 # Query and Hit lengths are usually unknown in HMMER,
280 # but sometimes they can be deduced from domain data '[]'
281 is( $hit->length, 153, 'Check hit length' );
282 is( $hit->frac_aligned_query, '1.00' );
283 is( $hit->frac_aligned_hit, 0.42 );
285 $hsp = $hit->next_domain;
286 is( $hsp->query->seq_id(), 'SEED', 'Check for query seq_id' );
287 is( $hsp->hit->seq_id(), 'Q61954', 'Check for hit seq_id' );
289 is( $hsp->hit->start, 26, 'Check for hit hmmfrom value' );
290 is( $hsp->hit->end, 89, 'Check for hit hmm to value' );
291 is( $hsp->query->start, 1, 'Check for query alifrom value' );
292 is( $hsp->query->end, 77, 'Check for query ali to value' );
293 is( $hsp->score, 72.9, 'Check for hsp score' );
294 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
295 float_is( $hsp->evalue, 2.4e-17, 'Check for hsp c-Evalue' );
297 is( $hsp->length('query'), 77, 'Check for hsp query length' );
298 is( $hsp->length('hit'), 64, 'Check for hsp hit length' );
299 is( $hsp->length('total'), 0, 'Check for hsp total length' );
300 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
301 is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' );
302 is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' );
304 elsif ($example_counter == 2) {
305 # Query and Hit lengths are usually unknown in HMMER,
306 # but sometimes they can be deduced from domain data '[]'
307 is( $hit->length, 153, 'Check hit length' );
308 is( $hit->frac_aligned_query, '1.00' );
309 is( $hit->frac_aligned_hit, 0.34 );
311 $hsp = $hit->next_domain;
312 is( $hsp->query->seq_id(), 'SEED', 'Check for query seq_id' );
313 is( $hsp->hit->seq_id(), 'Q61954', 'Check for hit seq_id' );
315 is( $hsp->hit->start, 102, 'Check for hit hmmfrom value' );
316 is( $hsp->hit->end, 153, 'Check for hit hmm to value' );
317 is( $hsp->query->start, 1, 'Check for query alifrom value' );
318 is( $hsp->query->end, 77, 'Check for query ali to value' );
319 is( $hsp->score, 3.3, 'Check for hsp score' );
320 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
321 float_is( $hsp->evalue, 1.9, 'Check for hsp c-Evalue' );
323 is( $hsp->length('query'), 77, 'Check for hsp query length' );
324 is( $hsp->length('hit'), 52, 'Check for hsp hit length' );
325 is( $hsp->length('total'), 0, 'Check for hsp total length' );
326 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
327 is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' );
328 is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' );
336 $searchio = Bio::SearchIO->new(
338 -file => test_input_file('L77119.hmmer')
341 while ( $result = $searchio->next_result ) {
343 'Bio::Search::Result::HMMERResult',
344 'Check for the correct result reference type'
346 is( $result->algorithm, 'HMMPFAM', 'Check algorithm' );
347 is( $result->algorithm_version, '2.2g', 'Check algorithm version' );
348 is( $result->hmm_name, 'Pfam', 'Check hmm_name' );
349 is( $result->sequence_file, 'L77119.faa', 'Check sequence_file' );
351 is( $result->query_name,
352 'gi|1522636|gb|AAC37060.1|',
355 is( $result->query_length, 0, 'Check query_length absence' );
356 is( $result->query_description,
357 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]',
358 'Check query_description'
360 is( $result->num_hits(), 1, 'Check num_hits' );
362 my $hit = $result->next_hit;
363 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
364 'Check for the correct hit reference type' );
365 is( $hit->name, 'Methylase_M', 'Check hit name' );
366 is( $hit->description,
367 'Type I restriction modification system, M',
368 'Check for hit description'
370 is( $hit->raw_score, -105.2, 'Check hit raw_score' );
371 is( $hit->bits, 0, 'Check hit bits (0)' );
372 float_is( $hit->significance, 0.0022, 'Check hit significance' );
373 is( $hit->num_hsps, 1, 'Check num_hsps' );
375 # Query and Hit lengths are usually unknown in HMMER,
376 # but sometimes they can be deduced from domain data '[]'
377 is( $hit->length, 279, 'Check hit length' );
378 is( $hit->frac_aligned_query, undef );
379 is( $hit->frac_aligned_hit, '1.00' );
381 is( $hit->matches('cons'), 133, 'Check hit total conserved residues' );
382 is( $hit->matches('id'), 48, 'Check hit total identical residues' );
383 is( sprintf( "%.3f", $hit->frac_identical('query') ), 0.238 );
384 is( sprintf( "%.3f", $hit->frac_identical('hit') ), 0.172 );
385 is( sprintf( "%.3f", $hit->frac_identical('total') ), 0.171 );
386 is( sprintf( "%.3f", $hit->frac_conserved('query') ), 0.658 );
387 is( sprintf( "%.3f", $hit->frac_conserved('hit') ), 0.477 );
388 is( sprintf( "%.3f", $hit->frac_conserved('total') ), 0.475 );
390 my $hsp = $hit->next_hsp;
391 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
392 'Check for correct hsp reference type' );
393 is( $hsp->query->seq_id(), 'gi|1522636|gb|AAC37060.1|', 'Check for query seq_id' );
394 is( $hsp->hit->seq_id(), 'Methylase_M', 'Check for hit seq_id' );
396 is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' );
397 is( $hsp->hit->end, 279, 'Check for hit hmm to value' );
398 is( $hsp->query->start, 280, 'Check for query alifrom value' );
399 is( $hsp->query->end, 481, 'Check for query ali to value' );
400 is( $hsp->score, -105.2, 'Check for hsp score' );
401 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
402 float_is( $hsp->evalue, 0.0022, 'Check for hsp evalue' );
404 is( $hsp->length('query'), 202, 'Check for hsp query length' );
405 is( $hsp->length('hit'), 279, 'Check for hsp hit length' );
406 is( $hsp->length('total'), 280, 'Check for hsp total length' );
407 is( $hsp->gaps('query'), 78, 'Check for hsp query gaps' );
408 is( $hsp->gaps('hit'), 1, 'Check for hsp hit gaps' );
409 is( $hsp->gaps('total'), 79, 'Check for hsp total gaps' );
411 ($hit->length == 0) ?
412 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
413 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
414 ($result->query_length == 0) ?
415 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
416 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
418 is( $hsp->num_conserved, 133 );
419 is( $hsp->num_identical, 48 );
420 is( sprintf( "%.2f", $hsp->percent_identity ), 17.14 );
421 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.238 );
422 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.172 );
423 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.171 );
424 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.658 );
425 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.477 );
426 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.475 );
428 is (length($hsp->homology_string), length($hsp->query_string));
430 is( $hsp->hit_string,
431 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG-dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn',
432 'Check for hiy string'
434 is( $hsp->query_string,
435 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST',
436 'Check for query string'
438 is( $hsp->homology_string,
439 ' ++EL+++ av+ R L+F K++ dk +i+ p + + +++y ++ ++ ++y ++ + lF++++ e ++ ++++ + + ++ + + Glf ++++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ +++ ++ k+n+i + s+',
440 'Check for homology string'
442 is( join( ' ', $hsp->seq_inds( 'query', 'nomatch', 1 ) ),
443 '280 288 289 293-295 300 304 311 313-315 317 324-326 332 335 337 344-346 348 355 358-361 364-366 372 379 383-385 389 396 400 404-408 412 416 417 422 426 429-431 434-436 439 441 446 450 451 455 459 460 463 464 468 471 472 478',
444 'Check for nomatch indices in query'
446 is( join( ' ', $hsp->seq_inds( 'hit', 'nomatch', 1 ) ),
447 '1 9 10 14-16 18-31 35 39 42-47 51-59 61 63-65 67 72-74 77-79 82 86 89-94 96 103-105 107 110 111 116 118 120-123 126-131 133 135-141 145 150 151 154 158-160 164 171 175 179-183 187 191-193 198 202 205-207 210-212 215 217 222 226 227 231 233 236 237 240-257 261 264-267 273 275-278',
448 'Check for nomatch indices in hit'
450 is( join( ' ', $hsp->seq_inds( 'query', 'gap', 1 ) ),
451 '296 306 309 321 328 334 335 350 356 366-368 376 417 456 463 470 479',
452 'Check for gap indices in query'
454 is( join( ' ', $hsp->seq_inds( 'hit', 'gap', 1 ) ),
455 '', 'Check for gap indices in hit' );
458 $searchio = Bio::SearchIO->new(
460 -file => test_input_file('cysprot1b.hmmsearch')
463 while ( $result = $searchio->next_result ) {
465 'Bio::Search::Result::HMMERResult',
466 'Check for the correct result reference type'
468 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
469 is( $result->algorithm_version, '2.2g', 'Check algorithm version' );
470 is( $result->hmm_name,
471 'Peptidase_C1.hmm [Peptidase_C1]',
474 is( $result->database_name, 'cysprot1b.fa', 'Check database_name' );
475 is( $result->sequence_file, 'cysprot1b.fa', 'Check sequence_file' );
477 is( $result->query_name, 'Peptidase_C1', 'Check query_name' );
478 is( $result->query_length, 337, 'Check query_length' );
479 is( $result->query_accession, 'PF00112', 'Check query_accession' );
480 is( $result->query_description,
481 'Papain family cysteine protease',
482 'Check query_description'
484 is( $result->num_hits(), 4, 'Check num_hits' );
486 my $hit = $result->next_hit;
487 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
488 'Check for the correct hit reference type' );
489 is( $hit->name, 'CATL_RAT', 'Check hit name' );
490 is( $hit->description,
492 'Check for hit description'
494 is( $hit->raw_score, 449.4, 'Check hit raw_score' );
495 is( $hit->bits, 0, 'Check hit bits (0)' );
496 float_is( $hit->significance, 2e-135, 'Check hit significance' );
497 is( $hit->num_hsps, 1, 'Check num_hsps' );
499 # Query and Hit lengths are usually unknown in HMMER,
500 # but sometimes they can be deduced from domain data '[]'
501 is( $hit->length, 0, 'Check hit length absence' );
502 is( $hit->frac_aligned_query, '1.00' );
503 is( $hit->frac_aligned_hit, undef );
505 is( $hit->matches('cons'), 204, 'Check hit total conserved residues' );
506 is( $hit->matches('id'), 131, 'Check hit total identical residues' );
507 is( sprintf( "%.3f", $hit->frac_identical('query') ), 0.389 );
508 is( sprintf( "%.3f", $hit->frac_identical('hit') ), 0.598 );
509 is( sprintf( "%.3f", $hit->frac_identical('total') ), 0.389 );
510 is( sprintf( "%.3f", $hit->frac_conserved('query') ), 0.605 );
511 is( sprintf( "%.3f", $hit->frac_conserved('hit') ), 0.932 );
512 is( sprintf( "%.3f", $hit->frac_conserved('total') ), 0.605 );
514 my $hsp = $hit->next_hsp;
515 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
516 'Check for correct hsp reference type' );
517 is( $hsp->query->seq_id(), 'Peptidase_C1', 'Check for query seq_id' );
518 is( $hsp->hit->seq_id(), 'CATL_RAT', 'Check for hit seq_id' );
520 is( $hsp->hit->start, 114, 'Check for hit hmmfrom value' );
521 is( $hsp->hit->end, 332, 'Check for hit hmm to value' );
522 is( $hsp->query->start, 1, 'Check for query alifrom value' );
523 is( $hsp->query->end, 337, 'Check for query ali to value' );
524 is( $hsp->score, 449.4, 'Check for hsp score' );
525 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
526 float_is( $hsp->evalue, 2e-135, 'Check for hsp evalue' );
528 is( $hsp->length('query'), 337, 'Check for hsp query length' );
529 is( $hsp->length('hit'), 219, 'Check for hsp hit length' );
530 is( $hsp->length('total'), 337, 'Check for hsp total length' );
531 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
532 is( $hsp->gaps('hit'), 118, 'Check for hsp hit gaps' );
533 is( $hsp->gaps('total'), 118, 'Check for hsp total gaps' );
535 ($hit->length == 0) ?
536 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
537 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
538 ($result->query_length == 0) ?
539 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
540 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
542 is( $hsp->num_conserved, 204 );
543 is( $hsp->num_identical, 131 );
544 is( sprintf( "%.2f", $hsp->percent_identity ), 38.87 );
545 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.389 );
546 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.598 );
547 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.389 );
548 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.605 );
549 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.932 );
550 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.605 );
552 is (length($hsp->homology_string), length($hsp->query_string));
554 is( $hsp->hit_string,
555 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI',
556 'Check for hiy string'
558 is( $hsp->homology_string,
559 '+P+++DWRe kg VtpVK+QG qCGSCWAFSa g lEg+ ++kt gkl+sLSEQ+LvDC++ d gn+ GCnG Glmd Af+Yik+ NgGl++E++Y PY+ +kd g+Cky+ + ++ a+++g++d+p++ E+al+ka+a++GP+sVa+das+ s q+Y+sG +Y+++ C+++ +LdH+Vl+VGYG e+ ++++ +YW+VKNSWG++WG++GY++ia+++n n+CG+a+ asypi',
560 'Check for homology string'
562 is( $hsp->query_string,
563 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi',
564 'Check for query string'
566 # Hmmsearch2 don't have PP or CS strings, these are tests to check for side effects
567 is( $hsp->posterior_string, '' );
568 is( $hsp->consensus_string, '' );
570 $hit = $result->next_hit;
571 is( $hit->name, 'CATL_HUMAN', 'Check hit name' );
572 is( $hit->description, '', 'Check for hit description' );
573 is( $hit->raw_score, 444.5, 'Check hit raw_score' );
574 is( $hit->bits, 0, 'Check hit bits (0)' );
575 float_is( $hit->significance, 6.1e-134, 'Check hit significance' );
578 # test for bug 2632 - CS lines are captured without breaking the parser
579 $searchio = Bio::SearchIO->new(
581 -file => test_input_file('hmmpfam_cs.out')
583 if (defined ($result = $searchio->next_result) ) {
584 my $hit = $result->next_hit;
585 my $hsp = $hit->next_hsp;
587 is ($hsp->seq_str, $hsp->query_string);
588 is (length($hsp->seq_str), length($hsp->query_string));
589 is (length($hsp->homology_string), length($hsp->query_string));
590 is (length($hsp->consensus_string), length($hsp->query_string));
592 is( $hsp->consensus_string,
593 'EEEEEEEEETSSHSBHHHHHHHHHHHHHGGGGSSCSTTSSCECEEEEEEECTCCCHHHHHHHCT----S GC-EEEEEEE-SSHHHHHHHHHHHHHHHHHTT-EEEEEEE--B-GGGS-HHHHHC--EEEEEEEE-TT--HHHHHHCEEEEECHSCHHHHTHHH. BEEEEEESSEEEEEECC-GGGHHHHBHGGGSTTEEBSEEEEEECESSSSSCTGGGSSCEEECCCTTCEEEEEEEEETTTHHHHHHHHHHTSCCCSSTTCGHHHHCC-SSS-TTSCHHHHHHHHHHHHHHTT--HHHHHHHHS----TT-GGGTST-HHHHHHHHHHHHHHCCHCCEEEEEEETSSEEEEEEETTTSCESEEEEEEEEEE.TTEEEEEESSC',
594 'Check for consensus structure string'
597 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES',
598 'Check for hsp seq_str'
600 is( $hsp->query_string,
601 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES',
602 'Check for query string'
604 is( $hsp->hit_string,
605 'CGvlGfiAhikgkpshkivedaleaLerLeHRGavgADgktGDGAGIltqiPdgFFrevakelGieLpe-gqYAVGmvFLPqdelaraearkifEkiaeeeGLeVLGWReVPvnnsvLGetAlatePvIeQvFvgapsgdgedfErrLyviRkrieksivaenvn----fYiCSLSsrTIVYKGMLtseQLgqFYpDLqderfeSalAivHsRFSTNTfPsWplAQPfRVnslwgggivlAHNGEINTlrgNrnwMraRegvlksplFgddldkLkPIvneggSDSaalDnvlEllvraGRslpeAlMMlIPEAWqnnpdmdkdrpekraFYeylsglmEPWDGPAalvftDGryavgAtLDRNGLTRPaRygiTrdldkDglvvvaSEa',
606 'Check for hit string'
608 is( $hsp->homology_string,
609 'CGv GfiA+ ++ ++hkiv +aleaL+++eHRGa++AD ++GDGAGI t+iP+++F++ ++++i++ ++ +VGm+FLP l+ + i+E +++ee+Le++GWR VP+ +vLG++A + P++eQvF+ +++ +++ +E++L+++Rk+iek+i+ + + ++fYiCSLS++TIVYKGM++s++LgqFY+DL++++++S++Ai+H+RFSTNT+P+WplAQP+R ++ HNGEINTl gN nwM++Re +l+s++++d++++LkPI n+++SDSa+lD ++Ell+++GRs++eAlM+l+PEA+qn+pd +++e+ +FYey+sgl+EPWDGPA++vft+G++ +gAtLDRNGL RPaRy+iT kD+lv+v+SE+',
610 'Check for homology string'
614 # Tests for hmmer3 output here
615 $searchio = Bio::SearchIO->new(
617 -file => test_input_file('hmmscan.out'),
620 is( ref($searchio), 'Bio::SearchIO::hmmer3',
621 'Check if correct searchio object is returned' );
623 while ( $result = $searchio->next_result ) {
627 'Bio::Search::Result::HMMERResult',
628 'Check for the correct result reference type'
630 is( $result->algorithm, 'HMMSCAN', 'Check algorithm' );
631 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
632 is( $result->hmm_name,
633 '/data/biodata/HMMerDB/Pfam.hmm',
636 is( $result->sequence_file,
637 'BA000019.orf1.fasta',
638 'Check sequence_file'
640 is( $result->query_name, 'BA000019.orf1', 'Check query_name' );
641 is( $result->query_length, 198, 'Check query_length' );
642 is( $result->query_accession, '', 'Check query_accession' );
643 is( $result->query_description, '', 'Check query_description' );
644 # 1 hit above and 6 below inclusion threshold
645 is( $result->num_hits(), 7, 'Check num_hits' );
648 if ( $hit = $result->next_model ) {
649 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
650 'Check for the correct hit reference type' );
651 is( $hit->name, 'Peripla_BP_2', 'Check hit name' );
652 is( $hit->description,
653 'Periplasmic binding protein',
654 'Check for hit description'
656 is( $hit->raw_score, 105.2, 'Check hit raw_score' );
657 is( $hit->bits, 0, 'Check hit bits (0)' );
658 float_is( $hit->significance, 6e-30, 'Check hit significance' );
659 is( $hit->num_hsps, 1, 'Check num_hsps' );
661 # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER.
662 # When is not known, sometimes it can be deduced from domain data '[]'
663 is( $hit->length, 0, 'Check hit length absence' );
664 is( $hit->frac_aligned_query, 0.87 );
665 is( $hit->frac_aligned_hit, undef );
667 if ( defined( $hsp = $hit->next_domain ) ) {
668 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
669 'Check for correct hsp reference type' );
670 is( $hsp->hit->seq_id(), 'Peripla_BP_2', 'Check for hit seq_id' );
671 is( $hsp->query->seq_id(), 'BA000019.orf1', 'Check for query seq_id' );
673 is( $hsp->hit->start, 59, 'Check for hit hmmfrom value' );
674 is( $hsp->hit->end, 236, 'Check for hit hmm to value' );
675 is( $hsp->query->start, 2, 'Check for query alifrom value' );
676 is( $hsp->query->end, 173, 'Check for query ali to value' );
677 is( $hsp->score, '105.0', 'Check for hsp score' );
678 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
679 float_is( $hsp->evalue, 1.5e-33, 'Check for hsp c-Evalue' );
681 is( $hsp->length('query'), 172, 'Check for hsp query length' );
682 is( $hsp->length('hit'), 178, 'Check for hsp hit length' );
683 is( $hsp->length('total'), 180, 'Check for hsp total length' );
684 is( $hsp->gaps('query'), 8, 'Check for hsp query gaps' );
685 is( $hsp->gaps('hit'), 2, 'Check for hsp hit gaps' );
686 is( $hsp->gaps('total'), 10, 'Check for hsp total gaps' );
688 ($hit->length == 0) ?
689 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
690 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
691 ($result->query_length == 0) ?
692 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
693 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
695 is( $hsp->num_conserved, 140 );
696 is( $hsp->num_identical, 50 );
697 is( sprintf( "%.2f", $hsp->percent_identity ), 27.78 );
698 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.291 );
699 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.281 );
700 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.278 );
701 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.814 );
702 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.787 );
703 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.778 );
705 is (length($hsp->homology_string), length($hsp->query_string));
707 is( $hsp->query_string,
708 'LKPDLIIGREYQ---KNIYNQLSNFAPTVLVDWGSF-TSFQDNFRYIAQVLNEEEQGKLVLQQYQKRIRDLQDRMGERlQKIEVSVIGFSGQSIKSLNR-DAVFNQVLDDAGIKRIsIQKNQQERYLEISIENLNKYDADVLFVINE---SKEQLYPDLKNPLWHHLRAVKKQQVYVVNQ',
709 'Check for query string'
711 is( $hsp->hit_string,
712 'lkPDlvivsafgalvseieellelgipvvavessstaeslleqirllgellgeedeaeelvaelesridavkaridsl-kpktvlvfgyadegikvvfgsgswvgdlldaaggeni-iaeakgseseeisaEqilaadpdviivsgrgedtktgveelkenplwaelpAvkngrvyllds',
713 'Check for hit string'
715 is( $hsp->homology_string,
716 'lkPDl+i+ +++ ++i+++l++ +p+v v+ s+ s+++ +r ++++l+ee++++ + +++++ri+++++r + ++ +v+v+g+++ +ik+++ + ++++ld+ag++ i i++++++ + eis+E+++++d+dv++v k+ + ++nplw +l+Avk+++vy++++',
717 'Check for homology string'
719 is( $hsp->posterior_string,
720 '8***********...********************9.*****************************************999999999999997777776.5678999999****99777777*************************...77777777899***************9976',
721 'Check for posterior probability string'
726 # Check for errors in HSP caused by the existence of 2 hits with the same ID
727 elsif ($counter == 2) {
728 is( $result->algorithm, 'HMMSCAN', 'Check algorithm' );
729 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
730 is( $result->hmm_name,
731 '/data/biodata/HMMerDB/Pfam.hmm',
734 is( $result->sequence_file,
735 'BA000019.orf1.fasta',
736 'Check sequence_file'
738 is( $result->query_name, 'lcl|Test_ID.1|P1', 'Check query_name' );
739 is( $result->query_length, 463, 'Check query_length' );
740 is( $result->query_description, '281521..282909', 'Check query_description' );
741 is( $result->num_hits(), 2, 'Check num_hits' );
745 while ( $hit = $result->next_model ) {
747 if ($hit_counter == 1) {
748 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
749 'Check for the correct hit reference type' );
750 is( $hit->name, 'IS4.original', 'Check hit name' );
751 is( $hit->description, '', 'Check for hit description' );
752 is( $hit->num_hsps, 1, 'Check num_hsps' );
753 if ( defined( $hsp = $hit->next_domain ) ) {
754 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
755 'Check for correct hsp reference type' );
756 is( $hsp->hit->seq_id(), 'IS4.original', 'Check for hit seq_id' );
757 is( $hsp->query->seq_id(), 'lcl|Test_ID.1|P1', 'Check for query seq_id' );
759 is( $hsp->hit->start, 315, 'Check for hit hmmfrom value' );
760 is( $hsp->hit->end, 353, 'Check for hit hmm to value' );
761 is( $hsp->query->start, 335, 'Check for query alifrom value' );
762 is( $hsp->query->end, 369, 'Check for query ali to value' );
763 is( $hsp->score, 18.9, 'Check for hsp score' );
764 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
765 float_is( $hsp->evalue, 8.9e-08, 'Check for hsp c-Evalue' );
768 elsif ($hit_counter == 2) {
769 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
770 'Check for the correct hit reference type' );
771 is( $hit->name, 'IS4.original', 'Check hit name' );
772 is( $hit->description, '', 'Check for hit description' );
773 is( $hit->num_hsps, 1, 'Check num_hsps' );
774 if ( defined( $hsp = $hit->next_domain ) ) {
775 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
776 'Check for correct hsp reference type' );
777 is( $hsp->hit->seq_id(), 'IS4.original', 'Check for hit seq_id' );
778 is( $hsp->query->seq_id(), 'lcl|Test_ID.1|P1', 'Check for query seq_id' );
780 is( $hsp->hit->start, 315, 'Check for hit hmmfrom value' );
781 is( $hsp->hit->end, 353, 'Check for hit hmm to value' );
782 is( $hsp->query->start, 335, 'Check for query alifrom value' );
783 is( $hsp->query->end, 369, 'Check for query ali to value' );
784 is( $hsp->score, 18.8, 'Check for hsp score' );
785 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
786 float_is( $hsp->evalue, 9e-08, 'Check for hsp c-Evalue' );
793 $searchio = Bio::SearchIO->new(
795 -file => test_input_file('hmmsearch3.out'),
798 while ( $result = $searchio->next_result ) {
800 'Bio::Search::Result::HMMERResult',
801 'Check for the correct result reference type'
803 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
804 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
805 is( $result->hmm_name, 'Kv9.hmm', 'Check hmm_name' );
806 is( $result->sequence_file,
807 '/home/pboutet/Desktop/databases/nr_May26',
808 'Check sequence_file'
810 is( $result->query_name, 'Kv9', 'Check query_name' );
811 is( $result->query_length, '481', 'Check query_length' );
812 is( $result->query_description, '', 'Check query_description' );
813 is( $result->num_hits(), 2, 'Check num_hits' );
815 while ( my $hit = $result->next_model ) {
819 $searchio = Bio::SearchIO->new(
821 -file => test_input_file('hmmsearch3_multi.out'),
824 is( ref($searchio), 'Bio::SearchIO::hmmer3',
825 'Check if correct searchio object is returned' );
827 while ( $result = $searchio->next_result ) {
831 'Bio::Search::Result::HMMERResult',
832 'Check for the correct result reference type'
834 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
835 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
836 is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' );
837 is( $result->sequence_file, 'test_seqs.seq_raw.txt', 'Check sequence_file' );
839 is( $result->query_name, '1-cysPrx_C', 'Check query_name' );
840 is( $result->query_length, 40, 'Check query_length' );
841 is( $result->query_accession, 'PF10417.4', 'Check query_accession' );
842 is( $result->query_description,
843 'C-terminal domain of 1-Cys peroxiredoxin',
844 'Check query_description'
846 is( $result->num_hits(), 0, 'Check num_hits' );
848 elsif ($counter == 2) {
850 'Bio::Search::Result::HMMERResult',
851 'Check for the correct result reference type'
853 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
854 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
855 is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' );
856 is( $result->sequence_file, 'test_seqs.seq_raw.txt', 'Check sequence_file' );
858 is( $result->query_name, 'DUF4229', 'Check query_name' );
859 is( $result->query_length, 69, 'Check query_length' );
860 is( $result->query_accession, 'PF14012.1', 'Check query_accession' );
861 is( $result->query_description,
862 'Protein of unknown function (DUF4229)',
863 'Check query_description'
865 is( $result->num_hits(), 1, 'Check num_hits' );
868 if ( $hit = $result->next_model ) {
869 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
870 'Check for the correct hit reference type' );
871 is( $hit->name, 'lcl|Protein_ID1.3|M3', 'Check hit name' );
872 is( $hit->description,
873 'complement(48376..51420)',
874 'Check for hit description'
876 is( $hit->raw_score, -17.8, 'Check hit raw_score' );
877 is( $hit->bits, 0, 'Check hit bits (0)' );
878 float_is( $hit->significance, 3, 'Check hit significance' );
879 is( $hit->num_hsps, 5, 'Check num_hsps' );
882 if ( defined( $hsp = $hit->next_domain ) ) {
883 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
884 'Check for correct hsp reference type' );
885 is( $hsp->hit->seq_id(), 'lcl|Protein_ID1.3|M3', 'Check for hit seq_id' );
886 is( $hsp->query->seq_id(), 'DUF4229', 'Check for query seq_id' );
888 is( $hsp->hit->start, 305, 'Check for hit alifrom value' );
889 is( $hsp->hit->end, 311, 'Check for hit ali to value' );
890 is( $hsp->query->start, 34, 'Check for query hmmfrom value' );
891 is( $hsp->query->end, 40, 'Check for query hmm to value' );
892 is( $hsp->score, -4.3, 'Check for hsp score' );
893 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
894 float_is( $hsp->evalue, 1, 'Check for hsp c-Evalue' );
896 is( $hsp->length('query'), 7, 'Check for hsp query length' );
897 is( $hsp->length('hit'), 7, 'Check for hsp hit length' );
898 is( $hsp->length('total'), 7, 'Check for hsp total length' );
899 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
900 is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' );
901 is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' );
903 ($hit->length == 0) ?
904 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
905 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
906 ($result->query_length == 0) ?
907 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
908 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
910 is( $hsp->num_conserved, 6 );
911 is( $hsp->num_identical, 4 );
912 is( sprintf( "%.2f", $hsp->percent_identity ), 57.14 );
913 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.571 );
914 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.571 );
915 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.571 );
916 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.857 );
917 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.857 );
918 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.857 );
920 is (length($hsp->homology_string), length($hsp->query_string));
922 is( $hsp->consensus_string,
924 'Check for consensus structure string'
926 is( $hsp->query_string,
928 'Check for query string'
930 is( $hsp->hit_string,
932 'Check for hit string'
934 is( $hsp->homology_string,
936 'Check for homology string'
938 is( $hsp->posterior_string,
940 'Check for posterior probability string'
945 elsif ($counter == 3) {
947 'Bio::Search::Result::HMMERResult',
948 'Check for the correct result reference type'
950 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
951 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
952 is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' );
953 is( $result->sequence_file, 'test_seqs.seq_raw.txt', 'Check sequence_file' );
955 is( $result->query_name, 'ACR_tran', 'Check query_name' );
956 is( $result->query_length, 1021, 'Check query_length' );
957 is( $result->query_accession, 'PF00873.14', 'Check query_accession' );
958 is( $result->query_description,
959 'AcrB/AcrD/AcrF family',
960 'Check query_description'
962 is( $result->num_hits(), 1, 'Check num_hits' );
965 if ( $hit = $result->next_model ) {
966 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
967 'Check for the correct hit reference type' );
968 is( $hit->name, 'lcl|Protein_ID1.3|M3', 'Check hit name' );
969 is( $hit->description,
970 'complement(48376..51420)',
971 'Check for hit description'
973 is( $hit->raw_score, 616.9, 'Check hit raw_score' );
974 is( $hit->bits, 0, 'Check hit bits (0)' );
975 float_is( $hit->significance, 9.3e-189, 'Check hit significance' );
976 is( $hit->num_hsps, 1, 'Check num_hsps' );
978 # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER.
979 # When is not known, sometimes it can be deduced from domain data '[]'
980 is( $hit->length, 0, 'Check hit length absence' );
981 is( $hit->frac_aligned_query, 0.93 );
982 is( $hit->frac_aligned_hit, undef );
984 if ( defined( $hsp = $hit->next_domain ) ) {
985 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
986 'Check for correct hsp reference type' );
987 is( $hsp->hit->seq_id(), 'lcl|Protein_ID1.3|M3', 'Check for hit seq_id' );
988 is( $hsp->query->seq_id(), 'ACR_tran', 'Check for query seq_id' );
990 is( $hsp->hit->start, 11, 'Check for hit alifrom value' );
991 is( $hsp->hit->end, 1000, 'Check for hit ali to value' );
992 is( $hsp->query->start, 71, 'Check for query hmmfrom value' );
993 is( $hsp->query->end, 1021, 'Check for query hmm to value' );
994 is( $hsp->score, 616.6, 'Check for hsp score' );
995 is( $hsp->bits, 0, 'Check for hsp bits (0)' );
996 float_is( $hsp->evalue, 3.9e-189, 'Check for hsp c-Evalue' );
998 is( $hsp->length('query'), 951, 'Check for hsp query length' );
999 is( $hsp->length('hit'), 990, 'Check for hsp hit length' );
1000 is( $hsp->length('total'), 1003, 'Check for hsp total length' );
1001 is( $hsp->gaps('query'), 52, 'Check for hsp query gaps' );
1002 is( $hsp->gaps('hit'), 13, 'Check for hsp hit gaps' );
1003 is( $hsp->gaps('total'), 65, 'Check for hsp total gaps' );
1005 ($hit->length == 0) ?
1006 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
1007 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
1008 ($result->query_length == 0) ?
1009 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
1010 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
1012 is( $hsp->num_conserved, 690 );
1013 is( $hsp->num_identical, 262 );
1014 is( sprintf( "%.2f", $hsp->percent_identity ), 26.12 );
1015 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.275 );
1016 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.265 );
1017 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.261 );
1018 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.726 );
1019 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.697 );
1020 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.688 );
1022 is (length($hsp->homology_string), length($hsp->query_string));
1024 is( $hsp->consensus_string,
1025 'S-TTEEEEEEEETTSEEEEEEEESTTS-HHHHHHHHHHHHHHHGGGS-HHHHHH-EEEEEEECCECEEEEEEESSSTS-HHHHHHHHHHCTHHHHHTSTTEEEEEESS.--EEEEEEE-HHHHHCTT--HHHHHHHHHHHSSB-EEEECTT-SB-EEEE-SB---SCCHHCT-EEEETTSEEEEHHHCEEEEEEESSSS-EEEETTCEEEEEEEEEETTSBHHHHHHHHHHHHHCCGGGSSTTEEEEEEEESHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSSHCCCHHHHHHHHHHHHHHHHHHHHTT--EEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCSS-HHHHHHHHHHHHCCHHHHHHHHHHHHCCGGGGSBHHHHHHHHHHHHHHHHHHHHHHHHHHCCHHHHHHHCS----TT-CC..............................CHHHHHHHHHHHHHHHHHHHHHHHHHSCHHHHHHHHHHHHH.HHHHHCCS-BESS----TSEEEEEEE-STTC-HHHHHHHHHHHHHHHH...TTTTEEEEEEEESESSSS..E........CTTEEEEEEEE--CTTS-SCCCSHHHHHHHHHHHC.CTSTSSEEEEEE-SSSCCCSSSSSEEEEEEE.TSSSCHHHHHHHHHHHHHHHCCSTTEECEEESS-S-EEEEEEEE-HHHHHHCTB-HHHHHHHHHHHHT-..EEEEEEEETTE...EEEEEEEE-GGGSSSGGGGCC-EEEETTSE.EEECGGCEEEEEEEE-SEEEEETTCEEEEEEEEESTTS...-HHHHHHHHHHCCTT..SSTTEEEEEECHHHHHHHHCCCHHHHHHHHHHHHHHHHHHHCTSSSTCHHHHTTHHHHHHHHHHHHHHTT--BSHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCTTTBHHHHHHHHHHHHCHHHHHHHHHHHHHCCHHHHTT-STTHHHHHHHHHHHHHHHHHHHHCHHHHHHHHHHHHH',
1026 'Check for consensus structure string'
1028 is( $hsp->query_string,
1029 'gldglkyvsSqSseglssitvtFedgtdidiArqqvqnrlqeaknkLPeevqepgiskiktssseilvlavtskdgsltktdlrdlaesnikdqlsrveGVgdvqliGgsekavriwldpqklaklgltltdvvsalkeqnvqvaaGqlegqqeelliraqgrlqsaediekiivksqdgskvrlrDvAkvelgaeeeriaatlngkpavllavkklpganaievvkavkekleelketlPegveivvvydttefvrasieeVvktlleaivLvvlvlflFLqnlratlipaiavPlsllgtfavlkalglsiNlltlfgLvlAiGlvvDdAiVvvEnverkleeegekpleaalksmkeiegalvaialvllavfvPilflgGveGklfrqfaltivlaillsvlvaltltPalcallLkarkeekek..............................gffrefnrlfdalerrYekllekvlrhravvllvalllvvg.slllfvripkeflPeedegvlvtsvqlppgvsleqtekvlkqvekilk...ekpevesvfavtGfafagdta........gqnsakvfisLkpekerkeeektvealierlrkel.ekikganvellapiqlreletlsgvrlelqvklfgddleaLseareqllaalkqlpeladvrseqqedepqlqvkidrekaaalGvsiadinetlstalgg..syvndfieegr...vvkvvvqleedlrsspedlkklyvrnkkgk.mvplsavakieeekgpnsierenglrsveisgevaegd...slgeaeeavekiakqvklPagvgiewtglseqeqeagnsllllvalalllvflvLaalyeslsdpllvlltvPlalvGallalllrglelsviaqvGlilliGlavkNailivefakelrekeglsleeAileaaklRLrPiLMTalaailGvlPLalstGaGselqqplgivvlGGlvtstvLtlllvPvlYvlva',
1030 'Check for query string'
1032 is( $hsp->hit_string,
1033 'TVNDIEHIESQSLFGYGIVKIFFQPDVDIRTANAQVTAISQTVLKQMPPGITPPLILNYNAATVPILQLALSSK--VLSEDRIFDLGQNFIRPQLATVRGSAVPSPYGGKVRQIQIDLDPQAMQSKRVSPDDVARALSQQNLVLSPGTEKIGSFEYNVKINDSPDEFTLLNNLPIKNVGGVTIFIHDVAHVRDGFPPQINVVRDDGRRSVLMTILKNGATSTLDIIQGTKELIPKLKETLPNNLVLKVVGDQSIFVKSAISGVVREGTIAGILTSVMILLFLGSWRSTIIISMSIPLAILSAIIFLSLTGNTLNVMTLGGLALAVGMLVDDATVVIENINHHLEM-GKPTTKAIIDAARQIIQPALVSTLSICIVFVPMFSLTGVPRYLFIPMAEAVIFGMLSSFVLSQTFVPTVANKLLKYQTQHFKHehhtdahrpehdpnfkvhrsvkasifqffiNIQQGFEKRFTKVRLVYRSILHFALDHRKKFITLFLGFVIVsCVTLFPLLGKNFFPEVDSGDMKIHIRVQVGTRIEETAKQFDLIENTIRrlvPQNELDTIVDNIGLSVSGINTaysstgtiGPQDGDILIHLNEN------HHPTKEYMKKLRETLpRAFPGVS-FAFLPADITSQILNFGVPAPIDIRVDGPNHDNNLKFVRAILKDIRNVPGIADLRVQQATNYPQFNVDIDRSQAKNYGLTEGDITNSLVATLAGtsQVAPTFWLNNKngvSYPIVIQMPQYKINSLADLANIPITTKESSsMQVLGGLGSIERDQSDSVISHYNIKPSFDIFASLQGRDlgsISGDIETIIQHHHQE--LPKGVSVKLQGQVPIMQDSYRGLSLGLVASIILIYFLVVVNFESWLDPFVIITALPAALAGIVWMLYLTGTTLSVPALTGAIMCMGVATANSILVISFARERLA-IVKDSTQAALEAGYTRFRPVLMTASAMLIGMIPMALGLGDGGEQNAPLGRAVIGGLLLATIATLIFVPVVFSVVH',
1034 'Check for hit string'
1036 is( $hsp->homology_string,
1037 ' ++ +++++SqS g + + F+ + di A+ qv++ q + +++P ++++p i +++ +il+la++sk l++ + dl ++ i++ql+ v G + +Gg+ ++++i ldpq++++ +++++dv++al++qn + G+ + + e+++++++ + ++++ +k+ g + ++DvA+v +g + ++++ +g vl+++ k ++++++ ke +++lketlP+++ ++vv d++ fv+++i+ Vv + +a +L ++++lFL+++r+t+i+ +++Pl++l ++++l++ g ++N++tl+gL+lA+G++vDdA Vv+En+ +le+ g+ +a++ ++++i + + ++l++++vfvP+++l+Gv lf ++a ++++ +l s +++ t++P ++ lLk + ++ ++ ++ + f++ f ++ Y+ +l++ l hr+ ++++l +v++ ++ lf+ ++k+f+Pe d g++ ++++++ g+ +e+t+k + +e++++ ++e + ++ G + +g + g++ +++ i+L ++ ++ ++ +++lr+ l ++++g++ +++ p +++ + gv + ++ + g ++++ + ++++l+ ++++p++ad+r++q ++ pq++v+idr +a+++G++ di + l + l g +++ +f +++ + +v+q+++ + +s+ dl+++++++k++ m l+ + +ie+ ++ + i+++n ++s+ i ++++ +d ++g++e+++++ +++ lP+gv+++ +g+ q ++ l+l ++++++l++++ + +es++dp+++++ +P al+G + l+l+g++lsv a+ G i+ +G+a N il+++fa+e + ++ +A+lea+ +R+rP+LMTa a+++G++P+al+ G+G e plg +v+GGl+++t+ tl +vPv++ +v+',
1038 'Check for homology string'
1040 is( $hsp->posterior_string,
1041 '578899********************************************************************..*****************************************************************************************************************************************************************************************************************************************************************************.***************************************************************************8776544446799********************9655555578*************************999999887775899******************************************8875446889999999999888774331111111134445555555444......45688999999999945678887.7888999*999************************************************************************8877666655434556776544422279***********************998764889*******************************8876222578999999999888..********************************************************************************************************.888899*****************************************************************9997',
1042 'Check for posterior probability string'
1049 $searchio = Bio::SearchIO->new(
1051 -file => test_input_file('hmmscan_multi_domain.out'),
1057 'Bacterial pre-peptidase C-terminal domain',
1058 '111.0', 3.1e-32, 6,
1059 [ [ 4, 59, 117, 183, 0.5, 0.16 ],
1060 [ 12, 58, 347, 388, -0.6, 0.36 ],
1061 [ 1, 69, 470, 549, 71.3, 1.3e-23 ],
1062 [ 15, 25, 582, 603, -3.2, 2 ],
1063 [ 13, 36, 987, 1019, -1.1, 0.5 ],
1064 [ 1, 69, 1087, 1168, 54.4, 2.4e-18 ]
1067 [ 'HemolysinCabind',
1068 'Hemolysin-type calcium-binding repeat (2 copies)',
1070 [ [ 2, 13, 1214, 1225, 5.9, 0.0026 ],
1071 [ 1, 18, 1231, 1248, 10.8, 6.8e-5 ],
1072 [ 4, 18, 1243, 1257, 11.4, 4.3e-05 ]
1077 while ( $result = $searchio->next_result ) {
1079 'Bio::Search::Result::HMMERResult',
1080 'Check for the correct result reference type'
1082 is( $result->algorithm, 'HMMSCAN', 'Check algorithm' );
1083 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
1084 is( $result->hmm_name,
1085 '/data/biodata/HMMerDB/Pfam-A.hmm',
1088 is( $result->sequence_file, 'BA000019.orf37.fasta',
1089 'Check sequence_file' );
1090 is( $result->query_name, 'BA000019.orf37', 'Check query_name' );
1091 is( $result->query_length, '1418', 'Check query_length' );
1092 is( $result->query_description, '', 'Check query_description' );
1093 is( $result->num_hits(), 2, 'Check num_hits' );
1096 while ( $hit = $result->next_model ) {
1097 if ($hit->name eq 'HemolysinCabind') {
1098 # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER.
1099 # When is not known, sometimes it can be deduced from domain data '[]'
1100 is( $hit->length, 18, 'Check hit length' );
1101 is( $hit->frac_aligned_query, 0.03 );
1102 is( $hit->frac_aligned_hit, '1.00' );
1104 my @expected = @{ shift @multi_hits };
1105 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
1106 'Check for the correct hit reference type' );
1107 is( $hit->name, shift @expected, 'Check hit name' );
1108 is( $hit->description, shift @expected, 'Check for hit description' );
1109 is( $hit->raw_score, shift @expected, 'Check hit raw_score' );
1113 'Check hit significance'
1115 is( $hit->num_hsps, shift @expected, 'Check num_hsps' );
1116 my @hsp_list = @{ shift @expected };
1118 while ( defined( $hsp = $hit->next_domain ) ) {
1119 my @hsp_exp = @{ shift @hsp_list };
1120 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
1121 'Check for correct hsp reference type' );
1122 is( $hsp->hit->start,
1124 'Check for hit envfrom value'
1126 is( $hsp->hit->end, shift @hsp_exp,
1127 'Check for hit env to value' );
1128 is( $hsp->query->start,
1130 'Check for query hmmfrom value'
1132 is( $hsp->query->end,
1134 'Check for query hmm to value'
1136 is( $hsp->score, shift @hsp_exp, 'Check for hsp score' );
1137 float_is( $hsp->evalue, shift @hsp_exp,
1138 'Check for hsp c-Evalue' );
1143 $searchio = Bio::SearchIO->new(
1145 -file => test_input_file('hmmscan_sec_struct.out'),
1151 'Bacterial regulatory helix-turn-helix proteins, AraC family',
1153 [ [ 'siadiAeevgfSpsyfsrlFkkytGvt', 'SLMELSRQVGLNDCTLKRGFRLVFDTT' ],
1154 [ 'nwsiadiAeevgf-SpsyfsrlFkkytGvtPsqyr',
1155 'EINISQAARRVGFsSRSYFATAFRKKFGINPKEFL'
1160 '', '38.2', 3.8e-12, 2,
1161 [ [ 'GPSvtVDTACSSSLvA', 'GPSVTVDTLCSSSLVA' ],
1162 [ 'GPSvtVDTACSSSLv', 'GPNLVIDSACSSALV' ]
1166 'Domain of Unknown Function (DUF746)',
1168 [ [ 'rllIrlLsqplslaeaadqlgtdegiiak',
1169 'EILIRNLENPPSLMELSRQVGLNDCTLKR'
1171 [ 'plslaeaadqlgtdeg', 'EINISQAARRVGFSSR' ]
1176 my $result_counter = 0;
1177 while ( $result = $searchio->next_result ) {
1179 if ($result_counter == 1) {
1181 'Bio::Search::Result::HMMERResult',
1182 'Check for the correct result reference type'
1184 is( $result->algorithm, 'HMMSCAN', 'Check algorithm' );
1185 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
1186 is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' );
1187 is( $result->sequence_file, 'BA000019.orf8.fasta', 'Check sequence_file' );
1188 is( $result->query_name, 'BA000019.orf8', 'Check query_name' );
1189 is( $result->query_length, 348, 'Check query_length' );
1190 is( $result->query_description, '', 'Check query_description' );
1191 is( $result->num_hits(), 3, 'Check num_hits' );
1194 while ( $hit = $result->next_model ) {
1195 if ($hit->name eq 'PKSI-KS_m3') {
1196 # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER.
1197 # When is not known, sometimes it can be deduced from domain data '[]'
1198 is( $hit->length, 16, 'Check hit length' );
1199 is( $hit->frac_aligned_query, 0.09 );
1200 is( $hit->frac_aligned_hit, '1.00' );
1202 my @expected = @{ shift @multi_hits };
1203 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
1204 'Check for the correct hit reference type' );
1205 is( $hit->name, shift @expected, 'Check hit name' );
1206 is( $hit->description, shift @expected, 'Check for hit description' );
1207 is( $hit->raw_score, shift @expected, 'Check hit raw_score' );
1211 'Check hit significance'
1213 is( $hit->num_hsps, shift @expected, 'Check num_hsps' );
1214 my @hsp_list = @{ shift @expected };
1216 while ( defined( $hsp = $hit->next_domain ) ) {
1217 my @hsp_exp = @{ shift @hsp_list };
1218 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
1219 'Check for correct hsp reference type' );
1220 is( $hsp->hit_string, shift @hsp_exp, 'Check hit sequence' );
1221 is( $hsp->query_string, shift @hsp_exp, 'Check query sequence' );
1225 elsif ($result_counter == 2) {
1227 'Bio::Search::Result::HMMERResult',
1228 'Check for the correct result reference type'
1230 is( $result->algorithm, 'HMMSCAN', 'Check algorithm' );
1231 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
1232 is( $result->query_name, 'lcl|aorf_00010|P1', 'Check query_name' );
1233 is( $result->query_length, 132, 'Check query_length' );
1234 is( $result->query_description, 'IS481.original transposase', 'Check query_description' );
1235 is( $result->num_hits(), 1, 'Check num_hits' );
1238 while ( $hit = $result->next_model ) {
1239 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
1240 'Check for the correct hit reference type' );
1241 is( $hit->name, 'IS481.original.hmm', 'Check hit name' );
1242 is( $hit->description, '', 'Check for hit description' );
1243 is( $hit->raw_score, '130.0', 'Check hit raw_score' );
1244 float_is( $hit->significance, 3.4e-040, 'Check hit significance' );
1245 is( $hit->num_hsps, 1, 'Check num_hsps' );
1247 while ( defined( $hsp = $hit->next_domain ) ) {
1248 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
1249 'Check for correct hsp reference type' );
1250 is( $hsp->query_string,
1251 'GEIETAHPSYLGSQDTFYVGNITGAGR----------------------------IYQQTFVDTYSKWDSTKLYTTKTPITAADLLNDRVLSFFA-EQGMGIIRLLTDRSTEYCSKA--ETQDYELCLALNDIEHTKTKVYHPQTNDICRRFHKA',
1252 'Check for query string'
1254 is( $hsp->hit_string,
1255 'kRYErdhPgeLvhmDvkklgripdgGgvkighRwrgrtrgrgkrtnqsrnrglgkayvitaiDDhSRfayaeilsdettttaadfllraaayfygkigeeiitrvlTDnGaayrskkrsakhdFqealaelGIkhilTrprsPqTNGKiERFhrT',
1256 'Check for hit string'
1258 is( $hsp->homology_string,
1259 '+++E++hP +L+++D++++g+i + G+ +y++t++D++S+ +++++++t++taad l++ ++ f+ ++++i r lTD+ ++y+sk ++ d+ +la ++I+h++T++++PqTN ++ RFh+ ',
1260 'Check for homology string'
1262 is( $hsp->posterior_string,
1263 '579*******************88888............................****************************************.********************8..**********************************95',
1264 'Check for posterior probability string'
1271 # Make sure that you can also directly call the hmmer2 and hmmer3 subclasses
1272 $searchio = Bio::SearchIO->new(
1273 -format => 'hmmer2',
1274 -file => test_input_file('hmmpfam.out')
1276 is( ref($searchio), 'Bio::SearchIO::hmmer2',
1277 'Check if loading hmmpfam output via the hmm2 parser directly works' );
1278 is( ref( $searchio->next_result ),
1279 'Bio::Search::Result::HMMERResult',
1280 'Check for the correct result reference type'
1283 $searchio = Bio::SearchIO->new(
1284 -format => 'hmmer2',
1285 -file => test_input_file('hmmsearch.out')
1287 is( ref($searchio), 'Bio::SearchIO::hmmer2',
1288 'Check if loading hmmsearch2 output via the hmm2 parser directly works' );
1289 is( ref( $searchio->next_result ),
1290 'Bio::Search::Result::HMMERResult',
1291 'Check for the correct result reference type'
1294 $searchio = Bio::SearchIO->new(
1295 -format => 'hmmer3',
1296 -file => test_input_file('hmmscan.out')
1298 is( ref($searchio), 'Bio::SearchIO::hmmer3',
1299 'Check if loading hmmscan output via the hmm3 parser directly works' );
1300 is( ref( $searchio->next_result ),
1301 'Bio::Search::Result::HMMERResult',
1302 'Check for the correct result reference type'
1305 $searchio = Bio::SearchIO->new(
1307 -file => test_input_file('hmmsearch3.out')
1309 is( ref($searchio), 'Bio::SearchIO::hmmer3',
1310 'Check if loading hmmsearch3 output via the hmm3 parser directly works' );
1311 is( ref( $searchio->next_result ),
1312 'Bio::Search::Result::HMMERResult',
1313 'Check for the correct result reference type'
1316 # Make sure that you can also specify the -version parameter directly
1317 $searchio = Bio::SearchIO->new(
1319 -file => test_input_file('hmmpfam.out'),
1322 is( ref($searchio), 'Bio::SearchIO::hmmer2',
1323 'Check if selecting the correct hmmpfam parser using -version works' );
1324 is( ref( $searchio->next_result ),
1325 'Bio::Search::Result::HMMERResult',
1326 'Check for the correct result reference type'
1329 $searchio = Bio::SearchIO->new(
1331 -file => test_input_file('hmmsearch.out'),
1334 is( ref($searchio), 'Bio::SearchIO::hmmer2',
1335 'Check if selecting the correct hmmsearch2 parser using -version works' );
1336 is( ref( $searchio->next_result ),
1337 'Bio::Search::Result::HMMERResult',
1338 'Check for the correct result reference type'
1341 $searchio = Bio::SearchIO->new(
1342 -format => 'hmmer3',
1343 -file => test_input_file('hmmscan.out'),
1346 is( ref($searchio), 'Bio::SearchIO::hmmer3',
1347 'Check if selecting the correct hmmscan parser using -version works' );
1348 is( ref( $searchio->next_result ),
1349 'Bio::Search::Result::HMMERResult',
1350 'Check for the correct result reference type'
1353 $searchio = Bio::SearchIO->new(
1355 -file => test_input_file('hmmsearch3.out'),
1358 is( ref($searchio), 'Bio::SearchIO::hmmer3',
1359 'Check if selecting the correct hmmsearch3 parser using -version works' );
1360 is( ref( $searchio->next_result ),
1361 'Bio::Search::Result::HMMERResult',
1362 'Check for the correct result reference type'
1365 my $cat_command = ($^O =~ m/mswin/i) ? 'type' : 'cat';
1366 my $pipestr = "$cat_command " . test_input_file('hmmpfam.out') . " |";
1367 open( my $pipefh, $pipestr );
1369 $searchio = Bio::SearchIO->new(
1373 is( ref($searchio), 'Bio::SearchIO::hmmer2',
1374 'Check if reading from a pipe works' );
1375 $result = $searchio->next_result;
1377 'Bio::Search::Result::HMMERResult',
1378 'Check for the correct result reference type'
1380 is( $result->num_hits(), 2, 'Check num_hits' );
1384 my $in = Bio::SearchIO->new(
1386 -file => test_input_file('pfamOutput-bug3376.out')
1388 my $result = $in->next_result;
1389 my $hit = $result->next_hit;
1390 my $hsp = $hit->next_hsp;
1391 is( $result->query_length, 97, 'Check query_length' );
1392 is( $hit->length, 95, 'Check hit length' );
1393 is( $hsp->hit_string,
1394 'svfqqqqssksttgstvtAiAiAigYRYRYRAvtWnsGsLssGvnDnDnDqqsdgLYtiYYsvtvpssslpsqtviHHHaHkasstkiiikiePr',
1400 # bug 3421 - making sure a full line of dashes in an HSP is parsed correctly
1402 my $in = Bio::SearchIO->new(
1404 -file => test_input_file('hmmpfam_HSPdashline.txt')
1406 my $result = $in->next_result;
1407 my $hit = $result->next_hit;
1408 my $hsp = $hit->next_hsp;
1409 is( $hsp->length, '561',
1410 'bug3421 - Check if can correctly parse an HSP with line full of dashes'
1417 my $in = Bio::SearchIO->new(
1419 -file => test_input_file('hmmpfam_multiresult.out')
1421 my $result = $in->next_result;
1422 $result = $in->next_result;
1423 my $hit = $result->next_hit;
1424 is( $hit->name, 'IS66_ORF3.uniq', 'bug3302 - Check if can parse multiresult hmmer' );
1428 # HMMER 3.1 nhmmer output
1430 my $in = Bio::SearchIO->new(
1433 -file => test_input_file('nhmmer-3.1.out')
1435 my $result = $in->next_result;
1436 is( $result->algorithm, 'NHMMER', 'Check algorithm' );
1437 is( $result->algorithm_version, '3.1b1', 'Check nhmmer algorithm version' );
1438 is( $result->hmm_name,
1439 '../HMMs/A_HA_H7_CDS_nucleotide.hmm',
1442 is( $result->sequence_file,
1444 'Check sequence_file'
1446 is( $result->query_name, 'A_HA_H7_CDS_nucleotide', 'Check query_name' );
1447 is( $result->query_length, 1683, 'Check query_length' );
1448 is( $result->query_accession, '', 'Check query_accession' );
1449 is( $result->query_description, '', 'Check query_description' );
1450 is( $result->num_hits(), 2, 'Check num_hits' );
1452 my $hit = $result->next_hit;
1453 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
1454 'Check for the correct hit reference type' );
1455 is( $hit->name, 'seq1', 'Check nhmmer hit name' );
1456 is( $hit->description, 'Description of seq1', 'Check nhmmer hit description' );
1457 is( $hit->score, 148.2, 'Check nhmmer hit score' );
1458 is( $hit->bits, 0, 'Check nhmmer hit bits (0)' );
1459 float_is( $hit->significance, 3.2e-48, 'Check nhmmer hit significance' );
1460 is( $hit->num_hsps, 1, 'Check num_hsps' );
1462 # Hit length is usually unknown for HMMSCAN and HMMSEARCH but not for NHMMER.
1463 # When is not known, sometimes it can be deduced from domain data '[]'
1464 is( $hit->length, 151, 'Check nhmmer hit length' );
1465 is( $hit->frac_aligned_query, 0.09 );
1466 is( $hit->frac_aligned_hit, '1.00' );
1468 my $hsp = $hit->next_hsp;
1469 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
1470 'Check for correct hsp reference type' );
1471 is( $hsp->hit->seq_id(), 'seq1', 'Check for nhmmer hit seq_id' );
1472 is( $hsp->query->seq_id(), 'A_HA_H7_CDS_nucleotide', 'Check for nhmmer query seq_id' );
1474 is( $hsp->start('hit'), 1, 'Check nhmmer hsp hit start' );
1475 is( $hsp->end('hit'), 151, 'Check nhmmer hsp hit end' );
1476 is( $hsp->start('query'), 258, 'Check nhmmer hsp query start' );
1477 is( $hsp->end('query'), 411, 'Check nhmmer hsp query end' );
1478 is( $hsp->strand('hit'), 1, 'Check nhmmer hsp hit strand' );
1479 is( $hsp->strand('query'), 1, 'Check nhmmer hsp query strand' );
1480 is( $hsp->score, 148.2, 'Check nhmmer hsp score' );
1481 is( $hsp->bits, 0, 'Check nhmmer hsp bits (0)' );
1482 float_is( $hsp->significance, 3.2e-48, 'Check nhmmer hsp evalue' );
1484 is( $hsp->length('query'), 154, 'Check for hsp query length' );
1485 is( $hsp->length('hit'), 151, 'Check for hsp hit length' );
1486 is( $hsp->length('total'), 154, 'Check for hsp total length' );
1487 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
1488 is( $hsp->gaps('hit'), 3, 'Check for hsp hit gaps' );
1489 is( $hsp->gaps('total'), 3, 'Check for hsp total gaps' );
1491 ($hit->length == 0) ?
1492 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
1493 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
1494 ($result->query_length == 0) ?
1495 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
1496 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
1498 is( $hsp->num_conserved, 151 );
1499 is( $hsp->num_identical, 146 );
1500 is( sprintf( "%.2f", $hsp->percent_identity ), 94.81 );
1501 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.948 );
1502 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.967 );
1503 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.948 );
1504 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.981 );
1505 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), '1.000' );
1506 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.981 );
1508 is( $hsp->consensus_string,
1510 'Check for consensus structure string'
1512 is( $hsp->query_string,
1513 'attcctagaattttcagctgatttaattattgagaggcgagaaggaagtaatgatgtctgttatcctgggaaattcgtaaatgaagaagctctgaggcaaattctcagggggtcaggcggaattgacaaggagacaatgggattcacatatagc',
1514 'Check for nhmmer query string'
1516 is( $hsp->homology_string,
1517 'attcctagaattttcagc+gatttaattattgagaggcgagaaggaagt gatgtctgttatcctgggaaattcgt+aatgaagaagctctgaggcaaattctcaggg+gtcaggcggaattgacaaggagacaatgggattcac+ta+agc',
1518 'Check for nhmmer homology string'
1520 is( $hsp->hit_string,
1521 'ATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGT---GATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACCTACAGC',
1522 'Check for nhmmer hit string'
1524 is( $hsp->posterior_string,
1525 '689*******************************************777...***************************************************************************************************986',
1526 'Check for nhmmer posterior probability string'
1528 is( length( $hsp->homology_string ),
1529 length( $hsp->hit_string ),
1530 'Check if nhmmer homology string and hit string have an equal length'
1532 is( length( $hsp->query_string ),
1533 length( $hsp->homology_string ),
1534 'Check if nhmmer query string and homology string have an equal length'
1537 $hit = $result->next_hit;
1538 is( $hit->name, 'seq2', 'Check nhmmer hit name' );
1539 is( $hit->description, 'Description of seq2', 'Check nhmmer hit description' );
1540 is( $hit->score, 38.6, 'Check nhmmer hit score' );
1541 is( $hit->bits, 0, 'Check nhmmer hit bits (0)' );
1542 float_is( $hit->significance, 3.9e-15, 'Check nhmmer hit significance' );
1543 is( $hit->length, 60, 'Check nhmmer hit length' );
1545 $hsp = $hit->next_hsp;
1546 is( $hsp->hit->seq_id(), 'seq2', 'Check for nhmmer hit seq_id' );
1547 is( $hsp->query->seq_id(), 'A_HA_H7_CDS_nucleotide', 'Check for nhmmer query seq_id' );
1549 is( $hsp->start('query'), 34, 'Check nhmmer hsp query start' );
1550 is( $hsp->end('query'), 92, 'Check nhmmer hsp query end' );
1551 is( $hsp->start('hit'), 1, 'Check nhmmer hsp hit start' );
1552 is( $hsp->end('hit'), 59, 'Check nhmmer hsp hit end' );
1553 is( $hsp->strand('hit'), -1, 'Check nhmmer hsp hit strand' );
1554 is( $hsp->strand('query'), 1, 'Check nhmmer hsp query strand' );
1555 is( $hsp->score, 38.6, 'Check nhmmer hsp score' );
1556 is( $hsp->bits, 0, 'Check nhmmer hsp bits (0)' );
1557 float_is( $hsp->significance, 3.9e-15, 'Check nhmmer hsp evalue' );
1559 is( $hsp->length('query'), 59, 'Check for hsp query length' );
1560 is( $hsp->length('hit'), 59, 'Check for hsp hit length' );
1561 is( $hsp->length('total'), 59, 'Check for hsp total length' );
1562 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
1563 is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' );
1564 is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' );
1566 ($hit->length == 0) ?
1567 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
1568 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
1569 ($result->query_length == 0) ?
1570 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
1571 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
1573 is (length($hsp->homology_string), length($hsp->query_string));
1575 is( $hsp->consensus_string,
1577 'Check for consensus structure string'
1579 is( $hsp->query_string,
1580 'gtgatgattgcaacaaatgcagacaaaatctgccttgggcaccatgctgtgtcaaacgg',
1581 'Check for nhmmer query string'
1583 is( $hsp->homology_string,
1584 'g+gat+att+c+acaaatgcagacaa atctgccttgggca+catgc+gtgtcaaacgg',
1585 'Check for nhmmer homology string'
1587 is( $hsp->hit_string,
1588 'GCGATCATTCCGACAAATGCAGACAAGATCTGCCTTGGGCATCATGCCGTGTCAAACGG',
1589 'Check for nhmmer hit string'
1591 is( $hsp->posterior_string,
1592 '6899****************************************************986',
1593 'Check for nhmmer posterior probability string' );
1594 is( length( $hsp->homology_string ),
1595 length( $hsp->hit_string ),
1596 'Check if nhmmer homology string and hit string have an equal length'
1598 is( length( $hsp->query_string ),
1599 length( $hsp->homology_string ),
1600 'Check if nhmmer query string and homology string have an equal length'
1603 # end HMMER 3.1 nhmmer output
1605 # Test HIT filtering by SIGNIFICANCE
1606 $searchio = Bio::SearchIO->new(
1607 '-format' => 'hmmer',
1608 '-file' => test_input_file('hmmpfam_cs.out'),
1611 # NOTE: For Hmmer2, if a single model pass the HIT filter
1612 # but it shows 2 domains, it counts as 2 hits (Glu_synthase)
1613 my @valid = qw( GATase_2
1618 $result = $searchio->next_result;
1619 is( $result->num_hits(), 5, 'Check Significance filtered num_hits' );
1620 while ( my $hit = $result->next_hit ) {
1621 is( $hit->name, shift @valid, 'Check Significance filtered hit ID' );
1625 # Test HIT filtering by SCORE
1626 $searchio = Bio::SearchIO->new(
1627 '-format' => 'hmmer',
1628 '-file' => test_input_file('hmmsearch.out'),
1631 # NOTE: This Hmmer2 report top hit (score 393.8) have 4 domains,
1632 # so it count as 4 hits (PAB2_ARATH)
1633 @valid = qw( PAB2_ARATH
1637 $result = $searchio->next_result;
1638 is( $result->num_hits(), 4, 'Check Score filtered num_hits' );
1639 while ( my $hit = $result->next_hit ) {
1640 is( $hit->name, shift @valid, 'Check Score filtered hit ID' );
1644 # Test HIT filtering by BITS
1645 $searchio = Bio::SearchIO->new(
1646 '-format' => 'hmmer',
1647 '-file' => test_input_file('hmmsearch3_multi.out'),
1650 # NOTE: No HMMER report use Bits, so this will filter out everything
1651 $result = $searchio->next_result;
1652 is( $result->num_hits(), 0, 'Check Bits filtered num_hits' );
1653 $result = $searchio->next_result;
1654 is( $result->num_hits(), 0, 'Check Bits filtered num_hits' );
1655 $result = $searchio->next_result;
1656 is( $result->num_hits(), 0, 'Check Bits filtered num_hits' );
1658 # Test HIT filtering by HIT_FILTER
1659 my $filt_func = sub {
1661 $hit->frac_aligned_query >= 0.20;
1663 $searchio = Bio::SearchIO->new(
1664 '-format' => 'hmmer',
1665 '-file' => test_input_file('hmmscan_multi_domain.out'),
1666 '-hit_filter' => $filt_func
1668 # NOTE: In Hmmer3 reports, the multiple domains of a model are treated
1669 # as HSPs instead of Hits (like it is in Hmmer2 reports)
1671 $result = $searchio->next_result;
1672 is( $result->num_hits(), 1, 'Check Hit_filter filtered num_hits' );
1673 while ( my $hit = $result->next_hit ) {
1674 is( $hit->name, shift @valid, 'Check Hit_filter filtered hits ID' );
1678 # Test for correct parsing of results from query sequences containing stops.
1679 # Without the patch, parsing dies with "Quantifier follows nothing in regex;" error
1680 $searchio = Bio::SearchIO->new(
1681 '-format' => 'hmmer',
1682 '-file' => test_input_file('hmmscan_qry_stop.txt'),
1684 eval { $searchio->next_result; };
1685 is( $@, '', 'Correct parsing of alignments with stops' );
1688 # Test for correct parsing of phmmer results
1689 # Without the patch, parsing skips all lines from phmmer output
1691 my $searchio = Bio::SearchIO->new(
1693 -file => test_input_file('phmmer.out')
1696 my $result = $searchio->next_result;
1697 if ( defined $result ) {
1699 is( $result->algorithm, 'PHMMER', 'Check algorithm' );
1700 is( $result->query_name, 'A0R3R7', 'Check query_name' );
1701 is( $result->query_length, 762, 'Check query_length absence' );
1702 is( $result->query_description, '', 'Check query_description' );
1703 is( $result->num_hits(), 8, 'Check num_hits' );
1705 my $hit = $result->next_model;
1706 if ( defined $hit ) {
1707 is( $hit->name, 'cath|4_0_0|1vs0A03/639-759', 'query name okay' );
1708 is( $hit->num_hsps(), 1, 'Check num_hsps' );