1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id: SearchIO_infernal.t 14672 2008-04-22 21:42:50Z cjfields $
11 test_begin(-tests => 496);
13 use_ok('Bio::SearchIO');
# Lexicals reused by the report sections below.
16 my ($in, $result, $iter, $hit, $hsp, $algorithm, $meta);
18 ### Infernal v. 1.1 ###
20 # One query vs. one database sequence report: open cmsearch_output.txt
# with the 'infernal' format and check the result class and header fields.
21 $in = Bio::SearchIO->new(
22 -format => 'infernal',
23 -file => test_input_file('cmsearch_output.txt')
25 $result = $in->next_result;
26 isa_ok($result, 'Bio::Search::Result::ResultI');
27 is( ref($result), 'Bio::Search::Result::INFERNALResult', 'Check for the correct Result reference type');
28 is( $result->algorithm, 'CMSEARCH', 'Check algorithm' );
29 is( $result->algorithm_version, '1.1.1', 'Check cmsearch algorithm version' );
30 is( $result->cm_name, 'RF00174.cm', 'Check cm_name');
31 is( $result->database_name, 'NC_000913.fna','Check database_name' );
32 is( $result->database_entries, 1, 'Check database_entries' );
33 is( $result->database_letters, 9283304, 'Check database_letters' );
34 is( $result->query_name, 'Cobalamin', 'Check query_name' );
35 is( $result->query_length, 191, 'Check query_length' );
36 is( $result->query_accession, 'RF00174', 'Check query_accession' );
37 is( $result->query_description, '', 'Check query_description' );
38 is( $result->num_hits(), 2, 'Check num_hits' );
# First hit of the report: class, identifiers, scores and rank.
42 $hit = $result->next_hit;
43 is( ref($hit), 'Bio::Search::Hit::ModelHit', 'Check for the correct hit reference type' );
44 is( $hit->algorithm, 'CMSEARCH', "Hit algorithm");
45 is( $hit->name, 'gi|556503834|ref|NC_000913.3|', 'Check hit name' );
46 is( $hit->description, 'Escherichia coli str. K-12 substr. MG1655, complete genome', 'Check hit description' );
47 is( $hit->length, 0, 'Check hit length' );
48 is( $hit->score, 98.2, 'Check hit score' );
49 is( $hit->bits, 98.2, 'Check hit bits' );
50 is( $hit->num_hsps, 1, 'Check number of HSPs' );
51 float_is( $hit->significance, 8.7e-16, 'Check hit significance' );
52 is($hit->rank, 1, 'Check hit rank' );
# First HSP of that hit: class and interface checks, then sequence ids,
# query/hit coordinates, scores, lengths, gap counts and hit strand.
54 $hsp = $hit->next_hsp;
55 is( ref($hsp), 'Bio::Search::HSP::ModelHSP', 'Check for correct hsp reference type' );
56 isa_ok( $hsp, 'Bio::Search::HSP::HSPI' );
57 isa_ok( $hsp->get_aln, 'Bio::Align::AlignI' );
58 isa_ok( $hsp->hit, 'Bio::SeqFeature::Similarity', "Check for hsp hit isa seqfeature similarity" );
60 is( $hsp->hit->seq_id(), 'gi|556503834|ref|NC_000913.3|', 'Check for HSP hit seq_id' );
61 is( $hsp->query->seq_id(), 'Cobalamin', 'Check for HSP query seq_id' );
62 is( $hsp->start('query'), 1, 'Check hsp query start' );
63 is( $hsp->end('query'), 191, 'Check hsp query end' );
64 is( $hsp->start('hit'), 4163384, 'Check hsp hit start' );
65 is( $hsp->end('hit'), 4163574, 'Check hsp hit end' );
66 is( $hsp->score, 98.2, 'Check hsp score' );
67 is( $hsp->bits, 98.2, 'Check hsp bits' );
68 float_is( $hsp->significance, 8.7e-16, 'Check hsp evalue' );
70 is( $hsp->length('query'), 191, 'Check for hsp query length' );
71 is( $hsp->length('hit'), 191, 'Check for hsp hit length' );
72 is( $hsp->length, 207, 'Check for hsp total length' );
73 is( $hsp->gaps('query'), 16, 'Check for hsp query gaps' );
74 is( $hsp->gaps('hit'), 16, 'Check for hsp hit gaps' );
75 is( $hsp->gaps, 32, 'Check for hsp total gaps' );
76 is( $hsp->strand('hit'), 1, 'Check hsp hit strand' );
# Second hit of the same report: identifiers, scores, significance and rank.
80 $hit = $result->next_hit;
81 is( $hit->name, 'gi|556503834|ref|NC_000913.3|', 'Check hit name' );
82 is( $hit->description, 'Escherichia coli str. K-12 substr. MG1655, complete genome','Check hit description' );
83 is( $hit->score, 8.4, 'Check hit score' );
84 is( $hit->raw_score, 8.4, "Check hit raw_score");
85 is( $hit->bits, 8.4, 'Check hit bits' );
86 float_is( $hit->significance, 0.63, 'Check hit significance' );
87 is( $hit->length, 0, 'Check hit length' );
88 is($hit->rank, 2, "Hit rank");
# First HSP of the second hit: sequence ids, coordinates, scores,
# gap counts and hit strand.
90 $hsp = $hit->next_hsp;
91 is( $hsp->hit->seq_id(), 'gi|556503834|ref|NC_000913.3|', 'Check for hit seq_id' );
92 is( $hsp->query->seq_id(), 'Cobalamin', 'Check for query seq_id' );
93 is( $hsp->start('query'), 1, 'Check hsp query start' );
94 is( $hsp->end('query'), 191, 'Check hsp query end' );
95 is( $hsp->start('hit'), 4593356, 'Check hsp hit start' );
96 is( $hsp->end('hit'), 4593565, 'Check hsp hit end' );
97 is( $hsp->score, 8.4, 'Check hsp score' );
98 is( $hsp->bits, 8.4, 'Check hsp bits' );
99 float_is( $hsp->significance, 0.63, 'Check hsp evalue' );
101 is( $hsp->gaps('query'), 67, 'Check for hsp query gaps' );
102 is( $hsp->gaps('hit'), 48, 'Check for hsp hit gaps' );
103 is( $hsp->gaps, 115, 'Check for hsp total gaps' );
104 is( $hsp->strand('hit'), 1, 'Check hsp hit strand' );
# Alignment annotation strings of the current HSP, compared verbatim with
# the values expected from the report.
106 is( $hsp->noncanonical_string,
107 ' v v v v v v v vvvvvv vvv vvv vvv vvvvvvvvv v v v ',
108 'Check for NC string');
# NOTE(review): the opening line of the assertion that owns the next string
# is not present in this listing; going by its description it is the
# expected value for the 'Check for CS string' test -- confirm against the
# complete test file.
110 ':::::::::::::::[[[[[[,<<<____________>>>,,,,,(((,,,<<<<<_______>>>>>,,<<<____>>>,<<<---<<<<.------<<<<<<-----<<<-<<<<<<_____............................._>>>>>>--->>>>>>>>>----------....................................>>>>----.>>>,,,,)))]]]]]]:::::::::::::::',
111 'Check for CS string');
112 is( $hsp->query_string,
113 'uuaaauugaaacgaugauGGUuccccuuuaaagugaaggguuAAaaGGGAAcccGGUGaaAaUCCgggGCuGcCCCCgCaACuGUAAgcGg.agagcaccccccAauAaGCCACUggcccgcaa.............................gggccGGGAAGGCggggggaaggaaugac....................................cCgcgAGc.CaGGAGACCuGCCaucaguuuuugaaucucc',
114 'Check for query string');
115 is( $hsp->homology_string,
116 ' A AUU+A+++ :UGG :C +U ++ G G: +AA : GGAA: G C :+ GCCCCCGC +C GU+A :: GCA ++ ++ A GCCA G+C G :: +AG+ C GGA AC : CCA: + + + + AU ',
117 'Check for homology string');
118 is( $hsp->hit_string,
119 'GGAGAUUAAUCUUUACGUGGG-UCGUUGAUCGG---CUGACGAACCAGGAAGAUGU-------ACGCCAGUGCCCCCGCUGCGGUGACGCAa-CCGCAGAUGAUUAGU-GCCA---GACGG---aaugagugggugguaucaacaauaaaacc-----------------------------aguaaugaucggcgcaaaagaggcgcagaugaagcuGGCAAAGUuCUGGAUACUGCCCACCGACGCAGUCAUGCGA',
120 'Check for hit string');
121 is( $hsp->posterior_string,
122 '*********************.88877554444...5777779*********9996.......7999********************88873.333333333333333.4544...33333...44566655444444444444444444444.............................566666666666666666666666677777777776788899966*******************************',
123 'Check for posterior probability string');
# feature1 and feature2 must be Similarity features; the first value of
# each one's 'meta' tag is then compared with the expected string.
125 isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
126 isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
127 ($meta) = $hsp->feature1->get_tag_values('meta');
128 is($meta, ':::::::::::::::[[[[[[,<<<____________>>>,,,,,(((,,,<<<<<_______>>>>>,,<<<____>>>,<<<---<<<<.------<<<<<<-----<<<-<<<<<<_____............................._>>>>>>--->>>>>>>>>----------....................................>>>>----.>>>,,,,)))]]]]]]:::::::::::::::', "Check hsp feature1 get_tag_values");
129 ($meta) = $hsp->feature2->get_tag_values('meta');
130 is($meta, ':::::::::::::::[[[[[[,<<<____________>>>,,,,,(((,,,<<<<<_______>>>>>,,<<<____>>>,<<<---<<<<.------<<<<<<-----<<<-<<<<<<_____............................._>>>>>>--->>>>>>>>>----------....................................>>>>----.>>>,,,,)))]]]]]]:::::::::::::::', "Check hsp feature2 get_tag_values");
# A further next_result call on this report is expected to return undef.
132 $result = $in->next_result;
133 is( $result, undef, 'Check for undefined result' );
136 # Multi-query vs. multi-sequence database report (cmsearch.multi.out)
137 $in = Bio::SearchIO->new(
138 -format => 'infernal',
139 -file => test_input_file('cmsearch.multi.out')
# First result: hit count, query name and the length of its first hit.
142 $result = $in->next_result;
143 is( $result->num_hits, 12, 'Check result num_hits - multi report');
144 is( $result->query_name, 'tRNA5', 'Check result query_name - multi report');
145 $hit = $result->next_hit;
146 is( $hit->length, 72, 'Check hit length - multi report' );
# Second result: same fields, plus the hit strand of the first HSP,
# which is expected to be -1.
149 $result = $in->next_result;
150 is( $result->num_hits, 1, 'Check result#2 num_hits - multi report');
151 is( $result->query_name, 'Cobalamin', 'Check result#2 query_name - multi report');
152 $hit = $result->next_hit;
153 is( $hit->length, 0, 'Check result#2 hit length - multi report' );
154 $hsp = $hit->next_hsp;
155 is( $hsp->strand('hit'), -1, 'Check result#2 hsp hit strand - multi report');
158 # Report with no hits (cmsearch.nohit.out): a result is still returned,
# but next_hit must yield undef.
159 $in = Bio::SearchIO->new(
160 -format => 'infernal',
161 -file => test_input_file('cmsearch.nohit.out')
163 $result = $in->next_result;
164 is( $result->cm_name, 'Cobalamin.c.cm', 'Check cm_name' );
165 $hit = $result->next_hit;
166 is( $hit, undef, 'Check for undefined hit' );
171 ### Infernal v. 1.0 ####
# For this report the model name, query accession and query description
# are passed to the constructor (-model, -query_acc, -query_desc).
173 my $searchio = Bio::SearchIO->new( -format => 'infernal',
174 -file => test_input_file('test2.infernal'),
175 -model => 'tRNAtest',
176 -query_acc => 'RF01234',
177 -query_desc => 'tRNA',
# Result-level checks: algorithm info, database statistics and query fields.
180 $result = $searchio->next_result;
181 isa_ok($result, 'Bio::Search::Result::ResultI');
182 is($result->algorithm, 'CMSEARCH', "Result");
183 is($result->algorithm_reference, undef, "Result reference");
184 is($result->algorithm_version, '1.0', "Result version");
185 is($result->available_parameters, 0, "Result parameters");
186 is($result->available_statistics, 0, "Result statistics");
187 is($result->database_entries, '', "Result entries");
188 is($result->database_letters, 600000, "Result letters");
189 is($result->database_name, 'tosearch.300Kb.db',
190 "Result database_name");
191 is($result->num_hits, 1, "Result num_hits");
192 is($result->program_reference, undef, "Result program_reference");
193 is($result->query_accession, 'RF01234', "Result query_accession");
194 is($result->query_description, 'tRNA', "Result query_description");
195 is($result->query_length, 72, "Result query_length");
196 is($result->query_name, 'trna.5-1', "Result query_name");
# First (and, per num_hits above, only) hit: identifiers and HSP count.
198 $hit = $result->next_hit;
200 isa_ok($hit, 'Bio::Search::Hit::HitI');
201 is($hit->ncbi_gi, '', "Hit GI");
202 is($hit->accession, 'example', "Hit accession");
203 is($hit->algorithm, 'CMSEARCH', "Hit algorithm");
204 is($hit->bits, '78.06', "Hit bits");
205 is($hit->description, '', "Hit description"); # no hit descs yet
206 is($hit->locus, '', "Hit locus");
207 is($hit->n, 3, "Hit n");
208 is($hit->name, 'example', "Hit name");
209 is($hit->num_hsps, 3, "Hit num_hsps");
211 # These Bio::Search::Hit::HitI methods are currently unimplemented in
212 # Bio::Search::Hit::ModelHit; they may be integrated over time but will require
213 # some reconfiguring for Model-based searches
215 # NOTE(review): an earlier TODO here asked for dies_ok()/warnings_like();
# each call below is already checked with warning_like() against the
# expected "not implemented for Model-based searches" message.
216 warning_like { $hit->length_aln() }
217 qr'length_aln not implemented for Model-based searches',
218 "Hit length_aln() not implemented";
219 warning_like {$hit->num_unaligned_hit}
220 qr'num_unaligned_hit/num_unaligned_sbjct not implemented for Model-based searches',
221 "Hit num_unaligned_hit() not implemented";
222 warning_like {$hit->num_unaligned_query}
223 qr'num_unaligned_query not implemented for Model-based searches',
224 "Hit num_unaligned_query() not implemented";
# num_unaligned_sbjct is matched against the same message as
# num_unaligned_hit above.
225 warning_like {$hit->num_unaligned_sbjct}
226 qr'num_unaligned_hit/num_unaligned_sbjct not implemented for Model-based searches',
227 "Hit num_unaligned_sbjct() not implemented";
228 warning_like {$hit->start}
229 qr'start not implemented for Model-based searches',
230 'Hit start not implemented';
231 warning_like {$hit->end}
232 qr'end not implemented for Model-based searches',
233 'Hit end not implemented';
234 warning_like {$hit->strand}
235 qr'strand not implemented for Model-based searches',
236 'Hit strand not implemented';
237 warning_like {$hit->logical_length}
238 qr'logical_length not implemented for Model-based searches',
239 'Hit logical_length not implemented';
240 warning_like {$hit->frac_aligned_hit}
241 qr'frac_aligned_hit not implemented for Model-based searches',
242 'Hit frac_aligned_hit not implemented';
243 warning_like {$hit->frac_aligned_query}
244 qr'frac_aligned_query not implemented for Model-based searches',
245 'Hit frac_aligned_query not implemented';
246 warning_like {$hit->frac_conserved}
247 qr'frac_conserved not implemented for Model-based searches',
248 'Hit frac_conserved not implemented';
249 warning_like {$hit->frac_identical}
250 qr'frac_identical not implemented for Model-based searches',
251 'Hit frac_identical not implemented';
252 warning_like {$hit->matches}
253 qr'matches not implemented for Model-based searches',
254 'Hit matches not implemented';
255 warning_like {$hit->gaps}
256 qr'gaps not implemented for Model-based searches',
257 'Hit gaps not implemented';
258 warning_like {$hit->frame}
259 qr'frame not implemented for Model-based searches',
260 'Hit frame not implemented';
261 warning_like {$hit->range}
262 qr'range not implemented for Model-based searches',
263 'Hit range not implemented';
264 warning_like {$hit->seq_inds}
265 qr'seq_inds not implemented for Model-based searches',
266 'Hit seq_inds not implemented';
# Scalar accessors of the v1.0 hit. p() and significance() are compared
# through float_is(); every assertion carries a description so a failure
# can be traced to the accessor involved.
268 is($hit->length, 0, "Hit length");
269 is($hit->overlap, 0, "Hit overlap");
270 is($hit->query_length, 72, "Hit query_length");
271 is($hit->rank, 1, "Hit rank");
272 is($hit->raw_score, '78.06', "Hit raw_score");
273 is($hit->score, '78.06', "Hit score");
274 float_is($hit->p, '2.906e-26', "Hit p");
275 float_is($hit->significance, '3.133e-21', "Hit significance");
# First HSP of the v1.0 hit: interface, e-value, similarity features and
# their 'meta' tag values, then alignment strings and coordinates.
277 $hsp = $hit->next_hsp;
278 isa_ok($hsp, 'Bio::Search::HSP::HSPI');
279 is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
280 float_is($hsp->evalue, '3.133e-21', "HSP evalue");
281 isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
282 isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
283 ($meta) = $hsp->feature1->get_tag_values('meta');
284 is($meta, '(((((((,,<<<<___.____>>>>,<<<<<_______>>>>>,,,,,<<<<<_______>>>>>))))))):', "HSP feature1 meta tag");
285 ($meta) = $hsp->feature2->get_tag_values('meta');
286 is($meta, '(((((((,,<<<<___.____>>>>,<<<<<_______>>>>>,,,,,<<<<<_______>>>>>))))))):', "HSP feature2 meta tag");
288 is($hsp->frame('query'), 0, "HSP frame");
289 is($hsp->gaps, 1, "HSP gaps");
290 is($hit->length, 0, "Hit length");
291 isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
292 isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
# NOTE(review): the next line is an expected-value string whose assertion
# opener is not present in this listing -- confirm against the full file.
294 'GCGGAUUUAGCUCAGUuGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCA',
296 is($hsp->homology_string,
297 'GC::A::UAGC:CAGU GG AG:GCGCCAG:CUG+++A:CUGGAGGUCC:G:GUUCGAU C:C:G::U::GCA',
298 "HSP homology_string");
299 is($hsp->hsp_group, undef, "HSP hsp_group");
300 is($hsp->hsp_length, 73, "HSP hsp_length");
301 is($hsp->length, 73, "HSP length");
302 is($hsp->links, undef, "HSP links");
303 is($hsp->n, 1, "HSP n");
304 float_is($hsp->pvalue, 2.906e-26, "HSP pvalue");
305 isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
306 is($hsp->query_string,
307 'gCcgacAUaGcgcAgU.GGuAgcgCgccagccUgucAagcuggAGgUCCgggGUUCGAUuCcccGUgucgGca',
309 is($hsp->range, 72, "HSP range");
310 is($hsp->rank, 1, "HSP rank");
311 float_is($hsp->significance, 3.133e-21, "HSP significance");
312 is($hsp->end, 72, "HSP end");
313 float_is($hsp->expect, '3.133e-21', "HSP expect");
315 # These Bio::Search::HSP::HSPI methods are currently unimplemented in
316 # Bio::Search::HSP::ModelHSP; they may be integrated over time but will require
317 # some reconfiguring for Model-based searches
# Each call is checked with warning_like() against the expected
# "not implemented for Model-based searches" message. Every test has its
# own description, so cigar_string and generate_cigar_string can be told
# apart in the test output.
319 warning_like {$hsp->seq_inds}
320 qr'seq_inds not implemented for Model-based searches',
321 'HSP seq_inds not implemented';
322 warning_like {$hsp->matches}
323 qr'matches not implemented for Model-based searches',
324 'HSP matches not implemented';
325 warning_like {$hsp->frac_conserved}
326 qr'frac_conserved not implemented for Model-based searches',
327 'HSP frac_conserved not implemented';
328 warning_like {$hsp->frac_identical}
329 qr'frac_identical not implemented for Model-based searches',
330 'HSP frac_identical not implemented';
331 warning_like {$hsp->num_conserved}
332 qr'num_conserved not implemented for Model-based searches',
333 'HSP num_conserved not implemented';
334 warning_like {$hsp->num_identical}
335 qr'num_identical not implemented for Model-based searches',
336 'HSP num_identical not implemented';
337 warning_like {$hsp->percent_identity}
338 qr'percent_identity not implemented for Model-based searches',
339 'HSP percent_identity not implemented';
340 warning_like {$hsp->cigar_string}
341 qr'cigar_string not implemented for Model-based searches',
342 'HSP cigar_string not implemented';
343 warning_like {$hsp->generate_cigar_string}
344 qr'generate_cigar_string not implemented for Model-based searches',
345 'HSP generate_cigar_string not implemented';
# Remaining checks on the first v1.0 HSP: seq() class, start, custom_score
# and hit strand.
# NOTE(review): the two bare string lines below are expected-value arguments
# whose assertion openers are not present in this listing -- confirm against
# the complete test file.
347 isa_ok($hsp->seq, 'Bio::LocatableSeq');
349 'gCcgacAUaGcgcAgU.GGuAgcgCgccagccUgucAagcuggAGgUCCgggGUUCGAUuCcccGUgucgGca',
351 is($hsp->start, 1, "HSP start");
352 is($hsp->custom_score, undef, "HSP custom_score");
354 '(((((((,,<<<<___.____>>>>,<<<<<_______>>>>>,,,,,<<<<<_______>>>>>))))))):',
356 is($hsp->strand('hit'), 1, "HSP strand");
# Second HSP of the v1.0 hit. The expected alignment strings contain
# "*[N]*" markers exactly as the assertions below spell them.
358 $hsp = $hit->next_hsp;
359 isa_ok($hsp, 'Bio::Search::HSP::HSPI');
360 is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
361 float_is($hsp->evalue, 0.6752, "HSP evalue");
362 isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
363 isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
364 is($hsp->frame('query'), 0, "HSP frame");
365 is($hsp->gaps, 4, "HSP gaps");
366 # infernal can return alignment data
367 isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
368 isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
# NOTE(review): the next line is an expected-value string whose assertion
# opener is not present in this listing -- confirm against the full file.
370 'UCUGCUAUGGCGUAAUGGCCACGCGC----CCAUCAACAAAGAUAUC*[19]*UAACAGGA',
372 is($hsp->homology_string,
373 ' C:G :AU+GCG:A+UGG :CGCGC C UCAA +++GA +UC U: C:G A',
374 "HSP homology_string");
375 is($hsp->hsp_group, undef, "HSP hsp_group");
376 is($hsp->hsp_length, 73, "HSP hsp_length");
377 is($hsp->length, 73, "HSP length");
378 is($hsp->links, undef, "HSP links");
379 is($hsp->n, 1, "HSP n");
380 float_is($hsp->pvalue, 6.263e-06, "HSP pvalue");
381 isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
382 is($hsp->query_string,
383 'gCcgacAUaGcgcAgUGGuAgcgCgccagccUgucAagcuggAGgUC*[17]*UgucgGca',
385 is($hsp->range, 72, "HSP range");
386 is($hsp->rank, 2, "HSP rank");
387 float_is($hsp->significance, 0.6752, "HSP significance");
388 is($hsp->end, 72, "HSP end");
389 float_is($hsp->expect, 0.6752, "HSP expect");
390 isa_ok($hsp->seq, 'Bio::LocatableSeq');
391 # this should probably default to the hit string
393 'gCcgacAUaGcgcAgUGGuAgcgCgccagccUgucAagcuggAGgUC*[17]*UgucgGca',
395 is($hsp->start, 1, "HSP start");
396 is($hsp->custom_score, undef, "HSP custom_score");
398 '(((((((,,<<<<_______>>>>,<<<<<_______>>>>>,,,,,~~~~~~))))))):',
400 is($hsp->strand('hit'), 1, "HSP strand");
402 ### Infernal pre-v. 1.0 ####
# Query accession, query description and database name are supplied to the
# constructor for this report (-query_acc, -query_desc, -database).
404 $searchio = Bio::SearchIO->new( -format => 'infernal',
405 -file => test_input_file('test.infernal'),
406 # version is reset to the correct one by parser
409 -query_acc => 'RF00167',
410 -query_desc => 'Purine riboswitch',
411 -database => 'b_sub.fas',
# Result-level checks; the algorithm name is interpolated into some of the
# test descriptions.
416 $result = $searchio->next_result;
417 isa_ok($result, 'Bio::Search::Result::ResultI');
418 $algorithm = $result->algorithm;
419 is($result->algorithm, 'CMSEARCH', "Result $algorithm");
420 is($result->algorithm_reference, undef, "Result $algorithm reference");
421 is($result->algorithm_version, 0.7, "Result $algorithm version");
422 is($result->available_parameters, 0, "Result parameters");
423 is($result->available_statistics, 0, "Result statistics");
424 is($result->database_entries, '', "Result entries");
425 is($result->database_letters, '', "Result letters");
426 is($result->database_name, 'b_sub.fas', "Result database_name");
427 is($result->num_hits, 2, "Result num_hits");
428 is($result->program_reference, undef, "Result program_reference");
429 is($result->query_accession, 'RF00167', "Result query_accession");
430 is($result->query_description, 'Purine riboswitch', "Result query_description");
431 is($result->query_length, 102, "Result query_length");
432 is($result->query_name, 'Purine', "Result query_name");
# First hit: identifiers parsed from the hit name, scores and HSP count.
434 $hit = $result->next_hit;
436 isa_ok($hit, 'Bio::Search::Hit::HitI');
437 is($hit->ncbi_gi, '2239287', "Hit GI");
438 is($hit->accession, 'U51115.1', "Hit accession");
439 is($hit->algorithm, 'CMSEARCH', "Hit algorithm");
440 is($hit->bits, 81.29, "Hit bits");
441 is($hit->description, '', "Hit description"); # no hit descs yet
442 is($hit->locus, 'BSU51115', "Hit locus");
443 is($hit->n, 2, "Hit n");
444 is($hit->name, 'gi|2239287|gb|U51115.1|BSU51115', "Hit name");
445 is($hit->num_hsps, 2, "Hit num_hsps");
447 # p() works but there are no evalues yet for Infernal output, so catch and check...
448 warning_like {$hit->p}
449 qr'P-value not defined. Using significance\(\) instead',
452 is($hit->length, 0, "Hit length");
453 is($hit->overlap, 0, "Hit overlap");
454 is($hit->query_length, 102, "Hit query_length");
455 is($hit->rank, 1, "Hit rank");
456 is($hit->raw_score, 81.29, "Hit raw_score");
457 is($hit->score, 81.29, "Hit score");
458 float_is($hit->significance, undef, "Hit significance");
# First HSP of the first pre-1.0 hit. evalue, pvalue, significance and
# expect are all expected to be undef for this report.
460 $hsp = $hit->next_hsp;
461 isa_ok($hsp, 'Bio::Search::HSP::HSPI');
462 is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
463 float_is($hsp->evalue, undef, "HSP evalue");
464 isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
465 isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
466 ($meta) = $hsp->feature1->get_tag_values('meta');
467 is($meta, ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,)))).))))::::::::::::::', "HSP feature1 meta tag");
468 ($meta) = $hsp->feature2->get_tag_values('meta');
469 is($meta, ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,)))).))))::::::::::::::', "HSP feature2 meta tag");
471 is($hsp->frame('query'), 0, "HSP frame");
472 is($hsp->gaps, 1, "HSP gaps");
473 is($hit->length, 0, "Hit length");
474 isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
475 isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
# NOTE(review): the next line is an expected-value string whose assertion
# opener is not present in this listing -- confirm against the full file.
477 'CAUGAAAUCAAAACACGACCUCAUAUAAUCUUGGGAAUAUGGCCCAUAAGUUUCUACCCGGCAACCGUAAAUUGCCGGACUAUGcAGGGAAGUGAUCGAUAAA',
479 is($hsp->homology_string,
480 ' A+ A+A+ AAAA A :CUC:UAUAAU: :GGGAAUAUGGCCC: :AGUUUCUACC:GGCAACCGUAAAUUGCC:GACUA:G AG: AA + ++ +++++',
481 "HSP homology_string");
482 is($hsp->hsp_group, undef, "HSP hsp_group");
483 is($hsp->hsp_length, 103, "HSP hsp_length");
484 is($hsp->length, 103, "HSP length");
485 is($hsp->links, undef, "HSP links");
486 is($hsp->n, 1, "HSP n");
487 float_is($hsp->pvalue, undef, "HSP pvalue");
488 isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
489 is($hsp->query_string,
490 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcG.aGuaAauauuaaauauuu',
492 is($hsp->range, 102, "HSP range");
493 is($hsp->rank, 1, "HSP rank");
494 float_is($hsp->significance, undef, "HSP significance");
495 is($hsp->end, 102, "HSP end");
496 float_is($hsp->expect, undef, "HSP expect");
498 isa_ok($hsp->seq, 'Bio::LocatableSeq');
500 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcG.aGuaAauauuaaauauuu',
502 is($hsp->start, 1, "HSP start");
503 is($hsp->custom_score, undef, "HSP custom_score");
505 ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,)))).))))::::::::::::::',
507 is($hsp->strand('hit'), 1, "HSP strand");
# Second HSP of the first pre-1.0 hit: same accessors as the first HSP,
# with a gap count of 0 and rank 2.
509 $hsp = $hit->next_hsp;
510 isa_ok($hsp, 'Bio::Search::HSP::HSPI');
511 is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
512 float_is($hsp->evalue, undef, "HSP evalue");
513 isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
514 isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
515 is($hsp->frame('query'), 0, "HSP frame");
516 is($hsp->gaps, 0, "HSP gaps");
517 # infernal can return alignment data
518 isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
519 isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
# NOTE(review): the next line is an expected-value string whose assertion
# opener is not present in this listing -- confirm against the full file.
521 'AGAAAUCAAAUAAGAUGAAUUCGUAUAAUCGCGGGAAUAUGGCUCGCAAGUCUCUACCAAGCUACCGUAAAUGGCUUGACUACGUAAACAUUUCUUUCGUUU',
523 is($hsp->homology_string,
524 'A AAAU AAA+AA A+ : CGUAUAAU::CG:GAAUAUGGC:CG::AGU UCUACCA:GC ACCGUAAAU GC:UGACUACG : AU+U +++ UUU',
525 "HSP homology_string");
526 is($hsp->hsp_group, undef, "HSP hsp_group");
527 is($hsp->hsp_length, 103, "HSP hsp_length");
528 is($hsp->length, 103, "HSP length");
529 is($hsp->links, undef, "HSP links");
530 is($hsp->n, 1, "HSP n");
531 float_is($hsp->pvalue, undef, "HSP pvalue");
532 isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
533 is($hsp->query_string,
534 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
536 is($hsp->range, 102, "HSP range");
537 is($hsp->rank, 2, "HSP rank");
538 float_is($hsp->significance, undef, "HSP significance");
539 is($hsp->end, 102, "HSP end");
540 float_is($hsp->expect, undef, "HSP expect");
541 #is($hsp->matches, 2, "HSP matches");
542 isa_ok($hsp->seq, 'Bio::LocatableSeq');
543 # this should probably default to the hit string
545 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
547 is($hsp->start, 1, "HSP start");
548 is($hsp->custom_score, undef, "HSP custom_score");
550 ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
552 is($hsp->strand('hit'), 1, "HSP strand");
# Second hit of the pre-1.0 report: identifiers, scores and rank.
556 $hit = $result->next_hit;
557 isa_ok($hit, 'Bio::Search::Hit::HitI');
558 is($hit->accession, 'X83878.1', "Hit accession");
559 is($hit->ncbi_gi, '633168', "Hit GI");
560 is($hit->algorithm, 'CMSEARCH', "Hit algorithm");
561 is($hit->bits, 79.36, "Hit bits");
562 is($hit->description, '', "Hit description"); # no hit descs yet
563 is($hit->length, 0, "Hit length");
564 is($hit->locus, '', "Hit locus");
565 is($hit->n, 1, "Hit n");
566 is($hit->name, 'gi|633168|emb|X83878.1|', "Hit name");
567 is($hit->num_hsps, 1, "Hit num_hsps");
568 is($hit->overlap, 0, "Hit overlap");
569 is($hit->query_length, 102, "Hit query_length");
570 is($hit->rank, 2, "Hit rank");
571 is($hit->raw_score, 79.36, "Hit raw_score");
572 is($hit->score, 79.36, "Hit score");
573 float_is($hit->significance, undef, "Hit significance");
# Its single HSP (num_hsps is 1 above): features, alignment strings,
# coordinates and strand.
577 $hsp = $hit->next_hsp;
578 isa_ok($hsp, 'Bio::Search::HSP::HSPI');
579 is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
580 float_is($hsp->evalue, undef, "HSP evalue");
581 isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
582 isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
583 is($hsp->frame('query'), 0, "HSP frame");
584 is($hsp->gaps, 2, "HSP gaps");
585 isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
586 isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
# NOTE(review): the next line is an expected-value string whose assertion
# opener is not present in this listing -- confirm against the full file.
588 'UUACAAUAUAAUAGGAACACUCAUAUAAUCGCGUGGAUAUGGCACGCAAGUUUCUACCGGGCA-CCGUAAA-UGUCCGACUAUGGGUGAGCAAUGGAACCGC',
590 is($hsp->homology_string,
591 '+ A A++A AA A AA:AC+C:UAUAAU::CG:G AUAUGGC:CG::AGUUUCUACC:G CA CCGUAAA UG C:GACUA:G+GU:A A+U A+ ',
592 "HSP homology_string");
593 is($hsp->hsp_group, undef, "HSP hsp_group");
594 is($hsp->hsp_length, 103, "HSP hsp_length");
595 is($hsp->length, 103, "HSP length");
596 is($hsp->links, undef, "HSP links");
597 is($hsp->n, 1, "HSP n");
598 isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
599 is($hsp->query_string,
600 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
602 is($hsp->range, 102, "HSP range");
603 is($hsp->rank, 1, "HSP rank");
604 float_is($hsp->significance, undef, "HSP significance");
605 is($hsp->end, 102, "HSP end");
606 float_is($hsp->expect, undef, "HSP expect");
607 isa_ok($hsp->seq, 'Bio::LocatableSeq');
609 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
611 is($hsp->start, 1, "HSP start");
612 is($hsp->custom_score, undef, "HSP custom_score");
614 ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
616 is($hsp->strand('hit'), 1, "HSP strand");
# NOTE(review): the next line is one entry of a symbol mapping whose
# surrounding definition (presumably $symbols, passed as -symbols below) is
# not present in this listing -- confirm against the complete test file.
621 'single-strand' => ':',
624 # may add more for quartets, triplets
# Re-parse test.infernal, this time passing the custom mapping via -symbols.
627 $searchio = Bio::SearchIO->new( -format => 'infernal',
628 -file => test_input_file('test.infernal'),
629 # version is reset to the correct one by parser
632 -query_acc => 'RF00167',
633 -query_desc => 'Purine riboswitch',
634 -database => 'b_sub.fas',
637 -symbols => $symbols,
# Walk hit 1 / HSP 1, hit 1 / HSP 2 and hit 2 / HSP 1; the bare strings are
# expected values of assertions whose openers are missing from the listing.
640 $result = $searchio->next_result;
641 $hit = $result->next_hit;
642 $hsp = $hit->next_hsp;
644 ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))-))))::::::::::::::',
646 $hsp = $hit->next_hsp;
648 ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))))))::::::::::::::',
650 $hit = $result->next_hit;
651 $hsp = $hit->next_hsp;
653 ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))))))::::::::::::::',
# The 'meta' tag of both similarity features is checked against the
# expected string written with the custom symbols.
655 ($meta) = $hsp->feature1->get_tag_values('meta');
656 is($meta, ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))))))::::::::::::::', "HSP feature1 meta tag (custom symbols)");
657 ($meta) = $hsp->feature2->get_tag_values('meta');
658 is($meta, ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))))))::::::::::::::', "HSP feature2 meta tag (custom symbols)");
660 ## Infernal 0.81 parsing ##
# Same constructor arguments as the pre-1.0 section, pointed at
# purine_v081.infernal.
662 $searchio = Bio::SearchIO->new( -format => 'infernal',
663 -file => test_input_file('purine_v081.infernal'),
664 # version is reset to the correct one by parser
665 -query_acc => 'RF00167',
666 -query_desc => 'Purine riboswitch',
667 -database => 'b_sub.fas',
# Result-level checks; the algorithm name is interpolated into some of the
# test descriptions.
671 $result = $searchio->next_result;
673 isa_ok($result, 'Bio::Search::Result::ResultI');
674 $algorithm = $result->algorithm;
675 is($result->algorithm, 'CMSEARCH', "Result $algorithm");
676 is($result->algorithm_reference, undef, "Result $algorithm reference");
677 is($result->algorithm_version, 0.81, "Result $algorithm version");
678 is($result->available_parameters, 0, "Result parameters");
679 is($result->available_statistics, 0, "Result statistics");
680 is($result->database_entries, '', "Result entries");
681 is($result->database_letters, '', "Result letters");
682 is($result->database_name, 'b_sub.fas', "Result database_name");
683 is($result->num_hits, 3, "Result num_hits");
684 is($result->program_reference, undef, "Result program_reference");
685 is($result->query_accession, 'RF00167', "Result query_accession");
686 is($result->query_description, 'Purine riboswitch', "Result query_description");
687 is($result->query_length, 102, "Result query_length");
688 is($result->query_name, 'Purine', "Result query_name");
# First hit: identifiers, scores and HSP count.
690 $hit = $result->next_hit;
691 isa_ok($hit, 'Bio::Search::Hit::HitI');
692 is($hit->ncbi_gi, '633168', "Hit GI");
693 is($hit->accession, 'X83878.1', "Hit accession");
694 is($hit->algorithm, 'CMSEARCH', "Hit algorithm");
695 is($hit->bits, 79.36, "Hit bits");
696 is($hit->description, '', "Hit description"); # no hit descs yet
697 is($hit->locus, '', "Hit locus");
698 is($hit->n, 2, "Hit n");
699 is($hit->name, 'gi|633168|emb|X83878.1|', "Hit name");
700 is($hit->num_hsps, 2, "Hit num_hsps");
702 # p() works but there are no evalues yet for Infernal output, so catch and check...
703 warnings_like {$hit->p} qr'P-value not defined. Using significance\(\) instead',
706 is($hit->length, 0, "Hit length");
707 is($hit->overlap, 0, "Hit overlap");
708 is($hit->query_length, 102, "Hit query_length");
709 is($hit->rank, 1, "Hit rank");
710 is($hit->raw_score, 79.36, "Hit raw_score");
711 is($hit->score, 79.36, "Hit score");
712 float_is($hit->significance, 1.945e-07, "Hit significance");
# First HSP of the first 0.81 hit: e-value/p-value accessors, similarity
# features with their 'meta' tag, alignment strings and coordinates.
714 $hsp = $hit->next_hsp;
715 isa_ok($hsp, 'Bio::Search::HSP::HSPI');
716 is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
717 float_is($hsp->evalue, 1.945e-07, "HSP evalue");
718 isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
719 isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
720 ($meta) = $hsp->feature1->get_tag_values('meta');
721 is($meta, ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::', "HSP feature1 meta tag");
722 ($meta) = $hsp->feature2->get_tag_values('meta');
723 is($meta, ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::', "HSP feature2 meta tag");
725 is($hsp->frame('query'), 0, "HSP frame");
726 is($hsp->gaps, 2, "HSP gaps");
727 is($hit->length, 0, "Hit length");
728 isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
729 isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
# NOTE(review): the next line is an expected-value string whose assertion
# opener is not present in this listing -- confirm against the full file.
731 'UUACAAUAUAAUAGGAACACUCAUAUAAUCGCGUGGAUAUGGCACGCAAGUUUCUACCGGGCA-CCGUAAA-UGUCCGACUAUGGGUGAGCAAUGGAACCGC',
733 is($hsp->homology_string,
734 '+ A A++A AA A AA:AC+C:UAUAAU::CG:G AUAUGGC:CG::AGUUUCUACC:G CA CCGUAAA UG C:GACUA:G+GU:A A+U A+ ',
735 "HSP homology_string");
736 is($hsp->hsp_group, undef, "HSP hsp_group");
737 is($hsp->hsp_length,102, "HSP hsp_length");
738 is($hsp->length, 102, "HSP length");
739 is($hsp->links, undef, "HSP links");
740 is($hsp->n, 1, "HSP n");
741 float_is($hsp->pvalue, 1.945e-07, "HSP pvalue");
742 isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
743 is($hsp->query_string,
744 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
746 is($hsp->range, 102, "HSP range");
747 is($hsp->rank, 1, "HSP rank");
748 float_is($hsp->significance, 1.945e-07, "HSP significance");
749 is($hsp->end, 102, "HSP end");
750 float_is($hsp->expect, 1.945e-07, "HSP expect");
752 isa_ok($hsp->seq, 'Bio::LocatableSeq');
754 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
756 is($hsp->start, 1, "HSP start");
757 is($hsp->custom_score, undef, "HSP custom_score");
759 ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
761 is($hsp->strand('hit'), 1, "HSP strand");
# Fetch the following HSP of the same hit (rank 2 is asserted below) and check
# its type, algorithm, E-value, feature classes, frame and gap count.
# NOTE(review): several multi-line assertions in this section look truncated
# in this excerpt (bare string literals with no visible opening call, and a
# query_string call with no visible closing argument) -- confirm against the
# full file before editing them.
763 $hsp = $hit->next_hsp;
764 isa_ok($hsp, 'Bio::Search::HSP::HSPI');
765 is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
766 float_is($hsp->evalue, 6.802);
767 isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
768 isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
769 is($hsp->frame('query'), 0, "HSP frame");
770 is($hsp->gaps, 4, "HSP gaps");
771 # infernal can return alignment data
772 isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
773 isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
# Presumably the expected hit-side alignment string; the call it belongs to is
# not visible here -- TODO confirm.
775 'CGUGCGGUUCCAUUGCUCACCCAUA-GUCGGACAU-UUACGG-UGCCCGGUAGAAACUUGCGUGCCAUAUCCACGCGAUUaUAUGAGUGUUCCUAUUAUAUUG',
777 is($hsp->homology_string,
778 ' + + A +:AC C:UA +::: :: UA GG :: :::GU AC: G::::CC UA ::::C : UA:G GU: + U+++AUAUU ',
779 "HSP homology_string");
# hsp_group and links are expected to be undefined for this HSP.
780 is($hsp->hsp_group, undef, "HSP hsp_group");
781 is($hsp->hsp_length, 102, "HSP hsp_length");
782 is($hsp->length, 102, "HSP length");
783 is($hsp->links, undef, "HSP links");
784 is($hsp->n, 1, "HSP n");
# Unlike evalue/significance/expect (all 6.802 here), pvalue is expected to be
# 0.9989.
785 float_is($hsp->pvalue, 0.9989, "HSP pvalue");
786 isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
787 is($hsp->query_string,
788 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGAC.UAcGaGuaAauauuaaauauuu',
790 is($hsp->range, 102, "HSP range");
791 is($hsp->rank, 2, "HSP rank");
792 float_is($hsp->significance, 6.802);
793 is($hsp->end, 102, "HSP end");
794 float_is($hsp->expect, 6.802, "HSP expect");
795 #is($hsp->matches, 2, "HSP matches");
796 isa_ok($hsp->seq, 'Bio::LocatableSeq');
797 # this should probably default to the hit string
# Same literal as the expected query_string above; the call it belongs to is
# not visible here -- TODO confirm which accessor it checks.
799 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGAC.UAcGaGuaAauauuaaauauuu',
801 is($hsp->start, 1, "HSP start");
802 is($hsp->custom_score, undef, "HSP custom_score");
# Presumably the expected 'meta' annotation string for this HSP; the opening
# call is not visible here -- TODO confirm.
804 ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,.))))))))::::::::::::::',
# The hit strand of this HSP is expected to be -1.
806 is($hsp->strand('hit'), -1, "HSP strand");
# Advance to the next hit of the current result and check its type and
# identifiers (accession, GI, locus, name).
810 $hit = $result->next_hit;
811 isa_ok($hit, 'Bio::Search::Hit::HitI');
812 is($hit->accession, 'U51115.1', "Hit accession");
813 is($hit->ncbi_gi, '2239287', "Hit GI");
814 is($hit->algorithm, 'CMSEARCH', "Hit algorithm");
815 is($hit->bits, 81.29, "Hit bits");
816 is($hit->description, '', "Hit description"); # no hit descs yet
817 is($hit->length, 0, "Hit length");
818 is($hit->locus, 'BSU51115', "Hit locus");
# n and num_hsps are both expected to be 11 for this hit.
819 is($hit->n, 11, "Hit n");
820 is($hit->name, 'gi|2239287|gb|U51115.1|BSU51115', "Hit name");
821 is($hit->num_hsps, 11, "Hit num_hsps");
822 is($hit->overlap, 0, "Hit overlap");
823 is($hit->query_length, 102, "Hit query_length");
824 is($hit->rank, 2, "Hit rank");
# raw_score and score are expected to equal the bit score asserted above (81.29).
825 is($hit->raw_score, 81.29, "Hit raw_score");
826 is($hit->score, 81.29, "Hit score");
# Significance is compared through float_is; no test description is passed here.
827 float_is($hit->significance, 1.259e-07);
# Fetch the next HSP of the current hit (rank 1 is asserted below) and check
# its type, algorithm, E-value, feature classes, frame and gap count.
# NOTE(review): several multi-line assertions in this section look truncated
# in this excerpt (bare string literals with no visible opening call, and a
# query_string call with no visible closing argument) -- confirm against the
# full file before editing them.
831 $hsp = $hit->next_hsp;
832 isa_ok($hsp, 'Bio::Search::HSP::HSPI');
833 is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
834 float_is($hsp->evalue, 1.259e-07);
835 isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
836 isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
837 is($hsp->frame('query'), 0, "HSP frame");
838 is($hsp->gaps, 0, "HSP gaps");
839 isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
840 isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
# Presumably the expected hit-side alignment string; the call it belongs to is
# not visible here -- TODO confirm.
842 'AGAAAUCAAAUAAGAUGAAUUCGUAUAAUCGCGGGAAUAUGGCUCGCAAGUCUCUACCAAGCUACCGUAAAUGGCUUGACUACGUAAACAUUUCUUUCGUUU',
844 is($hsp->homology_string,
845 'A AAAU AAA+AA A+ : CGUAUAAU::CG:GAAUAUGGC:CG::AGU UCUACCA:GC ACCGUAAAU GC:UGACUACG : AU+U +++ UUU',
846 "HSP homology_string");
# hsp_group and links are expected to be undefined for this HSP.
847 is($hsp->hsp_group, undef, "HSP hsp_group");
848 is($hsp->hsp_length, 102, "HSP hsp_length");
849 is($hsp->length, 102, "HSP length");
850 is($hsp->links, undef, "HSP links");
851 is($hsp->n, 1, "HSP n");
852 isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
853 is($hsp->query_string,
854 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
856 is($hsp->range, 102, "HSP range");
857 is($hsp->rank, 1, "HSP rank");
# significance and expect are expected to equal the E-value asserted above
# (1.259e-07).
858 float_is($hsp->significance, 1.259e-07);
859 is($hsp->end, 102, "HSP end");
860 float_is($hsp->expect, 1.259e-07, "HSP expect");
861 isa_ok($hsp->seq, 'Bio::LocatableSeq');
# Same literal as the expected query_string above; the call it belongs to is
# not visible here -- TODO confirm which accessor it checks.
863 'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
865 is($hsp->start, 1, "HSP start");
866 is($hsp->custom_score, undef, "HSP custom_score");
# Presumably the expected 'meta' annotation string for this HSP; the opening
# call is not visible here -- TODO confirm.
868 ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
# The hit strand of this HSP is expected to be 1.
870 is($hsp->strand('hit'), 1, "HSP strand");