1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id: SearchIO_blast.t 14995 2008-11-16 06:20:00Z cjfields $
# Declare the number of tests this script plans to run.
11 test_begin(-tests => 1389);
# The module under test must load before anything else is checked.
13 use_ok('Bio::SearchIO');
# Skip the four Path::Class checks below when that module is unavailable
# (presumably what -requires_module does; confirm against the test helper).
17 test_skip(-tests => 4, -requires_module => 'Path::Class');
# Case 1: a Path::Class file object wrapping the path from test_input_file().
20 my $file = Path::Class::file(test_input_file('ecolitst.bls'));
# Helper that opens its argument as a BLAST report. Its own lexical $file
# shadows the outer $file inside the sub.
21 my $f = sub { my ($file) = @_; Bio::SearchIO->new( -file => $file, -format => 'blast') };
# The constructor must not die and the object it returns must be a Bio::Root::IO.
23 lives_ok(sub { $f->($file) } , 'Bio::SearchIO->new can handle a Path::Class object');
24 isa_ok($f->($file), 'Bio::Root::IO');
# Case 2: the same report, this time reached through a Path::Class directory object.
26 $file = Path::Class::dir(File::Spec->catfile(qw/t data/))->file('ecolitst.bls');
28 lives_ok(sub { $f->($file) } , 'Bio::SearchIO->new can handle a Path::Class object');
29 isa_ok($f->($file), 'Bio::Root::IO');
# --- Report section: ecolitst.bls (the checks below expect BLASTP 2.1.3) ---
# Handles shared with, and reassigned by, the report sections that follow.
32 my ( $searchio, $result, $iter, $hit, $hsp );
34 $searchio = Bio::SearchIO->new(
36 '-file' => test_input_file('ecolitst.bls')
# Only the first result in the report is inspected.
39 $result = $searchio->next_result;
41 like($result->algorithm_reference,
42 qr/Gapped BLAST and PSI-BLAST: a new generation of protein database search/
# Database and program metadata.
45 is( $result->database_name, 'ecoli.aa', 'database_name()' );
46 is( $result->database_entries, 4289 );
47 is( $result->database_letters, 1358990 );
49 is( $result->algorithm, 'BLASTP' );
50 like( $result->algorithm_version, qr/^2\.1\.3/ );
# NOTE(review): '|', '(...)' and '[...]' are unescaped in the pattern below, so
# it is a regex alternation whose first branch is just 'gi'; any query_name
# containing 'gi' satisfies it. Confirm whether a literal match was intended
# before tightening it.
51 like( $result->query_name,
52 qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/
# Query identifiers and length.
54 is( $result->query_accession, 'AAC73113.1' );
55 is( $result->query_gi, 1786183 );
56 is( $result->query_length, 820 );
# Statistics and parameters exposed through get_statistic()/get_parameter().
# Expected values with trailing zeros are quoted so the zero survives the
# comparison (assuming is() compares as strings, as in Test::More).
57 is( $result->get_statistic('kappa'), '0.135' );
58 is( $result->get_statistic('kappa_gapped'), '0.0410' );
59 is( $result->get_statistic('lambda'), '0.319' );
60 is( $result->get_statistic('lambda_gapped'), '0.267' );
61 is( $result->get_statistic('entropy'), '0.383' );
62 is( $result->get_statistic('entropy_gapped'), '0.140' );
64 is( $result->get_statistic('dbletters'), 1358990 );
65 is( $result->get_statistic('dbentries'), 4289 );
66 is( $result->get_statistic('effective_hsplength'), 47 );
67 is( $result->get_statistic('effectivespace'), 894675611 );
68 is( $result->get_parameter('matrix'), 'BLOSUM62' );
69 is( $result->get_parameter('gapopen'), 11 );
70 is( $result->get_parameter('gapext'), 1 );
71 is( $result->get_statistic('S2'), '92' );
72 is( $result->get_statistic('S2_bits'), '40.0' );
73 float_is( $result->get_parameter('expect'), '1.0e-03' );
74 is( $result->get_statistic('num_extensions'), '82424' );
75 is( $result->get_statistic('querylength'), 773 );
76 is( $result->get_statistic('effectivedblength'), 1157407 );
77 is( $result->get_statistic('effectivespaceused'), 894675611 );
# Expected per-hit rows, in the order the loop below shifts them off:
# name, length, accession, significance, bits, raw score.
80 [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 1567, 4058 ],
81 [ 'gb|AAC76922.1|', 810, 'AAC76922', '1e-91', 332, 850 ],
82 [ 'gb|AAC76994.1|', 449, 'AAC76994', '3e-47', 184, 467 ]
# Walk the hits and compare each one with its expected row.
85 while ( $hit = $result->next_hit ) {
88 is( $hit->name, shift @$d );
89 is( $hit->length, shift @$d );
90 is( $hit->accession, shift @$d );
91 float_is( $hit->significance, shift @$d );
92 is( $hit->bits, shift @$d );
93 is( $hit->raw_score, shift @$d );
# HSP-level checks; the expected values are those of a full-length (1-820)
# alignment of the query against the hit.
97 while ( my $hsp = $hit->next_hsp ) {
98 is( $hsp->query->start, 1 );
99 is( $hsp->query->end, 820 );
100 is( $hsp->hit->start, 1 );
101 is( $hsp->hit->end, 820 );
102 is( $hsp->length('total'), 820 );
# The positional accessors on the HSP must agree with the per-sequence objects.
103 is( $hsp->start('hit'), $hsp->hit->start );
104 is( $hsp->end('query'), $hsp->query->end );
105 is( $hsp->strand('sbjct'), $hsp->subject->strand ); # alias for hit
106 float_is( $hsp->evalue, 0.0 );
107 is( $hsp->score, 4058 );
108 is( $hsp->bits, 1567 );
# Fractions are rounded through sprintf before being compared.
109 is( sprintf( "%.2f", $hsp->percent_identity ), 98.29 );
110 is( sprintf( "%.4f", $hsp->frac_identical('query') ), 0.9829 );
111 is( sprintf( "%.4f", $hsp->frac_identical('hit') ), 0.9829 );
# Post-increments $count and leaves the loop once its previous value exceeds
# the number of rows still held in @valid.
118 last if ( $count++ > @valid );
# --- Report section: ecolitst.wublastp (WU-BLAST BLASTP) ---
122 $searchio = Bio::SearchIO->new(
123 '-format' => 'blast',
124 '-file' => test_input_file('ecolitst.wublastp')
# Only the first result in the report is inspected.
127 $result = $searchio->next_result;
129 like($result->algorithm_reference,
130 qr/Gish, W. \(1996-2000\)/);
# Database and program metadata.
132 is( $result->database_name, 'ecoli.aa' );
133 is( $result->database_letters, 1358990 );
134 is( $result->database_entries, 4289 );
135 is( $result->algorithm, 'BLASTP' );
136 like( $result->algorithm_version, qr/^2\.0MP\-WashU/ );
# NOTE(review): '|', '(...)' and '[...]' are unescaped in the pattern below, so
# it is a regex alternation whose first branch is just 'gi'; any query_name
# containing 'gi' satisfies it. Confirm whether a literal match was intended.
137 like( $result->query_name,
138 qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/
140 is( $result->query_accession, 'AAC73113.1' );
142 is( $result->query_length, 820 );
143 is( $result->query_gi, 1786183 );
# Overall statistics, followed by the same quantities under their
# 'Frame+0_*' keys (used, computed and gapped variants).
144 is( $result->get_statistic('kappa'), 0.136 );
145 is( $result->get_statistic('lambda'), 0.319 );
146 is( $result->get_statistic('entropy'), 0.384 );
147 is( $result->get_statistic('dbletters'), 1358990 );
148 is( $result->get_statistic('dbentries'), 4289 );
149 is( $result->get_parameter('matrix'), 'BLOSUM62' );
150 is( $result->get_statistic('Frame+0_lambda_used'), '0.319' );
151 is( $result->get_statistic('Frame+0_kappa_used'), '0.136' );
152 is( $result->get_statistic('Frame+0_entropy_used'), '0.384' );
154 is( $result->get_statistic('Frame+0_lambda_computed'), '0.319' );
155 is( $result->get_statistic('Frame+0_kappa_computed'), '0.136' );
156 is( $result->get_statistic('Frame+0_entropy_computed'), '0.384' );
158 is( $result->get_statistic('Frame+0_lambda_gapped'), '0.244' );
159 is( $result->get_statistic('Frame+0_kappa_gapped'), '0.0300' );
160 is( $result->get_statistic('Frame+0_entropy_gapped'), '0.180' );
# Expected per-hit rows, in the order the loop below shifts them off:
# name, length, accession, significance, raw score.
163 [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 4141 ],
164 [ 'gb|AAC76922.1|', 810, 'AAC76922', '3.1e-86', 844 ],
165 [ 'gb|AAC76994.1|', 449, 'AAC76994', '2.8e-47', 483 ]
168 while ( $hit = $result->next_hit ) {
# Take the next expected row for this hit.
169 my $d = shift @valid;
173 # Test HSP contig data returned by SearchUtils::tile_hsps().
174 # The second hit has two HSPs that overlap.
176 # Compare with the contig made by hand from these two HSPs
177 # in t/data/contig-by-hand.wublastp.
178 # (In this made-up file, the HSPs from ecolitst.wublastp
179 # were aligned and merged into a contig, and Length, Identities and Positives
180 # were counted, by a human (maj).)
182 my $hand_hit = Bio::SearchIO->new(
184 -file => test_input_file('contig-by-hand.wublastp')
185 )->next_result->next_hit;
# Reference values taken from the first HSP / the hit of the hand-made report.
186 my $hand_hsp = $hand_hit->next_hsp;
187 my @hand_qrng = $hand_hsp->range('query');
188 my @hand_srng = $hand_hsp->range('hit');
189 my @hand_matches = $hand_hit->matches;
# tile_hsps() hands back two array references: query contigs and subject contigs.
191 my ( $qcontigs, $scontigs ) = Bio::Search::SearchUtils::tile_hsps($hit);
# First query contig must match the hand-made range and match counts.
194 is( $qcontigs->[0]->{'start'}, $hand_qrng[0] );
195 is( $qcontigs->[0]->{'stop'}, $hand_qrng[1] );
196 is( $qcontigs->[0]->{'iden'}, $hand_matches[0] );
197 is( $qcontigs->[0]->{'cons'}, $hand_matches[1] );
# First subject contig: same comparison against the hand-made hit range.
200 is( $scontigs->[0]->{'start'}, $hand_srng[0] );
201 is( $scontigs->[0]->{'stop'}, $hand_srng[1] );
202 is( $scontigs->[0]->{'iden'}, $hand_matches[0] );
203 is( $scontigs->[0]->{'cons'}, $hand_matches[1] );
# Hit-level checks against the expected row.
206 is( $hit->name, shift @$d );
207 is( $hit->length, shift @$d );
208 is( $hit->accession, shift @$d );
209 float_is( $hit->significance, shift @$d );
210 is( $hit->raw_score, shift @$d );
# HSP-level checks; the expected values are those of a full-length (1-820),
# 100%-identical alignment.
214 while ( my $hsp = $hit->next_hsp ) {
215 is( $hsp->query->start, 1 );
216 is( $hsp->query->end, 820 );
217 is( $hsp->hit->start, 1 );
218 is( $hsp->hit->end, 820 );
219 is( $hsp->length('total'), 820 );
221 float_is( $hsp->evalue, 0.0 );
222 float_is( $hsp->pvalue, '0.0' );
223 is( $hsp->score, 4141 );
224 is( $hsp->bits, 1462.8 );
225 is( $hsp->percent_identity, 100 );
226 is( $hsp->frac_identical('query'), 1.00 );
227 is( $hsp->frac_identical('hit'), 1.00 );
# Post-increments $count and leaves the loop once its previous value exceeds
# the number of rows still held in @valid.
234 last if ( $count++ > @valid );
238 # Test that add_hit() really works properly for BLAST objects.
240 my @hits = $result->hits;
241 $result->add_hit( $hits[0] );
# @hits is evaluated numerically here: the hit count must have grown by one.
242 is( $result->num_hits, @hits + 1 );
244 # Test the WU-BLAST -noseqs option.
245 $searchio = Bio::SearchIO->new(
246 '-format' => 'blast',
247 '-file' => test_input_file('ecolitst.noseqs.wublastp')
# Only the first result in the report is inspected.
250 $result = $searchio->next_result;
252 $result->algorithm_reference, 'Gish, W. (1996-2004) http://blast.wustl.edu
255 is( $result->database_name, 'ecoli.aa' );
# Remaining database and program metadata.
256 is( $result->database_letters, 1358990 );
257 is( $result->database_entries, 4289 );
258 is( $result->algorithm, 'BLASTP' );
259 like( $result->algorithm_version, qr/^2\.0MP\-WashU/ );
# NOTE(review): '|', '(...)' and '[...]' are unescaped in the pattern below, so
# it is a regex alternation whose first branch is just 'gi'; any query_name
# containing 'gi' satisfies it. Confirm whether a literal match was intended.
260 like( $result->query_name,
261 qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/
263 is( $result->query_accession, 'AAC73113.1' );
264 is( $result->query_gi, 1786183 );
266 is( $result->query_length, 820 );
# Overall statistics, followed by the same quantities under their
# 'Frame+0_*' keys (used, computed and gapped variants).
267 is( $result->get_statistic('kappa'), 0.135 );
268 is( $result->get_statistic('lambda'), 0.319 );
269 is( $result->get_statistic('entropy'), 0.384 );
270 is( $result->get_statistic('dbletters'), 1358990 );
271 is( $result->get_statistic('dbentries'), 4289 );
272 is( $result->get_parameter('matrix'), 'BLOSUM62' );
273 is( $result->get_statistic('Frame+0_lambda_used'), '0.319' );
274 is( $result->get_statistic('Frame+0_kappa_used'), '0.135' );
275 is( $result->get_statistic('Frame+0_entropy_used'), '0.384' );
277 is( $result->get_statistic('Frame+0_lambda_computed'), '0.319' );
278 is( $result->get_statistic('Frame+0_kappa_computed'), '0.135' );
279 is( $result->get_statistic('Frame+0_entropy_computed'), '0.384' );
281 is( $result->get_statistic('Frame+0_lambda_gapped'), '0.244' );
282 is( $result->get_statistic('Frame+0_kappa_gapped'), '0.0300' );
283 is( $result->get_statistic('Frame+0_entropy_gapped'), '0.180' );
# Expected per-hit rows, in the order the loop below shifts them off:
# name, length, accession, significance, raw score.
286 [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 4141 ],
287 [ 'gb|AAC76922.1|', 810, 'AAC76922', '6.6e-93', 907 ],
288 [ 'gb|AAC76994.1|', 449, 'AAC76994', '2.8e-47', 483 ]
291 while ( $hit = $result->next_hit ) {
# Take the next expected row for this hit.
292 my $d = shift @valid;
294 is( $hit->name, shift @$d );
295 is( $hit->length, shift @$d );
296 is( $hit->accession, shift @$d );
297 float_is( $hit->significance, shift @$d );
298 is( $hit->raw_score, shift @$d );
# HSP-level checks; coordinates, scores and identity are expected to be
# reported with the same values as in the ecolitst.wublastp section.
302 while ( my $hsp = $hit->next_hsp ) {
303 is( $hsp->query->start, 1 );
304 is( $hsp->query->end, 820 );
305 is( $hsp->hit->start, 1 );
306 is( $hsp->hit->end, 820 );
307 is( $hsp->length('total'), 820 );
309 float_is( $hsp->evalue, 0. );
310 float_is( $hsp->pvalue, '0.' );
311 is( $hsp->score, 4141 );
312 is( $hsp->bits, 1462.8 );
313 is( $hsp->percent_identity, 100 );
314 is( $hsp->frac_identical('query'), 1.00 );
315 is( $hsp->frac_identical('hit'), 1.00 );
# Post-increments $count and leaves the loop once its previous value exceeds
# the number of rows still held in @valid.
322 last if ( $count++ > @valid );
# --- Report section: HUMBETGLOA.tblastx (the checks below expect TBLASTX 2.1.2) ---
327 $searchio = Bio::SearchIO->new(
328 '-format' => 'blast',
329 '-file' => test_input_file('HUMBETGLOA.tblastx')
# Only the first result in the report is inspected.
332 $result = $searchio->next_result;
333 like($result->algorithm_reference,qr/Gapped BLAST and PSI-BLAST/);
# Database, program and query metadata.
334 is( $result->database_name, 'ecoli.nt' );
335 is( $result->database_letters, 4662239 );
336 is( $result->database_entries, 400 );
337 is( $result->algorithm, 'TBLASTX' );
338 like( $result->algorithm_version, qr/^2\.1\.2/ );
339 is( $result->query_name, 'HUMBETGLOA' );
340 is( $result->query_description,
341 'Human haplotype C4 beta-globin gene, complete cds.' );
342 is( $result->query_length, 3002 );
# Statistics exposed through get_statistic().
343 is( $result->get_statistic('kappa'), 0.135 );
344 is( $result->get_statistic('lambda'), 0.318 );
345 is( $result->get_statistic('entropy'), 0.401 );
346 is( $result->get_statistic('dbletters'), 4662239 );
347 is( $result->get_statistic('dbentries'), 400 );
348 is( $result->get_statistic('querylength'), 953 );
349 is( $result->get_statistic('effectivedblength'), 1535279 );
350 is( $result->get_statistic('effectivespace'), 1463120887 );
351 is( $result->get_statistic('effectivespaceused'), 1463120887 );
# Threshold-style statistics, each raw value paired with its '_bits' form.
352 is( $result->get_statistic('T'), 13 );
353 is( $result->get_statistic('X1'), 16 );
354 is( $result->get_statistic('X1_bits'), 7.3 );
355 is( $result->get_statistic('X2'), 0 );
356 is( $result->get_statistic('X2_bits'), '0.0' );
357 is( $result->get_statistic('S1'), 41 );
358 is( $result->get_statistic('S1_bits'), 21.7 );
359 is( $result->get_statistic('S2'), 53 );
360 is( $result->get_statistic('S2_bits'), 27.2 );
362 is( $result->get_statistic('decayconst'), 0.1 );
364 is( $result->get_parameter('matrix'), 'BLOSUM62' );
# Expected per-hit rows, in the order the loop below shifts them off:
# name, length, accession, significance, bits, raw score.
367 [ 'gb|AE000479.1|AE000479', 10934, 'AE000479', '0.13', 33.6, 67 ],
368 [ 'gb|AE000302.1|AE000302', 10264, 'AE000302', '0.61', 31.3, 62 ],
369 [ 'gb|AE000277.1|AE000277', 11653, 'AE000277', '0.84', 30.8, 61 ]
373 while ( $hit = $result->next_hit ) {
# Take the next expected row for this hit.
374 my $d = shift @valid;
375 is( $hit->name, shift @$d );
376 is( $hit->length, shift @$d );
377 is( $hit->accession, shift @$d );
378 float_is( $hit->significance, shift @$d );
379 is( $hit->bits, shift @$d );
380 is( $hit->raw_score, shift @$d );
# HSP-level checks. The hit is expected on strand -1, yet start (5816) is
# still the smaller coordinate and end (5893) the larger one.
384 while ( my $hsp = $hit->next_hsp ) {
385 is( $hsp->query->start, 1057 );
386 is( $hsp->query->end, 1134 );
387 is( $hsp->query->strand, 1 );
388 is( $hsp->strand('query'), $hsp->query->strand );
389 is( $hsp->hit->end, 5893 );
390 is( $hsp->hit->start, 5816 );
391 is( $hsp->hit->strand, -1 );
392 is( $hsp->strand('sbjct'), $hsp->subject->strand );
393 is( $hsp->length('total'), 26 );
395 float_is( $hsp->evalue, 0.13 );
396 is( $hsp->score, 67 );
397 is( $hsp->bits, 33.6 );
# Fractions are rounded through sprintf before being compared.
398 is( sprintf( "%.2f", $hsp->percent_identity ), 42.31 );
399 is( sprintf( "%.4f", $hsp->frac_identical('query') ), '0.4231' );
400 is( sprintf( "%.4f", $hsp->frac_identical('hit') ), '0.4231' );
401 is( $hsp->query->frame(), 0 );
402 is( $hsp->hit->frame(), 1 );
# Aligned strings as stored on the HSP.
404 is( $hsp->query_string, 'SAYWSIFPPLGCWWSTLGPRGSLSPL' );
405 is( $hsp->hit_string, 'AAVWALFPPVGSQWGCLASQWRTSPL' );
406 is( $hsp->homology_string, '+A W++FPP+G W L + SPL' );
408 # Changed to reflect positional ambiguities; note the extra flag
# (presumably the trailing 1 passed to seq_inds() below - confirm).
410 join( ' ', $hsp->seq_inds( 'query', 'nomatch', 1 ) ),
411 '1063-1065 1090-1095 1099-1104 1108-1113 1117-1125'
414 join( ' ', $hsp->seq_inds( 'hit', 'nomatch', 1 ) ),
415 '5825-5833 5837-5842 5846-5851 5855-5860 5885-5887'
417 is( $hsp->ambiguous_seq_inds, 'query/subject' );
# Post-increments $count and leaves the loop once its previous value exceeds
# the number of rows still held in @valid.
423 last if ( $count++ > @valid );
427 # Test for MarkW bug in BLASTN.
429 $searchio = Bio::SearchIO->new(
430 '-format' => 'blast',
431 '-file' => test_input_file('a_thaliana.blastn')
# Only the first result in the report is inspected.
434 $result = $searchio->next_result;
435 like($result->algorithm_reference,qr/Gapped BLAST and PSI-BLAST/);
# Request id, database and program metadata.
436 is( $result->rid, '1012577175-3730-28291' );
437 is( $result->database_name,
438 'All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,or phase 0, 1 or 2 HTGS sequences) '
440 is( $result->database_letters, 4677375331 );
441 is( $result->database_entries, 1083200 );
442 is( $result->algorithm, 'BLASTN' );
443 like( $result->algorithm_version, qr/^2\.2\.1/ );
# The query name is expected to be the empty string for this report.
444 is( $result->query_name, '' );
445 is( $result->query_length, 60 );
446 is( $result->get_parameter('gapopen'), 5 );
447 is( $result->get_parameter('gapext'), 2 );
# The 'ktup' parameter is expected to be undefined for this report.
448 is( $result->get_parameter('ktup'), undef );
449 is( $result->get_statistic('querylength'), 41 );
450 is( $result->get_statistic('effectivedblength'), 4656794531 );
451 is( $result->get_statistic('effectivespace'), 190928575771 );
452 is( $result->get_statistic('effectivespaceused'), 190928575771 );
# Statistics exposed through get_statistic(); several raw values are paired
# with a '_bits' form.
454 is( $result->get_statistic('lambda'), 1.37 );
455 is( $result->get_statistic('kappa'), 0.711 );
456 is( $result->get_statistic('entropy'), 1.31 );
457 is( $result->get_statistic('T'), 0 );
458 is( $result->get_statistic('A'), 30 );
459 is( $result->get_statistic('X1'), '6' );
460 is( $result->get_statistic('X1_bits'), 11.9 );
461 is( $result->get_statistic('X2'), 15 );
462 is( $result->get_statistic('X2_bits'), 29.7 );
463 is( $result->get_statistic('S1'), 12 );
464 is( $result->get_statistic('S1_bits'), 24.3 );
465 is( $result->get_statistic('S2'), 17 );
466 is( $result->get_statistic('S2_bits'), 34.2 );
468 is( $result->get_statistic('dbentries'), 1083200 );
# Expected per-hit rows, in the order the loop below shifts them off:
# name, length, accession, significance, bits, raw score, start, end,
# frac_aligned_query (formatted to four decimals).
471 [ 'gb|AY052359.1|', 2826, 'AY052359', '3e-18', 95.6, 48, 1, 60, '1.0000' ],
473 'gb|AC002329.2|AC002329', 76170, 'AC002329', '3e-18', 95.6, 48, 1, 60,
477 'gb|AF132318.1|AF132318', 5383, 'AF132318', '0.04', 42.1, 21, 35, 55,
483 while ( my $hit = $result->next_hit ) {
# Take the next expected row for this hit.
484 my $d = shift @valid;
485 is( $hit->name, shift @$d );
486 is( $hit->length, shift @$d );
487 is( $hit->accession, shift @$d );
488 float_is( $hit->significance, shift @$d );
489 is( $hit->bits, shift @$d );
490 is( $hit->raw_score, shift @$d );
491 is( $hit->start, shift @$d );
492 is( $hit->end, shift @$d );
493 is( sprintf( "%.4f", $hit->frac_aligned_query ), shift @$d );
# HSP-level checks: coordinates, strands, scores and identity fractions.
497 while ( my $hsp = $hit->next_hsp ) {
498 is( $hsp->query->start, 1 );
499 is( $hsp->query->end, 60 );
500 is( $hsp->query->strand, 1 );
501 is( $hsp->hit->start, 154 );
502 is( $hsp->hit->end, 212 );
503 is( $hsp->hit->strand, 1 );
504 is( $hsp->length('total'), 60 );
505 float_is( $hsp->evalue, 3e-18 );
506 is( $hsp->score, 48 );
507 is( $hsp->bits, 95.6 );
# Query and hit fractions differ here; the hit side is also the one
# expected to report a gap below.
508 is( sprintf( "%.2f", $hsp->percent_identity ), 96.67 );
509 is( sprintf( "%.4f", $hsp->frac_identical('query') ), 0.9667 );
510 is( sprintf( "%.4f", $hsp->frac_identical('hit') ), 0.9831 );
511 is( $hsp->query->frame(), 0 );
512 is( $hsp->hit->frame(), 0 );
# The query seq_id is expected to be undefined; the hit keeps its identifier.
513 is( $hsp->query->seq_id, undef );
514 is( $hsp->hit->seq_id, 'gb|AY052359.1|' );
515 is( $hsp->gaps('query'), 0 );
516 is( $hsp->gaps('hit'), 1 );
# Aligned strings as stored on the HSP; the hit string carries the single gap.
518 is( $hsp->query_string,
519 'aggaatgctgtttaattggaatcgtacaatggagaatttgacggaaatagaatcaacgat'
521 is( $hsp->hit_string,
522 'aggaatgctgtttaattggaatca-acaatggagaatttgacggaaatagaatcaacgat'
524 is( $hsp->homology_string,
525 '||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||'
# The alignment object built from the HSP must report matching identities.
527 my $aln = $hsp->get_aln;
528 is( sprintf( "%.2f", $aln->overall_percentage_identity ), 96.67 );
529 is( sprintf( "%.2f", $aln->percentage_identity ), 98.31 );
# Post-increments $count and leaves the loop once its previous value exceeds
# the number of rows still held in @valid.
535 last if ( $count++ > @valid );
# --- Report section: dnaEbsub_ecoli.wublastx (WU-BLAST BLASTX), result-level checks ---
541 $searchio = Bio::SearchIO->new(
542 '-format' => 'blast',
543 '-file' => test_input_file('dnaEbsub_ecoli.wublastx')
# Only the first result in the report is inspected. The expected algorithm
# reference that follows is a multi-line string literal.
546 $result = $searchio->next_result;
548 $result->algorithm_reference, 'Gish, W. (1996-2000) http://blast.wustl.edu
549 Gish, Warren and David J. States (1993). Identification of protein coding
550 regions by database similarity search. Nat. Genet. 3:266-72.
553 is( $result->database_name, 'ecoli.aa' );
# Remaining database, program and query metadata.
554 is( $result->database_letters, 1358990 );
555 is( $result->database_entries, 4289 );
556 is( $result->algorithm, 'BLASTX' );
557 like( $result->algorithm_version, qr/^2\.0MP\-WashU/ );
558 is( $result->query_name, 'gi|142864|gb|M10040.1|BACDNAE' );
559 is( $result->query_description,
560 'B.subtilis dnaE gene encoding DNA primase, complete cds' );
561 is( $result->query_accession, 'M10040.1' );
562 is( $result->query_gi, 142864 );
563 is( $result->query_length, 2001 );
# The matrix name is expected in lower case here, unlike the 'BLOSUM62'
# expected in the earlier sections.
564 is( $result->get_parameter('matrix'), 'blosum62' );
566 is( $result->get_statistic('lambda'), 0.318 );
567 is( $result->get_statistic('kappa'), 0.135 );
568 is( $result->get_statistic('entropy'), 0.401 );
570 is( $result->get_statistic('dbentries'), 4289 );
# Single expected hit row: name, length, accession, significance, raw score.
572 @valid = ( [ 'gi|1789447|gb|AAC76102.1|', 581, 'AAC76102', '1.1e-74', 671 ] );
575 while ( my $hit = $result->next_hit ) {
576 my $d = shift @valid;
577 is( $hit->name, shift @$d );
578 is( $hit->length, shift @$d );
579 is( $hit->accession, shift @$d );
580 float_is( $hit->significance, shift @$d );
581 is( $hit->raw_score, shift @$d );
582 is( sprintf( "%.4f", $hit->frac_identical('query') ), '0.3640' );
583 is( sprintf( "%.4f", $hit->frac_identical('hit') ), '0.3660' );
584 is( sprintf( "%.4f", $hit->frac_conserved('query') ), '0.5370' );
585 is( sprintf( "%.4f", $hit->frac_conserved('hit') ), '0.5400' );
586 is( sprintf( "%.4f", $hit->frac_aligned_query ), '0.6200' );
587 is( sprintf( "%.4f", $hit->frac_aligned_hit ), '0.7100' );
591 while ( my $hsp = $hit->next_hsp ) {
592 is( $hsp->query->start, 21 );
593 is( $hsp->query->end, 1265 );
594 is( $hsp->query->strand, 1 );
595 is( $hsp->hit->start, 1 );
596 is( $hsp->hit->end, 413 );
597 is( $hsp->hit->strand, 0 );
598 is( $hsp->length('total'), 421 );
599 float_is( $hsp->evalue, 1.1e-74 );
600 float_is( $hsp->pvalue, '1.1e-74' );
601 is( $hsp->score, 671 );
602 is( $hsp->bits, 265.8 );
603 is( sprintf( "%.2f", $hsp->percent_identity ), 35.87 );
605 is( sprintf( "%.4f", $hsp->frac_identical('query') ), 0.3639 );
606 is( sprintf( "%.4f", $hsp->frac_identical('hit') ), 0.3656 );
607 is( sprintf( "%.4f", $hsp->frac_conserved('query') ), 0.5373 );
608 is( sprintf( "%.2f", $hsp->frac_conserved('hit') ), 0.54 );
610 is( sprintf( "%.4f", $hsp->frac_identical('hsp') ), 0.3587 );
611 is( sprintf( "%.4f", $hsp->frac_conserved('hsp') ), 0.5297 );
613 is( $hsp->query->frame(), 2 );
614 is( $hsp->hit->frame(), 0 );
615 is( $hsp->gaps('query'), 6 );
616 is( $hsp->gaps('hit'), 8 );
617 is( $hsp->gaps, 14 );
618 is( $hsp->query_string,
619 'MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFHCFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKMAEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFGGEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVKQ'
621 is( $hsp->hit_string,
622 'MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYHCFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTLYQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSPETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHIQLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEGKEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLRQ'
624 is( $hsp->homology_string,
625 'M RIP ++ + DIV++I V+LKKQG+N+ CPFH E TPSF+V+ +KQ +HCFGCGA GN FL + F E+V LA + ++ P + +G+ P E Q + + + L FY L A YL RG + E+I F IG+A WD + K + + AG+L+ + G Y DRFR RVMFPI D G V+ F GR LG+ PKY+NSPET +FHK + LY Y+A+ + R ++ EG+ DV + ++A++GTS T DH+++L R +I CYD D+AG +A +A E G ++R +PDG DPD ++K G E F+ + + ++ + AF +LS R L IS + G + +Y++Q'
628 join( ' ', $hsp->seq_inds( 'query', 'nomatch', 1 ) ),
629 '24-29 39-47 54-56 60-71 90-98 129-137 150-152 156-158 180-182 192-194 219-221 228-236 243-251 255-263 267-269 279-284 291-296 300-302 309-311 315-317 321-332 342-344 351-362 366-368 372-374 378-383 387-389 393-398 405-413 417-440 444-449 456-461 468-470 474-476 486-491 495-497 510-518 525-527 531-533 537-557 561-569 573-578 594-599 603-605 609-614 618-620 633-635 654-656 660-665 669-671 678-680 684-686 693-695 705-710 738-740 753-755 759-761 768-773 786-797 801-806 810-812 819-821 831-833 840-860 864-869 894-896 900-902 921-923 927-938 945-947 957-959 972-974 981-986 993-995 999-1013 1017-1019 1029-1037 1050-1052 1062-1067 1077-1079 1083-1085 1089-1091 1098-1103 1107-1109 1113-1115 1122-1124 1128-1130 1137-1163 1173-1184 1188-1208 1212-1217 1224-1226 1230-1232 1236-1244 1248-1250'
632 join( ' ', $hsp->seq_inds( 'query', 'mismatch', 1 ) ),
633 '24-29 39-47 54-56 60-71 90-98 129-137 150-152 156-158 180-182 192-194 219-221 228-236 243-251 255-263 267-269 279-284 291-296 300-302 309-311 315-317 342-344 351-362 366-368 372-374 378-383 387-389 393-398 405-413 420-440 444-449 456-461 468-470 474-476 486-491 495-497 510-518 525-527 531-533 537-557 561-569 573-578 594-599 603-605 609-614 633-635 654-656 660-665 669-671 678-680 684-686 693-695 705-710 738-740 753-755 759-761 768-773 786-797 801-806 810-812 819-821 831-833 840-860 864-869 894-896 900-902 921-923 927-938 945-947 957-959 972-974 981-986 993-995 999-1013 1017-1019 1029-1037 1050-1052 1062-1067 1077-1079 1083-1085 1089-1091 1098-1103 1113-1115 1122-1124 1128-1130 1137-1163 1173-1184 1188-1208 1212-1217 1224-1226 1230-1232 1236-1244'
636 join( ' ', $hsp->seq_inds( 'hit', 'nomatch', 1 ) ),
637 '2 3 7-9 12 14-17 24-26 37-39 44 46 54 58 67 70-72 75-77 79-81 83 87 88 91 92 94 97 99 104 106-108 110-113 115 117 119 120 122 124 125 128-130 132-138 140 141 144 145 148 150 154 155 157 162-164 167 169 171-177 179-181 183 184 190 191 193 195 196 202 209 211 212 214 217 219 222 226 227 237 242 244 247 248 253-256 258 259 261 264 268 271-277 279 280 289 291 298 300-303 306 310 315 318 319 322 324-331 333 337-339 344 348 349 353 355 357 360 361 364 367 369 372-380 384-387 389-395 397 398 401 403 405-407'
640 join( ' ', $hsp->seq_inds( 'hit', 'mismatch', 1 ) ),
641 '2 3 7-9 12 14-17 24-26 37-39 44 46 54 58 67 70-72 75-77 79-81 83 87 88 91 92 94 97 99 104 110-113 115 117 119 120 122 124 125 128-130 132-138 140 141 144 145 148 150 154 155 157 162-164 167 169 171-177 179-181 183 184 190 191 193 195 196 202 209 211 212 214 217 219 222 226 227 237 242 244 247 248 253-256 258 259 261 264 268 271-277 279 280 289 291 298 300-303 306 310 315 318 319 322 324 325 329-331 333 337-339 344 348 349 353 355 357 360 361 364 367 369 372-380 384-387 389-395 397 398 401 403 405-407'
643 is( join( ' ', $hsp->seq_inds( 'query', 'gaps', 1 ) ), '347 1004' );
644 is( join( ' ', $hsp->seq_inds( 'hit', 'gaps', 1 ) ),
645 '100 131 197 362 408' );
646 is( $hsp->ambiguous_seq_inds, 'query' );
652 last if ( $count++ > @valid );
# --- Report section: tricky.wublast, frac_* sanity checks on every hit ---
657 $searchio = Bio::SearchIO->new(
658 '-format' => 'blast',
659 '-file' => test_input_file('tricky.wublast')
661 $result = $searchio->next_result;
663 while ( my $hit = $result->next_hit ) {
665 # frac_aligned_hit used to be over 1; frac_identical & frac_conserved are still wrong
# $TODO is localized, so it only applies within the enclosing scope; that
# scope's braces are not visible in this excerpt.
667 local $TODO = 'frac_identical & frac_conserved are still too wrong';
668 cmp_ok sprintf( "%.3f", $hit->frac_identical ), '>', 0.9;
669 cmp_ok sprintf( "%.3f", $hit->frac_conserved ), '<=', 1;
# Aligned fractions, rounded to two decimals, must stay below 1.
671 is( sprintf( "%.2f", $hit->frac_aligned_query ), '0.92' );
672 is( sprintf( "%.2f", $hit->frac_aligned_hit ), '0.91' );
677 # More frac_ method testing, this time on NCBI BLASTN.
678 $searchio = Bio::SearchIO->new(
679 '-format' => 'blast',
680 '-file' => test_input_file('frac_problems.blast')
# One expected frac_identical value per result, consumed in report order.
682 my @expected = ( "1.000", "0.943" );
# Unlike the other sections, every result in the report is visited; only the
# first hit of each is checked.
683 while ( my $result = $searchio->next_result ) {
684 my $hit = $result->next_hit;
685 is( $hit->frac_identical, shift @expected );
689 # And even more: frac_aligned_query should never be over 1!
690 $searchio = Bio::SearchIO->new(
691 '-format' => 'blast',
692 '-file' => test_input_file('frac_problems2.blast')
# Check the first hit of the first result only.
694 $result = $searchio->next_result;
695 $hit = $result->next_hit;
696 is $hit->frac_aligned_query, 0.97;
698 # Also, start and end should be sane.
699 $searchio = Bio::SearchIO->new(
700 '-format' => 'blast',
701 '-file' => test_input_file('frac_problems3.blast')
# Check the subject-side start and end of the first hit of the first result.
703 $result = $searchio->next_result;
704 $hit = $result->next_hit;
705 is $hit->start('sbjct'), 207;
706 is $hit->end('sbjct'), 1051;
# --- Report section: dnaEbsub_ecoli.wutblastn (WU-BLAST TBLASTN), result-level checks ---
710 $searchio = Bio::SearchIO->new(
711 '-format' => 'blast',
712 '-file' => test_input_file('dnaEbsub_ecoli.wutblastn')
# Only the first result in the report is inspected. The expected algorithm
# reference that follows starts a multi-line string literal.
715 $result = $searchio->next_result;
717 $result->algorithm_reference, 'Gish, W. (1996-2000) http://blast.wustl.edu
720 is( $result->database_name, 'ecoli.nt' );
# Remaining database, program and query metadata.
721 is( $result->database_letters, 4662239 );
722 is( $result->database_entries, 400 );
723 is( $result->algorithm, 'TBLASTN' );
724 like( $result->algorithm_version, qr/^2\.0MP\-WashU/ );
725 is( $result->query_name, 'gi|142865|gb|AAA22406.1|' );
726 is( $result->query_description, 'DNA primase' );
727 is( $result->query_accession, 'AAA22406.1' );
728 is( $result->query_gi, 142865 );
729 is( $result->query_length, 603 );
730 is( $result->get_parameter('matrix'), 'blosum62' );
# lambda is quoted so the trailing zero survives the comparison
# (assuming is() compares as strings, as in Test::More).
732 is( $result->get_statistic('lambda'), '0.320' );
733 is( $result->get_statistic('kappa'), 0.136 );
734 is( $result->get_statistic('entropy'), 0.387 );
736 is( $result->get_statistic('dbentries'), 400 );
# Single expected hit row: name, length, accession, significance, raw score.
739 ( [ 'gi|1789441|gb|AE000388.1|AE000388', 10334, 'AE000388', '1.4e-73', 671 ]
743 while ( my $hit = $result->next_hit ) {
744 my $d = shift @valid;
745 is( $hit->name, shift @$d );
746 is( $hit->length, shift @$d );
747 is( $hit->accession, shift @$d );
748 float_is( $hit->significance, shift @$d );
749 is( $hit->raw_score, shift @$d );
753 while ( my $hsp = $hit->next_hsp ) {
754 is( $hsp->query->start, 1 );
755 is( $hsp->query->end, 415 );
756 is( $hsp->query->strand, 0 );
757 is( $hsp->hit->start, 4778 );
758 is( $hsp->hit->end, 6016 );
759 is( $hsp->hit->strand, 1 );
760 is( $hsp->length('total'), 421 );
761 float_is( $hsp->evalue, 1.4e-73 );
762 float_is( $hsp->pvalue, 1.4e-73 );
763 is( $hsp->score, 671 );
764 is( $hsp->bits, 265.8 );
765 is( sprintf( "%.2f", $hsp->percent_identity ), 35.87 );
766 is( sprintf( "%.4f", $hsp->frac_identical('hit') ), 0.3656 );
767 is( sprintf( "%.4f", $hsp->frac_identical('query') ), 0.3639 );
768 is( sprintf( "%.4f", $hsp->frac_conserved('hsp') ), 0.5297 );
769 is( $hsp->query->frame(), 0 );
770 is( $hsp->hit->frame(), 1 );
771 is( $hsp->gaps('query'), 6 );
772 is( $hsp->gaps('hit'), 8 );
773 is( $hsp->gaps, 14 );
774 is( $hsp->query_string,
775 'MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFHCFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKMAEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFGGEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVKQ'
777 is( $hsp->hit_string,
778 'MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYHCFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTLYQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSPETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHIQLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEGKEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLRQ'
780 is( $hsp->homology_string,
781 'M RIP ++ + DIV++I V+LKKQG+N+ CPFH E TPSF+V+ +KQ +HCFGCGA GN FL + F E+V LA + ++ P + +G+ P E Q + + + L FY L A YL RG + E+I F IG+A WD + K + + AG+L+ + G Y DRFR RVMFPI D G V+ F GR LG+ PKY+NSPET +FHK + LY Y+A+ + R ++ EG+ DV + ++A++GTS T DH+++L R +I CYD D+AG +A +A E G ++R +PDG DPD ++K G E F+ + + ++ + AF +LS R L IS + G + +Y++Q'
784 join( ' ', $hsp->seq_inds( 'query', 'nomatch', 1 ) ),
785 '2 3 7-9 12 14-17 24-26 37-39 44 46 54 58 67 70-72 75-77 79-81 83 87 88 91 92 94 97 99 101-104 108 111-114 116 118 120 121 123 125 126 129-131 133-140 142 143 146 147 150 152 156 157 159 164-166 169 171 173-179 181-183 185 186 192 193 195 197 198 200 205 212 214 215 217 220 222 225 229 230 240 245 247 250 251 256-259 261 262 264 267 271 274-280 282 283 292 294 301 303-306 309 313 318 321 322 325 327-331 333 337-339 344 348 349 353 355 357 360 361 363 365 368 370 373-381 385-388 390-396 398 399 402 404 406-408 410'
788 join( ' ', $hsp->seq_inds( 'hit', 'nomatch', 1 ) ),
789 '4781-4786 4796-4804 4811-4813 4817-4828 4847-4855 4886-4894 4907-4909 4913-4915 4937-4939 4949-4951 4976-4978 4985-4993 5000-5008 5012-5020 5024-5026 5036-5041 5048-5053 5057-5059 5066-5068 5072-5074 5087-5089 5093-5101 5105-5116 5120-5122 5126-5128 5132-5137 5141-5143 5147-5152 5159-5167 5171-5191 5195-5200 5207-5212 5219-5221 5225-5227 5237-5242 5246-5248 5261-5269 5276-5278 5282-5284 5288-5308 5312-5320 5324-5329 5345-5350 5354-5356 5360-5365 5381-5383 5402-5404 5408-5413 5417-5419 5426-5428 5432-5434 5441-5443 5453-5458 5486-5488 5501-5503 5507-5509 5516-5521 5534-5545 5549-5554 5558-5560 5567-5569 5579-5581 5588-5608 5612-5617 5642-5644 5648-5650 5669-5671 5675-5686 5693-5695 5705-5707 5720-5722 5729-5734 5741-5743 5747-5770 5774-5776 5786-5794 5807-5809 5819-5824 5834-5836 5840-5842 5846-5848 5855-5860 5867-5869 5876-5878 5882-5884 5891-5917 5927-5938 5942-5962 5966-5971 5978-5980 5984-5986 5990-5998'
791 is( join( ' ', $hsp->seq_inds( 'query', 'gaps', 1 ) ), '109 328' );
792 is( join( ' ', $hsp->seq_inds( 'hit', 'gaps', 1 ) ),
793 '5077 5170 5368 5863 6001' );
794 is( $hsp->ambiguous_seq_inds, 'subject' );
800 last if ( $count++ > @valid );
# WU-BLAST TBLASTX report (dnaEbsub_ecoli.wutblastx): the result-level
# metadata is checked first, then per-hit and per-HSP values.
805 $searchio = Bio::SearchIO->new(
806 '-format' => 'blast',
807 '-file' => test_input_file('dnaEbsub_ecoli.wutblastx')
810 $result = $searchio->next_result;
812 $result->algorithm_reference, 'Gish, W. (1996-2000) http://blast.wustl.edu
815 is( $result->database_name, 'ecoli.nt' );
816 is( $result->database_letters, 4662239 );
817 is( $result->database_entries, 400 );
818 is( $result->algorithm, 'TBLASTX' );
819 like( $result->algorithm_version, qr/^2\.0MP\-WashU/ );
820 is( $result->query_name, 'gi|142864|gb|M10040.1|BACDNAE' );
821 is( $result->query_description,
822 'B.subtilis dnaE gene encoding DNA primase, complete cds' );
823 is( $result->query_accession, 'M10040.1' );
824 is( $result->query_gi, 142864 );
825 is( $result->query_length, 2001 );
826 is( $result->get_parameter('matrix'), 'blosum62' );
828 is( $result->get_statistic('lambda'), 0.318 );
829 is( $result->get_statistic('kappa'), 0.135 );
830 is( $result->get_statistic('entropy'), 0.401 );
831 is( $result->get_statistic('dbentries'), 400 );
835 'gi|1789441|gb|AE000388.1|AE000388',
836 10334, 'AE000388', '6.4e-70', 318, 148.6
838 [ 'gi|2367383|gb|AE000509.1|AE000509', 10589, 'AE000509', 1, 59, 29.9 ]
# Each row of @valid is shifted off in this order:
# name, length, accession, significance, raw score, bits.
842 while ( my $hit = $result->next_hit ) {
843 my $d = shift @valid;
844 is( $hit->name, shift @$d );
845 is( $hit->length, shift @$d );
846 is( $hit->accession, shift @$d );
848 # significance is checked with float_is rather than is (original note: using e to deal with 0.9992 coming out right as well)
849 float_is( $hit->significance, shift @$d );
850 is( $hit->raw_score, shift @$d );
851 is( $hit->bits, shift @$d );
854 while ( my $hsp = $hit->next_hsp ) {
856 if ( $hspcounter == 3 ) {
858 # only the 3rd HSP is inspected in detail
859 is( $hsp->query->start, 441 );
860 is( $hsp->query->end, 617 );
861 is( $hsp->query->strand, 1 );
862 is( $hsp->hit->start, 5192 );
863 is( $hsp->hit->end, 5368 );
864 is( $hsp->hit->strand, 1 );
865 is( $hsp->length('total'), 59 );
866 float_is( $hsp->evalue, 6.4e-70 );
867 float_is( $hsp->pvalue, 6.4e-70 );
868 is( $hsp->score, 85 );
869 is( $hsp->bits, 41.8 );
# fractions are rounded with sprintf before comparison
870 is( sprintf( "%.2f", $hsp->percent_identity ), '32.20' );
871 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.322 );
872 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.322 );
873 is( sprintf( "%.4f", $hsp->frac_conserved('hsp') ), 0.4746 );
874 is( $hsp->query->frame(), 2 );
875 is( $hsp->hit->frame(), 1 );
876 is( $hsp->gaps('query'), 0 );
877 is( $hsp->gaps('hit'), 0 );
880 is( $hsp->query_string,
881 'ALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGY'
883 is( $hsp->hit_string,
884 'ARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY'
886 is( $hsp->homology_string,
887 'A YL RG + E+I F IG+A WD + K + + AG+L+ + G Y'
890 join( ' ', $hsp->seq_inds( 'query', 'nomatch', 1 ) ),
891 '444-449 456-461 468-470 474-476 486-491 495-497 510-518 525-527 531-533 537-557 561-569 573-578 594-599 603-605 609-614'
894 join( ' ', $hsp->seq_inds( 'hit', 'nomatch', 1 ) ),
895 '5195-5200 5207-5212 5219-5221 5225-5227 5237-5242 5246-5248 5261-5269 5276-5278 5282-5284 5288-5308 5312-5320 5324-5329 5345-5350 5354-5356 5360-5365'
897 is( $hsp->ambiguous_seq_inds, 'query/subject' );
# the HSP counter must end at 3 for this hit
901 is( $hspcounter, 3 );
903 elsif ( $count == 1 ) {
# for the hit with $count == 1, every HSP returned is held to the same expected values
905 while ( my $hsp = $hit->next_hsp ) {
906 is( $hsp->query->start, 587 );
907 is( $hsp->query->end, 706 );
908 is( $hsp->query->strand, -1 );
909 is( $hsp->hit->start, 4108 );
910 is( $hsp->hit->end, 4227 );
911 is( $hsp->hit->strand, -1 );
912 is( $hsp->length('total'), 40 );
913 float_is( $hsp->evalue, 7.1 );
914 float_is( $hsp->pvalue, '1.00' );
915 is( $hsp->score, 59 );
916 is( $hsp->bits, 29.9 );
917 is( sprintf( "%.2f", $hsp->percent_identity ), '37.50' );
918 is( sprintf( "%.4f", $hsp->frac_identical('hit') ), '0.3750' );
919 is( sprintf( "%.4f", $hsp->frac_identical('query') ), '0.3750' );
920 is( sprintf( "%.4f", $hsp->frac_conserved('hsp') ), '0.4750' );
921 is( $hsp->query->frame(), 2 );
922 is( $hsp->hit->frame(), 2 );
923 is( $hsp->gaps('query'), 0 );
924 is( $hsp->gaps('hit'), 0 );
927 is( $hsp->query_string,
928 'WLPRALPEKATTAP**SWIGNMTRFLKRSKYPLPSSRLIR' );
929 is( $hsp->hit_string, 'WLSRTTVGSSTVSPRTFWITRMKVKLSSSKVTLPSTKSTR' );
930 is( $hsp->homology_string,
931 'WL R +T +P WI M L SK LPS++ R' );
# $hsps_to_do is expected to be 0 once the HSP loop is done
935 is( $hsps_to_do, 0 );
# @valid is used in numeric context here, i.e. the number of rows not yet shifted off
937 last if ( $count++ > @valid );
941 # WU-BLAST -echofilter option test (Bug 2388)
# BLASTN report with a single expected hit whose HSP spans positions 1-1936
# on both query and hit.
942 $searchio = Bio::SearchIO->new(
943 '-format' => 'blast',
944 '-file' => test_input_file('echofilter.wublastn')
947 $result = $searchio->next_result;
949 $result->algorithm_reference, 'Gish, W. (1996-2006) http://blast.wustl.edu
952 is( $result->database_name, 'NM_003201.fa' );
953 is( $result->database_letters, 1936 );
954 is( $result->database_entries, 1 );
955 is( $result->algorithm, 'BLASTN' );
956 like( $result->algorithm_version, qr/^2\.0MP\-WashU/ );
# NOTE(review): the pipe characters in the pattern below are unescaped, so the
# regex is an alternation and matches as soon as any single alternative is
# found in query_name - confirm that this loose match is intended.
957 like( $result->query_name,
958 qr/ref|NM_003201.1| Homo sapiens transcription factor A, mitochondrial \(TFAM\), mRNA/
960 is( $result->query_accession, 'NM_003201.1' );
962 is( $result->query_length, 1936 );
963 is( $result->get_statistic('lambda'), 0.192 );
964 is( $result->get_statistic('kappa'), 0.182 );
965 is( $result->get_statistic('entropy'), 0.357 );
966 is( $result->get_statistic('dbletters'), 1936 );
967 is( $result->get_statistic('dbentries'), 1 );
968 is( $result->get_parameter('matrix'), '+5,-4' );
# single expectation row: name, length, accession, significance, raw score
970 @valid = ( [ 'ref|NM_003201.1|', 1936, 'NM_003201', '0', 9680 ], );
972 while ( $hit = $result->next_hit ) {
973 my $d = shift @valid;
975 is( $hit->name, shift @$d );
976 is( $hit->length, shift @$d );
977 is( $hit->accession, shift @$d );
978 float_is( $hit->significance, shift @$d );
979 is( $hit->raw_score, shift @$d );
983 while ( my $hsp = $hit->next_hsp ) {
984 is( $hsp->query->start, 1 );
985 is( $hsp->query->end, 1936 );
986 is( $hsp->hit->start, 1 );
987 is( $hsp->hit->end, 1936 );
988 is( $hsp->length('total'), 1936 );
# zero is written both as a number and as a string for the float comparisons
990 float_is( $hsp->evalue, 0. );
991 float_is( $hsp->pvalue, '0.' );
992 is( $hsp->score, 9680 );
993 is( $hsp->bits, 1458.4 );
994 is( $hsp->percent_identity, 100 );
995 is( $hsp->frac_identical('query'), 1.00 );
996 is( $hsp->frac_identical('hit'), 1.00 );
# $hsps_left is expected to be 0 after the HSP loop
1001 is( $hsps_left, 0 );
# @valid is used in numeric context here (rows not yet shifted off)
1003 last if ( $count++ > @valid );
1007 # Multi-report test: multi_blast.bls is read result by result
1008 $searchio = Bio::SearchIO->new(
1009 '-format' => 'blast',
1010 '-file' => test_input_file('multi_blast.bls')
# query names in the order the results are expected to come back
1013 @expected = qw(CATH_RAT CATL_HUMAN CATL_RAT PAPA_CARPA);
1014 my $results_left = 4;
1015 while ( my $result = $searchio->next_result ) {
1016 like($result->algorithm_reference, qr/Gapped BLAST and PSI-BLAST/);
# one expected name is consumed per result
1017 is( $result->query_name, shift @expected, "Multiblast query test" );
# $results_left starts at 4 and must be 0 at this point
1020 is( $results_left, 0 );
1022 # Test GCGBlast parsing: result metadata, then the first hit and its first HSP
1024 $searchio = Bio::SearchIO->new(
1025 '-format' => 'blast',
1026 '-file' => test_input_file('test.gcgblast')
1028 $result = $searchio->next_result();
1029 like($result->algorithm_reference,qr/Gapped BLAST and PSI-BLAST/);
1030 is( $result->query_name, '/v0/people/staji002/test.gcg' );
1031 is( $result->algorithm, 'BLASTP' );
1032 is( $result->algorithm_version, '2.2.1 [Apr-13-2001]' );
1033 is( $result->database_name, 'pir' );
1034 is( $result->database_entries, 274514 );
1035 is( $result->database_letters, 93460074 );
1036 is( $result->get_statistic('querylength'), 44 );
1037 is( $result->get_statistic('effectivedblength'), 65459646 );
1038 is( $result->get_statistic('effectivespace'), 2880224424 );
1039 is( $result->get_statistic('effectivespaceused'), 2880224424 );
# first hit only
1041 $hit = $result->next_hit;
1042 is( $hit->description, 'F22B7.10 protein - Caenorhabditis elegans' );
1043 is( $hit->name, 'PIR2:S44629' );
1044 is( $hit->length, 628 );
1045 is( $hit->accession, 'PIR2:S44629' );
1046 float_is( $hit->significance, 2e-08 );
1047 is( $hit->raw_score, 136 );
1048 is( $hit->bits, '57.0' );
# first HSP of that hit
1049 $hsp = $hit->next_hsp;
1050 float_is( $hsp->evalue, 2e-08 );
1051 is( $hsp->bits, '57.0' );
1052 is( $hsp->score, 136 );
1053 is( int( $hsp->percent_identity ), 28 );
1054 is( sprintf( "%.2f", $hsp->frac_identical('query') ), 0.29 );
# expected conserved fraction is written as the ratio 69/135 rather than a rounded decimal
1055 is( $hsp->frac_conserved('total'), 69 / 135 );
1056 is( $hsp->gaps('total'), 8 );
1057 is( $hsp->gaps('hit'), 6 );
1058 is( $hsp->gaps('query'), 2 );
1060 is( $hsp->hit->start, 342 );
1061 is( $hsp->hit->end, 470 );
1062 is( $hsp->query->start, 3 );
1063 is( $hsp->query->end, 135 );
1065 is( $hsp->query_string,
1066 'CAAEFDFMEKETPLRYTKTXXXXXXXXXXXXXXRKIISDMWGVLAKQQTHVRKHQFDHGELVYHALQLLAYTALGILIMRLKLFLTPYMCVMASLICSRQLFGW--LFCKVHPGAIVFVILAAMSIQGSANLQTQ'
1068 is( $hsp->hit_string,
1069 'CSAEFDFIQYSTIEKLCGTLLIPLALISLVTFVFNFVKNT-NLLWRNSEEIG----ENGEILYNVVQLCCSTVMAFLIMRLKLFMTPHLCIVAALFANSKLLGGDRISKTIRVSALVGVI-AILFYRGIPNIRQQ'
1071 is( $hsp->homology_string,
1072 'C+AEFDF++ T + T + + +L + + ++GE++Y+ +QL T + LIMRLKLF+TP++C++A+L + +L G + + A+V VI A + +G N++ Q'
1075 # Test all the database accession number formats (testdbaccnums.out)
1076 $searchio = Bio::SearchIO->new(
1078 -file => test_input_file('testdbaccnums.out')
1080 $result = $searchio->next_result;
1081 like($result->algorithm_reference,qr/Gapped BLAST and PSI-BLAST/);
1082 is( $result->rid, '1036160600-011802-21377' );
1083 is( $result->get_statistic('querylength'), 9 );
1084 is( $result->get_statistic('effectivedblength'), 35444647 );
1085 is( $result->get_statistic('effectivespace'), 319001823 );
1086 is( $result->get_statistic('effectivespaceused'), 319001823 );
1089 [ 'pir||T14789', 'T14789', 'T14789', 'CAB53709', 'AAH01726' ],
1090 [ 'gb|NP_065733.1|CYT19', 'NP_065733', 'CYT19' ],
1091 [ 'emb|XP_053690.4|Cyt19', 'XP_053690' ],
1092 [ 'dbj|NP_056277.2|DKFZP586L0724', 'NP_056277' ],
1093 [ 'prf||XP_064862.2', 'XP_064862' ],
1094 [ 'pdb|BAB13968.1|1', 'BAB13968' ],
1095 [ 'sp|Q16478|GLK5_HUMAN', 'Q16478' ],
1096 [ 'pat|US|NP_002079.2', 'NP_002079' ],
1097 [ 'bbs|NP_079463.2|', 'NP_079463' ],
1098 [ 'gnl|db1|NP_002444.1', 'NP_002444' ],
1099 [ 'ref|XP_051877.1|', 'XP_051877' ],
1100 [ 'lcl|AAH16829.1|', 'AAH16829' ],
1101 [ 'gi|1|gb|NP_065733.1|CYT19', 'NP_065733' ],
1102 [ 'gi|2|emb|XP_053690.4|Cyt19', 'XP_053690' ],
1103 [ 'gi|3|dbj|NP_056277.2|DKFZP586L0724', 'NP_056277' ],
1104 [ 'gi|4|pir||T14789', 'T14789' ],
1105 [ 'gi|5|prf||XP_064862.2', 'XP_064862' ],
1106 [ 'gi|6|pdb|BAB13968.1|1', 'BAB13968' ],
1107 [ 'gi|7|sp|Q16478|GLK5_HUMAN', 'Q16478' ],
1108 [ 'gi|8|pat|US|NP_002079.2', 'NP_002079' ],
1109 [ 'gi|9|bbs|NP_079463.2|', 'NP_079463' ],
1110 [ 'gi|10|gnl|db1|NP_002444.1', 'NP_002444' ],
1111 [ 'gi|11|ref|XP_051877.1|', 'XP_051877' ],
1112 [ 'gi|12|lcl|AAH16829.1|', 'AAH16829' ],
1113 [ 'MY_test_ID', 'MY_test_ID' ]
# first hit: name and accession, then each value returned by
# each_accession_number is compared with the next entry of the row
1116 $hit = $result->next_hit;
1117 my $d = shift @valid;
1118 is( $hit->name, shift @$d );
1119 is( $hit->accession, shift @$d );
1120 my @accnums = $hit->each_accession_number;
1121 foreach my $a (@accnums) {
1122 is( $a, shift @$d );
# next hit: name, accession and locus
1125 $hit = $result->next_hit;
1126 is( $hit->name, shift @$d );
1127 is( $hit->accession, shift @$d );
1128 is( $hit->locus, shift @$d );
# every further hit: name and accession only
1131 while ( $hit = $result->next_hit ) {
1132 my $d = shift @valid;
1133 is( $hit->name, shift @$d );
1134 is( $hit->accession, shift @$d );
# $hits_left is expected to be 0 once all hits have been read
1137 is( $hits_left, 0 );
1141 # parse the BLAST-like output
1142 my $infile = test_input_file('503384.MEGABLAST.2');
1143 my $in = Bio::SearchIO->new(
1146 ); # this is megablast blast-like output
1147 my $r = $in->next_result;
1150 'Contig3700', 5631, 396, 785, '0.0', 785, '0.0', 396, 639, 12, 8723,
1151 9434, 1, 4083, 4794, -1
1154 'Contig3997', 12734, 335, 664, '0.0', 664, '0.0', 335, 401, 0, 1282,
1155 1704, 1, 1546, 1968, -1
1158 'Contig634', 858, 245, 486, '1e-136', 486,
1159 '1e-136', 245, 304, 3, 7620, 7941,
1163 'Contig1853', 2314, 171, 339, '1e-91', 339,
1164 '1e-91', 171, 204, 0, 6406, 6620,
1169 like($r->algorithm_reference,qr/A greedy algorithm for aligning DNA sequences/);
1170 is( $r->algorithm, 'MEGABLAST' );
1171 is( $r->query_name, '503384' );
1172 is( $r->query_description, '11337 bp 2 contigs' );
1173 is( $r->query_length, 11337 );
1174 is( $r->database_name, 'cneoA.nt' );
1175 is( $r->database_letters, 17206226 );
1176 is( $r->database_entries, 4935 );
1177 is( $r->get_statistic('querylength'), 11318 );
1178 is( $r->get_statistic('effectivedblength'), 17112461 );
1179 is( $r->get_statistic('effectivespace'), 193678833598 );
1180 is( $r->get_statistic('effectivespaceused'), 0 );
# Each row of @dcompare is shifted off in this order:
#   hit: name, length, raw score, bits, significance
#   first HSP: bits, evalue, score, num_identical, total gaps,
#              query start/end/strand, hit start/end/strand
1183 while ( my $hit = $r->next_hit ) {
1184 my $d = shift @dcompare;
1185 is( $hit->name, shift @$d );
1186 is( $hit->length, shift @$d );
1187 is( $hit->raw_score, shift @$d );
1188 is( $hit->bits, shift @$d );
1189 float_is( $hit->significance, shift @$d );
# only the first HSP of each hit is examined
1191 my $hsp = $hit->next_hsp;
1192 is( $hsp->bits, shift @$d );
1193 float_is( $hsp->evalue, shift @$d );
1194 is( $hsp->score, shift @$d );
1195 is( $hsp->num_identical, shift @$d );
1196 is( $hsp->gaps('total'), shift @$d );
1197 is( $hsp->query->start, shift @$d );
1198 is( $hsp->query->end, shift @$d );
1199 is( $hsp->query->strand, shift @$d );
1200 is( $hsp->hit->start, shift @$d );
1201 is( $hsp->hit->end, shift @$d );
1202 is( $hsp->hit->strand, shift @$d );
# $hits_left is expected to be 0 once all hits have been read
1206 is( $hits_left, 0 );
1208 # Let's test RPS-BLAST: result metadata, first hit, first HSP coordinates
1210 my $parser = Bio::SearchIO->new(
1212 -file => test_input_file('ecoli_domains.rpsblast')
1215 $r = $parser->next_result;
1216 is( $r->algorithm, 'RPS-BLAST(BLASTP)');
1217 is( $r->algorithm_version, '2.2.4 [Aug-26-2002]');
# no algorithm reference is expected for this report
1218 is( $r->algorithm_reference, undef );
1219 is( $r->query_name, 'gi|1786183|gb|AAC73113.1|' );
1220 is( $r->query_gi, 1786183 );
1221 is( $r->num_hits, 7 );
1222 is( $r->get_statistic('querylength'), 438 );
1223 is( $r->get_statistic('effectivedblength'), 31988 );
1224 is( $r->get_statistic('effectivespace'), 14010744 );
1225 is( $r->get_statistic('effectivespaceused'), 24054976 );
1226 $hit = $r->next_hit;
1227 is( $hit->name, 'gnl|CDD|3919' );
1228 float_is( $hit->significance, 0.064 );
1229 is( $hit->bits, 28.3 );
1230 is( $hit->raw_score, 63 );
1231 $hsp = $hit->next_hsp;
1232 is( $hsp->query->start, 599 );
1233 is( $hsp->query->end, 655 );
1234 is( $hsp->hit->start, 23 );
1235 is( $hsp->hit->end, 76 );
1237 # Test PSI-BLAST parsing: result metadata, per-iteration hit counts, then hit and HSP details
1239 $searchio = Bio::SearchIO->new(
1240 '-format' => 'blast',
1241 '-file' => test_input_file('psiblastreport.out')
1244 $result = $searchio->next_result;
1245 like($result->algorithm_reference, qr/Gapped BLAST and PSI-BLAST/);
1246 is( $result->database_name, '/home/peter/blast/data/swissprot.pr' );
1247 is( $result->database_entries, 88780 );
1248 is( $result->database_letters, 31984247 );
1250 is( $result->algorithm, 'BLASTP' );
1251 like( $result->algorithm_version, qr/^2\.0\.14/ );
1252 is( $result->query_name, 'CYS1_DICDI' );
1253 is( $result->query_length, 343 );
1254 is( $result->get_statistic('kappa'), 0.0491 );
# lambda and entropy are compared numerically with ==, unlike the is() checks around them
1255 cmp_ok( $result->get_statistic('lambda'), '==', 0.270 );
1256 cmp_ok( $result->get_statistic('entropy'), '==', 0.230 );
1257 is( $result->get_statistic('dbletters'), 31984247 );
1258 is( $result->get_statistic('dbentries'), 88780 );
1259 is( $result->get_statistic('effective_hsplength'), 49 );
1260 is( $result->get_statistic('querylength'), 294 );
1261 is( $result->get_statistic('effectivedblength'), 27634027 );
1262 is( $result->get_statistic('effectivespace'), 8124403938 );
1263 is( $result->get_statistic('effectivespaceused'), 8124403938 );
1264 is( $result->get_parameter('matrix'), 'BLOSUM62' );
1265 is( $result->get_parameter('gapopen'), 11 );
1266 is( $result->get_parameter('gapext'), 1 );
# hit rows: name, length, accession, significance, bits
1268 my @valid_hit_data = (
1269 [ 'sp|P04988|CYS1_DICDI', 343, 'P04988', '0', 721 ],
1270 [ 'sp|P43295|A494_ARATH', 313, 'P43295', '1e-75', 281 ],
1271 [ 'sp|P25804|CYSP_PEA', 363, 'P25804', '1e-74', 278 ]
# iteration rows: num_hits, num_hits_new, num_hits_old, then the counts of
# newhits_below_threshold, newhits_not_below_threshold, newhits_unclassified,
# oldhits_below_threshold, oldhits_newly_below_threshold, oldhits_not_below_threshold
1273 my @valid_iter_data = (
1274 [ 127, 127, 0, 109, 18, 0, 0, 0, 0 ],
1275 [ 157, 40, 117, 2, 38, 0, 109, 3, 5 ]
1279 while ( $iter = $result->next_iteration ) {
1281 my $di = shift @valid_iter_data;
1282 is( $iter->number, $iter_count );
1284 is( $iter->num_hits, shift @$di );
1285 is( $iter->num_hits_new, shift @$di );
1286 is( $iter->num_hits_old, shift @$di );
# scalar() forces the list-returning accessors to yield a count
1287 is( scalar( $iter->newhits_below_threshold ), shift @$di );
1288 is( scalar( $iter->newhits_not_below_threshold ), shift @$di );
1289 is( scalar( $iter->newhits_unclassified ), shift @$di );
1290 is( scalar( $iter->oldhits_below_threshold ), shift @$di );
1291 is( scalar( $iter->oldhits_newly_below_threshold ), shift @$di );
1292 is( scalar( $iter->oldhits_not_below_threshold ), shift @$di );
# hit details are only checked while $iter_count is 1
1295 if ( $iter_count == 1 ) {
# note: hits are pulled from $result here, not from $iter
1296 while ( $hit = $result->next_hit ) {
1297 my $d = shift @valid_hit_data;
1299 is( $hit->name, shift @$d );
1300 is( $hit->length, shift @$d );
1301 is( $hit->accession, shift @$d );
1302 float_is( $hit->significance, shift @$d );
1303 is( $hit->bits, shift @$d );
# HSP details are only checked for the hit with $hit_count == 1
1305 if ( $hit_count == 1 ) {
1307 while ( my $hsp = $hit->next_hsp ) {
1308 is( $hsp->query->start, 32 );
1309 is( $hsp->query->end, 340 );
1310 is( $hsp->hit->start, 3 );
1311 is( $hsp->hit->end, 307 );
1312 is( $hsp->length('total'), 316 );
# the shortcut accessors on the HSP must agree with the query/hit/subject sub-objects
1313 is( $hsp->start('hit'), $hsp->hit->start );
1314 is( $hsp->end('query'), $hsp->query->end );
1315 is( $hsp->strand('sbjct'), $hsp->subject->strand )
1317 float_is( $hsp->evalue, 1e-75 );
1318 is( $hsp->score, 712 );
1319 is( $hsp->bits, 281 );
1320 is( sprintf( "%.1f", $hsp->percent_identity ), 46.5 );
1321 is( sprintf( "%.4f", $hsp->frac_identical('query') ),
1323 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.482 );
1324 is( $hsp->gaps, 18 );
1328 is( $hsps_left, 0 );
# @valid_hit_data is used in numeric context here (rows not yet shifted off)
1330 last if ( $hit_count++ > @valid_hit_data );
# both expectation lists should be used up by now; the arrays are compared
# as element counts (assumes the ($$;$) prototype of is() is in effect - TODO confirm)
1334 is( @valid_hit_data, 0 );
1335 is( @valid_iter_data, 0 );
# ecolitst.bls is parsed four times; each pass expects a different list of
# hit names, held in @valid and consumed in the order the hits are returned.
1339 $searchio = Bio::SearchIO->new(
1340 '-format' => 'blast',
1341 '-file' => test_input_file('ecolitst.bls'),
1345 @valid = qw(gb|AAC73113.1|);
1346 $r = $searchio->next_result;
1348 while ( my $hit = $r->next_hit ) {
1349 is( $hit->name, shift @valid );
# second pass: three hit names expected
1352 $searchio = Bio::SearchIO->new(
1353 '-format' => 'blast',
1354 '-file' => test_input_file('ecolitst.bls'),
1358 @valid = qw(gb|AAC73113.1| gb|AAC76922.1| gb|AAC76994.1|);
1359 $r = $searchio->next_result;
1361 while ( my $hit = $r->next_hit ) {
1362 is( $hit->name, shift @valid );
# third pass: two hit names expected
1366 $searchio = Bio::SearchIO->new(
1367 '-format' => 'blast',
1368 '-file' => test_input_file('ecolitst.bls'),
1372 @valid = qw(gb|AAC73113.1| gb|AAC76922.1|);
1373 $r = $searchio->next_result;
1375 while ( my $hit = $r->next_hit ) {
1376 is( $hit->name, shift @valid );
# fourth pass: a callback is supplied as -hit_filter; it evaluates to true
# when the hit has frac_identical('query') of at least 0.31
1380 my $filt_func = sub {
1382 $hit->frac_identical('query') >= 0.31;
1385 $searchio = Bio::SearchIO->new(
1386 '-format' => 'blast',
1387 '-file' => test_input_file('ecolitst.bls'),
1388 '-hit_filter' => $filt_func
1391 @valid = qw(gb|AAC73113.1| gb|AAC76994.1|);
1392 $r = $searchio->next_result;
1394 while ( my $hit = $r->next_hit ) {
1395 is( $hit->name, shift @valid );
1399 # bl2seq parsing testing
1401 # this is blastp bl2seq (bl2seq.out): first hit and its first HSP
1402 $searchio = Bio::SearchIO->new(
1404 -file => test_input_file('bl2seq.out')
1406 $result = $searchio->next_result;
1407 isa_ok( $result, 'Bio::Search::Result::ResultI' );
# this report is expected to give an empty query name and no algorithm reference
1408 is( $result->query_name, '' );
1409 is( $result->algorithm, 'BLASTP' );
1410 is( $result->algorithm_reference, undef );
1411 is( $result->get_statistic('querylength'), 320 );
1412 is( $result->get_statistic('effectivedblength'), 339 );
1413 is( $result->get_statistic('effectivespace'), 108480 );
1414 is( $result->get_statistic('effectivespaceused'), 108480 );
1415 $hit = $result->next_hit;
1416 is( $hit->name, 'ALEU_HORVU' );
1417 is( $hit->length, 362 );
1418 $hsp = $hit->next_hsp;
1419 is( $hsp->score, 481 );
1420 is( $hsp->bits, 191 );
1421 is( int $hsp->percent_identity, 34 );
1422 float_is( $hsp->evalue, 2e-53 );
# conserved fraction times HSP length, truncated to an integer
1423 is( int( $hsp->frac_conserved * $hsp->length ), 167 );
1424 is( $hsp->query->start, 28 );
1425 is( $hsp->query->end, 343 );
1426 is( $hsp->hit->start, 60 );
1427 is( $hsp->hit->end, 360 );
1428 is( $hsp->gaps, 27 );
1430 # this is blastn bl2seq with the hit on the reverse strand (bl2seq.blastn.rev)
1431 $searchio = Bio::SearchIO->new(
1433 -file => test_input_file('bl2seq.blastn.rev')
1435 $result = $searchio->next_result;
1436 isa_ok( $result, 'Bio::Search::Result::ResultI' );
1437 is( $result->query_name, '' );
1438 is( $result->algorithm, 'BLASTN' );
1439 is( $result->algorithm_reference, undef );
# query_length (180) and the querylength statistic (174) are distinct values
1440 is( $result->query_length, 180 );
1441 is( $result->get_statistic('querylength'), 174 );
1442 is( $result->get_statistic('effectivedblength'), 173 );
1443 is( $result->get_statistic('effectivespace'), 30102 );
1444 is( $result->get_statistic('effectivespaceused'), 30102 );
1445 $hit = $result->next_hit;
1446 is( $hit->length, 179 );
1447 is( $hit->name, 'human' );
1448 $hsp = $hit->next_hsp;
1449 is( $hsp->score, 27 );
1450 is( $hsp->bits, '54.0' );
1451 is( int $hsp->percent_identity, 88 );
1452 float_is( $hsp->evalue, 2e-12 );
1453 is( int( $hsp->frac_conserved * $hsp->length ), 83 );
1454 is( $hsp->query->start, 94 );
1455 is( $hsp->query->end, 180 );
# query on the plus strand, hit on the minus strand
1456 is( $hsp->query->strand, 1 );
1457 is( $hsp->hit->strand, -1 );
1458 is( $hsp->hit->start, 1 );
1459 is( $hsp->hit->end, 94 );
1460 is( $hsp->gaps, 7 );
1463 # this is blastn bl2seq with both query and hit on the plus strand (bl2seq.blastn)
1464 $searchio = Bio::SearchIO->new(
1466 -file => test_input_file('bl2seq.blastn')
1468 $result = $searchio->next_result;
1469 isa_ok( $result, 'Bio::Search::Result::ResultI' );
1470 is( $result->query_name, '' );
1471 is( $result->query_length, 180 );
1472 is( $result->algorithm, 'BLASTN' );
1473 is( $result->algorithm_reference, undef );
1474 is( $result->get_statistic('querylength'), 174 );
1475 is( $result->get_statistic('effectivedblength'), 173 );
1476 is( $result->get_statistic('effectivespace'), 30102 );
1477 is( $result->get_statistic('effectivespaceused'), 30102 );
1478 $hit = $result->next_hit;
1479 is( $hit->name, 'human' );
1480 is( $hit->length, 179 );
1481 $hsp = $hit->next_hsp;
1482 is( $hsp->score, 27 );
1483 is( $hsp->bits, '54.0' );
1484 is( int $hsp->percent_identity, 88 );
1485 float_is( $hsp->evalue, 2e-12 );
1486 is( int( $hsp->frac_conserved * $hsp->length ), 83 );
1487 is( $hsp->query->start, 94 );
1488 is( $hsp->query->end, 180 );
1489 is( $hsp->query->strand, 1 );
1490 is( $hsp->hit->strand, 1 );
# hit coordinates differ from the reverse-strand report (86-179 here)
1491 is( $hsp->hit->start, 86 );
1492 is( $hsp->hit->end, 179 );
1493 is( $hsp->gaps, 7 );
1496 # this is blastn bl2seq+ (bl2seq+.blastn, version 2.2.29+): a sequence aligned to itself
1497 $searchio = Bio::SearchIO->new(
1499 -file => test_input_file('bl2seq+.blastn')
1501 $result = $searchio->next_result;
1502 isa_ok( $result, 'Bio::Search::Result::ResultI' );
# unlike the older bl2seq reports, a query name and description are expected here
1503 is( $result->query_name, 'gi|2695846|emb|Y13255.1|' );
1504 is( $result->query_description,
1505 'Acipenser baeri mRNA for immunoglobulin heavy chain, clone ScH 3.3'
1507 is( $result->query_length, 606 );
1508 is( $result->algorithm, 'BLASTN' );
1509 is( $result->algorithm_version, '2.2.29+' );
1510 is( $result->algorithm_reference, undef );
1511 is( $result->get_statistic('effectivespaceused'), 352836 );
# values with a trailing zero are quoted so the zero is part of the comparison
1512 is( $result->get_statistic('kappa'), 0.621 );
1513 is( $result->get_statistic('kappa_gapped'), '0.460' );
1514 is( $result->get_statistic('lambda'), 1.33 );
1515 is( $result->get_statistic('lambda_gapped'), 1.28 );
1516 is( $result->get_statistic('entropy'), 1.12 );
1517 is( $result->get_statistic('entropy_gapped'), '0.850' );
1518 $hit = $result->next_hit;
1519 is( $hit->name, 'gi|2695846|emb|Y13255.1|' );
1520 is( $hit->description,
1521 'Acipenser baeri mRNA for immunoglobulin heavy chain, clone ScH 3.3'
1523 is( $hit->length, 606 );
1524 $hsp = $hit->next_hsp;
1525 is( $hsp->score, 606 );
1526 is( $hsp->bits, 1120 );
1527 is( $hsp->percent_identity, 100 );
1528 float_is( $hsp->evalue, '0.0' );
1529 is( $hsp->query->start, 1 );
1530 is( $hsp->query->end, 606 );
1531 is( $hsp->query->strand, 1 );
1532 is( $hsp->hit->strand, 1 );
1533 is( $hsp->hit->start, 1 );
1534 is( $hsp->hit->end, 606 );
1535 is( $hsp->gaps, 0 );
1538 # this is blastp bl2seq (bl2seq.bug940.out)
1539 $searchio = Bio::SearchIO->new(
1541 -file => test_input_file('bl2seq.bug940.out')
1543 $result = $searchio->next_result;
1544 isa_ok( $result, 'Bio::Search::Result::ResultI' );
# the query name is expected to be just the first word; the rest goes to the description
1545 is( $result->query_name, 'zinc' );
1546 is( $result->algorithm, 'BLASTP' );
1547 is( $result->query_description,
1548 'finger protein 135 (clone pHZ-17) [Homo sapiens]. neo_id RS.ctg14243-000000.6.0'
1550 is( $result->query_length, 469 );
1551 is( $result->get_statistic('querylength'), 446 );
1552 is( $result->get_statistic('effectivedblength'), 446 );
1553 is( $result->get_statistic('effectivespace'), 198916 );
1554 is( $result->get_statistic('effectivespaceused'), 198916 );
1555 $hit = $result->next_hit;
1556 is( $hit->name, 'gi|4507985|' );
1557 is( $hit->ncbi_gi, 4507985 );
1558 is( $hit->description,
1559 'zinc finger protein 135 (clone pHZ-17) [Homo sapiens]. neo_id RS.ctg14243-000000.6.0'
1561 is( $hit->length, 469 );
1562 $hsp = $hit->next_hsp;
1563 is( $hsp->score, 1626 );
1564 is( $hsp->bits, 637 );
1565 is( int $hsp->percent_identity, 66 );
1566 float_is( $hsp->evalue, 0.0 );
1567 is( int( $hsp->frac_conserved * $hsp->length ), 330 );
1568 is( $hsp->query->start, 121 );
1569 is( $hsp->query->end, 469 );
1570 is( $hsp->hit->start, 1 );
1571 is( $hsp->hit->end, 469 );
1572 is( $hsp->gaps, 120 );
1575 ok( $hit->next_hsp ); # there is more than one HSP here,
1576 # make sure it is parsed at least
1578 # cannot distinguish between blastx and tblastn reports
1579 # so we're only testing a blastx report for now
1581 # this is blastx bl2seq (bl2seq.blastx.out)
1582 $searchio = Bio::SearchIO->new(
1584 -file => test_input_file('bl2seq.blastx.out')
1586 $result = $searchio->next_result;
1587 isa_ok( $result, 'Bio::Search::Result::ResultI' );
1588 is( $result->query_name, 'AE000111.1' );
1589 is( $result->query_description,
1590 'Escherichia coli K-12 MG1655 section 1 of 400 of the complete genome' );
1591 is( $result->algorithm, 'BLASTX' );
1592 is( $result->algorithm_reference, undef );
1593 is( $result->query_length, 720 );
# querylength and effectivespace are expected to be undefined for this report
1594 is( $result->get_statistic('querylength'), undef );
1595 is( $result->get_statistic('effectivedblength'), 787 );
1596 is( $result->get_statistic('effectivespace'), undef );
1597 is( $result->get_statistic('effectivespaceused'), 162122 );
1598 $hit = $result->next_hit;
1599 is( $hit->name, 'AK1H_ECOLI' );
1600 is( $hit->description,
1601 'P00561 Bifunctional aspartokinase/homoserine dehydrogenase I (AKI-HDI) [Includes: Aspartokinase I ; Homoserine dehydrogenase I ]'
1603 is( $hit->length, 820 );
1604 $hsp = $hit->next_hsp;
1605 is( $hsp->score, 634 );
1606 is( $hsp->bits, 248 );
1607 is( int $hsp->percent_identity, 100 );
1608 float_is( $hsp->evalue, 2e-70 );
1609 is( int( $hsp->frac_conserved * $hsp->length ), 128 );
1610 is( $hsp->query->start, 1 );
1611 is( $hsp->query->end, 384 );
1612 is( $hsp->hit->start, 1 );
1613 is( $hsp->hit->end, 128 );
1614 is( $hsp->gaps, 0 );
# expected frame/strand values: frame 0 on both sides, query strand -1, hit strand 0
1615 is( $hsp->query->frame, 0 );
1616 is( $hsp->hit->frame, 0 );
1617 is( $hsp->query->strand, -1 );
1618 is( $hsp->hit->strand, 0 );
1621 # this is tblastx bl2seq (self against self), bl2seq.tblastx.out
1622 $searchio = Bio::SearchIO->new(
1624 -file => test_input_file('bl2seq.tblastx.out')
1626 $result = $searchio->next_result;
1627 isa_ok( $result, 'Bio::Search::Result::ResultI' );
# the query name is expected to be the first word only; the remainder is the description
1628 is( $result->query_name, 'Escherichia' );
1629 is( $result->algorithm, 'TBLASTX' );
1630 is( $result->algorithm_reference, undef );
1631 is( $result->query_description,
1632 'coli K-12 MG1655 section 1 of 400 of the complete genome' );
1633 is( $result->query_length, 720 );
# querylength and effectivespace are expected to be undefined for this report
1634 is( $result->get_statistic('querylength'), undef );
1635 is( $result->get_statistic('effectivedblength'), 221 );
1636 is( $result->get_statistic('effectivespace'), undef );
1637 is( $result->get_statistic('effectivespaceused'), 48620 );
1638 $hit = $result->next_hit;
1639 is( $hit->name, 'gi|1786181|gb|AE000111.1|AE000111' );
1640 is( $hit->ncbi_gi, 1786181 );
1641 is( $hit->description,
1642 'Escherichia coli K-12 MG1655 section 1 of 400 of the complete genome' );
1643 is( $hit->length, 720 );
1644 $hsp = $hit->next_hsp;
1645 is( $hsp->score, 1118 );
1646 is( $hsp->bits, 515 );
1647 is( int $hsp->percent_identity, 95 );
1648 float_is( $hsp->evalue, 1e-151 );
1649 is( int( $hsp->frac_conserved * $hsp->length ), 229 );
1650 is( $hsp->query->start, 1 );
1651 is( $hsp->query->end, 720 );
1652 is( $hsp->hit->start, 1 );
1653 is( $hsp->hit->end, 720 );
1654 is( $hsp->gaps, 0 );
# expected frame 0 and strand 1 on both query and hit
1655 is( $hsp->query->frame, 0 );
1656 is( $hsp->hit->frame, 0 );
1657 is( $hsp->query->strand, 1 );
1658 is( $hsp->hit->strand, 1 );
1661 # this is NCBI tblastn: result-level statistics and the first hit name only
1662 $searchio = Bio::SearchIO->new(
1664 -file => test_input_file('tblastn.out')
1666 $result = $searchio->next_result;
1667 isa_ok( $result, 'Bio::Search::Result::ResultI' );
1668 is( $result->algorithm, 'TBLASTN' );
1669 like($result->algorithm_reference,qr/Gapped BLAST and PSI-BLAST/);
1670 is( $result->get_statistic('querylength'), 102 );
1671 is( $result->get_statistic('effectivedblength'), 4342 );
1672 is( $result->get_statistic('effectivespace'), 442884 );
1673 is( $result->get_statistic('effectivespaceused'), 442884 );
1674 $hit = $result->next_hit;
1675 is( $hit->name, 'gi|10040111|emb|AL390796.6|AL390796' );
1677 # Test Blast parsing with B=0 (WU-BLAST), input no_hsps.blastp
# Only hit-level values (name, description, bits, significance) are checked
# for the first two hits; no HSP accessors are called.
1678 $searchio = Bio::SearchIO->new(
1679 -file => test_input_file('no_hsps.blastp'),
1682 $result = $searchio->next_result;
1683 like($result->algorithm_reference,qr/Gish, W. \(1996-2003\)/);
1684 is( $result->query_name, 'mgri:MG00189.3' );
1685 $hit = $result->next_hit;
1686 is( $hit->name, 'mgri:MG00189.3' );
1687 is( $hit->description, 'hypothetical protein 6892 8867 +' );
1688 is( $hit->bits, 3098 );
1689 float_is( $hit->significance, 0. );
1691 $hit = $result->next_hit;
1692 is( $hit->name, 'fgram:FG01141.1' );
1693 is( $hit->description, 'hypothetical protein 47007 48803 -' );
1694 is( $hit->bits, 2182 );
1695 float_is( $hit->significance, 4.2e-226 );
# total number of hits in the result
1696 is( $result->num_hits, 415 );
1698 # Let's now test if _guess_format is doing its job correctly
# Each pair maps a file name to the format name expected from _guess_format.
1700 'filename.blast' => 'blast',
1701 'filename.bls' => 'blast',
1703 'f.tblx' => 'blast',
1704 'fast.bls' => 'blast',
1705 'f.fasta' => 'fasta',
1709 'f.ssearch' => 'fasta',
1710 'f.SSEARCH.m9' => 'fasta',
1712 'f.psearch' => 'fasta',
1713 'f.osearch' => 'fasta',
1714 'f.exon' => 'exonerate',
1715 'f.exonerate' => 'exonerate',
1716 'f.blastxml' => 'blastxml',
1717 'f.xml' => 'blastxml'
# _guess_format is called as a class method; the test label names both the
# expected format and the file name
1719 while ( my ( $file, $expformat ) = each %pair ) {
1720 is( Bio::SearchIO->_guess_format($file),
1721 $expformat, "$expformat for $file" );
1724 # Test Wes Barris's reported bug when parsing blastcl3 output which
1725 # has integer overflow
1727 $searchio = Bio::SearchIO->new(
1728 -file => test_input_file('hsinsulin.blastcl3.blastn'),
1731 $result = $searchio->next_result;
1732 is( $result->query_name, 'human' );
# the negative letter count is the overflowed figure; it is asserted as-is
1733 is( $result->database_letters(), '-24016349' );
1735 # this is of course not the right length, but it is what blastcl3
1736 # reports; the dbletters statistic asserted next carries a different value
1737 is( $result->get_statistic('dbletters'), '192913178' );
1738 is( $result->get_statistic('dbentries'), '1867771' );
1740 # test for links and groups being parsed out of WU-BLAST properly
1741 $searchio = Bio::SearchIO->new(
1743 -file => test_input_file('brassica_ATH.WUBLASTN')
# each step must return a true value before the HSP is inspected
1745 ok( $result = $searchio->next_result );
1746 ok( $hit = $result->next_hit );
1747 ok( $hsp = $hit->next_hsp );
# links and hsp_group are compared as strings
1748 is( $hsp->links, '(1)-3-2' );
1749 is( $hsp->query->strand, 1 );
1750 is( $hsp->hit->strand, 1 );
1751 is( $hsp->hsp_group, '1' );
1753 ## Web blast result parsing
# Checks the RID, the first hit (name, accession, GI) and the query span of
# its first HSP in catalase-webblast.BLASTP.
1755 $searchio = Bio::SearchIO->new(
1757 -file => test_input_file('catalase-webblast.BLASTP')
1759 ok( $result = $searchio->next_result );
1760 is( $result->rid, '1118324516-16598-103707467515.BLASTQ1' );
1761 ok( $hit = $result->next_hit );
1762 is( $hit->name, 'gi|40747822|gb|EAA66978.1|', 'full hit name' );
1763 is( $hit->accession, 'EAA66978', 'hit accession' );
1764 is( $hit->ncbi_gi, 40747822 );
1765 ok( $hsp = $hit->next_hsp );
1766 is( $hsp->query->start, 1, 'query start' );
# label corrected: this assertion checks the end coordinate, not the start
1767 is( $hsp->query->end, 528, 'query end' );
1770 # tests for new BLAST 2.2.13 output (new_blastn.txt): result-level metadata,
# statistics and parameters
1771 $searchio = Bio::SearchIO->new(
1773 -file => test_input_file('new_blastn.txt')
1776 $result = $searchio->next_result;
1777 is( $result->database_name,
1778 'All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS,GSS,environmental samples or phase 0, 1 or 2 HTGS sequences)'
1780 is( $result->database_entries, 3742891 );
1781 is( $result->database_letters, 16670205594 );
1782 is( $result->algorithm, 'BLASTN' );
1783 is( $result->algorithm_version, '2.2.13 [Nov-27-2005]' );
1784 like($result->algorithm_reference, qr/Gapped BLAST and PSI-BLAST/);
1785 is( $result->rid, '1141079027-8324-8848328247.BLASTQ4' );
# the expected query name keeps the trailing comma
1786 is( $result->query_name, 'pyrR,' );
1787 is( $result->query_length, 558 );
# gapped and ungapped statistics are expected to carry the same values here
1788 is( $result->get_statistic('kappa'), '0.711' );
1789 is( $result->get_statistic('kappa_gapped'), '0.711' );
1790 is( $result->get_statistic('lambda'), '1.37' );
1791 is( $result->get_statistic('lambda_gapped'), '1.37' );
1792 is( $result->get_statistic('entropy'), '1.31' );
1793 is( $result->get_statistic('entropy_gapped'), '1.31' );
# the dbletters statistic is expected as a negative number, unlike database_letters above
1794 is( $result->get_statistic('dbletters'), '-509663586' );
1795 is( $result->get_statistic('dbentries'), 3742891 );
1796 is( $result->get_statistic('effective_hsplength'), undef );
1797 is( $result->get_statistic('effectivespace'), 8935230198384 );
1799 $result->get_statistic(
1800 'number_of_hsps_better_than_expect_value_cutoff_without_gapping'),
1803 is( $result->get_statistic('number_of_hsps_gapped'), 1771 );
1804 is( $result->get_statistic('number_of_hsps_successfully_gapped'), 0 );
1805 is( $result->get_statistic('length_adjustment'), 22 );
1806 is( $result->get_statistic('querylength'), 536 );
1807 is( $result->get_statistic('effectivedblength'), 16670205594 );
1808 is( $result->get_statistic('effectivespaceused'), 8891094027712 );
1809 is( $result->get_parameter('matrix'), 'blastn matrix:1 -3' );
1810 is( $result->get_parameter('gapopen'), 5 );
1811 is( $result->get_parameter('gapext'), 2 );
1812 is( $result->get_statistic('S2'), '60' );
1813 is( $result->get_statistic('S2_bits'), '119.4' );
# expect is read through get_parameter and compared with float_is
1814 float_is( $result->get_parameter('expect'), '1e-23' );
1815 is( $result->get_statistic('num_extensions'), '117843' );
1819 'gi|41400296|gb|AE016958.1|', 4829781, 'AE016958', 41400296, '6e-059',
1823 'gi|54013472|dbj|AP006618.1|', 6021225, 'AP006618', 54013472, '4e-026',
1827 'gi|57546753|dbj|BA000030.2|', 9025608, 'BA000030', 57546753, '1e-023',
1833 while ( $hit = $result->next_hit ) {
1834 my $d = shift @valid;
1836 is( $hit->name, shift @$d );
1837 is( $hit->length, shift @$d );
1838 is( $hit->accession, shift @$d );
1839 is( $hit->ncbi_gi, shift @$d );
1840 float_is( $hit->significance, shift @$d );
1841 is( $hit->raw_score, shift @$d );
1842 is( $hit->bits, shift @$d );
1844 if ( $count == 0 ) {
1846 while ( my $hsp = $hit->next_hsp ) {
1847 is( $hsp->query->start, 262 );
1848 is( $hsp->query->end, 552 );
1849 is( $hsp->hit->start, 1166897 );
1850 is( $hsp->hit->end, 1167187 );
1851 is( $hsp->length('total'), 291 );
1852 is( $hsp->hit_features, 'PyrR' );
1853 is( $hsp->start('hit'), $hsp->hit->start );
1854 is( $hsp->end('query'), $hsp->query->end );
1855 is( $hsp->strand('sbjct'), $hsp->subject->strand ); # alias for hit
1856 float_is( $hsp->evalue, 6e-59 );
1857 is( $hsp->score, 119 );
1858 is( $hsp->bits, 236 );
1859 is( sprintf( "%.2f", $hsp->percent_identity ), 85.22 );
1860 is( sprintf( "%.4f", $hsp->frac_identical('query') ), 0.8522 );
1861 is( sprintf( "%.4f", $hsp->frac_identical('hit') ), 0.8522 );
1862 is( $hsp->gaps, 0 );
1866 is( $hsps_left, 0 );
1868 last if ( $count++ > @valid );
# Parses blastp2215.blast and checks two consecutive results from it:
# header fields first, then spot checks on selected entries of hits().
1873 $searchio = Bio::SearchIO->new(
1875 -file => test_input_file('blastp2215.blast')
# First result.
1878 $result = $searchio->next_result;
1879 is( $result->database_entries, 4460989 );
1880 is( $result->database_letters, 1533424333 );
1881 is( $result->algorithm, 'BLASTP' );
1882 is( $result->algorithm_version, '2.2.15 [Oct-15-2006]' );
1883 is( $result->rid, '1169055516-21385-22799250964.BLASTQ4' );
1884 is( $result->query_name, 'gi|15608519|ref|NP_215895.1|' );
1885 is( $result->query_gi, 15608519 );
1886 is( $result->query_length, 193 );
# hits() returns all hits at once; indices below are zero-based.
1887 @hits = $result->hits;
1888 is( scalar(@hits), 10 );
1889 is( $hits[1]->accession, '1W30' );
1890 is( $hits[4]->significance, '2e-72' );
1891 is( $hits[7]->bits, '254' );
# Second result: same database and program values, different query.
# No rid or query_gi assertions are made for this one.
1892 $result = $searchio->next_result;
1893 is( $result->database_entries, 4460989 );
1894 is( $result->database_letters, 1533424333 );
1895 is( $result->algorithm, 'BLASTP' );
1896 is( $result->algorithm_version, '2.2.15 [Oct-15-2006]' );
1897 is( $result->query_name, 'gi|15595598|ref|NP_249092.1|' );
1898 is( $result->query_length, 423 );
1899 @hits = $result->hits;
1900 is( scalar(@hits), 10 );
1901 is( $hits[1]->accession, 'ZP_00972546' );
1902 is( $hits[2]->ncbi_gi, 116054132 );
1903 is( $hits[4]->significance, '0.0' );
1904 is( $hits[7]->bits, 624 );
# Parses bug2246.blast and checks the statistics of its first result, then
# the identifying fields of that result's first hit.
1907 $searchio = Bio::SearchIO->new(
1910 -file => test_input_file('bug2246.blast')
1912 $result = $searchio->next_result;
1914 $result->get_statistic(
1915 'number_of_hsps_better_than_expect_value_cutoff_without_gapping'),
1918 is( $result->get_statistic('number_of_hsps_gapped'), 7049 );
1919 is( $result->get_statistic('number_of_hsps_successfully_gapped'), 55 );
1920 is( $result->get_statistic('length_adjustment'), 125 );
1921 is( $result->get_statistic('querylength'), 68 );
1922 is( $result->get_statistic('effectivedblength'), 1045382588 );
# Both effective-space statistics are expected to carry the same number.
1923 is( $result->get_statistic('effectivespace'), 71086015984 );
1924 is( $result->get_statistic('effectivespaceused'), 71086015984 );
# First hit: name and accession are expected to be the same string.
1925 $hit = $result->next_hit;
1926 is $hit->name, 'UniRef50_Q9X0H5';
1928 is $hit->accession, 'UniRef50_Q9X0H5';
1929 is $hit->description, 'Cluster: Histidyl-tRNA synthetase; n=4; Thermoto...';
# The expected significance here is 650, i.e. far above 1.
1931 float_is( $hit->significance, 650 );
# Parses bug1986.blastp and checks the statistics of its first result plus
# the name, length, accession, description, raw score, and significance of
# the first two hits.
1934 $searchio = Bio::SearchIO->new(
1937 -file => test_input_file('bug1986.blastp')
1939 $result = $searchio->next_result;
1940 is( $result->get_statistic('querylength'), 335 );
1941 is( $result->get_statistic('effectivedblength'), 18683311 );
1942 is( $result->get_statistic('effectivespace'), 6258909185 );
1943 is( $result->get_statistic('effectivespaceused'), 6258909185 );
# First hit.
1944 $hit = $result->next_hit;
1945 is $hit->name, 'ENSP00000350182';
1946 is $hit->length, 425;
1947 is $hit->accession, 'ENSP00000350182';
1948 is $hit->description,
1949 'pep:novel clone::BX322644.8:4905:15090:-1 gene:ENSG00000137397 transcript:ENST00000357569';
1950 is $hit->raw_score, 301;
1952 float_is( $hit->significance, 3e-27 );
# Second hit.
1953 $hit = $result->next_hit;
1954 is $hit->name, 'ENSP00000327738';
1955 is $hit->length, 468;
1956 is $hit->accession, 'ENSP00000327738';
1957 is $hit->description,
1958 'pep:known-ccds chromosome:NCBI36:4:189297592:189305643:1 gene:ENSG00000184108 transcript:ENST00000332517 CCDS3851.1';
1959 is $hit->raw_score, 289;
1961 float_is( $hit->significance, 8e-26 );
1965 # handle at least the first iteration with BLAST searches using databases
1966 # containing non-unique IDs
# Strategy: collect accessions by reading the raw report text directly
# (%unique_accs), collect them again through the parser (%search_accs),
# and require that both collections are identical.
1968 my $file = test_input_file('bug1986.blast2');
1970 open my $IN, '<', $file or die "Could not read file '$file': $!\n";
# NOTE(review): the surrounding read loops are presumably iterating over
# lines of $IN via $_ -- confirm. A line starting with "Sequences" ends a scan.
1973 last if (/^Sequences/);
# The accession is the first whitespace-separated field of the current line.
1981 my ($accession) = split(/\s+/);
1983 #print "Real Hit $count = $accession\n";
1984 $unique_accs{$accession}++;
1986 #last if ($count == 10);
1992 is( scalar( keys %unique_accs ), 490 );
# Same count, this time through the parser: one key per hit accession of
# the first result.
1996 $searchio = Bio::SearchIO->new(
2001 $result = $searchio->next_result;
2003 while ( my $hit = $result->next_hit ) {
2004 $search_accs{ $hit->accession }++;
2009 is( scalar( keys %search_accs ), 490 );
# Keys and per-accession counts must match exactly.
2011 is_deeply( \%unique_accs, \%search_accs );
2013 # bug 2391 - long query names
# Parses bug2391.megablast and checks that a long query name is returned in
# one piece with an empty description, followed by the result statistics.
2015 $file = test_input_file('bug2391.megablast');
2017 $searchio = Bio::SearchIO->new(
2021 $result = $searchio->next_result;
2023 # data is getting munged up with long names
2024 is( $result->query_name,
2025 'c6_COX;c6_QBL;6|31508172;31503325;31478402|rs36223351|1|dbSNP|C/G' );
# Nothing from the long name may spill over into the description.
2026 is( $result->query_description, '' );
2027 is( $result->algorithm, 'MEGABLAST' );
2029 $result->get_statistic(
2030 'number_of_hsps_better_than_expect_value_cutoff_without_gapping'),
2033 is( $result->get_statistic('number_of_hsps_gapped'), 0 );
2034 is( $result->get_statistic('number_of_hsps_successfully_gapped'), 0 );
2035 is( $result->get_statistic('length_adjustment'), 16 );
2036 is( $result->get_statistic('querylength'), 85 );
2037 is( $result->get_statistic('effectivedblength'), 59358266 );
2038 is( $result->get_statistic('effectivespace'), 5045452610 );
2039 is( $result->get_statistic('effectivespaceused'), 5045452610 );
2041 # bug 2399 - catching Expect(n) values
# Walks every result and every hit of bug2399.tblastn and counts the HSPs
# whose n() value is true.
2043 $file = test_input_file('bug2399.tblastn');
2045 $searchio = Bio::SearchIO->new(
2050 while ( my $query = $searchio->next_result ) {
2051 while ( my $subject = $query->next_hit ) {
# grep in numeric context yields the number of matching HSPs, not their sum of n.
2052 $total_n += grep { $_->n } $subject->hsps;
2055 is( $total_n, 80 ); # n = at least 1, so this was changed to reflect that
# cmp_evalue($tval, $aval)
# Test helper: formats both arguments with sprintf "%g" and asserts with is()
# that the two resulting strings are equal.
# Note: "($$)" is a Perl prototype (two scalar arguments), not a signature;
# it only affects calls compiled after this definition has been seen.
2057 sub cmp_evalue ($$) {
2058 my ( $tval, $aval ) = @_;
2059 is( sprintf( "%g", $tval ), sprintf( "%g", $aval ) );
2062 # bug 3064 - All-gap Query/Subject lines for BLAST+ do not have numbering
# Parses blast_plus.blastp; for every result it checks four statistics, and
# for one particular HSP it checks coordinates and alignment string lengths.
2064 $file = test_input_file('blast_plus.blastp');
2066 $searchio = Bio::SearchIO->new(
2072 while ( my $query = $searchio->next_result ) {
# Of these four statistics only 'effectivespaceused' is expected to be defined.
2073 is( $query->get_statistic('querylength'), undef );
2074 is( $query->get_statistic('effectivedblength'), undef );
2075 is( $query->get_statistic('effectivespace'), undef );
2076 is( $query->get_statistic('effectivespaceused'), 55770 );
2077 while ( my $subject = $query->next_hit ) {
2078 while ( my $hsp = $subject->next_hsp ) {
# NOTE(review): $total_hsps is presumably incremented once per HSP before
# this check, making this the first HSP -- confirm.
2080 if ( $total_hsps == 1 ) {
2081 is( $hsp->start('query'), 5 );
2082 is( $hsp->start('hit'), 3 );
2083 is( $hsp->end('query'), 220 );
2084 is( $hsp->end('hit'), 308 );
# Query and hit alignment strings must have the same length.
2085 is( length( $hsp->query_string ), length( $hsp->hit_string ) );
2091 is( $total_hsps, 2 );
2093 # BLAST 2.2.20+ output file ZABJ4EA7014.CH878695.1.blast.txt
2094 # Tests SearchIO blast parsing of 'Features in/flanking this part of a subject sequence'
# This part covers the first result's header values and footer
# statistics/parameters.
2095 $searchio = Bio::SearchIO->new(
2097 -file => test_input_file('ZABJ4EA7014.CH878695.1.blast.txt')
2100 $result = $searchio->next_result;
2102 # Parse BLAST header details
2103 is( $result->algorithm, 'BLASTN' );
2104 is( $result->algorithm_version, '2.2.20+' );
# \s+ lets the reference text match across any run of whitespace between
# "DNA" and "sequences".
2105 like($result->algorithm_reference, qr/A greedy algorithm for aligning DNA\s+sequences/);
2106 is( $result->database_name,
2107 'human build 35 genome database (reference assembly only)' );
2108 is( $result->database_entries, 378 );
2109 is( $result->database_letters, 2866055344 );
2110 is( $result->query_name, 'gi|95131563|gb|CH878695.1|' );
2111 is( $result->query_description,
2112 'Homo sapiens 211000035829648 genomic scaffold' );
2113 is( $result->query_length, 29324 );
2115 # Parse BLAST footer details
2116 is( $result->get_statistic('posted_date'), 'Jul 26, 2007 3:20 PM' );
# NOTE(review): the 'dbletters' statistic is expected to be negative although
# database_letters() above is 2866055344 -- presumably the report footer
# prints an overflowed number; confirm against the fixture.
2117 is( $result->get_statistic('dbletters'), -1428911948 );
2118 is( $result->get_statistic('lambda'), '1.33' );
2119 is( $result->get_statistic('kappa'), '0.621' );
2120 is( $result->get_statistic('entropy'), '1.12' );
2121 is( $result->get_statistic('lambda_gapped'), '1.28' );
2122 is( $result->get_statistic('kappa_gapped'), '0.460' );
2123 is( $result->get_statistic('entropy_gapped'), '0.850' );
2124 is( $result->get_parameter('matrix'), 'blastn matrix:1 -2' );
2125 is( $result->get_parameter('gapopen'), 0 );
2126 is( $result->get_parameter('gapext'), 0 );
2127 is( $result->get_statistic('num_extensions'), 216 );
2128 is( $result->get_statistic('num_successful_extensions'), 216 );
2129 is( $result->get_parameter('expect'), '0.01' );
2130 is( $result->get_statistic('seqs_better_than_cutoff'), 10 );
2132 $result->get_statistic(
2133 'number_of_hsps_better_than_expect_value_cutoff_without_gapping'),
2136 is( $result->get_statistic('number_of_hsps_gapped'), 216 );
2137 is( $result->get_statistic('number_of_hsps_successfully_gapped'), 212 );
2138 is( $result->get_statistic('length_adjustment'), 34 );
2139 is( $result->get_statistic('querylength'), 29290 );
2140 is( $result->get_statistic('effectivedblength'), 2866042492 );
2141 is( $result->get_statistic('effectivespace'), 83946384590680 );
2142 is( $result->get_statistic('effectivespaceused'), 83946384590680 );
# Each X/S statistic is checked twice: the plain key and its "_bits" variant.
2143 is( $result->get_statistic('A'), 0 );
2144 is( $result->get_statistic('X1'), 23 );
2145 is( $result->get_statistic('X1_bits'), '44.2' );
2146 is( $result->get_statistic('X2'), 32 );
2147 is( $result->get_statistic('X2_bits'), '59.1' );
2148 is( $result->get_statistic('X3'), 54 );
2149 is( $result->get_statistic('X3_bits'), '99.7' );
2150 is( $result->get_statistic('S1'), 23 );
2151 is( $result->get_statistic('S1_bits'), '43.6' );
2152 is( $result->get_statistic('S2'), 29 );
2153 is( $result->get_statistic('S2_bits'), '54.7' );
2155 # Skip the 1st hit. It doesn't have any 'Features in/flanking this part of subject sequence:'
2156 $hit = $result->next_hit;
2158 # The 2nd hit has hsps with 'Features flanking this part of subject sequence:'
2159 $hit = $result->next_hit;
2160 is( $hit->name, 'gi|51459264|ref|NT_077382.3|Hs1_77431' );
2161 is( $hit->description, 'Homo sapiens chromosome 1 genomic contig' );
2162 is( $hit->length, 237250 );
# The first five HSPs of this hit are checked in order. For each one:
# hit_features text, bits, score, expect, HSP length, identical positions,
# percent identity, gaps, and the query/subject start and end coordinates.
# percent_identity is formatted with "%.2f" and then truncated by int(), so
# only its integer part is compared.
2164 # In the 2nd hit, look at the 1st hsp
2165 $hsp = $hit->next_hsp;
2166 is( $hsp->hit_features,
2167 "16338 bp at 5' side: PRAME family member 8 11926 bp at 3' side: PRAME family member 9"
2169 is( $hsp->bits, 7286 );
2170 is( $hsp->score, 3945 );
2171 is( $hsp->expect, '0.0' );
2172 is( $hsp->hsp_length, 6145 );
2173 is( $hsp->num_identical, 5437 );
2174 is( int sprintf( "%.2f", $hsp->percent_identity ), 88 );
2175 is( $hsp->gaps, 152 );
2176 is( $hsp->start('query'), 23225 );
2177 is( $hsp->start('sbjct'), 86128 );
2178 is( $hsp->end('query'), 29324 );
2179 is( $hsp->end('sbjct'), 92165 );
2181 # In the 2nd hit, look at the 2nd hsp
2182 $hsp = $hit->next_hsp;
2183 is( $hsp->hit_features,
2184 "25773 bp at 5' side: PRAME family member 3 3198 bp at 3' side: PRAME family member 5"
2186 is( $hsp->bits, 4732 );
2187 is( $hsp->score, 2562 );
2188 is( $hsp->expect, '0.0' );
2189 is( $hsp->hsp_length, 4367 );
2190 is( $hsp->num_identical, 3795 );
2191 is( int sprintf( "%.2f", $hsp->percent_identity ), 86 );
2192 is( $hsp->gaps, 178 );
2193 is( $hsp->start('query'), 23894 );
2194 is( $hsp->start('sbjct'), 37526 );
2195 is( $hsp->end('query'), 28193 );
2196 is( $hsp->end('sbjct'), 41781 );
2198 # In the 2nd hit, look at the 3rd hsp
2199 $hsp = $hit->next_hsp;
2200 is( $hsp->hit_features,
2201 "16338 bp at 5' side: PRAME family member 8 14600 bp at 3' side: PRAME family member 9"
2203 is( $hsp->bits, 3825 );
2204 is( $hsp->score, 2071 );
2205 is( $hsp->expect, '0.0' );
2206 is( $hsp->hsp_length, 3406 );
2207 is( $hsp->num_identical, 2976 );
2208 is( int sprintf( "%.2f", $hsp->percent_identity ), 87 );
2209 is( $hsp->gaps, 89 );
2210 is( $hsp->start('query'), 14528 );
2211 is( $hsp->start('sbjct'), 86128 );
2212 is( $hsp->end('query'), 17886 );
2213 is( $hsp->end('sbjct'), 89491 );
2215 # In the 2nd hit, look at the 4th hsp
2216 $hsp = $hit->next_hsp;
2217 is( $hsp->hit_features,
2218 "29101 bp at 5' side: PRAME family member 8 2120 bp at 3' side: PRAME family member 9"
2220 is( $hsp->bits, 3241 );
2221 is( $hsp->score, 1755 );
2222 is( $hsp->expect, '0.0' );
2223 is( $hsp->hsp_length, 3158 );
2224 is( $hsp->num_identical, 2711 );
2225 is( int sprintf( "%.2f", $hsp->percent_identity ), 85 );
2226 is( $hsp->gaps, 123 );
2227 is( $hsp->start('query'), 23894 );
2228 is( $hsp->start('sbjct'), 98891 );
2229 is( $hsp->end('query'), 27005 );
2230 is( $hsp->end('sbjct'), 101971 );
2232 # In the 2nd hit, look at the 5th hsp
2233 $hsp = $hit->next_hsp;
# Unlike the four HSPs above, the expected feature text here is a bare
# feature name with no "bp at 5'/3' side" distances.
2234 is( $hsp->hit_features, "PRAME family member 13" );
2235 is( $hsp->bits, 3142 );
2236 is( $hsp->score, 1701 );
2237 is( $hsp->expect, '0.0' );
2238 is( $hsp->hsp_length, 2507 );
2239 is( $hsp->num_identical, 2249 );
2240 is( int sprintf( "%.2f", $hsp->percent_identity ), 89 );
2241 is( $hsp->gaps, 63 );
2242 is( $hsp->start('query'), 3255 );
2243 is( $hsp->start('sbjct'), 128516 );
2244 is( $hsp->end('query'), 5720 );
2245 is( $hsp->end('sbjct'), 131000 );
2247 # testing for Bug #3298
# Parses multiresult_blastn+.bls and calls next_result three times; each
# returned result must report algorithm_version '2.2.25+', not only the first.
2248 $searchio = Bio::SearchIO->new(
2249 '-format' => 'blast',
2250 '-file' => test_input_file('multiresult_blastn+.bls')
2253 is ($searchio->next_result->algorithm_version, '2.2.25+', "testing Bug 3298");
2254 is ($searchio->next_result->algorithm_version, '2.2.25+', "testing Bug 3298");
2255 is ($searchio->next_result->algorithm_version, '2.2.25+', "testing Bug 3298");
2257 # testing for Bug #3251
# Parses rpsblast_no_hits.bls and calls next_result three times; each
# returned result must report database_name 'CDD.v.2.13'.
# NOTE(review): judging by the fixture name these results contain no hits --
# not verified here.
2258 $searchio = Bio::SearchIO->new(
2259 '-format' => 'blast',
2260 '-file' => test_input_file('rpsblast_no_hits.bls')
2263 is ($searchio->next_result->database_name, 'CDD.v.2.13', "testing Bug 3251");
2264 is ($searchio->next_result->database_name, 'CDD.v.2.13', "testing Bug 3251");
2265 is ($searchio->next_result->database_name, 'CDD.v.2.13', "testing Bug 3251");