5 use vars qw($EXHAUSTIVE $VERBOSE);
8 $EXHAUSTIVE = $ENV{BIOPERL_TILING_EXHAUSTIVE_TESTS};
9 $VERBOSE = $ENV{BIOPERL_TILING_VERBOSE_TESTS};
10 test_begin(-tests => ($EXHAUSTIVE ? 6519 : 1141) );
13 use_ok('Bio::Search::Tiling::MapTiling');
14 use_ok('Bio::Search::Tiling::MapTileUtils');
15 use_ok('Bio::SearchIO');
16 use_ok('Bio::Search::Hit::BlastHit');
19 my ($blio, $result, $hit, $tiling, $hsp);
21 my @normal_formats = qw( blast wublast
28 my @xltd_formats = qw( blastx wublastx
33 # an exhaustive listing of search reports in
51 hsinsulin.blastcl3.blastn
60 catalase-webblast.BLASTP
65 contig-by-hand.wublastp
66 ecolitst.noseqs.wublastp
72 dnaEbsub_ecoli.wublastx
79 1ZZ19XR301R-Alignment.tblastn
82 dnaEbsub_ecoli.wutblastn
89 dnaEbsub_ecoli.wutblastx
95 ecoli_domains.rpsblast
105 # a subset of search reports for
106 # run-o-the-mill regression tests
108 my %example_files = (
116 brassica_ATH.WUBLASTN
120 catalase-webblast.BLASTP
129 dnaEbsub_ecoli.wublastx
138 dnaEbsub_ecoli.wutblastn
144 dnaEbsub_ecoli.wutblastx
151 ok( $blio = new Bio::SearchIO(
152 -file=>test_input_file('dcr1_sp.WUBLASTP'),
153 -format=>'blast'), 'parse data file');
155 $result = $blio->next_result;
156 while ( $_ = $result->next_hit ) {
157 last if $_->name =~ /ASPTN/;
159 ok($hit = $_, 'got test hit');
160 ok($tiling = Bio::Search::Tiling::MapTiling->new($hit), 'create tiling');
165 isa_ok($tiling, 'Bio::Search::Tiling::TilingI');
166 foreach ( qw( next_tiling rewind_tilings identities conserved length ) ) {
167 ok( $tiling->$_, "implements '$_'" );
170 # regression test on original calculations
172 my @orig_id_results = ( 387,388,388,381,382,389 );
173 my @orig_cn_results = ( 622,619,628,608,611,613 );
175 $tiling->identities('query', 'exact'),
176 $tiling->identities('query', 'est'),
177 $tiling->identities('query', 'max'),
178 $tiling->identities('subject', 'exact'),
179 $tiling->identities('subject', 'est'),
180 $tiling->identities('subject', 'max')
183 $tiling->conserved('query', 'exact'),
184 $tiling->conserved('query', 'est'),
185 $tiling->conserved('query', 'max'),
186 $tiling->conserved('subject', 'exact'),
187 $tiling->conserved('subject', 'est'),
188 $tiling->conserved('subject', 'max')
190 map { $_ = int($_) } @id_results, @cn_results;
192 is_deeply(\@id_results, \@orig_id_results, 'identities regression test');
193 is_deeply(\@cn_results, \@orig_cn_results, 'conserved regression test');
195 # tiling iterator regression tests
198 while ($tiling->next_tiling('query')) {$qn++};
199 while ($tiling->next_tiling('subject')) {$sn++};
200 is ($qn, 8, 'tiling iterator regression test(1)');
201 is ($sn, 128, 'tiling iterator regression test(2)');
202 $tiling->rewind('subject');
203 while ($tiling->next_tiling('subject')) {$sn++};
204 is ($sn, 256, 'tiling iterator regression test(3, rewind)');
206 diag("Old blast.t tiling tests") if $VERBOSE;
208 ok($blio = Bio::SearchIO->new(
209 '-format' => 'blast',
210 '-file' => test_input_file('ecolitst.wublastp')
211 ), "ecolitst.wublastp");
212 $result = $blio->next_result;
214 $hit = $result->next_hit;
215 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
216 # Test HSP contig data returned by SearchUtils::tile_hsps()
217 # Second hit has two hsps that overlap.
219 # compare with the contig made by hand for these two contigs
220 # in t/data/contig-by-hand.wublastp
221 # (in this made-up file, the hsps from ecolitst.wublastp
222 # were aligned and contiged, and Length, Identities, Positives
223 # were counted, by a human (maj) )
225 my $hand_hit = Bio::SearchIO->new(
227 -file=>test_input_file('contig-by-hand.wublastp')
228 )->next_result->next_hit;
229 my $hand_hsp = $hand_hit->next_hsp;
230 my @hand_qrng = $hand_hsp->range('query');
231 my @hand_srng = $hand_hsp->range('hit');
232 my @hand_matches = $hand_hit->matches;
234 is(($tiling->range('query'))[0], $hand_qrng[0]);
235 is(($tiling->range('query'))[1], $hand_qrng[1]);
236 is(sprintf("%d",$tiling->identities('query')), $hand_matches[0]);
237 is(sprintf("%d",$tiling->conserved('query')), $hand_matches[1]);
238 is(($tiling->range('hit'))[0], $hand_srng[0]);
239 is(($tiling->range('hit'))[1], $hand_srng[1]);
240 is(sprintf("%d",$tiling->identities('hit')), $hand_matches[0]);
241 is(sprintf("%d",$tiling->conserved('hit')), $hand_matches[1]);
243 ok( $blio = Bio::SearchIO->new(
244 '-format' => 'blast',
245 '-file' => test_input_file('dnaEbsub_ecoli.wublastx')
246 ), "dnaEbsub_ecoli.wublastx");
248 $hit = $blio->next_result->next_hit;
249 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
250 is(sprintf("%.3f",$tiling->frac_identical(-type=>'query',-denom=>'aligned',-context=>'p2')), '0.364');
251 is(sprintf("%.3f",$tiling->frac_identical(-type=>'hit',-denom=>'aligned',-context=>'all')), '0.366');
252 is(sprintf("%.3f",$tiling->frac_conserved(-type=>'query',-denom=>'aligned',-context=>'p2')), '0.537');
253 is(sprintf("%.3f",$tiling->frac_conserved(-type=>'hit',-denom=>'aligned',-context=>'all')), '0.540');
254 is(sprintf("%.2f",$tiling->frac_aligned_query(-context=>'p2')), '0.62');
255 is(sprintf("%.2f",$tiling->frac_aligned_hit(-context=>'all')), '0.71');
257 ok( $blio = Bio::SearchIO->new(
258 '-format' => 'blast',
259 '-file' => test_input_file('tricky.wublast')
260 ), "tricky.wublast");
262 $hit = $blio->next_result->next_hit;
263 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
264 cmp_ok sprintf("%.3f",$tiling->frac_identical(-denom => 'aligned')), '>', 0.2, 'tricky.wublast(1)';
265 cmp_ok sprintf("%.3f",$tiling->frac_conserved(-denom => 'aligned')), '<=', 1, 'tricky.wublast(2)';
266 is(sprintf("%.2f",$tiling->frac_aligned_query), '0.92', 'tricky.wublast(3)');
267 is(sprintf("%.2f",$tiling->frac_aligned_hit), '0.91','tricky.wublast(4)');
269 diag("New tiling tests") if $VERBOSE;
271 # select test file set based on the environment variable
272 # BIOPERL_TILING_EXHAUSTIVE_TESTS
274 my $files = ($EXHAUSTIVE ? \%test_files : \%example_files);
276 foreach my $alg (@normal_formats, @xltd_formats) {
277 diag("*******$alg files*******") if ($files->{$alg} && $VERBOSE);
278 foreach my $tf (@{$files->{$alg}}) {
279 ok( $blio = Bio::SearchIO->new( -format=>'blast',
280 -file=>test_input_file($tf)
282 $result = $blio->next_result;
284 # compare the per-aligned-base identity avg over hsps
285 # with frac_identical (bzw, conserved)
288 while ( $hit = $result->next_hit ) {
290 # quiet the "No HSPs" warning with -verbose => -1
291 ok( $tiling = Bio::Search::Tiling::MapTiling->new(-hit=>$hit,-verbose=>-1), "tile $tf hit $hit_count #hsps ".scalar $tiling->hsps );
292 my @hsps = $tiling->hsps;
295 diag( "--no hsps for $tf hit $hit_count") if $VERBOSE;
298 my ($dpct, $est, $fast,$exact, $max);
299 my $tol = 0.10; # % difference accepted as approx. equal
301 ## loop through contexts:
302 for my $type (qw( query hit )) {
303 for my $context ($tiling->contexts($type)) {
304 diag(" --- $type $context ---") if $VERBOSE;
305 if (scalar($tiling->contexts($type, $context)) == 1) {
307 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'identical','aligned', 'est', 'fast', $context);
308 is( $est,$fast, substr($type,0,1)." id: est ($est) = fast ($fast)");
309 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'conserved','aligned', 'est', 'fast', $context);
310 is( $est,$fast, substr($type,0,1)." cn: est ($est) = fast ($fast)");
314 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'identical','aligned', 'est', 'fast', $context);
315 # cmp_ok( $dpct, "<", $tol,
316 # substr($type,0,1)." id: est ($est) ~ fast ($fast)");
317 ($dpct, $exact, $max) = $tiling->cmp_frac($type,'identical','aligned', 'exact', 'max', $context);
318 cmp_ok( abs($exact-$est)/$exact, "<" , $tol,
319 substr($type,0,1)." id: exact ($exact) ~ est ($est)");
320 cmp_ok( $exact, "<=" , $max,
321 substr($type,0,1)." id: exact ($exact) <= max ($max)");
323 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'conserved','aligned', 'est', 'fast', $context);
324 # cmp_ok( $dpct, "<", $tol,
325 # substr($type,0,1)." cn: est ($est) ~ fast ($fast)");
326 ($dpct, $exact, $max) = $tiling->cmp_frac($type,'conserved','aligned', 'exact', 'max', $context);
327 cmp_ok( abs($exact-$est)/$exact, "<" , $tol,
328 substr($type,0,1)." cn: exact ($exact) ~ est ($est)");
329 cmp_ok( $exact, "<=" , $max,
330 substr($type,0,1)." cn: exact ($exact) <= max ($max)");
340 my %expected_ranges = ( 'm0' => [7, 11037], #query
341 'm1' => [1770, 10865], #query
342 'm2' => [2462, 14599], #query
343 'all' => [231, 3563] #subject
345 $blio = Bio::SearchIO->new( -file=>test_input_file( $test_files{'bug2942'}->[0] ),
346 -format => 'blast' );
347 $hit = $blio->next_result->next_hit;
348 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
349 for ( 'm0', 'm1', 'm2' ) {
350 is_deeply( [$tiling->range('query',$_)], $expected_ranges{$_}, "bug2942: query $_: range correct");
352 is_deeply( [$tiling->range('subject', 'all')], $expected_ranges{'all'}, "bug2942: subject all : range correct" );
354 # test get_tiled_alns
356 $blio = Bio::SearchIO->new( -file=>test_input_file( 'dcr1_sp.WUBLASTP' ) );
357 $result = $blio->next_result;
358 while ($hit = $result->next_hit) {
359 last if $hit->name =~ /ASPTN/;
362 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
364 ok my @alns = $tiling->get_tiled_alns, "get_tiled_alns";
365 is scalar @alns, 6, "got all alns";
367 for my $aln ( @alns ) {
368 my (@aint, @qint, @sint);
369 my $qs = $aln->get_seq_by_id('query');
370 my $ss = $aln->get_seq_by_id('subject');
371 ok my @qfeats = $qs->get_SeqFeatures;
373 push @aint, [$_->start, $_->end];
374 push @qint, [($_->get_tag_values('query_start'))[0],
375 ($_->get_tag_values('query_end'))[0] ];
377 is( eval(join('+', map {$$_[1]-$$_[0]+1} @aint)),
378 eval(join('+', map {$$_[1]-$$_[0]+1} @qint)),
379 "aln and qfeat lengths correspond" );
380 is( $qs->length - $qs->num_gaps('-'), eval(join('+', map {$$_[1]-$$_[0]+1} @qint)), "q length correct");
381 ok my @hfeats = $ss->get_SeqFeatures;
383 ok ( @qfeats == @hfeats, "features on q and s correspond");
385 push @aint, [$_->start, $_->end];
386 push @sint, [($_->get_tag_values('subject_start'))[0],
387 ($_->get_tag_values('subject_end'))[0] ];
389 is( eval(join('+', map {$$_[1]-$$_[0]+1} @aint)),
390 eval(join('+', map {$$_[1]-$$_[0]+1} @sint)),
391 "aln and hfeat lengths correspond" );
392 is( $ss->length - $ss->num_gaps('-'), eval(join('+', map {$$_[1]-$$_[0]+1} @sint)), "s length correct");
399 package Bio::Search::Tiling::MapTiling;
402 my ($tiling, $type, $method, $denom, @actions) = @_;
404 my $context = ($actions[2] ? $actions[2] : 'all');
405 $a = $tiling->frac(-type=>$type,
408 -action=>$actions[0],
410 $b = $tiling->frac(-type=>$type,
413 -action=>$actions[1],
415 return ( abs($a-$b)/$a, f(5,$a), f(5,$b) );
418 sub f { my ($d,$val) = @_; sprintf("%.${d}f",$val) }