5 use vars qw($EXHAUSTIVE $VERBOSE);
11 $EXHAUSTIVE = $ENV{BIOPERL_TILING_EXHAUSTIVE_TESTS};
12 $VERBOSE = $ENV{BIOPERL_TILING_VERBOSE_TESTS};
13 test_begin(-tests => ($EXHAUSTIVE ? 6519 : 1141) );
16 use_ok('Bio::Search::Tiling::MapTiling');
17 use_ok('Bio::Search::Tiling::MapTileUtils');
18 use_ok('Bio::SearchIO');
19 use_ok('Bio::Search::Hit::BlastHit');
22 my ($blio, $result, $hit, $tiling, $hsp);
24 my @normal_formats = qw( blast wublast
31 my @xltd_formats = qw( blastx wublastx
36 # an exhaustive listing of search reports in
54 hsinsulin.blastcl3.blastn
63 catalase-webblast.BLASTP
68 contig-by-hand.wublastp
69 ecolitst.noseqs.wublastp
75 dnaEbsub_ecoli.wublastx
82 1ZZ19XR301R-Alignment.tblastn
85 dnaEbsub_ecoli.wutblastn
92 dnaEbsub_ecoli.wutblastx
98 ecoli_domains.rpsblast
108 # a subset of search reports for
109 # run-o-the-mill regression tests
111 my %example_files = (
119 brassica_ATH.WUBLASTN
123 catalase-webblast.BLASTP
132 dnaEbsub_ecoli.wublastx
141 dnaEbsub_ecoli.wutblastn
147 dnaEbsub_ecoli.wutblastx
154 ok( $blio = new Bio::SearchIO(
155 -file=>test_input_file('dcr1_sp.WUBLASTP'),
156 -format=>'blast'), 'parse data file');
158 $result = $blio->next_result;
159 while ( $_ = $result->next_hit ) {
160 last if $_->name =~ /ASPTN/;
162 ok($hit = $_, 'got test hit');
163 ok($tiling = Bio::Search::Tiling::MapTiling->new($hit), 'create tiling');
168 isa_ok($tiling, 'Bio::Search::Tiling::TilingI');
169 foreach ( qw( next_tiling rewind_tilings identities conserved length ) ) {
170 ok( $tiling->$_, "implements '$_'" );
173 # regression test on original calculations
175 my @orig_id_results = ( 387,388,388,381,382,389 );
176 my @orig_cn_results = ( 622,619,628,608,611,613 );
178 $tiling->identities('query', 'exact'),
179 $tiling->identities('query', 'est'),
180 $tiling->identities('query', 'max'),
181 $tiling->identities('subject', 'exact'),
182 $tiling->identities('subject', 'est'),
183 $tiling->identities('subject', 'max')
186 $tiling->conserved('query', 'exact'),
187 $tiling->conserved('query', 'est'),
188 $tiling->conserved('query', 'max'),
189 $tiling->conserved('subject', 'exact'),
190 $tiling->conserved('subject', 'est'),
191 $tiling->conserved('subject', 'max')
193 map { $_ = int($_) } @id_results, @cn_results;
195 is_deeply(\@id_results, \@orig_id_results, 'identities regression test');
196 is_deeply(\@cn_results, \@orig_cn_results, 'conserved regression test');
198 # tiling iterator regression tests
201 while ($tiling->next_tiling('query')) {$qn++};
202 while ($tiling->next_tiling('subject')) {$sn++};
203 is ($qn, 8, 'tiling iterator regression test(1)');
204 is ($sn, 128, 'tiling iterator regression test(2)');
205 $tiling->rewind('subject');
206 while ($tiling->next_tiling('subject')) {$sn++};
207 is ($sn, 256, 'tiling iterator regression test(3, rewind)');
209 diag("Old blast.t tiling tests") if $VERBOSE;
211 ok($blio = Bio::SearchIO->new(
212 '-format' => 'blast',
213 '-file' => test_input_file('ecolitst.wublastp')
214 ), "ecolitst.wublastp");
215 $result = $blio->next_result;
217 $hit = $result->next_hit;
218 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
219 # Test HSP contig data returned by SearchUtils::tile_hsps()
220 # Second hit has two hsps that overlap.
222 # compare with the contig made by hand for these two contigs
223 # in t/data/contig-by-hand.wublastp
224 # (in this made-up file, the hsps from ecolitst.wublastp
225 # were aligned and contiged, and Length, Identities, Positives
226 # were counted, by a human (maj) )
228 my $hand_hit = Bio::SearchIO->new(
230 -file=>test_input_file('contig-by-hand.wublastp')
231 )->next_result->next_hit;
232 my $hand_hsp = $hand_hit->next_hsp;
233 my @hand_qrng = $hand_hsp->range('query');
234 my @hand_srng = $hand_hsp->range('hit');
235 my @hand_matches = $hand_hit->matches;
237 is(($tiling->range('query'))[0], $hand_qrng[0]);
238 is(($tiling->range('query'))[1], $hand_qrng[1]);
239 is(sprintf("%d",$tiling->identities('query')), $hand_matches[0]);
240 is(sprintf("%d",$tiling->conserved('query')), $hand_matches[1]);
241 is(($tiling->range('hit'))[0], $hand_srng[0]);
242 is(($tiling->range('hit'))[1], $hand_srng[1]);
243 is(sprintf("%d",$tiling->identities('hit')), $hand_matches[0]);
244 is(sprintf("%d",$tiling->conserved('hit')), $hand_matches[1]);
246 ok( $blio = Bio::SearchIO->new(
247 '-format' => 'blast',
248 '-file' => test_input_file('dnaEbsub_ecoli.wublastx')
249 ), "dnaEbsub_ecoli.wublastx");
251 $hit = $blio->next_result->next_hit;
252 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
253 is(sprintf("%.3f",$tiling->frac_identical(-type=>'query',-denom=>'aligned',-context=>'p2')), '0.364');
254 is(sprintf("%.3f",$tiling->frac_identical(-type=>'hit',-denom=>'aligned',-context=>'all')), '0.366');
255 is(sprintf("%.3f",$tiling->frac_conserved(-type=>'query',-denom=>'aligned',-context=>'p2')), '0.537');
256 is(sprintf("%.3f",$tiling->frac_conserved(-type=>'hit',-denom=>'aligned',-context=>'all')), '0.540');
257 is(sprintf("%.2f",$tiling->frac_aligned_query(-context=>'p2')), '0.62');
258 is(sprintf("%.2f",$tiling->frac_aligned_hit(-context=>'all')), '0.71');
260 ok( $blio = Bio::SearchIO->new(
261 '-format' => 'blast',
262 '-file' => test_input_file('tricky.wublast')
263 ), "tricky.wublast");
265 $hit = $blio->next_result->next_hit;
266 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
267 cmp_ok sprintf("%.3f",$tiling->frac_identical(-denom => 'aligned')), '>', 0.2, 'tricky.wublast(1)';
268 cmp_ok sprintf("%.3f",$tiling->frac_conserved(-denom => 'aligned')), '<=', 1, 'tricky.wublast(2)';
269 is(sprintf("%.2f",$tiling->frac_aligned_query), '0.92', 'tricky.wublast(3)');
270 is(sprintf("%.2f",$tiling->frac_aligned_hit), '0.91','tricky.wublast(4)');
272 diag("New tiling tests") if $VERBOSE;
274 # select test file set based on the environment variable
275 # BIOPERL_TILING_EXHAUSTIVE_TESTS
277 my $files = ($EXHAUSTIVE ? \%test_files : \%example_files);
279 foreach my $alg (@normal_formats, @xltd_formats) {
280 diag("*******$alg files*******") if ($files->{$alg} && $VERBOSE);
281 foreach my $tf (@{$files->{$alg}}) {
282 ok( $blio = Bio::SearchIO->new( -format=>'blast',
283 -file=>test_input_file($tf)
285 $result = $blio->next_result;
287 # compare the per-aligned-base identity avg over hsps
288 # with frac_identical (bzw, conserved)
291 while ( $hit = $result->next_hit ) {
293 # quiet the "No HSPs" warning with -verbose => -1
294 ok( $tiling = Bio::Search::Tiling::MapTiling->new(-hit=>$hit,-verbose=>-1), "tile $tf hit $hit_count #hsps ".scalar $tiling->hsps );
295 my @hsps = $tiling->hsps;
298 diag( "--no hsps for $tf hit $hit_count") if $VERBOSE;
301 my ($dpct, $est, $fast,$exact, $max);
302 my $tol = 0.10; # % difference accepted as approx. equal
304 ## loop through contexts:
305 for my $type (qw( query hit )) {
306 for my $context ($tiling->contexts($type)) {
307 diag(" --- $type $context ---") if $VERBOSE;
308 if (scalar($tiling->contexts($type, $context)) == 1) {
310 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'identical','aligned', 'est', 'fast', $context);
311 is( $est,$fast, substr($type,0,1)." id: est ($est) = fast ($fast)");
312 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'conserved','aligned', 'est', 'fast', $context);
313 is( $est,$fast, substr($type,0,1)." cn: est ($est) = fast ($fast)");
317 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'identical','aligned', 'est', 'fast', $context);
318 # cmp_ok( $dpct, "<", $tol,
319 # substr($type,0,1)." id: est ($est) ~ fast ($fast)");
320 ($dpct, $exact, $max) = $tiling->cmp_frac($type,'identical','aligned', 'exact', 'max', $context);
321 cmp_ok( abs($exact-$est)/$exact, "<" , $tol,
322 substr($type,0,1)." id: exact ($exact) ~ est ($est)");
323 cmp_ok( $exact, "<=" , $max,
324 substr($type,0,1)." id: exact ($exact) <= max ($max)");
326 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'conserved','aligned', 'est', 'fast', $context);
327 # cmp_ok( $dpct, "<", $tol,
328 # substr($type,0,1)." cn: est ($est) ~ fast ($fast)");
329 ($dpct, $exact, $max) = $tiling->cmp_frac($type,'conserved','aligned', 'exact', 'max', $context);
330 cmp_ok( abs($exact-$est)/$exact, "<" , $tol,
331 substr($type,0,1)." cn: exact ($exact) ~ est ($est)");
332 cmp_ok( $exact, "<=" , $max,
333 substr($type,0,1)." cn: exact ($exact) <= max ($max)");
343 my %expected_ranges = ( 'm0' => [7, 11037], #query
344 'm1' => [1770, 10865], #query
345 'm2' => [2462, 14599], #query
346 'all' => [231, 3563] #subject
348 $blio = Bio::SearchIO->new( -file=>test_input_file( $test_files{'bug2942'}->[0] ),
349 -format => 'blast' );
350 $hit = $blio->next_result->next_hit;
351 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
352 for ( 'm0', 'm1', 'm2' ) {
353 is_deeply( [$tiling->range('query',$_)], $expected_ranges{$_}, "bug2942: query $_: range correct");
355 is_deeply( [$tiling->range('subject', 'all')], $expected_ranges{'all'}, "bug2942: subject all : range correct" );
357 # test get_tiled_alns
359 $blio = Bio::SearchIO->new( -file=>test_input_file( 'dcr1_sp.WUBLASTP' ) );
360 $result = $blio->next_result;
361 while ($hit = $result->next_hit) {
362 last if $hit->name =~ /ASPTN/;
365 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
367 ok my @alns = $tiling->get_tiled_alns, "get_tiled_alns";
368 is scalar @alns, 6, "got all alns";
370 for my $aln ( @alns ) {
371 my (@aint, @qint, @sint);
372 my $qs = $aln->get_seq_by_id('query');
373 my $ss = $aln->get_seq_by_id('subject');
374 ok my @qfeats = $qs->get_SeqFeatures;
376 push @aint, [$_->start, $_->end];
377 push @qint, [($_->get_tag_values('query_start'))[0],
378 ($_->get_tag_values('query_end'))[0] ];
380 is( eval(join('+', map {$$_[1]-$$_[0]+1} @aint)),
381 eval(join('+', map {$$_[1]-$$_[0]+1} @qint)),
382 "aln and qfeat lengths correspond" );
383 is( $qs->length - $qs->num_gaps('-'), eval(join('+', map {$$_[1]-$$_[0]+1} @qint)), "q length correct");
384 ok my @hfeats = $ss->get_SeqFeatures;
386 ok ( @qfeats == @hfeats, "features on q and s correspond");
388 push @aint, [$_->start, $_->end];
389 push @sint, [($_->get_tag_values('subject_start'))[0],
390 ($_->get_tag_values('subject_end'))[0] ];
392 is( eval(join('+', map {$$_[1]-$$_[0]+1} @aint)),
393 eval(join('+', map {$$_[1]-$$_[0]+1} @sint)),
394 "aln and hfeat lengths correspond" );
395 is( $ss->length - $ss->num_gaps('-'), eval(join('+', map {$$_[1]-$$_[0]+1} @sint)), "s length correct");
402 package Bio::Search::Tiling::MapTiling;
405 my ($tiling, $type, $method, $denom, @actions) = @_;
407 my $context = ($actions[2] ? $actions[2] : 'all');
408 $a = $tiling->frac(-type=>$type,
411 -action=>$actions[0],
413 $b = $tiling->frac(-type=>$type,
416 -action=>$actions[1],
418 return ( abs($a-$b)/$a, f(5,$a), f(5,$b) );
421 sub f { my ($d,$val) = @_; sprintf("%.${d}f",$val) }