t/*: remove "use lib '.'" and t/lib/Error.pm
[bioperl-live.git] / t / SeqIO / msout.t
blob536462d9eb35d387f1336a6e9d974832a9aa82ab
1 #!/usr/bin/perl
2 use version;
3 our $API_VERSION = $Bio::SeqIO::msout::API_VERSION;
4 use strict;
5 use File::Path qw(mkpath rmtree);
7 BEGIN {
8 use Bio::Root::Test;
10 test_begin(
11 -tests => 165,
12 -requires_modules => [q(Bio::SeqIO::msout)],
13 -requires_networking => 0
16 use_ok('Bio::SeqIO::msout');
20 # skip tests if the msout.pm module is too old.
21 my $api_version = $Bio::SeqIO::msout::API_VERSION;
22 cmp_ok( $api_version, '>=', qv('1.1.5'),
23 "Bio::SeqIO::msout is at least api version 1.1.5" );
25 test_file_1( 0, "msout/msout_infile1" ); # 23 tests
26 test_file_2( 0, "msout/msout_infile2" ); # 22 tests
27 test_file_3( 0, "msout/msout_infile3" ); # 17 tests
29 # tests to run for api versions >= 1.1.6
30 SKIP: {
31 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.6), 22
32 unless $api_version >= qv('1.1.6');
33 test_file_1( 0, q(msout/msout_infile4) );
36 # tests to run for api versions >= 1.1.7
37 SKIP: {
38 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.7), 4
39 unless $api_version >= qv('1.1.7');
40 bad_test_file_1( 0, q(msout/bad_msout_infile1) ); # 2 tests
41 bad_test_file_2( 0, q(msout/bad_msout_infile2) ); # 2 tests
44 # tests to run for api version >= 1.1.8
45 SKIP: {
46 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.8), 75
47 unless $api_version >= qv('1.1.8');
49 test_file_1( 0, "msout/msout_infile1", 100 );
50 test_file_2( 0, "msout/msout_infile2", 10 );
51 test_file_1( 0, q(msout/msout_infile4), 100 );
52 bad_test_file_1( 0, q(msout/bad_msout_infile1), 1000 );
53 bad_test_file_2( 0, q(msout/bad_msout_infile2), 1000 );
54 bad_n_sites( 0, q(msout/msout_infile1) ); # 2 tests
57 sub create_dir {
59 my $dir = shift;
61 $dir = Bio::Root::Test::test_input_file($dir);
63 unless ( -d $dir ) {
64 mkpath($dir);
68 sub remove_dir {
70 my $dir = shift;
72 $dir = Bio::Root::Test::test_input_file($dir);
74 if ( -d $dir ) {
75 rmtree($dir);
77 else { warn "Tried to remove $dir, but it does not exist" }
80 sub test_file_1 {
81 ##############################################################################
82 ## Test file 1
83 ##############################################################################
85 my $gzip = shift;
86 my $infile = shift;
87 my $n_sites = shift;
88 $infile = Bio::Root::Test::test_input_file($infile);
90 # the files are now part of the git repo and don't have to be printed
91 # print_file1( $infile, $gzip );
93 my $file_sequence = $infile;
94 if ($gzip) {
95 $file_sequence = "gzip -dc <$file_sequence |";
97 my $msout = Bio::SeqIO->new(
98 -file => "$file_sequence",
99 -format => 'msout',
100 -n_sites => $n_sites,
103 isa_ok( $msout, 'Bio::SeqIO::msout' );
105 my $rh_base_conversion_table = $msout->get_base_conversion_table;
107 my %attributes = (
108 RUNS => 3,
109 SEGSITES => 7,
110 N_SITES => $n_sites,
111 SEEDS => [qw(1 1 1)],
112 MS_INFO_LINE => 'ms 6 3 -s 7 -I 3 3 2 1',
113 TOT_RUN_HAPS => 6,
114 POPS => [qw(3 2 1)],
115 NEXT_RUN_NUM => 1,
116 LAST_READ_HAP_NUM => 0,
117 POSITIONS => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
118 CURRENT_RUN_SEGSITES => 7
121 foreach my $attribute ( keys %attributes ) {
122 my $func = lc($attribute);
124 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
125 $func = ucfirst($func);
128 $func = 'get_' . $func;
129 my @returns = $msout->$func();
130 my ( $return, $got );
132 # If there were more than one return value, then compare references to
133 # arrays instead of scalars
134 unless ( @returns > 1 ) {
135 $got = shift @returns;
137 else { $got = \@returns }
139 my $expected = $attributes{$attribute};
141 if ( defined $got && defined $expected ) {
142 is_deeply( $got, $expected, "Get $attribute" );
144 else { is_deeply( $got, $expected, "Get $attribute" ) }
147 # Testing next_hap at beginning of run
148 my @data_got =
149 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
150 my @data_expected;
151 if ( !defined($n_sites) ) {
152 @data_expected = qw(1111111);
154 else {
155 @data_expected =
156 qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000);
158 is_deeply( \@data_got, \@data_expected,
159 "Get next_hap at beginning of run" );
161 # Testing next_hap after beginning of run
162 @data_got =
163 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
164 if ( !defined($n_sites) ) {
165 @data_expected = qw(5555555);
167 else {
168 @data_expected =
169 qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
171 is_deeply( \@data_got, \@data_expected,
172 "Get next_hap after beginning of run" );
174 # Surprise test! testing msout::outgroup
175 my $outgroup = $msout->outgroup;
176 is( $outgroup, 1, "Testing msout::outgroup" );
178 # Testing next_pop after beginning of pop
179 @data_got =
180 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
181 if ( !defined($n_sites) ) {
182 @data_expected = qw(4444444);
184 else {
185 @data_expected =
186 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
188 is_deeply( \@data_got, \@data_expected,
189 "Get next_pop after beginning of pop" );
191 # Testing next_pop at beginning of pop
192 @data_got =
193 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
194 if ( !defined($n_sites) ) {
195 @data_expected = qw(4444444 5555555);
197 else {
198 @data_expected =
199 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
201 is_deeply( \@data_got, \@data_expected,
202 "Get next_pop at beginning of pop" );
204 # Testing next_run after beginning of run
205 @data_got =
206 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
207 if ( !defined($n_sites) ) {
208 @data_expected = qw(4444444);
210 else {
211 @data_expected =
212 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
214 is_deeply( \@data_got, \@data_expected,
215 "Get next_run after beginning of run" );
217 # Testing next_pop at beginning of run
218 @data_got =
219 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
220 if ( !defined($n_sites) ) {
221 @data_expected = qw(5555555 5555555 5555555);
223 else {
224 @data_expected =
225 qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
227 is_deeply( \@data_got, \@data_expected,
228 "Get next_pop at beginning of run" );
230 # Testing next_hap after pop
231 @data_got = $msout->get_next_hap;
232 @data_expected = qw(1010101);
233 is_deeply( \@data_got, \@data_expected, "Get next_hap after pop" );
235 # Testing next_run after pop and hap
236 @data_got =
237 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
238 if ( !defined($n_sites) ) {
239 @data_expected = qw(1111111 1515151);
241 else {
242 @data_expected =
243 qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
245 is_deeply( \@data_got, \@data_expected, "Get next_run after pop and hap" );
247 # Testing next_run at beginning of run
248 @data_got =
249 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
250 if ( !defined($n_sites) ) {
251 @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
253 else {
254 @data_expected =
255 qw(1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
257 is_deeply( \@data_got, \@data_expected,
258 "Get next_run at beginning of run" );
260 is( $msout->get_next_run_num, undef, 'have all lines been read?' );
263 sub test_file_2 {
264 ##############################################################################
265 ## Test file 2
266 ##############################################################################
268 my $gzip = shift;
269 my $infile = shift;
270 my $n_sites = shift;
271 $infile = Bio::Root::Test::test_input_file($infile);
273 # the files are now part of the git repo and don't have to be printed
274 # print_file2( $infile, $gzip );
276 my $file_sequence = $infile;
277 if ($gzip) {
278 $file_sequence = "gzip -dc <$file_sequence |";
281 my $msout = Bio::SeqIO->new(
282 -file => "$file_sequence",
283 -format => 'msout',
284 -n_sites => $n_sites,
287 isa_ok( $msout, 'Bio::SeqIO::msout' );
289 my %attributes = (
290 RUNS => 3,
291 SEGSITES => 7,
292 N_SITES => $n_sites,
293 SEEDS => [qw(1 1 1)],
294 MS_INFO_LINE => 'ms 6 3',
295 TOT_RUN_HAPS => 6,
296 POPS => 6,
297 NEXT_RUN_NUM => 1,
298 LAST_READ_HAP_NUM => 0,
299 POSITIONS => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
300 CURRENT_RUN_SEGSITES => 7
303 foreach my $attribute ( keys %attributes ) {
304 my $func = lc($attribute);
306 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
307 $func = ucfirst($func);
310 $func = 'get_' . $func;
311 my @returns = $msout->$func();
312 my ( $return, $got );
314 # If there were more than one return value, then compare references to
315 # arrays instead of scalars
316 unless ( @returns > 1 ) {
317 $got = shift @returns;
319 else { $got = \@returns }
321 my $expected = $attributes{$attribute};
323 if ( defined $got && defined $expected ) {
324 is_deeply( $got, $expected, "Get $attribute" );
326 else { is_deeply( $got, $expected, "Get $attribute" ) }
329 my $rh_base_conversion_table = $msout->get_base_conversion_table;
331 # Testing next_hap at beginning of run
332 my @data_got = $msout->get_next_hap;
333 my @data_expected = '1111111';
334 is_deeply( \@data_got, \@data_expected,
335 "Get next_hap at beginning of run" );
337 # Testing next_hap after beginning of run
338 @data_got =
339 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
340 if ( !defined($n_sites) ) {
341 @data_expected = '5555555';
343 else {
344 @data_expected = '5555055500';
346 is_deeply( \@data_got, \@data_expected,
347 "Get next_hap after beginning of run" );
349 # Surprise test! testing msout::outgroup
350 my $outgroup = $msout->outgroup;
351 is( $outgroup, 0, "Testing msout::outgroup" );
353 # Testing next_pop after beginning of pop
354 @data_got =
355 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
356 if ( !defined($n_sites) ) {
357 @data_expected = qw(4444444 4444444 5555555 4444444);
359 else {
360 @data_expected = qw(4444044400 4444044400 5555055500 4444044400);
362 is_deeply( \@data_got, \@data_expected,
363 "Get next_pop after beginning of pop" );
365 # Testing next_pop at beginning of pop/run
366 @data_got =
367 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
368 if ( !defined($n_sites) ) {
369 @data_expected = qw(5555555 5555555 5555555 1010101 1111111 1515151);
371 else {
372 @data_expected =
373 qw(5555055500 5555055500 5555055500 1010010100 1111011100 1515015100);
375 is_deeply( \@data_got, \@data_expected,
376 "Get next_pop at beginning of pop/run" );
378 # Testing next_run at beginning of run
379 @data_got =
380 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
381 if ( !defined($n_sites) ) {
382 @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
384 else {
385 @data_expected =
386 qw(1414014100 1414014100 1515015100 1414014100 1515015100 1515015100);
388 is_deeply( \@data_got, \@data_expected,
389 "Get next_run at beginning of run" );
391 # Testing next_hap at beginning of run 2
392 @data_got =
393 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
394 if ( !defined($n_sites) ) {
395 @data_expected = '1515151';
397 else {
398 @data_expected = '1515015100';
400 is_deeply( \@data_got, \@data_expected,
401 "Get next_hap at beginning of run 2" );
403 # Testing next_run after hap
404 @data_got =
405 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
406 if ( !defined($n_sites) ) {
407 @data_expected = qw(5050505 5151515 5555555 5454545 5454545);
409 else {
410 @data_expected =
411 qw(5050050500 5151051500 5555055500 5454054500 5454054500);
413 is_deeply( \@data_got, \@data_expected, "Get next_run after hap" );
415 is( $msout->get_next_run_num, 5, 'next run should be 5.' );
417 # getting the last hap of the file via next hap
418 # Testing next_run after hap
419 @data_got = $msout->get_next_hap;
420 @data_expected = qw( 5555555 );
421 is_deeply( \@data_got, \@data_expected, "Get last hap through next_hap" );
425 sub test_file_3 {
426 ##############################################################################
427 ## Test file 3
428 ##############################################################################
430 my $gzip = shift;
431 my $infile = shift;
432 $infile = Bio::Root::Test::test_input_file($infile);
434 # the files are now part of the git repo and don't have to be printed
435 # print_file3( $infile, $gzip );
437 my $file_sequence = $infile;
438 if ($gzip) {
439 $file_sequence = "gzip -dc <$file_sequence |";
441 my $msout = Bio::SeqIO->new(
442 -file => "$file_sequence",
443 -format => 'msout',
446 isa_ok( $msout, 'Bio::SeqIO::msout' );
448 my $rh_base_conversion_table = $msout->get_base_conversion_table;
450 my %attributes = (
451 RUNS => 1,
452 SEGSITES => 7,
453 SEEDS => [qw(1 1 1)],
454 MS_INFO_LINE => 'ms 3 1',
455 TOT_RUN_HAPS => 3,
456 POPS => 3,
457 NEXT_RUN_NUM => 1,
458 LAST_READ_HAP_NUM => 0,
459 POSITIONS => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
460 CURRENT_RUN_SEGSITES => 7
463 foreach my $attribute ( keys %attributes ) {
464 my $func = lc($attribute);
466 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
467 $func = ucfirst($func);
470 $func = 'get_' . $func;
471 my @returns = $msout->$func();
472 my ( $return, $got );
474 # If there were more than one return value, then compare references to
475 # arrays instead of scalars
476 unless ( @returns > 1 ) {
477 $got = shift @returns;
479 else { $got = \@returns }
481 my $expected = $attributes{$attribute};
483 if ( defined $got && defined $expected ) {
484 is_deeply( $got, $expected, "Get $attribute" );
486 else { is_deeply( $got, $expected, "Get $attribute" ) }
489 # Testing next_hap at beginning of run
490 my @data_got =
491 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
492 my @data_expected = qw(1111111 5555555 4444444);
493 is_deeply( \@data_got, \@data_expected, "Get next_pop at end of run" );
495 is( $msout->get_next_run_num, undef, 'have all lines been read?' );
497 # Testing what happens when we read from empty stream
498 @data_got = $msout->get_next_pop;
499 @data_expected = ();
500 is_deeply( \@data_got, \@data_expected, "Get next_pop at eof" );
502 # Testing what happens when we read from empty stream
503 @data_got = $msout->get_next_run;
504 @data_expected = ();
505 is_deeply( \@data_got, \@data_expected, "Get next_run at eof" );
507 # Testing what happens when we read from empty stream
508 @data_got = $msout->get_next_hap;
509 @data_expected = undef;
510 is_deeply( \@data_got, \@data_expected, "Get next_hap at eof" );
512 # Testing what happens when we read from empty stream
513 @data_got = $msout->get_next_seq;
514 @data_expected = ();
515 is_deeply( \@data_got, \@data_expected, "Get next_seq at eof" );
519 sub print_to_file {
520 my ( $ra_in, $out ) = @_;
521 open my $OUT, '>', $out or die "\nCould not write outfile '$out': $!\n";
522 print $OUT ("@$ra_in");
523 close $OUT;
526 sub convert_bases_to_nums {
528 my ( $rh_base_conversion_table, @seqs ) = @_;
530 my @out_seqstrings;
531 foreach my $seq (@seqs) {
532 my $seqstring = $seq->seq;
533 foreach my $base ( keys %{$rh_base_conversion_table} ) {
534 $seqstring =~ s/($base)/$rh_base_conversion_table->{$base}/g;
536 push @out_seqstrings, $seqstring;
539 return @out_seqstrings;
543 sub bad_test_file_1 {
544 ##############################################################################
545 ## Bad Test file 1
546 ##############################################################################
548 # This sub tests to see if msout.pm will catch if the msinfo line's
549 # advertized haps are less than are actually in the file
551 my $gzip = shift;
552 my $infile = shift;
553 my $n_sites = shift;
554 $infile = test_input_file($infile);
556 my $file_sequence = $infile;
557 if ($gzip) {
558 $file_sequence = "gunzip -c <$file_sequence |";
560 my $msout = Bio::SeqIO->new(
561 -file => "$file_sequence",
562 -format => 'msout',
563 -n_sites => $n_sites,
566 isa_ok( $msout, 'Bio::SeqIO::msout' );
568 throws_ok { $msout->get_next_run }
569 qr/msout file has only 2 hap\(s\), which is less than indicated in msinfo line \( 9 \)/,
570 q(Caught error in bad msout file 1);
574 sub bad_test_file_2 {
575 ##############################################################################
576 ## Bad Test file 2
577 ##############################################################################
579 # This sub tests to see if msout.pm will catch if the msinfo line's
580 # advertized haps are more than are actually in the file
582 my $gzip = shift;
583 my $infile = shift;
584 my $n_sites = shift;
585 $infile = test_input_file($infile);
587 my $file_sequence = $infile;
588 if ($gzip) {
589 $file_sequence = "gunzip -c <$file_sequence |";
591 my $msout = Bio::SeqIO->new(
592 -file => "$file_sequence",
593 -format => 'msout',
594 -n_sites => $n_sites,
597 isa_ok( $msout, 'Bio::SeqIO::msout' );
599 throws_ok { $msout->get_next_run }
600 qr/\'\/\/\' not encountered when expected. There are more haplos in one of the msOUT runs than advertised in the msinfo line/,
601 q(Caught error in bad msout file 2);
605 sub bad_n_sites {
606 ##############################################################################
607 ## Bad n_sites
608 ##############################################################################
610 # this sub tests if msout.pm dies when n_sites is smaller than segsites
611 my $gzip = shift;
612 my $infile = shift;
613 $infile = Bio::Root::Test::test_input_file($infile);
615 my $file_sequence = $infile;
616 if ($gzip) {
617 $file_sequence = "gzip -dc <$file_sequence |";
619 my $msout = Bio::SeqIO->new(
620 -file => "$file_sequence",
621 -format => 'msout',
624 # test nsites -1
625 throws_ok { $msout->set_n_sites(-1) } qr|first argument needs to be a positive integer. argument supplied: -1|;
627 # test nsites smaller than next hap
628 $msout->set_n_sites(1);
629 throws_ok{$msout->get_next_seq} qr/n_sites needs to be at least the number of segsites of every run/, 'too few n_sites failed OK';