1 # -*-Perl-*- Test Harness script for Bioperl
9 test_begin( -tests => 206 );
11 use_ok('Bio::SimpleAlign');
12 use_ok('Bio::AlignIO');
13 use_ok('Bio::SeqFeature::Generic');
14 use_ok('Bio::Location::Simple');
15 use_ok('Bio::Location::Split');
18 my $DEBUG = test_debug();
20 my ( $str, $aln, @seqs, $seq );
22 $str = Bio::AlignIO->new( -file => test_input_file('testaln.pfam') );
23 isa_ok( $str, 'Bio::AlignIO' );
24 $aln = $str->next_aln();
25 is $aln->get_seq_by_pos(1)->get_nse, '1433_LYCES/9-246', "pfam input test";
27 my $aln1 = $aln->remove_columns( ['mismatch'] );
30 '::*::::*:**:*:*:***:**.***::*.*::**::**:***..**:'
31 . '*:*.::::*:.:*.*.**:***.**:*.:.**::**.*:***********:::*:.:*:**.*::*:'
32 . '.*.:*:**:****************::',
36 my $aln2 = $aln->select( 1, 3 );
37 isa_ok( $aln2, 'Bio::Align::AlignI' );
38 is( $aln2->num_sequences, 3, 'num_sequences' );
40 # test select non contiguous-sorted by default
41 $aln2 = $aln->select_noncont( 6, 7, 8, 9, 10, 1, 2, 3, 4, 5 );
42 is( $aln2->num_sequences, 10, 'num_sequences' );
44 $aln2->get_seq_by_pos(2)->id,
45 $aln->get_seq_by_pos(2)->id,
49 $aln2->get_seq_by_pos(8)->id,
50 $aln->get_seq_by_pos(8)->id,
54 # test select non contiguous-nosort option
55 $aln2 = $aln->select_noncont( 'nosort', 6, 7, 8, 9, 10, 1, 2, 3, 4, 5 );
56 is( $aln2->num_sequences, 10, 'num_sequences' );
58 $aln2->get_seq_by_pos(2)->id,
59 $aln->get_seq_by_pos(7)->id,
63 $aln2->get_seq_by_pos(8)->id,
64 $aln->get_seq_by_pos(3)->id,
68 # test select non contiguous by name
69 my $aln3 = $aln->select_noncont_by_name('1433_LYCES','BMH1_YEAST','143T_HUMAN');
70 is( $aln3->num_sequences, 3, 'select_noncont_by_name' );
71 my @seqs3 = $aln3->each_seq();
72 is $seqs3[0]->id, '1433_LYCES', 'select_noncont_by_name';
73 is $seqs3[1]->id, 'BMH1_YEAST', 'select_noncont_by_name';
74 is $seqs3[2]->id, '143T_HUMAN', 'select_noncont_by_name';
77 @seqs = $aln->each_seq();
78 is scalar @seqs, 16, 'each_seq';
79 is $seqs[0]->get_nse, '1433_LYCES/9-246', 'get_nse';
80 is $seqs[0]->id, '1433_LYCES', 'id';
81 is $seqs[0]->num_gaps, 3, 'num_gaps';
82 @seqs = $aln->each_alphabetically();
83 is scalar @seqs, 16, 'each_alphabetically';
85 is $aln->column_from_residue_number( '1433_LYCES', 10 ), 2,
86 'column_from_residue_number';
87 is $aln->displayname( '1433_LYCES/9-246', 'my_seq' ), 'my_seq',
88 'display_name get/set';
89 is $aln->displayname('1433_LYCES/9-246'), 'my_seq', 'display_name get';
90 is substr( $aln->consensus_string(50), 0, 60 ),
91 "RE??VY?AKLAEQAERYEEMV??MK?VAE??????ELSVEERNLLSVAYKNVIGARRASW",
93 is substr( $aln->consensus_string(100), 0, 60 ),
94 "?????????L????E????M???M????????????L??E?RNL?SV?YKN??G??R??W",
96 is substr( $aln->consensus_string(0), 0, 60 ),
97 "REDLVYLAKLAEQAERYEEMVEFMKKVAELGAPAEELSVEERNLLSVAYKNVIGARRASW",
100 ok( @seqs = $aln->each_seq_with_id('143T_HUMAN') );
101 is scalar @seqs, 1, 'each_seq_with_id';
103 is $aln->is_flush, 1, 'is_flush';
104 ok( $aln->id('x') && $aln->id eq 'x', 'id get/set' );
106 is $aln->length, 242, 'length';
107 is $aln->num_residues, 3769, 'num_residues';
108 is $aln->num_sequences, 16, 'num_sequences';
109 is( sprintf( "%.2f", $aln->overall_percentage_identity() ),
110 33.06, 'overall_percentage_identity' );
111 is( sprintf( "%.2f", $aln->overall_percentage_identity('align') ),
112 33.06, 'overall_percentage_identity (align)' );
113 is( sprintf( "%.2f", $aln->overall_percentage_identity('short') ),
114 35.24, 'overall_percentage_identity (short)' );
115 is( sprintf( "%.2f", $aln->overall_percentage_identity('long') ),
116 33.47, 'overall_percentage_identity (long)' );
117 is( sprintf( "%.2f", $aln->average_percentage_identity() ),
118 66.91, 'average_percentage_identity' );
120 ok $aln->set_displayname_count;
121 is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES_1',
122 'set_displayname_count';
123 ok $aln->set_displayname_flat;
124 is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES', 'set_displayname_flat';
125 ok $aln->set_displayname_normal;
126 is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES/9-246',
127 'set_displayname_normal';
129 ok $aln->map_chars( '\.', '-' );
130 @seqs = $aln->each_seq_with_id('143T_HUMAN');
131 is substr( $seqs[0]->seq, 0, 60 ),
132 'KTELIQKAKLAEQAERYDDMATCMKAVTEQGA---ELSNEERNLLSVAYKNVVGGRRSAW',
133 'uppercase, map_chars';
137 ' ::*::::* : * *: *: *:***:**.***::*.'
138 . ' *::**::**:*** . . ** :* :* . :: :: *: . :* .*. **:'
139 . '***.** :*. : .* * : : **.*:***********:::* : .: * :** .'
140 . '*::*: .*. : *: **:****************:: ',
143 ok $aln->remove_seq( $seqs[0] ), 'remove_seqs';
144 is $aln->num_sequences, 15, 'remove_seqs';
145 ok $aln->add_seq( $seqs[0] ), 'add_seq';
146 is $aln->num_sequences, 16, 'add_seq';
147 ok $seq = $aln->get_seq_by_pos(1), 'get_seq_by_pos';
148 is( $seq->id, '1433_LYCES', 'get_seq_by_pos' );
149 ok( ( $aln->missing_char(), 'P' ) and ( $aln->missing_char('X'), 'X' ) );
150 ok( ( $aln->match_char(), '.' ) and ( $aln->match_char('-'), '-' ) );
151 ok( ( $aln->gap_char(), '-' ) and ( $aln->gap_char('.'), '.' ) );
153 is $aln->purge(0.7), 12, 'purge';
154 is $aln->num_sequences, 4, 'purge';
157 test_skip( -tests => 24, -requires_module => 'IO::String' );
160 my $out = IO::String->new($string);
162 my $s1 = Bio::LocatableSeq->new(
164 -seq => 'aawtat-tn-',
169 my $s2 = Bio::LocatableSeq->new(
171 -seq => '-aaaat-tt-',
176 $a = Bio::SimpleAlign->new();
180 is( $a->consensus_iupac, "aAWWAT-TN-", 'IO::String consensus_iupac' );
181 $s1->seq('aaaaattttt');
182 $s1->alphabet('dna');
184 $s2->seq('-aaaatttt-');
186 $a = Bio::SimpleAlign->new();
190 my $strout = Bio::AlignIO->new( -fh => $out, -format => 'pfam' );
191 $strout->write_aln($a);
194 "AAA/1-10 aaaaattttt\n" . "BBB/1-8 -aaaatttt-\n",
195 'IO::String write_aln normal'
200 my $b = $a->slice( 2, 9 );
201 $strout->write_aln($b);
203 "AAA/2-9 aaaatttt\n" . "BBB/1-8 aaaatttt\n",
204 'IO::String write_aln slice';
208 $b = $a->slice( 9, 10 );
209 $strout->write_aln($b);
211 "AAA/9-10 tt\n" . "BBB/8-8 t-\n",
212 'IO::String write_aln slice';
217 $b = $a->slice( 1, 2 );
218 $strout->write_aln($b);
220 "AAA/1-2 aa\n" . "BBB/1-1 -a\n",
221 'IO::String write_aln slice';
223 # not sure what coordinates this should return...
227 $b = $a->slice( 1, 1, 1 );
228 $strout->write_aln($b);
230 "AAA/1-1 a\n" . "BBB/1-0 -\n",
231 'IO::String write_aln slice';
236 $b = $a->slice( 2, 2 );
237 $strout->write_aln($b);
239 "AAA/2-2 a\n" . "BBB/1-1 a\n",
240 'IO::String write_aln slice';
242 eval { $b = $a->slice( 11, 13 ); };
246 # remove_columns by position
249 $str = Bio::AlignIO->new( -file => test_input_file('mini-align.aln') );
250 $aln1 = $str->next_aln;
251 $aln2 = $aln1->remove_columns( [ 0, 0 ] );
252 $strout->write_aln($aln2);
254 "P84139/2-33 NEGEHQIKLDELFEKLLRARLIFKNKDVLRRC\n"
255 . "P814153/2-33 NEGMHQIKLDVLFEKLLRARLIFKNKDVLRRC\n"
256 . "BAB68554/1-14 ------------------AMLIFKDKQLLQQC\n"
257 . "gb|443893|124775/1-32 MRFRFQIKVPPAVEGARPALLIFKSRPELGGC\n",
258 'remove_columns by position';
260 # and when arguments are entered in "wrong order"?
263 my $aln3 = $aln1->remove_columns( [ 1, 1 ], [ 30, 30 ], [ 5, 6 ] );
264 $strout->write_aln($aln3);
266 "P84139/1-33 MEGEIKLDELFEKLLRARLIFKNKDVLRC\n"
267 . "P814153/1-33 MEGMIKLDVLFEKLLRARLIFKNKDVLRC\n"
268 . "BAB68554/1-14 ----------------AMLIFKDKQLLQC\n"
269 . "gb|443893|124775/2-32 -RFRIKVPPAVEGARPALLIFKSRPELGC\n",
270 'remove_columns by position (wrong order)';
272 my %cigars = $aln1->cigar_line;
273 is $cigars{'gb|443893|124775/1-32'}, '19,19:21,24:29,29:32,32',
275 is $cigars{'P814153/1-33'}, '20,20:22,25:30,30:33,33', 'cigar_line';
276 is $cigars{'BAB68554/1-14'}, '1,1:3,6:11,11:14,14', 'cigar_line';
277 is $cigars{'P84139/1-33'}, '20,20:22,25:30,30:33,33', 'cigar_line';
279 # sort_alphabetically
280 my $s3 = Bio::LocatableSeq->new(
282 -seq => '-attat-tt-',
289 is $a->get_seq_by_pos(2)->id, "BBB", 'sort_alphabetically - before';
290 ok $a->sort_alphabetically;
291 is $a->get_seq_by_pos(2)->id, "ABB", 'sort_alphabetically - after';
293 $b = $a->remove_gaps();
294 is $b->consensus_string, "aaaattt", 'remove_gaps';
296 $s1->seq('aaaaattt--');
298 $b = $a->remove_gaps( undef, 'all_gaps_only' );
299 is $b->consensus_string, "aaaaatttt", 'remove_gaps all_gaps_only';
301 # test set_new_reference:
302 $str = Bio::AlignIO->new( -file => test_input_file('testaln.clustalw') );
303 $aln = $str->next_aln();
304 my $new_aln = $aln->set_new_reference(3);
305 $a = $new_aln->get_seq_by_pos(1)->display_id;
306 $new_aln = $aln->set_new_reference('P851414');
307 $b = $new_aln->get_seq_by_pos(1)->display_id;
308 is $a, 'P851414', 'set_new_reference';
309 is $b, 'P851414', 'set_new_reference';
312 $str = Bio::AlignIO->new(
314 -file => test_input_file('testaln2.fasta')
316 $aln = $str->next_aln();
317 $new_aln = $aln->uniq_seq();
318 $a = $new_aln->num_sequences;
319 is $a, 11, 'uniq_seq';
321 # check if slice works well with a LocateableSeq in its negative strand
322 my $seq1 = Bio::LocatableSeq->new(
323 -SEQ => "ATGCTG-ATG",
330 my $seq2 = Bio::LocatableSeq->new(
331 -SEQ => "A-GCTGCATG",
339 my $aln_negative = Bio::SimpleAlign->new();
340 $aln_negative->add_seq($seq1);
341 $aln_negative->add_seq($seq2);
343 $aln_negative->column_from_residue_number(
344 $aln_negative->get_seq_by_pos(1)->display_id, 2 );
346 $aln_negative->column_from_residue_number(
347 $aln_negative->get_seq_by_pos(1)->display_id, 5 );
348 $aln_negative = $aln_negative->slice( $end_column, $start_column );
349 my $seq_negative = $aln_negative->get_seq_by_pos(1);
350 is( $seq_negative->start, 2, "bug 2099" );
351 is( $seq_negative->end, 5, "bug 2099" );
354 my $s11 = Bio::LocatableSeq->new( -id => 'testseq1', -seq => 'AAA' );
355 my $s21 = Bio::LocatableSeq->new( -id => 'testseq2', -seq => 'CCC' );
356 $a = Bio::SimpleAlign->new();
357 ok( $a->add_seq( $s11, 1 ), "bug 2793" );
358 is( $a->get_seq_by_pos(1)->seq, 'AAA', "bug 2793" );
359 ok( $a->add_seq( $s21, 2 ), "bug 2793" );
360 is( $a->get_seq_by_pos(2)->seq, 'CCC', "bug 2793" );
361 throws_ok { $a->add_seq( $s21, 0 ) } qr/must be >= 1/, 'Bad sequence, bad!';
364 # test for Bio::SimpleAlign annotation method and
365 # Bio::FeatureHolder stuff
367 $aln = Bio::SimpleAlign->new;
368 isa_ok($aln,"Bio::AnnotatableI");
370 for my $seqset ( [qw(one AGAGGAT)], [qw(two AGACGAT)], [qw(three AGAGGTT)] ) {
372 Bio::LocatableSeq->new(
379 is $aln->num_sequences, 3, 'added 3 seqs';
381 $aln->add_SeqFeature(
382 Bio::SeqFeature::Generic->new(
385 -primary_tag => 'charLabel',
388 $aln->add_SeqFeature(
389 Bio::SeqFeature::Generic->new(
392 -primary_tag => 'charLabel',
396 is( $aln->feature_count, 2, 'first 2 features added' );
398 my $splitloc = Bio::Location::Split->new;
399 $splitloc->add_sub_Location(
400 Bio::Location::Simple->new(
406 $splitloc->add_sub_Location(
407 Bio::Location::Simple->new(
413 $aln->add_SeqFeature(
414 Bio::SeqFeature::Generic->new(
415 -location => $splitloc,
416 -primary_tag => 'charLabel',
420 is( $aln->feature_count, 3, '3rd feature added' );
422 #do slices and masks as defined by the feature
424 my @slice_lens = qw(1 1 2 2);
425 for my $feature ( $aln->get_SeqFeatures ) {
426 for my $loc ( $feature->location->each_Location ) {
427 my $masked = $aln->mask_columns( $loc->start, $loc->end, '?');
429 lives_ok {my $fslice = $masked->slice( $loc->start, $loc->end )};
431 $masked->verbose(-1);
432 my $fslice = $masked->slice( $loc->start, $loc->end );
433 is( $fslice->length, $slice_lens[ $i++ ], "slice $i len" );
434 for my $s ( $fslice->each_seq ) {
435 like( $s->seq, qr/^\?+$/, 'correct masked seq' );
440 # test set_displayname_safe & restore_displayname:
441 $str = Bio::AlignIO->new( -file => test_input_file('pep-266.aln') );
442 $aln = $str->next_aln();
443 is $aln->get_seq_by_pos(3)->display_id, 'Smik_Contig1103.1',
444 'initial display id ok';
445 my ( $new_aln, $ref ) = $aln->set_displayname_safe();
446 is $new_aln->get_seq_by_pos(3)->display_id, 'S000000003', 'safe display id ok';
447 my $restored_aln = $new_aln->restore_displayname($ref);
448 is $restored_aln->get_seq_by_pos(3)->display_id, 'Smik_Contig1103.1',
449 'restored display id ok';
452 $str = Bio::AlignIO->new( -file => test_input_file('testaln.clustalw') );
453 my $list_file = test_input_file('testaln.list');
454 $aln = $str->next_aln();
455 $new_aln = $aln->sort_by_list($list_file);
456 $a = $new_aln->get_seq_by_pos(1)->display_id;
457 is $a, 'BAB68554', 'sort by list ok';
459 # test for Binary/Morphological/Mixed data
465 my $s1 = Bio::LocatableSeq->new(
467 -seq => 'aawtat-tn-',
472 my $s2 = Bio::LocatableSeq->new(
474 -seq => '-aaaat-tt-',
479 my $s3 = Bio::LocatableSeq->new(
481 -seq => '-aaaat-tt-',
486 $a = Bio::SimpleAlign->new();
491 @seqs = $a->each_seq;
492 is( $seqs[0]->start, 12 );
493 is( $seqs[1]->start, 1 );
494 is( $seqs[2]->start, 31 );
497 @seqs = $a->each_seq;
499 is( $seqs[0]->start, 1 );
500 is( $seqs[1]->start, 12 );
501 is( $seqs[2]->start, 31 );
504 'allele1' => 'GGATCCATT[C/C]CTACT',
505 'allele2' => 'GGAT[C/-][C/-]ATT[C/C]CT[A/C]CT',
506 'allele3' => 'G[G/C]ATCCATT[C/G]CTACT',
507 'allele4' => 'GGATCCATT[C/G]CTACT',
508 'allele5' => 'GGATCCATT[C/G]CTAC[T/A]',
509 'allele6' => 'GGATCCATT[C/G]CTA[C/G][T/A]',
510 'testseq' => 'GGATCCATT[C/G]CTACT'
513 my $alnin = Bio::AlignIO->new(
515 -file => test_input_file('alleles.fas')
518 $aln = $alnin->next_aln;
522 # compare all to test seq
524 for my $ls ( sort keys %testdata ) {
526 my $str = $aln->bracket_string(
527 -refseq => 'testseq',
528 -allele1 => 'allele1',
531 is( $str, $testdata{$ls}, "BIC:$str" );
535 'allele1' => 'GGATCCATT{C.C}CTACT',
536 'allele2' => 'GGAT{C.-}{C.-}ATT{C.C}CT{A.C}CT',
537 'allele3' => 'G{G.C}ATCCATT{C.G}CTACT',
538 'allele4' => 'GGATCCATT{C.G}CTACT',
539 'allele5' => 'GGATCCATT{C.G}CTAC{T.A}',
540 'allele6' => 'GGATCCATT{C.G}CTA{C.G}{T.A}',
541 'testseq' => 'GGATCCATT{C.G}CTACT'
544 for my $ls ( sort keys %testdata ) {
546 my $str = $aln->bracket_string(
547 -refseq => 'testseq',
548 -allele1 => 'allele1',
553 is( $str, $testdata{$ls}, "BIC:$str" );
556 # is _remove_col really working correctly?
557 my $a = Bio::LocatableSeq->new(
560 -seq => 'atcgatcgatcgatcg',
564 my $b = Bio::LocatableSeq->new(
567 -seq => '-tcgatc-atcgatcg',
571 my $c = Bio::LocatableSeq->new(
574 -seq => 'atcgatcgatc-atc-',
578 my $d = Bio::LocatableSeq->new(
581 -seq => '--cgatcgatcgat--',
585 my $e = Bio::LocatableSeq->new(
588 -seq => '-t-gatcgatcga-c-',
592 $aln = Bio::SimpleAlign->new();
597 my $gapless = $aln->remove_gaps();
598 foreach my $seq ( $gapless->each_seq ) {
599 if ( $seq->id eq 'a' ) {
602 is $seq->seq, 'tcgatcatcatc';
604 elsif ( $seq->id eq 'b' ) {
607 is $seq->seq, 'tcgatcatcatc';
609 elsif ( $seq->id eq 'c' ) {
612 is $seq->seq, 'tcgatcatcatc';
618 $gapless = $aln->remove_gaps();
619 foreach my $seq ( $gapless->each_seq ) {
620 if ( $seq->id eq 'a' ) {
623 is $seq->seq, 'gatcatca';
625 elsif ( $seq->id eq 'b' ) {
628 is $seq->seq, 'gatcatca';
630 elsif ( $seq->id eq 'c' ) {
633 is $seq->seq, 'gatcatca';
635 elsif ( $seq->id eq 'd' ) {
638 is $seq->seq, 'gatcatca';
640 elsif ( $seq->id eq 'e' ) {
643 is $seq->seq, 'gatcatca';
649 my $slice = $aln->slice(1,4);
651 for my $seq ($slice->each_seq) {
652 if ( $seq->id eq 'a' ) {
656 is $seq->seq, 'atcg';
658 elsif ( $seq->id eq 'b' ) {
662 is $seq->seq, '-tcg';
664 elsif ( $seq->id eq 'c' ) {
668 is $seq->seq, 'atcg';
670 elsif ( $seq->id eq 'd' ) {
674 is $seq->seq, '--cg';
676 elsif ( $seq->id eq 'e' ) {
680 is $seq->seq, '-t-g';
684 my $f = Bio::LocatableSeq->new(
686 -seq => 'a-cgatcgatcgat-g',
690 $aln = Bio::SimpleAlign->new();
694 $gapless = $aln->remove_gaps();
695 foreach my $seq ( $gapless->each_seq ) {
696 if ( $seq->id eq 'a' ) {
699 is $seq->seq, 'acgatcgatcgatg';
701 elsif ( $seq->id eq 'f' ) {
704 is $seq->seq, 'acgatcgatcgatg';
709 Bio::LocatableSeq->new( -id => 'g', -seq => 'atgc', -start => 5, -end => 8 );
710 my $h = Bio::LocatableSeq->new(
716 $aln = Bio::SimpleAlign->new();
720 # test for new method in API get_seq_by_id
721 my $retrieved = $aln->get_seq_by_id('g');
722 is( defined $retrieved, 1 );
723 my $removed = $aln->remove_columns( [ 1, 3 ] );
724 foreach my $seq ( $removed->each_seq ) {
725 if ( $seq->id eq 'g' ) {
730 elsif ( $seq->id eq 'h' ) {
737 # work out mask_columns(), see bug 2842
739 test_skip(-tests => 6, -requires_module => 'IO::String');
740 my $io = Bio::AlignIO->new( -file => test_input_file("testaln.clustalw") );
741 my $aln = $io->next_aln();
742 isa_ok( $aln, 'Bio::SimpleAlign' );
743 my $consensus = <<EOU;
744 MNEGEHQIKLDELFEKLLRARKIFKNKDVLRHSWEPKDLPHRHEQIEALAQILV
745 PVLRGETMKIIFCGHHACELGEDRGTKGFVIDELKDVDEDRNGKVDVIEINCEH
746 MDTHYRVLPNIAKLFDDCTGIGVPMHGGPTDEVTAKLKQVIDMKERFVIIVLDE
747 IDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEE
748 EVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDL
749 LRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLL
750 DENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSK
751 GRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
753 $consensus =~ s/\n//g;
755 is( $aln->consensus_string, $consensus, 'consensus string looks ok' );
758 my @cons_got = $aln->consensus_conservation;
759 # 422 positions, mostly two of six sequences conserved, set as default
760 my @cons_expect = (100 * 2/6) x 422;
761 # Exceptionally columns as a mask, manually determined (1-based columns)
762 $cons_expect[$_-1] = 100 * 1/6 for (5,12,41,70,82,310,390);
763 $cons_expect[$_-1] = 100 * 3/6 for (27,30,32,36,47,49,61,66,69,71,77,79,
764 81,91,96,97,105,114,115,117,118,121,122,129,140,146,156,159,160,162,
765 183,197,217,221,229,242,247,248,261,266,282,287,295,316,323,329,335,337,344,);
766 $cons_expect[$_-1] = 100 * 4/6 for (84,93,99,100,102,107,108,112,113,119,150,);
767 $cons_expect[$_-1] = 100 * 5/6 for (81,110);
768 # Format for string comparison
769 @cons_expect = map { sprintf "%4.1f", $_ } @cons_expect;
770 @cons_got = map { sprintf "%4.1f", $_ } @cons_got;
771 is(length($aln->consensus_string), scalar(@cons_got),"conservation length");
772 is_deeply(\@cons_got, \@cons_expect, "conservation scores");
775 is( aln2str( $aln => 'pfam' ), <<EOA, 'looks like correct unmasked alignment (from clustalw)' );
776 P84139/1-420 MNEGEHQIKLDELFEKLLRARKIFKNKDVLRHSYTPKDLPLRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIGVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTRPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLNVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
777 P814153/1-420 MNEGMHQIKLDVLFEKLLRARKIFKNKDVLRHSYTPKDLPHRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIEVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
778 P851414/1-60 -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILQTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
779 P841414/1-60 -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILVTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
780 BAB68554/1-141 --------------------MLTEDDKQLIQHVWEKVLEHQEDFGAEALERMFIVYPSTKTYFPHFDLHHDSEQIRHHGKK-VVGALGDAVKHIDNLSATLSELSNLHCY-NLRVDPVNFKLLSHCFQVVLGAHLG--REYTPQVQVAYDKFLAAVSAVLAEKYR-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
781 gb|443893|124775/1-331 -MRFRFGVVVPPAVAGARPELLVVGSRPELG-RWEPRGAVRLRPAGTAAGDGALALQEPGLWLGEVELA-AEEAAQDGAEPGRVDTFWYKFLKREPGGELSWEGNGPHHDRCCTYNENNLVDGVYCLPIG---HWGEATGHTNEMKHTTDFYFNIAGHQAMHYSRILPNIWLGSCPRQVEHVTIKLKHELGITAVMN-FQTEWDIVQNSSGCNRYPEPMTPDTMIKLYREEGLAYIWMP-TPDMSTEGRVQMLPQAVCLLHALLEKGHIVY-----VHCNAGVGRSTAAVCGWLQYVMGWNLRKVQYFLMAKRPAVYIDEEALARAQEDFFQKFGKVRSSVCSL------------------------------------------------------------------------------
784 my $newaln = $aln->mask_columns(12,20,'?');
785 is( aln2str( $newaln, 'pfam' ), <<EOA, 'looks like correct masked alignment (from clustalw)' );
786 P84139/1-420 MNEGEHQIKLD?????????RKIFKNKDVLRHSYTPKDLPLRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIGVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTRPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLNVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
787 P814153/1-420 MNEGMHQIKLD?????????RKIFKNKDVLRHSYTPKDLPHRHEQIETLAQILVPVLRGETPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIVNYFKDETGIEVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI
788 P851414/1-60 -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILQTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
789 P841414/1-60 -------------------------------------------------------------MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILVTHEHMD-HYGKTPLIAKLSD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
790 BAB68554/1-141 --------------------MLTEDDKQLIQHVWEKVLEHQEDFGAEALERMFIVYPSTKTYFPHFDLHHDSEQIRHHGKK-VVGALGDAVKHIDNLSATLSELSNLHCY-NLRVDPVNFKLLSHCFQVVLGAHLG--REYTPQVQVAYDKFLAAVSAVLAEKYR-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
791 gb|443893|124775/1-331 -MRFRFGVVVP?????????LLVVGSRPELG-RWEPRGAVRLRPAGTAAGDGALALQEPGLWLGEVELA-AEEAAQDGAEPGRVDTFWYKFLKREPGGELSWEGNGPHHDRCCTYNENNLVDGVYCLPIG---HWGEATGHTNEMKHTTDFYFNIAGHQAMHYSRILPNIWLGSCPRQVEHVTIKLKHELGITAVMN-FQTEWDIVQNSSGCNRYPEPMTPDTMIKLYREEGLAYIWMP-TPDMSTEGRVQMLPQAVCLLHALLEKGHIVY-----VHCNAGVGRSTAAVCGWLQYVMGWNLRKVQYFLMAKRPAVYIDEEALARAQEDFFQKFGKVRSSVCSL------------------------------------------------------------------------------
794 ###### test with phylip
796 my $phylip_str = <<EOF;
798 seq1 AAAATGGGGG TGGT------ GGTACCT--- -------
799 seq2 -----GGCGG TGGTGNNNNG GGTTCCCTNN NNNNNNN
800 new AAAATGGNGG TGGTN----N GGTNCCNTNN NNNNNNN
804 my $phylip_masked = <<EOF;
806 seq1 AAAATGGGGG TGGT------ GGTACCT--- -------
807 seq2 -----GGCGG TGGT?????? GGTTCCCTNN NNNNNNN
808 new AAAATGGNGG TGGT?----? GGTNCCNTNN NNNNNNN
812 my $phy_fh = IO::String->new( $phylip_str );
814 my $in = Bio::AlignIO->new( -fh => $phy_fh, -format => 'phylip' );
817 $aln = $in->next_aln();
818 eq_or_diff( aln2str( $aln, 'phylip' ), $phylip_str );
820 $newaln = $aln->mask_columns(15,20,'?');
821 eq_or_diff( aln2str( $newaln,'phylip' ), $phylip_masked, 'align after looks ok' );
827 my ( $aln, $fmt ) = @_;
829 my $out_fh = IO::String->new( $out );
830 my $alignio_out = Bio::AlignIO->new(-fh => $out_fh, -format => $fmt);
831 $alignio_out->write_aln( $aln );