1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id: SeqStats.t 11525 2007-06-27 10:16:38Z sendu $
9 test_begin(-tests => 47);
12 use_ok('Bio::Tools::SeqStats');
15 my ($seqobj, $count, $seqobj_stats, $wt);
17 my $str = Bio::SeqIO->new(-file=> test_input_file('multifa.seq'), '-format' => 'Fasta');
18 $seqobj = $str->next_seq();
19 ok defined $seqobj, 'new Bio::Root::IO object';
21 my $seq_stats = Bio::Tools::SeqStats->new('-seq' => $seqobj);
22 ok defined $seq_stats && $seq_stats, 'new Bio::Tools:SeqStats object';
25 my $hash_ref = $seq_stats->count_monomers();
26 is ( $hash_ref->{'A'}, 80 , 'count_monomers()');
28 $hash_ref = $seq_stats-> count_codons();
29 ok defined $hash_ref && $hash_ref , 'count_codons()';
31 my $weight = $seq_stats->get_mol_wt();
32 ok defined $weight && $weight , 'get_mol_wt()' ;
34 $seqobj = Bio::PrimarySeq->new(-seq=>'ACTGTGGCGTCAACTG',
35 -alphabet=>'dna', -id=>'test');
36 ok $seqobj_stats = Bio::Tools::SeqStats->new(-seq=>$seqobj);
37 isa_ok $seqobj_stats, 'Bio::Tools::SeqStats';
39 $count = $seqobj_stats->count_monomers(); # for DNA sequence
45 $count = $seqobj_stats->count_codons();
46 is $count->{'ACT'}, 2;
47 is $count->{'GTG'}, 1;
48 is $count->{'GCG'}, 1;
49 is $count->{'TCA'}, 1;
51 $seqobj = Bio::PrimarySeq->new(-seq=>'ACTACTTCA', -alphabet=>'dna',
53 $seqobj_stats = Bio::Tools::SeqStats->new('-seq' => $seqobj);
54 $wt = $seqobj_stats->get_mol_wt(); # for DNA sequence
55 is &round($$wt[0]), 2738 ;
57 $seqobj = Bio::PrimarySeq->new(-seq=>'ACXACNNCA',
58 -alphabet=>'dna', -id=>'test');
59 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
60 is &round($$wt[0]), 2693;
61 is &round($$wt[1]), 2813;
63 $seqobj = Bio::PrimarySeq->new(-seq=>'ACTGTGGCGTCAACTG',
64 -alphabet=>'dna', -id=>'test');
65 $count = Bio::Tools::SeqStats->count_monomers($seqobj); # for DNA sequence
73 $seqobj = Bio::PrimarySeq->new(-seq=>'MQSERGITIDISLWKFETSKYYVT',
74 -alphabet=>'protein', -id=>'test');
75 $seqobj_stats = Bio::Tools::SeqStats->new('-seq' => $seqobj);
76 $count = $seqobj_stats->count_monomers(); # for amino sequence
81 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
85 # Issue 3185: https://redmine.open-bio.org/issues/3185
87 $seqobj = Bio::PrimarySeq->new(-seq=>'MQSERGITIDISLWKFETSKYYVT*',
88 -alphabet=>'protein', -id=>'test');
89 is($seqobj->seq, 'MQSERGITIDISLWKFETSKYYVT*');
90 $seqobj_stats = Bio::Tools::SeqStats->new('-seq' => $seqobj);
91 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
96 ok $seqobj = Bio::PrimarySeq->new(-seq=>'MQSERGITIDISLWKFETSKYYVT',
97 -alphabet=>'protein');
98 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
99 is &round($$wt[0]), 2896 ;
103 $seqobj = Bio::PrimarySeq->new(-seq=>'UYXUYNNYU', -alphabet=>'rna');
104 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
105 is &round($$wt[0]), 2768;
106 is &round($$wt[1]), 2891;
108 ok $seqobj = Bio::PrimarySeq->new(-seq=>'TGCCGTGTGTGCTGCTGCT', -alphabet=>'rna');
109 $wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
110 is &round($$wt[0]), 6104 ;
113 # hydropathicity aka "gravy" score
116 # normal seq (should succeed)
117 ok $seqobj = Bio::PrimarySeq->new(-seq=>'MSFVLVAPDMLATAAADVVQIGSAVSAGS',
118 -alphabet=>'protein');
119 my $gravy = Bio::Tools::SeqStats->hydropathicity($seqobj);
120 is int($gravy*1000), 1224; # check to nearest 0.1%
122 # ambiguous sequence (should fail)
123 ok $seqobj = Bio::PrimarySeq->new(-seq=>'XXXB**BS', -alphabet=>'protein');
124 eval { Bio::Tools::SeqStats->hydropathicity($seqobj) };
125 like $@, qr/ambiguous amino acids/i;
127 # empty sequence (should fail)
128 ok $seqobj = Bio::PrimarySeq->new(-seq=>'', -alphabet=>'protein');
129 eval { Bio::Tools::SeqStats->hydropathicity($seqobj) };
130 like $@, qr/hydropathicity not defined/i;
132 # DNA sequence (should fail)
133 ok $seqobj = Bio::PrimarySeq->new(-seq=>'GATTACA', -alphabet=>'dna');
134 eval { Bio::Tools::SeqStats->hydropathicity($seqobj) };
135 like $@, qr/only meaningful for protein/;
142 # perl does not have an explicit rounding function
143 sub round { return int ((shift @_) + 0.5 ) }