1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 194);
13 use_ok('Bio::Seq::Quality');
14 use_ok('Bio::PrimarySeq');
15 use_ok('Bio::LocatableSeq');
16 use_ok('Bio::Seq::SimulatedRead');
19 my $VERBOSE = test_debug();
21 my ($ref, $ref2, $ref3, $ref4, $ref5, $read, $errors);
23 $ref = Bio::Seq::Quality->new(-id => 'human_id',
24 -seq => 'TAAAAAAACCCC',
25 -qual => '1 2 3 4 5 6 7 8 9 10 11 12',
26 -trace => '0 5 10 15 20 25 30 35 40 45 50 55',
27 -desc => 'The human genome' );
29 $ref2 = Bio::Seq->new(-id => 'other_genome',
31 -desc => '"Secret" sequence');
33 $ref3 = Bio::PrimarySeq->new(-seq => 'ACACTGATCTAGCGTCGTGCTAGCTGACGTAGCTGAT' );
35 $ref4 = Bio::LocatableSeq->new(-id => 'a_thaliana',
36 -seq => 'CGTATTCTGAGGAGAGCTCT' );
41 ok $read = Bio::Seq::SimulatedRead->new();
42 isa_ok $read, 'Bio::Seq::SimulatedRead';
43 isa_ok $read, 'Bio::LocatableSeq';
44 isa_ok $read, 'Bio::Seq::Quality';
46 $errors->{'1'}->{'+'} = 'T';
47 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
48 is $read->reference, $ref;
50 is $read->errors->{'1'}->{'+'}->[0], 'T';
52 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 0 );
55 is $read->seq, 'TAAAAAAACCCC';
57 is $read->desc, undef;
58 is $read->revcom->seq, 'GGGGTTTTTTTA';
60 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1 );
63 is $read->seq, 'TAAAAAAACCCC';
64 is join(' ',@{$read->qual}), '';
66 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
68 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1, -coord_style => 'bioperl' );
69 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
71 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1, -coord_style => 'genbank' );
72 is $read->desc, 'reference=human_id position=1..12 description="The human genome"';
74 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -qual_levels => [30, 10]);
77 is $read->seq, 'TAAAAAAACCCC';
78 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30';
80 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
81 is $read->revcom->seq, 'GGGGTTTTTTTA';
83 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref2 );
86 is $read->seq, 'ACGTACGT';
87 is join(' ',@{$read->qual}), '';
88 is $read->desc, 'reference=other_genome start=1 end=8 strand=+1 description="\"Secret\" sequence"';
90 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref3 );
93 is $read->seq, 'ACACTGATCTAGCGTCGTGCTAGCTGACGTAGCTGAT';
94 is join(' ',@{$read->qual}), '';
95 is $read->desc, 'start=1 end=37 strand=+1';
97 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -qual_levels => [30, 10]);
98 is $read->seq, 'GGGGTTTTTTTA';
99 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30';
100 is $read->desc, 'reference=human_id start=1 end=12 strand=-1 description="The human genome"';
102 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -qual_levels => [30, 10], -coord_style => 'genbank' );
103 is $read->desc, 'reference=human_id position=complement(1..12) description="The human genome"';
105 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -start => 2, -end => 8, -qual_levels => [30, 10]);
106 is $read->seq, 'AAAAAAA';
107 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30';
108 is $read->desc, 'reference=human_id start=2 end=8 strand=+1 description="The human genome"';
110 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -start => 2, -end => 8, -qual_levels => [30, 10]);
111 is $read->seq, 'TTTTTTT';
112 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30';
113 is $read->desc, 'reference=human_id start=2 end=8 strand=-1 description="The human genome"';
116 $errors->{'6'}->{'+'} = 'GG';
117 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]);
120 is $read->seq, 'TTTTTTGGT';
121 is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30';
122 is $read->desc, 'reference=human_id start=2 end=8 strand=-1 errors=6+G,6+G description="The human genome"';
125 $errors->{'6'}->{'+'} = 'GG';
126 $errors->{'1'}->{'%'} = 'T';
127 $errors->{'3'}->{'-'} = undef;
128 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => 1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]);
131 is $read->seq, 'TAAAAGGA';
132 is join(' ', @{$read->qual}), '10 30 30 30 30 10 10 30';
133 is $read->desc, 'reference=human_id start=2 end=8 strand=+1 errors=1%T,3-,6+G,6+G description="The human genome"';
136 $errors->{'6'}->{'+'} = 'GG';
137 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors, -qual_levels => [30, 10]);
140 is $read->seq, 'TAAAAAGGAACCCC';
141 is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30 30 30 30 30 30';
142 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 errors=6+G,6+G description="The human genome"';
144 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors, -mid => 'ACGT', -errors => $errors, -qual_levels => [30, 10]);
147 is $read->seq, 'ACGTTAGGAAAAAACCCC';
148 is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30 30 30 30 30 30 30 30 30 30';
149 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=ACGT errors=6+G,6+G description="The human genome"';
151 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => 'TTTAAA', -qual_levels => [30, 10]);
154 is $read->seq, 'TTTAAATAAAAAAACCCC';
155 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
156 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=TTTAAA description="The human genome"';
158 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => '', -qual_levels => []);
161 is $read->seq, 'TAAAAAAACCCC';
162 is join(' ', @{$read->qual}), '';
163 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
169 $errors->{'6'}->{'+'} = ['G', 'G'];
170 $errors->{'1'}->{'%'} = ['A', 'G', 'T'];
171 $errors->{'3'}->{'-'} = [undef, undef];
172 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => 1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]), 'redundant errors';
175 is $read->seq, 'TAAAAGGA';
176 is join(' ', @{$read->qual}), '10 30 30 30 30 10 10 30';
177 is $read->desc, 'reference=human_id start=2 end=8 strand=+1 errors=1%A,1%G,1%T,3-,3-,6+G,6+G description="The human genome"';
180 # Specifying errors() after new()
182 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => '', -qual_levels => []);
185 is $read->seq, 'TAAAAAAACCCC';
186 is join(' ', @{$read->qual}), '';
187 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
190 ok $read->errors($errors), 'errors()';
193 is $read->seq, 'TAAAAAAACCCC';
194 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
197 $errors->{'6'}->{'+'} = 'GG';
198 ok $read->errors($errors);
199 is $read->seq, 'TAAAAAGGAACCCC';
202 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 errors=6+G,6+G description="The human genome"';
205 # More tracking tests
207 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => 'ACGT', -qual_levels => [], -coord_style => 'genbank' );
208 is $read->desc, 'reference=human_id position=1..12 mid=ACGT description="The human genome"';
210 ok $read->mid('AAAA');
211 is $read->desc, 'reference=human_id position=1..12 mid=AAAA description="The human genome"';
214 $errors->{'6'}->{'+'} = 'GG';
215 ok $read->errors($errors);
216 is $read->desc, 'reference=human_id position=1..12 mid=AAAA errors=6+G,6+G description="The human genome"';
218 ok not($read->track(0)), 'track()';
220 is $read->desc, undef;
223 is $read->desc, 'reference=human_id position=1..12 mid=AAAA errors=6+G,6+G description="The human genome"';
226 # qual_levels() method
228 ok $read = Bio::Seq::SimulatedRead->new(-verbose => $VERBOSE, );
229 ok $read->qual_levels([30, 10]), 'qual_levels()';
230 is join(' ', @{$read->qual_levels}), '30 10';
234 ok $read->reference($ref), 'reference()';
235 is $read->reference(), $ref;
239 ok $read = Bio::Seq::SimulatedRead->new(-verbose => $VERBOSE, ), 'mid()';
240 ok $read->qual_levels([30, 10]);
241 ok $read->reference($ref);
242 ok $read->mid('ACGT');
243 ok $read->mid, 'ACGT';
245 is $read->seq, 'ACGTTAAAAAAACCCC';
246 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
247 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=ACGT description="The human genome"';
249 ok $read->mid('TTTAAA');
250 ok $read->mid, 'TTTAAA';
251 is $read->seq, 'TTTAAATAAAAAAACCCC';
252 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
253 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=TTTAAA description="The human genome"';
256 # Edge case... mutation of the last bases of a simulated read with MID
259 $errors->{'18'}->{'%'} = 'T';
260 $read->errors($errors);
261 is $read->seq, 'TTTAAATAAAAAAACCCT';
264 # Try different BioPerl object types
266 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref ), 'Bio::Seq::Quality';
267 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref2 ), 'Bio::Seq';
268 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref3 ), 'Bio::PrimarySeq';
269 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref4 ), 'Bio::LocatableSeq';
272 # More detailed tests of the error specifications
275 $errors->{'0'}->{'-'} = undef;
276 warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
277 qr/Positions of substitutions and deletions have to be strictly positive but got 0/;
278 is $read->seq, 'TAAAAAAACCCC';
281 $errors->{'1'}->{'-'} = undef;
282 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
283 is $read->seq, 'AAAAAAACCCC';
286 $errors->{'12'}->{'-'} = undef;
287 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
288 is $read->seq, 'TAAAAAAACCC';
291 $errors->{'13'}->{'-'} = undef;
292 warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
293 qr/Position 13 is beyond end of read \(12 residues\)/; # there should be a warning too
294 is $read->seq, 'TAAAAAAACCCC';
297 $errors->{'0'}->{'%'} = 'G';
298 warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
299 qr/Positions of substitutions and deletions have to be strictly positive/; # there should be a warning too
300 is $read->seq, 'TAAAAAAACCCC';
303 $errors->{'1'}->{'%'} = 'G';
304 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
305 is $read->seq, 'GAAAAAAACCCC';
308 $errors->{'12'}->{'%'} = 'G';
309 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
310 is $read->seq, 'TAAAAAAACCCG';
313 $errors->{'13'}->{'%'} = 'G';
314 warning_like { $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors ) }
315 qr/Position 13 is beyond end of read \(12 residues\)/; # there should be a warning too
316 is $read->seq, 'TAAAAAAACCCC';
319 $errors->{'0'}->{'+'} = 'A';
320 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
321 is $read->seq, 'ATAAAAAAACCCC';
324 $errors->{'1'}->{'+'} = 'A';
325 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
326 is $read->seq, 'TAAAAAAAACCCC';
329 $errors->{'12'}->{'+'} = 'A';
330 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
331 is $read->seq, 'TAAAAAAACCCCA';
334 $errors->{'13'}->{'+'} = 'A';
335 warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors ) }
336 qr/Position 13 is beyond end of read \(12 residues\)/; # there should be a warning too
337 is $read->seq, 'TAAAAAAACCCC';
340 $errors->{'1'}->{'%'} = 'G';
341 $errors->{'2'}->{'%'} = 'G';
342 $errors->{'3'}->{'%'} = 'G';
343 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
344 is $read->seq, 'GGGAAAAACCCC';
347 $errors->{'1'}->{'+'} = 'G';
348 $errors->{'2'}->{'+'} = 'G';
349 $errors->{'3'}->{'+'} = 'G';
350 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
351 is $read->seq, 'TGAGAGAAAAACCCC';
354 $errors->{'1'}->{'-'} = undef;
355 $errors->{'2'}->{'-'} = undef;
356 $errors->{'3'}->{'-'} = undef;
357 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
358 is $read->seq, 'AAAAACCCC';
361 $errors->{'1'}->{'+'} = 'GGG';
362 $errors->{'2'}->{'-'} = undef;
363 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
364 is $read->seq, 'TGGGAAAAAACCCC';
367 $errors->{'2'}->{'+'} = 'CC';
368 $errors->{'2'}->{'-'} = undef;
369 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
370 is $read->seq, 'TCCAAAAAACCCC';
373 $errors->{'2'}->{'%'} = 'C';
374 $errors->{'2'}->{'-'} = undef;
375 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
376 is $read->seq, 'TAAAAAACCCC';