1 # -*-Perl-*- Test Harness script for Bioperl
9 test_begin( -tests => 312 );
11 use_ok('Bio::PrimarySeq');
12 use_ok('Bio::Location::Simple');
13 use_ok('Bio::Location::Fuzzy');
14 use_ok('Bio::Location::Split');
19 ok my $seq = Bio::PrimarySeq->new(), 'Bare object';
20 isa_ok $seq, 'Bio::PrimarySeqI';
24 is $seq->alphabet, undef;
25 is $seq->is_circular, undef;
29 ok $seq = Bio::PrimarySeq->new( -seq => '', -nowarnonempty => 1);
32 is $seq->alphabet, undef;
36 ok $seq = Bio::PrimarySeq->new(
37 '-seq' => 'TTGGTGGCGTCAACT',
38 '-display_id' => 'new-id',
40 '-accession_number' => 'X677667',
41 '-desc' => 'Sample Bio::Seq object'
44 is $seq->accession_number(), 'X677667';
45 is $seq->seq(), 'TTGGTGGCGTCAACT';
46 is $seq->display_id(), 'new-id';
47 is $seq->alphabet(), 'dna';
48 is $seq->is_circular(), undef;
49 ok $seq->is_circular(1);
50 is $seq->is_circular(0), 0;
52 # check IdentifiableI and DescribableI interfaces
53 isa_ok $seq, 'Bio::IdentifiableI';
54 isa_ok $seq, 'Bio::DescribableI';
56 # make sure all methods are implemented
57 is $seq->authority("bioperl.org"), "bioperl.org";
58 is $seq->authority, "bioperl.org";
59 is $seq->namespace("t"), "t";
60 is $seq->namespace, "t";
61 is $seq->version(0), 0;
63 is $seq->lsid_string(), "bioperl.org:t:X677667";
64 is $seq->namespace_string, "t:X677667.0";
65 is $seq->version(47), 47;
67 is $seq->namespace_string, "t:X677667.47";
68 is $seq->description, 'Sample Bio::Seq object';
69 is $seq->display_name, "new-id";
73 is $seq->subseq(2, 5), 'TGGT';
75 is $seq->subseq( -start => 1, -end => 15), 'TTGGTGGCGTCAACT';
77 my $location = Bio::Location::Simple->new(
82 is $seq->subseq($location), 'ACCA';
84 my $splitlocation = Bio::Location::Split->new();
85 $splitlocation->add_sub_Location(
86 Bio::Location::Simple->new(
93 $splitlocation->add_sub_Location(
94 Bio::Location::Simple->new(
101 is $seq->subseq($splitlocation), 'TTGGTGACGC';
103 my $fuzzy = Bio::Location::Fuzzy->new(
109 is $seq->subseq($fuzzy), 'GGTGGC';
112 ok my $seq = Bio::PrimarySeq->new( -seq => 'TT-GTGGCGTCAACT' );
113 is $seq->subseq(2, 5, 'nogap'), 'TGT';
114 is $seq->subseq( -start => 2, -end => 5, -nogap => 1 ), 'TGT';
115 my $location = Bio::Location::Simple->new(
120 is $seq->subseq( $location, -nogap => 1), 'TGT';
122 is $seq->subseq(-start=>2, -end=>5, -replace_with=>'aa'), 'T-GT';
123 is $seq->seq, 'TaaGGCGTCAACT';
125 throws_ok { $seq->subseq(-start=>2, -end=>5, -replace_with=>'?!'); } qr/.+/;
129 ok my $seq = Bio::PrimarySeq->new( -seq => 'AACCGGTT', -is_circular => 1 );
130 is $seq->subseq( -start => 7, -end => 10 ), 'TTAA';
133 ### Test for Bug #2936
134 # Without strand input argument (case: user don't think is necessary)
135 my $split_loc_obj1 = Bio::Location::Split->new();
136 $split_loc_obj1->add_sub_Location(
137 Bio::Location::Simple->new(
142 $split_loc_obj1->add_sub_Location(
143 Bio::Location::Simple->new(
148 # With strand input argument (case: user provides the argument)
149 my $split_loc_obj2 = Bio::Location::Split->new();
150 $split_loc_obj2->add_sub_Location(
151 Bio::Location::Simple->new(
157 $split_loc_obj2->add_sub_Location(
158 Bio::Location::Simple->new(
164 is $split_loc_obj1->to_FTstring, "join(1..10,20..30)";
165 is $split_loc_obj2->to_FTstring, "join(1..10,20..30)";
166 $split_loc_obj1->flip_strand;
167 $split_loc_obj2->flip_strand;
168 is $split_loc_obj1->to_FTstring, "complement(join(1..10,20..30))";
169 is $split_loc_obj2->to_FTstring, "complement(join(1..10,20..30))";
173 my $trunc = $seq->trunc( 1, 4 );
174 isa_ok $trunc, 'Bio::PrimarySeqI';
175 is $trunc->seq(), 'TTGG' or diag( "Expecting TTGG. Got " . $trunc->seq() );
177 $trunc = $seq->trunc($splitlocation);
178 isa_ok $trunc, 'Bio::PrimarySeqI' ;
179 is $trunc->seq(), 'TTGGTGACGC';
181 $trunc = $seq->trunc($fuzzy);
182 isa_ok $trunc, 'Bio::PrimarySeqI';
183 is $trunc->seq(), 'GGTGGC';
185 my $rev = $seq->revcom();
186 isa_ok $rev, 'Bio::PrimarySeqI';
188 is $rev->seq(), 'AGTTGACGCCACCAA'
189 or diag( 'revcom() failed, was ' . $rev->seq() );
191 is $rev->display_id, 'new-id';
192 is $rev->display_name(), 'new-id';
193 is $rev->accession_number(), 'X677667';
194 is $rev->alphabet, 'dna';
195 is $rev->description, 'Sample Bio::Seq object';
196 is $rev->is_circular(), 0;
197 is $rev->version, 47;
198 is $rev->authority, 'bioperl.org';
199 is $rev->namespace, 't';
200 is $rev->namespace_string(), 't:X677667.47';
206 my $aa = $seq->translate(); # TTG GTG GCG TCA ACT
207 is $aa->seq, 'LVAST', "Translation: " . $aa->seq;
209 # tests for non-standard initiator codon coding for
210 # M by making translate() look for an initiator codon and
211 # terminator codon ("complete", the 5th argument below)
212 $seq->seq('TTGGTGGCGTCAACTTAA'); # TTG GTG GCG TCA ACT TAA
213 $aa = $seq->translate( undef, undef, undef, undef, 1 );
214 is $aa->seq, 'MVAST', "Translation: " . $aa->seq;
216 # same test as previous, but using named parameter
217 $aa = $seq->translate( -complete => 1 );
218 is $aa->seq, 'MVAST', "Translation: " . $aa->seq;
220 # find ORF, ignore codons outside the ORF or CDS
221 $seq->seq('TTTTATGGTGGCGTCAACTTAATTT'); # ATG GTG GCG TCA ACT
222 $aa = $seq->translate( -orf => 1 );
223 is $aa->seq, 'MVAST*', "Translation: " . $aa->seq;
225 # smallest possible ORF
226 $seq->seq("ggggggatgtagcccc"); # atg tga
227 $aa = $seq->translate( -orf => 1 );
228 is $aa->seq, 'M*', "Translation: " . $aa->seq;
230 # same as previous but complete, so * is removed
231 $aa = $seq->translate(
235 is $aa->seq, 'M', "Translation: " . $aa->seq;
237 # ORF without termination codon
238 # should warn, let's change it into throw for testing
240 $seq->seq("ggggggatgtggcccc"); # atg tgg ccc
241 eval { $seq->translate( -orf => 1 ); };
242 like( $@, qr/\batgtggccc\b/i );
244 $aa = $seq->translate( -orf => 1 );
245 is $aa->seq, 'MWP', "Translation: MWP";
248 # use non-standard codon table where terminator is read as Q
249 $seq->seq('ATGGTGGCGTCAACTTAG'); # ATG GTG GCG TCA ACT TAG
250 $aa = $seq->translate( -codontable_id => 6 );
251 is $aa->seq, 'MVASTQ' or diag( "Translation: " . $aa->seq );
253 # insert an odd character instead of terminating with *
254 $aa = $seq->translate( -terminator => 'X' );
255 is $aa->seq, 'MVASTX' or diag( "Translation: " . $aa->seq );
257 # change frame from default
258 $aa = $seq->translate( -frame => 1 ); # TGG TGG CGT CAA CTT AG
259 is $aa->seq, 'WWRQL' or diag( "Translation: " . $aa->seq );
261 $aa = $seq->translate( -frame => 2 ); # GGT GGC GTC AAC TTA G
262 is $aa->seq, 'GGVNL' or diag( "Translation: " . $aa->seq );
264 # TTG is initiator in Standard codon table? Afraid so.
265 $seq->seq("ggggggttgtagcccc"); # ttg tag
266 $aa = $seq->translate( -orf => 1 );
267 is $aa->seq, 'L*' or diag( "Translation: " . $aa->seq );
269 # Replace L at 1st position with M by setting complete to 1
270 $seq->seq("ggggggttgtagcccc"); # ttg tag
271 $aa = $seq->translate(
275 is $aa->seq, 'M' or diag( "Translation: " . $aa->seq );
277 # Ignore non-ATG initiators (e.g. TTG) in codon table
278 $seq->seq("ggggggttgatgtagcccc"); # atg tag
279 $aa = $seq->translate(
284 is $aa->seq, 'M' or diag( "Translation: " . $aa->seq );
286 # test for character '?' in the sequence string
287 is $seq->seq('TTGGTGGCG?CAACT'), 'TTGGTGGCG?CAACT';
289 # issue #105 - when there are starts and stops in both frame 0 and
290 # frame 1, frame 0 start < frame 1 start, then should return the frame
291 # 0 ORF per the pod ('the first orf') even if frame 1 stop < frame 0 stop
293 # Turn off warnings for a few tests
294 my $verbosity = $seq->verbose();
297 $seq->seq('ATGAATGTAAATAA');
298 $aa = $seq->translate( -orf => 1 );
299 my $aa0 = $seq->translate(-frame => 0);
301 is $aa->seq, $aa0->seq, "frame 0 start, frame 1 stop < frame 0 stop";
302 $seq->seq('AAATGAATGTAAATAA');
303 $aa = $seq->translate( -orf => 1, -frame=>1 );
304 my $aa2 = $seq->translate(-frame => 2);
305 is $aa->seq, $aa2->seq, "frame 1 start, frame 2 stop < frame 1 stop";
307 $seq->verbose($verbosity);
309 # test for some aliases
310 $seq = Bio::PrimarySeq->new(
312 -description => 'Alias desc'
314 is $seq->description, 'Alias desc';
315 is $seq->display_id, 'aliasid';
319 ok $seq->seq('actgx');
320 is $seq->alphabet, 'protein', 'Alphabet';
321 ok $seq->seq('actge');
322 is $seq->alphabet, 'protein';
323 ok $seq->seq('actgf');
324 is $seq->alphabet, 'protein';
325 ok $seq->seq('actgi');
326 is $seq->alphabet, 'protein';
327 ok $seq->seq('actgj');
328 is $seq->alphabet, 'protein';
329 ok $seq->seq('actgl');
330 is $seq->alphabet, 'protein';
331 ok $seq->seq('actgo');
332 is $seq->alphabet, 'protein';
333 ok $seq->seq('actgp');
334 is $seq->alphabet, 'protein';
335 ok $seq->seq('actgq');
336 is $seq->alphabet, 'protein';
337 ok $seq->seq('actgz');
338 is $seq->alphabet, 'protein';
339 ok $seq->seq('actgn');
340 is $seq->alphabet, 'dna';
341 ok $seq->seq('acugn');
342 is $seq->alphabet, 'rna';
343 ok $seq->seq('bdhkm');
344 is $seq->alphabet, 'protein';
345 ok $seq->seq('rsvwx');
346 is $seq->alphabet, 'protein';
347 ok $seq->seq('AAACTYAAAAGAATTGRCGG'); # valid degenerate DNA PCR primer sequence (90% ACGTN)
348 is $seq->alphabet, 'dna';
349 ok $seq->seq('AAACTYAAAKGAATTGRCGG'); # another primer previously detected as protein (85% ACGTN)
350 is $seq->alphabet, 'dna';
351 ok $seq->seq('YWACTYAAAKGARTTGRCGG'); # 70% ACGTNWSRM. Everything <= 70% is considered a protein
352 is $seq->alphabet, 'dna';
353 ok $seq->seq('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'); # Bug 2438
354 is $seq->alphabet, 'protein', 'Bug 2438';
355 ok $seq->seq('CAGTCXXXXXXXXXXXXXXXXXXXXXXXXXXXCAGCG');
356 is $seq->alphabet, 'protein';
357 ok $seq->seq('WTGGGGCTATGAAAAAAAAAWTTKMGMMAAAAAWTTWTKRWMRATC'); # showed up on MAKER list
358 is $seq->alphabet, 'dna';
360 ok $seq->seq('actgn', 'protein'); # accept specified alphabet, no matter what
361 is $seq->alphabet, 'protein';
362 ok $seq->seq('bdhkm', 'dna');
363 is $seq->alphabet, 'dna';
368 $seq = Bio::PrimarySeq->new( -display_id => 0, -seq => 'GATC' );
370 is $seq->display_id, 0, "Bug #2864";
372 # Test that the check for terminators inside the translated protein
373 # works when the terminator isn't '*':
375 $seq = Bio::PrimarySeq->new(-seq=>'ATGCTCTAAGCAGGGTAA'); # ML*AG*
376 eval { $aa = $seq->translate(-complete=>1, -throw=>1, -terminator=>'#') };
378 ok $error =~ /\QTerminator codon inside CDS!\E/, 'Terminator + inside sequence';
380 $seq = Bio::PrimarySeq->new(-seq=>'ATGCTCGCAGGGTAA'); # MLAG*
381 $aa = $seq->translate(-complete=>1, -throw=>1, -terminator=>'#');
386 ok $seq = Bio::PrimarySeq->new(), 'Length method';
388 ok $seq->length(123);
389 is $seq->length, 123;
391 ok $seq = Bio::PrimarySeq->new( -seq => 'ATGCTCTAAGCAGGGTAA' );
393 ok $seq->seq('ATGCTCTAAG');
395 is $seq->seq(undef), undef;
398 ok $seq = Bio::PrimarySeq->new( -length => 123 );
399 is $seq->length, 123;
401 ok $seq = Bio::PrimarySeq->new( -seq => 'ATGCTCTAAGCAGGGTAA' );
403 ok $seq->length( $seq->length ); # save memory by removing seq
404 is $seq->seq( undef ), undef; # ... but keeping a record of length
407 ok $seq->seq('ACGT');
408 is $seq->length, 4; # manually-specified length changed when sequence is changed
410 throws_ok { $seq->length(666); } qr/.+/; # Cannot lie about length
413 # Sequence validation method
414 is $seq->validate_seq( undef ), 1;
415 is $seq->validate_seq( '' ), 1;
416 is $seq->validate_seq( 'acgt' ), 1;
417 is $seq->validate_seq( 'ACGT' ), 1;
418 is $seq->validate_seq( 'XFRH' ), 1;
419 is $seq->validate_seq( '-~' ), 1; # gap symbols
420 is $seq->validate_seq( '-.*?=~' ), 1; # other valid symbols
421 is $seq->validate_seq( '0' ), 0;
422 is $seq->validate_seq( ' ' ), 0;
423 is $seq->validate_seq( 'AAAA$' ), 0;
424 is $seq->validate_seq( 'tt&t!' ), 0;
426 throws_ok { $seq->validate_seq('tt&t!', 1); } qr/.+/;
429 # Test direct option (no sequence validation)
430 throws_ok { $seq = Bio::PrimarySeq->new(-seq => 'A\T$AGQ+T'); } qr/.+/, 'Validation';
431 ok $seq = Bio::PrimarySeq->new( -seq => 'A\T$AGQ+T', -direct => 1 );
432 is $seq->seq, 'A\T$AGQ+T';
433 throws_ok { $seq->seq('NT@/') } qr/.+/;
435 # Set a sequence by reference
436 my $string = 'AAAACCCCGGGGTTTT';
437 ok $seq = Bio::PrimarySeq->new( -ref_to_seq => \$string );
438 is $seq->seq, 'AAAACCCCGGGGTTTT';
441 # Test internal PrimarySeqI _find_orfs function and translate( -orf => 'longest' )
445 ['TTTTATGGTGGCGTCAACTTAATTT',
449 #bigger test (this is a tomato unigene)
450 ['GAAGGCTGGTTCTGAGTTGGATCTATGTTTGATGAAGGGAAGTAGACCGGAGGTCTTGCATCAGCAATATTAGTACCAAATCCAGGTGGAGGCGCATCCTGTCTCCGTTGCATTTCAACTTTCATTTCAGCAATCTGTTGCATCAGTTGCATGATCAATTCATTCTGTTCCACTACAGTGGGCTGAGCGACCACAACGTCAGTAAGACGCCCTTCGTCATTGTTGTCTCCCATAACTGTTTTTCCTTTATCTGAATTTGATCGAGGGAAGGAATCTGTAGGACCTTTCGATCTGGTGAAGTAAGGATGATCTGCCAGCTTTATTGACACAGATCAGTAAAAAGGTACCTGAAAGGTAAAAACAACTCAAAGGCAAATTTGTTAGTGCATATCCAGAGTACAAAATGCTTAATATCGCACATAAAACCGATAAACACACAAGTCGTTTTGTTTGAGGATATCTTAACCCACGAATAAGGACGGATATATATTTTGAACAAACAGGAATTTGTTTGTTTGGCGTTATCTTGGGAAATCTG',
451 [[98,254,156,2],[347,476,129,2],[219,303,84,0],[16,73,57,1],[403,454,51,1],[310,358,48,1],[235,280,45,1],[491,536,45,2],[150,186,36,0],[507,537,30,0],[5,32,27,2],[511,538,27,1],[24,45,21,0],[305,326,21,2],[450,465,15,0]],
456 foreach my $test (@tests) {
457 my ($test_seq, $orfs) = @$test;
458 my @orfs = Bio::PrimarySeqI::_find_orfs_nucleotide(
461 Bio::Tools::CodonTable->new,
463 ); # ATG GTG GCG TCA ACT
464 is_deeply( \@orfs, $orfs, '_find_orfs 1')
465 or diag "for $test_seq, _find_orfs returned:\n"
466 .Dumper([map [@$_], @orfs]);
468 is_deeply( $orfs->[0],
469 (sort {$b->[2] <=> $a->[2]} @$orfs)[0],
470 'orfs are sorted by descending length'
473 # make sure we get the same sequence by taking the longest orf
474 # nucleotide from the test data and translating it, as by
475 # calling translate with -orf => 'longest'
478 ->new( -seq => $test_seq, -id => 'fake_id' )
479 ->translate( -orf => 'longest' )
483 ->new( -seq => substr( $test_seq, $orfs->[0][0], $orfs->[0][2] ),
488 'got correct -orf => "longest" seq',
494 # Extensive location and subsequence tests
495 ok $seq = Bio::PrimarySeq->new('-seq' => 'AAAAACCCCCGGGGGTTTTT',);
496 ok $seq->is_circular(1);
498 # NOTE: "_no_strand" variables tests the possibility that the user didn't set
499 # Strand for positive coordinates (or the object comes from
500 # Bio::Factory::FTLocationFactory->from_string)
503 # Coordinates: 1..5 => AAAAA
504 # Revcom: complement(1..5) => TTTTT
505 ok my $loc1_strand = Bio::Location::Simple->new('-start' => 1, '-end' => 5,'-strand' => 1);
506 ok my $loc1_no_strand = Bio::Location::Simple->new('-start' => 1, '-end' => 5);
507 is $seq->subseq($loc1_strand), 'AAAAA';
508 is $seq->subseq($loc1_no_strand), 'AAAAA';
509 is $loc1_strand->to_FTstring, '1..5';
510 is $loc1_no_strand->to_FTstring, '1..5';
511 $loc1_strand->flip_strand;
512 $loc1_no_strand->flip_strand;
513 is $seq->subseq($loc1_strand), 'TTTTT';
514 is $seq->subseq($loc1_no_strand), 'TTTTT';
515 is $loc1_strand->to_FTstring, 'complement(1..5)';
516 is $loc1_no_strand->to_FTstring, 'complement(1..5)';
517 is $loc1_strand->length, 5;
518 is $loc1_no_strand->length, 5;
520 # Basic split, both locations in positive strand
521 # Coords: join(6..10,16..20) => CCCCCTTTTT
522 # Revcom: complement(join(6..10,16..20)) => AAAAAGGGGG
523 ok my $loc2_strand = Bio::Location::Split->new();
524 ok my $loc2_no_strand = Bio::Location::Split->new();
525 ok $loc2_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10, '-strand' => 1) );
526 ok $loc2_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 16, '-end' => 20, '-strand' => 1) );
527 ok $loc2_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10) );
528 ok $loc2_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 16, '-end' => 20) );
529 is $seq->subseq($loc2_strand), 'CCCCCTTTTT';
530 is $seq->subseq($loc2_no_strand), 'CCCCCTTTTT';
531 is $loc2_strand->to_FTstring, 'join(6..10,16..20)';
532 is $loc2_no_strand->to_FTstring, 'join(6..10,16..20)';
533 $loc2_strand->flip_strand;
534 $loc2_no_strand->flip_strand;
535 is $seq->subseq($loc2_strand), 'AAAAAGGGGG';
536 is $seq->subseq($loc2_no_strand), 'AAAAAGGGGG';
537 is $loc2_strand->to_FTstring, 'complement(join(6..10,16..20))';
538 is $loc2_no_strand->to_FTstring, 'complement(join(6..10,16..20))';
539 is $loc2_strand->length, 15;
540 is $loc2_no_strand->length, 15;
542 # Basic split, both locations in negative strand
543 # Coords: complement(join(6..10,16..20)) => AAAAAGGGGG
544 # Revcom: join(6..10,16..20) => CCCCCTTTTT
545 my $loc3_strand = Bio::Location::Split->new();
546 $loc3_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10, '-strand' => -1) );
547 $loc3_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 16, '-end' => 20, '-strand' => -1) );
548 is $seq->subseq($loc3_strand), 'AAAAAGGGGG';
549 is $loc3_strand->to_FTstring, 'complement(join(6..10,16..20))';
550 $loc3_strand->flip_strand;
551 is $seq->subseq($loc3_strand), 'CCCCCTTTTT';
552 is $loc3_strand->to_FTstring, 'join(6..10,16..20)';
553 is $loc3_strand->length, 15;
555 ## Cut by origin-split, same strand, single sequence that pass through origin
556 #Coords: join(16..20,1..2) => TTTTTAA
557 #Revcom: complement(join(16..20,1..2)) => TTAAAAA
558 my $loc4_strand = Bio::Location::Split->new();
559 my $loc4_no_strand = Bio::Location::Split->new();
560 $loc4_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 16, '-end' => 20, '-strand' => 1) );
561 $loc4_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 2, '-strand' => 1) );
562 $loc4_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 16, '-end' => 20) );
563 $loc4_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 2) );
564 is $seq->subseq($loc4_strand), 'TTTTTAA';
565 is $seq->subseq($loc4_no_strand), 'TTTTTAA';
566 is $loc4_strand->to_FTstring, 'join(16..20,1..2)';
567 is $loc4_no_strand->to_FTstring, 'join(16..20,1..2)';
568 $loc4_strand->flip_strand;
569 $loc4_no_strand->flip_strand;
570 is $seq->subseq($loc4_strand), 'TTAAAAA';
571 is $seq->subseq($loc4_no_strand), 'TTAAAAA';
572 is $loc4_strand->to_FTstring, 'complement(join(16..20,1..2))';
573 is $loc4_no_strand->to_FTstring, 'complement(join(16..20,1..2))';
574 is $loc4_strand->length, 7;
575 is $loc4_no_strand->length, 7;
577 ## Cut by origin-combo split, same strand, 2 sequences with 1st passing through origin
578 #Coords: join(19..20,1..2,11..13) => TTAAGGG
579 #Revcom: complement(join(19..20,1..2,11..13)) => CCCTTAA
580 my $loc5_strand = Bio::Location::Split->new();
581 my $loc5_no_strand = Bio::Location::Split->new();
582 $loc5_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 19, '-end' => 20, '-strand' => 1) );
583 $loc5_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 2, '-strand' => 1) );
584 $loc5_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 11, '-end' => 13, '-strand' => 1) );
585 $loc5_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 19, '-end' => 20) );
586 $loc5_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 2) );
587 $loc5_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 11, '-end' => 13) );
588 is $seq->subseq($loc5_strand), 'TTAAGGG';
589 is $seq->subseq($loc5_no_strand), 'TTAAGGG';
590 is $loc5_strand->to_FTstring, 'join(19..20,1..2,11..13)';
591 is $loc5_no_strand->to_FTstring, 'join(19..20,1..2,11..13)';
592 $loc5_strand->flip_strand;
593 $loc5_no_strand->flip_strand;
594 is $seq->subseq($loc5_strand), 'CCCTTAA';
595 is $seq->subseq($loc5_no_strand), 'CCCTTAA';
596 is $loc5_strand->to_FTstring, 'complement(join(19..20,1..2,11..13))';
597 is $loc5_no_strand->to_FTstring, 'complement(join(19..20,1..2,11..13))';
598 is $loc5_strand->length, 15;
599 is $loc5_no_strand->length, 15;
601 ## Cut by origin-combo split, same strand, 2 sequences with 2nd passing through origin
602 #Coords: join(6..10,19..20,1..4) => CCCCCTTAAAA
603 #Revcom: complement(join(6..10,19..20,1..4)) => TTTTAAGGGGG
604 my $loc6_strand = Bio::Location::Split->new();
605 my $loc6_no_strand = Bio::Location::Split->new();
606 $loc6_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10, '-strand' => 1) );
607 $loc6_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 19, '-end' => 20, '-strand' => 1) );
608 $loc6_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 4, '-strand' => 1) );
609 $loc6_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10) );
610 $loc6_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 19, '-end' => 20) );
611 $loc6_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 4) );
612 is $seq->subseq($loc6_strand), 'CCCCCTTAAAA';
613 is $seq->subseq($loc6_no_strand), 'CCCCCTTAAAA';
614 is $loc6_strand->to_FTstring, 'join(6..10,19..20,1..4)';
615 is $loc6_no_strand->to_FTstring, 'join(6..10,19..20,1..4)';
616 $loc6_strand->flip_strand;
617 $loc6_no_strand->flip_strand;
618 is $seq->subseq($loc6_strand), 'TTTTAAGGGGG';
619 is $seq->subseq($loc6_no_strand), 'TTTTAAGGGGG';
620 is $loc6_strand->to_FTstring, 'complement(join(6..10,19..20,1..4))';
621 is $loc6_no_strand->to_FTstring, 'complement(join(6..10,19..20,1..4))';
622 is $loc6_strand->length, 19;
623 is $loc6_no_strand->length, 19;
625 ## Trans-splicing, 2 sequences in different strands, 2nd in complement
626 #Coords: join(6..10,complement(16..20)) => CCCCCAAAAA
627 #Revcom: join(16..20,complement(6..10)) => TTTTTGGGGG
628 my $loc7_strand = Bio::Location::Split->new();
629 my $loc7_no_strand = Bio::Location::Split->new();
630 $loc7_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10, '-strand' => 1) );
631 $loc7_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 16, '-end' => 20, '-strand' => -1) );
632 $loc7_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10) );
633 $loc7_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 16, '-end' => 20, '-strand' => -1) );
634 is $seq->subseq($loc7_strand), 'CCCCCAAAAA';
635 is $seq->subseq($loc7_no_strand), 'CCCCCAAAAA';
636 is $loc7_strand->to_FTstring, 'join(6..10,complement(16..20))';
637 is $loc7_no_strand->to_FTstring, 'join(6..10,complement(16..20))';
638 $loc7_strand->flip_strand;
639 $loc7_no_strand->flip_strand;
640 is $seq->subseq($loc7_strand), 'TTTTTGGGGG';
641 is $seq->subseq($loc7_no_strand), 'TTTTTGGGGG';
642 is $loc7_strand->to_FTstring, 'join(16..20,complement(6..10))';
643 is $loc7_no_strand->to_FTstring, 'join(16..20,complement(6..10))';
644 is $loc7_strand->length, 10;
645 is $loc7_no_strand->length, 10;
647 ## Trans-splicing, 2 sequences in different strands, 1st in complement
648 #Coords: join(complement(16..20),6..10) => AAAAACCCCC
649 #Revcom: join(complement(6..10),16..20) => GGGGGTTTTT
650 my $loc8_strand = Bio::Location::Split->new();
651 my $loc8_no_strand = Bio::Location::Split->new();
652 $loc8_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 16, '-end' => 20, '-strand' => -1) );
653 $loc8_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10, '-strand' => 1) );
654 $loc8_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 16, '-end' => 20, '-strand' => -1) );
655 $loc8_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10) );
656 is $seq->subseq($loc8_strand), 'AAAAACCCCC';
657 is $seq->subseq($loc8_no_strand), 'AAAAACCCCC';
658 is $loc8_strand->to_FTstring, 'join(complement(16..20),6..10)';
659 is $loc8_no_strand->to_FTstring, 'join(complement(16..20),6..10)';
660 $loc8_strand->flip_strand;
661 $loc8_no_strand->flip_strand;
662 is $seq->subseq($loc8_strand), 'GGGGGTTTTT';
663 is $seq->subseq($loc8_no_strand), 'GGGGGTTTTT';
664 is $loc8_strand->to_FTstring, 'join(complement(6..10),16..20)';
665 is $loc8_no_strand->to_FTstring, 'join(complement(6..10),16..20)';
666 is $loc8_strand->length, 10;
667 is $loc8_no_strand->length, 10;
669 ## Trans-splicing w/cut by origin, 2 sequences with 1st passing through origin, 2nd in complement
670 #Coords: join(19..20,1..3,complement(11..13)) => TTAAACCC
671 #Revcom: join(11..13,complement(1..3),complement(19..20)) => GGGTTTAA
672 my $loc9_strand = Bio::Location::Split->new();
673 my $loc9_no_strand = Bio::Location::Split->new();
674 $loc9_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 19, '-end' => 20, '-strand' => 1) );
675 $loc9_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 3, '-strand' => 1) );
676 $loc9_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 11, '-end' => 13, '-strand' => -1) );
677 $loc9_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 19, '-end' => 20) );
678 $loc9_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 3) );
679 $loc9_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 11, '-end' => 13, '-strand' => -1) );
680 is $seq->subseq($loc9_strand), 'TTAAACCC';
681 is $seq->subseq($loc9_no_strand), 'TTAAACCC';
682 is $loc9_strand->to_FTstring, 'join(19..20,1..3,complement(11..13))';
683 is $loc9_no_strand->to_FTstring, 'join(19..20,1..3,complement(11..13))';
684 $loc9_strand->flip_strand;
685 $loc9_no_strand->flip_strand;
686 is $seq->subseq($loc9_strand), 'GGGTTTAA';
687 is $seq->subseq($loc9_no_strand), 'GGGTTTAA';
688 is $loc9_strand->to_FTstring, 'join(11..13,complement(1..3),complement(19..20))';
689 is $loc9_no_strand->to_FTstring, 'join(11..13,complement(1..3),complement(19..20))';
690 is $loc9_strand->length, 8;
691 is $loc9_no_strand->length, 8;
693 ## Trans-splicing w/cut by origin, 2 sequences with 1st passing through origin, 1st in complement
694 #Coords: join(complement(1..3),complement(19..20),11..13) => TTTAAGGG
695 #Revcom: join(complement(11..13),19..20,1..3) => CCCTTAAA
696 my $loc10_strand = Bio::Location::Split->new();
697 my $loc10_no_strand = Bio::Location::Split->new();
698 $loc10_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 3, '-strand' => -1) );
699 $loc10_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 19, '-end' => 20, '-strand' => -1) );
700 $loc10_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 11, '-end' => 13, '-strand' => 1) );
701 $loc10_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 3, '-strand' => -1) );
702 $loc10_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 19, '-end' => 20, '-strand' => -1) );
703 $loc10_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 11, '-end' => 13) );
704 is $seq->subseq($loc10_strand), 'TTTAAGGG';
705 is $seq->subseq($loc10_no_strand), 'TTTAAGGG';
706 is $loc10_strand->to_FTstring, 'join(complement(1..3),complement(19..20),11..13)';
707 is $loc10_no_strand->to_FTstring, 'join(complement(1..3),complement(19..20),11..13)';
708 $loc10_strand->flip_strand;
709 $loc10_no_strand->flip_strand;
710 is $seq->subseq($loc10_strand), 'CCCTTAAA';
711 is $seq->subseq($loc10_no_strand), 'CCCTTAAA';
712 is $loc10_strand->to_FTstring, 'join(complement(11..13),19..20,1..3)';
713 is $loc10_no_strand->to_FTstring, 'join(complement(11..13),19..20,1..3)';
714 is $loc10_strand->length, 8;
715 is $loc10_no_strand->length, 8;
717 ## Trans-splicing w/cut by origin, 2 sequences with 2nd passing through origin, 2nd in complement
718 #Coords: join(6..10,complement(1..2),complement(18..20)) => CCCCCTTAAA
719 #Revcom: join(18..20,1..2,complement(6..10)) => TTTAAGGGGG
720 my $loc11_strand = Bio::Location::Split->new();
721 my $loc11_no_strand = Bio::Location::Split->new();
722 $loc11_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10, '-strand' => 1) );
723 $loc11_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 2, '-strand' => -1) );
724 $loc11_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 18, '-end' => 20, '-strand' => -1) );
725 $loc11_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10) );
726 $loc11_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 2, '-strand' => -1) );
727 $loc11_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 18, '-end' => 20, '-strand' => -1) );
728 is $seq->subseq($loc11_strand), 'CCCCCTTAAA';
729 is $seq->subseq($loc11_no_strand), 'CCCCCTTAAA';
730 is $loc11_strand->to_FTstring, 'join(6..10,complement(1..2),complement(18..20))';
731 is $loc11_no_strand->to_FTstring, 'join(6..10,complement(1..2),complement(18..20))';
732 $loc11_strand->flip_strand;
733 $loc11_no_strand->flip_strand;
734 is $seq->subseq($loc11_strand), 'TTTAAGGGGG';
735 is $seq->subseq($loc11_no_strand), 'TTTAAGGGGG';
736 is $loc11_strand->to_FTstring, 'join(18..20,1..2,complement(6..10))';
737 is $loc11_no_strand->to_FTstring, 'join(18..20,1..2,complement(6..10))';
738 is $loc11_strand->length, 10;
739 is $loc11_no_strand->length, 10;
741 ## Trans-splicing w/cut by origin, 2 sequences with 2nd passing through origin, 1st in complement
742 #Coords: join(complement(6..10),18..20,1..2) => GGGGGTTTAA
743 #Revcom: join(complement(1..2),complement(18..20),6..10) => TTAAACCCCC
744 my $loc12_strand = Bio::Location::Split->new();
745 my $loc12_no_strand = Bio::Location::Split->new();
746 $loc12_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10, '-strand' => -1) );
747 $loc12_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 18, '-end' => 20, '-strand' => 1) );
748 $loc12_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 2, '-strand' => 1) );
749 $loc12_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 6, '-end' => 10, '-strand' => -1) );
750 $loc12_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 18, '-end' => 20) );
751 $loc12_no_strand->add_sub_Location( Bio::Location::Simple->new('-start' => 1, '-end' => 2) );
752 is $seq->subseq($loc12_strand), 'GGGGGTTTAA';
753 is $seq->subseq($loc12_no_strand), 'GGGGGTTTAA';
754 is $loc12_strand->to_FTstring, 'join(complement(6..10),18..20,1..2)';
755 is $loc12_no_strand->to_FTstring, 'join(complement(6..10),18..20,1..2)';
756 $loc12_strand->flip_strand;
757 $loc12_no_strand->flip_strand;
758 is $seq->subseq($loc12_strand), 'TTAAACCCCC';
759 is $seq->subseq($loc12_no_strand), 'TTAAACCCCC';
760 is $loc12_strand->to_FTstring, 'join(complement(1..2),complement(18..20),6..10)';
761 is $loc12_no_strand->to_FTstring, 'join(complement(1..2),complement(18..20),6..10)';
762 is $loc12_strand->length, 10;
763 is $loc12_no_strand->length, 10;