1 # -*-Perl-*- Test Harness script for Bioperl
9 test_begin(-tests => 364);
13 use_ok 'Bio::SeqFeature::Generic';
17 my ($feat, $str, $feat2, $pair, @sft);
19 my $DEBUG = test_debug();
21 ok $feat = Bio::SeqFeature::Generic->new(
26 is $feat->primary_tag, '';
27 is $feat->source_tag, '';
28 is $feat->display_name, '';
30 ok $feat = Bio::SeqFeature::Generic->new(
35 -source => 'internal',
36 -display_name => 'my exon feature',
43 is $feat->start, 40, 'Start of feature location';
44 is $feat->end, 80, 'End of feature location';
45 is $feat->primary_tag, 'exon', 'Primary tag';
46 is $feat->source_tag, 'internal', 'Source tag';
47 is $feat->display_name, 'my exon feature', 'Display name';
48 is $feat->phase, undef, 'Undef phase by default';
49 is $feat->phase(1), 1, 'Phase accessor returns';
50 is $feat->phase, 1, 'Phase is persistent';
52 ok $feat->gff_string();
54 ok $feat2 = Bio::SeqFeature::Generic->new(
59 -source => 'program_a',
66 is $feat2->phase, 1, 'Set phase from constructor';
69 # Test attaching a SeqFeature::Generic to a Bio::Seq or SeqFeature::Generic
71 # Make the parent sequence object
72 my $seq = Bio::Seq->new(
73 -seq => 'aaaaggggtttt',
74 -display_id => 'test',
79 ok my $sf1 = Bio::SeqFeature::Generic->new(
85 # Add the SeqFeature to the parent
86 ok $seq->add_SeqFeature($sf1);
88 # Test that it gives the correct sequence
89 is $sf1->start, 4, 'Start of first seqfeature';
90 is $sf1->end, 9, 'End of first seqfeature';
91 is $sf1->strand, 1, 'Strand of first seqfeature';
92 ok my $sf_seq1 = $sf1->seq;
93 is $sf_seq1->seq, 'aggggt', 'Sequence of first seqfeature';
95 # Make a second seqfeature on the opposite strand
96 ok my $sf2 = Bio::SeqFeature::Generic->new(
102 # Now add the PrimarySeq to the seqfeature before adding it to the parent
103 ok $sf2->attach_seq($seq->primary_seq);
104 ok $seq->add_SeqFeature($sf2);
106 # Test again that we have the correct sequence
107 is $sf2->start, 4, 'Start of second seqfeature';
108 is $sf2->end, 9, 'End of second seqfeature';
109 is $sf2->strand, -1, 'Strand of second seqfeature';
110 ok my $sf_seq2 = $sf2->seq;
111 is $sf_seq2->seq, 'acccct', 'Sequence of second seqfeature';
115 # Some tests for bug #947
117 ok my $sfeat = Bio::SeqFeature::Generic->new(-primary => 'test');
119 ok $sfeat->add_sub_SeqFeature(
120 Bio::SeqFeature::Generic->new(
128 ok $sfeat->add_sub_SeqFeature(
129 Bio::SeqFeature::Generic->new(
137 is $sfeat->start, 2, 'sfeat start for EXPAND-ED feature (bug #947)';
138 is $sfeat->end, 8, 'sfeat end for EXPAND-ED feature (bug #947)';
140 # Some tests to see if we can set a feature to start at 0
141 ok $sfeat = Bio::SeqFeature::Generic->new(-start => 0, -end => 0 );
143 ok defined $sfeat->start;
144 is $sfeat->start, 0, 'Can create feature starting and ending at 0';
145 ok defined $sfeat->end;
146 is $sfeat->end, 0, 'Can create feature starting and ending at 0';
149 # Test for bug when Locations are not created explicitly
151 ok my $feat1 = Bio::SeqFeature::Generic->new(
157 ok $feat2 = Bio::SeqFeature::Generic->new(
163 ok my $overlap = $feat1->location->union($feat2->location);
164 is $overlap->start, 1;
165 is $overlap->end, 25;
167 ok my $intersect = $feat1->location->intersection($feat2->location);
168 is $intersect->start, 10;
169 is $intersect->end, 15;
172 # Now let's test spliced_seq
173 ok my $seqio = Bio::SeqIO->new(
174 -file => test_input_file('AY095303S1.gbk'),
177 isa_ok $seqio, 'Bio::SeqIO';
178 ok my $geneseq = $seqio->next_seq;
179 isa_ok $geneseq, 'Bio::Seq';
180 ok my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
184 test_skip(-tests => 5,
185 -requires_modules => [qw(Bio::DB::GenBank)],
186 -requires_networking => 1);
188 use_ok 'Bio::DB::GenBank';
190 $db = Bio::DB::GenBank->new(-verbose=> $DEBUG);
192 my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
194 is $cdsseq->subseq(1,76),
195 'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC';
196 is $cdsseq->translate->subseq(1,100),
197 'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLK'.
198 'APRPTDAPSAIDDAPSTSGLGLGGGVASPR';
199 # Test what happens without
200 $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
201 is $cdsseq->subseq(1,76),
202 'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC';
203 is $cdsseq->translate->subseq(1,100),
204 'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLK'.
205 'APRPTDAPSAIDDAPSTSGLGLGGGVASPR';
208 ok $seqio = Bio::SeqIO->new(
209 -file => test_input_file('AF032047.gbk'),
212 isa_ok $seqio, 'Bio::SeqIO';
213 ok $geneseq = $seqio->next_seq;
214 isa_ok $geneseq, 'Bio::Seq';
215 ok( ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures );
217 test_skip(-tests => 2,
218 -requires_modules => ['Bio::DB::GenBank', 'LWP::Protocol::https'],
219 -requires_networking => 1);
221 my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
222 is $cdsseq->subseq(1,70),
223 'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTGTCTGGCCTGGAGGCTATCCAGCATG';
224 is $cdsseq->translate->seq,
225 'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLNCYVSGFHPSDIEVDLLKNGKKIEKVE'.
226 'HSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*';
232 ok $seqio = Bio::SeqIO->new(
233 -format => 'genbank',
234 -file => test_input_file('NC_001284.gbk')
236 isa_ok $seqio, 'Bio::SeqIO';
237 ok my $genome = $seqio->next_seq;
239 for my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
240 ok my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
241 chop $spliced; # remove stop codon
242 is $spliced, ($cds->get_tag_values('translation'))[0], 'spliced_seq translation matches expected';
246 ok my $seq = Bio::SeqIO->new(
248 -file => test_input_file('sbay_c127.fas')
251 ok my $sf = Bio::SeqFeature::Generic->new(
256 -primary => 'splicedgene'
259 ok $sf->attach_seq($seq);
262 'TTCAATGACT' => 'FNDFYSMGKS',
263 'TCAATGACTT' => 'SMTSIPWVNQ',
264 'GTTCAATGAC' => 'VQ*LLFHG*I',
267 for my $phase (-1..3) {
268 ok my $sfseq = $sf->spliced_seq(-phase => $phase);
269 ok exists $phase_check{$sfseq->subseq(1,10)};
270 is $sfseq->translate->subseq(1,10), $phase_check{$sfseq->subseq(1,10)}, 'phase check';
274 ok $sf->add_tag_value('note','n1');
275 ok $sf->add_tag_value('note','n2');
276 ok $sf->add_tag_value('comment','c1');
277 is_deeply [sort $sf->get_all_tags()], [sort qw(note comment)] , 'Tags found';
278 is_deeply [sort $sf->get_tagset_values('note')], [sort qw(n1 n2)] , 'get_tagset_values tag values found';
279 is_deeply [sort $sf->get_tagset_values(qw(note comment))], [sort qw(c1 n1 n2)] , 'get_tagset_values tag values for multiple tags found';
281 is_deeply [sort $sf->get_tag_values('note')], [sort qw(n1 n2)] , 'get_tag_values tag values found';
282 } 'get_tag_values lives with tag';
284 is_deeply [$sf->get_tagset_values('notag') ], [], 'get_tagset_values no tag values found';
285 } 'get_tagset_values lives with no tag';
286 throws_ok { $sf->get_tag_values('notag') } qr/tag value that does not exist/, 'get_tag_values throws with no tag';
288 # Circular sequence SeqFeature tests
289 $seq = Bio::SeqIO->new(
290 -format => 'genbank',
291 -file => test_input_file('PX1CG.gb')
294 ok $seq->is_circular, 'Phi-X174 genome is circular';
296 # Retrieving the spliced sequence from any split location requires spliced_seq()
300 'A' => [3981, 136, 1, 1542, 'join(3981..5386,1..136)', 'ATGGTTCGTT'],
301 'A*' => [4497, 136, 1, 1026, 'join(4497..5386,1..136)', 'ATGAAATCGC'],
302 'B' => [5075, 51, 1, 363, 'join(5075..5386,1..51)', 'ATGGAACAAC'],
305 ok my @split_sfs = grep {
306 $_->location->isa('Bio::Location::SplitLocationI')
307 } $seq->get_SeqFeatures();
309 is @split_sfs, 3, 'Only 3 split locations';
311 for my $sf (@split_sfs) {
312 isa_ok $sf->location, 'Bio::Location::SplitLocationI';
313 ok my ($tag) = $sf->get_tag_values('product');
314 my ($start, $end, $strand, $length, $ftstring, $first_ten) = @{$sf_data{$tag}};
316 is $sf->location->to_FTstring, $ftstring, 'Feature string';
317 is $sf->spliced_seq->subseq(1,10), $first_ten, 'First ten nucleotides';
318 is $sf->strand, $strand, 'Strand';
319 is $sf->start, $start, 'Start';
320 is $sf->end, $end, 'End';
321 is $sf->length, $length, 'Expected length';
324 # spliced_seq() on the reverse strand, bug #88 (github)
325 $seq = Bio::SeqIO->new( -file => test_input_file('AF222649-rc.gbk') )->next_seq;
326 # All should start with "ATG"
327 for my $feat ( $seq->get_SeqFeatures('CDS') ) {
328 ok $feat->spliced_seq->seq =~ /^ATG/, "Reverse strand is spliced correctly";