1 # -*-Perl-*- Test Harness script for Bioperl
5 use constant TEST_COUNT => 116;
8 use lib '.','..','./t/lib';
11 test_begin(-tests => TEST_COUNT,
12 -requires_module => 'DB_File');
14 $ENV{ORACLE_HOME} ||= '/home/oracle/Home';
15 use_ok('Bio::SeqFeature::Generic');
16 use_ok('Bio::DB::SeqFeature::Store');
17 use_ok('Bio::DB::SeqFeature::Store::GFF3Loader');
18 use_ok('Bio::Root::IO');
19 use_ok('Bio::DB::Fasta');
23 my $DEBUG = test_debug();
25 my $gff_file = test_input_file('seqfeaturedb','test.gff3');
27 my (@f,$f,$f2,$sf1,$sf2,$sf3,@s,$s,$seq1,$seq2,$count,$new_features);
30 @args = (-adaptor => 'memory') unless @args;
33 my $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
34 skip "DB load failed? Skipping all! $@", (TEST_COUNT - 6) if $@;
37 is( $db->isa('Bio::SeqFeature::CollectionI'), 1 );
39 my $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db) };
40 skip "GFF3 loader failed? Skipping all! $@", (TEST_COUNT - 6) if $@;
45 # skip("skipping memory adaptor-specific tests",27)
46 # unless $db->isa('Bio::DB::SeqFeature::Store::memory');
50 my $n = Bio::SeqFeature::Generic->new(
51 # -primary_id => '_some_id', # you're not allowed to do this!!
52 -primary => 'repeat_123',
56 -display_name => 'My favorite feature'
58 ok( my $id = $db->add_features([$n]), 'adding a feature' );
59 ok( @f = $db->fetch($n->primary_id));
62 is( $f->primary_id, $n->primary_id);
64 $f2 = Bio::SeqFeature::Generic->new(
68 -primary => 'repeat_123', # -primary_tag is a synonym
69 -source_tag => 'repeatmasker',
70 -display_name => 'alu family',
74 sillytag => 'this is silly!' }
76 ok( $db->store($f2) , 'adding a feature with no primary_id' );
77 ok( $f2->primary_id );
79 # test fetching features
80 is( $db->fetch('-1'), undef, 'searching for a feature that shouldnt exist');
81 is( $db->get_features_by_type('repeat_123:repeatmasker'), 1, 'simple type' );
82 is( $db->get_features_by_type('repeat_123:'), 2, 'base type with colon' );
83 is( $db->get_features_by_type('repeat_123'), 2, 'base type alone' );
84 is( $db->get_features_by_type('rep.*'), 0, 'queried types are not interpolated' );
85 ok( @f = $db->types );
87 isa_ok($f[0], 'Bio::DB::GFF::Typename');
89 # test removing features
90 ok( $db->delete( $f ), 'feature deletion' );
91 is( $db->fetch( $f->primary_id ), undef );
94 ok( $db->store($f, $f2) );
96 # test adding seqfeatures
97 $sf1 = Bio::SeqFeature::Generic->new( -primary=>'seqfeat1', -start=>23, -end=>512 );
98 $sf2 = Bio::SeqFeature::Generic->new( -primary=>'seqfeat2', -start=>23, -end=>512 );
99 $sf3 = Bio::SeqFeature::Generic->new( -primary=>'seqfeat1', -start=>23, -end=>512, source_tag => 'dna' );
100 ok $db->add_features([$sf1, $sf2, $sf3]), 'adding subfeatures';
101 is $db->add_SeqFeature($f, $sf1), 1;
102 is $db->add_SeqFeature($f, $sf2, $sf3), 2;
103 is $db->add_SeqFeature($f, $sf1, $sf2, $sf3), 3;
105 # test fetching seqfeatures
106 is $db->fetch_SeqFeatures($f), 3;
107 is $db->fetch_SeqFeatures($f, 'seqfeat2'), 1;
108 is $db->fetch_SeqFeatures($f, 'seqfeat1:dna'), 1;
109 is $db->fetch_SeqFeatures($f, 'seqfeat1'), 2;
110 is $db->fetch_SeqFeatures($f, 'seqfeat1', 'seqfeat2'), 3;
111 is $db->fetch_SeqFeatures($f, 'seqfeat4'), 0;
113 $new_features = scalar $db->features;
116 # exercise the loader
117 ok($loader->load($gff_file));
119 # there should be one gene named 'abc-1'
120 @f = $db->get_features_by_name('abc-1');
124 # there should be three subfeatures of type "exon" and three of type "CDS"
125 is($f->get_SeqFeatures('exon'),3);
126 is($f->get_SeqFeatures('CDS'),3);
128 # the sequence of feature abc-1 should match the sequence of the first exon at the beginning
129 $seq1 = $f->seq->seq;
130 $seq2 = (sort {$a->start<=>$b->start} $f->get_SeqFeatures('exon'))[0]->seq->seq;
131 is(substr($seq1,0,length $seq2),$seq2);
133 # sequence lengths should match
134 is(length $seq1, $f->length);
136 # if we pull out abc-1 again we should get the same object
137 ($s) = $db->get_features_by_name('abc-1');
140 # test case-sensitivity
141 ($s) = $db->get_features_by_name('Abc-1');
142 is($s, $f, 'feature names should be case insensitive');
144 # we should get two objects when we ask for abc-1 using get_features_by_alias
145 # this also depends on selective subfeature indexing
146 @f = $db->get_features_by_alias('abc-1');
149 # the two features should be different
152 # test that targets are working
153 ($f) = $db->get_features_by_name('match1');
157 ok($s->seq_id eq 'CEESC13F');
158 $seq1 = $s->seq->seq;
159 is(substr($seq1,0,10), 'ttgcgttcgg');
161 # can we fetch subfeatures?
162 # gene3.a has the Index=1 attribute, so we should fetch it
163 ($f) = $db->get_features_by_name('gene3.a');
166 # gene 3.b doesn't have an index, so we shouldn't get it
167 ($f) = $db->get_features_by_name('gene3.b');
170 # test three-tiered genes
171 ($f) = $db->get_features_by_name('gene3');
173 my @transcripts = $f->get_SeqFeatures;
175 is($transcripts[0]->method,'mRNA');
176 is($transcripts[0]->source,'confirmed');
178 # test that exon #2 is shared between the two transcripts
179 my @exons1 = $transcripts[0]->get_SeqFeatures('CDS');
181 my @exons2 = $transcripts[1]->get_SeqFeatures('CDS');
182 my ($shared1) = grep {$_->display_name||'' eq 'shared_exon'} @exons1;
183 my ($shared2) = grep {$_->display_name||'' eq 'shared_exon'} @exons2;
184 ok($shared1 && $shared2);
185 is($shared1, $shared2);
186 is($shared1->primary_id, $shared2->primary_id);
189 is($shared1->phase, 0);
190 is($shared1->strand, +1);
191 is(($f->attributes('expressed'))[0], 'yes');
194 is (scalar $db->get_features_by_type('transcript'), 4, 'base type');
195 is (scalar $db->get_features_by_type('transcript:confirmed'), 2, 'base:source type');
198 my ($gene3a) = grep { $_->display_name eq 'gene3.a'} @transcripts;
199 my ($gene3b) = grep { $_->display_name eq 'gene3.b'} @transcripts;
202 ok($gene3a->Is_expressed);
203 ok(!$gene3b->Is_expressed);
205 # the representation of the 3'-UTR in the two transcripts a and b is
206 # different (not recommended but supported by the GFF3 spec). In the
207 # first case, there are two 3'UTRs existing as independent
208 # features. In the second, there is one UTR with a split location.
209 is($gene3a->Three_prime_UTR, 2);
210 is($gene3b->Three_prime_UTR, 1);
211 my ($utr) = $gene3b->Three_prime_UTR;
212 is($utr->segments, 2);
213 my $location = $utr->location;
214 isa_ok($location, 'Bio::Location::Split');
215 is($location->sub_Location,2);
217 # ok, test that queries are working properly.
218 # find all features with the attribute "expressed"
219 @f = $db->get_features_by_attribute({expressed=>'yes'});
222 # find all top-level features on Contig3 -- there should be two
223 @f = $db->get_features_by_location(-seq_id=>'Contig3');
226 # find all top-level features on Contig3 that overlap a range -- only one
227 @f = $db->get_features_by_location(-seq_id=>'Contig3',-start=>40000,-end=>50000);
230 # find all top-level features on Contig3 of type 'assembly_component'
231 @f = $db->features(-seq_id=>'Contig3',-type=>'assembly_component');
236 is(scalar @f, 27+$new_features);
238 my $i = $db->get_seq_stream;
241 my $feature_count = @f;
242 while ($i->next_seq) { $count++ }
243 is($feature_count,$count);
245 # regression test on bug in which get_SeqFeatures('type') did not filter inline segments
246 @f = $db->get_features_by_name('agt830.3');
247 ok(@f && !$f[0]->get_SeqFeatures('exon'));
248 ok(@f && $f[0]->get_SeqFeatures('EST_match'));
250 # regression test on bug in which the load_id disappeared
251 is(@f && $f[0]->load_id, 'Match2');
253 # regress on proper handling of multiple ID features
254 my ($alignment) = $db->get_features_by_name('agt830.5');
256 is($alignment->target->start,1);
257 is($alignment->target->end, 654);
258 is($alignment->get_SeqFeatures, 2);
259 my $gff3 = $alignment->gff3_string(1);
260 my @lines = split "\n",$gff3;
262 ok("@lines" !~ /Parent=/s);
263 ok("@lines" =~ /ID=/s);
265 # regress on multiple parentage
266 my ($gp) = $db->get_features_by_name('gparent1');
267 my ($p1,$p2) = $gp->get_SeqFeatures;
268 my @c = sort {$a->start<=>$b->start} $p1->get_SeqFeatures;
272 @c = sort {$a->start<=>$b->start} $p2->get_SeqFeatures;
278 test_skip(-tests => 2, -excludes_os => 'mswin');
280 if (my $child = open(F,"-|")) { # parent reads from child
281 cmp_ok(scalar <F>,'>',0);
283 # The challenge is to make sure that the handle
284 # still works in the parent!
285 my @f = $db->features();
286 cmp_ok(scalar @f,'>',0);
290 my @f = $db->features();
291 my $feature_count = @f;
292 print $feature_count;
298 # test the -ignore_seqregion flag
300 # the original should have a single feature named 'Contig1'
301 my @f = $db->get_features_by_name('Contig1');
304 $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
305 $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db,
306 -ignore_seqregion=>1)
308 $loader->load($gff_file);
309 @f = $db->get_features_by_name('Contig1');
312 # test keyword search
313 my @results = $db->search_notes('interesting');
314 is(scalar @results,2,'keyword search; 1 term');
316 @results = $db->search_notes('terribly interesting');
317 is(scalar @results,2,'keyword search; 2 terms');
319 # test our ability to substitute a FASTA file for the database
320 my $fasta_dir = make_fasta_testdir();
321 my $dbfa = Bio::DB::Fasta->new($fasta_dir, -reindex => 1);
324 ok(my $contig1=$dbfa->seq('Contig1'));
326 $db = Bio::DB::SeqFeature::Store->new(@args,-fasta=>$dbfa);
327 $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db);
328 ok($loader->load($gff_file));
330 ok($db->dna_accessor);
331 my $f = $db->segment('Contig1');
332 ok($f->dna eq $contig1);
334 ok(my $contig2 = $dbfa->seq('Contig2'));
335 ($f) = $db->get_feature_by_name('match4');
336 my $length = $f->length;
337 ok(substr($contig2,0,$length) eq $f->dna);
339 # DESTROY for $dbfa sometimes is not being called at script end,
340 # so call it explicitly to close temporal filehandles
341 # and allow their deletion
344 # Remove temporal database file used for SQLite tests
345 if ($db->isa('Bio::DB::SeqFeature::Store::DBI::SQLite')) {
347 unlink $db->{dbh_file};
351 # testing namespaces for mysql and Pg adaptor
356 for (my $i=0; $i < @args; $i++) {
357 if ($args[$i] eq '-adaptor') {
358 $adaptor = $args[$i+1];
363 skip "Namespaces only supported for DBI::mysql and DBI::Pg adaptors", 6, if ($adaptor ne 'DBI::mysql' && $adaptor ne 'DBI::Pg');
365 push(@args, ('-namespace', 'bioperl_seqfeature_t_test_schema'));
366 $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
369 $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db) };
372 $loader->load($gff_file);
374 # there should be one gene named 'abc-1'
375 ok( @f = $db->get_features_by_name('abc-1') );
379 # there should be three subfeatures of type "exon" and three of type "CDS"
380 is($f->get_SeqFeatures('exon'),3);
381 is($f->get_SeqFeatures('CDS'),3);
383 $db->remove_namespace();
387 sub make_fasta_testdir {
388 # this obfuscation is to deal with lockfiles by GDBM_File which can
389 # only be created on local filesystems apparently so will cause test
390 # to block and then fail when the testdir is on an NFS mounted system
391 my $io = Bio::Root::IO->new(-verbose => $DEBUG);
392 my $tempdir = test_output_dir();
393 my $test_dbdir = $io->catfile($tempdir, 'dbfa');
394 mkdir($test_dbdir); # make the directory
395 my $indir = test_input_file('dbfa');
396 opendir(INDIR,$indir) || die("cannot open dir $indir");
397 # effectively do a cp -r but only copy the files that are in there, no subdirs
398 for my $file ( map { $io->catfile($indir,$_) } readdir(INDIR) ) {
399 next unless (-f $file );
400 copy($file, $test_dbdir);