5 test_begin( -tests => 109,
6 -requires_modules => [qw(Bio::DB::Fasta Bio::SeqIO)] );
12 my $DEBUG = test_debug();
15 # Test Bio::DB::Fasta, but also the underlying module, Bio::DB::IndexedBase
17 my $test_dir = setup_temp_dir('dbfa');
18 my $test_file = test_input_file('dbfa', 'mixed_alphabet.fasta');
20 test_input_file('dbfa', 'mixed_alphabet.fasta'),
21 test_input_file('dbfa', '6.fa')
26 # Test basic functionalities
27 ok my $db = Bio::DB::Fasta->new($test_dir, -reindex => 1), 'Index a directory';
28 is $db->glob, '*.{fa,FA,fasta,FASTA,fast,FAST,dna,DNA,fna,FNA,faa,FAA,fsa,FSA}';
29 isa_ok $db, 'Bio::DB::Fasta';
30 is $db->length('CEESC13F'), 389;
31 is $db->seq('CEESC13F:1,10'), 'cttgcttgaa';
32 is $db->seq('CEESC13F:1-10'), 'cttgcttgaa';
33 is $db->seq('CEESC13F:1..10'), 'cttgcttgaa';
34 is $db->seq('CEESC13F:1..10/1'), 'cttgcttgaa';
35 is $db->seq('CEESC13F:1..10/+1'), 'cttgcttgaa';
36 is $db->seq('CEESC13F:1..10/-1'), 'ttcaagcaag';
37 is $db->seq('CEESC13F/1'), 'cttgcttgaaaaatttatataaatatttaagagaagaaaaataaataatcgcatctaatgacgtctgtccttgtatccctggtttccattgactggtgcactttcctgtctttgaggacatggacaatattcggcatcagttcctggctctccctcctctcctggtgctccagcagaaccgttctctccattatctcccttgtctccacgtggtccacgctctcctggtgctcctggaataccttgagctccctcgtgccgaattcctgcagcccgggggatccactagttctagagcggccgccaccgcggtgggagctccagcttttgttncctttagtgagggttaatttcgagcttggcgtaatcatggtcatagctgtttcctg';
38 is $db->seq('CEESC13F/-1'), 'caggaaacagctatgaccatgattacgccaagctcgaaattaaccctcactaaaggnaacaaaagctggagctcccaccgcggtggcggccgctctagaactagtggatcccccgggctgcaggaattcggcacgagggagctcaaggtattccaggagcaccaggagagcgtggaccacgtggagacaagggagataatggagagaacggttctgctggagcaccaggagaggagggagagccaggaactgatgccgaatattgtccatgtcctcaaagacaggaaagtgcaccagtcaatggaaaccagggatacaaggacagacgtcattagatgcgattatttatttttcttctcttaaatatttatataaatttttcaagcaag';
39 is $db->seq('AW057119', 1, 10), 'tcatgttggc';
40 is $db->seq('AW057119', 1, 10, 1), 'tcatgttggc';
41 is $db->seq('AW057119', 1, 10, -1), 'gccaacatga';
42 is $db->seq('AW057119', 10, 1), 'gccaacatga';
43 is $db->seq('AW057119', 10, 1, -1), 'tcatgttggc';
44 is $db->header('AW057119'), 'AW057119 test description';
45 is $db->seq('foobarbaz'), undef;
46 is $db->get_Seq_by_id('foobarbaz'), undef;
47 is $db->file('AW057119'), '1.fa';
48 is $db->file('AW057410'), '3.fa';
49 is $db->file('CEESC13F'), '6.fa';
51 # Bio::DB::RandomAccessI and Bio::DB::SeqI methods
52 ok my $primary_seq = $db->get_Seq_by_id('AW057119');
53 ok $primary_seq = $db->get_Seq_by_acc('AW057119');
54 ok $primary_seq = $db->get_Seq_by_version('AW057119');
55 ok $primary_seq = $db->get_Seq_by_primary_id('AW057119');
56 isa_ok $primary_seq, 'Bio::PrimarySeq::Fasta';
57 isa_ok $primary_seq, 'Bio::PrimarySeqI';
59 # Bio::PrimarySeqI methods
60 is $primary_seq->id, 'AW057119';
61 is $primary_seq->display_id, 'AW057119';
62 like $primary_seq->primary_id, qr/^Bio::PrimarySeq::Fasta=HASH/;
63 is $primary_seq->alphabet, 'dna';
64 is $primary_seq->accession_number, 'unknown';
65 is $primary_seq->is_circular, undef;
66 is $primary_seq->subseq(11, 20), 'ttctcggggt';
67 is $primary_seq->description, 'test description', 'bug 3126';
68 is $primary_seq->seq, 'tcatgttggcttctcggggtttttatggattaatacattttccaaacgattctttgcgccttctgtggtgccgccttctccgaaggaactgacgaaaaatgacgtggatttgctgacaaatccaggcgaggaatatttggacggattgatgaaatggcacggcgacgagcgacccgtgttcaaaagagaggacatttatcgttggtcggatagttttccagaatatcggctaagaatgatttgtctgaaagacacgacaagggtcattgcagtcggtcaatattgttactttgatgctctgaaagaaaggagagcagccattgttcttcttaggattgggatggacggatcctgaatatcgtaatcgggcagttatggagcttcaagcttcgatggcgctggaggagagggatcggtatccgactgccaacgcggcatcgcatccaaataagttcatgaaacgattttggcacatattcaacggcctcaaagagcacgaggacaaaggtcacaaggctgccgctgtttcatacaagagcttctacgacctcanagacatgatcattcctgaaaatctggatgtcagtggtattactgtaaatgatgcacgaaaggtgccacaaagagatataatcaactacgatcaaacatttcatccatatcatcgagaaatggttataatttctcacatgtatgacaatgatgggtttggaaaagtgcgtatgatgaggatggaaatgtacttggaattgtctagcgatgtctttanaccaacaagactgcacattagtcaattatgcagatagcc';
69 ok my $trunc = $primary_seq->trunc(11,20);
70 isa_ok $trunc, 'Bio::PrimarySeq::Fasta';
71 isa_ok $trunc, 'Bio::PrimarySeqI';
72 is $trunc->length, 10;
73 is $trunc->seq, 'ttctcggggt';
74 ok my $rev = $trunc->revcom;
75 isa_ok $rev, 'Bio::PrimarySeq::Fasta';
76 isa_ok $rev, 'Bio::PrimarySeqI';
77 is $rev->seq, 'accccgagaa';
83 # Re-open an existing index.
84 # Doing this test properly involves unloading and reloading Bio::DB::Fasta.
87 test_skip(-tests => 1, -requires_modules => [qw(Class::Unload)]);
88 use_ok('Class::Unload');
89 Class::Unload->unload( 'Bio::DB::Fasta' );
90 Class::Unload->unload( 'Bio::DB::IndexedBase' );
91 require Bio::DB::Fasta;
94 ok my $db = Bio::DB::Fasta->new($test_dir), 'Re-open an existing index';
95 is $db->seq('AW057119', 1, 10), 'tcatgttggc';
100 # Test tied hash access
102 ok tie(%h, 'Bio::DB::Fasta', $test_dir), 'Tied hash access';
103 ok exists $h{'AW057146'};
104 is $h{'AW057146:1,10'} , 'aatgtgtaca'; # in file 1.fa
105 is $h{'AW057146:10,1'} , 'tgtacacatt'; # reverse complement
106 is $h{'AW057443:11,20'}, 'gaaccgtcag'; # in file 4.fa
111 # Test writing the Bio::PrimarySeq::Fasta objects with SeqIO
112 ok my $db = Bio::DB::Fasta->new($test_dir, -reindex => 1), 'Writing with SeqIO';
113 my $out = Bio::SeqIO->new(
114 -format => 'genbank',
115 -file => '>'.test_output_file()
117 my $primary_seq = Bio::Seq->new(-primary_seq => $db->get_Seq_by_acc('AW057119'));
119 $out->write_seq($primary_seq)
123 $out = Bio::SeqIO->new(-format => 'embl', -file => '>'.test_output_file());
125 $out->write_seq($primary_seq)
132 # Test alphabet and reverse-complement RNA
133 ok my $db = Bio::DB::Fasta->new( $test_file, -reindex => 1), 'Index a single file';
134 is $db->alphabet('gi|352962132|ref|NG_030353.1|'), 'dna';
135 is $db->alphabet('gi|352962148|ref|NM_001251825.1|'), 'rna';
136 is $db->alphabet('gi|194473622|ref|NP_001123975.1|'), 'protein';
137 is $db->alphabet('gi|61679760|pdb|1Y4P|B'), 'protein';
138 is $db->alphabet('123'), '';
139 is $db->seq('gi|352962148|ref|NM_001251825.1|', 20, 29, 1), 'GUCAGCGUCC';
140 is $db->seq('gi|352962148|ref|NM_001251825.1|', 20, 29, -1), 'GGACGCUGAC';
142 # Test empty sequence
143 is $db->seq('123'), '';
145 is $db->file('gi|352962132|ref|NG_030353.1|'), 'mixed_alphabet.fasta';
151 ok my $db = Bio::DB::Fasta->new( $test_file, -reindex => 1);
152 ok my $stream = $db->get_PrimarySeq_stream;
153 isa_ok $stream, 'Bio::DB::Indexed::Stream';
156 # note use of modified iterator, needed b/c of overloading
157 while (defined(my $seq = $stream->next_seq)) {
163 # retrieve seq with ID of 0
164 my $seq = $db->get_Seq_by_id(0);
165 isa_ok $seq, 'Bio::PrimarySeq::Fasta';
166 is $seq->display_id, 0;
168 # ActivePerl will not allow deletion if the tie-hash is still active
170 # Strawberry Perl temporary file
171 unlink "$test_file.index" if -e "$test_file.index";
172 # ActivePerl temporary files
173 unlink "$test_file.index.dir" if -e "$test_file.index.dir";
174 unlink "$test_file.index.pag" if -e "$test_file.index.pag";
179 # Concurrent databases (bug #3390)
180 ok my $db1 = Bio::DB::Fasta->new( test_input_file('dbfa', '1.fa') );
181 ok my $db3 = Bio::DB::Fasta->new( test_input_file('dbfa', '3.fa') );
182 ok my $db4 = Bio::DB::Fasta->new( $test_dir );
183 ok my $db2 = Bio::DB::Fasta->new( test_input_file('dbfa', '2.fa') );
184 is $db4->file('AW057231'), '1.fa';
185 is $db2->file('AW057302'), '2.fa';
186 is $db4->file('AW057119'), '1.fa';
187 is $db3->file('AW057336'), '3.fa';
188 is $db1->file('AW057231'), '1.fa';
189 is $db4->file('AW057410'), '3.fa';
191 # ActivePerl will not allow deletion if the tie-hash is still active
195 # Strawberry Perl temporary file
196 unlink $db1->index_name if -e $db1->index_name;
197 unlink $db2->index_name if -e $db2->index_name;
198 unlink $db3->index_name if -e $db3->index_name;
199 # ActivePerl temporary files
200 unlink $db1->index_name().'.dir' if -e $db1->index_name().'.dir';
201 unlink $db2->index_name().'.dir' if -e $db2->index_name().'.dir';
202 unlink $db3->index_name().'.dir' if -e $db3->index_name().'.dir';
203 unlink $db1->index_name().'.pag' if -e $db1->index_name().'.pag';
204 unlink $db2->index_name().'.pag' if -e $db2->index_name().'.pag';
205 unlink $db3->index_name().'.pag' if -e $db3->index_name().'.pag';
210 # Test an arbitrary index filename and cleaning
211 my $name = 'arbitrary.idx';
212 ok my $db = Bio::DB::Fasta->new( $test_file,
213 -reindex => 1, -index_name => $name, -clean => 1,
215 is $db->index_name, $name;
217 # Tied-hash in Strawberry Perl produce $name,
218 # while in ActivePerl produce "$name.dir" and "$name.pag"
219 if (-e "$name.pag") {
221 # ActivePerl will not allow deletion if the tie-hash is still active
223 unlink "$name.dir" if -e "$name.dir";
224 unlink "$name.pag" if -e "$name.pag";
229 # ActivePerl will not allow deletion if the tie-hash is still active
231 unlink $name if -e $name;
240 ok my $db = Bio::DB::Fasta->new( $test_file,
241 -reindex => 1, -clean => 1, -makeid => \&extract_gi,
243 is_deeply [sort $db->get_all_primary_ids], ['', 194473622, 352962132, 352962148, 61679760];
244 is $db->get_Seq_by_id('gi|352962148|ref|NM_001251825.1|'), undef;
245 isa_ok $db->get_Seq_by_id(194473622), 'Bio::PrimarySeqI';
250 # Test makeid that generates several IDs, bug #3389
251 ok my $db = Bio::DB::Fasta->new( $test_file,
252 -reindex => 1, -clean => 1, -makeid => \&extract_gi_and_ref,
253 ), 'Make multiple IDs, bug #3389';
254 is_deeply [sort $db->get_all_primary_ids], ['', 194473622, 352962132, 352962148, 61679760, 'NG_030353.1', 'NM_001251825.1', 'NP_001123975.1'];
255 is $db->get_Seq_by_id('gi|352962148|ref|NM_001251825.1|'), undef;
256 isa_ok $db->get_Seq_by_id('NG_030353.1'), 'Bio::PrimarySeqI';
261 # Test opening set of files and test IDs
262 ok my $db = Bio::DB::Fasta->new( $test_files, -reindex => 1), 'Index a set of files';
265 my @ids = sort $db->get_all_primary_ids();
266 is_deeply \@ids, [ qw(
307 gi|194473622|ref|NP_001123975.1|
308 gi|352962132|ref|NG_030353.1|
309 gi|352962148|ref|NM_001251825.1|
310 gi|61679760|pdb|1Y4P|B
312 like $db->index_name, qr/^fileset_.+\.index$/;
314 my $index = $db->index_name;
315 # ActivePerl will not allow deletion if the tie-hash is still active
317 # Strawberry Perl temporary file
318 unlink $index if -e $index;
319 # ActivePerl temporary files
320 unlink "$index.dir" if -e "$index.dir";
321 unlink "$index.pag" if -e "$index.pag";
326 # Squash warnings locally
327 local $SIG{__WARN__} = sub {};
330 my $test_dir = setup_temp_dir('bad_dbfa');
331 throws_ok {my $db = Bio::DB::Fasta->new($test_dir, -reindex => 1)}
332 qr/FASTA header doesn't match/;
335 # Empty lines within a sequence is bad...
336 throws_ok {my $db = Bio::DB::Fasta->new(test_input_file('badfasta.fa'), -reindex => 1)}
337 qr/Blank lines can only precede header lines/;
343 # but empty lines preceding headers are okay, but let's check the seqs just in case
345 lives_ok {$db = Bio::DB::Fasta->new(test_input_file('spaced_fasta.fa'), -reindex => 1)};
346 is length($db->seq('CEESC39F')), 375, 'length is correct in sequences past spaces';
347 is length($db->seq('CEESC13F')), 389;
349 is $db->subseq('CEESC39F', 51, 60) , 'acatatganc', 'subseq is correct';
350 is $db->subseq('CEESC13F', 146, 155), 'ggctctccct', 'subseq is correct';
352 # Remove temporary test file
353 my $outfile = test_input_file('spaced_fasta.fa').'.index';
355 # ActivePerl will not allow deletion if the tie-hash is still active
357 # Strawberry Perl temporary file
358 unlink $outfile if -e $outfile;
359 # ActivePerl temporary files
360 unlink "$outfile.dir" if -e "$outfile.dir";
361 unlink "$outfile.pag" if -e "$outfile.pag";
368 # Extract GI from RefSeq
370 my ($id) = ($header =~ /gi\|(\d+)/m);
375 sub extract_gi_and_ref {
376 # Extract GI and from RefSeq
378 my ($gi) = ($header =~ /gi\|(\d+)/m);
380 my ($ref) = ($header =~ /ref\|([^|]+)/m);
387 # this obfuscation is to deal with lockfiles by GDBM_File which can
388 # only be created on local filesystems apparently so will cause test
389 # to block and then fail when the testdir is on an NFS mounted system
391 my $data_dir = shift;
393 my $io = Bio::Root::IO->new();
394 my $tempdir = test_output_dir();
395 my $test_dir = $io->catfile($tempdir, $data_dir);
396 mkdir($test_dir); # make the directory
397 my $indir = test_input_file($data_dir);
398 opendir(my $INDIR,$indir) || die("cannot open dir $indir");
399 # effectively do a cp -r but only copy the files that are in there, no subdirs
400 for my $file ( map { $io->catfile($indir,$_) } readdir($INDIR) ) {
401 next unless (-f $file );
402 copy($file, $test_dir);