fixed the pod for add_child. This function does not take any arguments. it generates...
[cxgn-corelibs.git] / t / CXGN / blastdb.t
blobff7d0d1bd38497a2964ae33d9b8603ea66f9626d
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use English;
6 use FindBin;
7 use File::Spec;
8 use File::Temp qw/tempdir/;
9 use File::Copy;
10 use File::Path;
11 use File::Basename;
13 use Test::More tests => 52;
15 use Bio::SeqIO;
17 use CXGN::DB::Connection;
19 use List::MoreUtils qw/all/;
20 use CXGN::Publish qw/copy_or_print/;
22 BEGIN { use_ok('CXGN::BlastDB'); }
24 #get some dbs
25 my @dbs = CXGN::BlastDB->retrieve_all;
26 ok(@dbs > 0,'retrieve_all command exists, got some dbs');
28 my @uni = CXGN::BlastDB->search_ilike(title => '%univec%core');
29 ok(@uni == 1, 'exactly one univec core blast db registered');
31 ok( (all { $_->list_files && 1 || 0 } @dbs),
32 'all databases exist on disk');
34 ok( (all {my $c = $_->sequences_count; defined($c) && $c > 0} @dbs),
35 'all databases have more than zero sequences in them');
37 #test needs_update
38 my @update_results = map {
39 my $u = $_->needs_update;
40 #diag $_->file_base.' '.($u ? 'needs' : 'does NOT need')." updating\n";
42 } @dbs;
44 ok( (all { $_ == 0 || $_ == 1 } @update_results),
45 'needs_update returns valid values',
48 #test list_files
49 ok( (all { map {-f} $_->list_files } @dbs ),
50 'list_files only returns files that actually exist',
53 #test format_from_file
54 my $testseqs = File::Spec->catfile($FindBin::Bin,'data','blastdb_test.nucleotide.seq');
55 my $tempdir = tempdir(CLEANUP=>1);
56 my $test_ffbn = File::Spec->catfile($tempdir,'test-cxgn-blastdb');
57 my ($tester) = grep $_->index_seqs, @dbs;
58 $tester or BAIL_OUT('cannot find any dbs with index_seqs set!');
59 my $old_dbpath = CXGN::BlastDB->dbpath;
60 CXGN::BlastDB->dbpath($tempdir);
61 my $st = time;
62 $tester->format_from_file($testseqs);
63 my $et = time;
64 ok( $tester->files_are_complete, 'format_from_file made test blast db ok' );
65 ok( $tester->is_indexed, 'tester should be marked as indexed on disk');
66 #diag "new testing copy has files:\n", map {"$_\n"} $tester->list_files;
67 #test format_time
68 cmp_ok($st,'<=',$tester->format_time+60,'format_time reasonable 1');
69 cmp_ok($et,'>=', $tester->format_time-60,'format_time reasonable 2');
70 cmp_ok($tester->format_time,'>',0,'format_time is not too small');
72 #test to_fasta
73 my $tester_seqs = Bio::SeqIO->new( -fh => $tester->to_fasta, -format => 'fasta');
74 my $orig_seqs = Bio::SeqIO->new(-file => $testseqs, -format => 'fasta');
75 while(my $tseq = $tester_seqs->next_seq) {
76 my $oseq = $orig_seqs->next_seq
77 or last;
78 same_seqs( $tseq, $oseq );
79 same_seqs( $oseq, $tester->get_sequence( $oseq->id ) );
81 ok(! $orig_seqs->next_seq, "not more sequences in original than in tester filehandle");
82 CXGN::BlastDB->dbpath($old_dbpath);
84 #test is_split
85 my ($nr) = CXGN::BlastDB->search_like( file_base => '%nr' );
86 isa_ok($nr,'CXGN::BlastDB');
87 ok( $nr->is_split, 'nr is correctly detected as being split' );
88 my ($uv) = @uni;
89 isa_ok($uv,'CXGN::BlastDB');
90 ok(! $uv->is_split, 'univec is correctly detected as being NOT split' );
92 #test files_are_complete
93 ok( $nr->files_are_complete, 'nr has a complete set of files' );
94 ok( $uv->files_are_complete, 'univec has a complete set of files' );
95 #copy univec somewhere
96 my $copydest = File::Spec->catdir( $tempdir, (fileparse($uv->file_base))[1]);
97 foreach ( $uv->list_files ) {
98 -d $copydest or mkpath([$copydest]) or die "could not make path $copydest";
99 copy_or_print($_,$copydest) or die "could not copy $_ to $tempdir: $!";
101 CXGN::BlastDB->dbpath( $tempdir );
102 ok( $uv->files_are_complete, 'copied univec has a complete set of files');
103 #now delete a file and see if it notices
104 if( my $goner = ($uv->list_files)[0] ) {
105 unlink $goner or die "could not delete '$goner': $!";
107 ok( ! $uv->files_are_complete, 'deleted blast db file was noticed');
109 #test identifier_url
110 like( $nr->identifier_url('foo'), qr/ncbi.nlm.nih.gov.+foo/, 'identifier_url works' );
112 # compares two Bio::PrimarySeqI objects - 5 tests
113 sub same_seqs {
114 my ($one, $two) = @_;
115 isa_ok( $one, 'Bio::PrimarySeqI', 'seq object one' );
116 isa_ok( $two, 'Bio::PrimarySeqI', 'seq object two' );
117 is( $two->id, $one->id, $one->id.' id OK');
118 is( $two->seq, $one->seq, $one->id.' seq OK');
119 is( $two->description, $one->description, $one->id.' desc OK');