Merge branch 'master' of github.com:solgenomics/cxgn-corelibs
[cxgn-corelibs.git] / t / CXGN / blastdb.t
blob68e0ba23bf78dff3924305bb8b153813eba54939
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use English;
6 use FindBin;
7 use File::Spec;
8 use File::Temp qw/tempdir/;
9 use File::Copy;
10 use File::Path;
11 use File::Basename;
13 use Test::More;
15 use Bio::SeqIO;
17 use CXGN::DB::Connection;
18 use CXGN::BlastDB;
20 use List::MoreUtils qw/all/;
21 use CXGN::Publish qw/copy_or_print/;
23 BEGIN {
24 eval { CXGN::BlastDB->retrieve_all };
25 if ($@ =~ m/DBI connect/) {
26 plan skip_all => "Could not connect to database";
28 die $@ if $@;
29 plan tests => 52;
31 BEGIN { use_ok('CXGN::BlastDB'); }
34 #get some dbs
35 my @dbs = CXGN::BlastDB->retrieve_all;
36 ok(@dbs > 0,'retrieve_all command exists, got some dbs');
38 my @uni = CXGN::BlastDB->search_ilike(title => '%univec%core');
39 ok(@uni == 1, 'exactly one univec core blast db registered');
41 ok( (all { $_->list_files && 1 || 0 } @dbs),
42 'all databases exist on disk');
44 ok( (all {my $c = $_->sequences_count; defined($c) && $c > 0} @dbs),
45 'all databases have more than zero sequences in them');
47 #test needs_update
48 my @update_results = map {
49 my $u = $_->needs_update;
50 #diag $_->file_base.' '.($u ? 'needs' : 'does NOT need')." updating\n";
52 } @dbs;
54 ok( (all { $_ == 0 || $_ == 1 } @update_results),
55 'needs_update returns valid values',
58 #test list_files
59 ok( (all { map {-f} $_->list_files } @dbs ),
60 'list_files only returns files that actually exist',
63 #test format_from_file
64 my $testseqs = File::Spec->catfile($FindBin::Bin,'data','blastdb_test.nucleotide.seq');
65 my $tempdir = tempdir(CLEANUP=>1);
66 my $test_ffbn = File::Spec->catfile($tempdir,'test-cxgn-blastdb');
67 my ($tester) = grep $_->index_seqs, @dbs;
68 $tester or BAIL_OUT('cannot find any dbs with index_seqs set!');
69 my $old_dbpath = CXGN::BlastDB->dbpath;
70 CXGN::BlastDB->dbpath($tempdir);
71 my $st = time;
72 $tester->format_from_file($testseqs);
73 my $et = time;
74 ok( $tester->files_are_complete, 'format_from_file made test blast db ok' );
75 ok( $tester->is_indexed, 'tester should be marked as indexed on disk');
76 #diag "new testing copy has files:\n", map {"$_\n"} $tester->list_files;
77 #test format_time
78 cmp_ok($st,'<=',$tester->format_time+60,'format_time reasonable 1');
79 cmp_ok($et,'>=', $tester->format_time-60,'format_time reasonable 2');
80 cmp_ok($tester->format_time,'>',0,'format_time is not too small');
82 #test to_fasta
83 my $tester_seqs = Bio::SeqIO->new( -fh => $tester->to_fasta, -format => 'fasta');
84 my $orig_seqs = Bio::SeqIO->new(-file => $testseqs, -format => 'fasta');
85 while(my $tseq = $tester_seqs->next_seq) {
86 my $oseq = $orig_seqs->next_seq
87 or last;
88 same_seqs( $tseq, $oseq );
89 same_seqs( $oseq, $tester->get_sequence( $oseq->id ) );
91 ok(! $orig_seqs->next_seq, "not more sequences in original than in tester filehandle");
92 CXGN::BlastDB->dbpath($old_dbpath);
94 #test is_split
95 my ($nr) = CXGN::BlastDB->search_like( file_base => '%nr' );
96 isa_ok($nr,'CXGN::BlastDB');
97 ok( $nr->is_split, 'nr is correctly detected as being split' );
98 my ($uv) = @uni;
99 isa_ok($uv,'CXGN::BlastDB');
100 ok(! $uv->is_split, 'univec is correctly detected as being NOT split' );
102 #test files_are_complete
103 ok( $nr->files_are_complete, 'nr has a complete set of files' );
104 ok( $uv->files_are_complete, 'univec has a complete set of files' );
105 #copy univec somewhere
106 my $copydest = File::Spec->catdir( $tempdir, (fileparse($uv->file_base))[1]);
107 foreach ( $uv->list_files ) {
108 -d $copydest or mkpath([$copydest]) or die "could not make path $copydest";
109 copy_or_print($_,$copydest) or die "could not copy $_ to $tempdir: $!";
111 CXGN::BlastDB->dbpath( $tempdir );
112 ok( $uv->files_are_complete, 'copied univec has a complete set of files');
113 #now delete a file and see if it notices
114 if( my $goner = ($uv->list_files)[0] ) {
115 unlink $goner or die "could not delete '$goner': $!";
117 ok( ! $uv->files_are_complete, 'deleted blast db file was noticed');
119 #test identifier_url
120 like( $nr->identifier_url('foo'), qr/ncbi.nlm.nih.gov.+foo/, 'identifier_url works' );
122 # compares two Bio::PrimarySeqI objects - 5 tests
123 sub same_seqs {
124 my ($one, $two) = @_;
125 isa_ok( $one, 'Bio::PrimarySeqI', 'seq object one' );
126 isa_ok( $two, 'Bio::PrimarySeqI', 'seq object two' );
127 is( $two->id, $one->id, $one->id.' id OK');
128 is( $two->seq, $one->seq, $one->id.' seq OK');
129 is( $two->description, $one->description, $one->id.' desc OK');