t/CXGN/blastdb.t

   1 #!/usr/bin/perl
   2
   3 use strict;
   4 use warnings;
   5 use English;
   6 use FindBin;
   7 use File::Spec;
   8 use File::Temp qw/tempdir/;
   9 use File::Copy;
  10 use File::Path;
  11 use File::Basename;
  12
  13 use Test::More;
  14
  15 use Bio::SeqIO;
  16
  17 use CXGN::DB::Connection;
  18 use CXGN::BlastDB;
  19
  20 use List::MoreUtils qw/all/;
  21 use CXGN::Publish qw/copy_or_print/;
  22
  23 BEGIN {
  24     eval { CXGN::BlastDB->retrieve_all };
  25     if ($@ =~ m/DBI connect/) {
  26         plan skip_all => "Could not connect to database";
  27     }
  28     die $@ if $@;
  29     plan tests => 52;
  30 }
  31 BEGIN { use_ok('CXGN::BlastDB'); }
  32
  33
  34 #get some dbs
  35 my @dbs = CXGN::BlastDB->retrieve_all;
  36 ok(@dbs > 0,'retrieve_all command exists, got some dbs');
  37
  38 my @uni = CXGN::BlastDB->search_ilike(title => '%univec%core');
  39 ok(@uni == 1, 'exactly one univec core blast db registered');
  40
  41 ok( (all { $_->list_files && 1 || 0 } @dbs),
  42     'all databases exist on disk');
  43
  44 ok( (all {my $c = $_->sequences_count; defined($c) && $c > 0} @dbs),
  45     'all databases have more than zero sequences in them');
  46
  47 #test needs_update
  48 my @update_results = map {
  49   my $u = $_->needs_update;
  50   #diag $_->file_base.' '.($u ? 'needs' : 'does NOT need')." updating\n";
  51   $u
  52 } @dbs;
  53
  54 ok( (all { $_ == 0 || $_ == 1 } @update_results),
  55     'needs_update returns valid values',
  56   );
  57
  58 #test list_files
  59 ok( (all { map {-f} $_->list_files } @dbs ),
  60     'list_files only returns files that actually exist',
  61   );
  62
  63 #test format_from_file
  64 my $testseqs = File::Spec->catfile($FindBin::Bin,'data','blastdb_test.nucleotide.seq');
  65 my $tempdir = tempdir(CLEANUP=>1);
  66 my $test_ffbn = File::Spec->catfile($tempdir,'test-cxgn-blastdb');
  67 my ($tester) = grep $_->index_seqs, @dbs;
  68 $tester or BAIL_OUT('cannot find any dbs with index_seqs set!');
  69 my $old_dbpath = CXGN::BlastDB->dbpath;
  70 CXGN::BlastDB->dbpath($tempdir);
  71 my $st = time;
  72 $tester->format_from_file($testseqs);
  73 my $et = time;
  74 ok( $tester->files_are_complete, 'format_from_file made test blast db ok' );
  75 ok( $tester->is_indexed, 'tester should be marked as indexed on disk');
  76 #diag "new testing copy has files:\n", map {"$_\n"} $tester->list_files;
  77 #test format_time
  78 cmp_ok($st,'<=',$tester->format_time+60,'format_time reasonable 1');
  79 cmp_ok($et,'>=', $tester->format_time-60,'format_time reasonable 2');
  80 cmp_ok($tester->format_time,'>',0,'format_time is not too small');
  81
  82 #test to_fasta
  83 my $tester_seqs = Bio::SeqIO->new( -fh => $tester->to_fasta, -format => 'fasta');
  84 my $orig_seqs = Bio::SeqIO->new(-file => $testseqs, -format => 'fasta');
  85 while(my $tseq = $tester_seqs->next_seq) {
  86   my $oseq = $orig_seqs->next_seq
  87     or last;
  88   same_seqs( $tseq, $oseq );
  89   same_seqs( $oseq, $tester->get_sequence( $oseq->id ) );
  90 };
  91 ok(! $orig_seqs->next_seq, "not more sequences in original than in tester filehandle");
  92 CXGN::BlastDB->dbpath($old_dbpath);
  93
  94 #test is_split
  95 my ($nr) = CXGN::BlastDB->search_like( file_base => '%nr' );
  96 isa_ok($nr,'CXGN::BlastDB');
  97 ok( $nr->is_split, 'nr is correctly detected as being split' );
  98 my ($uv) = @uni;
  99 isa_ok($uv,'CXGN::BlastDB');
 100 ok(! $uv->is_split, 'univec is correctly detected as being NOT split' );
 101
 102 #test files_are_complete
 103 ok( $nr->files_are_complete, 'nr has a complete set of files' );
 104 ok( $uv->files_are_complete, 'univec has a complete set of files' );
 105 #copy univec somewhere
 106 my $copydest = File::Spec->catdir( $tempdir, (fileparse($uv->file_base))[1]);
 107 foreach ( $uv->list_files ) {
 108   -d $copydest or mkpath([$copydest]) or die "could not make path $copydest";
 109   copy_or_print($_,$copydest) or die "could not copy $_ to $tempdir: $!";
 110 }
 111 CXGN::BlastDB->dbpath( $tempdir );
 112 ok( $uv->files_are_complete, 'copied univec has a complete set of files');
 113 #now delete a file and see if it notices
 114 if( my $goner = ($uv->list_files)[0] ) {
 115   unlink $goner or die "could not delete '$goner': $!";
 116 }
 117 ok( ! $uv->files_are_complete, 'deleted blast db file was noticed');
 118
 119 #test identifier_url
 120 like( $nr->identifier_url('foo'), qr/ncbi.nlm.nih.gov.+foo/, 'identifier_url works' );
 121
 122 # compares two Bio::PrimarySeqI objects - 5 tests
 123 sub same_seqs {
 124     my ($one, $two) = @_;
 125     isa_ok( $one, 'Bio::PrimarySeqI', 'seq object one' );
 126     isa_ok( $two, 'Bio::PrimarySeqI', 'seq object two' );
 127     is( $two->id, $one->id, $one->id.' id OK');
 128     is( $two->seq, $one->seq, $one->id.' seq OK');
 129     is( $two->description, $one->description, $one->id.' desc OK');
 130 }