Merge pull request #42 from solgenomics/topic/duplicate_image_warning
[cxgn-corelibs.git] / bin / bdb_update_blast_dbs.pl
blob4e568bcd210340cca85f6e02c9f4bddb6e6986f3
1 #!/usr/bin/env perl
3 # NOTE: This script is deprecated.
4 # The current version has been moved to the sgn/ repo
5 # as bin/bdb_update_blast_dbs.pl
8 use strict;
9 use warnings;
10 use English;
11 use Carp;
12 #$Carp::Verbose = 1;
13 use FindBin;
14 use Getopt::Std;
16 #use Data::Dumper;
18 use File::Spec;
19 use File::Temp qw/tempfile/;
21 use CXGN::Tools::Wget qw/wget_filter/;
22 use CXGN::BlastDB;
# usage( [$message] )
#
# Build the help text for this script and terminate by die()ing with it.
#
#   $message - optional explanation of what went wrong; when given (and
#              true) it is shown first, prefixed with "Error: ".
#
# The help text ends with the sorted file_base of every database returned
# by CXGN::BlastDB->retrieve_all. This sub never returns normally.
sub usage {
    my $message = shift || '';
    $message = "Error: $message\n" if $message;

    # One line per known BLAST database, sorted by the formatted line.
    my $file_bases = join '', sort map ' '.$_->file_base."\n", CXGN::BlastDB->retrieve_all;

    # The description refers to -d (the only required option); getopts()
    # in this script accepts no -g option.
    die <<EOU;
$message
Usage:

Do not use this script. It is deprecated.
Use cxgn/sgn/bin/bdb_update_blast_dbs.pl instead.

$FindBin::Script [ options ] -d <path>

Go over all the BLAST databases we keep in stock and update them if
needed. When run with just the -d option, goes over all the BLAST
dbs listed in the sgn.blast_db table and updates them if needed,
putting them under the top-level BLAST db path given with the -d
option.

Options:

-d <path> required. path where all blast DB files are expected to go.

-t <path> path to put tempfiles. must be writable. Defaults to /tmp.

-x dry run, just print what you would update

-f <db name> force-update the DB with the given file base (e.g. 'genbank/nr')

Current list of file_bases:
$file_bases
EOU
}
# Command-line switches, filled in by getopts(): -x, -t <path>, -d <path>,
# -f <db name>.
our %opt;
unless ( getopts( 'xt:d:f:', \%opt ) ) {
    usage('invalid arguments');
}

# Fall back to the system temporary directory when -t was not supplied.
$opt{t} = File::Spec->tmpdir unless $opt{t};

# The top-level BLAST db path is mandatory and must be an existing
# directory; once validated, set it in the BlastDB class.
usage('-d option is required') unless $opt{d};
usage("directory $opt{d} not found") unless -d $opt{d};
CXGN::BlastDB->dbpath( $opt{d} );

# With -f, work only on databases matching that file_base; otherwise take
# every database known to CXGN::BlastDB.
my @dbs;
if ( $opt{f} ) {
    @dbs = CXGN::BlastDB->search( file_base => $opt{f} );
}
else {
    @dbs = CXGN::BlastDB->retrieve_all;
}

# Report an empty selection; the update loop below then has nothing to do.
if ( !@dbs ) {
    my $notice = $opt{f}
        ? "No database found with file_base='$opt{f}'.\n"
        : "No dbs found in database.\n";
    print $notice;
}
77 foreach my $db (@dbs) {
79 #check if the blast db needs an update
80 unless($opt{f} || $db->needs_update) {
81 print $db->file_base." is up to date.\n";
82 next;
85 #skip the DB if it does not have a source url defined
86 unless($db->source_url) {
87 warn $db->file_base." needs to be updated, but has no source_url. Skipped.\n";
88 next;
91 if( $opt{x} ) {
92 print "Would update ".$db->file_base." from source url ".$db->source_url."\n";
93 next;
94 } else {
95 print "Updating ".$db->file_base." from source url...\n";
98 eval {
99 # check whether we have permissions to do the format
100 if( my $perm_error = $db->check_format_permissions() ) {
101 die "Cannot format ".$db->file_base.":\n$perm_error";
104 #download the sequences from the source url to a tempfile
105 print "Downloading source (".$db->source_url.")...\n";
106 my (undef,$sourcefile) = tempfile('blastdb-source-XXXXXXXX',
107 DIR => $opt{t},
108 UNLINK => 1,
111 my $wget_opts = { cache => 0 };
112 $wget_opts->{gunzip} = 1 if $db->source_url =~ /\.gz$/i;
113 wget_filter( $db->source_url => $sourcefile, $wget_opts );
115 #formatdb it into the correct place
116 print "Formatting database...\n";
117 $db->format_from_file($sourcefile);
119 unlink $sourcefile or warn "$! unlinking tempfile '$sourcefile'";
121 print $db->file_base." done.\n";
122 }; if( $EVAL_ERROR ) {
123 print "Update failed for ".$db->file_base.":\n$EVAL_ERROR";