Merge branch 'master' into topic/analyze_phenotypes_page
[sgn.git] / bin / bdb_update_blast_dbs.pl
blob6368d9bacddf70d3e172dbdc4a4b93f7c7a8735b
1 #!/usr/bin/env perl
3 use strict;
4 use warnings;
5 use English;
6 use Carp;
7 #$Carp::Verbose = 1;
8 use FindBin;
9 use Getopt::Std;
10 use Data::Dumper;
11 use File::Spec;
12 use File::Temp qw/tempfile/;
14 use CXGN::Tools::Wget qw/wget_filter/;
15 use SGN::Schema;
16 use CXGN::Blast;
17 use CXGN::DB::InsertDBH;
# Build the usage text, optionally preceded by an error message, and die
# with it.
#
# Arguments:
#   $sgn_schema - schema object; accepted so existing callers keep working,
#                 but not read by the current implementation
#   $message    - optional error text; when non-empty it is shown as
#                 "Error: <text>" above the usage text
#
# Does not return: the text is delivered via die().
sub usage {
    my $sgn_schema = shift;
    my $message    = shift || '';
    $message = "Error: $message\n" if $message;

    # The listing of known file_bases is disabled (the code below is kept
    # for reference only), so the usage text no longer prints an empty
    # "Current list of file_bases:" heading.
    # my $file_bases = join '', sort map ' '.$_->file_base."\n", CXGN::BlastDB->retrieve_all($sgn_schema, $opt{d});

    die <<EOU;
$message
Usage:
  $FindBin::Script [ options ] -d <path>

  Go over all the BLAST databases we keep in stock and update them if
  needed.  When run with just the -d option, goes over all the BLAST
  dbs listed in the sgn.blast_db table and updates them if needed,
  putting them under the top-level BLAST db path given with the -d
  option.

  Options:

  -h  print this usage message and exit

  -H <dbhost>

  -D <dbname>

  -p <password> (if not supplied, will prompt)

  -U <dbuser> (if -p option is supplied)

  -d <path>  required.  path where all blast DB files are expected to go.

  -t <path>  path to put tempfiles.  must be writable.  Defaults to /tmp.

  -x   dry run, just print what you would update

  -f <db name>  force-update the DB with the given file base (e.g. 'genbank/nr')

EOU
}
# ---------------------------------------------------------------------
# Main script: parse options, connect to the database, then walk over
# the selected BLAST databases and refresh each one that needs it.
# ---------------------------------------------------------------------

our %opt;
getopts('xt:d:f:H:D:p:U:h',\%opt) or die "Invalid arguments";

$opt{t} ||= File::Spec->tmpdir;

# Handle -h and validate -d before touching the database, so that asking
# for help or mistyping the path never triggers a connection attempt.
# usage() does not read its schema argument, so undef is passed here.
# usage() dies; the exit() is only a safeguard.
if ($opt{h}) { usage(undef); exit(); }

$opt{d} or usage(undef, '-d option is required');
-d $opt{d} or usage(undef, "directory $opt{d} not found");

# -H and -D are optional; substitute empty strings for the log line only,
# to avoid "uninitialized value" warnings. The constructors below still
# receive the original (possibly undef) values.
my ($log_host, $log_name) = map { defined $_ ? $_ : '' } @opt{qw(H D)};
print STDERR "Connecting to database... $log_host $log_name\n";

my $dbh;

if (!$opt{p}) {
    # no password supplied on the command line
    $dbh = CXGN::DB::InsertDBH->new( { dbhost => $opt{H}, dbname => $opt{D} });
}
else {
    # NOTE(review): CXGN::DB::Connection is not loaded explicitly by this
    # script; presumably it comes in via CXGN::DB::InsertDBH -- confirm.
    $dbh = CXGN::DB::Connection->new( { dbhost => $opt{H}, dbname => $opt{D}, dbpass => $opt{p}, dbuser => $opt{U} });
}

print STDERR "Creating schema object...\n";
my $sgn_schema = SGN::Schema->connect( sub{ $dbh->get_actual_dbh() });

# NOTE(review): $bdbs is not used below; kept in case the constructor
# validates its arguments -- confirm before removing.
my $bdbs = CXGN::Blast->new( sgn_schema => $sgn_schema, dbpath => $opt{d} );

# With -f, restrict the run to the database with that file_base;
# otherwise process every database.
my @dbs = $opt{f} ? CXGN::Blast->search( $sgn_schema, $opt{d}, file_base => $opt{f} )
                  : CXGN::Blast->retrieve_all($sgn_schema, $opt{d});
unless(@dbs) {
    print $opt{f} ? "No database found with file_base='$opt{f}'.\n"
                  : "No dbs found in database.\n";
}

foreach my $db (@dbs) {

    print STDERR "Processing database ".$db->title()."\n";

    # check if the blast db needs an update (-f forces the update)
    unless($opt{f} || $db->needs_update) {
        print $db->file_base." is up to date.\n";
        next;
    }

    print STDERR "checking source url..\n";
    # skip the DB if it does not have a source url defined
    unless($db->source_url) {
        warn $db->file_base." needs to be updated, but has no source_url. Skipped.\n";
        next;
    }

    if( $opt{x} ) {
        # dry run: report only
        print "Would update ".$db->file_base." from source url ".$db->source_url."\n";
        next;
    } else {
        print "Updating ".$db->file_base." from source url...\n";
    }

    # A failure while updating one database is reported and must not stop
    # the remaining databases from being processed.
    eval {

        print STDERR "Checking permissions...\n";

        # check whether we have permissions to do the format
        if( my $perm_error = $db->check_format_permissions() ) {
            die "Cannot format ".$db->file_base.":\n$perm_error";
        }

        # download the sequences from the source url to a tempfile;
        # UNLINK => 1 asks File::Temp to remove the file at program exit
        # in case the explicit unlink below is never reached
        print STDERR "Downloading source (".$db->source_url.")...\n";
        my (undef,$sourcefile) = tempfile('blastdb-source-XXXXXXXX',
                                          DIR => $opt{t},
                                          UNLINK => 1,
                                         );

        my $wget_opts = { cache => 0 };
        # ask wget_filter to gunzip when the source url ends in .gz
        $wget_opts->{gunzip} = 1 if $db->source_url =~ /\.gz$/i;
        wget_filter( $db->source_url => $sourcefile, $wget_opts );

        # formatdb it into the correct place
        print STDERR "Formatting database...";
        $db->format_from_file($sourcefile);

        unlink $sourcefile or warn "$! unlinking tempfile '$sourcefile'";

        print $db->file_base." done.\n";
    }; if( $EVAL_ERROR ) {
        print "Update failed for ".$db->file_base.":\n$EVAL_ERROR";
    }
}

$dbh->disconnect();