Add empty line after package statement for $VERSION (for dzils's [PkgVersion])
[bioperl-live.git] / bin / bp_bioflat_index
blob1a8be87c08195cd097fe33b49cd57173ed34e55f
1 #!/usr/bin/perl
4 =head1 NAME
6 bp_bioflat_index.pl - index sequence files using Bio::DB::Flat
8 =head1 DESCRIPTION
10 Create or update a biological sequence database indexed with the
11 Bio::DB::Flat indexing scheme. The arguments are a list of flat files
12 containing the sequence information to be indexed.
14 =head1 USAGE
16 bp_bioflat_index.pl <options> file1 file2 file3...
18 Options:
20 --create Create or reinitialize the index. If not specified,
21 the index must already exist.
23 --format <format> The format of the sequence files. Must be one
24 of "genbank", "swissprot", "embl" or "fasta".
26 --location <path> Path to the directory in which the index files
27 are stored.
29 --dbname <name> The symbolic name of the database to be created.
31 --indextype <type> Type of index to create. Either "bdb" or "flat".
32 "binarysearch" is the same as "flat".
34 Options can be abbreviated. For example, use -i for --indextype.
36 The following environment variables will be used as defaults if the
37 corresponding options are not provided:
39 OBDA_FORMAT format of sequence file
40 OBDA_LOCATION path to directory in which index files are stored
41 OBDA_DBNAME name of database
42 OBDA_INDEX type of index to create
44 =cut
46 use strict;
47 use warnings;
48 use Bio::Root::Root;
49 use Bio::Root::IO;
50 use Bio::DB::Flat;
51 use Getopt::Long;
52 use File::Path qw(mkpath rmtree);
# Settings for this run.  Each one may be given on the command line; all
# except --create can also be supplied through the environment.
my ($CREATE,$FORMAT,$LOCATION,$DBNAME,$INDEXTYPE);

# Stop on unrecognized or malformed options rather than indexing with
# settings the user did not intend.  Getopt::Long has already reported the
# offending option on STDERR by the time it returns false.
GetOptions( 'create'      => \$CREATE,
            'format:s'    => \$FORMAT,
            'location:s'  => \$LOCATION,
            'dbname:s'    => \$DBNAME,
            'indextype:s' => \$INDEXTYPE )
  or die "invalid command-line options; run 'perldoc $0' for usage\n";

# Fall back to the OBDA_* environment variables for anything that was not
# given on the command line.
$FORMAT    = $ENV{OBDA_FORMAT}    unless defined $FORMAT;
$LOCATION  = $ENV{OBDA_LOCATION}  unless defined $LOCATION;
$DBNAME    = $ENV{OBDA_DBNAME}    unless defined $DBNAME;

# The index type is accepted under two names: OBDA_INDEXTYPE is consulted
# first, then OBDA_INDEX, so a setting under either name is picked up.
$INDEXTYPE = $ENV{OBDA_INDEXTYPE} unless defined $INDEXTYPE;
$INDEXTYPE = $ENV{OBDA_INDEX}     unless defined $INDEXTYPE;
# Class names used below for error reporting (throw/warn) and for building
# file paths (catfile).
my $root = 'Bio::Root::Root';
my $io   = 'Bio::Root::IO';

# confirm that database directory is there
defined $LOCATION or
  $root->throw("please provide a base directory with the --location option");

# The error message below tells the user to pass --create when the base
# directory is missing, so honor that: with --create, build the directory
# (including any missing parents) before checking for it.  mkpath croaks on
# failure, hence the eval.
if ($CREATE && length $LOCATION && !-e $LOCATION) {
  eval { mkpath($LOCATION); 1 }
    or $root->throw("cannot create directory $LOCATION: $@");
}

-d $LOCATION or
  $root->throw("$LOCATION is not a valid directory; use --create to create a new index");

defined $DBNAME or
  $root->throw("please provide a database name with the --dbname option");

defined $FORMAT or
  $root->throw("please specify the format for the input files with the --format option");

# No index type from the command line or the environment: default to "flat"
# and tell the user how to choose a different one.
unless (defined $INDEXTYPE) {
  $INDEXTYPE = 'flat';
  $root->warn('setting index type to "flat", use the --indextype option to override');
}
# Confirm that database is there and that --create flag is sensible.
# An index is considered present when <location>/<dbname>/config.dat exists.
my $path = $io->catfile($LOCATION,$DBNAME,'config.dat');
if (-e $path) {
  if ($CREATE) {
    # Reinitialize: remove the whole database directory before rebuilding.
    # rmtree's 2nd argument reports each removed file; the 3rd ("safe")
    # makes it skip entries it lacks permission to delete.
    $root->warn("existing index detected; deleting.");
    rmtree($io->catfile($LOCATION,$DBNAME),1,1);
  } else {
    # Updating an existing index: drop the index type so that no -index
    # argument is passed when the database is opened.
    $root->warn("existing index detected; ignoring --indextype and --format options.");
    undef $INDEXTYPE;
  }
}
elsif (!$CREATE) {
  # Nothing to update and no permission to create one.
  $root->throw("Cannot find database config file at location $path; use --create to create a new index");
}
# open for writing/updating
# The -index argument is passed only when an index type is still set; it is
# left out entirely when $INDEXTYPE is undefined or empty.
my @index_args = $INDEXTYPE ? (-index => $INDEXTYPE) : ();

my $db = Bio::DB::Flat->new(-directory  => $LOCATION,
                            -dbname     => $DBNAME,
                            @index_args,
                            -write_flag => 1,
                            -format     => $FORMAT) or
  $root->throw("can't create Bio::DB::Flat object");

# Index every file that remains on the command line after option parsing.
my $entries = $db->build_index(@ARGV);

# Status goes to STDERR; the message ends at the newline (no trailing blank).
print STDERR "(Re)indexed $entries entries.\n";
118 __END__