6 ## Used to output the 'usage' message
9 ## Used to parse command line options
12 ## Used to create temporary files, if necessary
16 use Bio
::DB
::SeqFeature
::Store
;
17 use Bio
::DB
::SeqFeature
::Store
::GFF3Loader
;
21 ## The available options. Note, these defaults are 'hard coded' into
22 ## the USAGE POD, so if you change one of the defaults (you shouldn't),
23 ## you should update the USAGE.
## Connection defaults: DBI data source, SeqFeature class, and storage
## adaptor. They are overridden by --dsn, --seqfeature and --adaptor.
25 my $DSN = 'dbi:mysql:test';
26 my $SFCLASS = 'Bio::DB::SeqFeature';
27 my $ADAPTOR = 'DBI::mysql';
## The temporary directory defaults to whatever File::Spec->tmpdir()
## reports; it is overridden by --temporary-directory.
31 my $TMP = File
::Spec
->tmpdir();
## On/off switches, all initialised to 0 (off) until the matching
## command-line flag sets them.
32 my $IGNORE_SEQREGION = 0;
38 my $NOALIAS_TARGET = 0;
39 my $SUMMARY_STATS = 0;
40 my $NOSUMMARY_STATS = 0;
43 ## Two flags based on http://stackoverflow.com/questions/1232116
44 ## how-to-create-pod-and-use-pod2usage-in-perl
## Parse the command line. Each option spec is paired with a reference to
## the variable that receives its value: specs ending in '=s' take a string
## argument, specs ending in '!' are negatable switches (--noverbose,
## --nosubfeatures), and bare specs are plain on/off flags. If parsing
## fails, pod2usage is called with a hint pointing at --help.
## NOTE(review): GetOptions is presumably Getopt::Long's; the import is not
## visible in this part of the file -- confirm.
48 GetOptions
( 'd|dsn=s' => \
$DSN,
49 's|seqfeature=s' => \
$SFCLASS,
50 'n|namespace=s' => \
$NAMESPACE,
51 'a|adaptor=s' => \
$ADAPTOR,
52 'v|verbose!' => \
$VERBOSE,
54 'T|temporary-directory=s' => \
$TMP,
55 'i|ignore-seqregion' => \
$IGNORE_SEQREGION,
56 'c|create' => \
$CREATE,
58 'p|password=s' => \
$PASS,
59 'z|zip' => \
$COMPRESS,
60 'S|subfeatures!' => \
$INDEX_SUB,
62 ## Any good single letter choices here?
63 'noalias-target' => \
$NOALIAS_TARGET,
64 'summary' => \
$SUMMARY_STATS,
65 'N|nosummary' => \
$NOSUMMARY_STATS,
68 ## I miss '--help' when it isn't there!
69 'h|help!' => \
$opt_help,
70 'm|man!' => \
$opt_man,
72 or pod2usage
( -message
=>
73 "\nTry 'bp_seqfeature_load.pl --help' for more information\n",
78 ## Should we output usage information?
## --help calls pod2usage with -verbose 1; --man calls it with -verbose 2.
79 pod2usage
( -verbose
=> 1 ) if $opt_help;
80 pod2usage
( -verbose
=> 2 ) if $opt_man;
82 ## Did we get any files to process?
## NOTE(review): the expression tested before this 'or' is not visible
## here; the message implies it checks for remaining file arguments.
84 or pod2usage
( -message
=>
85 "\nYou need to pass some GFF or fasta files to load\n",
## --fts is rejected with a usage message unless --create was also given.
90 pod2usage
( -message
=> "\n--fts requires --create\n",
93 ) if ($FTS and not $CREATE);
100 bp_seqfeature_load.pl - Load GFF into a SeqFeature database
104 Pass any number of GFF or fasta format files (or GFF with embedded
105 fasta) to load the features and sequences into a SeqFeature
106 database. The database (and adaptor) to use is specified on the
107 command line. Use the --create flag to create a new SeqFeature
112 bp_seqfeature_load.pl [options] gff_or_fasta_file1 [gff_or_fasta_file2 [...]]
114 Try 'bp_seqfeature_load.pl --help' or '--man' for more information.
122 DBI data source (default dbi:mysql:test)
124 =item -n, --namespace
126 The table prefix to use (default undef). Allows several independent
127 sequence feature databases to be stored in a single database.
129 =item -s, --seqfeature
131 The type of SeqFeature to create... RTSC (default Bio::DB::SeqFeature)
135 The storage adaptor (class) to use (default DBI::mysql)
139 Turn on verbose progress reporting (default true) Use --noverbose to
144 Activate fast loading (default 0). Only available for some adaptors.
146 =item -T, --temporary-directory
148 Specify temporary directory for fast loading (default
149 File::Spec->tmpdir())
151 =item -i, --ignore-seqregion
153 If true, then ignore ##sequence-region directives in the GFF3 file
154 (default: create a feature for each region)
158 Create the database and reinitialize it (default false). Note that this
159 will erase previous database contents, if any.
163 User to connect to database as
167 Password to use to connect to database
171 Compress database tables to save space (default false)
173 =item -S, --subfeatures
175 Turn on indexing of subfeatures (default true) Use --nosubfeatures to
180 Index the attribute table for full-text search (default false). Applicable
181 only when --create is specified. Currently applicable to the DBI::SQLite
182 storage adaptor only (using the most recent supported FTS indexing method,
183 which may not be portable to older DBI::SQLite versions).
187 Generate summary statistics for coverage graphs (default false). This
188 can be run on a previously loaded database or during the load. It will
189 default to true if --create is used.
191 =item -N, --nosummary
193 Do not generate summary statistics, to save some space and load time (default if
194 --create is not specified; use this option to explicitly turn off summary
195 statistics when --create is specified).
197 =item --noalias-target
199 Don't create an Alias attribute whose value is the target_id in a
200 Target attribute (if the feature contains a Target attribute, the
201 default is to create an Alias attribute whose value is the target_id
202 in the Target attribute)
206 Please see http://www.sequenceontology.org/gff3.shtml for information
207 about the GFF3 format. BioPerl extends the format slightly by adding a
208 ##index-subfeatures directive. Set this to a true value if you wish
209 the database to be able to retrieve a feature's individual parts (such
210 as the exons of a transcript) independently of the top level feature:
212 ##index-subfeatures 1
214 It is also possible to control the indexing of subfeatures on a
215 case-by-case basis by adding "index=1" or "index=0" to the feature's
216 attribute list. This should only be used for subfeatures.
218 Subfeature indexing is true by default. Set to false (0) to save lots
219 of database space and speed performance. You may use --nosubfeatures
## Abort if the temporary directory needed for fast loading is not usable.
## NOTE(review): the test that precedes this 'or die' is not visible here.
230 or die "Fast loading is requested, but I cannot write into the directory $TMP";
## For adaptors whose name matches /mysql/i, append mysql_local_infile=1 to
## the DSN unless the DSN already mentions mysql_local_infile.
231 $DSN .= ";mysql_local_infile=1" if $ADAPTOR =~ /mysql/i && $DSN !~ /mysql_local_infile/;
## Collect the user/password pair into @options only when at least one of
## them is set.
235 @options = ($USER,$PASS) if $USER || $PASS;
## Construct the SeqFeature store, passing the namespace, adaptor and
## compression settings from the command line; die if no store object is
## returned.
237 my $store = Bio
::DB
::SeqFeature
::Store
->new
240 -namespace
=> $NAMESPACE,
241 -adaptor
=> $ADAPTOR,
247 -compress
=> $COMPRESS,
250 or die "Couldn't create connection to the database";
## With --create, initialise the database with the 'erase' argument and
## bump $SUMMARY_STATS so that summary statistics default to on.
252 $store->init_database('erase') if $CREATE;
253 $SUMMARY_STATS++ if $CREATE; # this is a good thing
## Construct the GFF3 loader, forwarding the feature class, verbosity,
## seqregion, subfeature-indexing and alias settings. Summary statistics
## are forced to 0 when --nosummary was given; otherwise the current
## $SUMMARY_STATS value is used. Die if no loader object is returned.
255 my $loader = Bio
::DB
::SeqFeature
::Store
::GFF3Loader
->new
258 -sf_class
=> $SFCLASS,
259 -verbose
=> $VERBOSE,
262 -ignore_seqregion
=> $IGNORE_SEQREGION,
263 -index_subfeatures
=> $INDEX_SUB,
264 -noalias_target
=> $NOALIAS_TARGET,
265 -summary_stats
=> $NOSUMMARY_STATS ?
0 : $SUMMARY_STATS,
267 or die "Couldn't create GFF3 loader";
269 # on signals, give objects a chance to call their DESTROY methods
## The same handler is installed for both TERM and INT: it drops the loader
## and store references, then dies with "Aborted...".
270 $SIG{TERM
} = $SIG{INT
} = sub { undef $loader; undef $store; die "Aborted..."; };
## Hand the contents of @ARGV to the loader.
272 $loader->load(@ARGV);