#!/usr/bin/perl

use strict;
use warnings;
# use lib './blib/lib';
use DBI;
use IO::File;
use File::Spec;
use Getopt::Long;
use Bio::DB::GFF;
use Bio::DB::GFF::Util::Binning 'bin';

use constant MYSQL => 'mysql';

use constant FDATA                 => 'fdata';
use constant FTYPE                 => 'ftype';
use constant FGROUP                => 'fgroup';
use constant FDNA                  => 'fdna';
use constant FATTRIBUTE            => 'fattribute';
use constant FATTRIBUTE_TO_FEATURE => 'fattribute_to_feature';

=head1 NAME

bp_bulk_load_gff.pl - Bulk-load a Bio::DB::GFF database from GFF files.

=head1 SYNOPSIS

  % bp_bulk_load_gff.pl -d testdb dna1.fa dna2.fa features1.gff features2.gff ...

=head1 DESCRIPTION

This script loads a Bio::DB::GFF database with the features contained
in a list of GFF files and/or FASTA sequence files. You must use the
exact variant of GFF described in L<Bio::DB::GFF>. Various
command-line options allow you to control which database to load and
whether to allow an existing database to be overwritten.

This script differs from bp_load_gff.pl in that it uses the RDBMS's
native bulk-loading facilities (MySQL and PostgreSQL only) and cannot
perform incremental loads. See L<bp_load_gff.pl> for an incremental
loader that works with all databases supported by Bio::DB::GFF, and
L<bp_fast_load_gff.pl> for a MySQL loader that supports fast
incremental loads.

=head2 NOTES

If the filename is given as "-" then the input is taken from standard
input. Compressed files (.gz, .Z, .bz2) are automatically
uncompressed.

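
For example, either of the following invocations is accepted (database
and file names are illustrative):

  % bp_bulk_load_gff.pl -d testdb features.gff.gz
  % cat features.gff | bp_bulk_load_gff.pl -d testdb -
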
FASTA format files are distinguished from GFF files by their filename
extensions. Files ending in .fa, .fasta, .fast, .seq, .dna and their
uppercase variants are treated as FASTA files. Everything else is
treated as a GFF file. If you wish to load --fasta files from STDIN,
then use the -f command-line switch with an argument of '-', as in

  gunzip -c my_data.fa.gz | bp_bulk_load_gff.pl -d test -f -

The nature of the bulk load requires that the database be on the local
machine and that the indicated user have the "file" privilege to load
the tables and have enough room in /usr/tmp (or whatever is specified
by the $TMPDIR environment variable) to hold the tables transiently.

Local data may now be uploaded to a remote server via the --local option
with the database host specified in the dsn, e.g. dbi:mysql:test:db_host

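
For example (host and database names are illustrative):

  % bp_bulk_load_gff.pl --local -d dbi:mysql:test:db_host features.gff
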
The adaptor used is dbi::mysqlopt by default; the only alternatives are
dbi::mysqlcmap and dbi::pg, selected with the --adaptor option.

About maxfeature: the default value is 100,000,000 bases. If you have
features that are close to or greater than 100Mb in length, then the
value of maxfeature should be increased to 1,000,000,000. This value
must be a power of 10.

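
For example, to raise the limit to 1 Gb (database and file names are
illustrative):

  % bp_bulk_load_gff.pl -d testdb --maxfeature 1000000000 features.gff
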
Note that Windows users must use the --create option.

If the list of GFF or fasta files exceeds the kernel limit for the
maximum number of command-line arguments, use the
--long_list /path/to/files option.

=head1 COMMAND-LINE OPTIONS

Command-line options can be abbreviated to single-letter options.
e.g. -d instead of --database.

  --database <dsn>  Database name (default dbi:mysql:test)
  --adaptor         Adaptor name (default mysql)
  --create          Reinitialize/create data tables without asking
  --user            Username to log in as
  --fasta           File or directory containing fasta files to load
  --long_list       Directory containing a very large number of
                    GFF and/or FASTA files
  --password        Password to use for authentication
                    (Does not work with Postgres, password must be
                    supplied interactively or be left empty for
                    ident authentication)
  --maxbin          Set the value of the maximum bin size
  --local           Flag to indicate that the data source is local
  --maxfeature      Set the value of the maximum feature size (power of 10)
  --group           A list of one or more tag names (comma or space
                    separated) to be used for grouping in the 9th column.
  --gff3_munge      Activate GFF3 name munging (see Bio::DB::GFF)
  --summary         Generate summary statistics for drawing coverage
                    histograms. This can be run on a previously loaded
                    database or during the load.
  --Temporary       Location of a writable scratch directory

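
A typical invocation that recreates the database and loads both DNA and
features might look like this (database and file names are illustrative):

  % bp_bulk_load_gff.pl --create -d dbi:mysql:testdb -u nobody \
        --fasta dna/ features1.gff features2.gff
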
=head1 SEE ALSO

L<Bio::DB::GFF>, L<bp_fast_load_gff.pl>, L<bp_load_gff.pl>

=head1 AUTHOR

Lincoln Stein, lstein@cshl.org

Copyright (c) 2002 Cold Spring Harbor Laboratory

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. See DISCLAIMER.txt for
disclaimers of warranty.

=cut

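# The faux* adaptor packages below subclass the standard Bio::DB::GFF DBI
# adaptors but override insert_sequence() so that DNA segments are printed
# to the currently selected output handle (the fdna dump file) rather than
# inserted through the database handle.
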
package Bio::DB::GFF::Adaptor::fauxmysql;

use Bio::DB::GFF::Adaptor::dbi::mysqlopt;
use vars '@ISA';
@ISA = 'Bio::DB::GFF::Adaptor::dbi::mysqlopt';

sub insert_sequence {
  my $self = shift;
  my ($id,$offset,$seq) = @_;
  print join("\t",$id,$offset,$seq),"\n";
}

package Bio::DB::GFF::Adaptor::fauxmysqlcmap;

use Bio::DB::GFF::Adaptor::dbi::mysqlcmap;
use vars '@ISA';
@ISA = 'Bio::DB::GFF::Adaptor::dbi::mysqlcmap';

sub insert_sequence {
  my $self = shift;
  my ($id,$offset,$seq) = @_;
  print join("\t",$id,$offset,$seq),"\n";
}

package Bio::DB::GFF::Adaptor::fauxpg;

use Bio::DB::GFF::Adaptor::dbi::pg;
use vars '@ISA';
@ISA = 'Bio::DB::GFF::Adaptor::dbi::pg';

#these two subs are to separate the table creation from the
#index creation
sub do_initialize {
  my $self  = shift;
  my $erase = shift;
  $self->drop_all if $erase;

  my $dbh    = $self->features_db;
  my $schema = $self->schema;
  foreach my $table_name ($self->tables) {
    my $create_table_stmt = $schema->{$table_name}{table};
    $dbh->do($create_table_stmt) || warn $dbh->errstr;
    # $self->create_other_schema_objects(\%{$schema->{$table_name}});
  }
  1;
}

sub _create_indexes_etc {
  my $self = shift;

  my $dbh    = $self->features_db;
  my $schema = $self->schema;
  foreach my $table_name ($self->tables) {
    $self->create_other_schema_objects(\%{$schema->{$table_name}});
  }
}

sub insert_sequence {
  my $self = shift;
  my ($id,$offset,$seq) = @_;
  print "$id\t$offset\t$seq\n";
}

package main;

eval "use Time::HiRes"; undef $@;
my $timer = defined &Time::HiRes::time;

my $bWINDOWS = 0;    # Boolean: is this a MSWindows operating system?
if ($^O =~ /MSWin32/i) {
  $bWINDOWS = 1;
}

my ($DSN,$ADAPTOR,$FORCE,$USER,$PASSWORD,$FASTA,$LOCAL,$MAX_BIN,$GROUP_TAG,$LONG_LIST,$MUNGE,$TMPDIR);

GetOptions ('database:s'          => \$DSN,
            'adaptor:s'           => \$ADAPTOR,
            'create'              => \$FORCE,
            'user:s'              => \$USER,
            'password:s'          => \$PASSWORD,
            'fasta:s'             => \$FASTA,
            'local'               => \$LOCAL,
            'maxbin|maxfeature:s' => \$MAX_BIN,
            'group:s'             => \$GROUP_TAG,
            'long_list:s'         => \$LONG_LIST,
            'gff3_munge'          => \$MUNGE,
            'Temporary:s'         => \$TMPDIR,
           ) or (system('pod2text', $0), exit -1);

# If called as pg_bulk_load_gff.pl behave as that did.
if ($0 =~ /pg_bulk_load_gff\.pl/) {
  $ADAPTOR ||= 'Pg';
  $DSN     ||= 'test';
}
$DSN     ||= 'dbi:mysql:test';
$MAX_BIN ||= 1_000_000_000; # to accommodate human-sized chromosomes

if ($bWINDOWS && not $FORCE) {
  die "Note that Windows users must use the --create option.\n";
}

unless ($FORCE) {
  die "This will delete all existing data in database $DSN. If you want to do this, rerun with the --create option.\n"
    if $bWINDOWS;
  open (TTY,"/dev/tty") or die "/dev/tty: $!\n"; #TTY use removed for win compatibility
  print STDERR "This operation will delete all existing data in database $DSN. Continue? ";
  my $f = <TTY>;
  die "Aborted\n" unless $f =~ /^[yY]/;
  close TTY;
}

# postgres DBD::Pg allows 'database', but also 'dbname', and 'db':
# and it must be Pg (not pg)
$DSN =~ s/pg:database=/Pg:/i;
$DSN =~ s/pg:dbname=/Pg:/i;
$DSN =~ s/pg:db=/Pg:/i;

# leave these lines for mysql
$DSN =~ s/database=//i;
$DSN =~ s/;host=/:/i; #cater for dsn in the form of "dbi:mysql:database=$dbname;host=$host"

my ($DBI,$DBD,$DBNAME,$HOST) = split /:/,$DSN;
$DBNAME = $DSN unless $DSN =~ /:/;
$ADAPTOR ||= $DBD;
$ADAPTOR ||= 'mysql';

if ($DBD && $DBD eq 'Pg') {
  # rebuild DSN, DBD::Pg requires full dbname=<name> format
  $DSN = "dbi:Pg:dbname=$DBNAME";
  if ($HOST) { $DSN .= ";host=$HOST"; }
}

my ($use_mysql,$use_mysqlcmap,$use_pg) = (0,0,0);
if ( $ADAPTOR eq 'mysqlcmap' ) {
  $use_mysqlcmap = 1;
}
elsif ( $ADAPTOR =~ /^mysql/ ) {
  $use_mysql = 1;
}
elsif ( $ADAPTOR eq "Pg" ) {
  $use_pg = 1;
}
else {
  die "$ADAPTOR is not an acceptable database adaptor.";
}

my @auth;
my $AUTH = '';
if (defined $USER) {
  push @auth,(-user=>$USER);
  if ( $use_mysql or $use_mysqlcmap ) {
    $AUTH .= " -u$USER";
  }
  elsif ( $use_pg ) {
    $AUTH .= " -U $USER ";
  }
}
if (defined $PASSWORD) {
  push @auth,(-pass=>$PASSWORD);
  if ( $use_mysql or $use_mysqlcmap ) {
    $AUTH .= " -p$PASSWORD";
  }
# elsif ( $use_pg ) {
#   $AUTH .= " -W $PASSWORD ";
# }
}

if (defined $HOST) {
  $AUTH .= " -h$HOST";
}
if (defined $DBNAME) {
  if ( $use_mysql or $use_mysqlcmap ) {
    $AUTH .= " -D$DBNAME ";
  }
}
if (defined $LOCAL) {
  $LOCAL = 'local';
  $AUTH .= ' --local-infile=1';
} else {
  $LOCAL = '';
}

my $faux_adaptor;
if ( $use_mysqlcmap ) {
  $faux_adaptor = "fauxmysqlcmap";
}
elsif ( $use_mysql ) {
  $faux_adaptor = "fauxmysql";
}
elsif ( $use_pg ) {
  $faux_adaptor = "fauxpg";
}

my $db = Bio::DB::GFF->new(-adaptor=>$faux_adaptor,-dsn => $DSN,@auth)
  or die "Can't open database: ",Bio::DB::GFF->error,"\n";

$db->gff3_name_munging(1) if $MUNGE;

$MAX_BIN ? $db->initialize(-erase=>1,-MAX_BIN=>$MAX_BIN) : $db->initialize(1);
$MAX_BIN ||= $db->meta('max_bin') || 100_000_000;

# deal with really long lists of files
if ($LONG_LIST) {
  -d $LONG_LIST or die "The --long_list argument must be a directory\n";
  opendir GFFDIR,$LONG_LIST or die "Could not open $LONG_LIST for reading: $!";
  @ARGV = map { "$LONG_LIST\/$_" } readdir GFFDIR;
  closedir GFFDIR;

  if (defined $FASTA && -d $FASTA) {
    opendir FASTA,$FASTA or die "Could not open $FASTA for reading: $!";
    push @ARGV, map { "$FASTA\/$_" } readdir FASTA;
    closedir FASTA;
  }
  elsif (defined $FASTA && -f $FASTA) {
    push @ARGV, $FASTA;
  }
}

foreach (@ARGV) {
  $_ = "gunzip -c $_ |"     if /\.gz$/;
  $_ = "uncompress -c $_ |" if /\.Z$/;
  $_ = "bunzip2 -c $_ |"    if /\.bz2$/;
}

my (@gff,@fasta);
foreach (@ARGV) {
  if (/\.(fa|fasta|dna|seq|fast)(?:$|\.)/i) {
    push @fasta,$_;
  } else {
    push @gff,$_;
  }
}
@ARGV = @gff;
push @fasta,$FASTA if defined $FASTA;

# drop everything that was there before
my %FH;
my $tmpdir = File::Spec->tmpdir() || '/tmp';
$tmpdir =~ s!\\!\\\\!g if $bWINDOWS; #eliminates backslash mis-interpretation
-d $tmpdir or die <<END;
I could not find a suitable temporary directory to write scratch files into ($tmpdir by default).
Please select a directory and indicate its location by setting the TMPDIR environment variable, or
by using the --Temporary switch.
END

my @fasta_files_to_be_unlinked;
my @files = (FDATA,FTYPE,FGROUP,FDNA,FATTRIBUTE,FATTRIBUTE_TO_FEATURE);
foreach (@files) {
  $FH{$_} = IO::File->new(">$tmpdir/$_.$$") or die $_,": $!";
  $FH{$_}->autoflush;
}

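# For Postgres the dump files are written in COPY ... FROM stdin format,
# so that each file can later be replayed directly through psql.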
if ( $use_pg ) {
  $FH{FDATA()                }->print("COPY fdata (fid, fref, fstart, fstop, fbin, ftypeid, fscore, fstrand, fphase, gid, ftarget_start, ftarget_stop) FROM stdin;\n");
  $FH{FTYPE()                }->print("COPY ftype (ftypeid, fmethod, fsource) FROM stdin;\n");
  $FH{FGROUP()               }->print("COPY fgroup (gid, gclass, gname) FROM stdin;\n");
  $FH{FATTRIBUTE()           }->print("COPY fattribute (fattribute_id, fattribute_name) FROM stdin;\n");
  $FH{FATTRIBUTE_TO_FEATURE()}->print("COPY fattribute_to_feature (fid, fattribute_id, fattribute_value) FROM stdin;\n");
}

my $FID         = 1;
my $GID         = 1;
my $FTYPEID     = 1;
my $ATTRIBUTEID = 1;
my %GROUPID     = ();
my %FTYPEID     = ();
my %ATTRIBUTEID = ();
my %DONE        = ();
my $FEATURES    = 0;

my %tmpfiles; # keep track of temporary fasta files
my $count;
my $fasta_sequence_id;
my $gff3;
my $current_file; #used to reset GFF3 flag in mix of GFF and GFF3 files

$db->preferred_groups(split (/[,\s]+/,$GROUP_TAG)) if defined $GROUP_TAG;

my $last;
$last = Time::HiRes::time() if $timer;
my $start = $last;

# avoid hanging on standalone --fasta load
if (!@ARGV) {
  $FH{NULL} = IO::File->new(">$tmpdir/null");
  push @ARGV, "$tmpdir/null";
}

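# The cmap adaptor also needs a direct DBI handle: next_number() uses it
# to allocate feature ids from the cmap_feature table.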
my ($cmap_db);
if ($use_mysqlcmap){
  my $options = {
    AutoCommit       => 1,
    FetchHashKeyName => 'NAME_lc',
    LongReadLen      => 3000,
    LongTruncOk      => 1,
    RaiseError       => 1,
  };

  $cmap_db = DBI->connect( $DSN, $USER, $PASSWORD, $options );
}

# Only load CMap::Utils if using cmap
unless (!$use_mysqlcmap or
  eval {
    require Bio::GMOD::CMap::Utils;
    Bio::GMOD::CMap::Utils->import('next_number');
    1;
  }
) {
  print STDERR "Error loading Bio::GMOD::CMap::Utils\n";
  exit 0;
}

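# Main parse loop: embedded FASTA sections are diverted to per-sequence
# temporary files, while GFF lines are split and appended to the
# tab-delimited dump files opened above.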
while (<>) {

  $current_file ||= $ARGV;

  # reset GFF3 flag if new filehandle
  unless ($current_file eq $ARGV) {
    undef $gff3;
    $current_file = $ARGV;
  }

  chomp;
  my ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group);

  # close sequence filehandle if required
  if ( /^\#|\s+|^$|^>|\t/ && defined $FH{FASTA}) {
    $FH{FASTA}->close;
    delete $FH{FASTA};
  }

  # print to fasta file if the handle is open
  if ( defined $FH{FASTA} ) {
    $FH{FASTA}->print("$_\n");
    next;
  }

  elsif (/^>(\S+)/) { # uh oh, sequence coming
    $FH{FASTA} = IO::File->new(">$tmpdir/$1\.fa") or die "FASTA: $!\n";
    $FH{FASTA}->print("$_\n");
    print STDERR "Preparing embedded sequence $1\n";
    push @fasta, "$tmpdir/$1\.fa";
    push @fasta_files_to_be_unlinked, "$tmpdir/$1\.fa";
    $tmpfiles{"$tmpdir/$1\.fa"}++;
    next;
  }

  elsif (/^\#\#\s*gff-version\s+(\d+)/) {
    $gff3 = ($1 >= 3);
    $db->print_gff3_warning() if $gff3;
    next;
  }

  elsif (/^\#\#\s*group-tags\s+(.+)/) {
    $db->preferred_groups(split(/\s+/,$1));
    next;
  }

  elsif (/^\#\#\s*sequence-region\s+(\S+)\s+(\d+)\s+(\d+)/i) { # header line
    ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) =
      ($1,'reference','Component',$2,$3,'.','.','.',$gff3 ? "ID=Sequence:$1": qq(Sequence "$1"));
  }

  elsif (/^\#/) {
    next;
  }

  else {
    ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) = split "\t";
  }

  if ( not defined( $ref ) or length ($ref) == 0) {
    warn "\$ref is null. source = $source, method = $method, group = $group\n";
    next;
  }

  $FEATURES++;
  my $size = $stop-$start+1;
  warn "Feature $group ($size) is larger than $MAX_BIN. You will have trouble retrieving this feature.\nRerun script with --maxfeature set to a higher power of 10.\n" if $size > $MAX_BIN;

  $source = '\N' unless defined $source;
  $score  = '\N' if $score  eq '.';
  $strand = '\N' if $strand eq '.';
  $phase  = '\N' if $phase  eq '.';

  my ($group_class,$group_name,$target_start,$target_stop,$attributes) = $db->split_group($group,$gff3);

  # GFF2/3 transition
  $group_class = [$group_class] unless ref $group_class;
  $group_name  = [$group_name]  unless ref $group_name;

  for (my $i=0; $i < @$group_name; $i++) {
    $group_class->[$i] ||= '\N';
    $group_name->[$i]  ||= '\N';
    $target_start      ||= '\N';
    $target_stop       ||= '\N';
    $method            ||= '\N';
    $source            ||= '\N';

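    # Each distinct group and feature type is mapped to a small sequential
    # integer id; the %GROUPID and %FTYPEID caches keep that assignment
    # stable across repeated occurrences.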
    my $fid     = $FID++;
    my $gid     = $GROUPID{lc join('',$group_class->[$i],$group_name->[$i])} ||= $GID++;
    my $ftypeid = $FTYPEID{lc join('',$source,$method)}                      ||= $FTYPEID++;

    my $bin = bin($start,$stop,$db->min_bin);
    $FH{ FDATA() }->print( join("\t",$fid,$ref,$start,$stop,$bin,$ftypeid,$score,$strand,$phase,$gid,$target_start,$target_stop),"\n" );

    if ($use_mysqlcmap){
      my $feature_id = next_number(
        db         => $cmap_db,
        table_name => 'cmap_feature',
        id_field   => 'feature_id',
      )
        or die 'No feature id';
      my $direction = $strand eq '-' ? -1 : 1;
      $FH{ FGROUP() }->print(
        join("\t",$feature_id,$feature_id,'NULL',0, $group_name->[$i],0,0,'NULL',1,$direction, $group_class->[$i],)
        ,"\n"
      ) unless $DONE{"G$gid"}++;
    }
    else {
      $FH{ FGROUP() }->print( join("\t",$gid,$group_class->[$i],$group_name->[$i]),"\n") unless $DONE{"G$gid"}++;
    }

    $FH{ FTYPE() }->print( join("\t",$ftypeid,$method,$source),"\n" ) unless $DONE{"T$ftypeid"}++;

    foreach (@$attributes) {
      my ($key,$value) = @$_;
      my $attributeid = $ATTRIBUTEID{$key} ||= $ATTRIBUTEID++;
      $FH{ FATTRIBUTE() }->print( join("\t",$attributeid,$key),"\n" ) unless $DONE{"A$attributeid"}++;
      $FH{ FATTRIBUTE_TO_FEATURE() }->print( join("\t",$fid,$attributeid,$value),"\n");
    }

    if ( $fid % 1000 == 0) {
      my $now;
      $now = Time::HiRes::time() if $timer;
      my $elapsed = $timer ? sprintf(" in %5.2fs",$now - $last) : '';
      $last = $now;
      print STDERR "$fid features parsed$elapsed...";
      print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
    }
  }
}

$FH{FASTA}->close if exists $FH{FASTA};

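# Load the DNA: the fdna dump file is selected as the default output handle
# so that the faux adaptor's insert_sequence() writes each segment there.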
for my $file (@fasta) {
  warn "Preparing DNA file $file....\n";
  if ($use_pg){
    $FH{FDNA() }->print("COPY fdna (fref, foffset, fdna) FROM stdin;\n");
  }
  my $old = select($FH{FDNA()});
  $db->load_fasta($file) or warn "Couldn't load fasta file $file: $!";
  if ($use_pg){
    $FH{FDNA() }->print("\\.\n\n");
  }
  warn "done...\n";
  select $old;
  unlink $file if $tmpfiles{$file};
}

if ($use_pg) {
  $FH{FDATA()                }->print("\\.\n\n");
  $FH{FTYPE()                }->print("\\.\n\n");
  $FH{FGROUP()               }->print("\\.\n\n");
  $FH{FATTRIBUTE()           }->print("\\.\n\n");
  $FH{FATTRIBUTE_TO_FEATURE()}->print("\\.\n\n");
}

$_->close foreach values %FH;
printf STDERR "Total parse time %5.2fs\n",(Time::HiRes::time() - $start) if $timer;
warn "Loading feature data and analyzing tables. You may see RDBMS messages here...\n";

if ($use_pg){
  warn "Loading feature data. You may see Postgres comments...\n";

  foreach (@files) {
    my $file = "$tmpdir/$_.$$";

    $AUTH ? system("psql $AUTH -f $file $DBNAME")
          : system('psql','-f', $file, $DBNAME);

    unlink $file;
  }

  warn "Updating sequences ...\n";
  $db->update_sequences();

  warn "Creating indexes ...\n";
  $db->_create_indexes_etc();

  warn "done...\n";
}

elsif( $use_mysql or $use_mysqlcmap ) {
  $start = time();

  my $success = 1;
  my $TERMINATEDBY = $bWINDOWS ? q( LINES TERMINATED BY '\r\n') : '';
  for my $f (@files) {
    my $table = function_to_table($f,$ADAPTOR);
    my $sql = join ('; ',
                    "lock tables $table write",
                    "delete from $table",
                    "load data $LOCAL infile '$tmpdir/$f.$$' replace into table $table $TERMINATEDBY",
                    "unlock tables");
    my $command = MYSQL . qq[$AUTH -s -e "$sql"];
    $command =~ s/\n/ /g;
    $success &&= system($command) == 0;
    unlink "$tmpdir/$f.$$";
  }
  printf STDERR "Total load time %5.2fs\n",(time() - $start) if $timer;
  print STDERR "done...\n";

  print STDERR "Analyzing/optimizing tables. You will see database messages...\n";
  $start = time();
  my $sql = '';
  for my $f (@files) {
    my $table = function_to_table($f,$ADAPTOR);
    $sql .= "analyze table $table;";
  }
  my $command = MYSQL . qq[$AUTH -N -s -e "$sql"];
  $success &&= system($command) == 0;
  printf STDERR "Optimization time %5.2fs\n",(time() - $start);

  if ($success) {
    print "$FEATURES features successfully loaded\n";
  } else {
    print "FAILURE: Please see standard error for details\n";
    exit -1;
  }
}

foreach (@fasta_files_to_be_unlinked) {
  unlink $_;
}

655 warn "Building summary statistics for coverage histograms...\n";
656 my (@args,$AUTH);
657 if (defined $USER) {
658 push @args,(-user=>$USER);
659 $AUTH .= " -u$USER";
661 if (defined $PASSWORD) {
662 push @args,(-pass=>$PASSWORD);
663 $AUTH .= " -p$PASSWORD";
665 push @args,(-preferred_groups=>[split(/[,\s+]+/,$GROUP_TAG)]) if defined $GROUP_TAG;
666 my $db = Bio::DB::GFF->new(-adaptor=>"dbi::$ADAPTOR",-dsn => $DSN,@args)
667 or die "Can't open database: ",Bio::DB::GFF->error,"\n";
668 $db->build_summary_statistics;
exit 0;

sub function_to_table {
  my $function = shift;
  my $adaptor  = shift;

  if ($function eq 'fdata'){
    return 'fdata';
  }
  elsif ($function eq 'ftype'){
    return 'ftype';
  }
  elsif ($function eq 'fgroup'){
    return 'cmap_feature' if ($adaptor eq 'mysqlcmap');
    return 'fgroup';
  }
  elsif ($function eq 'fdna'){
    return 'fdna';
  }
  elsif ($function eq 'fattribute'){
    return 'fattribute';
  }
  elsif ($function eq 'fattribute_to_feature'){
    return 'fattribute_to_feature';
  }
  return '';
}

__END__