1 package Bio
::DB
::SeqFeature
::Store
::DBI
::SQLite
;
7 Bio::DB::SeqFeature::Store::DBI::SQLite -- SQLite implementation of Bio::DB::SeqFeature::Store
11 use Bio::DB::SeqFeature::Store;
13 # Open the sequence database
14 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::SQLite',
15 -dsn => '/path/to/database.db');
17 # get a feature from somewhere
18 my $feature = Bio::SeqFeature::Generic->new(...);
21 $db->store($feature) or die "Couldn't store!";
23 # primary ID of the feature is changed to indicate its primary ID
25 my $id = $feature->primary_id;
27 # get the feature back out
28 my $f = $db->fetch($id);
30 # change the feature and update it
32 $db->update($f) or die "Couldn't update!";
36 my @features = $db->fetch_many(@list_of_ids);
39 @features = $db->get_features_by_name('ZK909');
42 @features = $db->get_features_by_alias('sma-3');
45 @features = $db->get_features_by_name('gene');
48 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
51 @features = $db->get_features_by_attribute({description => 'protein kinase'})
53 # ...by the GFF "Note" field
54 @result_list = $db->search_notes('kinase');
56 # ...by arbitrary combinations of selectors
57 @features = $db->features(-name => $name,
62 -attributes => $attributes);
64 # ...using an iterator
65 my $iterator = $db->get_seq_stream(-name => $name,
70 -attributes => $attributes);
72 while (my $feature = $iterator->next_seq) {
73 # do something with the feature
76 # ...limiting the search to a particular region
77 my $segment = $db->segment('Chr1',5000=>6000);
78 my @features = $segment->features(-type=>['mRNA','match']);
80 # getting & storing sequence information
81 # Warning: this returns a string, and not a PrimarySeq object
82 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
83 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
85 # what feature types are defined in the database?
86 my @types = $db->types;
88 # create a new feature in the database
89 my $feature = $db->new_feature(-primary_tag => 'mRNA',
96 Bio::DB::SeqFeature::Store::SQLite is the SQLite adaptor for
97 Bio::DB::SeqFeature::Store. You will not create it directly, but
98 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
100 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
102 =head2 Using the SQLite adaptor
104 To establish a connection to the database, call
105 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::SQLite',@more_args). The
106 additional arguments are as follows:
108 Argument name Description
109 ------------- -----------
111 -dsn The path to the SQLite database file.
113 -namespace A prefix to attach to each table. This allows you
114 to have several virtual databases in the same
117 -temp Boolean flag. If true, a temporary database
118 will be created and destroyed as soon as
119 the Store object goes out of scope. (synonym -temporary)
121 -autoindex Boolean flag. If true, features in the database will be
122 reindexed every time they change. This is the default.
124 -fts Boolean flag. If true, when the -create flag is true, the
125 attribute table will be created and indexed index for
126 full-text search using the most recent FTS extension
127 supported by DBD::SQLite.
129 -tmpdir Directory in which to place temporary files during "fast" loading.
130 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
132 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
133 (synonyms -options, -dbi_attr)
135 -write Pass true to open database for writing or updating.
137 If successful, a new instance of
138 Bio::DB::SeqFeature::Store::DBI::SQLite will be returned.
140 In addition to the standard methods supported by all well-behaved
141 Bio::DB::SeqFeature::Store databases, several following
142 adaptor-specific methods are provided. These are described in the next
149 use base
'Bio::DB::SeqFeature::Store::DBI::mysql';
150 use Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
;
152 use DBI
qw(:sql_types);
154 use Cwd
qw(abs_path getcwd);
155 use Bio
::DB
::GFF
::Util
::Rearrange
'rearrange';
156 use Bio
::SeqFeature
::Lite
;
158 use constant DEBUG
=>0;
159 use constant EXPERIMENTAL_COVERAGE
=>1;
161 # Using same limits as MySQL adaptor so I don't have to make something up.
162 use constant MAX_INT
=> 2_147_483_647
;
163 use constant MIN_INT
=> -2_147_483_648
;
164 use constant SUMMARY_BIN_SIZE
=> 1000; # we checkpoint coverage this often, about 20 meg overhead per feature type on hg
165 use constant USE_SPATIAL
=>0;
167 # The binning scheme places each feature into a bin.
168 # Bins are variably sized as powers of two. For example,
169 # there are 585 bins of size 2**17 (131072 bases)
172 @BINS = map {2**$_} (17, 20, 23, 26, 29); # TO DO: experiment with different bin sizes
174 for my $b (sort {$b<=>$a} @BINS) {
176 $start += $BINS[-1]/$b;
190 # my @BINS = sort {$a<=>$b} keys %BINS;
194 my ($start,$end) = @_;
196 my $len = $end - $start;
197 for my $bin (@BINS) {
200 my $binstart = int $start/$bin;
201 my $binend = int $end/$bin;
202 return $binstart+$BINS{$bin} if $binstart == $binend;
205 die "unreasonable coordinates ",$start+1,"..$end";
210 my ($start,$end) = @_;
213 for my $bin (@BINS) {
214 my $binstart = int $start/$bin;
215 my $binend = int $end/$bin;
216 push @results,$binstart+$BINS{$bin}..$binend+$BINS{$bin};
223 # object initialization
238 ) = rearrange
(['DSN',
239 ['TEMP','TEMPORARY'],
242 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
244 ['PASS','PASSWD','PASSWORD'],
245 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
246 ['WRITE','WRITEABLE'],
251 $writeable = 1 if $is_temporary or $dump_dir;
253 $dsn or $self->throw("Usage: ".__PACKAGE__
."->init(-dsn => \$dbh || \$dsn)");
259 $dsn = "dbi:SQLite:$dsn" unless $dsn =~ /^dbi:/;
260 $dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
261 $dbh->do("PRAGMA synchronous = OFF;"); # makes writes much faster
262 $dbh->do("PRAGMA temp_store = MEMORY;"); # less disk I/O; some speedup
263 $dbh->do("PRAGMA cache_size = 20000;"); # less disk I/O; some speedup
264 # Keep track of database file location
266 my ($db_file) = ($dsn =~ m/(?:db(?:name)?|database)=(.+)$/);
267 $self->{dbh_file
} = "$cwd/$db_file";
271 $self->{is_temp
} = $is_temporary;
272 $self->{namespace
} = $namespace;
273 $self->{writeable
} = $writeable;
275 $self->default_settings;
276 $self->autoindex($autoindex) if defined $autoindex;
277 $self->dumpdir($dump_dir) if $dump_dir;
278 if ($self->is_temp) {
279 $self->init_tmp_database();
281 $self->init_database('erase');
285 sub table_definitions
{
291 id integer primary key autoincrement,
292 typeid integer not null,
293 strand integer default 0,
294 "indexed" integer default 1,
299 locationlist
=> <<END,
301 id integer primary key autoincrement,
302 seqname text not null
304 create index index_locationlist on locationlist (seqname);
309 id integer primary key autoincrement,
310 tag text not null collate nocase
312 create index index_typelist on typelist (tag);
317 name text not null collate nocase,
318 display_name integer default 0
320 create index index_name_id on name(id);
321 create index index_name_name on name(name);
327 attribute_id integer not null,
328 attribute_value text collate nocase
330 create index index_attribute_id on attribute(attribute_id);
331 create index index_attribute_value on attribute(attribute_value);
334 attributelist
=> <<END,
336 id integer primary key autoincrement,
339 create index index_attributelist_tag on attributelist(tag);
341 parent2child
=> <<END,
345 primary key(id, child)
351 name text primary key,
358 offset integer not null,
360 primary key(id,offset)
365 if ($self->{'fts'}) {
366 delete($defs->{attribute
});
369 unless ($self->_has_spatial_index) {
370 $defs->{feature_location
} = <<END;
372 id int(10) primary key,
378 create index index_feature_location on feature_location(seqid,bin,start,end);
383 if (EXPERIMENTAL_COVERAGE
) {
384 $defs->{interval_stats
} = <<END;
386 typeid integer not null,
387 seqid integer not null,
388 bin integer not null,
389 cum_count integer not null,
390 primary key (typeid,seqid,bin)
400 # must do this first before calling table_definitions
401 $self->_create_spatial_index;
402 $self->_create_attribute_fts;
403 $self->SUPER::_init_database
(@_);
406 # FIXME: ensure this works with _create_attribute_fts...
407 sub init_tmp_database
{
410 $self->_create_spatial_index;
411 $self->_create_attribute_fts;
412 $self->SUPER::init_tmp_database
(@_);
415 sub _create_spatial_index
{
417 my $dbh = $self->dbh;
418 local $dbh->{PrintError
} = 0;
419 $dbh->do("DROP TABLE IF EXISTS feature_index"); # spatial index
421 $dbh->do("CREATE VIRTUAL TABLE feature_index USING RTREE(id,seqid,bin,start,end)");
425 sub _create_attribute_fts
{
427 my $dbh = $self->dbh;
428 if ($self->{'fts'}) {
430 for (@fts_versions = grep(/^ENABLE_FTS[0-9]+$/, DBD
::SQLite
::compile_options
)) { s/ENABLE_// }
431 # use the latest supported FTS version.
432 # DBD::SQLite::compile_options appears to be sorted
433 # alphabetically, so this should work through version FTS9.
434 die 'fts not supported by this version of DBD::SQLite' if (!@fts_versions);
435 $dbh->do("DROP TABLE IF EXISTS attribute");
436 $dbh->do("CREATE VIRTUAL TABLE "
437 . $self->_attribute_table
438 . " USING " . $fts_versions[-1]
439 . "(id, attribute_id, attribute_value)");
444 # return 1 if an existing attribute table in the connected database is an FTS
449 if (!defined($self->{'has_fts'})) {
450 # If the attribute table is a virtual table, assume it is an FTS
451 # table. Per http://www.sqlite.org/fileformat2.html:
452 # For (sqlite_master) rows that define views, triggers, and virtual
453 # tables, the rootpage column is 0 or NULL.
454 ($self->{'has_fts'}) = $self->dbh->selectrow_array("select count(*) from sqlite_master where type = 'table' and name = '"
455 . $self->_attribute_table
456 . "' and (rootpage = 0 or rootpage is null);");
458 return $self->{'has_fts'};
461 sub _has_spatial_index
{
463 return $self->{'_has_spatial_index'} if exists $self->{'_has_spatial_index'};
464 my $dbh = $self->dbh;
465 my ($count) = $dbh->selectrow_array("select count(*) from sqlite_master where name='feature_index'");
466 return $self->{'_has_spatial_index'} = $count;
469 sub _finish_bulk_update
{
471 my $dbh = $self->dbh;
472 my $dir = $self->{dumpdir
} || '.';
474 $self->begin_work; # making this a transaction greatly improves performance
476 for my $table ('feature', $self->index_tables) {
477 my $fh = $self->dump_filehandle($table);
478 my $path = $self->dump_path($table);
481 open $fh, '<', $path or $self->throw("Could not read file '$path': $!");
482 my $qualified_table = $self->_qualify($table);
485 if ($table =~ /feature$/) {
486 $sth = $dbh->prepare("REPLACE INTO $qualified_table VALUES (?,?,?,?,?)");
490 my ($id,$typeid,$strand,$indexed,$obj) = split(/\t/);
491 $sth->bind_param(1, $id);
492 $sth->bind_param(2, $typeid);
493 $sth->bind_param(3, $strand);
494 $sth->bind_param(4, $indexed);
495 $sth->bind_param(5, pack('H*',$obj), {TYPE
=> SQL_BLOB
});
499 my $feature_index = $self->_feature_index_table;
500 if ($table =~ /parent2child$/) {
501 $sth = $dbh->prepare("REPLACE INTO $qualified_table VALUES (?,?)");
502 } elsif ($table =~ /$feature_index$/) {
503 $sth = $dbh->prepare(
504 $self->_has_spatial_index ?
"REPLACE INTO $qualified_table VALUES (?,?,?,?,?)"
505 :"REPLACE INTO $qualified_table (id,seqid,bin,start,end) VALUES (?,?,?,?,?)"
507 } else { # attribute or name
508 $sth = $dbh->prepare("REPLACE INTO $qualified_table VALUES (?,?,?)");
513 $sth->execute(split(/\t/));
519 $self->commit; # commit the transaction
520 delete $self->{bulk_update_in_progress
};
521 delete $self->{filehandles
};
526 my @t = $self->SUPER::index_tables
;
527 return (@t,$self->_feature_index_table);
530 sub _enable_keys
{ } # nullop
531 sub _disable_keys
{ } # nullop
533 sub _fetch_indexed_features_sql
{
535 my $location_table = $self->_qualify('feature_location');
536 my $feature_table = $self->_qualify('feature');
538 SELECT typeid,seqid,start-1,end
539 FROM $location_table as l,$feature_table as f
540 WHERE l.id=f.id AND f.\"indexed\"=1
541 ORDER BY typeid,seqid,start
546 # get primary sequence between start and end
548 sub _fetch_sequence
{
550 my ($seqid,$start,$end) = @_;
552 # backward compatibility to the old days when I liked reverse complementing
553 # dna by specifying $start > $end
555 if (defined $start && defined $end && $start > $end) {
557 ($start,$end) = ($end,$start);
559 $start-- if defined $start;
560 $end-- if defined $end;
562 my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
563 my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
564 my $sequence_table = $self->_sequence_table;
565 my $locationlist_table = $self->_locationlist_table;
567 # CROSS JOIN gives a hint to the SQLite query optimizer -- mucho speedup!
568 my $sth = $self->_prepare(<<END);
569 SELECT sequence,offset
570 FROM $locationlist_table as ll CROSS JOIN $sequence_table as s
579 $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
581 while (my($frag,$offset) = $sth->fetchrow_array) {
582 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
585 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
588 $seq =~ tr/gatcGATC/ctagCTAG/;
594 sub _offset_boundary
{
596 my ($seqid,$position) = @_;
598 my $sequence_table = $self->_sequence_table;
599 my $locationlist_table = $self->_locationlist_table;
602 # use "CROSS JOIN" to give a hint to the SQLite query optimizer.
603 $sql = $position eq 'left' ?
"SELECT min(offset) FROM $locationlist_table as ll CROSS JOIN $sequence_table as s ON ll.id=s.id WHERE ll.seqname=?"
604 :$position eq 'right' ?
"SELECT max(offset) FROM $locationlist_table as ll CROSS JOIN $sequence_table as s ON ll.id=s.id WHERE ll.seqname=?"
605 :"SELECT max(offset) FROM $locationlist_table as ll CROSS JOIN $sequence_table as s ON ll.id=s.id WHERE ll.seqname=? AND offset<=?";
606 my $sth = $self->_prepare($sql);
607 my @args = $position =~ /^-?\d+$/ ?
($seqid,$position) : ($seqid);
608 $sth->execute(@args) or $self->throw($sth->errstr);
609 my $boundary = $sth->fetchall_arrayref->[0][0];
615 # Efficiently fetch a series of IDs from the database
616 # Can pass an array or an array ref
620 @_ or $self->throw('usage: fetch_many($id1,$id2,$id3...)');
621 my $ids = join ',',map {ref($_) ? @
$_ : $_} @_ or return;
622 my $features = $self->_feature_table;
624 my $sth = $self->_prepare(<<END);
625 SELECT id,object FROM $features WHERE id IN ($ids)
627 $sth->execute() or $self->throw($sth->errstr);
628 return $self->_sth2objs($sth);
633 my ($seq_id,$start,$end,$strand,
634 $name,$class,$allow_aliases,
641 ) = rearrange
([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
642 'NAME','CLASS','ALIASES',
643 ['TYPES','TYPE','PRIMARY_TAG'],
644 ['ATTRIBUTES','ATTRIBUTE'],
651 my (@from,@where,@args,@group);
652 $range_type ||= 'overlaps';
654 my $feature_table = $self->_feature_table;
655 @from = "$feature_table as f";
658 # hacky backward compatibility workaround
659 undef $class if $class && $class eq 'Sequence';
660 $name = "$class:$name" if defined $class && length $class > 0;
661 # last argument is the join field
662 my ($from,$where,$group,@a) = $self->_name_sql($name,$allow_aliases,'f.id');
663 push @from,$from if $from;
664 push @where,$where if $where;
665 push @group,$group if $group;
669 if (defined $seq_id) {
670 # last argument is the name of the features table
671 my ($from,$where,$group,@a) = $self->_location_sql($seq_id,$start,$end,$range_type,$strand,'f');
672 push @from,$from if $from;
673 push @where,$where if $where;
674 push @group,$group if $group;
678 if (defined($sources)) {
679 my @sources = ref($sources) eq 'ARRAY' ? @
{$sources} : ($sources);
680 if (defined($types)) {
681 my @types = ref($types) eq 'ARRAY' ? @
{$types} : ($types);
683 foreach my $type (@types) {
684 # *** not sure what to do if user supplies both -source and -type
685 # where the type includes a source!
687 push(@final_types, $type);
690 foreach my $source (@sources) {
691 push(@final_types, $type.':'.$source);
695 $types = \
@final_types;
698 $types = [map { ':'.$_ } @sources];
701 if (defined($types)) {
702 # last argument is the name of the features table
703 my ($from,$where,$group,@a) = $self->_types_sql($types,'f');
704 push @from,$from if $from;
705 push @where,$where if $where;
706 push @group,$group if $group;
710 if (defined $attributes) {
711 # last argument is the join field
712 my ($from,$where,$group,@a) = $self->_attributes_sql($attributes,'f.id');
713 push @from,$from if $from;
714 push @where,$where if $where;
715 push @group,$group if $group;
719 if (defined $fromtable) {
720 # last argument is the join field
721 my ($from,$where,$group,@a) = $self->_from_table_sql($fromtable,'f.id');
722 push @from,$from if $from;
723 push @where,$where if $where;
724 push @group,$group if $group;
728 # if no other criteria are specified, then
729 # only fetch indexed (i.e. top level objects)
730 @where = '"indexed"=1' unless @where;
732 my $from = join ', ',@from;
733 my $where = join ' AND ',map {"($_)"} @where;
734 my $group = join ', ',@group;
735 $group = "GROUP BY $group" if @group;
744 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
746 my $sth = $self->_prepare($query);
747 $sth->execute(@args) or $self->throw($sth->errstr);
748 return $iterator ? Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
->new($sth,$self) : $self->_sth2objs($sth);
751 sub _make_attribute_group
{
753 my ($table_name,$attributes) = @_;
754 my $key_count = keys %$attributes or return;
755 my $count = $key_count-1;
756 return "f.id HAVING count(f.id)>$count";
761 my ($seq_id,$start,$end,$range_type,$strand,$location) = @_;
763 # the additional join on the location_list table badly impacts performance
764 # so we build a copy of the table in memory
765 my $seqid = $self->_locationid_nocreate($seq_id) || 0; # zero is an invalid primary ID, so will return empty
767 my $feature_index = $self->_feature_index_table;
768 my $from = "$feature_index as fi";
770 my ($bin_where,@bin_args);
771 if (defined $start && defined $end && !$self->_has_spatial_index) {
772 my @bins = $self->search_bins($start,$end);
773 $bin_where = ' AND bin in ('.join(',',@bins).')';
776 $start = MIN_INT
unless defined $start;
777 $end = MAX_INT
unless defined $end;
779 my ($range,@range_args);
780 if ($range_type eq 'overlaps') {
781 $range = "fi.end>=? AND fi.start<=?".$bin_where;
782 @range_args = ($start,$end,@bin_args);
783 } elsif ($range_type eq 'contains') {
784 $range = "fi.start>=? AND fi.end<=?".$bin_where;
785 @range_args = ($start,$end,@bin_args);
786 } elsif ($range_type eq 'contained_in') {
787 $range = "fi.start<=? AND fi.end>=?";
788 @range_args = ($start,$end);
790 $self->throw("range_type must be one of 'overlaps', 'contains' or 'contained_in'");
793 if (defined $strand) {
794 $range .= " AND strand=?";
795 push @range_args,$strand;
800 AND $location.id=fi.id
806 my @args = ($seqid,@range_args);
807 return ($from,$where,$group,@args);
810 sub _feature_index_table
{
812 return $self->_has_spatial_index ?
$self->_qualify('feature_index')
813 : $self->_qualify('feature_location') }
815 # Do a case-insensitive search a la the PostgreSQL adaptor
818 my ($name,$allow_aliases,$join) = @_;
819 my $name_table = $self->_name_table;
821 my $from = "$name_table as n";
822 my ($match,$string) = $self->_match_sql($name);
824 my $where = "n.id=$join AND n.name $match COLLATE NOCASE";
825 $where .= " AND n.display_name>0" unless $allow_aliases;
826 return ($from,$where,'',$string);
829 sub _search_attributes
{
831 my ($search_string,$attribute_names,$limit) = @_;
832 my @words = map {quotemeta($_)} split /\s+/,$search_string;
834 my $name_table = $self->_name_table;
835 my $attribute_table = $self->_attribute_table;
836 my $attributelist_table = $self->_attributelist_table;
837 my $type_table = $self->_type_table;
838 my $typelist_table = $self->_typelist_table;
839 my $has_fts = $self->_has_fts;
841 my @tags = @
$attribute_names;
842 my $tag_sql = join ' OR ',("al.tag=?") x
@tags;
844 my $perl_regexp = join '|',@words;
849 $sql_regexp = "a.attribute_value MATCH ?";
850 @wild_card_words = join(' OR ', @words);
852 $sql_regexp = join ' OR ',("a.attribute_value LIKE ?") x
@words;
853 @wild_card_words = map { "%$_%" } @words;
855 # CROSS JOIN hinders performance with FTS attribute table for DBD::SQLite 1.42
857 SELECT name,attribute_value,tl.tag,n.id
858 FROM $attributelist_table AS al
859 JOIN $attribute_table AS a ON al.id = a.attribute_id
860 JOIN $name_table AS n ON n.id = a.id
861 JOIN $type_table AS t ON t.id = n.id
862 JOIN $typelist_table AS tl ON tl.id = t.typeid
867 $sql .= "LIMIT $limit" if defined $limit;
868 $self->_print_query($sql,@tags,@wild_card_words) if DEBUG
|| $self->debug;
869 my $sth = $self->_prepare($sql);
870 $sth->execute(@tags, @wild_card_words) or $self->throw($sth->errstr);
873 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
874 my (@hits) = $value =~ /$perl_regexp/ig;
875 my @words_in_row = split /\b/,$value;
876 my $score = int(@hits*100/@words/@words_in_row);
877 push @results,[$name,$value,$score,$type,$id];
880 @results = sort {$b->[2]<=>$a->[2]} @results;
889 if ($name =~ /(?:^|[^\\])[*?]/) {
890 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
891 $name =~ s/(^|[^\\])\*/$1%/g;
892 $name =~ s/(^|[^\\])\?/$1_/g;
896 $match = "= ? COLLATE NOCASE";
899 return ($match,$string);
902 sub _attributes_sql
{
904 my ($attributes,$join) = @_;
906 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
907 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
909 my $attribute_table = $self->_attribute_table;
910 my $attributelist_table = $self->_attributelist_table;
912 my $from = "$attribute_table AS a" . ($self->_has_fts
914 : " INDEXED BY index_attribute_id") . ", $attributelist_table AS al";
915 my $a_al_join = $self->_has_fts ?
'a.attribute_id MATCH al.id' : 'a.attribute_id=al.id';
923 my $group = $group_by;
925 my @args = (@bind_args,@group_args);
926 return ($from,$where,$group,@args);
929 # overridden because of case-sensitivity of matches
932 my ($types,$type_table) = @_;
933 my ($primary_tag,$source_tag);
935 my @types = ref $types eq 'ARRAY' ? @
$types : $types;
937 my $typelist = $self->_typelist_table;
938 my $from = "$typelist AS tl";
942 for my $type (@types) {
944 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
945 $primary_tag = $type->method;
946 $source_tag = $type->source;
948 ($primary_tag,$source_tag) = split ':',$type,2;
951 if (length $source_tag) {
952 push @matches,"tl.tag=? COLLATE NOCASE";
953 push @args,"$primary_tag:$source_tag";
955 push @matches,"tl.tag LIKE ?";
956 push @args,"$primary_tag:%";
959 my $matches = join ' OR ',@matches;
962 tl.id=$type_table.typeid
966 return ($from,$where,'',@args);
971 $self->dbh->do("ANALYZE $_") foreach $self->index_tables;
975 # Replace Bio::SeqFeatureI into database.
980 my $index_flag = shift || undef;
982 # ?? shouldn't need to do this
983 # $self->_load_class($object);
984 my $id = $object->primary_id;
985 my $features = $self->_feature_table;
987 my $sth = $self->_prepare(<<END);
988 REPLACE INTO $features (id,object,"indexed",strand,typeid) VALUES (?,?,?,?,?)
991 my ($seqid,$start,$end,$strand,$bin) = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
993 my $primary_tag = $object->primary_tag;
994 my $source_tag = $object->source_tag || '';
995 $primary_tag .= ":$source_tag";
996 my $typeid = $self->_typeid($primary_tag,1);
998 my $frozen = $self->no_blobs() ?
0 : $self->freeze($object);
1000 $sth->bind_param(1, $id);
1001 $sth->bind_param(2, $frozen, {TYPE
=> SQL_BLOB
});
1002 $sth->bind_param(3, $index_flag||0);
1003 $sth->bind_param(4, $strand);
1004 $sth->bind_param(5, $typeid);
1006 $sth->execute() or $self->throw($sth->errstr);
1008 my $dbh = $self->dbh;
1009 $object->primary_id($dbh->func('last_insert_rowid')) unless defined $id;
1011 $self->flag_for_indexing($dbh->func('last_insert_rowid')) if $self->{bulk_update_in_progress
};
1014 # doesn't work with this schema, since we have to update name and attribute
1015 # tables which need object ids, which we can only know by replacing feats in
1016 # the feature table one by one
1019 my $index_flag = shift || undef;
1022 my $features = $self->_feature_table;
1025 foreach my $object (@objects) {
1026 my $id = $object->primary_id;
1027 my (undef,undef,undef,$strand) = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x4
;
1028 my $primary_tag = $object->primary_tag;
1029 my $source_tag = $object->source_tag || '';
1030 $primary_tag .= ":$source_tag";
1031 my $typeid = $self->_typeid($primary_tag,1);
1033 push(@insert_values, ($id,0,$index_flag||0,$strand,$typeid));
1038 push(@value_blocks, '(?,?,?,?,?)');
1040 my $value_blocks = join(',', @value_blocks);
1041 my $sql = qq{REPLACE INTO
$features (id
,object
,"indexed",strand
,typeid
) VALUES
$value_blocks};
1043 my $sth = $self->_prepare($sql);
1044 $sth->execute(@insert_values) or $self->throw($sth->errstr);
1047 sub _get_location_and_bin
{
1050 my $seqid = $self->_locationid($obj->seq_id||'');
1051 my $start = $obj->start;
1052 my $end = $obj->end;
1053 my $strand = $obj->strand;
1054 return ($seqid,$start,$end,$strand,$self->calculate_bin($start,$end));
1060 # Insert one Bio::SeqFeatureI into database. primary_id must be undef
1065 my $index_flag = shift || 0;
1067 $self->_load_class($object);
1068 defined $object->primary_id and $self->throw("$object already has a primary id");
1070 my $features = $self->_feature_table;
1071 my $sth = $self->_prepare(<<END);
1072 INSERT INTO $features (id,object,"indexed") VALUES (?,?,?)
1074 $sth->execute(undef,$self->freeze($object),$index_flag) or $self->throw($sth->errstr);
1075 my $dbh = $self->dbh;
1076 $object->primary_id($dbh->func('last_insert_rowid'));
1077 $self->flag_for_indexing($dbh->func('last_insert_rowid')) if $self->{bulk_update_in_progress
};
1080 =head2 toplevel_types
1082 Title : toplevel_types
1083 Usage : @type_list = $db->toplevel_types
1084 Function: Get the toplevel types in the database
1085 Returns : array of Bio::DB::GFF::Typename objects
1089 This is similar to types() but only returns the types of
1090 INDEXED (toplevel) features.
1094 sub toplevel_types
{
1096 eval "require Bio::DB::GFF::Typename"
1097 unless Bio
::DB
::GFF
::Typename
->can('new');
1098 my $typelist_table = $self->_typelist_table;
1099 my $feature_table = $self->_feature_table;
1101 SELECT distinct(tag) from $typelist_table as tl,$feature_table as f
1102 WHERE tl.id=f.typeid
1106 $self->_print_query($sql) if DEBUG
|| $self->debug;
1107 my $sth = $self->_prepare($sql);
1108 $sth->execute() or $self->throw($sth->errstr);
1111 while (my($tag) = $sth->fetchrow_array) {
1112 push @results,Bio
::DB
::GFF
::Typename
->new($tag);
1120 my ($table,$namefield,$name,$add_if_missing) = @_;
1121 my $qualified_table = $self->_qualify($table);
1122 my $sth = $self->_prepare(<<END);
1123 SELECT id FROM $qualified_table WHERE $namefield=? COLLATE NOCASE
1125 $sth->execute($name) or die $sth->errstr;
1126 my ($id) = $sth->fetchrow_array;
1128 return $id if defined $id;
1129 return unless $add_if_missing;
1131 $sth = $self->_prepare(<<END);
1132 INSERT INTO $qualified_table ($namefield) VALUES (?)
1134 $sth->execute($name) or die $sth->errstr;
1135 my $dbh = $self->dbh;
1136 return $dbh->func('last_insert_rowid');
1139 # special-purpose store for bulk loading - write to a file rather than to the db
1143 my $indexed = shift;
1146 my $store_fh = $self->dump_filehandle('feature');
1147 my $dbh = $self->dbh;
1149 my $autoindex = $self->autoindex;
1152 my $id = $self->next_id;
1153 my ($seqid,$start,$end,$strand) = $indexed ?
$self->_get_location_and_bin($obj) : (undef)x4
;
1154 my $primary_tag = $obj->primary_tag;
1155 my $source_tag = $obj->source_tag || '';
1156 $primary_tag .= ":$source_tag";
1157 my $typeid = $self->_typeid($primary_tag,1);
1159 # Encode BLOB in hex so we can more easily import it into SQLite
1161 join("\t",$id,$typeid,$strand,$indexed,
1162 unpack('H*', $self->freeze($obj))),"\n";
1163 $obj->primary_id($id);
1164 $self->_update_indexes($obj) if $indexed && $autoindex;
1168 # remember whether we are have ever stored a non-indexed feature
1169 unless ($indexed or $self->{indexed_flag
}++) {
1170 $self->subfeatures_are_indexed(0);
1175 sub _dump_update_name_index
{
1178 my $fh = $self->dump_filehandle('name');
1179 my $dbh = $self->dbh;
1180 my ($names,$aliases) = $self->feature_names($obj);
1181 # unlike DBI::mysql, don't quote, as quotes will be quoted when loaded
1182 print $fh join("\t",$id,$_,1),"\n" foreach @
$names;
1183 print $fh join("\t",$id,$_,0),"\n" foreach @
$aliases;
1186 sub _update_name_index
{
1189 my $name = $self->_name_table;
1190 my $primary_id = $obj->primary_id;
1192 $self->_delete_index($name,$id);
1193 my ($names,$aliases) = $self->feature_names($obj);
1195 my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");
1197 $sth->execute($id,$_,1) or $self->throw($sth->errstr) foreach @
$names;
1198 $sth->execute($id,$_,0) or $self->throw($sth->errstr) foreach @
$aliases;
1203 sub _dump_update_attribute_index
{
1206 my $fh = $self->dump_filehandle('attribute');
1207 my $dbh = $self->dbh;
1208 for my $tag ($obj->all_tags) {
1209 my $tagid = $self->_attributeid($tag);
1210 for my $value ($obj->each_tag_value($tag)) {
1211 # unlike DBI::mysql, don't quote, as quotes will be quoted when loaded
1212 print $fh join("\t",$id,$tagid,$value),"\n";
1217 sub _update_indexes
{
1220 defined (my $id = $obj->primary_id) or return;
1221 $self->SUPER::_update_indexes
($obj);
1223 if ($self->{bulk_update_in_progress
}) {
1224 $self->_dump_update_location_index($obj,$id);
1226 $self->_update_location_index($obj,$id);
1230 sub _update_location_index
{
1233 my ($seqid,$start,$end,$strand,$bin) = $self->_get_location_and_bin($obj);
1235 my $table = $self->_feature_index_table;
1236 $self->_delete_index($table,$id);
1240 if ($self->_has_spatial_index) {
1241 $sql = "INSERT INTO $table (id,seqid,bin,start,end) values (?,?,?,?,?)";
1242 @args = ($id,$seqid,$bin,$start,$end);
1244 $sql = "INSERT INTO $table (id,seqid,bin,start,end) values (?,?,?,?,?)";
1245 @args = ($id,$seqid,$bin,$start,$end);
1248 my $sth = $self->_prepare($sql);
1249 $sth->execute(@args);
1253 sub _dump_update_location_index
{
1256 my $table = $self->_feature_index_table;
1257 my $fh = $self->dump_filehandle($table);
1258 my $dbh = $self->dbh;
1259 my ($seqid,$start,$end,$strand,$bin) = $self->_get_location_and_bin($obj);
1260 my @args = $self->_has_spatial_index ?
($id,$seqid,$bin,$start,$end)
1261 : ($id,$seqid,$bin,$start,$end);
1262 print $fh join("\t",@args),"\n";
1267 # Remove filehandles, so temporal files can be properly deleted
1268 if (%DBI::installed_drh
) {
1269 DBI
->disconnect_all;
1270 %DBI::installed_drh
= ();
1279 Nathan Weeks - Nathan.Weeks@ars.usda.gov
1281 Copyright (c) 2009 Nathan Weeks
1283 Modified 2010 to support cumulative statistics by Lincoln Stein
1284 <lincoln.stein@gmail.com>.
1286 This library is free software; you can redistribute it and/or modify
1287 it under the same terms as Perl itself. See the Bioperl license for