4 Bio::DB::GFF::Adaptor::dbi -- Database adaptor for DBI (SQL) databases
12 This is the base class for DBI-based adaptors. It does everything
13 except generating the text of the queries to be used. See the section
14 QUERIES TO IMPLEMENT for the list of methods that must be implemented.
18 package Bio
::DB
::GFF
::Adaptor
::dbi
;
20 # base class for dbi-based implementations
24 use Bio
::DB
::GFF
::Util
::Rearrange
; # for rearrange()
25 use Bio
::DB
::GFF
::Util
::Binning
;
26 use Bio
::DB
::GFF
::Adaptor
::dbi
::iterator
;
27 use Bio
::DB
::GFF
::Adaptor
::dbi
::caching_handle
;
29 use base
qw(Bio::DB::GFF);
31 # constants for choosing
33 use constant MAX_SEGMENT
=> 1_000_000_000
; # the largest a segment can get
35 # this is the largest that any reference sequence can be (100 megabases)
36 use constant MAX_BIN
=> 1_000_000_000
;
38 # this is the smallest bin (1 K)
39 use constant MIN_BIN
=> 1000;
41 # size of range over which it is faster to force the database to use the range for indexing
42 use constant STRAIGHT_JOIN_LIMIT
=> 200_000
;
44 # this is the size to which DNA should be shredded
45 use constant DNA_CHUNK_SIZE
=> 2000;
47 # size of summary bins for interval coverage statistics
48 use constant SUMMARY_BIN_SIZE
=> 1000;
50 # for debugging fbin optimization
51 use constant EPSILON
=> 1e-7; # set to zero if you trust mysql's floating point comparisons
52 use constant OPTIMIZE
=> 1; # set to zero to turn off optimization completely
54 ##############################################################################
60 Usage : $db = Bio::DB::GFF->new(@args)
61 Function: create a new adaptor
62 Returns : a Bio::DB::GFF object
66 This is the constructor for the adaptor. It is called automatically
67 by Bio::DB::GFF-E<gt>new. In addition to arguments that are common among
68 all adaptors, the following class-specific arguments are recgonized:
73 -dsn the DBI data source, e.g. 'dbi:mysql:ens0040'
75 -user username for authentication
77 -pass the password for authentication
81 # Create a new Bio::DB::GFF::Adaptor::dbi object
84 my ($features_db,$username,$auth,$other) = rearrange
([
85 [qw(FEATUREDB DB DSN)],
87 [qw(PASSWORD PASSWD PASS)],
90 $features_db || $class->throw("new(): Provide a data source or DBI database");
92 if (!ref($features_db)) {
93 my $dsn = $features_db;
95 push @args,$username if defined $username;
96 push @args,$auth if defined $auth;
97 $features_db = Bio
::DB
::GFF
::Adaptor
::dbi
::caching_handle
->new($dsn,@args)
98 || $class->throw("new(): Failed to connect to $dsn: "
99 . Bio
::DB
::GFF
::Adaptor
::dbi
::caching_handle
->errstr);
101 $features_db->isa('DBI::db')
102 || $class->throw("new(): $features_db is not a DBI handle");
107 features_db
=> $features_db
113 $self->features_db->debug(@_);
114 $self->SUPER::debug
(@_);
120 Usage : $dbh = $db->features_db
121 Function: get database handle
122 Returns : a DBI handle
126 Note: what is returned is not really a DBI::db handle, but a
127 subclass of one. This means that you cannot manipulate the
128 handle's attributes directly. Instead call the attribute
131 my $dbh = $db->features_db;
132 $dbh->attribute(AutoCommit=>0);
136 sub features_db
{ shift->{features_db
} }
137 sub dbh
{ shift->{features_db
} }
142 Usage : $string = $db->get_dna($name,$start,$stop,$class)
143 Function: get DNA string
145 Args : name, class, start and stop of desired segment
148 This method performs the low-level fetch of a DNA substring given its
149 name, class and the desired range. It is actually a front end to the
150 abstract method make_dna_query(), which it calls after some argument
151 consistency checking.
157 my ($ref,$start,$stop,$class) = @_;
159 my ($offset_start,$offset_stop);
161 my $has_start = defined $start;
162 my $has_stop = defined $stop;
165 if ($has_start && $has_stop && $start > $stop) {
167 ($start,$stop) = ($stop,$start);
170 # turn start and stop into 0-based offsets
171 my $cs = $self->dna_chunk_size;
172 $start -= 1; $stop -= 1;
173 $offset_start = int($start/$cs)*$cs;
174 $offset_stop = int($stop/$cs)*$cs;
177 # special case, get it all
178 if (!($has_start || $has_stop)) {
179 $sth = $self->dbh->do_query('select fdna,foffset from fdna where fref=? order by foffset',$ref);
183 $sth = $self->dbh->do_query('select fdna,foffset from fdna where fref=? and foffset>=? order by foffset',
187 else { # both start and stop defined
188 $sth = $self->dbh->do_query('select fdna,foffset from fdna where fref=? and foffset>=? and foffset<=? order by foffset',
189 $ref,$offset_start,$offset_stop);
193 while (my($frag,$offset) = $sth->fetchrow_array) {
194 substr($frag,0,$start-$offset) = '' if $has_start && $start > $offset;
197 substr($dna,$stop-$start+1) = '' if $has_stop && $stop-$start+1 < length($dna);
200 $dna =~ tr/gatcGATC/ctagCTAG/;
210 Title : get_abscoords
211 Usage : ($refseq,$refclass,$start,$stop,$strand) = $db->get_abscoords($name,$class)
212 Function: get absolute coordinates for landmark
213 Returns : an array ref -- see below
214 Args : name and class of desired landmark
217 This method performs the low-level resolution of a landmark into a
218 reference sequence and position.
220 The result is an array ref, each element of which is a five-element
221 list containing reference sequence name, class, start, stop and strand.
227 my ($name,$class,$refseq) = @_;
229 my $sth = $self->make_abscoord_query($name,$class,$refseq);
232 while (my @row = $sth->fetchrow_array) {
238 #$self->error("$name not found in database");
239 my $sth2 = $self->make_aliasabscoord_query($name,$class);
241 while (my @row2 = $sth2->fetchrow_array) {
247 $self->error("$name not found in database");
258 Usage : $db->get_features($search,$options,$callback)
259 Function: retrieve features from the database
260 Returns : number of features retrieved
264 This is the low-level method that is called to retrieve GFF lines from
265 the database. It is responsible for retrieving features that satisfy
266 range and feature type criteria, and passing the GFF fields to a
269 See the manual page for Bio::DB::GFF for the interpretation of the
270 arguments and how the information retrieved by get_features is passed
271 to the callback for processing.
273 Internally, get_features() is a front end for range_query(). The
274 latter method constructs the query and executes it. get_features()
275 calls fetchrow_array() to recover the fields and passes them to the
280 # Given sequence name, range, and optional filter, retrieve list of
281 # all features. Passes features through callback.
284 my ($search,$options,$callback) = @_;
285 $callback || $self->throw('must provide a callback argument');
287 my $sth = $self->range_query(@
{$search}{qw(rangetype
297 BINSIZE)}) or return;
300 while (my @row = $sth->fetchrow_array) {
312 Function: return list of landmark classes in database
313 Returns : a list of classes
317 This routine returns the list of reference classes known to the
318 database, or empty if classes are not used by the database. Classes
319 are distinct from types, being essentially qualifiers on the reference
322 NOTE: In the current mysql-based schema, this query takes a while to
323 run due to the classes not being normalized.
329 my ($query,@args) = $self->make_classes_query or return;
330 my $sth = $self->dbh->do_query($query,@args);
332 while (my ($c) = $sth->fetchrow_array) {
338 =head2 make_classes_query
340 Title : make_classes_query
341 Usage : ($query,@args) = $db->make_classes_query
342 Function: return query fragment for generating list of reference classes
343 Returns : a query and args
349 sub make_classes_query
{
354 =head2 _feature_by_name
356 Title : _feature_by_name
357 Usage : $db->get_features_by_name($name,$class,$callback)
358 Function: get a list of features by name and class
359 Returns : count of number of features retrieved
360 Args : name of feature, class of feature, and a callback
363 This method is used internally. The callback arguments are those used
364 by make_feature(). Internally, it invokes the following abstract procedures:
366 make_features_select_part
367 make_features_from_part
368 make_features_by_name_where_part
369 make_features_by_alias_where_part (for aliases)
370 make_features_join_part
374 sub _feature_by_name
{
376 my ($class,$name,$location,$callback) = @_;
377 $callback || $self->throw('must provide a callback argument');
379 my $select = $self->make_features_select_part;
380 my $from = $self->make_features_from_part(undef,{sparse_groups
=>1});
381 my ($where,@args) = $self->make_features_by_name_where_part($class,$name);
382 my $join = $self->make_features_join_part;
383 my $range = $self->make_features_by_range_where_part('overlaps',
384 {refseq
=>$location->[0],
386 start
=>$location->[1],
387 stop
=>$location->[2]}) if $location;
389 my $query1 = "SELECT $select FROM $from WHERE $where AND $join";
390 $query1 .= " AND $range" if $range;
393 $from = $self->make_features_from_part(undef,{attributes
=>1});
394 ($where,@args) = $self->make_features_by_alias_where_part($class,$name); # potential bug - @args1==@args2?
396 my $query2 = "SELECT $select FROM $from WHERE $where AND $join";
397 $query2 .= " AND $range" if $range;
401 for my $query ($query1,$query2) {
402 my $sth = $self->dbh->do_query($query,@args);
403 while (my @row = $sth->fetchrow_array) {
413 =head2 _feature_by_id
415 Title : _feature_by_id
416 Usage : $db->_feature_by_id($ids,$type,$callback)
417 Function: get a list of features by ID
418 Returns : count of number of features retrieved
419 Args : arrayref containing list of IDs to fetch and a callback
422 This method is used internally. The $type selector is one of
423 "feature" or "group". The callback arguments are those used by
424 make_feature(). Internally, it invokes the following abstract
427 make_features_select_part
428 make_features_from_part
429 make_features_by_id_where_part
430 make_features_join_part
436 my ($ids,$type,$callback) = @_;
437 $callback || $self->throw('must provide a callback argument');
439 my $select = $self->make_features_select_part;
440 my $from = $self->make_features_from_part;
441 my ($where,@args) = $type eq 'feature' ?
$self->make_features_by_id_where_part($ids)
442 : $self->make_features_by_gid_where_part($ids);
443 my $join = $self->make_features_join_part;
444 my $query = "SELECT $select FROM $from WHERE $where AND $join";
445 my $sth = $self->dbh->do_query($query,@args);
448 while (my @row = $sth->fetchrow_array) {
456 sub _feature_by_attribute
{
458 my ($attributes,$callback) = @_;
459 $callback || $self->throw('must provide a callback argument');
461 my $select = $self->make_features_select_part;
462 my $from = $self->make_features_from_part(undef,{attributes
=>$attributes});
463 my ($where,@args) = $self->make_features_by_range_where_part('',{attributes
=>$attributes});
464 my $join = $self->make_features_join_part({attributes
=>$attributes});
465 my $query = "SELECT $select FROM $from WHERE $where AND $join";
466 my $sth = $self->dbh->do_query($query,@args);
469 while (my @row = $sth->fetchrow_array) {
480 Usage : $db->get_types($refseq,$refclass,$start,$stop,$count)
481 Function: get list of types
482 Returns : a list of Bio::DB::GFF::Typename objects
486 This method is responsible for fetching the list of feature type names
487 from the database. The query may be limited to a particular range, in
488 which case the range is indicated by a landmark sequence name and
489 class and its subrange, if any. These arguments may be undef if it is
490 desired to retrieve all feature types in the database (which may be a
491 slow operation in some implementations).
493 If the $count flag is false, the method returns a simple list of
494 vBio::DB::GFF::Typename objects. If $count is true, the method returns
495 a list of $name=E<gt>$count pairs, where $count indicates the number of
496 times this feature occurs in the range.
498 Internally, this method calls upon the following functions to generate
499 the SQL and its bind variables:
501 ($q1,@args) = make_types_select_part(@args);
502 ($q2,@args) = make_types_from_part(@args);
503 ($q3,@args) = make_types_where_part(@args);
504 ($q4,@args) = make_types_join_part(@args);
505 ($q5,@args) = make_types_group_part(@args);
507 The components are then combined as follows:
509 $query = "SELECT $q1 FROM $q2 WHERE $q3 AND $q4 GROUP BY $q5";
511 If any of the query fragments contain the ? bind variable, then the
512 same number of bind arguments must be provided in @args. The
513 fragment-generating functions are described below.
519 my ($srcseq,$class,$start,$stop,$want_count,$typelist) = @_;
520 my $straight = $self->do_straight_join($srcseq,$start,$stop,[]) ?
'straight_join' : '';
521 my ($select,@args1) = $self->make_types_select_part($srcseq,$start,$stop,$want_count,$typelist);
522 my ($from,@args2) = $self->make_types_from_part($srcseq,$start,$stop,$want_count,$typelist);
523 my ($join,@args3) = $self->make_types_join_part($srcseq,$start,$stop,$want_count,$typelist);
524 my ($where,@args4) = $self->make_types_where_part($srcseq,$start,$stop,$want_count,$typelist);
525 my ($group,@args5) = $self->make_types_group_part($srcseq,$start,$stop,$want_count,$typelist);
527 my $query = "SELECT $straight $select FROM $from WHERE $join AND $where";
528 $query .= " GROUP BY $group" if $group;
529 my @args = (@args1,@args2,@args3,@args4,@args5);
530 my $sth = $self->dbh->do_query($query,@args) or return;
533 while (my ($method,$source,$count) = $sth->fetchrow_array) {
534 my $type = Bio
::DB
::GFF
::Typename
->new($method,$source);
535 $result{$type} = $count;
538 return $want_count ?
%result : values %obj;
544 Usage : $db->range_query($range_type,$refseq,$refclass,$start,$stop,$types,$order_by_group,$attributes,$binsize)
545 Function: create statement handle for range/overlap queries
546 Returns : a DBI statement handle
550 This method constructs the statement handle for this module's central
551 query: given a range and/or a list of feature types, fetch their GFF
554 The positional arguments are as follows:
558 $isrange A flag indicating that this is a range.
559 query. Otherwise an overlap query is
562 $refseq The reference sequence name (undef if no range).
564 $refclass The reference sequence class (undef if no range).
566 $start The start of the range (undef if none).
568 $stop The stop of the range (undef if none).
570 $types Array ref containing zero or feature types in the
571 format [method,source].
573 $order_by_group A flag indicating that statement handler should group
574 the features by group id (handy for iterative fetches)
576 $attributes A hash containing select attributes.
578 $binsize A bin size for generating tables of feature density.
580 If successful, this method returns a statement handle. The handle is
581 expected to return the fields described for get_features().
583 Internally, range_query() makes calls to the following methods,
584 each of which is expected to be overridden in subclasses:
586 $select = $self->make_features_select_part;
587 $from = $self->make_features_from_part;
588 $join = $self->make_features_join_part;
589 ($where,@args) = $self->make_features_by_range_where_part($isrange,$srcseq,$class,
590 $start,$stop,$types,$class);
592 The query that is constructed looks like this:
594 SELECT $select FROM $from WHERE $join AND $where
596 The arguments that are returned from make_features_by_range_where_part() are
597 passed to the statement handler's execute() method.
599 range_query() also calls a do_straight_join() method, described
600 below. If this method returns true, then the keyword "straight_join"
601 is inserted right after SELECT.
607 my($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes,$bin) = @_;
609 my $dbh = $self->features_db;
611 # NOTE: straight_join is necessary in some database to force the right index to be used.
612 my %a = (refseq
=>$refseq,class=>$class,start
=>$start,stop
=>$stop,types
=>$types,attributes
=>$attributes,bin_width
=>$bin);
613 my $straight = $self->do_straight_join(\
%a) ?
'straight_join' : '';
614 my $select = $self->make_features_select_part(\
%a);
615 my $from = $self->make_features_from_part($sparse,\
%a);
616 my $join = $self->make_features_join_part(\
%a);
617 my ($where,@args) = $self->make_features_by_range_where_part($rangetype,\
%a);
618 my ($group_by,@more_args) = $self->make_features_group_by_part(\
%a);
619 my $order_by = $self->make_features_order_by_part(\
%a) if $order_by_group;
621 my $query = "SELECT $straight $select FROM $from WHERE $join";
622 $query .= " AND $where" if $where;
624 $query .= " GROUP BY $group_by";
625 push @args,@more_args;
627 $query .= " ORDER BY $order_by" if $order_by;
629 my $sth = $self->dbh->do_query($query,@args);
633 =head2 make_features_by_range_where_part
635 Title : make_features_by_range_where_part
636 Usage : ($string,@args) =
637 $db->make_features_select_part($isrange,$refseq,$class,$start,$stop,$types)
638 Function: make where part of the features query
639 Returns : the list ($query,@bind_args)
643 This method creates the part of the features query that immediately
644 follows the WHERE keyword and is ANDed with the string returned by
645 make_features_join_part().
647 The six positional arguments are a flag indicating whether to perform
648 a range search or an overlap search, the reference sequence, class,
649 start and stop, all of which define an optional range to search in,
650 and an array reference containing a list [$method,$souce] pairs.
652 The method result is a multi-element list containing the query string
653 and the list of runtime arguments to bind to it with the execute()
656 This method's job is to clean up arguments and perform consistency
657 checking. The real work is done by the following abstract methods:
661 refseq_query() Return the query string needed to match the reference
664 range_query() Return the query string needed to find all features contained
667 overlap_query() Return the query string needed to find all features that overlap
670 See Bio::DB::Adaptor::dbi::mysql for an example of how this works.
676 sub make_features_by_range_where_part
{
678 my ($rangetype,$options) = @_;
680 my ($refseq,$class,$start,$stop,$types,$attributes) =
681 @
{$options}{qw(refseq class start stop types attributes)};
686 my ($q,@a) = $self->refseq_query($refseq,$class);
691 if (defined $start or defined $stop) {
692 $start = 0 unless defined($start);
693 $stop = MAX_SEGMENT
unless defined($stop);
695 my ($range_query,@range_args) =
696 $rangetype eq 'overlaps' ?
$self->overlap_query($start,$stop)
697 : $rangetype eq 'contains' ?
$self->contains_query($start,$stop)
698 : $rangetype eq 'contained_in' ?
$self->contained_in_query($start,$stop)
701 push @query,$range_query;
702 push @args,@range_args;
705 if (defined $types && @
$types) {
706 my ($type_query,@type_args) = $self->types_query($types);
707 push @query,$type_query;
708 push @args,@type_args;
712 my ($attribute_query,@attribute_args) = $self->make_features_by_attribute_where_part($attributes);
713 push @query,"($attribute_query)";
714 push @args,@attribute_args;
717 my $query = join "\n\tAND ",@query;
718 return wantarray ?
($query,@args) : $self->dbh->dbi_quote($query,@args);
721 =head2 do_straight_join
723 Title : do_straight_join
724 Usage : $boolean = $db->do_straight_join($refseq,$class,$start,$stop,$types)
725 Function: optimization flag
727 Args : see range_query()
730 This subroutine, called by range_query() returns a boolean flag.
731 If true, range_query() will perform a straight join, which can be
732 used to optimize certain SQL queries. The four arguments correspond
733 to similarly-named arguments passed to range_query().
737 sub do_straight_join
{ 0 } # false by default
742 Usage : $string = $db->string_match($field,$value)
743 Function: create a SQL fragment for performing exact or regexp string matching
744 Returns : query string
745 Args : the table field and match value
748 This method examines the passed value for meta characters. If so it
749 produces a SQL fragment that performs a regular expression match.
750 Otherwise, it produces a fragment that performs an exact string match.
752 This method is not used in the module, but is available for use by
759 my ($field,$value) = @_;
760 return qq($field = ?
) if $value =~ /^[!@%&a-zA-Z0-9_\'\" ~-]+$/;
761 return qq($field REGEXP ?
);
767 Usage : $string = $db->exact_match($field,$value)
768 Function: create a SQL fragment for performing exact string matching
769 Returns : query string
770 Args : the table field and match value
773 This method produces the SQL fragment for matching a field name to a
774 constant string value.
780 my ($field,$value) = @_;
781 return qq($field = ?
);
787 Usage : @search_results = $db->search_notes("full text search string",$limit)
788 Function: Search the notes for a text string, using mysql full-text search
789 Returns : array of results
790 Args : full text search string, and an optional row limit
793 This is a mysql-specific method. Given a search string, it performs a
794 full-text search of the notes table and returns an array of results.
795 Each row of the returned array is a arrayref containing the following fields:
797 column 1 A Bio::DB::GFF::Featname object, suitable for passing to segment()
798 column 2 The text of the note
799 column 3 A relevance score.
800 column 4 A Bio::DB::GFF::Typename object
806 my ($search_string,$limit) = @_;
808 $search_string =~ tr/*?//d;
810 my @words = $search_string =~ /(\w+)/g;
811 my $regex = join '|',@words;
812 my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
813 my $search = join(' OR ',@searches);
816 SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
817 FROM fgroup,fattribute_to_feature,fdata,ftype
818 WHERE fgroup.gid=fdata.gid
819 AND fdata.fid=fattribute_to_feature.fid
820 AND fdata.ftypeid=ftype.ftypeid
825 my $sth = $self->dbh->do_query($query);
827 while (my ($class,$name,$note,$method,$source) = $sth->fetchrow_array) {
828 next unless $class && $name; # sorry, ignore NULL objects
829 my @matches = $note =~ /($regex)/g;
830 my $relevance = 10*@matches;
831 my $featname = Bio
::DB
::GFF
::Featname
->new($class=>$name);
832 my $type = Bio
::DB
::GFF
::Typename
->new($method,$source);
833 push @results,[$featname,$note,$relevance,$type];
834 last if $limit && @results >= $limit;
843 Usage : $value = $db->meta($name [,$newval])
844 Function: get or set a meta variable
846 Args : meta variable name and optionally value
849 Get or set a named metavariable for the database. Metavariables can
850 be used for database-specific settings. This method calls two
851 class-specific methods which must be implemented:
853 make_meta_get_query() Returns a sql fragment which given a meta
854 parameter name, returns its value. One bind
856 make_meta_set_query() Returns a sql fragment which takes two bind
857 arguments, the parameter name and its value
860 Don't make changes unless you know what you're doing! It will affect the
867 my $param_name = uc shift;
872 my $sql = $self->make_meta_set_query() or return;
873 my $sth = $self->dbh->prepare_delayed($sql)
874 or $self->error("Can't prepare $sql: ",$self->dbh->errstr), return;
875 $sth->execute($param_name,$value)
876 or $self->error("Can't execute $sql: ",$self->dbh->errstr), return;
878 return $self->{meta
}{$param_name} = $value;
881 elsif (exists $self->{meta
}{$param_name}) {
882 return $self->{meta
}{$param_name};
886 undef $self->{meta
}{$param_name}; # so that we don't check again
887 my $sql = $self->make_meta_get_query() or return;
888 my $sth = $self->dbh->prepare_delayed($sql)
889 or $self->error("Can't prepare $sql: ",$self->dbh->errstr), return;
890 $sth->execute($param_name)
891 or $self->error("Can't execute $sql: ",$sth->errstr),return;
892 my ($value) = $sth->fetchrow_array;
894 return $self->{meta
}{$param_name} = $value;
899 =head2 make_meta_get_query
901 Title : make_meta_get_query
902 Usage : $sql = $db->make_meta_get_query
903 Function: return SQL fragment for getting a meta parameter
904 Returns : SQL fragment
908 By default this does nothing; meta parameters are not stored or
913 sub make_meta_get_query
{
914 return 'SELECT fvalue FROM fmeta WHERE fname=?';
920 $self->meta('chunk_size') || DNA_CHUNK_SIZE
;
923 =head2 make_meta_set_query
925 Title : make_meta_set_query
926 Usage : $sql = $db->make_meta_set_query
927 Function: return SQL fragment for setting a meta parameter
928 Returns : SQL fragment
932 By default this does nothing; meta parameters are not stored or
937 sub make_meta_set_query
{
941 =head2 default_meta_values
943 Title : default_meta_values
944 Usage : %values = $db->default_meta_values
945 Function: empty the database
946 Returns : a list of tag=>value pairs
950 This method returns a list of tag=E<gt>value pairs that contain default
951 meta information about the database. It is invoked by initialize() to
952 write out the default meta values. The base class version returns an
955 For things to work properly, meta value names must be UPPERCASE.
959 sub default_meta_values
{
961 my @values = $self->SUPER::default_meta_values
;
966 straight_join_limit
=> STRAIGHT_JOIN_LIMIT
,
967 chunk_size
=> DNA_CHUNK_SIZE
,
973 return $self->meta('min_bin') || MIN_BIN
;
977 return $self->meta('max_bin') || MAX_BIN
;
980 sub straight_join_limit
{
982 return $self->meta('straight_join_limit') || STRAIGHT_JOIN_LIMIT
;
985 =head2 get_features_iterator
987 Title : get_features_iterator
988 Usage : $iterator = $db->get_features_iterator($search,$options,$callback)
989 Function: create an iterator on a features() query
990 Returns : A Bio::DB::GFF::Adaptor::dbi::iterator object
991 Args : see get_features()
994 This method is similar to get_features(), except that it returns an
995 iterator across the query. See
996 L<Bio::DB::GFF::Adaptor::dbi::iterator>.
1000 sub get_features_iterator
{
1002 my ($search,$options,$callback) = @_;
1003 $callback || $self->throw('must provide a callback argument');
1004 my $sth = $self->range_query(@
{$search}{qw(rangetype
1014 BINSIZE)}) or return;
1015 return Bio
::DB
::GFF
::Adaptor
::dbi
::iterator
->new($sth,$callback);
1018 ########################## loading and initialization #####################
1020 =head2 do_initialize
1022 Title : do_initialize
1023 Usage : $success = $db->do_initialize($drop_all)
1024 Function: initialize the database
1025 Returns : a boolean indicating the success of the operation
1026 Args : a boolean indicating whether to delete existing data
1029 This method will load the schema into the database. If $drop_all is
1030 true, then any existing data in the tables known to the schema will be
1033 Internally, this method calls schema() to get the schema data.
1037 # Create the schema from scratch.
1038 # You will need create privileges for this.
1040 #shift->throw("do_initialize(): must be implemented by subclass");
1043 $self->drop_all if $erase;
1045 my $dbh = $self->features_db;
1046 my $schema = $self->schema;
1047 foreach my $table_name ($self->tables) {
1048 my $create_table_stmt = $schema->{$table_name}{table
} ;
1049 $dbh->do($create_table_stmt) || warn $dbh->errstr;
1050 $self->create_other_schema_objects(\
%{$schema->{$table_name}});
1059 Usage : $db->finish_load
1060 Function: called after load_gff_line()
1061 Returns : number of records loaded
1065 This method performs schema-specific cleanup after loading a set of
1066 GFF records. It finishes each of the statement handlers prepared by
1074 my $dbh = $self->features_db or return;
1075 $dbh->do('UNLOCK TABLES') if $self->lock_on_load;
1077 foreach (keys %{$self->{load_stuff
}{sth
}}) {
1078 $self->{load_stuff
}{sth
}{$_}->finish;
1081 my $counter = $self->{load_stuff
}{counter
};
1082 delete $self->{load_stuff
};
1087 =head2 create_other_schema_objects
1089 Title : create_other_schema_objects
1090 Usage : $self->create_other_schema_objects($table_name)
1091 Function: create other schema objects like : indexes, sequences, triggers
1098 sub create_other_schema_objects
{
1099 #shift->throw("create_other_schema_objects(): must be implemented by subclass");
1101 my $table_schema = shift ;
1102 my $dbh = $self->features_db;
1103 foreach my $object_type(keys %$table_schema){
1104 if ($object_type !~ /table/) {
1105 foreach my $object_name(keys %{$table_schema->{$object_type}}){
1106 my $create_object_stmt = $table_schema->{$object_type}{$object_name};
1107 $dbh->do($create_object_stmt) || warn $dbh->errstr;
1117 Usage : $db->drop_all
1118 Function: empty the database
1123 This method drops the tables known to this module. Internally it
1124 calls the abstract tables() method.
1128 # Drop all the GFF tables -- dangerous!
1130 #shift->throw("drop_all(): must be implemented by subclass");
1132 my $dbh = $self->features_db;
1133 my $schema = $self->schema;
1135 local $dbh->{PrintError
} = 0;
1136 foreach ($self->tables) {
1137 $dbh->do("drop table $_") || warn $dbh->errstr;
1139 #when dropping a table - the indexes and triggers are being dropped automatically
1140 # sequences needs to be dropped - if there are any (Oracle, PostgreSQL)
1141 if ($schema->{$_}{sequence
}){
1142 foreach my $sequence_name(keys %{$schema->{$_}{sequence
}}) {
1143 $dbh->do("drop sequence $sequence_name");
1147 #$self->drop_other_schema_objects($_);
1154 The clone() method should be used when you want to pass the
1155 Bio::DB::GFF object to a child process across a fork(). The child must
1156 call clone() before making any queries.
1158 This method does two things: (1) it sets the underlying database
1159 handle's InactiveDestroy parameter to 1, thereby preventing the
1160 database connection from being destroyed in the parent when the dbh's
1161 destructor is called; (2) it replaces the dbh with the result of
1162 dbh-E<gt>clone(), so that we now have an independent handle.
1168 $self->features_db->clone;
1172 =head1 QUERIES TO IMPLEMENT
1174 The following astract methods either return DBI statement handles or
1175 fragments of SQL. They must be implemented by subclasses of this
1176 module. See Bio::DB::GFF::Adaptor::dbi::mysql for examples.
1181 =head2 drop_other_schema_objects
1183 Title : drop_other_schema_objects
1184 Usage : $self->create_other_schema_objects($table_name)
1185 Function: create other schema objects like : indexes, sequences, triggers
1193 sub drop_other_schema_objects
{
1194 #shift->throw("drop_other_schema_objects(): must be implemented by subclass");
1198 =head2 make_features_select_part
1200 Title : make_features_select_part
1201 Usage : $string = $db->make_features_select_part()
1202 Function: make select part of the features query
1207 This abstract method creates the part of the features query that
1208 immediately follows the SELECT keyword.
1212 sub make_features_select_part
{
1213 shift->throw("make_features_select_part(): must be implemented by subclass");
1219 Usage : @tables = $db->tables
1220 Function: return list of tables that belong to this module
1221 Returns : list of tables
1225 This method lists the tables known to the module.
1229 # return list of tables that "belong" to us.
1231 my $schema = shift->schema;
1232 return keys %$schema;
1238 Usage : $schema = $db->schema
1239 Function: return the CREATE script for the schema
1244 This method returns an array ref containing the various CREATE
1245 statements needed to initialize the database tables. The keys are the
1246 table names, and the values are strings containing the appropriate
1252 shift->throw("The schema() method must be implemented by subclass");
1258 Usage : $db->DESTROY
1259 Function: disconnect database at destruct time
1264 This is the destructor for the class.
1270 $self->features_db->disconnect if defined $self->features_db;
1273 ################## query cache ##################
1276 #########################################
1277 ## Moved from mysql.pm and mysqlopt.pm ##
1278 #########################################
1280 =head2 make_features_by_name_where_part
1282 Title : make_features_by_name_where_part
1283 Usage : $db->make_features_by_name_where_part
1284 Function: create the SQL fragment needed to select a feature by its group name & class
1285 Returns : a SQL fragment and bind arguments
1291 sub make_features_by_name_where_part
{
1293 my ($class,$name) = @_;
1294 if ($name =~ /\*/) {
1298 return ("fgroup.gclass=? AND fgroup.gname LIKE ?",$class,$name);
1300 return ("fgroup.gclass=? AND fgroup.gname=?",$class,$name);
1304 sub make_features_by_alias_where_part
{
1306 my ($class,$name) = @_;
1307 if ($name =~ /\*/) {
1310 return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value LIKE ? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=fdata.fid AND ftype.ftypeid=fdata.ftypeid",$class,$name)
1312 return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value=? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=fdata.fid AND ftype.ftypeid=fdata.ftypeid",$class,$name);
1317 sub make_features_by_attribute_where_part
{
1319 my $attributes = shift;
1322 foreach (keys %$attributes) {
1323 push @sql,"(fattribute.fattribute_name=? AND fattribute_to_feature.fattribute_value=?)";
1324 push @args,($_,$attributes->{$_});
1326 return (join(' OR ',@sql),@args);
1329 =head2 make_features_by_id_where_part
1331 Title : make_features_by_id_where_part
1332 Usage : $db->make_features_by_id_where_part($ids)
1333 Function: create the SQL fragment needed to select a set of features by their ids
1334 Returns : a SQL fragment and bind arguments
1335 Args : arrayref of IDs
1340 sub make_features_by_id_where_part
{
1343 my $set = join ",",@
$ids;
1344 return ("fdata.fid IN ($set)");
1347 =head2 make_features_by_gid_where_part
1349 Title : make_features_by_id_where_part
1350 Usage : $db->make_features_by_gid_where_part($ids)
1351 Function: create the SQL fragment needed to select a set of features by their ids
1352 Returns : a SQL fragment and bind arguments
1353 Args : arrayref of IDs
1358 sub make_features_by_gid_where_part
{
1361 my $set = join ",",@
$ids;
1362 return ("fgroup.gid IN ($set)");
1366 =head2 make_features_from_part
1368 Title : make_features_from_part
1369 Usage : $string = $db->make_features_from_part()
1370 Function: make from part of the features query
1375 This method creates the part of the features query that immediately
1376 follows the FROM keyword.
1380 sub make_features_from_part
{
1383 my $options = shift || {};
1384 return $options->{attributes
} ?
"fdata,ftype,fgroup,fattribute,fattribute_to_feature\n"
1385 : "fdata,ftype,fgroup\n";
1389 =head2 make_features_join_part
1391 Title : make_features_join_part
1392 Usage : $string = $db->make_features_join_part()
1393 Function: make join part of the features query
1398 This method creates the part of the features query that immediately
1399 follows the WHERE keyword.
1403 sub make_features_join_part
{
1405 my $options = shift || {};
1406 return !$options->{attributes
} ?
<<END1 : <<END2;
1407 fgroup
.gid
= fdata
.gid
1408 AND ftype
.ftypeid
= fdata
.ftypeid
1410 fgroup
.gid
= fdata
.gid
1411 AND ftype
.ftypeid
= fdata
.ftypeid
1412 AND fattribute
.fattribute_id
=fattribute_to_feature
.fattribute_id
1413 AND fdata
.fid
=fattribute_to_feature
.fid
1417 =head2 make_features_order_by_part
1419 Title : make_features_order_by_part
1420 Usage : ($query,@args) = $db->make_features_order_by_part()
1421 Function: make the ORDER BY part of the features() query
1422 Returns : a SQL fragment and bind arguments, if any
1426 This method creates the part of the features query that immediately
1427 follows the ORDER BY part of the query issued by features() and
1432 sub make_features_order_by_part
{
1434 my $options = shift || {};
1435 return "fgroup.gname";
1438 =head2 make_features_group_by_part
1440 Title : make_features_group_by_part
1441 Usage : ($query,@args) = $db->make_features_group_by_part()
1442 Function: make the GROUP BY part of the features() query
1443 Returns : a SQL fragment and bind arguments, if any
1447 This method creates the part of the features query that immediately
1448 follows the GROUP BY part of the query issued by features() and
1453 sub make_features_group_by_part
{
1455 my $options = shift || {};
1456 if (my $att = $options->{attributes
}) {
1457 my $key_count = keys %$att;
1458 return unless $key_count > 1;
1459 return ("fdata.fid,fref,fstart,fstop,fsource,
1460 fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,
1461 ftarget_stop,fdata.gid
1462 HAVING count(fdata.fid) > ?",$key_count-1);
1464 elsif (my $b = $options->{bin_width
}) {
1465 return "fref,fstart,fdata.ftypeid";
1472 Title : refseq_query
1473 Usage : ($query,@args) = $db->refseq_query($name,$class)
1474 Function: create SQL fragment that selects the desired reference sequence
1475 Returns : a list containing the query and bind arguments
1476 Args : reference sequence name and class
1479 This method is called by make_features_by_range_where_part() to
1480 construct the part of the select WHERE section that selects a
1481 particular reference sequence. It returns a mult-element list in
1482 which the first element is the SQL fragment and subsequent elements
1488 my ($name,$class) = @_;
1489 return ('gff.refseq=? AND gff.refclass=?',
1493 The current schema does not distinguish among different classes of
1498 # IMPORTANT NOTE: THE MYSQL SCHEMA IGNORES THE SEQUENCE CLASS
1499 # THIS SHOULD BE FIXED
1502 my ($refseq,$refclass) = @_;
1503 my $query = "fdata.fref=?";
1504 return wantarray ?
($query,$refseq) : $self->dbh->dbi_quote($query,$refseq);
1510 Usage : @attributes = $db->attributes($id,$name)
1511 Function: get the attributes on a particular feature
1512 Returns : an array of string
1516 Some GFF version 2 files use the groups column to store a series of
1517 attribute/value pairs. In this interpretation of GFF, the first such
1518 pair is treated as the primary group for the feature; subsequent pairs
1519 are treated as attributes. Two attributes have special meaning:
1520 "Note" is for backward compatibility and is used for unstructured text
1521 remarks. "Alias" is considered as a synonym for the feature name.
1523 If no name is provided, then attributes() returns a flattened hash, of
1524 attribute=E<gt>value pairs. This lets you do:
1526 %attributes = $db->attributes($id);
1528 Normally, attributes() will be called by the feature:
1530 @notes = $feature->attributes('Note');
1539 my $from = 'fattribute_to_feature,fattribute';
1540 my $join = 'fattribute.fattribute_id=fattribute_to_feature.fattribute_id';
1541 my $where1 = 'fid=? AND fattribute_name=?';
1542 my $where2 = 'fid=?';
1543 $sth = defined($tag) ?
$self->dbh->do_query("SELECT fattribute_value FROM $from WHERE $where1 AND $join",$id,$tag)
1544 : $self->dbh->do_query("SELECT fattribute_name,fattribute_value FROM $from WHERE $where2 AND $join",$id);
1547 $sth = $self->dbh->do_query("SELECT fattribute_name FROM fattribute");
1550 while (my @stuff = $sth->fetchrow_array) {
1551 push @result,@stuff;
1559 =head2 overlap_query_nobin
1561 Title : overlap_query
1562 Usage : ($query,@args) = $db->overlap_query($start,$stop)
1563 Function: create SQL fragment that selects the desired features by range
1564 Returns : a list containing the query and bind arguments
1565 Args : the start and stop of a range, inclusive
1568 This method is called by make_features_byrange_where_part() to construct the
1569 part of the select WHERE section that selects a set of features that
1570 overlap a range. It returns a multi-element list in which the first
1571 element is the SQL fragment and subsequent elements are bind values.
1574 sub overlap_query_nobin {
1575 my ($start,$stop) = @_;
1576 return ('gff.stopE<gt>=? AND gff.startE<lt>=?',
1581 # find features that overlap a given range
1582 sub overlap_query_nobin
{
1584 my ($start,$stop) = @_;
1586 my $query = qq(fdata
.fstop
>=? AND fdata
.fstart
<=?
);
1587 return wantarray ?
($query,$start,$stop) : $self->dbh->dbi_quote($query,$start,$stop);
1590 =head2 contains_query_nobin
1592 Title : contains_query
1593 Usage : ($query,@args) = $db->contains_query_nobin($start,$stop)
1594 Function: create SQL fragment that selects the desired features by range
1595 Returns : a list containing the query and bind arguments
1596 Args : the start and stop of a range, inclusive
1599 This method is called by make_features_byrange_where_part() to construct the
1600 part of the select WHERE section that selects a set of features
1601 entirely enclosed by a range. It returns a multi-element list in which
1602 the first element is the SQL fragment and subsequent elements are bind
1603 values. For example:
1605 sub contains_query_nobin {
1606 my ($start,$stop) = @_;
1607 return ('gff.start>=? AND gff.stop<=?',
1612 # find features that are completely contained within a range
1613 sub contains_query_nobin
{
1615 my ($start,$stop) = @_;
1616 my $query = qq(fdata
.fstart
>=? AND fdata
.fstop
<=?
);
1617 return wantarray ?
($query,$start,$stop) : $self->dbh->dbi_quote($query,$start,$stop);
1620 =head2 contained_in_query_nobin
1622 Title : contained_in_query_nobin
1623 Usage : ($query,@args) = $db->contained_in_query($start,$stop)
1624 Function: create SQL fragment that selects the desired features by range
1625 Returns : a list containing the query and bind arguments
1626 Args : the start and stop of a range, inclusive
1629 This method is called by make_features_byrange_where_part() to construct the
1630 part of the select WHERE section that selects a set of features
1631 entirely enclosed by a range. It returns a multi-element list in which
1632 the first element is the SQL fragment and subsequent elements are bind
1635 sub contained_in_query_nobin {
1636 my ($start,$stop) = @_;
1637 return ('gff.start<=? AND gff.stop>=?',
1643 # find features that are completely contained within a range
1644 sub contained_in_query_nobin
{
1646 my ($start,$stop) = @_;
1647 my $query = qq(fdata
.fstart
<=? AND fdata
.fstop
>=?
);
1648 return wantarray ?
($query,$start,$stop) : $self->dbh->dbi_quote($query,$start,$stop);
1654 Usage : ($query,@args) = $db->types_query($types)
1655 Function: create SQL fragment that selects the desired features by type
1656 Returns : a list containing the query and bind arguments
1657 Args : an array reference containing the types
1660 This method is called by make_features_byrange_where_part() to construct the
1661 part of the select WHERE section that selects a set of features based
1662 on their type. It returns a multi-element list in which the first
1663 element is the SQL fragment and subsequent elements are bind values.
1664 The argument is an array reference containing zero or more
1665 [$method,$source] pairs.
1669 # generate the fragment of SQL responsible for searching for
1670 # features with particular types and methods
1677 for my $type (@
$types) {
1678 my ($method,$source) = @
$type;
1679 my ($mlike, $slike) = (0, 0);
1680 if ($method && $method =~ m/\.\*/) {
1681 $method =~ s/%/\\%/g;
1682 $method =~ s/_/\\_/g;
1683 $method =~ s/\.\*\??/%/g;
1686 if ($source && $source =~ m/\.\*/) {
1687 $source =~ s/%/\\%/g;
1688 $source =~ s/_/\\_/g;
1689 $source =~ s/\.\*\??/%/g;
1693 if (defined $method && length $method) {
1694 push @pair, $mlike ?
qq(fmethod LIKE ?
) : qq(fmethod
= ?
);
1695 push @args, $method;
1697 if (defined $source && length $source) {
1698 push @pair, $slike ?
qq(fsource LIKE ?
) : qq(fsource
= ?
);
1699 push @args, $source;
1701 push @method_queries,"(" . join(' AND ',@pair) .")" if @pair;
1703 my $query = " (".join(' OR ',@method_queries).")\n" if @method_queries;
1704 return wantarray ?
($query,@args) : $self->dbh->dbi_quote($query,@args);
1707 =head2 make_types_select_part
1709 Title : make_types_select_part
1710 Usage : ($string,@args) = $db->make_types_select_part(@args)
1711 Function: create the select portion of the SQL for fetching features type list
1712 Returns : query string and bind arguments
1716 This method is called by get_types() to generate the query fragment
1717 and bind arguments for the SELECT part of the query that retrieves
1718 lists of feature types. The four positional arguments are as follows:
1720 $refseq reference sequence name
1721 $start start of region
1723 $want_count true to return the count of this feature type
1725 If $want_count is false, the SQL fragment returned must produce a list
1726 of feature types in the format (method, source).
1728 If $want_count is true, the returned fragment must produce a list of
1729 feature types in the format (method, source, count).
1733 #------------------------- support for the types() query ------------------------
1734 sub make_types_select_part
{
1736 my ($srcseq,$start,$stop,$want_count) = @_;
1737 my $query = $want_count ?
'ftype.fmethod,ftype.fsource,count(fdata.ftypeid)'
1738 : 'fmethod,fsource';
1742 =head2 make_types_from_part
1744 Title : make_types_from_part
1745 Usage : ($string,@args) = $db->make_types_from_part(@args)
1746 Function: create the FROM portion of the SQL for fetching features type lists
1747 Returns : query string and bind arguments
1751 This method is called by get_types() to generate the query fragment
1752 and bind arguments for the FROM part of the query that retrieves lists
1753 of feature types. The four positional arguments are as follows:
1755 $refseq reference sequence name
1756 $start start of region
1758 $want_count true to return the count of this feature type
1760 If $want_count is false, the SQL fragment returned must produce a list
1761 of feature types in the format (method, source).
1763 If $want_count is true, the returned fragment must produce a list of
1764 feature types in the format (method, source, count).
1768 sub make_types_from_part
{
1770 my ($srcseq,$start,$stop,$want_count) = @_;
1771 my $query = defined($srcseq) || $want_count ?
'fdata,ftype' : 'ftype';
1775 =head2 make_types_join_part
1777 Title : make_types_join_part
1778 Usage : ($string,@args) = $db->make_types_join_part(@args)
1779 Function: create the JOIN portion of the SQL for fetching features type lists
1780 Returns : query string and bind arguments
1784 This method is called by get_types() to generate the query fragment
1785 and bind arguments for the JOIN part of the query that retrieves lists
1786 of feature types. The four positional arguments are as follows:
1788 $refseq reference sequence name
1789 $start start of region
1791 $want_count true to return the count of this feature type
1795 sub make_types_join_part
{
1797 my ($srcseq,$start,$stop,$want_count) = @_;
1798 my $query = defined($srcseq) || $want_count ?
'fdata.ftypeid=ftype.ftypeid'
1800 return $query || '1=1';
1803 =head2 make_types_where_part
1805 Title : make_types_where_part
1806 Usage : ($string,@args) = $db->make_types_where_part(@args)
1807 Function: create the WHERE portion of the SQL for fetching features type lists
1808 Returns : query string and bind arguments
1812 This method is called by get_types() to generate the query fragment
1813 and bind arguments for the WHERE part of the query that retrieves
1814 lists of feature types. The four positional arguments are as follows:
1816 $refseq reference sequence name
1817 $start start of region
1819 $want_count true to return the count of this feature type
1823 sub make_types_where_part
{
1825 my ($srcseq,$start,$stop,$want_count,$typelist) = @_;
1827 if (defined($srcseq)) {
1828 push @query,'fdata.fref=?';
1830 if (defined $start or defined $stop) {
1831 $start = 1 unless defined $start;
1832 $stop = MAX_SEGMENT
unless defined $stop;
1833 my ($q,@a) = $self->overlap_query($start,$stop);
1838 if (defined $typelist && @
$typelist) {
1839 my ($q,@a) = $self->types_query($typelist);
1843 my $query = @query ?
join(' AND ',@query) : '1=1';
1844 return wantarray ?
($query,@args) : $self->dbh->dbi_quote($query,@args);
1847 =head2 make_types_group_part
1849 Title : make_types_group_part
1850 Usage : ($string,@args) = $db->make_types_group_part(@args)
1851 Function: create the GROUP BY portion of the SQL for fetching features type lists
1852 Returns : query string and bind arguments
1856 This method is called by get_types() to generate the query fragment
1857 and bind arguments for the GROUP BY part of the query that retrieves
1858 lists of feature types. The four positional arguments are as follows:
1860 $refseq reference sequence name
1861 $start start of region
1863 $want_count true to return the count of this feature type
1867 sub make_types_group_part
{
1869 my ($srcseq,$start,$stop,$want_count) = @_;
1870 return unless $srcseq or $want_count;
1871 return 'ftype.ftypeid,ftype.fmethod,ftype.fsource';
1875 =head2 get_feature_id
1877 Title : get_feature_id
1878 Usage : $integer = $db->get_feature_id($ref,$start,$stop,$typeid,$groupid)
1879 Function: get the ID of a feature
1880 Returns : an integer ID or undef
1884 This internal method is called by load_gff_line to look up the integer
1885 ID of an existing feature. It is ony needed when replacing a feature
1886 with new information.
1890 # this method is called when needed to look up a feature's ID
1891 sub get_feature_id
{
1893 my ($ref,$start,$stop,$typeid,$groupid) = @_;
1894 my $s = $self->{load_stuff
};
1895 unless ($s->{get_feature_id
}) {
1896 my $dbh = $self->features_db;
1897 $s->{get_feature_id
} =
1898 $dbh->prepare_delayed('SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND gid=?');
1900 my $sth = $s->{get_feature_id
} or return;
1901 $sth->execute($ref,$start,$stop,$typeid,$groupid) or return;
1902 my ($fid) = $sth->fetchrow_array;
1908 =head2 make_abscoord_query
1910 Title : make_abscoord_query
1911 Usage : $sth = $db->make_abscoord_query($name,$class);
1912 Function: create query that finds the reference sequence coordinates given a landmark & classa
1913 Returns : a DBI statement handle
1914 Args : name and class of landmark
1917 The statement handler should return rows containing five fields:
1919 1. reference sequence name
1920 2. reference sequence class
1923 5. strand ("+" or "-")
1925 This query always returns "Sequence" as the class of the reference
1930 # given sequence name, return (reference,start,stop,strand)
1931 sub make_abscoord_query
{
1933 my ($name,$class,$refseq) = @_;
1934 #my $query = GETSEQCOORDS;
1935 my $query = $self->getseqcoords_query();
1936 my $getforcedseqcoords = $self->getforcedseqcoords_query() ;
1937 if ($name =~ /\*/) {
1941 $query =~ s/gname=\?/gname LIKE ?/;
1943 defined $refseq ?
$self->dbh->do_query($getforcedseqcoords,$name,$class,$refseq)
1944 : $self->dbh->do_query($query,$name,$class);
1947 sub make_aliasabscoord_query
{
1949 my ($name,$class) = @_;
1950 #my $query = GETALIASCOORDS;
1951 my $query = $self->getaliascoords_query();
1952 if ($name =~ /\*/) {
1956 $query =~ s/gname=\?/gname LIKE ?/;
1958 $self->dbh->do_query($query,$name,$class);
1961 sub getseqcoords_query
{
1962 shift->throw("getseqcoords_query(): must be implemented by a subclass");
1965 sub getaliascoords_query
{
1966 shift->throw("getaliascoords_query(): must be implemented by a subclass");
1971 my ($start,$stop,$minbin,$maxbin) = @_;
1972 if ($start && $start < 0 && $stop > 0) { # split the queries
1973 my ($lower_query,@lower_args) = $self->_bin_query($start,0,$minbin,$maxbin);
1974 my ($upper_query,@upper_args) = $self->_bin_query(0,$stop,$minbin,$maxbin);
1975 my $query = "$lower_query\n\t OR $upper_query";
1976 my @args = (@lower_args,@upper_args);
1977 return wantarray ?
($query,@args) : $self->dbh->dbi_quote($query,@args);
1979 return $self->_bin_query($start,$stop,$minbin,$maxbin);
1985 my ($start,$stop,$minbin,$maxbin) = @_;
1988 $start = 0 unless defined($start);
1989 $stop = $self->meta('max_bin') unless defined($stop);
1992 $minbin = defined $minbin ?
$minbin : $self->min_bin;
1993 $maxbin = defined $maxbin ?
$maxbin : $self->max_bin;
1995 while ($tier >= $minbin) {
1996 my ($tier_start,$tier_stop) = (bin_bot
($tier,$start)-EPSILON
(),bin_top
($tier,$stop)+EPSILON
());
1997 ($tier_start,$tier_stop) = ($tier_stop,$tier_start) if $tier_start > $tier_stop; # can happen when working with negative coordinates
1998 if ($tier_start == $tier_stop) {
1999 push @bins,'fbin=?';
2000 push @args,$tier_start;
2002 push @bins,'fbin between ? and ?';
2003 push @args,($tier_start,$tier_stop);
2007 $query = join("\n\t OR ",@bins);
2008 return wantarray ?
($query,@args)
2009 : $self->dbh->dbi_quote($query,@args);
2012 # find features that overlap a given range
2015 my ($start,$stop) = @_;
2018 my ($iq,@iargs) = $self->overlap_query_nobin($start,$stop);
2020 my ($bq,@bargs) = $self->bin_query($start,$stop);
2021 $query = "($bq)\n\tAND $iq";
2022 @args = (@bargs,@iargs);
2029 return wantarray ?
($query,@args) : $self->dbh->dbi_quote($query,@args);
2032 # find features that are completely contained within a ranged
2033 sub contains_query
{
2035 my ($start,$stop) = @_;
2036 my ($bq,@bargs) = $self->bin_query($start,$stop,undef,bin
($start,$stop,$self->min_bin));
2037 my ($iq,@iargs) = $self->contains_query_nobin($start,$stop);
2038 my $query = "($bq)\n\tAND $iq";
2039 my @args = (@bargs,@iargs);
2040 return wantarray ?
($query,@args) : $self->dbh->dbi_quote($query,@args);
2043 # find features that are completely contained within a range
2044 sub contained_in_query
{
2046 my ($start,$stop) = @_;
2047 my ($bq,@bargs) = $self->bin_query($start,$stop,abs($stop-$start)+1,undef);
2048 my ($iq,@iargs) = $self->contained_in_query_nobin($start,$stop);
2049 my $query = "($bq)\n\tAND $iq";
2050 my @args = (@bargs,@iargs);
2051 return wantarray ?
($query,@args) : $self->dbh->dbi_quote($query,@args);
2054 # implement the _delete_fattribute_to_feature() method
2055 sub _delete_fattribute_to_feature
{
2057 my @feature_ids = @_;
2058 my $dbh = $self->features_db;
2059 my $fields = join ',',map{$dbh->quote($_)} @feature_ids;
2061 my $query = "delete from fattribute_to_feature where fid in ($fields)";
2062 warn "$query\n" if $self->debug;
2063 my $result = $dbh->do($query);
2064 defined $result or $self->throw($dbh->errstr);
2068 # implement the _delete_features() method
2069 sub _delete_features
{
2071 my @feature_ids = @_;
2072 my $dbh = $self->features_db;
2073 my $fields = join ',',map{$dbh->quote($_)} @feature_ids;
2075 # delete from fattribute_to_feature
2076 $self->_delete_fattribute_to_feature(@feature_ids);
2078 my $query = "delete from fdata where fid in ($fields)";
2079 warn "$query\n" if $self->debug;
2080 my $result = $dbh->do($query);
2081 defined $result or $self->throw($dbh->errstr);
2085 # implement the _delete_groups() method
2086 sub _delete_groups
{
2089 my $dbh = $self->features_db;
2090 my $fields = join ',',map{$dbh->quote($_)} @group_ids;
2092 foreach my $gid (@group_ids){
2093 my @features = $self->get_feature_by_gid($gid);
2094 $self->delete_features(@features);
2097 my $query = "delete from fgroup where gid in ($fields)";
2098 warn "$query\n" if $self->debug;
2099 my $result = $dbh->do($query);
2100 defined $result or $self->throw($dbh->errstr);
2104 # implement the _delete() method
2107 my $delete_spec = shift;
2108 my $ranges = $delete_spec->{segments
} || [];
2109 my $types = $delete_spec->{types
} || [];
2110 my $force = $delete_spec->{force
};
2111 my $range_type = $delete_spec->{range_type
};
2112 my $dbh = $self->features_db;
2114 my $query = 'delete from fdata';
2118 for my $segment (@
$ranges) {
2119 my $ref = $dbh->quote($segment->abs_ref);
2120 my $start = $segment->abs_start;
2121 my $stop = $segment->abs_stop;
2122 my $range = $range_type eq 'overlaps' ?
$self->overlap_query($start,$stop)
2123 : $range_type eq 'contains' ?
$self->contains_query($start,$stop)
2124 : $range_type eq 'contained_in' ?
$self->contained_in_query($start,$stop)
2125 : $self->throw("Invalid range type '$range_type'");
2126 push @range_part,"(fref=$ref AND $range)";
2128 push @where,'('. join(' OR ',@range_part).')' if @range_part;
2132 my $types_where = $self->types_query($types);
2133 my $types_query = "select ftypeid from ftype where $types_where";
2134 my $result = $dbh->selectall_arrayref($types_query);
2135 my @typeids = map {$_->[0]} @
$result;
2136 my $typelist = join ',',map{$dbh->quote($_)} @typeids;
2137 $typelist ||= "0"; # don't cause DBI to die with invalid SQL when
2138 # unknown feature types were requested.
2139 push @where,"(ftypeid in ($typelist))";
2141 $self->throw("This operation would delete all feature data and -force not specified")
2142 unless @where || $force;
2143 $query .= " where ".join(' and ',@where) if @where;
2144 warn "$query\n" if $self->debug;
2145 my $result = $dbh->do($query);
2147 defined $result or $self->throw($dbh->errstr);
2152 =head2 feature_summary
2154 Title : feature_summary
2155 Usage : $summary = $db->feature_summary(@args)
2156 Function: returns a coverage summary across indicated region/type
2157 Returns : a Bio::SeqFeatureI object containing the "coverage" tag
2161 This method is used to get coverage density information across a
2162 region of interest. You provide it with a region of interest, optional
2163 a list of feature types, and a count of the number of bins over which
2164 you want to calculate the coverage density. An object is returned
2165 corresponding to the requested region. It contains a tag called
2166 "coverage" that will return an array ref of "bins" length. Each
2167 element of the array describes the number of features that overlap the
2168 bin at this position.
2172 Argument Description
2173 -------- -----------
2175 -seq_id Sequence ID for the region
2176 -start Start of region
2178 -type/-types Feature type of interest or array ref of types
2179 -bins Number of bins across region. Defaults to 1000.
2180 -iterator Return an iterator across the region
2182 Note that this method uses an approximate algorithm that is only
2183 accurate to 500 bp, so when dealing with bins that are smaller than
2184 1000 bp, you may see some shifting of counts between adjacent bins.
2186 Although an -iterator option is provided, the method only ever returns
2187 a single feature, so this is fairly useless.
2192 sub feature_summary
{
2194 my ($seq_name,$start,$end,$types,$bins,$iterator) =
2195 rearrange
([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],
2196 ['TYPES
','TYPE
','PRIMARY_TAG
'],
2200 my ($coverage,$tag) = $self->coverage_array(-seqid=> $seq_name,
2204 -bins => $bins) or return;
2206 for (@$coverage) { $score += $_ }
2207 $score /= @$coverage;
2209 my $feature = Bio::SeqFeature::Lite->new(-seq_id => $seq_name,
2215 { coverage => [$coverage] });
2217 ? Bio::DB::GFF::FeatureIterator->new($feature)
2221 =head2 coverage_array
2223 Title : coverage_array
2224 Usage : $arrayref = $db->coverage_array(@args)
2225 Function: returns a coverage summary across indicated region/type
2226 Returns : an array reference
2230 This method is used to get coverage density information across a
2231 region of interest. The arguments are identical to feature_summary,
2232 except that instead of returning a Bio::SeqFeatureI object, it returns
2233 an array reference of the desired number of bins. The value of each
2234 element corresponds to the number of features in the bin.
2238 Argument Description
2239 -------- -----------
2241 -seq_id Sequence ID for the region
2242 -start Start of region
2244 -type/-types Feature type of interest or array ref of types
2245 -bins Number of bins across region. Defaults to 1000.
2247 Note that this method uses an approximate algorithm that is only
2248 accurate to 500 bp, so when dealing with bins that are smaller than
2249 1000 bp, you may see some shifting of counts between adjacent bins.
2253 sub coverage_array {
2255 my ($seq_name,$start,$end,$types,$bins) =
2256 rearrange([['SEQID
','SEQ_ID
','REF
'],'START
',['STOP
','END'],
2257 ['TYPES
','TYPE
','PRIMARY_TAG
'],'BINS
'],@_);
2259 $types = $self->parse_types($types);
2260 my $dbh = $self->features_db;
2265 my $segment = $self->segment($seq_name) or $self->throw("unknown seq_id $seq_name");
2266 $end = $segment->end;
2269 my $binsize = ($end-$start+1)/$bins;
2270 my $seqid = $seq_name;
2272 return [] unless $seqid;
2274 # where each bin starts
2275 my @his_bin_array = map {$start + $binsize * $_} (0..$bins);
2276 my @sum_bin_array = map {int(($_-1)/SUMMARY_BIN_SIZE)} @his_bin_array;
2278 my $interval_stats_table = 'finterval_stats
';
2280 # pick up the type ids
2281 my ($type_from,@a) = $self->types_query($types);
2282 my $query = "select ftypeid,fmethod,fsource from ftype where $type_from";
2283 my $sth = $dbh->prepare_delayed($query);
2284 my (@t,$report_tag);
2286 while (my ($t,$method,$source) = $sth->fetchrow_array) {
2287 $report_tag ||= "$method:$source";
2294 SELECT fbin,fcum_count
2295 FROM $interval_stats_table
2297 AND fref=? AND fbin >= ?
2301 $sth = $dbh->prepare_delayed($sql) or warn $dbh->errstr;
2303 for my $typeid (@t) {
2305 for (my $i=0;$i<@sum_bin_array;$i++) {
2307 my @args = ($typeid,$seqid,$sum_bin_array[$i]);
2308 $self->_print_query($sql,@args) if $self->debug;
2310 $sth->execute(@args) or $self->throw($sth->errstr);
2311 my ($bin,$cum_count) = $sth->fetchrow_array;
2312 push @
{$bins{$typeid}},[$bin,$cum_count];
2316 return unless %bins;
2319 my $firstbin = int(($start-1)/$binsize);
2320 for my $type (keys %bins) {
2321 my $arry = $bins{$type};
2322 my $last_count = $arry->[0][1];
2326 for my $b (@
$arry) {
2327 my ($bin,$count) = @
$b;
2328 $delta = $count - $last_count if $bin > $last_bin;
2329 $merged_bins[$i++] = $delta;
2330 $last_count = $count;
2335 return wantarray ?
(\
@merged_bins,$report_tag) : \
@merged_bins;
2339 =head2 build_summary_statistics
2341 Title : build_summary_statistics
2342 Usage : $db->build_summary_statistics
2343 Function: prepares the table needed to call feature_summary()
2348 This method is used to build the summary statistics table that is used
2349 by the feature_summary() and coverage_array() methods. It needs to be
2350 called whenever the database is updated.
2354 sub build_summary_statistics
{
2356 my $interval_stats_table = 'finterval_stats';
2357 my $dbh = $self->dbh;
2360 my $sbs = SUMMARY_BIN_SIZE
;
2363 $self->_add_interval_stats_table;
2364 $self->disable_keys($interval_stats_table);
2365 $dbh->do("DELETE FROM $interval_stats_table");
2367 my $insert = $dbh->prepare(<<END) or $self->throw($dbh->errstr);
2368 INSERT INTO $interval_stats_table
2369 (ftypeid,fref,fbin,fcum_count)
2375 my $sql = 'select ftypeid,fref,fstart,fstop from fdata order by ftypeid,fref,fstart';
2376 my $select = $dbh->prepare($sql) or $self->throw($dbh->errstr);
2378 my $current_bin = -1;
2379 my ($current_type,$current_seqid,$count);
2381 my (%residuals,$last_bin);
2383 my $le = -t \
*STDERR ?
"\r" : "\n";
2387 while (my($typeid,$seqid,$start,$end) = $select->fetchrow_array) {
2388 print STDERR
$count," features processed$le" if ++$count % 1000 == 0;
2390 my $bin = int($start/$sbs);
2391 $current_type ||= $typeid;
2392 $current_seqid ||= $seqid;
2394 # because the input is sorted by start, no more features will contribute to the
2395 # current bin so we can dispose of it
2396 if ($bin != $current_bin) {
2397 if ($seqid != $current_seqid or $typeid != $current_type) {
2398 # load all bins left over
2399 $self->_load_bins($insert,\
%residuals,\
$cum_count,$current_type,$current_seqid);
2403 # load all up to current one
2404 $self->_load_bins($insert,\
%residuals,\
$cum_count,$current_type,$current_seqid,$current_bin);
2408 $last_bin = $current_bin;
2409 ($current_seqid,$current_type,$current_bin) = ($seqid,$typeid,$bin);
2411 # summarize across entire spanned region
2412 my $last_bin = int(($end-1)/$sbs);
2413 for (my $b=$bin;$b<=$last_bin;$b++) {
2418 # load all bins left over
2419 $self->_load_bins($insert,\
%residuals,\
$cum_count,$current_type,$current_seqid);
2420 $self->enable_keys($interval_stats_table);
2424 if ($result) { $dbh->commit } else { warn "Can't build summary statistics: $@"; $dbh->rollback };
2430 my ($insert,$residuals,$cum_count,$type,$seqid,$stop_after) = @_;
2431 for my $b (sort {$a<=>$b} keys %$residuals) {
2432 last if defined $stop_after and $b > $stop_after;
2433 $$cum_count += $residuals->{$b};
2434 my @args = ($type,$seqid,$b,$$cum_count);
2435 $insert->execute(@args) or warn $insert->errstr;
2436 delete $residuals->{$b}; # no longer needed
2440 sub _add_interval_stats_table
{
2442 my $schema = $self->schema;
2443 my $create_table_stmt = $schema->{'finterval_stats'}{'table'};
2444 my $dbh = $self->features_db;
2445 $dbh->do("drop table finterval_stats");
2446 $dbh->do($create_table_stmt) || warn $dbh->errstr;
2449 sub disable_keys
{ } # noop
2450 sub enable_keys
{ } # noop
2458 Schemas need work to support multiple hierarchical groups.
2462 L<Bio::DB::GFF>, L<bioperl>
2466 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
2468 Copyright (c) 2001 Cold Spring Harbor Laboratory.
2470 This library is free software; you can redistribute it and/or modify
2471 it under the same terms as Perl itself.