Bio::DB::Universal: move into its own distribution
[bioperl-live.git] / Bio / DB / SeqFeature.pm
bloba482177aa545b46aaffc0ccb87fcf2389e28e2e9
1 package Bio::DB::SeqFeature;
4 =head1 NAME
6 Bio::DB::SeqFeature -- Normalized feature for use with Bio::DB::SeqFeature::Store
8 =head1 SYNOPSIS
10 use Bio::DB::SeqFeature::Store;
12 # Open the sequence database
13 my $db = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
14 -dsn => 'dbi:mysql:test');
15 my ($feature) = $db->get_features_by_name('ZK909');
16 my @subfeatures = $feature->get_SeqFeatures();
17 my @exons_only = $feature->get_SeqFeatures('exon');
19 # create a new object
20 my $new = $db->new_feature(-primary_tag=>'gene',
21 -seq_id => 'chr3',
22 -start => 10000,
23 -end => 11000);
25 # add a new exon
26 $feature->add_SeqFeature($db->new_feature(-primary_tag=>'exon',
27 -seq_id => 'chr3',
28 -start => 5000,
29 -end => 5551));
31 =head1 DESCRIPTION
33 The Bio::DB::SeqFeature object is the default SeqFeature class stored
34 in Bio::DB::SeqFeature databases. It implements both the
35 Bio::DB::SeqFeature::NormalizedFeatureI and
36 Bio::DB::SeqFeature::NormalizedTableFeatureI interfaces, which means that its
37 subfeatures, if any, are stored in the database in a normalized
38 fashion, and that the parent/child hierarchy of features and
39 subfeatures is also stored in the database as a set of tuples. This
40 provides efficiencies in both storage and retrieval speed.
42 Typically you will not create Bio::DB::SeqFeature directly, but will
43 ask the database to do so on your behalf, as described in
44 L<Bio::DB::SeqFeature::Store>.
46 =cut
48 # just like Bio::DB::SeqFeature::NormalizedFeature except that the parent/child
49 # relationships are stored in a table in the Bio::DB::SeqFeature::Store
51 use strict;
52 use Carp 'croak';
53 use Bio::DB::SeqFeature::Store;
54 use base qw(Bio::DB::SeqFeature::NormalizedFeature
55 Bio::DB::SeqFeature::NormalizedTableFeatureI);
57 =head2 new
59 Title : new
60 Usage : $feature = Bio::DB::SeqFeature->new(@args)
61 Function: create a new feature
62 Returns : the new seqfeature
63 Args : see below
64 Status : public
66 This method creates and, if possible stores into a database, a new
67 Bio::DB::SeqFeature::NormalizedFeature object using the specialized
68 Bio::DB::SeqFeature class.
70 The arguments are the same as Bio::SeqFeature::Generic-E<gt>new() and
71 Bio::Graphics::Feature-E<gt>new(). The most important difference is the
72 B<-store> option, which if present creates the object in a
73 Bio::DB::SeqFeature::Store database, and the B<-index> option, which
74 controls whether the feature will be indexed for retrieval (default is
75 true). Ordinarily, you would only want to turn indexing on when
76 creating top level features, and off only when storing
77 subfeatures. The default is on.
79 Arguments are as follows:
81 -seq_id the reference sequence
82 -start the start position of the feature
83 -end the stop position of the feature
84 -display_name the feature name (returned by seqname)
85 -primary_tag the feature type (returned by primary_tag)
86 -source the source tag
87 -score the feature score (for GFF compatibility)
88 -desc a description of the feature
89 -segments a list of subfeatures (see Bio::Graphics::Feature)
90 -subtype the type to use when creating subfeatures
91 -strand the strand of the feature (one of -1, 0 or +1)
92 -phase the phase of the feature (0..2)
93 -url a URL to link to when rendered with Bio::Graphics
94 -attributes a hashref of tag value attributes, in which the key is the tag
95 and the value is an array reference of values
96 -store a previously-opened Bio::DB::SeqFeature::Store object
97 -index index this feature if true
99 Aliases:
101 -id an alias for -display_name
102 -seqname an alias for -display_name
103 -display_id an alias for -display_name
104 -name an alias for -display_name
105 -stop an alias for -end
106 -type an alias for -primary_tag
108 =cut
# add_segment(@features)
#
# Public alias for add_SeqFeature(). Hands every argument on to
# _add_segment() with the "normalized" flag set to 0 and returns
# whatever _add_segment() returns.
sub add_segment {
    my ($self, @features) = @_;
    return $self->_add_segment(0, @features);
}
116 =head2 Bio::SeqFeatureI methods
118 The following Bio::SeqFeatureI methods are supported:
120 seq_id(), start(), end(), strand(), get_SeqFeatures(),
121 display_name(), primary_tag(), source_tag(), seq(),
122 location(), primary_id(), overlaps(), contains(), equals(),
123 intersection(), union(), has_tag(), remove_tag(),
124 add_tag_value(), get_tag_values(), get_all_tags()
126 Some methods that do not make sense in the context of a genome
127 annotation database system, such as attach_seq(), are not supported.
129 Please see L<Bio::SeqFeatureI> for more details.
131 =cut
133 =head2 add_SeqFeature
135 Title : add_SeqFeature
136 Usage : $flag = $feature->add_SeqFeature(@features)
137 Function: Add subfeatures to the feature
138 Returns : true if successful
139 Args : list of Bio::SeqFeatureI objects
140 Status : public
142 Add one or more subfeatures to the feature. For best results,
143 subfeatures should be of the same class as the parent feature
144 (i.e. do not try mixing Bio::DB::SeqFeature::NormalizedFeature with
145 other feature types).
147 An alias for this method is add_segment().
149 =cut
151 =head2 update
153 Title : update
154 Usage : $flag = $feature->update()
155 Function: Update feature in the database
156 Returns : true if successful
157 Args : none
158 Status : public
160 After changing any fields in the feature, call update() to write it to
161 the database. This is not needed for add_SeqFeature() as update() is
162 invoked automatically.
164 =cut
166 =head2 get_SeqFeatures
168 Title : get_SeqFeatures
169 Usage : @subfeatures = $feature->get_SeqFeatures([@types])
170 Function: return subfeatures of this feature
171 Returns : list of subfeatures
172 Args : list of subfeature primary_tags (optional)
173 Status : public
175 This method extends the Bio::SeqFeatureI get_SeqFeatures() slightly by
176 allowing you to pass a list of primary_tags, in which case only
177 subfeatures whose primary_tag is contained on the list will be
178 returned. Without any types passed all subfeatures are returned.
180 =cut
182 =head2 object_store
184 Title : object_store
185 Usage : $store = $feature->object_store([$new_store])
186 Function: get or set the database handle
187 Returns : current database handle
188 Args : new database handle (optional)
189 Status : public
191 This method will get or set the Bio::DB::SeqFeature::Store object that
192 is associated with the feature. After changing the store, you should
193 probably unset the primary_id() of the feature and call update() to ensure
194 that the object is written into the database as a new feature.
196 =cut
198 =head2 overloaded_names
200 Title : overloaded_names
201 Usage : $overload = $feature->overloaded_names([$new_overload])
202 Function: get or set overloading of object strings
203 Returns : current flag
204 Args : new flag (optional)
205 Status : public
207 For convenience, when objects of this class are stringified, they are
208 represented in the form "primary_tag(display_name)". To turn this
209 feature off, call overloaded_names() with a false value. You can
210 invoke this on an individual feature object or on the class:
212 Bio::DB::SeqFeature::NormalizedFeature->overloaded_names(0);
214 =cut
216 =head2 segment
218 Title : segment
219 Usage : $segment = $feature->segment
220 Function: return a Segment object corresponding to feature
221 Returns : a Bio::DB::SeqFeature::Segment
222 Args : none
223 Status : public
225 This turns the feature into a Bio::DB::SeqFeature::Segment object,
226 which you can then use to query for overlapping features. See
227 L<Bio::DB::SeqFeature::Segment>.
229 =cut
231 =head2 AUTOLOADED methods
233 @subfeatures = $feature->Exon;
235 If you use an unknown method that begins with a capital letter, then
236 the feature autogenerates a call to get_SeqFeatures() using the
237 lower-cased method name as the primary_tag. In other words
238 $feature-E<gt>Exon is equivalent to:
240 @subfeatures = $feature->get_SeqFeatures('exon')
242 =cut
244 =head2 load_id
246 Title : load_id
247 Usage : $id = $feature->load_id
248 Function: get the GFF3 load ID
249 Returns : the GFF3 load ID (string)
250 Args : none
251 Status : public
253 For features that were originally loaded by the GFF3 loader, this
254 method returns the GFF3 load ID. This method may not be supported in
255 future versions of the module.
257 =cut
259 =head2 primary_id
261 Title : primary_id
262 Usage : $id = $feature->primary_id([$new_id])
263 Function: get/set the database ID of the feature
264 Returns : the current primary ID
265 Args : new primary ID (optional)
266 Status : public
268 This method gets or sets the primary ID of the feature in the
269 underlying Bio::DB::SeqFeature::Store database. If you change this
270 field and then call update(), it will have the effect of making a copy
271 of the feature in the database under a new ID.
273 =cut
275 =head2 target
277 Title : target
278 Usage : $segment = $feature->target
279 Function: return the segment corresponding to the "Target" attribute
280 Returns : a Bio::DB::SeqFeature::Segment object
281 Args : none
282 Status : public
284 For features that are aligned with others via the GFF3 Target
285 attribute, this returns a segment corresponding to the aligned
286 region. The CIGAR gap string is not yet supported.
288 =cut
290 =head2 Internal methods
292 =over 4
294 =item $feature-E<gt>as_string()
296 Internal method used to implement overloaded stringification.
298 =item $boolean = $feature-E<gt>type_match(@list_of_types)
300 Internal method that will return true if the primary_tag of the feature and
301 source_tag match any of the list of types (in primary_tag:source_tag
302 format) provided.
304 =back
306 =cut
# _add_segment($normalized, @features)
#
# Adds subfeatures to this feature.
#
#   $normalized - true if the children should be stored through the
#                 feature's object_store(); false for legacy inline storage
#   @features   - the subfeatures to add
#
# When $normalized is false, or the store cannot record parent/child
# relationships (or there is no store at all), the request is delegated
# unchanged to the parent class implementation.
#
# Otherwise the provided features are converted by _create_subfeatures()
# and written to the store as children of this feature.  Per the original
# note on this method, a feature that already has a primary id and an
# object_store() method is not stored again but has its primary id reused;
# that logic is presumably inside _create_subfeatures() -- TODO confirm.
#
# Returns the value of the last statement evaluated (the result of
# update() when the feature had to be rewritten).
sub _add_segment {
    my $self       = shift;
    my $normalized = shift;

    my $store = $self->object_store;

    # eval guards against a missing store or one lacking this method.
    my $store_parentage = eval { $store->can_store_parentage };

    return $self->SUPER::_add_segment($normalized, @_)
        unless $normalized && $store_parentage;

    # From here on $normalized is known to be true, so the children are
    # always written through the store; the former inline-storage branch
    # could never be reached and has been removed.
    my @segments = $self->_create_subfeatures($normalized, @_);

    # Snapshot of our own coordinates, used to detect whether the
    # boundary fix below moved us.
    my $position = sub { "@{$self}{'start','stop','ref','strand'}" };
    my $pos      = $position->();

    # fix boundaries
    $self->_fix_boundaries(\@segments, 1);

    # freakish fixing of our non-standard Target attribute
    $self->_fix_target(\@segments);

    # write our children out
    $store->add_SeqFeature($self, @segments);

    # write us back to disk, but only if we are already in the database
    # and our coordinates actually changed
    $self->update if $self->primary_id && $pos ne $position->();
}
# get_SeqFeatures([@types])
#
# Returns the subfeatures of this feature.  Children may live directly in
# the object under the "segments" key (legacy behavior) and/or in the
# database behind object_store(); the inline ones are returned first.
#
# If @types is given, only children for which type_match(@types) is true
# are returned.  For database children the type list is handed to the
# store when it reports that subfeature types are indexed; otherwise all
# children are fetched and filtered here.
#
# Before returning, each child is handed this feature's store via
# object_store(); failures of that call are ignored.
sub get_SeqFeatures {
    my ($self, @wanted_types) = @_;
    my $filtering = scalar @wanted_types;

    # children held inline in the object
    my @inline = exists $self->{segments} ? @{ $self->{segments} } : ();
    if ($filtering) {
        @inline = grep { $_->type_match(@wanted_types) } @inline;
    }

    my $store = $self->object_store;

    # children held in the database
    my @from_db;
    if ($store && $store->can_store_parentage) {
        if ($filtering && !$store->subfeature_types_are_indexed) {
            @from_db = grep { $_->type_match(@wanted_types) }
                       $store->fetch_SeqFeatures($self);
        }
        else {
            @from_db = $store->fetch_SeqFeatures($self, @wanted_types);
        }
    }

    my @children = (@inline, @from_db);
    for my $child (@children) {
        eval { $child->object_store($store) };
    }

    return @children;
}
# denormalized_segments()
#
# Returns the subfeatures stored inline in the object under the
# "segments" key, or an empty list if that key is absent.
sub denormalized_segments {
    my ($self) = @_;
    return () unless exists $self->{segments};
    return @{ $self->{segments} };
}
# denormalized_segment_count()
#
# Returns the number of subfeatures stored inline in the object under
# the "segments" key; 0 if that key is absent.
sub denormalized_segment_count {
    my ($self) = @_;
    return exists $self->{segments}
        ? scalar @{ $self->{segments} }
        : 0;
}
# is_remote()
#
# Provided for Bio::LocationI compatibility.  Always returns nothing
# (an empty list in list context, undef in scalar context).
sub is_remote {
    return;
}
# location_type()
#
# Provided for Bio::LocationI compatibility.  Always returns the
# string 'EXACT'.
sub location_type {
    return 'EXACT';
}
# feature_id()
#
# Provided for Bio::DB::GFF compatibility.  Returns whatever
# primary_id() returns for this feature.
sub feature_id {
    my ($self) = @_;
    return $self->primary_id;
}
396 __END__
398 =head1 BUGS
400 This is an early version, so there are certainly some bugs. Please
401 use the BioPerl bug tracking system to report bugs.
403 =head1 SEE ALSO
405 L<bioperl>,
406 L<Bio::DB::SeqFeature::Store>,
407 L<Bio::DB::SeqFeature::Segment>,
408 L<Bio::DB::SeqFeature::NormalizedFeature>,
409 L<Bio::DB::SeqFeature::GFF3Loader>,
410 L<Bio::DB::SeqFeature::Store::DBI::mysql>,
411 L<Bio::DB::SeqFeature::Store::bdb>
413 =head1 AUTHOR
415 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
417 Copyright (c) 2006 Cold Spring Harbor Laboratory.
419 This library is free software; you can redistribute it and/or modify
420 it under the same terms as Perl itself.
422 =cut