1 package Bio
::DB
::SeqFeature
::NormalizedFeature
;
6 Bio::DB::SeqFeature::NormalizedFeature -- Normalized feature for use with Bio::DB::SeqFeature::Store
10 use Bio::DB::SeqFeature::Store;
11 # Open the sequence database
12 my $db = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
13 -dsn => 'dbi:mysql:test');
14 my ($feature) = $db->get_features_by_name('ZK909');
15 my @subfeatures = $feature->get_SeqFeatures();
16 my @exons_only = $feature->get_SeqFeatures('exon');
19 $db->seqfeature_class('Bio::DB::SeqFeature::NormalizedFeature');
20 my $new = $db->new_feature(-primary_tag=>'gene',
26 $feature->add_SeqFeature($db->new_feature(-primary_tag=>'exon',
33 The Bio::DB::SeqFeature::NormalizedFeature object is an alternative
34 representation of SeqFeatures for use with Bio::DB::SeqFeature::Store
35 database system. It is identical to Bio::DB::SeqFeature, except that
36 instead of storing feature/subfeature relationships in a database
37 table, the information is stored in the object itself. This actually
38 makes the objects somewhat inconvenient to work with from SQL, but
39 does speed up access somewhat.
41 To use this class, pass the name of the class to the
42 Bio::DB::SeqFeature::Store object's seqfeature_class() method. After
43 this, $db-E<gt>new_feature() will create objects of type
44 Bio::DB::SeqFeature::NormalizedFeature. If you are using the GFF3
45 loader, pass Bio::DB::SeqFeature::Store::GFF3Loader-E<gt>new() the
46 -seqfeature_class argument:
48 use Bio::DB::SeqFeature::Store::GFF3Loader;
50 my $store = connect_to_db_somehow();
51 my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(
53 -seqfeature_class => 'Bio::DB::SeqFeature::NormalizedFeature'
60 use base
'Bio::SeqFeature::Lite';
61 use base
'Bio::DB::SeqFeature::NormalizedFeatureI';
62 use overload
'""' => \
&as_string
,
69 my $USE_OVERLOADED_NAMES = 1;
71 # some of this is my fault and some of it is changing bioperl API
72 *get_all_SeqFeatures
= *sub_SeqFeature
= *merged_segments
= \
&segments
;
74 ##### CLASS METHODS ####
79 Usage : $feature = Bio::DB::SeqFeature::NormalizedFeature->new(@args)
80 Function: create a new feature
81 Returns : the new seqfeature
85 This method creates and, if possible stores into a database, a new
86 Bio::DB::SeqFeature::NormalizedFeature object using the specialized
87 Bio::DB::SeqFeature class.
89 The arguments are the same to Bio::SeqFeature::Generic-E<gt>new() and
90 Bio::Graphics::Feature-E<gt>new(). The most important difference is the
91 B<-store> option, which if present creates the object in a
92 Bio::DB::SeqFeature::Store database, and he B<-index> option, which
93 controls whether the feature will be indexed for retrieval (default is
94 true). Ordinarily, you would only want to turn indexing on when
95 creating top level features, and off only when storing
96 subfeatures. The default is on.
98 Arguments are as follows:
100 -seq_id the reference sequence
101 -start the start position of the feature
102 -end the stop position of the feature
103 -display_name the feature name (returned by seqname)
104 -primary_tag the feature type (returned by primary_tag)
105 -source the source tag
106 -score the feature score (for GFF compatibility)
107 -desc a description of the feature
108 -segments a list of subfeatures (see Bio::Graphics::Feature)
109 -subtype the type to use when creating subfeatures
110 -strand the strand of the feature (one of -1, 0 or +1)
111 -phase the phase of the feature (0..2)
112 -url a URL to link to when rendered with Bio::Graphics
113 -attributes a hashref of tag value attributes, in which the key is the tag
114 and the value is an array reference of values
115 -store a previously-opened Bio::DB::SeqFeature::Store object
116 -index index this feature if true
120 -id an alias for -display_name
121 -seqname an alias for -display_name
122 -display_id an alias for -display_name
123 -name an alias for -display_name
124 -stop an alias for end
125 -type an alias for primary_tag
132 my $db = $args{-store
} || $args{-factory
};
133 my $index = exists $args{-index} ?
$args{-index} : 1;
134 my $self = $class->SUPER::new
(@_);
138 $db->store($self); # this will set the primary_id
140 $db->store_noindex($self); # this will set the primary_id
142 $self->object_store($db);
147 =head2 Bio::SeqFeatureI methods
149 The following Bio::SeqFeatureI methods are supported:
151 seq_id(), start(), end(), strand(), get_SeqFeatures(),
152 display_name(), primary_tag(), source_tag(), seq(),
153 location(), primary_id(), overlaps(), contains(), equals(),
154 intersection(), union(), has_tag(), remove_tag(),
155 add_tag_value(), get_tag_values(), get_all_tags()
157 Some methods that do not make sense in the context of a genome
158 annotation database system, such as attach_seq(), are not supported.
160 Please see L<Bio::SeqFeatureI> for more details.
167 require Bio
::PrimarySeq
unless Bio
::PrimarySeq
->can('new');
169 my ($start,$end) = ($self->start,$self->end);
170 if ($self->strand < 0) {
171 ($start,$end) = ($end,$start);
174 if (my $store = $self->object_store) {
175 return Bio
::PrimarySeq
->new(-seq
=> $store->fetch_sequence($self->seq_id,$start,$end) || '',
176 -id
=> $self->display_name);
178 return $self->SUPER::seq
($self->seq_id,$start,$end);
184 my ($newstart,$newstop) = @_;
185 my $store = $self->object_store or return;
186 my ($start,$stop) = ($self->start+$newstart-1,$self->end+$newstop-1);
187 if ($self->strand < 0) {
188 ($start,$stop) = ($stop,$start);
190 my $seq = $store->fetch_sequence($self->seq_id,$start,$stop);
191 return Bio
::PrimarySeq
->new($seq);
194 =head2 add_SeqFeature
196 Title : add_SeqFeature
197 Usage : $flag = $feature->add_SeqFeature(@features)
198 Function: Add subfeatures to the feature
199 Returns : true if successful
200 Args : list of Bio::SeqFeatureI objects
203 Add one or more subfeatures to the feature. For best results,
204 subfeatures should be of the same class as the parent feature
205 (i.e. don't try mixing Bio::DB::SeqFeature::NormalizedFeature with
206 other feature types).
208 An alias for this method is add_segment().
214 $self->_add_segment(1,@_);
220 Usage : $flag = $feature->update()
221 Function: Update feature in the database
222 Returns : true if successful
226 After changing any fields in the feature, call update() to write it to
227 the database. This is not needed for add_SeqFeature() as update() is
228 invoked automatically.
234 my $store = $self->object_store or return;
235 $store->store($self);
238 =head2 get_SeqFeatures
240 Title : get_SeqFeature
241 Usage : @subfeatures = $feature->get_SeqFeatures([@types])
242 Function: return subfeatures of this feature
243 Returns : list of subfeatures
244 Args : list of subfeature primary_tags (optional)
247 This method extends the Bio::SeqFeatureI get_SeqFeatures() slightly by
248 allowing you to pass a list of primary_tags, in which case only
249 subfeatures whose primary_tag is contained on the list will be
250 returned. Without any types passed all subfeatures are returned.
255 # segments can be either normalized IDs or ordinary feature objects
256 sub get_SeqFeatures
{
260 my $s = $self->{segments
} or return;
261 my $store = $self->object_store;
270 my @r = grep {$_->type_match(@types)} (@ordinary,$store->fetch_many(\
@ids));
272 eval {$r->object_store($store) };
280 Usage : $store = $feature->object_store([$new_store])
281 Function: get or set the database handle
282 Returns : current database handle
283 Args : new database handle (optional)
286 This method will get or set the Bio::DB::SeqFeature::Store object that
287 is associated with the feature. After changing the store, you should
288 probably unset the feature's primary_id() and call update() to ensure
289 that the object is written into the database as a new feature.
295 my $d = $self->{store
};
296 $self->{store
} = shift if @_;
301 =head2 overloaded_names
303 Title : overloaded_names
304 Usage : $overload = $feature->overloaded_names([$new_overload])
305 Function: get or set overloading of object strings
306 Returns : current flag
307 Args : new flag (optional)
310 For convenience, when objects of this class are stringified, they are
311 represented in the form "primary_tag(display_name)". To turn this
312 feature off, call overloaded_names() with a false value. You can
313 invoke this on an individual feature object or on the class:
315 Bio::DB::SeqFeature::NormalizedFeature->overloaded_names(0);
320 sub overloaded_names
{
322 my $d = $USE_OVERLOADED_NAMES;
323 $USE_OVERLOADED_NAMES = shift if @_;
330 Usage : $segment = $feature->segment
331 Function: return a Segment object corresponding to feature
332 Returns : a Bio::DB::SeqFeature::Segment
336 This turns the feature into a Bio::DB::SeqFeature::Segment object,
337 which you can then use to query for overlapping features. See
338 L<Bio::DB::SeqFeature::Segment>.
344 return Bio
::DB
::SeqFeature
::Segment
->new($self);
349 =head2 AUTOLOADED methods
351 @subfeatures = $feature->Exon;
353 If you use an unknown method that begins with a capital letter, then
354 the feature autogenerates a call to get_SeqFeatures() using the
355 lower-cased method name as the primary_tag. In other words
356 $feature-E<gt>Exon is equivalent to:
358 @subfeature s= $feature->get_SeqFeatures('exon')
360 If you use an unknown method that begins with Tag_(tagname),
361 Att_(tagname) Is_(tagname), then it will be the same as calling the
362 each_tag_value() method with the tagname. In a list context, these
363 autogenerated procedures return the list of results. In scalar
364 context, they return the first item in the list!!
370 my($pack,$func_name) = $AUTOLOAD=~/(.+)::([^:]+)$/;
374 # ignore DESTROY calls
375 return if $func_name eq 'DESTROY';
377 # call attributes if func_name begins with "Tag_" or "Att_":
378 if ($func_name =~ /^(Tag|Att|Is)_(\w+)/) {
379 my @result = $self->each_tag_value($2);
380 return wantarray ?
@result : $result[0];
383 # fetch subfeatures if func_name has an initial cap
384 if ($func_name =~ /^[A-Z]/) {
385 return $self->get_SeqFeatures(lc $func_name);
388 # error message of last resort
389 $self->throw(qq(Can
't locate object method "$func_name" via package "$pack"));
395 $self->_add_segment(0,@_);
398 # This adds subfeatures. It has the property of converting the
399 # provided features into an object like itself and storing them
400 # into the database. If the feature already has a primary id and
401 # an object_store() method, then it is not stored into the database,
402 # but its primary id is reused.
405 my $normalized = shift;
406 my $store = $self->object_store;
408 my @segments = $self->_create_subfeatures($normalized,@_);
411 $self->_fix_boundaries(\
@segments);
413 # freakish fixing of our non-standard Target attribute
414 $self->_fix_target(\
@segments);
416 for my $seg (@segments) {
417 my $id = $normalized ?
$seg->primary_id : $seg;
418 defined $id or $self->throw("No primary ID when there should be");
419 push @
{$self->{segments
}},$id;
422 $self->update if $self->primary_id; # write us back to disk
425 sub _fix_boundaries
{
427 my $segments = shift;
428 my $normalized = shift;
430 my $min_start = $self->start || 999_999_999_999
;
431 my $max_stop = $self->end || -999_999_999_999
;
433 for my $seg (@
$segments) {
434 $min_start = $seg->start if $seg->start < $min_start;
435 $max_stop = $seg->end if $seg->end > $max_stop;
438 # adjust our boundaries, etc.
439 $self->start($min_start) if $min_start < $self->start;
440 $self->end($max_stop) if $max_stop > $self->end;
441 $self->{ref} ||= $segments->[0]->seq_id;
442 $self->{strand
} ||= $segments->[0]->strand;
448 my $normalized = shift; # ignored for now
450 # freakish fixing of our non-standard Target attribute
451 if (my $t = ($self->attributes('Target'))[0]) {
452 my ($seqid,$tstart,$tend,$strand) = split /\s+/,$t;
453 if (defined $tstart && defined $tend) {
454 my $min_tstart = $tstart;
455 my $max_tend = $tend;
456 for my $seg (@
$segs) {
457 my $st = ($seg->attributes('Target'))[0] or next;
458 (undef,$tstart,$tend) = split /\s+/,$st;
459 next unless defined $tstart && defined $tend;
460 $min_tstart = $tstart if $tstart < $min_tstart;
461 $max_tend = $tend if $tend > $max_tend;
463 if ($min_tstart < $tstart or $max_tend > $tend) {
464 $self->{attributes
}{Target
}[0] = join ' ',($seqid,$min_tstart,$max_tend,$strand||'');
470 # undo the load_id and Target hacks on the way out
471 sub format_attributes
{
474 my $fallback_id = shift;
476 my $load_id = $self->load_id || '';
478 my $targobj = ($self->attributes('Target'))[0];
479 # was getting an 'Use of uninitialized value with split' here, changed to cooperate -cjf 7/10/07
480 my ($target) = $targobj ?
split /\s+/,($self->attributes('Target'))[0] : ('');
481 my @tags = $self->all_tags;
484 my @values = $self->each_tag_value($t);
486 # This line prevents Alias from showing up if it matches the load id, but this is not good
487 # @values = grep {$_ ne $load_id && $_ ne $target} @values if $t eq 'Alias';
489 # these are hacks, which we don't want to appear in the file
490 next if $t eq 'load_id';
491 next if $t eq 'parent_id';
493 foreach (@values) { s/\s+$// } # get rid of trailing whitespace
494 push @result,join '=',$self->escape($t),join(',', map {$self->escape($_)} @values) if @values;
496 my $id = $self->primary_id || $fallback_id;
499 $parent_id = join (',',map {$self->escape($_)} @
$parent);
501 my $name = $self->display_name;
502 unshift @result,"ID=".$self->escape($id) if defined $id;
503 unshift @result,"Parent=".$parent_id if defined $parent_id;
504 unshift @result,"Name=".$self->escape($name) if defined $name;
505 return join ';',@result;
508 sub _create_subfeatures
{
510 my $normalized = shift;
512 my $type = $self->{subtype
} || $self->{type
};
513 my $ref = $self->seq_id;
514 my $name = $self->name;
515 my $class = $self->class;
516 my $store = $self->object_store;
517 my $source = $self->source;
520 $store or $self->throw("Feature must be associated with a Bio::DB::SeqFeature::Store database before attempting to add subfeatures to a normalized object");
523 my $index_subfeatures_policy = eval{$store->index_subfeatures};
529 if (UNIVERSAL
::isa
($seg,ref $self)) {
531 if (!$normalized) { # make sure the object has no lazy behavior
532 $seg->primary_id(undef);
533 $seg->object_store(undef);
538 elsif (ref($seg) eq 'ARRAY') {
539 my ($start,$stop) = @
{$seg};
540 next unless defined $start && defined $stop; # fixes an obscure bug somewhere above us
541 my $strand = $self->{strand
};
543 if ($start > $stop) {
544 ($start,$stop) = ($stop,$start);
547 push @segments,$self->new(-start
=> $start,
559 elsif (UNIVERSAL
::isa
($seg,'Bio::SeqFeatureI')) {
560 my $score = $seg->score if $seg->can('score');
561 my $f = $self->new(-start
=> $seg->start,
563 -strand
=> $seg->strand,
564 -seq_id
=> $seg->seq_id,
565 -name
=> $seg->display_name,
566 -primary_tag
=> $seg->primary_tag,
567 -source_tag
=> $seg->source,
571 for my $tag ($seg->get_all_tags) {
572 my @values = $seg->get_tag_values($tag);
573 $f->{attributes
}{$tag} = \
@values;
579 croak
"$seg is neither a Bio::SeqFeatureI object nor an arrayref";
583 return unless @segments;
585 if ($normalized && $store) { # parent/child data is going to be stored in the database
587 my @need_loading = grep {!defined $_->primary_id || $_->object_store ne $store} @segments;
590 if ($index_subfeatures_policy) {
591 $result = $store->store(@need_loading);
593 $result = $store->store_noindex(@need_loading);
595 $result or croak
"Couldn't store one or more subseqfeatures";
605 Usage : $id = $feature->load_id
606 Function: get the GFF3 load ID
607 Returns : the GFF3 load ID (string)
611 For features that were originally loaded by the GFF3 loader, this
612 method returns the GFF3 load ID. This method may not be supported in
613 future versions of the module.
618 return (shift->attributes('load_id'))[0];
625 Usage : @notes = $feature->notes
626 Function: get contents of the GFF3 Note tag
627 Returns : List of GFF3 Note tags
631 For features that were originally loaded by the GFF3 loader, this
632 method returns the contents of the Note tag as a list. This is a
633 convenience for Bio::Graphics, which looks for notes() when it
634 constructs a default description line.
639 return shift->attributes('Note');
645 Usage : $id = $feature->primary_id([$new_id])
646 Function: get/set the feature's database ID
647 Returns : the current primary ID
651 This method gets or sets the primary ID of the feature in the
652 underlying Bio::DB::SeqFeature::Store database. If you change this
653 field and then call update(), it will have the effect of making a copy
654 of the feature in the database under a new ID.
660 my $d = $self->{primary_id
};
661 $self->{primary_id
} = shift if @_;
668 Usage : $segment = $feature->target
669 Function: return the segment correspondent to the "Target" attribute
670 Returns : a Bio::DB::SeqFeature::Segment object
674 For features that are aligned with others via the GFF3 Target
675 attribute, this returns a segment corresponding to the aligned
676 region. The CIGAR gap string is not yet supported.
682 my @targets = $self->attributes('Target');
684 for my $t (@targets) {
685 my ($seqid,$start,$end,$strand) = split /\s+/,$t;
687 $strand = $strand eq '+' ?
1
688 : $strand eq '-' ?
-1
690 push @result,Bio
::DB
::SeqFeature
::Segment
->new($self->object_store,
696 return wantarray ?
@result : $result[0];
699 =head2 Internal methods
703 =item $feature-E<gt>as_string()
705 Internal method used to implement overloaded stringification.
707 =item $boolean = $feature-E<gt>type_match(@list_of_types)
709 Internal method that will return true if the feature's primary_tag and
710 source_tag match any of the list of types (in primary_tag:source_tag
719 return overload
::StrVal
($self) unless $self->overloaded_names;
720 my $name = $self->display_name || $self->load_id;
721 $name ||= "id=".$self->primary_id if $self->primary_id;
722 $name ||= "<unnamed>";
723 my $method = $self->primary_tag;
724 my $source= $self->source_tag;
725 my $type = $source ?
"$method:$source" : $method;
726 return "$type($name)";
732 my $store1 = $self->object_store;
733 my $store2 = eval {$b->object_store} || '';
734 return $store1 eq $store2 && $self->primary_id eq $b->primary_id;
739 return !$self->eq(shift);
742 # completely case insensitive
746 my $method = lc $self->primary_tag;
747 my $source = lc $self->source_tag;
749 my ($m,$s) = map {lc $_} split /:/,$t;
750 return 1 if $method eq $m && (!defined $s || $source eq $s);
755 sub segments
{ shift->get_SeqFeatures(@_) }
764 This is an early version, so there are certainly some bugs. Please
765 use the BioPerl bug tracking system to report bugs.
770 L<Bio::DB::SeqFeature>,
771 L<Bio::DB::SeqFeature::Store>,
772 L<Bio::DB::SeqFeature::Segment>,
773 L<Bio::DB::SeqFeature::GFF3Loader>,
774 L<Bio::DB::SeqFeature::Store::DBI::mysql>,
775 L<Bio::DB::SeqFeature::Store::bdb>
779 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
781 Copyright (c) 2006 Cold Spring Harbor Laboratory.
783 This library is free software; you can redistribute it and/or modify
784 it under the same terms as Perl itself.