t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / DB / SeqFeature / NormalizedFeature.pm
blobaca7beeeb9080fc10eb113dfbbc5a27ce40bede1
1 package Bio::DB::SeqFeature::NormalizedFeature;
4 =head1 NAME
6 Bio::DB::SeqFeature::NormalizedFeature -- Normalized feature for use with Bio::DB::SeqFeature::Store
8 =head1 SYNOPSIS
10 use Bio::DB::SeqFeature::Store;
11 # Open the sequence database
12 my $db = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
13 -dsn => 'dbi:mysql:test');
14 my ($feature) = $db->get_features_by_name('ZK909');
15 my @subfeatures = $feature->get_SeqFeatures();
16 my @exons_only = $feature->get_SeqFeatures('exon');
18 # create a new object
19 $db->seqfeature_class('Bio::DB::SeqFeature::NormalizedFeature');
20 my $new = $db->new_feature(-primary_tag=>'gene',
21 -seq_id => 'chr3',
22 -start => 10000,
23 -end => 11000);
25 # add a new exon
26 $feature->add_SeqFeature($db->new_feature(-primary_tag=>'exon',
27 -seq_id => 'chr3',
28 -start => 5000,
29 -end => 5551));
31 =head1 DESCRIPTION
33 The Bio::DB::SeqFeature::NormalizedFeature object is an alternative
34 representation of SeqFeatures for use with Bio::DB::SeqFeature::Store
35 database system. It is identical to Bio::DB::SeqFeature, except that
36 instead of storing feature/subfeature relationships in a database
37 table, the information is stored in the object itself. This actually
38 makes the objects somewhat inconvenient to work with from SQL, but
39 does speed up access somewhat.
41 To use this class, pass the name of the class to the
42 Bio::DB::SeqFeature::Store object's seqfeature_class() method. After
43 this, $db-E<gt>new_feature() will create objects of type
44 Bio::DB::SeqFeature::NormalizedFeature. If you are using the GFF3
45 loader, pass Bio::DB::SeqFeature::Store::GFF3Loader-E<gt>new() the
46 -seqfeature_class argument:
48 use Bio::DB::SeqFeature::Store::GFF3Loader;
50 my $store = connect_to_db_somehow();
51 my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(
52 -store=>$db,
53 -seqfeature_class => 'Bio::DB::SeqFeature::NormalizedFeature'
56 =cut
58 use strict;
59 use Carp 'croak';
60 use base 'Bio::SeqFeature::Lite';
61 use base 'Bio::DB::SeqFeature::NormalizedFeatureI';
62 use overload '""' => \&as_string,
63 eq => \&eq,
64 ne => \&ne,
65 fallback => 1;
67 use vars '$AUTOLOAD';
69 my $USE_OVERLOADED_NAMES = 1;
71 # some of this is my fault and some of it is changing bioperl API
72 *get_all_SeqFeatures = *sub_SeqFeature = *merged_segments = \&segments;
74 ##### CLASS METHODS ####
76 =head2 new
78 Title : new
79 Usage : $feature = Bio::DB::SeqFeature::NormalizedFeature->new(@args)
80 Function: create a new feature
81 Returns : the new seqfeature
82 Args : see below
83 Status : public
85 This method creates and, if possible stores into a database, a new
86 Bio::DB::SeqFeature::NormalizedFeature object using the specialized
87 Bio::DB::SeqFeature class.
89 The arguments are the same to Bio::SeqFeature::Generic-E<gt>new() and
90 Bio::Graphics::Feature-E<gt>new(). The most important difference is the
91 B<-store> option, which if present creates the object in a
92 Bio::DB::SeqFeature::Store database, and he B<-index> option, which
93 controls whether the feature will be indexed for retrieval (default is
94 true). Ordinarily, you would only want to turn indexing on when
95 creating top level features, and off only when storing
96 subfeatures. The default is on.
98 Arguments are as follows:
100 -seq_id the reference sequence
101 -start the start position of the feature
102 -end the stop position of the feature
103 -display_name the feature name (returned by seqname)
104 -primary_tag the feature type (returned by primary_tag)
105 -source the source tag
106 -score the feature score (for GFF compatibility)
107 -desc a description of the feature
108 -segments a list of subfeatures (see Bio::Graphics::Feature)
109 -subtype the type to use when creating subfeatures
110 -strand the strand of the feature (one of -1, 0 or +1)
111 -phase the phase of the feature (0..2)
112 -url a URL to link to when rendered with Bio::Graphics
113 -attributes a hashref of tag value attributes, in which the key is the tag
114 and the value is an array reference of values
115 -store a previously-opened Bio::DB::SeqFeature::Store object
116 -index index this feature if true
118 Aliases:
120 -id an alias for -display_name
121 -seqname an alias for -display_name
122 -display_id an alias for -display_name
123 -name an alias for -display_name
124 -stop an alias for end
125 -type an alias for primary_tag
127 =cut
129 sub new {
130 my $class = shift;
131 my %args = @_;
132 my $db = $args{-store} || $args{-factory};
133 my $index = exists $args{-index} ? $args{-index} : 1;
134 my $self = $class->SUPER::new(@_);
136 if ($db) {
137 if ($index) {
138 $db->store($self); # this will set the primary_id
139 } else {
140 $db->store_noindex($self); # this will set the primary_id
142 $self->object_store($db);
144 $self;
147 =head2 Bio::SeqFeatureI methods
149 The following Bio::SeqFeatureI methods are supported:
151 seq_id(), start(), end(), strand(), get_SeqFeatures(),
152 display_name(), primary_tag(), source_tag(), seq(),
153 location(), primary_id(), overlaps(), contains(), equals(),
154 intersection(), union(), has_tag(), remove_tag(),
155 add_tag_value(), get_tag_values(), get_all_tags()
157 Some methods that do not make sense in the context of a genome
158 annotation database system, such as attach_seq(), are not supported.
160 Please see L<Bio::SeqFeatureI> for more details.
162 =cut
164 sub seq {
165 my $self = shift;
167 require Bio::PrimarySeq unless Bio::PrimarySeq->can('new');
169 my ($start,$end) = ($self->start,$self->end);
170 if ($self->strand < 0) {
171 ($start,$end) = ($end,$start);
174 if (my $store = $self->object_store) {
175 return Bio::PrimarySeq->new(-seq => $store->fetch_sequence($self->seq_id,$start,$end) || '',
176 -id => $self->display_name);
177 } else {
178 return $self->SUPER::seq($self->seq_id,$start,$end);
182 sub subseq {
183 my $self = shift;
184 my ($newstart,$newstop) = @_;
185 my $store = $self->object_store or return;
186 my ($start,$stop) = ($self->start+$newstart-1,$self->end+$newstop-1);
187 if ($self->strand < 0) {
188 ($start,$stop) = ($stop,$start);
190 my $seq = $store->fetch_sequence($self->seq_id,$start,$stop);
191 return Bio::PrimarySeq->new($seq);
194 =head2 add_SeqFeature
196 Title : add_SeqFeature
197 Usage : $flag = $feature->add_SeqFeature(@features)
198 Function: Add subfeatures to the feature
199 Returns : true if successful
200 Args : list of Bio::SeqFeatureI objects
201 Status : public
203 Add one or more subfeatures to the feature. For best results,
204 subfeatures should be of the same class as the parent feature
205 (i.e. don't try mixing Bio::DB::SeqFeature::NormalizedFeature with
206 other feature types).
208 An alias for this method is add_segment().
210 =cut
212 sub add_SeqFeature {
213 my $self = shift;
214 $self->_add_segment(1,@_);
217 =head2 update
219 Title : update
220 Usage : $flag = $feature->update()
221 Function: Update feature in the database
222 Returns : true if successful
223 Args : none
224 Status : public
226 After changing any fields in the feature, call update() to write it to
227 the database. This is not needed for add_SeqFeature() as update() is
228 invoked automatically.
230 =cut
232 sub update {
233 my $self = shift;
234 my $store = $self->object_store or return;
235 $store->store($self);
238 =head2 get_SeqFeatures
240 Title : get_SeqFeature
241 Usage : @subfeatures = $feature->get_SeqFeatures([@types])
242 Function: return subfeatures of this feature
243 Returns : list of subfeatures
244 Args : list of subfeature primary_tags (optional)
245 Status : public
247 This method extends the Bio::SeqFeatureI get_SeqFeatures() slightly by
248 allowing you to pass a list of primary_tags, in which case only
249 subfeatures whose primary_tag is contained on the list will be
250 returned. Without any types passed all subfeatures are returned.
252 =cut
255 # segments can be either normalized IDs or ordinary feature objects
256 sub get_SeqFeatures {
257 my $self = shift;
258 my @types = @_;
260 my $s = $self->{segments} or return;
261 my $store = $self->object_store;
262 my (@ordinary,@ids);
263 for (@$s) {
264 if (ref ($_)) {
265 push @ordinary,$_;
266 } else {
267 push @ids,$_;
270 my @r = grep {$_->type_match(@types)} (@ordinary,$store->fetch_many(\@ids));
271 for my $r (@r) {
272 eval {$r->object_store($store) };
274 return @r;
277 =head2 object_store
279 Title : object_store
280 Usage : $store = $feature->object_store([$new_store])
281 Function: get or set the database handle
282 Returns : current database handle
283 Args : new database handle (optional)
284 Status : public
286 This method will get or set the Bio::DB::SeqFeature::Store object that
287 is associated with the feature. After changing the store, you should
288 probably unset the feature's primary_id() and call update() to ensure
289 that the object is written into the database as a new feature.
291 =cut
293 sub object_store {
294 my $self = shift;
295 my $d = $self->{store};
296 $self->{store} = shift if @_;
301 =head2 overloaded_names
303 Title : overloaded_names
304 Usage : $overload = $feature->overloaded_names([$new_overload])
305 Function: get or set overloading of object strings
306 Returns : current flag
307 Args : new flag (optional)
308 Status : public
310 For convenience, when objects of this class are stringified, they are
311 represented in the form "primary_tag(display_name)". To turn this
312 feature off, call overloaded_names() with a false value. You can
313 invoke this on an individual feature object or on the class:
315 Bio::DB::SeqFeature::NormalizedFeature->overloaded_names(0);
317 =cut
320 sub overloaded_names {
321 my $class = shift;
322 my $d = $USE_OVERLOADED_NAMES;
323 $USE_OVERLOADED_NAMES = shift if @_;
327 =head2 segment
329 Title : segment
330 Usage : $segment = $feature->segment
331 Function: return a Segment object corresponding to feature
332 Returns : a Bio::DB::SeqFeature::Segment
333 Args : none
334 Status : public
336 This turns the feature into a Bio::DB::SeqFeature::Segment object,
337 which you can then use to query for overlapping features. See
338 L<Bio::DB::SeqFeature::Segment>.
340 =cut
342 sub segment {
343 my $self = shift;
344 return Bio::DB::SeqFeature::Segment->new($self);
347 ### instance methods
349 =head2 AUTOLOADED methods
351 @subfeatures = $feature->Exon;
353 If you use an unknown method that begins with a capital letter, then
354 the feature autogenerates a call to get_SeqFeatures() using the
355 lower-cased method name as the primary_tag. In other words
356 $feature-E<gt>Exon is equivalent to:
358 @subfeature s= $feature->get_SeqFeatures('exon')
360 If you use an unknown method that begins with Tag_(tagname),
361 Att_(tagname) Is_(tagname), then it will be the same as calling the
362 each_tag_value() method with the tagname. In a list context, these
363 autogenerated procedures return the list of results. In scalar
364 context, they return the first item in the list!!
366 =cut
369 sub AUTOLOAD {
370 my($pack,$func_name) = $AUTOLOAD=~/(.+)::([^:]+)$/;
371 my $sub = $AUTOLOAD;
372 my $self = $_[0];
374 # ignore DESTROY calls
375 return if $func_name eq 'DESTROY';
377 # call attributes if func_name begins with "Tag_" or "Att_":
378 if ($func_name =~ /^(Tag|Att|Is)_(\w+)/) {
379 my @result = $self->each_tag_value($2);
380 return wantarray ? @result : $result[0];
383 # fetch subfeatures if func_name has an initial cap
384 if ($func_name =~ /^[A-Z]/) {
385 return $self->get_SeqFeatures(lc $func_name);
388 # error message of last resort
389 $self->throw(qq(Can't locate object method "$func_name" via package "$pack"));
393 sub add_segment {
394 my $self = shift;
395 $self->_add_segment(0,@_);
398 # This adds subfeatures. It has the property of converting the
399 # provided features into an object like itself and storing them
400 # into the database. If the feature already has a primary id and
401 # an object_store() method, then it is not stored into the database,
402 # but its primary id is reused.
403 sub _add_segment {
404 my $self = shift;
405 my $normalized = shift;
406 my $store = $self->object_store;
408 my @segments = $self->_create_subfeatures($normalized,@_);
410 # fix boundaries
411 $self->_fix_boundaries(\@segments);
413 # freakish fixing of our non-standard Target attribute
414 $self->_fix_target(\@segments);
416 for my $seg (@segments) {
417 my $id = $normalized ? $seg->primary_id : $seg;
418 defined $id or $self->throw("No primary ID when there should be");
419 push @{$self->{segments}},$id;
422 $self->update if $self->primary_id; # write us back to disk
425 sub _fix_boundaries {
426 my $self = shift;
427 my $segments = shift;
428 my $normalized = shift;
430 my $min_start = $self->start || 999_999_999_999;
431 my $max_stop = $self->end || -999_999_999_999;
433 for my $seg (@$segments) {
434 $min_start = $seg->start if $seg->start < $min_start;
435 $max_stop = $seg->end if $seg->end > $max_stop;
438 # adjust our boundaries, etc.
439 $self->start($min_start) if $min_start < $self->start;
440 $self->end($max_stop) if $max_stop > $self->end;
441 $self->{ref} ||= $segments->[0]->seq_id;
442 $self->{strand} ||= $segments->[0]->strand;
445 sub _fix_target {
446 my $self = shift;
447 my $segs = shift;
448 my $normalized = shift; # ignored for now
450 # freakish fixing of our non-standard Target attribute
451 if (my $t = ($self->attributes('Target'))[0]) {
452 my ($seqid,$tstart,$tend,$strand) = split /\s+/,$t;
453 if (defined $tstart && defined $tend) {
454 my $min_tstart = $tstart;
455 my $max_tend = $tend;
456 for my $seg (@$segs) {
457 my $st = ($seg->attributes('Target'))[0] or next;
458 (undef,$tstart,$tend) = split /\s+/,$st;
459 next unless defined $tstart && defined $tend;
460 $min_tstart = $tstart if $tstart < $min_tstart;
461 $max_tend = $tend if $tend > $max_tend;
463 if ($min_tstart < $tstart or $max_tend > $tend) {
464 $self->{attributes}{Target}[0] = join ' ',($seqid,$min_tstart,$max_tend,$strand||'');
470 # undo the load_id and Target hacks on the way out
471 sub format_attributes {
472 my $self = shift;
473 my $parent = shift;
474 my $fallback_id = shift;
476 my $load_id = $self->load_id || '';
478 my $targobj = ($self->attributes('Target'))[0];
479 # was getting an 'Use of uninitialized value with split' here, changed to cooperate -cjf 7/10/07
480 my ($target) = $targobj ? split /\s+/,($self->attributes('Target'))[0] : ('');
481 my @tags = $self->all_tags;
482 my @result;
483 for my $t (@tags) {
484 my @values = $self->each_tag_value($t);
486 # This line prevents Alias from showing up if it matches the load id, but this is not good
487 # @values = grep {$_ ne $load_id && $_ ne $target} @values if $t eq 'Alias';
489 # these are hacks, which we don't want to appear in the file
490 next if $t eq 'load_id';
491 next if $t eq 'parent_id';
493 foreach (@values) { s/\s+$// } # get rid of trailing whitespace
494 push @result,join '=',$self->escape($t),join(',', map {$self->escape($_)} @values) if @values;
496 my $id = $self->primary_id || $fallback_id;
497 my $parent_id;
498 if (@$parent) {
499 $parent_id = join (',',map {$self->escape($_)} @$parent);
501 my $name = $self->display_name;
502 unshift @result,"ID=".$self->escape($id) if defined $id;
503 unshift @result,"Parent=".$parent_id if defined $parent_id;
504 unshift @result,"Name=".$self->escape($name) if defined $name;
505 return join ';',@result;
508 sub _create_subfeatures {
509 my $self = shift;
510 my $normalized = shift;
512 my $type = $self->{subtype} || $self->{type};
513 my $ref = $self->seq_id;
514 my $name = $self->name;
515 my $class = $self->class;
516 my $store = $self->object_store;
517 my $source = $self->source;
519 if ($normalized) {
520 $store or $self->throw("Feature must be associated with a Bio::DB::SeqFeature::Store database before attempting to add subfeatures to a normalized object");
523 my $index_subfeatures_policy = eval{$store->index_subfeatures};
525 my @segments;
527 for my $seg (@_) {
529 if (UNIVERSAL::isa($seg,ref $self)) {
531 if (!$normalized) { # make sure the object has no lazy behavior
532 $seg->primary_id(undef);
533 $seg->object_store(undef);
535 push @segments,$seg;
538 elsif (ref($seg) eq 'ARRAY') {
539 my ($start,$stop) = @{$seg};
540 next unless defined $start && defined $stop; # fixes an obscure bug somewhere above us
541 my $strand = $self->{strand};
543 if ($start > $stop) {
544 ($start,$stop) = ($stop,$start);
545 $strand = -1;
547 push @segments,$self->new(-start => $start,
548 -stop => $stop,
549 -strand => $strand,
550 -ref => $ref,
551 -type => $type,
552 -name => $name,
553 -class => $class,
554 -source => $source,
559 elsif (UNIVERSAL::isa($seg,'Bio::SeqFeatureI')) {
560 my $score = $seg->score if $seg->can('score');
561 my $f = $self->new(-start => $seg->start,
562 -end => $seg->end,
563 -strand => $seg->strand,
564 -seq_id => $seg->seq_id,
565 -name => $seg->display_name,
566 -primary_tag => $seg->primary_tag,
567 -source_tag => $seg->source,
568 -score => $score,
569 -source => $source,
571 for my $tag ($seg->get_all_tags) {
572 my @values = $seg->get_tag_values($tag);
573 $f->{attributes}{$tag} = \@values;
575 push @segments,$f;
578 else {
579 croak "$seg is neither a Bio::SeqFeatureI object nor an arrayref";
583 return unless @segments;
585 if ($normalized && $store) { # parent/child data is going to be stored in the database
587 my @need_loading = grep {!defined $_->primary_id || $_->object_store ne $store} @segments;
588 if (@need_loading) {
589 my $result;
590 if ($index_subfeatures_policy) {
591 $result = $store->store(@need_loading);
592 } else {
593 $result = $store->store_noindex(@need_loading);
595 $result or croak "Couldn't store one or more subseqfeatures";
599 return @segments;
602 =head2 load_id
604 Title : load_id
605 Usage : $id = $feature->load_id
606 Function: get the GFF3 load ID
607 Returns : the GFF3 load ID (string)
608 Args : none
609 Status : public
611 For features that were originally loaded by the GFF3 loader, this
612 method returns the GFF3 load ID. This method may not be supported in
613 future versions of the module.
615 =cut
617 sub load_id {
618 return (shift->attributes('load_id'))[0];
622 =head2 notes
624 Title : notes
625 Usage : @notes = $feature->notes
626 Function: get contents of the GFF3 Note tag
627 Returns : List of GFF3 Note tags
628 Args : none
629 Status : public
631 For features that were originally loaded by the GFF3 loader, this
632 method returns the contents of the Note tag as a list. This is a
633 convenience for Bio::Graphics, which looks for notes() when it
634 constructs a default description line.
636 =cut
638 sub notes {
639 return shift->attributes('Note');
642 =head2 primary_id
644 Title : primary_id
645 Usage : $id = $feature->primary_id([$new_id])
646 Function: get/set the feature's database ID
647 Returns : the current primary ID
648 Args : none
649 Status : public
651 This method gets or sets the primary ID of the feature in the
652 underlying Bio::DB::SeqFeature::Store database. If you change this
653 field and then call update(), it will have the effect of making a copy
654 of the feature in the database under a new ID.
656 =cut
658 sub primary_id {
659 my $self = shift;
660 my $d = $self->{primary_id};
661 $self->{primary_id} = shift if @_;
665 =head2 target
667 Title : target
668 Usage : $segment = $feature->target
669 Function: return the segment correspondent to the "Target" attribute
670 Returns : a Bio::DB::SeqFeature::Segment object
671 Args : none
672 Status : public
674 For features that are aligned with others via the GFF3 Target
675 attribute, this returns a segment corresponding to the aligned
676 region. The CIGAR gap string is not yet supported.
678 =cut
680 sub target {
681 my $self = shift;
682 my @targets = $self->attributes('Target');
683 my @result;
684 for my $t (@targets) {
685 my ($seqid,$start,$end,$strand) = split /\s+/,$t;
686 $strand ||= '';
687 $strand = $strand eq '+' ? 1
688 : $strand eq '-' ? -1
689 : 0;
690 push @result,Bio::DB::SeqFeature::Segment->new($self->object_store,
691 $seqid,
692 $start,
693 $end,
694 $strand);
696 return wantarray ? @result : $result[0];
699 =head2 Internal methods
701 =over 4
703 =item $feature-E<gt>as_string()
705 Internal method used to implement overloaded stringification.
707 =item $boolean = $feature-E<gt>type_match(@list_of_types)
709 Internal method that will return true if the feature's primary_tag and
710 source_tag match any of the list of types (in primary_tag:source_tag
711 format) provided.
713 =back
715 =cut
717 sub as_string {
718 my $self = shift;
719 return overload::StrVal($self) unless $self->overloaded_names;
720 my $name = $self->display_name || $self->load_id;
721 $name ||= "id=".$self->primary_id if $self->primary_id;
722 $name ||= "<unnamed>";
723 my $method = $self->primary_tag;
724 my $source= $self->source_tag;
725 my $type = $source ? "$method:$source" : $method;
726 return "$type($name)";
729 sub eq {
730 my $self = shift;
731 my $b = shift;
732 my $store1 = $self->object_store;
733 my $store2 = eval {$b->object_store} || '';
734 return $store1 eq $store2 && $self->primary_id eq $b->primary_id;
737 sub ne {
738 my $self = shift;
739 return !$self->eq(shift);
742 # completely case insensitive
743 sub type_match {
744 my $self = shift;
745 my @types = @_;
746 my $method = lc $self->primary_tag;
747 my $source = lc $self->source_tag;
748 for my $t (@types) {
749 my ($m,$s) = map {lc $_} split /:/,$t;
750 return 1 if $method eq $m && (!defined $s || $source eq $s);
752 return;
755 sub segments { shift->get_SeqFeatures(@_) }
760 __END__
762 =head1 BUGS
764 This is an early version, so there are certainly some bugs. Please
765 use the BioPerl bug tracking system to report bugs.
767 =head1 SEE ALSO
769 L<bioperl>,
770 L<Bio::DB::SeqFeature>,
771 L<Bio::DB::SeqFeature::Store>,
772 L<Bio::DB::SeqFeature::Segment>,
773 L<Bio::DB::SeqFeature::GFF3Loader>,
774 L<Bio::DB::SeqFeature::Store::DBI::mysql>,
775 L<Bio::DB::SeqFeature::Store::bdb>
777 =head1 AUTHOR
779 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
781 Copyright (c) 2006 Cold Spring Harbor Laboratory.
783 This library is free software; you can redistribute it and/or modify
784 it under the same terms as Perl itself.
786 =cut