1 package Bio
::SeqFeature
::Lite
;
5 Bio::SeqFeature::Lite - Lightweight Bio::SeqFeatureI class
9 # create a simple feature with no internal structure
10 $f = Bio::SeqFeature::Lite->new(-start => 1000,
12 -type => 'transcript',
13 -name => 'alpha-1 antitrypsin',
14 -desc => 'an enzyme inhibitor',
17 # create a feature composed of multiple segments, all of type "similarity"
18 $f = Bio::SeqFeature::Lite->new(-segments => [[1000,1100],[1500,1550],[1800,2000]],
20 -type => 'gapped_alignment',
21 -subtype => 'similarity');
23 # build up a gene exon by exon
24 $e1 = Bio::SeqFeature::Lite->new(-start=>1,-stop=>100,-type=>'exon');
25 $e2 = Bio::SeqFeature::Lite->new(-start=>150,-stop=>200,-type=>'exon');
26 $e3 = Bio::SeqFeature::Lite->new(-start=>300,-stop=>500,-type=>'exon');
27 $f = Bio::SeqFeature::Lite->new(-segments=>[$e1,$e2,$e3],-type=>'gene');
31 This is a simple Bio::SeqFeatureI-compliant object that is compatible
32 with Bio::Graphics::Panel. With it you can create lightweight feature
35 All methods are as described in L<Bio::SeqFeatureI> with the following additions:
37 =head2 The new() Constructor
39 $feature = Bio::SeqFeature::Lite->new(@args);
41 This method creates a new feature object. You can create a simple
42 feature that contains no subfeatures, or a hierarchically nested object.
44 Arguments are as follows:
46 -seq_id the reference sequence
47 -start the start position of the feature
48 -end the stop position of the feature
49 -stop an alias for end
50 -name the feature name (returned by seqname())
51 -type the feature type (returned by primary_tag())
52 -primary_tag the same as -type
53 -source the source tag
54 -score the feature score (for GFF compatibility)
55 -desc a description of the feature
56 -segments a list of subfeatures (see below)
57 -subtype the type to use when creating subfeatures
58 -strand the strand of the feature (one of -1, 0 or +1)
59 -phase the phase of the feature (0..2)
60 -seq a dna or protein sequence string to attach to feature
61 -id an alias for -name
62 -seqname an alias for -name
63 -display_id an alias for -name
64 -display_name an alias for -name (do you get the idea the API has changed?)
65 -primary_id unique database ID
66 -url a URL to link to when rendered with Bio::Graphics
67 -attributes a hashref of tag value attributes, in which the key is the tag
68 and the value is an array reference of values
69 -factory a reference to a feature factory, used for compatibility with
70 more obscure parts of Bio::DB::GFF
72 The subfeatures passed in -segments may be an array of
73 Bio::SeqFeature::Lite objects, or an array of [$start,$stop]
74 pairs. Each pair should be a two-element array reference. In the
75 latter case, the feature type passed in -subtype will be used when
76 creating the subfeatures.
78 If no feature type is passed, then it defaults to "feature".
80 =head2 Non-SeqFeatureI methods
82 A number of new methods are provided for compatibility with
83 Ace::Sequence, which has a slightly different API from SeqFeatureI:
89 Get/set the URL that the graphical rendering of this feature will link to.
91 =item add_segment(@segments)
93 Add one or more segments (a subfeature). Segments can either be
94 Feature objects, or [start,stop] arrays, as in the -segments argument
95 to new(). The feature endpoints are automatically adjusted.
99 An alias for sub_SeqFeature().
101 =item get_SeqFeatures()
103 Alias for sub_SeqFeature()
105 =item get_all_SeqFeatures()
107 Alias for sub_SeqFeature()
109 =item merged_segments()
111 Another alias for sub_SeqFeature().
119 An alias for seqname().
123 An alias for sub_SeqFeature() (you don't want to know why!)
131 use base
qw(Bio::Root::Root Bio::SeqFeatureI Bio::LocationI Bio::SeqI);
136 *exons
= *sub_SeqFeature
= *merged_segments
= \
&segments
;
137 *get_all_SeqFeatures
= *get_SeqFeatures
= \
&segments
;
138 *method
= \
&primary_tag
;
139 *source
= \
&source_tag
;
140 *get_tag_values
= \
&each_tag_value
;
141 *add_SeqFeature
= \
&add_segment
;
142 *get_all_tags
= \
&all_tags
;
145 # implement Bio::SeqI and FeatureHolderI interface
147 sub primary_seq
{ return $_[0] }
149 my ($obj,$value) = @_;
150 if( defined $value ) {
151 $obj->throw("object of class ".ref($value)." does not implement ".
152 "Bio::AnnotationCollectionI. Too bad.")
153 unless $value->isa("Bio::AnnotationCollectionI");
154 $obj->{'_annotation'} = $value;
155 } elsif( ! defined $obj->{'_annotation'}) {
156 $obj->{'_annotation'} = Bio
::Annotation
::Collection
->new();
158 return $obj->{'_annotation'};
161 my ($self, $species) = @_;
163 $self->{'species'} = $species;
165 return $self->{'species'};
168 sub is_remote
{ return }
169 sub feature_count
{ return scalar @
{shift->{segments
} || []} }
171 sub target
{ return; }
172 sub hit
{ shift->target }
176 my $method = $self->primary_tag;
177 my $source = $self->source_tag;
178 return $source ne '' ?
"$method:$source" : $method;
182 # Bio::SeqFeature::Lite->new(
185 # -name => 'fred feature',
188 # Alternatively, use -segments => [ [start,stop],[start,stop]...]
189 # to create a multisegmented feature.
192 $class = ref($class) if ref $class;
195 my $self = bless {},$class;
198 if ($arg{-strand
} =~ /^[\+\-\.]$/){
199 ($arg{-strand
} eq "+") && ($self->{strand
} = '1');
200 ($arg{-strand
} eq "-") && ($self->{strand
} = '-1');
201 ($arg{-strand
} eq ".") && ($self->{strand
} = '0');
203 $self->{strand
} = $arg{-strand
} ?
($arg{-strand
} >= 0 ?
+1 : -1) : 0;
205 $self->{name
} = $arg{-name
} || $arg{-seqname
} || $arg{-display_id
}
206 || $arg{-display_name
} || $arg{-id
};
207 $self->{type
} = $arg{-type
} || $arg{-primary_tag
} || 'feature';
208 $self->{subtype
} = $arg{-subtype
} if exists $arg{-subtype
};
209 $self->{source
} = $arg{-source
} || $arg{-source_tag
} || '';
210 $self->{score
} = $arg{-score
} if exists $arg{-score
};
211 $self->{start
} = $arg{-start
};
212 $self->{stop
} = exists $arg{-end
} ?
$arg{-end
} : $arg{-stop
};
213 $self->{ref} = $arg{-seq_id
} || $arg{-ref};
214 $self->{attributes
} = $arg{-attributes
} || $arg{-tag
};
215 for my $option (qw(class url seq phase desc primary_id)) {
216 $self->{$option} = $arg{"-$option"} if exists $arg{"-$option"};
219 # is_circular is needed for Bio::PrimarySeqI compliance
220 $self->{is_circular
} = $arg{-is_circular
} || 0;
223 if (defined $self->{stop
} && defined $self->{start
}
224 && $self->{stop
} < $self->{start
}) {
225 @
{$self}{'start','stop'} = @
{$self}{'stop','start'};
226 $self->{strand
} *= -1;
230 if (my $s = $arg{-segments
}) {
231 # NB: when $self ISA Bio::DB::SeqFeature the following invokes
232 # Bio::DB::SeqFeature::add_segment and not
233 # Bio::DB::SeqFeature::add_segment (as might be expected?)
234 $self->add_segment(@
$s);
242 my $type = $self->{subtype
} || $self->{type
};
243 $self->{segments
} ||= [];
244 my $ref = $self->seq_id;
245 my $name = $self->name;
246 my $class = $self->class;
247 my $source_tag = $self->source_tag;
249 my $min_start = $self->start || 999_999_999_999
;
250 my $max_stop = $self->end || -999_999_999_999
;
252 my @segments = @
{$self->{segments
}};
255 if (ref($seg) eq 'ARRAY') {
256 my ($start,$stop) = @
{$seg};
257 next unless defined $start && defined $stop; # fixes an obscure bug somewhere above us
258 my $strand = $self->{strand
};
260 if ($start > $stop) {
261 ($start,$stop) = ($stop,$start);
265 push @segments,$self->new(-start
=> $start,
272 -phase
=> $self->{phase
},
273 -score
=> $self->{score
},
274 -source_tag
=> $source_tag,
275 -attributes
=> $self->{attributes
},
277 $min_start = $start if $start < $min_start;
278 $max_stop = $stop if $stop > $max_stop;
283 $min_start = $seg->start if ($seg->start && $seg->start < $min_start);
284 $max_stop = $seg->end if ($seg->end && $seg->end > $max_stop);
288 local $^W
= 0; # some warning of an uninitialized variable...
289 $self->{segments
} = \
@segments;
290 $self->{ref} ||= $self->{segments
}[0]->seq_id;
291 $self->{start
} = $min_start;
292 $self->{stop
} = $max_stop;
298 my $s = $self->{segments
} or return wantarray ?
() : 0;
303 my $d = $self->{score
};
304 $self->{score
} = shift if @_;
309 my $d = $self->{type
};
310 $self->{type
} = shift if @_;
315 my $d = $self->{name
};
316 $self->{name
} = shift if @_;
319 sub seq_id
{ shift->ref(@_) }
322 my $d = $self->{ref};
323 $self->{ref} = shift if @_;
328 my $d = $self->{start
};
329 $self->{start
} = shift if @_;
330 if (my $rs = $self->{refseq
}) {
331 my $strand = $rs->strand || 1;
332 return $strand >= 0 ?
($d - $rs->start + 1) : ($rs->end - $d + 1);
339 my $d = $self->{stop
};
340 $self->{stop
} = shift if @_;
341 if (my $rs = $self->{refseq
}) {
342 my $strand = $rs->strand || 1;
343 return $strand >= 0 ?
($d - $rs->start + 1) : ($rs->end - $d + 1);
349 my $d = $self->{strand
};
350 $self->{strand
} = shift if @_;
351 if (my $rs = $self->{refseq
}) {
352 my $rstrand = $rs->strand;
354 return 1 if $rstrand == $d;
355 return -1 if $rstrand != $d;
360 # this does nothing, but it is here for compatibility reasons
363 my $d = $self->{absolute
};
364 $self->{absolute
} = shift if @_;
370 local $self->{refseq
} = undef;
375 local $self->{refseq
} = undef;
380 local $self->{refseq
} = undef;
386 return $self->end - $self->start + 1;
389 #is_circular is needed for Bio::PrimarySeqI
392 my $d = $self->{is_circular
};
393 $self->{is_circular
} = shift if @_;
400 my $seq = exists $self->{seq
} ?
$self->{seq
} : '';
405 my $seq = shift->seq;
406 $seq = $seq->seq if CORE
::ref($seq);
413 Usage : $id = $obj->display_name or $obj->display_name($newid);
414 Function: Gets or sets the display id, also known as the common name of
417 The semantics of this is that it is the most likely string
418 to be used as an identifier of the sequence, and likely to
419 have "human" readability. The id is equivalent to the LOCUS
420 field of the GenBank/EMBL databanks and the ID field of the
421 Swissprot/sptrembl database. In fasta format, the >(\S+) is
422 presumed to be the id, though some people overload the id
423 to embed other information. Bioperl does not use any
424 embedded information in the ID field, and people are
425 encouraged to use other mechanisms (accession field for
426 example, or extending the sequence object) to solve this.
428 Notice that $seq->id() maps to this function, mainly for
429 legacy/convenience issues.
431 Args : None or a new id
436 sub display_name
{ shift->name(@_) }
438 *display_id
= \
&display_name
;
440 =head2 accession_number
442 Title : accession_number
443 Usage : $unique_biological_key = $obj->accession_number;
444 Function: Returns the unique biological id for a sequence, commonly
445 called the accession_number. For sequences from established
446 databases, the implementors should try to use the correct
447 accession number. Notice that primary_id() provides the
448 unique id for the implementation, allowing multiple objects
449 to have the same accession number in a particular implementation.
451 For sequences with no accession number, this method should return
459 sub accession_number
{
466 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
467 Function: Returns the type of sequence being one of
468 'dna', 'rna' or 'protein'. This is case sensitive.
470 This is not called <type> because this would cause
471 upgrade problems from the 0.5 and earlier Seq objects.
473 Returns : a string either 'dna','rna','protein'. NB - the object must
474 make a call of the type - if there is no type specified it
483 return 'dna'; # no way this will be anything other than dna!
491 Usage : $seqobj->desc($string) or $seqobj->desc()
492 Function: Sets or gets the description of the sequence
494 Returns : The description
495 Args : The description or none
502 my ($d) = $self->notes;
503 $self->{desc
} = shift if @_;
510 return $self->get_tag_values(@_);
512 return $self->{attributes
} ?
%{$self->{attributes
}} : ();
518 my $d = $self->{primary_id
};
519 $self->{primary_id
} = shift if @_;
521 # return $d if defined $d;
522 # return (overload::StrVal($self) =~ /0x([a-f0-9]+)/)[0];
527 my $notes = $self->{desc
};
528 return $notes if defined $notes;
529 return $self->attributes('Note');
534 return $self->attributes('Alias');
539 return $self->start < $self->end ?
$self->start : $self->end;
544 return $self->start > $self->end ?
$self->start : $self->end;
550 Usage : my $location = $seqfeature->location()
551 Function: returns a location object suitable for identifying location
552 of feature on sequence or parent feature
553 Returns : Bio::LocationI object
560 require Bio
::Location
::Split
unless Bio
::Location
::Split
->can('new');
562 if (my @segments = $self->segments) {
563 $location = Bio
::Location
::Split
->new();
564 foreach (@segments) {
565 $location->add_sub_Location($_);
575 require Bio
::Location
::Simple
unless Bio
::Location
::Simple
->can('new');
576 if (my @segments = $self->segments) {
578 Bio
::Location
::Simple
->new(-start
=> $_->start,
580 -strand
=> $_->strand);
583 return Bio
::Location
::Simple
->new(-start
=> $self->start,
585 -strand
=> $self->strand);
589 =head2 location_string
591 Title : location_string
592 Usage : my $string = $seqfeature->location_string()
593 Function: Returns a location string in a format recognized by gbrowse
597 This is a convenience function used by the generic genome browser. It
598 returns the location of the feature and its subfeatures in the compact
599 form "start1..end1,start2..end2,...". Use
600 $seqfeature-E<gt>location()-E<gt>toFTString() to obtain a standard
601 GenBank/EMBL location representation.
605 sub location_string
{
607 my @segments = $self->segments or return $self->to_FTstring;
608 join ',',map {$_->to_FTstring} @segments;
611 sub coordinate_policy
{
612 require Bio
::Location
::WidestCoordPolicy
unless Bio
::Location
::WidestCoordPolicy
->can('new');
613 return Bio
::Location
::WidestCoordPolicy
->new();
616 sub min_start
{ shift->low }
617 sub max_start
{ shift->low }
618 sub min_end
{ shift->high }
619 sub max_end
{ shift->high}
620 sub start_pos_type
{ 'EXACT' }
621 sub end_pos_type
{ 'EXACT' }
624 my $low = $self->min_start;
625 my $high = $self->max_end;
626 my $strand = $self->strand;
627 my $str = defined $strand && $strand<0 ?
"complement($low..$high)" : "$low..$high";
628 if (my $id = $self->seq_id()) {
629 $str = $id . ":" . $str;
635 my $d = $self->{phase
};
636 $self->{phase
} = shift if @_;
642 my $d = $self->{class};
643 $self->{class} = shift if @_;
644 return defined($d) ?
$d : 'Sequence'; # acedb is still haunting me - LS
647 # set GFF dumping version
650 my $d = $self->{gff3_version
} || 2;
651 $self->{gff3_version
} = shift if @_;
658 if ($self->version == 3) {
659 return $self->gff3_string(@_);
663 my $name = $self->name;
664 my $class = $self->class;
665 my $group = "$class $name" if $name;
666 my $strand = ('-','.','+')[$self->strand+1];
668 $string .= join("\t",
669 $self->ref||'.',$self->source||'.',$self->method||'.',
670 $self->start||'.',$self->stop||'.',
671 defined($self->score) ?
$self->score : '.',
673 defined($self->phase) ?
$self->phase : '.',
678 foreach ($self->sub_SeqFeature) {
679 $string .= $_->gff_string($recurse);
685 # Suggested strategy for dealing with the multiple parentage issue.
686 # First recurse through object tree and record parent tree.
687 # Then recurse again, skipping objects we've seen before.
689 my ($self,$recurse,$parent_tree,$seenit,$force_id) = @_;
696 $self->_traverse($parent_tree) unless %$parent_tree; # this will record parents of all children
697 my $primary_id = defined $force_id ?
$force_id : $self->_real_or_dummy_id;
699 return if $seenit->{$primary_id}++;
701 @rsf = $self->get_SeqFeatures;
703 # Detect case in which we have a split location feature. In this case we
704 # skip to the grandchildren and trick them into thinking that our parent is theirs.
705 my %types = map {$_->primary_tag=>1} @rsf;
706 my @types = keys %types;
707 if (@types == 1 && $types[0] eq $self->primary_tag) {
708 return join ("\n",map {$_->gff3_string(1,$parent_tree,{},$primary_id)} @rsf);
712 @parent_ids = keys %{$parent_tree->{$primary_id}};
715 my $group = $self->format_attributes(\
@parent_ids,$force_id);
716 my $name = $self->name;
718 my $class = $self->class;
719 my $strand = ('-','.','+')[$self->strand+1];
726 defined($self->score) ?
$self->score : '.',
728 defined($self->phase) ?
$self->phase : '.',
732 map {$_->gff3_string(1,$parent_tree,$seenit)} @rsf);
735 sub _real_or_dummy_id
{
737 my $id = $self->primary_id;
738 return $id if defined $id;
739 return return (overload
::StrVal
($self) =~ /0x([a-f0-9]+)/)[0];
744 my $tree = shift; # tree => {$child}{$parent} = 1
746 my $id = $self->_real_or_dummy_id;
747 defined $id or return;
748 $tree->{$id}{$parent->_real_or_dummy_id}++ if $parent;
749 $_->_traverse($tree,$self) foreach $self->get_SeqFeatures;
756 my $d = $self->{source
};
757 $self->{source
} = shift if @_;
761 # This probably should be deleted. Not sure why it's here, but might
762 # have been added for Ace::Sequence::Feature-compliance.
771 return exists $self->{attributes
}{$tag};
776 my $toencode = shift;
777 $toencode =~ s/([^a-zA-Z0-9_.:?^*\(\)\[\]@!+-])/uc sprintf("%%%02x",ord($1))/eg;
783 return keys %{$self->{attributes
}};
788 my ($tag_name,@tag_values) = @_;
789 push @
{$self->{attributes
}{$tag_name}},@tag_values;
794 my $tag_name = shift;
795 delete $self->{attributes
}{$tag_name};
801 my $value = $self->{attributes
}{$tag} or return;
802 my $ref = CORE
::ref $value;
803 return $ref && $ref eq 'ARRAY' ? @
{$self->{attributes
}{$tag}}
804 : $self->{attributes
}{$tag};
807 sub get_Annotations
{
810 my @values = $self->get_tag_values($tag);
811 return $values[0] if @values == 1;
815 sub format_attributes
{
818 my $fallback_id = shift;
820 my @tags = $self->get_all_tags;
823 my @values = $self->get_tag_values($t);
824 push @result,join '=',$self->escape($t),join(',', map {$self->escape($_)} @values) if @values;
826 #my $id = $self->escape($self->_real_or_dummy_id) || $fallback_id;
827 my $id = $fallback_id || $self->escape($self->_real_or_dummy_id);
831 $parent_id = join (',',map {$self->escape($_)} @
$parent);
834 my $name = $self->display_name;
835 unshift @result,"ID=".$id if defined $id;
836 unshift @result,"Parent=".$parent_id if defined $parent_id;
837 unshift @result,"Name=".$self->escape($name) if defined $name;
838 return join ';',@result;
844 Usage : my $feature = $seqfeature->clone
845 Function: Create a deep copy of the feature
846 Returns : A copy of the feature
854 # overwrite attributes
855 my $clone = bless \
%clone,CORE
::ref($self);
856 $clone{attributes
} = {};
857 for my $k (keys %{$self->{attributes
}}) {
858 @
{$clone{attributes
}{$k}} = @
{$self->{attributes
}{$k}};
866 Usage : $ref = $s->refseq([$newseq] [,$newseqclass])
867 Function: get/set reference sequence
868 Returns : current reference sequence
869 Args : new reference sequence and class (optional)
872 This method will get or set the reference sequence. Called with no
873 arguments, it returns the current reference sequence. Called with any
874 Bio::SeqFeatureI object that provides the seq_id(), start(), end() and
877 The method will generate an exception if you attempt to set the
878 reference sequence to a sequence that has a different seq_id from the
885 my $d = $self->{refseq
};
888 $self->throw("attempt to set refseq using a feature that does not share the same seq_id")
889 unless $newref->seq_id eq $self->seq_id;
890 $self->{refseq
} = $newref;
903 L<Bio::Graphics::Feature>
907 Lincoln Stein E<lt>lstein@cshl.eduE<gt>.
909 Copyright (c) 2006 Cold Spring Harbor Laboratory
911 This library is free software; you can redistribute it and/or modify
912 it under the same terms as Perl itself. See DISCLAIMER.txt for
913 disclaimers of warranty.