2 # BioPerl module for Bio::Seq::Meta::Array
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho
8 # Copyright Heikki Lehvaslaiho
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Seq::Meta::Array - array-based generic implementation of a
17 sequence class with residue-based meta information
21 use Bio::LocatableSeq;
22 use Bio::Seq::Meta::Array;
24 my $seq = Bio::Seq::Meta::Array->new(-id=>'test',
29 -verbose=>1, # to see warnings
32 # to test this is a meta seq object
33 $seq->isa("Bio::Seq::Meta::Array")
34 || $seq->throw("$seq is not a Bio::Seq::Meta::Array");
36 $seq->meta('1 2 3 4 5 6 7 8 9 10');
38 # or you could create the Meta object directly
39 $seq = Bio::Seq::Meta::Array->new(-id=>'test',
44 -meta=>'1 2 3 4 5 6 7 8 9 10',
45 -verbose=>1, # to see warnings
50 $arrayref = $seq->meta();
51 $string = $seq->meta_text();
52 $substring = $seq->submeta_text(2,5);
53 $unique_key = $seq->accession_number();
57 This class implements generic methods for sequences with residue-based
58 meta information. Meta sequences with meta data are Bio::LocatableSeq
59 objects with additional methods to store that meta information. See
60 L<Bio::LocatableSeq> and L<Bio::Seq::MetaI>.
62 The meta information in this class can be a string of variable length
63 and can be a complex structure. Blank values are undef or zero.
65 Application specific implementations should inherit from this class to
66 override and add to these methods.
68 This class can be used for storing sequence quality values but
69 Bio::Seq::Quality has named methods that make it easier.
80 This Bio::Seq::MetaI implementation inherits from Bio::LocatableSeq, which
81 itself inherits from Bio::PrimarySeq. It is not a Bio::SeqI, so bless-ing
82 objects of this class into a Bio::SeqI or vice versa and will not work as
83 expected (see bug 2262). This may be addressed in a future refactor of
90 User feedback is an integral part of the evolution of this and other
91 Bioperl modules. Send your comments and suggestions preferably to one
92 of the Bioperl mailing lists. Your participation is much appreciated.
94 bioperl-l@bioperl.org - General discussion
95 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
99 Please direct usage questions or support issues to the mailing list:
101 I<bioperl-l@bioperl.org>
103 rather than to the module maintainer directly. Many experienced and
104 reponsive experts will be able look at the problem and quickly
105 address it. Please include a thorough description of the problem
106 with code and data examples if at all possible.
108 =head2 Reporting Bugs
110 Report bugs to the Bioperl bug tracking system to help us keep track
111 the bugs and their resolution. Bug reports can be submitted via the
114 https://github.com/bioperl/bioperl-live/issues
116 =head1 AUTHOR - Heikki Lehvaslaiho
118 Email heikki-at-bioperl-dot-org
122 Chad Matsalla, bioinformatics@dieselwurks.com
123 Aaron Mackey, amackey@virginia.edu
127 The rest of the documentation details each of the object methods.
128 Internal methods are usually preceded with a _
133 # Let the code begin...
136 package Bio
::Seq
::Meta
::Array
;
139 use base
qw(Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI);
141 our $DEFAULT_NAME = 'DEFAULT';
148 Usage : $metaseq = Bio::Seq::Meta::Array->new
149 ( -meta => 'aaaaaaaabbbbbbbb',
150 -seq => 'TKLMILVSHIVILSRM'
152 -accession_number => 'S000012',
154 Function: Constructor for Bio::Seq::Meta::Array class, meta data being in a
155 string. Note that you can provide an empty quality string.
156 Returns : a new Bio::Seq::Meta::Array object
161 my ($class, %args) = @_;
163 # run-time modification of @ISA is extremely evil (you should't pick your
164 # interface on the fly); this has no obvious effect on any tests so
165 # commenting out - cjfields 2011-4-6
167 #defined inheritance according to stated baseclass,
168 #if undefined then will be PrimarySeq
169 #if (defined($args{'-baseclass'})) {
170 # @ISA = ($args{'-baseclass'},"Bio::Seq::MetaI");
173 # @ISA = qw( Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI );
176 my $self = $class->SUPER::new
(%args);
178 my($meta, $forceflush) =
179 $self->_rearrange([qw(META
184 $self->{'_meta'}->{$DEFAULT_NAME} = [];
186 $meta && $self->meta($meta);
187 $forceflush && $self->force_flush($forceflush);
196 Usage : $meta_values = $obj->meta($values_string);
199 Get and set method for the meta data starting from residue
200 position one. Since it is dependent on the length of the
201 sequence, it needs to be manipulated after the sequence.
203 The length of the returned value always matches the length
206 Returns : reference to an array of meta data
207 Args : new value, string or array ref, optional
212 shift->named_meta($DEFAULT_NAME, shift);
218 Usage : $meta_values = $obj->meta_text($values_arrayref);
219 Function: Variant of meta() guarantied to return a string
220 representation of meta data. For details, see L<meta>.
222 Args : new value, string or array ref, optional
227 return join ' ', map {0 unless $_} @
{shift->meta(shift)};
233 Usage : $meta_values = $obj->named_meta($name, $values_arrayref);
234 Function: A more general version of meta(). Each meta data set needs
235 to be named. See also L<meta_names>.
236 Returns : reference to an array of meta data
237 Args : scalar, name of the meta data set
238 new value, string or array ref, optional
243 my ($self, $name, $value) = @_;
245 $name ||= $DEFAULT_NAME;
247 if (defined $value) {
250 if (ref $value eq 'ARRAY' ) { # array ref
253 elsif (not ref($value)) { # scalar
254 $arrayref = [split /\s+/, $value];
256 $self->throw("I need a scalar or array ref, not [". ref($value). "]");
260 my $diff = $self->length - @
{$arrayref};
262 foreach (1..$diff) { push @
{$arrayref}, 0;}
265 $self->{'_meta'}->{$name} = $arrayref;
267 #$self->_test_gap_positions($name) if $self->verbose > 0;
270 $self->_do_flush if $self->force_flush;
272 return $self->{'_meta'}->{$name} || (" " x
$self->length);
276 =head2 _test_gap_positions
278 Title : _test_gap_positions
279 Usage : $meta_values = $obj->_test_gap_positions($name);
280 Function: Internal test for correct position of gap characters.
281 Gap being only '-' this time.
283 This method is called from named_meta() when setting meta
284 data but only if verbose is positive as this can be an
285 expensive process on very long sequences. Set verbose(1) to
286 see warnings when gaps do not align in sequence and meta
287 data and turn them into errors by setting verbose(2).
289 Returns : true on success, prints warnings
294 sub _test_gap_positions
{
299 $self->seq || return $success;
300 my $len = CORE
::length($self->seq);
301 for (my $i=0; $i < $len; $i++) {
302 my $s = substr $self->{seq
}, $i, 1;
303 my $m = substr $self->{_meta
}->{$name}, $i, 1;
304 $self->warn("Gap mismatch in column [". ($i+1). "] of [$name] meta data in seq [". $self->id. "]")
306 if ($m eq '-') && $s ne $m;
311 =head2 named_meta_text
313 Title : named_meta_text()
314 Usage : $meta_values = $obj->named_meta_text($name, $values_arrayref);
315 Function: Variant of named_meta() guarantied to return a textual
316 representation of the named meta data.
317 For details, see L<meta>.
319 Args : scalar, name of the meta data set
320 new value, string or array ref, optional
324 sub named_meta_text
{
325 return join ' ', @
{shift->named_meta(@_)};
332 Usage : $subset_of_meta_values = $obj->submeta(10, 20, $value_string);
333 $subset_of_meta_values = $obj->submeta(10, undef, $value_string);
336 Get and set method for meta data for subsequences.
338 Numbering starts from 1 and the number is inclusive, ie 1-2
339 are the first two residue of the sequence. Start cannot be
340 larger than end but can be equal.
342 If the second argument is missing the returned values
343 should extend to the end of the sequence.
345 The return value may be a string or an array reference,
346 depending on the implementation. If in doubt, use
347 submeta_text() which is a variant guarantied to return a
348 string. See L<submeta_text>.
350 Returns : A reference to an array or a string
351 Args : integer, start position
352 integer, end position, optional when a third argument present
353 new value, string or array ref, optional
358 shift->named_submeta($DEFAULT_NAME, @_);
364 Usage : $meta_values = $obj->submeta_text(20, $value_string);
365 Function: Variant of submeta() guarantied to return a textual
366 representation of meta data. For details, see L<meta>.
368 Args : new value, string or array ref, optional
374 return join ' ', @
{shift->named_submeta($DEFAULT_NAME, @_)};
379 Title : named_submeta
380 Usage : $subset_of_meta_values = $obj->named_submeta($name, 10, 20, $value_string);
381 $subset_of_meta_values = $obj->named_submeta($name, 10);
382 Function: Variant of submeta() guarantied to return a textual
383 representation of meta data. For details, see L<meta>.
384 Returns : A reference to an array or a string
385 Args : scalar, name of the meta data set
386 integer, start position
387 integer, end position, optional when a third argument present (can be undef)
388 new value, string or array ref, optional
394 my ($self, $name, $start, $end, $value) = @_;
396 $name ||= $DEFAULT_NAME;
398 $start =~ /^[+]?\d+$/ and $start > 0 or
399 $self->throw("Need at least a positive integer start value");
401 my $meta_len = scalar(@
{$self->{_meta
}->{$name}});
402 if (defined $value) {
405 if (ref $value eq 'ARRAY' ) { # array ref
408 elsif (not ref($value)) { # scalar
409 $arrayref = [split /\s+/, $value];
411 $self->throw("I need a space separated scalar or array ref, not [". ref($value). "]");
414 $self->warn("You are setting meta values beyond the length of the sequence\n".
415 "[$start > ". length($self->seq)."] in sequence ". $self->id)
416 if $start + scalar @
{$arrayref} -1 > $self->length;
419 $end or $end = @
{$arrayref} + $start;
422 # test for length; pad if needed
423 my $diff = $end - $start - scalar @
{$arrayref};
425 foreach (1..$diff) { push @
{$arrayref}, $META_GAP}
428 @
{$self->{_meta
}->{$name}}[$start..$end] = @
{$arrayref};
430 $self->_do_flush if $self->force_flush;
435 # don't set by seq length; use meta array length instead; bug 2478
437 if ($end > $meta_len) {
438 $self->warn("End is longer than meta sequence $name length; resetting to $meta_len");
441 # warn but don't reset (push use of trunc() instead)
442 $self->warn("End is longer than sequence length; use trunc() \n".
443 "if you want a fully truncated object") if $end > $self->length;
445 return [@
{$self->{_meta
}->{$name}}[$start..$end]];
450 =head2 named_submeta_text
452 Title : named_submeta_text
453 Usage : $meta_values = $obj->named_submeta_text($name, 20, $value_string);
454 Function: Variant of submeta() guarantied to return a textual
455 representation of meta data. For details, see L<meta>.
457 Args : scalar, name of the meta data
458 Args : integer, start position, optional
459 integer, end position, optional
460 new value, string or array ref, optional
464 sub named_submeta_text
{
465 return join ' ', @
{shift->named_submeta(@_)};
471 Usage : @meta_names = $obj->meta_names()
472 Function: Retrieves an array of meta data set names. The default
473 (unnamed) set name is guarantied to be the first name if it
475 Returns : an array of names
484 foreach ( sort keys %{$self->{'_meta'}} ) {
485 push (@r, $_) unless $_ eq $DEFAULT_NAME;
487 unshift @r, $DEFAULT_NAME if $self->{'_meta'}->{$DEFAULT_NAME};
494 Title : meta_length()
495 Usage : $meta_len = $obj->meta_length();
496 Function: return the number of elements in the meta set
504 return $self->named_meta_length($DEFAULT_NAME);
508 =head2 named_meta_length
510 Title : named_meta_length()
511 Usage : $meeta_len = $obj->named_meta_length($name);
512 Function: return the number of elements in the named meta set
518 sub named_meta_length
{
519 my ($self, $name) = @_;
520 $name ||= $DEFAULT_NAME;
521 return scalar @
{$self->{'_meta'}->{$name}};
528 Title : force_flush()
529 Usage : $force_flush = $obj->force_flush(1);
530 Function: Automatically pad with empty values or truncate meta values
531 to sequence length. Not done by default.
532 Returns : boolean 1 or 0
533 Args : optional boolean value
535 Note that if you turn this forced padding off, the previously padded
536 values are not changed.
541 my ($self, $value) = @_;
543 if (defined $value) {
545 $self->{force_flush
} = 1;
548 $self->{force_flush
} = 0;
552 return $self->{force_flush
};
560 Function: internal method to do the force that meta values are same
561 length as sequence . Called from L<force_flush>
571 foreach my $name ($self->meta_names) {
572 #print "seq: ", $self->length , " ", $name, ": ", $self->named_meta_length($name), "======\n";
575 if ($self->length > $self->named_meta_length($name)) {
576 my $diff = $self->length - $self->named_meta_length($name);
577 foreach (1..$diff) { push @
{$self->{'_meta'}->{$name}}, $META_GAP}
580 elsif ( $self->length < $self->named_meta_length($name) ) {
581 $self->{_meta
}->{$name} = [@
{$self->{_meta
}->{$name}}[0..($self->length-1)]]
590 Usage : $is_flush = $obj->is_flush()
591 or $is_flush = $obj->is_flush($my_meta_name)
592 Function: Boolean to tell if all meta values are in
593 flush with the sequence length.
594 Returns true if force_flush() is set
595 Set verbosity to a positive value to see failed meta sets
596 Returns : boolean 1 or 0
597 Args : optional name of the meta set
603 my ($self, $name) = shift;
605 return 1 if $self->force_flush;
611 $sticky .= "$name " if $self->length != $self->named_meta_length($name);
613 foreach my $m ($self->meta_names) {
614 $sticky .= "$m " if ($self->named_meta_length($m) > 0) && ($self->length != $self->named_meta_length($m)) ;
619 print "These meta set are not flush: $sticky\n" if $self->verbose;
627 =head1 Bio::PrimarySeqI methods
632 Usage : $newseq = $seq->revcom();
633 Function: Produces a new Bio::Seq::MetaI implementing object where
634 the order of residues and their meta information is reversed.
635 Returns : A new (fresh) Bio::Seq::Meta object
637 Throws : if the object returns false on is_flush()
639 Note: The method does nothing to meta values, it reorders them, only.
646 $self->throw("Can not get a reverse complement. The object is not flush.")
647 unless $self->is_flush;
649 my $new = $self->SUPER::revcom
;
650 my $end = $self->length - 1;
652 $new->{_meta
}->{$_} = [ reverse @
{$self->{_meta
}->{$_}}[0..$end]]
653 } keys %{$self->{_meta
}};
661 Usage : $subseq = $seq->trunc(10,100);
662 Function: Provides a truncation of a sequence together with meta data
663 Returns : a fresh Bio::Seq::Meta implementing object
664 Args : Two integers denoting first and last residue of the sub-sequence.
669 my ($self, $start, $end) = @_;
672 $start =~ /^[+]?\d+$/ and $start > 0 or
673 $self->throw("Need at least a positive integer start value as start; got [$start]");
674 $end =~ /^[+]?\d+$/ and $end > 0 or
675 $self->throw("Need at least a positive integer start value as end; got [$end]");
677 $self->throw("End position has to be larger or equal to start; got [$start..$end]");
678 $end <= $self->length or
679 $self->throw("End position can not be larger than sequence length; got [$end]");
681 my $new = $self->SUPER::trunc
($start, $end);
685 $new->{_meta
}->{$_} = [@
{$self->{_meta
}->{$_}}[$start..$end]]
686 } keys %{$self->{_meta
}};