2 # bioperl module for Bio::PrimaryQual
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com>
8 # Copyright Chad Matsalla
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object
20 use Bio::Seq::PrimaryQual;
22 # you can use either a space-delimited string for quality
24 my $string_quals = "10 20 30 40 50 40 30 20 10";
25 my $qualobj = Bio::Seq::PrimaryQual->new(
26 -qual => $string_quals,
27 -id => 'QualityFragment-12',
28 -accession_number => 'X78121',
31 # _or_ you can use an array of quality values
33 my @q2 = split/ /,$string_quals;
34 $qualobj = Bio::Seq::PrimaryQual->new(
36 -primary_id => 'chads primary_id',
37 -desc => 'chads desc',
38 -accession_number => 'chads accession_number',
42 # to get the quality values out:
44 my @quals = @{$qualobj->qual()};
46 # to give _new_ quality values
48 my $newqualstring = "50 90 1000 20 12 0 0";
49 $qualobj->qual($newqualstring);
54 This module provides a mechanism for storing quality
55 values. Much more useful as part of
56 Bio::Seq::SeqWithQuality where these quality values
57 are associated with the sequence information.
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to one
65 of the Bioperl mailing lists. Your participation is much appreciated.
67 bioperl-l@bioperl.org - General discussion
68 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
72 Please direct usage questions or support issues to the mailing list:
74 I<bioperl-l@bioperl.org>
76 rather than to the module maintainer directly. Many experienced and
77 responsive experts will be able to look at the problem and quickly
78 address it. Please include a thorough description of the problem
79 with code and data examples if at all possible.
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 of the bugs and their resolution. Bug reports can be submitted via the
87 https://github.com/bioperl/bioperl-live/issues
89 =head1 AUTHOR - Chad Matsalla
91 Email bioinformatics@dieselwurks.com
95 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
# Package declaration for Bio::Seq::PrimaryQual.
100 package Bio
::Seq
::PrimaryQual
;
# Inherit from Bio::Root::Root and Bio::Seq::QualI.
104 use base
qw(Bio::Root::Root Bio::Seq::QualI);
# Characters accepted in a quality string: digits, the exponent letters
# e/E, dot, whitespace, plus and minus. validate_qual() interpolates this
# value into a regex character class.
106 our $MATCHPATTERN = '0-9eE\.\s+-';
112 Usage : $qual = Bio::Seq::PrimaryQual->new(
113 -qual => '10 20 30 40 50 50 20 10',
115 -accession_number => 'AL000012',
118 Function: Returns a new Bio::Seq::PrimaryQual object from basic
119 constructors, being a string _or_ a reference to an array for the
120 sequence and strings for id and accession_number. Note that you
121 can provide an empty quality string.
122 Returns : a new Bio::Seq::PrimaryQual object
# Constructor body: build the object through the parent class, then apply
# whichever named arguments were supplied.
127 my ($class, @args) = @_;
128 my $self = $class->SUPER::new
(@args);
130 # default: turn ON the warnings (duh)
# Unpack the named constructor arguments with _rearrange.
131 my($qual,$id,$acc,$pid,$desc,$given_id,$header) =
132 $self->_rearrange([qw(QUAL
141 if( defined $id && defined $given_id ) {
142 if( $id ne $given_id ) {
143 $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");
# A defined $given_id always ends up as the id that is applied below.
146 if( defined $given_id ) { $id = $given_id; }
148 # note: the sequence string may be empty
149 $self->qual(defined($qual) ?
$qual : []);
# Each optional attribute is applied only when its value is true, so an
# empty string or "0" is treated the same as "not supplied".
150 $header && $self->header($header);
151 $id && $self->display_id($id);
152 $acc && $self->accession_number($acc);
153 $pid && $self->primary_id($pid);
154 $desc && $self->desc($desc);
163 Usage : @quality_values = @{$obj->qual()};
164 Function: Get or set the quality as a reference to an array containing the
165 quality values. An error is generated if the quality scores are
166 invalid, see validate_qual().
167 Returns : A reference to an array.
# Combined getter/setter for the quality values; the stored array
# reference is what gets returned.
172 my ($self,$value) = @_;
# No value, or an empty string: keep what is stored, defaulting to [].
174 if( ! defined $value || length($value) == 0 ) {
175 $self->{'qual'} ||= [];
176 } elsif( ref($value) =~ /ARRAY/i ) {
177 # if the user passed in a reference to an array
# The caller's reference is stored as-is (not copied) and is not validated.
178 $self->{'qual'} = $value;
# Any other value is handled as a string: validate it (the second argument
# asks validate_qual to throw on failure), then split it on whitespace.
180 $self->validate_qual($value, 1);
# NOTE(review): split /\s+/ yields an empty leading element when the string
# starts with whitespace -- confirm whether such input should be trimmed.
182 $self->{'qual'} = [split(/\s+/,$value)];
185 return $self->{'qual'};
192 Usage : $sequence = $obj->seq();
193 Function : Returns the quality numbers as a space-separated string.
194 Returns : Single string.
# Join the quality values with single spaces; shift takes the invocant
# from @_ and qual() supplies the array reference.
200 return join ' ', @
{ shift->qual };
204 =head2 validate_qual($qualstring)
206 Title : validate_qual($qualstring)
207 Usage : print("Valid.") if $obj->validate_qual($quality_string);
208 Function: Test that the given quality string is valid. It is expected to
209 contain space-delimited numbers that can be parsed using split /\s+/.
210 However, this validation takes shortcuts and only tests that the
211 string contains characters valid in numbers: 0-9 . eE +-
212 Note that empty quality strings are valid too.
213 Returns : 1 for a valid sequence, 0 otherwise
214 Args : - Scalar containing the quality string to validate.
215 - Boolean to optionally throw an error if validation failed
# Check that a quality string contains only characters permitted by
# $MATCHPATTERN. An undefined string is never flagged by this test.
# Arguments: the quality string, and a flag requesting an exception.
220 my ($self, $qualstr, $throw) = @_;
221 if ( (defined $qualstr ) &&
222 ($qualstr !~ /^[$MATCHPATTERN]*$/) ) {
# Name the offending object by its id when it has one. The ternary is
# needed because defined(...) || '...' would put the boolean 1 into the
# message instead of the identifier.
224 $self->throw("Failed validation of quality score from '".
225 (defined($self->id) ? $self->id : '[unidentified sequence]')."'. No numeric ".
234 =head2 subqual($start,$end)
236 Title : subqual($start,$end)
237 Usage : @subset_of_quality_values = @{$obj->subqual(10,40)};
238 Function: returns the quality values from $start to $end, where the
239 first value is 1 and the number is inclusive, ie 1-2 are the
240 first two bases of the sequence. Start cannot be larger than
241 end but can be equal.
242 Returns : A reference to an array.
243 Args : a start position and an end position
# Return the requested range of quality values as a new array reference.
248 my ($self,$start,$end) = @_;
# NOTE(review): this message says start "has to be greater than end", while
# the documented contract is that start may not exceed end. The wording
# looks inverted -- confirm against the guarding condition.
251 $self->throw("in subqual, start [$start] has to be greater than end [$end]");
# Reject a non-positive start or an end beyond the number of stored values.
254 if( $start <= 0 || $end > $self->length ) {
255 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
258 # remove one from start, and then length is end-start
# The slice copies the selected elements, so the caller receives a fresh
# array rather than the stored one.
262 my @sub_qual_array = @
{$self->{qual
}}[$start..$end];
264 # return substr $self->seq(), $start, ($end-$start);
265 return \
@sub_qual_array;
273 Usage : $id_string = $obj->display_id();
274 Function: returns the display id, aka the common name of the Quality
276 The semantics of this is that it is the most likely string to be
277 used as an identifier of the quality sequence, and likely to have
278 "human" readability. The id is equivalent to the ID field of the
279 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
280 database. In fasta format, the >(\S+) is presumed to be the id,
281 though some people overload the id to embed other information.
282 Bioperl does not use any embedded information in the ID field,
283 and people are encouraged to use other mechanisms (accession
284 field for example, or extending the sequence object) to solve
285 this. Notice that $seq->id() maps to this function, mainly for
286 legacy/convenience issues
# Get/set accessor: store a defined value under 'display_id', then return
# whatever is stored there.
293 my ($obj,$value) = @_;
294 if( defined $value) {
295 $obj->{'display_id'} = $value;
297 return $obj->{'display_id'};
304 Usage : $header = $obj->header();
305 Function: Get/set the header that the user wants printed for this
# Get/set accessor: store a defined value under 'header', then return
# whatever is stored there.
313 my ($obj,$value) = @_;
314 if( defined $value) {
315 $obj->{'header'} = $value;
317 return $obj->{'header'};
322 =head2 accession_number()
324 Title : accession_number()
325 Usage : $unique_biological_key = $obj->accession_number();
326 Function: Returns the unique biological id for a sequence, commonly
327 called the accession_number. For sequences from established
328 databases, the implementors should try to use the correct
329 accession number. Notice that primary_id() provides the unique id
330 for the implementation, allowing multiple objects to have the same
331 accession number in a particular implementation. For sequences
332 with no accession number, this method should return "unknown".
# Accessor for the accession number stored under 'accession_number'.
338 sub accession_number
{
339 my( $obj, $acc ) = @_;
342 $obj->{'accession_number'} = $acc;
344 $acc = $obj->{'accession_number'};
# Fall back to the literal string 'unknown' when nothing is stored.
345 $acc = 'unknown' unless defined $acc;
354 Usage : $unique_implementation_key = $obj->primary_id();
355 Function: Returns the unique id for this object in this implementation.
356 This allows implementations to manage their own object ids in a
357 way the implementation can control; clients can expect one id to
358 map to one object. For sequences with no accession number, this
359 method should return a stringified memory location.
# Get/set accessor: store a defined value under 'primary_id', then return
# whatever is stored there (undef when it was never set).
366 my ($obj,$value) = @_;
367 if( defined $value) {
368 $obj->{'primary_id'} = $value;
370 return $obj->{'primary_id'};
377 Usage : $qual->desc($newval);
378 $description = $qual->desc();
379 Function: Get/set description text for a qual object
381 Returns : Value of desc
382 Args : newvalue (optional)
# Get/set accessor: store a defined value under 'desc', then return
# whatever is stored there.
387 my ($obj,$value) = @_;
388 if( defined $value) {
389 $obj->{'desc'} = $value;
391 return $obj->{'desc'};
398 Usage : $id = $qual->id();
399 Function: Return the ID of the quality. This should normally be (and
400 actually is in the implementation provided here) just a synonym
# Synonym for display_id(): forwards both the setter form (a defined value)
# and the getter form to that method.
408 my ($self,$value) = @_;
409 if( defined $value ) {
410 return $self->display_id($value);
412 return $self->display_id();
419 Usage : $length = $qual->length();
420 Function: Return the length of the array holding the quality values.
421 Under most circumstances, this should match the number of quality
422 values but no validation is done when the PrimaryQual object is
423 constructed and non-digits could be put into this array. Is this
424 a bug? Just enough rope...
425 Returns : A scalar (the number of elements in the quality array).
# Warn when the stored 'qual' entry is not a plain array reference.
432 if (ref($self->{qual
}) ne "ARRAY") {
433 $self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual
})."(".$self->{qual
}."). Good thing this can _never_ happen.");
# The length is the element count of the stored array.
435 return scalar(@
{$self->{qual
}});
442 Usage : $quality = $obj->qualat(10);
443 Function: Return the quality value at the given location, where the
444 first value is 1 and the number is inclusive, ie 1-2 are the first
445 two bases of the sequence. Start cannot be larger than end but can
# Fetch a single quality value by asking subqual() for a range whose start
# and end are the same position.
453 my ($self,$val) = @_;
454 my @qualat = @
{$self->subqual($val,$val)};
# Exactly one element is expected back.
455 if (scalar(@qualat) == 1) {
# NOTE(review): the message says "more than one", yet an empty result would
# presumably reach this throw as well -- confirm and reword if so.
458 $self->throw("qualat() provided more than one quality.");
465 Usage : $quality = $obj->to_string();
466 Function: Return a textual representation of what the object contains.
467 For this module, this function will return:
# Build a text summary of the object. Only $self receives a value from
# shift; $out and $result are merely declared here.
481 my ($self,$out,$result) = shift;
# NOTE(review): no newline follows the joined qualities, so the first
# "display_id: ..." entry is appended to the same line -- confirm intent.
482 $out = "qual: ".join(',',@
{$self->qual()});
# Call each listed accessor by name and append "name: value".
483 foreach (qw(display_id accession_number primary_id desc id length)) {
484 $result = $self->$_();
# Any false value (undef, empty string, 0) is shown as "<unset>", which
# includes a length of 0.
485 if (!$result) { $result = "<unset>"; }
486 $out .= "$_: $result\n";
# Build a text dump by treating every key of the object hash as a method
# name and calling it. Progress messages are printed along the way.
492 sub to_string_automatic
{
# Only $self receives a value from shift; the other two start undefined.
493 my ($self,$sub_result,$out) = shift;
494 foreach (sort keys %$self) {
495 print("Working on $_\n");
# Probe the method inside eval so a failing call does not abort the loop.
# NOTE(review): $_ is a hash key (a plain string), so ref($_) below is the
# empty string; the method is also invoked a second time in the elsif.
496 eval { $self->$_(); };
497 if ($@
) { $sub_result = ref($_); }
498 elsif (!($sub_result = $self->$_())) {
499 $sub_result = "<unset>";
# Array references are flattened into a comma-separated string.
501 if (ref($sub_result) eq "ARRAY") {
502 print("This thing ($_) is an array!\n");
503 $sub_result = join(',',@
$sub_result);
505 $out .= "$_: ".$sub_result."\n";