Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / SeqIO / phd.pm
blob7c24341990db5e98a5f55ae13bf8b9a2b2d0bc97
2 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
3 # This module is free software; you can redistribute it and/or
4 # modify it under the same terms as Perl itself.
6 # Copyright Chad Matsalla
8 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
12 =head1 NAME
14 Bio::SeqIO::phd - phd file input/output stream
16 =head1 SYNOPSIS
18 Do not use this module directly. Use it via the L<Bio::SeqIO> class.
20 =head1 DESCRIPTION
22 This object can transform .phd files (from Phil Green's phred basecaller)
23 to and from Bio::Seq::Quality objects. The phd format is described in section 10
24 at this url: http://www.phrap.org/phredphrap/phred.html
26 =head1 FEEDBACK
28 =head2 Mailing Lists
30 User feedback is an integral part of the evolution of this and other
31 Bioperl modules. Send your comments and suggestions preferably to one
32 of the Bioperl mailing lists. Your participation is much appreciated.
34 bioperl-l@bioperl.org - General discussion
35 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
37 =head2 Support
39 Please direct usage questions or support issues to the mailing list:
41 I<bioperl-l@bioperl.org>
43 rather than to the module maintainer directly. Many experienced and
44 reponsive experts will be able look at the problem and quickly
45 address it. Please include a thorough description of the problem
46 with code and data examples if at all possible.
48 =head2 Reporting Bugs
50 Report bugs to the Bioperl bug tracking system to help us keep track
51 the bugs and their resolution.
52 Bug reports can be submitted via the web:
54 https://github.com/bioperl/bioperl-live/issues
56 =head1 AUTHOR Chad Matsalla
58 Chad Matsalla
59 bioinformatics@dieselwurks.com
61 =head1 CONTRIBUTORS
63 Jason Stajich, jason@bioperl.org
64 Jean-Marc Frigerio, Frigerio@pierroton.inra.fr
66 =head1 APPENDIX
68 The rest of the documentation details each of the object
69 methods. Internal methods are usually preceded with a _
71 =cut
73 # 'Let the code begin...
76 package Bio::SeqIO::phd;
77 use strict;
78 use Bio::Seq::SeqFactory;
79 use Bio::Seq::RichSeq;
80 use Bio::Annotation::Collection;
81 use Bio::Annotation::Comment;
82 use Dumpvalue;
84 my $dumper = Dumpvalue->new();
86 use base qw(Bio::SeqIO);
88 sub _initialize {
89 my($self,@args) = @_;
90 $self->SUPER::_initialize(@args);
91 if( ! defined $self->sequence_factory ) {
92 $self->sequence_factory(Bio::Seq::SeqFactory->new
93 (-verbose => $self->verbose(),
94 -type => 'Bio::Seq::Quality'));
98 =head2 next_seq
100 Title : next_seq()
101 Usage : $swq = $stream->next_seq()
102 Function: returns the next phred sequence in the stream
103 Returns : Bio::Seq::Quality object
104 Args : NONE
106 =cut
108 sub next_seq {
109 my ($self,@args) = @_;
110 my $seq;
111 while (my $entry = $self->_readline) {
112 chomp $entry;
113 if ($entry =~ /^BEGIN_SEQUENCE\s+(\S+)/) {
114 if (defined $seq) {
115 # done with current sequence
116 $self->_pushback($entry);
117 last;
118 } else {
119 # start new sequence
120 my $id = $1;
121 $seq = $self->sequence_factory->create(
122 -id => $id,
123 -primary_id => $id,
124 -display_id => $id,
127 } elsif ($entry =~ /^BEGIN_COMMENT/) {
128 my $collection = Bio::Annotation::Collection->new;
129 while ($entry = $self->_readline) {
130 chomp $entry;
131 if ($entry =~ /^(\w+):\s+(.+)$/) {
132 my ($name, $content) = ($1, $2);
133 my $comment = Bio::Annotation::Comment->new(
134 -text => $content,
135 -tagname => $name
137 $collection->add_Annotation('header',$comment);
138 } elsif ($entry =~ /^END_COMMENT/) {
139 $seq->Bio::Seq::RichSeq::annotation($collection);
140 last;
143 } elsif ($entry =~ /^BEGIN_DNA/) {
144 my $dna = '';
145 my @qualities = ();
146 my @trace_indices = ();
147 while ($entry = $self->_readline) {
148 chomp $entry;
149 if ( $entry =~ /(\S+)\s+(\S+)\s+(\S+)/ ) {
150 # add nucleotide and quality scores to sequence
151 $dna .= $1;
152 push @qualities,$2;
153 push(@trace_indices,$3) if defined $3; # required for phd file
154 } elsif ($entry =~ /^END_DNA/) {
155 # end of sequence, save it
156 $seq->seq($dna);
157 $seq->qual(\@qualities);
158 $seq->trace(\@trace_indices);
159 last;
163 } elsif ($entry =~ /^END_SEQUENCE/) {
164 # the sequence may be over, but some other info can come after
165 next;
166 } elsif ($entry =~ /^WR\{/) {
167 # Whole-Read items
168 # Programs like Consed or Autofinish add it to phd file. See doc:
169 # http://www.phrap.org/consed/distributions/README.16.0.txt
170 #my ($type, $nane, $date, $time) = split(' ',$self->_readline);
171 #my $extra_info = '';
172 #while ($entry = $self->_readline) {
173 # chomp $entry;
174 # last if ($entry =~ /\}/);
175 # $extra_info .= $entry;
177 ### fea: save WR somewhere? but where?
180 return $seq;
184 =head2 write_header
186 Title : write_header()
187 Usage : $seqio->write_header()
188 Function: Write out the header (BEGIN_COMMENTS .. END_COMMENT) part of a phd file
189 Returns : nothing
190 Args : a Bio::Seq::Quality object
191 Notes : These are the comments that reside in the header of a phd file
192 at the present time. If not provided by the Bio::Seq::Quality object,
193 the following default values will be used:
195 CHROMAT_FILE : $swq->id()
196 ABI_THUMBPRINT : 0
197 PHRED_VERSION : 0.980904.e
198 CALL_METHOD : phred
199 QUALITY_LEVELS : 99
200 TIME : <current time>
201 TRACE_ARRAY_MIN_INDEX : 0
202 TRACE_ARRAY_MAX_INDEX : unknown
203 CHEM : unknown
204 DYE : unknown
206 =cut
208 sub write_header {
209 my ($self, $swq) = @_;
210 $self->_print("\nBEGIN_COMMENT\n\n");
211 #defaults
212 my $time = localtime();
213 for ([CHROMAT_FILE =>$swq->attribute('CHROMAT_FILE')],
214 [ABI_THUMBPRINT => 0],
215 [PHRED_VERSION => '0.980904.e'],
216 [CALL_METHOD => 'phred'],
217 [QUALITY_LEVELS => '99'],
218 [TIME => $time],
219 [TRACE_ARRAY_MIN_INDEX => 0],
220 [TRACE_ARRAY_MAX_INDEX => 'unknown'],
221 [CHEM => 'unknown'],
222 [DYE => 'unknown'])
224 $swq->attribute($_->[0],$_->[1]) unless $swq->attribute($_->[0]);
227 my @annot = $swq->annotation->get_Annotations('header');
228 for (@annot) {
229 $self->_print($_->tagname,": ",$_->text,"\n");
231 $self->_print("\nEND_COMMENT\n\n");
232 $self->flush if $self->_flush_on_write && defined $self->_fh;
233 return 1;
236 =head2 write_seq
238 Title : write_seq()
239 Usage : $seqio->write_seq($swq);
240 Function: Write out a phd file.
241 Returns : Nothing.
242 Args : a Bio::Seq::Quality object
244 =cut
246 sub write_seq {
247 my ($self,$swq) = @_;
249 $self->throw("You must pass a Bio::Seq::Quality object to write_seq")
250 unless (ref($swq) eq "Bio::Seq::Quality");
252 $self->throw("Can't create the phd because the sequence and the quality in the Quality object are of different lengths.")
253 unless $swq->length() ne 'DIFFERENT';
255 $self->_print("BEGIN_SEQUENCE ".$swq->id()."\n");
256 $self->write_header($swq);
257 $self->_print("BEGIN_DNA\n");
258 for my $curr(1 .. $swq->length()) {
259 $self->_print (sprintf("%s %s %s\n",
260 uc($swq->baseat($curr)),
261 $swq->qualat($curr),
262 $swq->trace_index_at($curr)));
264 $self->_print ("END_DNA\n\nEND_SEQUENCE\n");
266 $self->flush if $self->_flush_on_write && defined $self->_fh;
267 return 1;
270 =head2 attribute
272 Title : attribute()
273 Usage : swq->attribute(name[,value]);
274 Function: Get/Set the name attribute.
275 Returns : a string if 1 param, nothing else.
276 Args : a name or a pair name, value
278 =cut
280 sub Bio::Seq::Quality::attribute {
281 my ($self, $name, $value) = @_;
282 my $collection = $self->annotation;
283 my @annot = $collection->get_Annotations('header');
284 my %attribute;
285 my $annot;
286 for (@annot) {
287 $attribute{$_->tagname} = $_->display_text;
288 $annot = $_ if $_->tagname eq $name;
292 unless (defined $attribute{$name}) { #new comment
293 my $comment =
294 Bio::Annotation::Comment->new(-text => $value || 'unknown');
295 $comment->tagname($name);
296 $collection->add_Annotation('header',$comment);
297 return;
300 return $attribute{$name} unless (defined $value);#get
302 #print "ATTRIBUTE ",$annot," $name $attribute{$name}\n";
303 $annot->text($value); #set
304 return;
308 =head2 chromat_file
310 Title : chromat_file
311 Usage : swq->chromat_file([filename]);
312 Function: Get/Set the CHROMAT_FILE attribute.
313 Returns : a string if 1 param, nothing else.
314 Args : none or a filename
316 =cut
318 sub Bio::Seq::Quality::chromat_file {
319 my ($self,$arg) = @_;
320 return $self->attribute('CHROMAT_FILE',$arg);
323 =head2 abi_thumbprint
325 Title : abi_thumbprint
326 Usage : swq->abi_thumbprint([value]);
327 Function: Get/Set the ABI_THUMBPRINT attribute.
328 Returns : a string if 1 param, nothing else.
329 Args : none or a value
331 =cut
333 sub Bio::Seq::Quality::abi_thumbprint {
334 my ($self,$arg) = @_;
335 return $self->attribute('ABI_THUMBPRINT',$arg);
338 =head2 phred_version
340 Title : phred_version
341 Usage : swq->phred_version([value]);
342 Function: Get/Set the PHRED_VERSION attribute.
343 Returns : a string if 1 param, nothing else.
344 Args : none or a value
346 =cut
349 sub Bio::Seq::Quality::phred_version {
350 my ($self,$arg) = @_;
351 return $self->attribute('PHRED_VERSION', $arg);
355 =head2 call_method
357 Title : call_method
358 Usage : swq->call_method([value]);
359 Function: Get/Set the CALL_METHOD attribute.
360 Returns : a string if 1 param, nothing else.
361 Args : none or a value
363 =cut
365 sub Bio::Seq::Quality::call_method {
366 my ($self,$arg) = @_;
367 return $self->attribute('CALL_METHOD', $arg);
370 =head2 quality_levels
372 Title : quality_levels
373 Usage : swq->quality_levels([value]);
374 Function: Get/Set the quality_levels attribute.
375 Returns : a string if 1 param, nothing else.
376 Args : none or a value
378 =cut
380 sub Bio::Seq::Quality::quality_levels {
381 my ($self,$arg) = @_;
382 return $self->attribute('QUALITY_LEVELS', $arg);
385 =head2 trace_array_min_index
387 Title : trace_array_min_index
388 Usage : swq->trace_array_min_index([value]);
389 Function: Get/Set the trace_array_min_index attribute.
390 Returns : a string if 1 param, nothing else.
391 Args : none or a value
393 =cut
395 sub Bio::Seq::Quality::trace_array_min_index {
396 my ($self,$arg) = @_;
397 return $self->attribute('TRACE_ARRAY_MIN_INDEX', $arg);
400 =head2 trace_array_max_index
402 Title : trace_array_max_index
403 Usage : swq->trace_array_max_index([value]);
404 Function: Get/Set the trace_array_max_index attribute.
405 Returns : a string if 1 param, nothing else.
406 Args : none or a value
408 =cut
410 sub Bio::Seq::Quality::trace_array_max_index {
411 my ($self,$arg) = @_;
412 return $self->attribute('TRACE_ARRAY_MAX_INDEX', $arg);
415 =head2 chem
417 Title : chem
418 Usage : swq->chem([value]);
419 Function: Get/Set the chem attribute.
420 Returns : a string if 1 param, nothing else.
421 Args : none or a value
423 =cut
425 sub Bio::Seq::Quality::chem {
426 my ($self,$arg) = @_;
427 return $self->attribute('CHEM', $arg);
430 =head2 dye
432 Title : dye
433 Usage : swq->dye([value]);
434 Function: Get/Set the dye attribute.
435 Returns : a string if 1 param, nothing else.
436 Args : none or a value
438 =cut
440 sub Bio::Seq::Quality::dye {
441 my ($self,$arg) = @_;
442 return $self->attribute('DYE', $arg);
445 =head2 time
447 Title : time
448 Usage : swq->time([value]);
449 Function: Get/Set the time attribute.
450 Returns : a string if 1 param, nothing else.
451 Args : none or a value
453 =cut
455 sub Bio::Seq::Quality::time {
456 my ($self,$arg) = @_;
457 return $self->attribute('TIME', $arg);
460 =head2 touch
462 Title : touch
463 Usage : swq->touch();
464 Function: Set the time attribute to current time.
465 Returns : nothing
466 Args : none
468 =cut
470 sub Bio::Seq::Quality::touch {
471 my $time = localtime();
472 shift->attribute('TIME',$time);
473 return;