2 # BioPerl module for Bio::SeqIO::gcg
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Ewan Birney <birney@ebi.ac.uk>
7 # and Lincoln Stein <lstein@cshl.org>
9 # Copyright Ewan Birney & Lincoln Stein
11 # You may distribute this module under the same terms as perl itself
14 # October 18, 1999 Largely rewritten by Lincoln Stein
16 # POD documentation - main docs before the code
20 Bio::SeqIO::gcg - GCG sequence input/output stream
24 Do not use this module directly. Use it via the Bio::SeqIO class.
28 This object can transform Bio::Seq objects to and from GCG flat
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to one
37 of the Bioperl mailing lists. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 the bugs and their resolution. Bug reports can be submitted via the web:
58 https://github.com/bioperl/bioperl-live/issues
60 =head1 AUTHORS - Ewan Birney & Lincoln Stein
62 Email: E<lt>birney@ebi.ac.ukE<gt>
63 E<lt>lstein@cshl.orgE<gt>
67 Jason Stajich, jason@bioperl.org
71 The rest of the documentation details each of the object
72 methods. Internal methods are usually preceded with a _
76 # Let the code begin...
78 package Bio
::SeqIO
::gcg
;
81 use Bio
::Seq
::SeqFactory
;
83 use base
qw(Bio::SeqIO);
87 $self->SUPER::_initialize
(@args);
88 if( ! defined $self->sequence_factory ) {
89 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new
90 (-verbose
=> $self->verbose(),
91 -type
=> 'Bio::Seq::RichSeq'));
98 Usage : $seq = $stream->next_seq()
99 Function: returns the next sequence in the stream
100 Returns : Bio::Seq object
106 my ($self,@args) = @_;
107 my($id,$type,$desc,$line,$chksum,$sequence,$date,$len);
109 while( defined($_ = $self->_readline()) ) {
111 ## Get the descriptive info (anything before the line with '..')
112 unless( /\.\.$/ ) { $desc.= $_; }
113 ## Pull ID, Checksum & Type from the line containing '..'
114 /\.\.$/ && do { $line = $_; chomp;
115 if(/Check\:\s(\d+)\s/) { $chksum = $1; }
116 if(/Type:\s(\w)\s/) { $type = $1; }
119 if(/Length:\s+(\d+)\s+(\S.+\S)\s+Type/ )
120 { $len = $1; $date = $2;}
124 return if ( !defined $_);
125 chomp($desc); # remove last "\n"
127 while( defined($_ = $self->_readline()) ) {
129 ## This is where we grab the sequence info.
132 $self->throw("Looks like start of another sequence. See documentation. ");
135 next if($_ eq "\n"); ## skip whitespace lines in formatted seq
136 s/[\d\s\t]//g; ## remove anything that is not alphabet char: preserve anything that is not explicitly specified for removal (Stefan Kirov)
137 # $_ = uc($_); ## uppercase sequence: NO. Keep the case. HL
140 ##If we parsed out a checksum, we might as well test it
142 if(defined $chksum) {
143 unless(_validate_checksum
(uc($sequence),$chksum)) {
144 $self->throw("Checksum failure on parsed sequence.");
148 ## Remove whitespace from identifier because the constructor
149 ## will throw a warning otherwise...
150 if(defined $id) { $id =~ s/\s+//g;}
152 ## Turn our parsed "Type: N" or "Type: P" (if found) into the appropriate
153 ## keyword that the constructor expects...
155 if($type eq "N") { $type = "dna"; }
156 if($type eq "P") { $type = "prot"; }
159 return $self->sequence_factory->create(-seq
=> $sequence,
170 Usage : $stream->write_seq(@seq)
171 Function: writes the formatted $seq object into the stream
172 Returns : 1 for success and 0 for error
173 Args : array of Bio::PrimarySeqI object
179 my ($self,@seq) = @_;
181 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
182 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
184 $self->warn("No whitespace allowed in GCG ID [". $seq->display_id. "]")
185 if $seq->display_id =~ /\s/;
188 my $comment = $seq->desc || '';
190 my $type = ( $seq->alphabet() =~ /[dr]na/i ) ?
'N' : 'P';
193 if( $seq->can('get_dates') ) {
194 ($timestamp) = $seq->get_dates;
196 $timestamp = localtime(time);
198 my($sum,$offset,$len,$i,$j,$cnt,@out);
201 ## Set the offset if we have any non-standard numbering going on
204 $sum = $self->GCG_checksum($seq);
206 #Output the sequence header info
207 push(@out,"$comment\n");
208 push(@out,"$id Length: $len $timestamp Type: $type Check: $sum ..\n\n");
212 for($j = 0 ; $j < $len ; ) {
214 $out[$i] = sprintf("%8d ",($j+$offset)); #numbering
216 $out[$i] .= sprintf("%s",substr($str,$j,10));
218 if( $j < $len && $j % 50 != 0 ) {
220 }elsif($j % 50 == 0 ) {
221 $out[$i++] .= "\n\n";
229 return unless $self->_print(@out);
232 $self->flush if $self->_flush_on_write && defined $self->_fh;
239 Usage : $cksum = $gcgio->GCG_checksum($seq);
240 Function : returns a gcg checksum for the sequence specified
242 This method can also be called as a class method.
244 Returns : a GCG checksum string
245 Argument : a Bio::PrimarySeqI implementing object
250 my ($self,$seqobj) = @_;
255 my $seq = $seqobj->seq();
258 foreach $char ( split(/[\.\-]*/, $seq)) {
260 $checksum += ($index * (unpack("c",$char) || 0) );
266 return ($checksum % 10000);
269 =head2 _validate_checksum
271 Title : _validate_checksum
272 Usage : n/a - internal method
273 Function: if parsed gcg sequence contains a checksum field
274 : we compare it to a value computed here on the parsed
275 : sequence. A checksum mismatch would indicate some
276 : type of parsing failure occurred.
278 Returns : 1 for success, 0 for failure
279 Args : string containing parsed seq, value of parsed checksum
284 sub _validate_checksum
{
285 my($seq,$parsed_sum) = @_;
286 my($i,$len,$computed_sum,$cnt);
290 #Generate the GCG Checksum value
292 for($i=0; $i<$len ;$i++) {
294 $computed_sum += $cnt * ord(substr($seq,$i,1));
295 ($cnt == 57) && ($cnt=0);
297 $computed_sum %= 10000;
299 ## Compare and decide if success or failure
301 if($parsed_sum == $computed_sum) {