1 # BioPerl module for Bio::SeqIO::strider
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Malcolm Cook <mec@stowers-institute.org>
7 # You may distribute this module under the same terms as perl itself
10 # April 7th, 2005 Malcolm Cook authored
12 # POD documentation - main docs before the code
16 Bio::SeqIO::strider - DNA strider sequence input/output stream
20 Do not use this module directly. Use it via the Bio::SeqIO class.
24 This object can transform Bio::Seq objects to and from strider
25 'binary' format, as documented in the strider manual, in which the
26 first 112 bytes are a header, following by the sequence, followed by a
29 Note: it does NOT assign any sequence identifier, since they are not
30 contained in the byte stream of the file; the Strider application
31 simply displays the name of the file on disk as the name of the
32 sequence. The caller should set the id, probably based on the name of
33 the file (after possibly cleaning up whitespace, which ought not to be
34 used as the id in most applications).
36 Note: the strider 'comment' is mapped to the BioPerl 'description'
37 (since there is no other text field, and description maps to defline
44 User feedback is an integral part of the evolution of this and other
45 Bioperl modules. Send your comments and suggestions preferably to one
46 of the Bioperl mailing lists. Your participation is much appreciated.
48 bioperl-l@bioperl.org - General discussion
49 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 Please direct usage questions or support issues to the mailing list:
55 I<bioperl-l@bioperl.org>
57 rather than to the module maintainer directly. Many experienced and
58 reponsive experts will be able look at the problem and quickly
59 address it. Please include a thorough description of the problem
60 with code and data examples if at all possible.
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 the bugs and their resolution. Bug reports can be submitted via the
68 https://github.com/bioperl/bioperl-live/issues
70 =head1 AUTHORS - Malcolm Cook
72 Email: mec@stowers-institute.org
76 Modelled after Bio::SeqIO::fasta by Ewan Birney E<lt>birney@ebi.ac.ukE<gt> and
77 Lincoln Stein E<lt>lstein@cshl.orgE<gt>
81 The rest of the documentation details each of the object
82 methods. Internal methods are usually preceded with a _
86 # Let the code begin...
88 package Bio
::SeqIO
::strider
;
93 use Bio
::Seq
::SeqFactory
;
94 use Convert
::Binary
::C
;
96 use base
qw(Bio::SeqIO);
98 my $c = Convert
::Binary
::C
->new (
99 ByteOrder
=> 'BigEndian',
105 # See this file's __DATA__ section for the c structure definitions
106 # for strider binary header data. Here we slurp it all into $headerdef.
107 $headerdef = <DATA
>};
109 $c->parse($headerdef);
111 my $size_F_HEADER = 112;
113 die "expected strider header structure size of $size_F_HEADER" unless $size_F_HEADER eq $c->sizeof('F_HEADER');
115 my %alphabet2type = (
116 # map between BioPerl alphabet and strider
117 # sequence type code.
119 # From Strider Documentation: the sequence type:
120 # 1, 2, 3 and 4 for DNA, DNA Degenerate, RNA and
121 # Protein sequence files, respectively.
123 # TODO: determine 'DNA Degenerate' based on
131 my %type2alphabet = reverse %alphabet2type;
134 my($self,@args) = @_;
135 $self->SUPER::_initialize
(@args);
136 unless ( defined $self->sequence_factory ) {
137 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new(-verbose
=> $self->verbose(),
138 -type
=> 'Bio::Seq::RichSeq'));
145 Usage : $seq = $stream->next_seq()
146 Function: returns the next sequence in the stream
147 Returns : Bio::Seq object
155 my ($header,$sequence,$fulldesc);
156 eval {read $fh,$header,$size_F_HEADER};
157 $self->throw ("$@ while attempting to reading strider header from " . $self->{'_file'}) if $@
;
158 $self->throw("required $size_F_HEADER bytes while reading strider header in " . $self->{'_file'} . " but found: " . length($header))
159 unless $size_F_HEADER == length($header);
160 my $headerdata = $c->unpack('F_HEADER',$header) or return;
161 read $fh,$sequence,$headerdata->{nLength
};
162 read $fh,$fulldesc,$headerdata->{com_length
};
163 $fulldesc =~ s/\cM/ /g; # gratuitous replacement of mac
164 # linefeed with space.
165 my $seq = $self->sequence_factory->create(
166 # -id => $main::ARGV, #might want to set this in caller to $ARGV.
169 -alphabet
=> $type2alphabet{$headerdata->{type
}} || 'dna',
178 Usage : $stream->write_seq(@seq)
179 Function: writes the $seq object into the stream
180 Returns : 1 for success and 0 for error
181 Args : array of 1 to n Bio::PrimarySeqI objects
187 my ($self,@seq) = @_;
188 my $fh = $self->_fh() || *STDOUT
; #die "could not determine filehandle in strider.pm";
189 foreach my $seq (@seq) {
190 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
191 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
192 my $headerdata = $c->pack('F_HEADER',{
194 type
=> $alphabet2type{$seq->alphabet} || $alphabet2type{dna
},
195 topology
=> $seq->is_circular ?
1 : 0,
196 nLength
=> $seq->length,
198 com_length
=> length($seq->desc || ""),
200 print $fh $headerdata, $seq->seq() || "" , $seq->desc || "";
208 //The following was taken from the strider
1.4 release notes Appendix
(with
209 //some comments gleaned from other parts of manual
)
213 char versionNb
; // the format version number
, currently it is set to
0
214 char type
; // 1=DNA
, 2=DNA Degenerate
, 3=RNA
or 4=Protein
215 char topology
; // linear
or circular
- 0 for a linear sequence
, 1 for a circular one
225 int nLength
; // Sequence
length - the
length the Sequence field
(the number of char
in the text
, each being a base
or an aa
)
226 int nMinus
; // nb of
"negative" bases
, i
.e
. the number of bases numbered with negative numbers
239 int com_length
; // the
length the Comment field
(the number of char
in the text
).