maint: remove Travis stuff which has been replaced with Github actions (#325)
[bioperl-live.git] / lib / Bio / Index / AbstractSeq.pm
blobd26e767156f8442c2a19bef23d99d7252ed10457
2 # BioPerl module for Bio::Index::AbstractSeq
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Ewan Birney <birney@ebi.ac.uk>
8 # Copyright Ewan Birney
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Index::AbstractSeq - base class for AbstractSeq
18 =head1 SYNOPSIS
20 # Make a new sequence file indexing package
22 package MyShinyNewIndexer;
24 use base qw(Bio::Index::AbstractSeq);
26 # Now provide the necessary methods...
28 =head1 DESCRIPTION
30 Provides a common base class for multiple sequence files built using
31 the Bio::Index::Abstract system, and provides a Bio::DB::SeqI
32 interface.
34 =head1 FEEDBACK
36 =head2 Mailing Lists
38 User feedback is an integral part of the evolution of this
39 and other Bioperl modules. Send your comments and suggestions
40 preferably to one of the Bioperl mailing lists.
41 Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Support
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 responsive experts will be able to look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
57 =head2 Reporting Bugs
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 of the bugs and their resolution. Bug reports can be submitted via the
61 web:
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Ewan Birney
67 Email birney@ebi.ac.uk
69 =head1 APPENDIX
71 The rest of the documentation details each of the object methods.
72 Internal methods are usually preceded with a _
74 =head1 SEE ALSO
76 L<Bio::Index::Abstract>, which provides dbm indexing for flat files of
77 any type, containing sequence or not. L<Bio::Index::AbstractSeq> inherits
78 from L<Bio::Index::Abstract>
80 =cut
82 # Let's begin the code ...
84 package Bio::Index::AbstractSeq;
86 use strict;
88 use Bio::SeqIO::MultiFile;
90 use base qw(Bio::Index::Abstract Bio::DB::SeqI);
# Constructor. Delegates object creation to the parent class and then
# attaches an empty array that _get_SeqIO_object() uses as its cache
# (one slot per indexed file number).
#
# Args    : passed through unchanged to the parent constructor
# Returns : the newly created object
sub new {
    my ( $class, @ctor_args ) = @_;

    my $index = $class->SUPER::new(@ctor_args);
    $index->{'_seqio_cache'} = [];

    return $index;
}
100 =head2 _file_format
102 Title : _file_format
103 Usage : $self->_file_format
104 Function: Derived classes should override this
105 method (it throws an exception here)
106 to give the file format of the files used
107 Example :
108 Returns :
109 Args :
111 =cut
# Abstract hook: a concrete indexer overrides this to report the format of
# the files it indexes. This base version only raises an error (through
# $self->throw) that names the class of the invoking object.
sub _file_format {
    my $self = shift;

    my $class_name = ref $self;
    $self->throw("Class '$class_name' must provide a file format method correctly");
}
120 =head2 fetch
122 Title : fetch
123 Usage : $index->fetch( $id )
124 Function: Returns a Bio::Seq object from the index
125 Example : $seq = $index->fetch( 'dJ67B12' )
126 Returns : Bio::Seq object
127 Args : ID
129 =cut
# Look up $id in the index and read the sequence stored at the recorded
# file number / byte offset.
#
# Args    : $id - key to look up in the index database
# Returns : whatever the SeqIO reader's next_seq() yields for that record;
#           undef when the index holds no (true) record for $id
# Throws  : through $self->throw when the file handle cannot be
#           repositioned to the recorded offset
sub fetch {
    my ( $self, $id ) = @_;
    my $db = $self->db();
    my $seq;

    if ( my $rec = $db->{$id} ) {
        my ( $file, $begin ) = $self->unpack_record($rec);

        # Get the (possibly cached) SeqIO object
        my $seqio = $self->_get_SeqIO_object($file);
        my $fh    = $seqio->_fh();

        # Move to the start of the record.
        # $begin-- if( $^O =~ /mswin/i); # workaround for Win DB_File bug
        #
        # The reader may be a cached one, so a seek that fails would leave
        # the handle wherever the previous read stopped and next_seq()
        # would silently return some other record. Fail loudly instead.
        seek( $fh, $begin, 0 )
            or $self->throw("Could not seek to offset $begin for '$id': $!");

        $seq = $seqio->next_seq();
    }

    # we essentially assume that the primary_id for the database
    # is the display_id
    if (   ref($seq)
        && $seq->isa('Bio::PrimarySeqI')
        && $seq->primary_id =~ /^\D+$/ )
    {
        $seq->primary_id( $seq->display_id() );
    }

    return $seq;
}
159 =head2 _get_SeqIO_object
161 Title : _get_SeqIO_object
162 Usage : $index->_get_SeqIO_object( $file )
163 Function: Returns a Bio::SeqIO object for the file
164 Example : $seq = $index->_get_SeqIO_object( 0 )
165 Returns : Bio::SeqIO object
166 Args : File number (an integer)
168 =cut
# Return the SeqIO reader for indexed file number $i. The reader is built
# on first request from the file handle for that file number and the
# format reported by _file_format, then kept in the '_seqio_cache' slot
# for $i so later calls get the same object back.
sub _get_SeqIO_object {
    my ( $self, $i ) = @_;

    my $cached = $self->{'_seqio_cache'}[$i];
    return $cached if $cached;

    # Cache miss: obtain the handle first, then wrap it in a new reader.
    my $handle = $self->_file_handle($i);
    my $reader = Bio::SeqIO->new(
        -Format => $self->_file_format,
        -fh     => $handle,
    );
    $self->{'_seqio_cache'}[$i] = $reader;

    return $reader;
}
183 =head2 get_Seq_by_id
185 Title : get_Seq_by_id
186 Usage : $seq = $db->get_Seq_by_id($id)
187 Function: retrieves a sequence object, identically to
188 ->fetch, but here behaving as a Bio::DB::SeqI
189 Returns : new Bio::Seq object
190 Args : string represents the id
193 =cut
# Lookup by id: hands the id straight to fetch() and returns its result.
# Only the first argument after the invocant is forwarded.
sub get_Seq_by_id {
    my $self = shift;
    my ($id) = @_;

    return $self->fetch($id);
}
201 =head2 get_Seq_by_acc
203 Title : get_Seq_by_acc
204 Usage : $seq = $db->get_Seq_by_acc($acc)
205 Function: retrieves a sequence object, identically to
206 ->fetch, but here behaving as a Bio::DB::SeqI
207 Returns : new Bio::Seq object
208 Args : string represents the accession number
211 =cut
# Lookup by accession: hands the accession straight to fetch() and returns
# its result. Only the first argument after the invocant is forwarded.
sub get_Seq_by_acc {
    my $self = shift;
    my ($acc) = @_;

    return $self->fetch($acc);
}
219 =head2 get_PrimarySeq_stream
221 Title : get_PrimarySeq_stream
222 Usage : $stream = get_PrimarySeq_stream
223 Function: Makes a Bio::DB::SeqStreamI compliant object
224 which provides a single method, next_primary_seq
225 Returns : Bio::DB::SeqStreamI
226 Args : none
229 =cut
# Build a Bio::SeqIO::MultiFile stream over every file recorded in the
# index. File names come from the "__FILE_<n>" records of the index
# database, for n from 0 up to the file count minus one; only the first
# field of each unpacked record is used.
#
# Args    : none
# Returns : the Bio::SeqIO::MultiFile object
sub get_PrimarySeq_stream {
    my $self = shift;

    my $file_total = $self->_file_count() || 0;

    my @paths;
    for my $n ( 0 .. $file_total - 1 ) {
        my ($path) = $self->unpack_record( $self->db->{"__FILE_$n"} );
        push @paths, $path;
    }

    return Bio::SeqIO::MultiFile->new(
        '-format' => $self->_file_format,
        -files    => \@paths,
    );
}
245 =head2 get_all_primary_ids
247 Title : get_all_primary_ids
248 Usage : @ids = $seqdb->get_all_primary_ids()
249 Function: gives an array of all the primary_ids of the
250 sequence objects in the database. These
251 may be ids (display style) or accession numbers
252 or something else completely different - they
253 *are not* meaningful outside of this database
254 implementation.
255 Example :
256 Returns : an array of strings
257 Args : none
260 =cut
# Return one id per distinct record position in the index.
#
# The same record can be indexed under several keys (for example both an
# accession number and a name), and a database may have no accession
# numbers at all, so key semantics cannot be relied on. Instead the keys
# are collapsed by the "file:offset" position they point to, keeping one
# key per position (whichever key for that position is visited last).
#
# Args    : none (extra arguments are ignored)
# Returns : the surviving ids, in no particular order
sub get_all_primary_ids {
    my $self  = shift;
    my $index = $self->db;

    my %id_at;
    while ( my ( $key, $packed ) = each %{$index} ) {

        # keys starting with "__" hold internal bookkeeping, not sequences
        next if $key =~ /^__/;

        my ( $file_no, $offset ) = $self->unpack_record($packed);
        $id_at{"${file_no}:${offset}"} = $key;
    }

    return values %id_at;
}
292 =head2 get_Seq_by_primary_id
294 Title : get_Seq_by_primary_id
295 Usage : $seq = $db->get_Seq_by_primary_id($primary_id_string);
296 Function: Gets a Bio::Seq object by the primary id. The primary
297 id in these cases has to come from $db->get_all_primary_ids.
298 There is no other way to get (or guess) the primary_ids
299 in a database.
301 The other possibility is to get Bio::PrimarySeqI objects
302 via the get_PrimarySeq_stream and the primary_id field
303 on these objects are specified as the ids to use here.
304 Returns : A Bio::Seq object
305 Args : primary id (as a string)
306 Throws : "acc does not exist" exception
309 =cut
# Lookup by primary id: hands the id straight to fetch() and returns its
# result. Only the first argument after the invocant is forwarded.
sub get_Seq_by_primary_id {
    my $self = shift;
    my ($primary_id) = @_;

    return $self->fetch($primary_id);
}