bp_process_wormbase: move program to new Bio-DB-Ace distribution
[bioperl-live.git] / Bio / DB / FileCache.pm
blob29369a61fcec10d21438e355326829794471d882
2 # POD documentation - main docs before the code
6 =head1 NAME
8 Bio::DB::FileCache - In file cache for BioSeq objects
10 =head1 SYNOPSIS
14 $cachedb = Bio::DB::FileCache->new($real_db);
17 # $real_db is a Bio::DB::RandomAccessI database
20 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
23 # $seq is a Bio::Seq object
26 # more control provided with named-parameter form
28 $cachedb = Bio::DB::FileCache->new( -seqdb => $real_db,
29 -file => $path,
30 -keep => $flag,
32 =head1 DESCRIPTION
34 This is a disk cache system which saves the objects returned by
35 Bio::DB::RandomAccessI on disk. The disk cache grows without limit,
36 while the process is running, but is automatically unlinked at process
37 termination unless the -keep flag is set.
39 This module requires DB_File and Storable.
41 =head1 CONTACT
43 Lincoln Stein E<lt>lstein@cshl.orgE<gt>
45 =head2 Support
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 reponsive experts will be able look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
56 =head2 Reporting Bugs
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 the bugs and their resolution. Bug reports can be submitted via the
60 web:
62 https://github.com/bioperl/bioperl-live/issues
64 =head1 APPENDIX
66 The rest of the documentation details each of the object
67 methods. Internal methods are usually preceded with a _
69 =cut
71 # Let the code begin...
73 package Bio::DB::FileCache;
75 use DB_File;
76 use Storable qw(freeze thaw);
77 use Fcntl qw(O_CREAT O_RDWR O_RDONLY);
78 use File::Temp 'tmpnam';
80 use strict;
83 use base qw(Bio::Root::Root Bio::DB::SeqI);
85 use Bio::Seq::RichSeq;
86 use Bio::Location::Split;
87 use Bio::Location::Fuzzy;
88 use Bio::Seq;
89 use Bio::SeqFeature::Generic;
90 use Bio::Species;
91 use Bio::Annotation::Collection;
93 =head2 new
95 Title : new
96 Usage : $db = Bio::DB::FileCache->new(
97 -seqdb => $db, # Bio::DB::RandomAccessI database
98 -file => $path, # path to index file
99 -keep => $flag, # don't unlink index file
101 Function: creates a new on-disk cache
102 Returns : a Bio::DB::RandomAccessI database
103 Args : as above
104 Throws : "Must be a randomaccess database" exception
105 "Could not open primary index file" exception
107 If no index file is specified, will create a temporary file in your
108 system's temporary file directory. The name of this temporary file
109 can be retrieved using file_name().
111 =cut
114 sub new {
115 my ($class,@args) = @_;
117 my $self = Bio::Root::Root->new();
118 bless $self,$class;
120 my ($seqdb,$file_name,$keep) = $self->_rearrange([qw(SEQDB FILE
121 KEEP)],@args);
123 if( !defined $seqdb || !ref $seqdb ||
124 ! $seqdb->isa('Bio::DB::RandomAccessI') ) {
125 $self->throw("Must be a randomaccess database not a [$seqdb]");
128 $self->seqdb($seqdb);
129 $file_name ||= tmpnam();
130 $self->file_name($file_name);
131 $self->keep($keep);
133 $self->_open_database($file_name);
134 return $self;
137 =head2 get_Seq_by_id
139 Title : get_Seq_by_id
140 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
141 Function: Gets a Bio::Seq object by its name
142 Returns : a Bio::Seq object
143 Args : the id (as a string) of a sequence
144 Throws : "id does not exist" exception
147 =cut
149 sub get_Seq_by_id{
150 my ($self,$id) = @_;
152 # look in the cache first
153 my $obj = $self->_get('id' => $id);
154 return $obj if defined $obj;
156 # get object from seqdb
157 $obj = $self->seqdb->get_Seq_by_id($id);
158 $self->_store('id' => $id, $obj);
160 return $obj;
163 =head2 get_Seq_by_acc
165 Title : get_Seq_by_acc
166 Usage : $seq = $db->get_Seq_by_acc('X77802');
167 Function: Gets a Bio::Seq object by accession number
168 Returns : A Bio::Seq object
169 Args : accession number (as a string)
170 Throws : "acc does not exist" exception
173 =cut
175 sub get_Seq_by_acc{
176 my ($self,$acc) = @_;
178 # look in the cache first
179 my $obj = $self->_get('acc' => $acc);
180 return $obj if defined $obj;
182 # get object from seqdb
183 $obj = $self->seqdb->get_Seq_by_acc($acc);
184 $self->_store('acc' => $acc, $obj);
186 return $obj;
189 =head2 seqdb
191 Title : seqdb
192 Usage : $seqdb = $db->seqdb([$seqdb])
193 Function: gets/sets the Bio::DB::RandomAccessI database
194 Returns : a Bio::DB::RandomAccessI database
195 Args : new sequence database (optional)
196 Throws : nothing
198 =cut
200 sub seqdb {
201 my ($self, $seqdb) = @_;
202 if ($seqdb) {
203 $self->{'seqdb'} = $seqdb;
204 } else {
205 return $self->{'seqdb'};
209 =head2 file_name
211 Title : file_name
212 Usage : $path = $db->file_name([$file_name])
213 Function: gets/sets the name of the cache file
214 Returns : a path
215 Args : new cache file name (optional)
216 Throws : nothing
218 It probably isn't useful to set the cache file name after you've
219 opened it.
221 =cut
225 sub file_name {
226 my $self = shift;
227 my $d = $self->{file_name};
228 $self->{file_name} = shift if @_;
232 =head2 keep
234 Title : keep
235 Usage : $keep = $db->keep([$flag])
236 Function: gets/sets the value of the "keep" flag
237 Returns : current value
238 Args : new value (optional)
239 Throws : nothing
241 The keep flag will cause the index file to be unlinked when the
242 process exits. Since on some operating systems (Unix, OS/2) the
243 unlinking occurs during the new() call immediately after opening the
244 file, it probably isn't safe to change this value.
246 =cut
249 sub keep {
250 my $self = shift;
251 my $d = $self->{keep};
252 $self->{keep} = shift if @_;
256 =head2 db
258 Title : db
259 Usage : $db->db
260 Function: returns tied hash to index database
261 Returns : a Berkeley DB tied hashref
262 Args : none
263 Throws : nothing
265 =cut
267 sub db { shift->{db} }
269 =head2 flush
271 Title : flush
272 Usage : $db->flush
273 Function: flushes the cache
274 Returns : nothing
275 Args : none
276 Throws : nothing
278 =cut
280 sub flush {
281 my $db = shift->db or return;
282 %{$db} = ();
285 sub _get {
286 my $self = shift;
287 my ($type,$id) = @_;
288 my $serialized = $self->db->{"${type}_${id}"};
289 my $obj = thaw($serialized);
290 $obj;
293 sub _store {
294 my $self = shift;
295 my ($type,$id,$obj) = @_;
296 if( ! defined $obj ) {
297 # bug #1628
298 $self->debug("tried to store an undefined value for $id, skipping");
299 return;
301 my $serialized = freeze($obj);
302 $self->db->{"${type}_${id}"} = $serialized;
305 =head2 get_Seq_by_version
307 Title : get_Seq_by_version
308 Usage : $seq = $db->get_Seq_by_version('X77802.1');
309 Function: Gets a Bio::Seq object by sequence version
310 Returns : A Bio::Seq object
311 Args : accession.version (as a string)
312 Throws : "acc.version does not exist" exception
314 =cut
316 sub get_Seq_by_version{
317 my ($self,@args) = @_;
318 $self->throw("Not implemented it");
321 sub DESTROY {
322 my $self = shift;
323 unlink $self->file_name unless $self->keep;
327 sub _open_database {
328 my $self = shift;
329 my $file = shift;
330 my $flags = O_CREAT|O_RDWR;
331 my %db;
332 tie(%db,'DB_File',$file,$flags,0666,$DB_BTREE)
333 or $self->throw("Could not open primary index file");
334 $self->{db} = \%db;
335 unlink $file unless $self->keep;
338 ## End of Package