2 # POD documentation - main docs before the code
8 Bio::DB::FileCache - In file cache for BioSeq objects
14 $cachedb = Bio::DB::FileCache->new($real_db);
17 # $real_db is a Bio::DB::RandomAccessI database
20 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
23 # $seq is a Bio::Seq object
26 # more control provided with named-parameter form
28 $cachedb = Bio::DB::FileCache->new( -seqdb => $real_db,
34 This is a disk cache system which saves the objects returned by
35 Bio::DB::RandomAccessI on disk. The disk cache grows without limit,
36 while the process is running, but is automatically unlinked at process
37 termination unless the -keep flag is set.
39 This module requires DB_File and Storable.
43 Lincoln Stein E<lt>lstein@cshl.orgE<gt>
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 responsive experts will be able to look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 of the bugs and their resolution. Bug reports can be submitted via the
62 https://github.com/bioperl/bioperl-live/issues
66 The rest of the documentation details each of the object
67 methods. Internal methods are usually preceded with a _
71 # Let the code begin...
73 package Bio
::DB
::FileCache
;
76 use Storable
qw(freeze thaw);
77 use Fcntl
qw(O_CREAT O_RDWR O_RDONLY);
78 use File
::Temp
'tmpnam';
83 use base
qw(Bio::Root::Root Bio::DB::SeqI);
85 use Bio
::Seq
::RichSeq
;
86 use Bio
::Location
::Split
;
87 use Bio
::Location
::Fuzzy
;
89 use Bio
::SeqFeature
::Generic
;
91 use Bio
::Annotation
::Collection
;
96 Usage : $db = Bio::DB::FileCache->new(
97 -seqdb => $db, # Bio::DB::RandomAccessI database
98 -file => $path, # path to index file
99 -keep => $flag, # don't unlink index file
101 Function: creates a new on-disk cache
102 Returns : a Bio::DB::RandomAccessI database
104 Throws : "Must be a randomaccess database" exception
105 "Could not open primary index file" exception
107 If no index file is specified, will create a temporary file in your
108 system's temporary file directory. The name of this temporary file
109 can be retrieved using file_name().
115 my ($class,@args) = @_;
117 my $self = Bio
::Root
::Root
->new();
120 my ($seqdb,$file_name,$keep) = $self->_rearrange([qw(SEQDB FILE
123 if( !defined $seqdb || !ref $seqdb ||
124 ! $seqdb->isa('Bio::DB::RandomAccessI') ) {
125 $self->throw("Must be a randomaccess database not a [$seqdb]");
128 $self->seqdb($seqdb);
129 $file_name ||= tmpnam
();
130 $self->file_name($file_name);
133 $self->_open_database($file_name);
139 Title : get_Seq_by_id
140 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
141 Function: Gets a Bio::Seq object by its name
142 Returns : a Bio::Seq object
143 Args : the id (as a string) of a sequence
144 Throws : "id does not exist" exception
152 # look in the cache first
153 my $obj = $self->_get('id' => $id);
154 return $obj if defined $obj;
156 # get object from seqdb
157 $obj = $self->seqdb->get_Seq_by_id($id);
158 $self->_store('id' => $id, $obj);
163 =head2 get_Seq_by_acc
165 Title : get_Seq_by_acc
166 Usage : $seq = $db->get_Seq_by_acc('X77802');
167 Function: Gets a Bio::Seq object by accession number
168 Returns : A Bio::Seq object
169 Args : accession number (as a string)
170 Throws : "acc does not exist" exception
176 my ($self,$acc) = @_;
178 # look in the cache first
179 my $obj = $self->_get('acc' => $acc);
180 return $obj if defined $obj;
182 # get object from seqdb
183 $obj = $self->seqdb->get_Seq_by_acc($acc);
184 $self->_store('acc' => $acc, $obj);
192 Usage : $seqdb = $db->seqdb([$seqdb])
193 Function: gets/sets the Bio::DB::RandomAccessI database
194 Returns : a Bio::DB::RandomAccessI database
195 Args : new sequence database (optional)
201 my ($self, $seqdb) = @_;
203 $self->{'seqdb'} = $seqdb;
205 return $self->{'seqdb'};
212 Usage : $path = $db->file_name([$file_name])
213 Function: gets/sets the name of the cache file
215 Args : new cache file name (optional)
218 It probably isn't useful to set the cache file name after you've
227 my $d = $self->{file_name
};
228 $self->{file_name
} = shift if @_;
235 Usage : $keep = $db->keep([$flag])
236 Function: gets/sets the value of the "keep" flag
237 Returns : current value
238 Args : new value (optional)
241 The keep flag prevents the index file from being unlinked when the
242 process exits. Since on some operating systems (Unix, OS/2) the
243 unlinking occurs during the new() call immediately after opening the
244 file, it probably isn't safe to change this value.
251 my $d = $self->{keep
};
252 $self->{keep
} = shift if @_;
260 Function: returns tied hash to index database
261 Returns : a Berkeley DB tied hashref
# Read-only accessor: hands back whatever is stored in the object's
# 'db' slot (the POD for this method describes it as the tied hash
# of the index database).
sub db {
    my $self = shift;
    return $self->{db};
}
273 Function: flushes the cache
281 my $db = shift->db or return;
288 my $serialized = $self->db->{"${type}_${id}"};
289 my $obj = thaw
($serialized);
295 my ($type,$id,$obj) = @_;
296 if( ! defined $obj ) {
298 $self->debug("tried to store an undefined value for $id, skipping");
301 my $serialized = freeze
($obj);
302 $self->db->{"${type}_${id}"} = $serialized;
305 =head2 get_Seq_by_version
307 Title : get_Seq_by_version
308 Usage : $seq = $db->get_Seq_by_version('X77802.1');
309 Function: Gets a Bio::Seq object by sequence version
310 Returns : A Bio::Seq object
311 Args : accession.version (as a string)
312 Throws : "acc.version does not exist" exception
316 sub get_Seq_by_version
{
317 my ($self,@args) = @_;
318 $self->throw("Not implemented it");
323 unlink $self->file_name unless $self->keep;
330 my $flags = O_CREAT
|O_RDWR
;
332 tie
(%db,'DB_File',$file,$flags,0666,$DB_BTREE)
333 or $self->throw("Could not open primary index file");
335 unlink $file unless $self->keep;