bp_process_wormbase: move program to new Bio-DB-Ace distribution
[bioperl-live.git] / Bio / DB / InMemoryCache.pm
blob95548b82a07a5f52aa996f3730fe15d17aa122cc
2 # BioPerl module for Bio::DB::InMemoryCache
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Ewan Birney <birney@sanger.ac.uk>
8 # Copyright Ewan Birney
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::DB::InMemoryCache - Abstract interface for a sequence database
18 =head1 SYNOPSIS
20 $cachedb = Bio::DB::InMemoryCache->new( -seqdb => $real_db,
21 -number => 1000);
23 # get a database object somehow using a concrete class
26 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
29 # $seq is a Bio::Seq object
32 =head1 DESCRIPTION
34 This is a memory cache system which saves the objects returned by
35 Bio::DB::RandomAccessI in memory to a hard limit of sequences.
37 =head1 CONTACT
39 Ewan Birney E<lt>birney@ebi.ac.ukE<gt>
41 =head2 Support
43 Please direct usage questions or support issues to the mailing list:
45 I<bioperl-l@bioperl.org>
47 rather than to the module maintainer directly. Many experienced and
48 reponsive experts will be able look at the problem and quickly
49 address it. Please include a thorough description of the problem
50 with code and data examples if at all possible.
52 =head2 Reporting Bugs
54 Report bugs to the Bioperl bug tracking system to help us keep track
55 the bugs and their resolution. Bug reports can be submitted via the
56 web:
58 https://github.com/bioperl/bioperl-live/issues
60 =head1 APPENDIX
62 The rest of the documentation details each of the object
63 methods. Internal methods are usually preceded with a _
65 =cut
68 # Let the code begin...
70 package Bio::DB::InMemoryCache;
73 use strict;
75 use Bio::Seq;
77 use base qw(Bio::Root::Root Bio::DB::SeqI);
79 sub new {
80 my ($class,@args) = @_;
82 my $self = Bio::Root::Root->new();
83 bless $self,$class;
85 my ($seqdb,$number,$agr) =
86 $self->_rearrange([qw(SEQDB NUMBER AGRESSION)],@args);
88 if( !defined $seqdb || !ref $seqdb ||
89 !$seqdb->isa('Bio::DB::RandomAccessI') ) {
90 $self->throw("Must be a RandomAccess database not a [$seqdb]");
93 if( !defined $number ) {
94 $number = 1000;
97 $self->seqdb($seqdb);
98 $self->number($number);
99 $self->agr($agr);
101 # we consider acc as the primary id here
102 $self->{'_cache_number_hash'} = {};
103 $self->{'_cache_id_hash'} = {};
104 $self->{'_cache_acc_hash'} = {};
105 $self->{'_cache_number'} = 1;
107 return $self;
110 =head2 get_Seq_by_id
112 Title : get_Seq_by_id
113 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
114 Function: Gets a Bio::Seq object by its name
115 Returns : a Bio::Seq object
116 Args : the id (as a string) of a sequence
117 Throws : "id does not exist" exception
119 =cut
121 sub get_Seq_by_id{
122 my ($self,$id) = @_;
124 if( defined $self->{'_cache_id_hash'}->{$id} ) {
125 my $acc = $self->{'_cache_id_hash'}->{$id};
126 my $seq = $self->{'_cache_acc_hash'}->{$acc};
127 $self->{'_cache_number_hash'}->{$seq->accession} =
128 $self->{'_cache_number'}++;
129 return $seq;
130 } else {
131 return $self->_load_Seq('id',$id);
135 =head2 get_Seq_by_acc
137 Title : get_Seq_by_acc
138 Usage : $seq = $db->get_Seq_by_acc('X77802');
139 Function: Gets a Bio::Seq object by accession number
140 Returns : A Bio::Seq object
141 Args : accession number (as a string)
142 Throws : "acc does not exist" exception
144 =cut
146 sub get_Seq_by_acc{
147 my ($self,$acc) = @_;
149 #print STDERR "In cache get for $acc\n";
150 if( defined $self->{'_cache_acc_hash'}->{$acc} ) {
151 #print STDERR "Returning cached $acc\n";
152 my $seq = $self->{'_cache_acc_hash'}->{$acc};
153 $self->{'_cache_number_hash'}->{$seq->accession} =
154 $self->{'_cache_number'}++;
155 return $seq;
156 } else {
157 return $self->_load_Seq('acc',$acc);
163 sub number {
164 my ($self, $number) = @_;
165 if ($number) {
166 $self->{'number'} = $number;
167 } else {
168 return $self->{'number'};
172 sub seqdb {
173 my ($self, $seqdb) = @_;
174 if ($seqdb) {
175 $self->{'seqdb'} = $seqdb;
176 } else {
177 return $self->{'seqdb'};
181 sub agr {
182 my ($self, $agr) = @_;
183 if ($agr) {
184 $self->{'agr'} = $agr;
185 } else {
186 return $self->{'agr'};
191 sub _load_Seq {
192 my ($self,$type,$id) = @_;
194 my $seq;
196 if( $type eq 'id') {
197 $seq = $self->seqdb->get_Seq_by_id($id);
198 }elsif ( $type eq 'acc' ) {
199 $seq = $self->seqdb->get_Seq_by_acc($id);
200 } else {
201 $self->throw("Bad internal error. Don't understand $type");
203 if( ! $seq ) {
204 # warding off bug #1628
205 $self->debug("could not find seq $id in seqdb\n");
206 return;
209 if( $self->agr() ) {
210 #print STDERR "Pulling out into memory\n";
211 my $newseq = Bio::Seq->new( -display_id => $seq->display_id,
212 -accession_number => $seq->accession,
213 -seq => $seq->seq,
214 -desc => $seq->desc,
216 if( $self->agr() == 1 ) {
217 foreach my $sf ( $seq->top_SeqFeatures() ) {
218 $newseq->add_SeqFeature($sf);
221 $newseq->annotation($seq->annotation);
223 $seq = $newseq;
226 if( $self->_number_free < 1 ) {
227 # remove the latest thing from the hash
228 my @accs = sort { $self->{'_cache_number_hash'}->{$a} <=>
229 $self->{'_cache_number_hash'}->{$b} }
230 keys %{$self->{'_cache_number_hash'}};
232 my $acc = shift @accs;
233 # remove this guy
234 my $seq = $self->{'_cache_acc_hash'}->{$acc};
236 delete $self->{'_cache_number_hash'}->{$acc};
237 delete $self->{'_cache_id_hash'}->{$seq->id};
238 delete $self->{'_cache_acc_hash'}->{$acc};
241 # up the number, register this sequence into the hash.
242 $self->{'_cache_id_hash'}->{$seq->id} = $seq->accession;
243 $self->{'_cache_acc_hash'}->{$seq->accession} = $seq;
244 $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
246 return $seq;
250 sub _number_free {
251 my $self = shift;
253 return $self->number - scalar(keys %{$self->{'_cache_number_hash'}});
256 =head2 get_Seq_by_version
258 Title : get_Seq_by_version
259 Usage : $seq = $db->get_Seq_by_version('X77802.1');
260 Function: Gets a Bio::Seq object by sequence version
261 Returns : A Bio::Seq object
262 Args : accession.version (as a string)
263 Throws : "acc.version does not exist" exception
265 =cut
267 sub get_Seq_by_version{
268 my ($self,@args) = @_;
269 $self->throw("Not implemented it");
272 ## End of Package