1 # BioPerl module for Bio::Seq::LargeLocatableSeq
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Albert Vilella
7 # based on the Bio::LargePrimarySeq module
8 # by Ewan Birney <birney@sanger.ac.uk>
10 # and the Bio::LocatableSeq module
11 # by Ewan Birney <birney@sanger.ac.uk>
13 # Copyright Albert Vilella
15 # You may distribute this module under the same terms as perl itself
17 # POD documentation - main docs before the code
21 Bio::Seq::LargeLocatableSeq - LocatableSeq object that stores sequence as
26 # normal primary seq usage
27 use Bio::Seq::LargeLocatableSeq;
28 my $seq = Bio::Seq::LargeLocatableSeq->new(-seq => "CAGT-GGT",
35 Bio::Seq::LargeLocatableSeq - object with start/end points on it that
36 can be projected into a MSA or have coordinates relative to another
39 This object, unlike Bio::LocatableSeq, stores a sequence as a series
40 of files in a temporary directory. The aim is to allow someone the
41 ability to store very large sequences (eg, E<gt> 100MBases) in a file
42 system without running out of memory (eg, on a 64 MB real memory
45 Of course, to actually make use of this functionality, the programs
46 which use this object B<must> not call $primary_seq-E<gt>seq otherwise
47 the entire sequence will come out into memory and probably crash your
48 machine. However, calls like $primary_seq-E<gt>subseq(10,100) will cause
49 only 90 characters to be brought into real memory.
55 User feedback is an integral part of the evolution of this and other
56 Bioperl modules. Send your comments and suggestions preferably to
57 the Bioperl mailing list. Your participation is much appreciated.
59 bioperl-l@bioperl.org - General discussion
60 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
64 Please direct usage questions or support issues to the mailing list:
66 I<bioperl-l@bioperl.org>
68 rather than to the module maintainer directly. Many experienced and
69 reponsive experts will be able look at the problem and quickly
70 address it. Please include a thorough description of the problem
71 with code and data examples if at all possible.
75 Report bugs to the Bioperl bug tracking system to help us keep track
76 of the bugs and their resolution. Bug reports can be submitted via
79 https://github.com/bioperl/bioperl-live/issues
81 =head1 AUTHOR - Albert Vilella
83 Email avilella-AT-gmail-DOT-com
87 The rest of the documentation details each of the object methods.
88 Internal methods are usually preceded with a _
93 # Let the code begin...
95 package Bio
::Seq
::LargeLocatableSeq
;
97 use vars
qw($AUTOLOAD);
101 use base qw(Bio::Seq::LargePrimarySeq Bio::LocatableSeq Bio::Root::IO);
107 Usage : my $obj = Bio::Seq::LargeLocatableSeq->new();
108 Function: Builds a new Bio::Seq::LargeLocatableSeq object
109 Returns : an instance of Bio::Seq::LargeLocatableSeq
116 my ($class, %params) = @_;
118 # don't let PrimarySeq set seq until we have
121 my $seq = $params{'-seq'} || $params{'-SEQ'};
123 delete $params{'-seq'};
124 delete $params{'-SEQ'};
126 my $self = $class->SUPER::new
(%params);
127 my $mapping = exists $params{'-mapping'} ?
$params{'-mapping'} : [1,1];
128 $self->mapping($mapping);
129 $self->_initialize_io(%params);
130 my $tempdir = $self->tempdir( CLEANUP
=> 1);
131 my ($tfh,$file) = $self->tempfile( DIR
=> $tempdir );
133 $tfh && $self->_fh($tfh);
134 $file && $self->_filename($file);
136 $seq && $self->seq($seq);
155 my ($obj,$value) = @_;
156 if( defined $value) {
157 $obj->{'length'} = $value;
160 return (defined $obj->{'length'}) ?
$obj->{'length'} : 0;
176 my ($self, $data) = @_;
177 if( defined $data ) {
178 if( $self->length() == 0) {
179 $self->add_sequence_as_string($data);
181 $self->warn("Trying to reset the seq string, cannot do this with a LargeLocatableSeq - must allocate a new object");
184 return $self->subseq(1,$self->length);
201 my ($self,$start,$end) = @_;
203 my $fh = $self->_fh();
205 if( ref($start) && $start->isa('Bio::LocationI') ) {
207 if( $loc->length == 0 ) {
208 $self->warn("Expect location lengths to be > 0");
210 } elsif( $loc->end < $loc->start ) {
211 # what about circular seqs
212 $self->warn("Expect location start to come before location end");
215 if( $loc->isa('Bio::Location::SplitLocationI') ) {
216 foreach my $subloc ( $loc->sub_Location ) {
217 if(! seek($fh,$subloc->start() - 1,0)) {
218 $self->throw("Unable to seek on file $start:$end $!");
220 my $ret = read($fh, $string, $subloc->length());
221 if( !defined $ret ) {
222 $self->throw("Unable to read $start:$end $!");
224 if( $subloc->strand < 0 ) {
225 # $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
226 $string = Bio
::Seq
::LargePrimarySeq
->new(-seq
=> $string)->revcom()->seq();
231 if(! seek($fh,$loc->start()-1,0)) {
232 $self->throw("Unable to seek on file ".$loc->start.":".
235 my $ret = read($fh, $string, $loc->length());
236 if( !defined $ret ) {
237 $self->throw("Unable to read ".$loc->start.":".
242 if( defined $loc->strand &&
244 # $seq = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
245 $seq = Bio
::Seq
::LargePrimarySeq
->new(-seq
=> $seq)->revcom()->seq();
249 if( $start <= 0 || $end > $self->length ) {
250 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
253 if( $end < $start ) {
254 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
257 if(! seek($fh,$start-1,0)) {
258 $self->throw("Unable to seek on file $start:$end $!");
260 my $ret = read($fh, $string, $end-$start+1);
261 if( !defined $ret ) {
262 $self->throw("Unable to read $start:$end $!");
268 =head2 add_sequence_as_string
270 Title : add_sequence_as_string
271 Usage : $seq->add_sequence_as_string("CATGAT");
272 Function: Appends additional residues to an existing LargeLocatableSeq object.
273 This allows one to build up a large sequence without storing
274 entire object in memory.
275 Returns : Current length of sequence
276 Args : string to append
280 sub add_sequence_as_string
{
281 my ($self,$str) = @_;
282 my $len = $self->length + CORE
::length($str);
283 my $fh = $self->_fh();
284 if(! seek($fh,0,2)) {
285 $self->throw("Unable to seek end of file: $!");
295 Usage : $obj->_filename($newval)
298 Returns : value of _filename
299 Args : newvalue (optional)
305 my ($obj,$value) = @_;
306 if( defined $value) {
307 $obj->{'_filename'} = $value;
309 return $obj->{'_filename'};
317 Usage : $obj->alphabet($newval)
320 Returns : value of alphabet
321 Args : newvalue (optional)
327 my ($self,$value) = @_;
328 if( defined $value) {
329 $self->SUPER::alphabet
($value);
331 return $self->SUPER::alphabet
() || 'dna';
337 my $fh = $self->_fh();
338 close($fh) if( defined $fh );
339 # this should be handled by Tempfile removal, but we'll unlink anyways.
340 unlink $self->_filename() if defined $self->_filename() && -e
$self->_filename;
341 $self->SUPER::DESTROY
();