maint: remove Travis stuff which has been replaced with Github actions (#325)
[bioperl-live.git] / lib / Bio / Seq / LargeLocatableSeq.pm
blob5d576c4a41dc3667519a8b0fbcd1032a55013eb9
1 # BioPerl module for Bio::Seq::LargeLocatableSeq
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Albert Vilella
7 # based on the Bio::LargePrimarySeq module
8 # by Ewan Birney <birney@sanger.ac.uk>
10 # and the Bio::LocatableSeq module
11 # by Ewan Birney <birney@sanger.ac.uk>
13 # Copyright Albert Vilella
15 # You may distribute this module under the same terms as perl itself
17 # POD documentation - main docs before the code
19 =head1 NAME
21 Bio::Seq::LargeLocatableSeq - LocatableSeq object that stores sequence as
22 files in the tempdir
24 =head1 SYNOPSIS
26 # normal primary seq usage
27 use Bio::Seq::LargeLocatableSeq;
28 my $seq = Bio::Seq::LargeLocatableSeq->new(-seq => "CAGT-GGT",
29 -id => "seq1",
30 -start => 1,
31 -end => 7);
33 =head1 DESCRIPTION
35 Bio::Seq::LargeLocatableSeq - object with start/end points on it that
36 can be projected into a MSA or have coordinates relative to another
37 seq.
39 This object, unlike Bio::LocatableSeq, stores a sequence as a series
40 of files in a temporary directory. The aim is to allow someone the
41 ability to store very large sequences (eg, E<gt> 100MBases) in a file
42 system without running out of memory (eg, on a 64 MB real memory
43 machine!).
45 Of course, to actually make use of this functionality, the programs
46 which use this object B<must> not call $primary_seq-E<gt>seq otherwise
47 the entire sequence will come out into memory and probably crash your
48 machine. However, calls like $primary_seq-E<gt>subseq(10,100) will cause
49 only 90 characters to be brought into real memory.
51 =head1 FEEDBACK
53 =head2 Mailing Lists
55 User feedback is an integral part of the evolution of this and other
56 Bioperl modules. Send your comments and suggestions preferably to
57 the Bioperl mailing list. Your participation is much appreciated.
59 bioperl-l@bioperl.org - General discussion
60 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62 =head2 Support
64 Please direct usage questions or support issues to the mailing list:
66 I<bioperl-l@bioperl.org>
68 rather than to the module maintainer directly. Many experienced and
69 reponsive experts will be able look at the problem and quickly
70 address it. Please include a thorough description of the problem
71 with code and data examples if at all possible.
73 =head2 Reporting Bugs
75 Report bugs to the Bioperl bug tracking system to help us keep track
76 of the bugs and their resolution. Bug reports can be submitted via
77 the web:
79 https://github.com/bioperl/bioperl-live/issues
81 =head1 AUTHOR - Albert Vilella
83 Email avilella-AT-gmail-DOT-com
85 =head1 APPENDIX
87 The rest of the documentation details each of the object methods.
88 Internal methods are usually preceded with a _
90 =cut
93 # Let the code begin...
95 package Bio::Seq::LargeLocatableSeq;
97 use vars qw($AUTOLOAD);
98 use strict;
101 use base qw(Bio::Seq::LargePrimarySeq Bio::LocatableSeq Bio::Root::IO);
104 =head2 new
106 Title : new
107 Usage : my $obj = Bio::Seq::LargeLocatableSeq->new();
108 Function: Builds a new Bio::Seq::LargeLocatableSeq object
109 Returns : an instance of Bio::Seq::LargeLocatableSeq
110 Args :
113 =cut
115 sub new {
116 my ($class, %params) = @_;
118 # don't let PrimarySeq set seq until we have
119 # opened filehandle
121 my $seq = $params{'-seq'} || $params{'-SEQ'};
122 if($seq ) {
123 delete $params{'-seq'};
124 delete $params{'-SEQ'};
126 my $self = $class->SUPER::new(%params);
127 my $mapping = exists $params{'-mapping'} ? $params{'-mapping'} : [1,1];
128 $self->mapping($mapping);
129 $self->_initialize_io(%params);
130 my $tempdir = $self->tempdir( CLEANUP => 1);
131 my ($tfh,$file) = $self->tempfile( DIR => $tempdir );
133 $tfh && $self->_fh($tfh);
134 $file && $self->_filename($file);
135 $self->length(0);
136 $seq && $self->seq($seq);
138 return $self;
142 =head2 length
144 Title : length
145 Usage :
146 Function:
147 Example :
148 Returns :
149 Args :
152 =cut
154 sub length {
155 my ($obj,$value) = @_;
156 if( defined $value) {
157 $obj->{'length'} = $value;
160 return (defined $obj->{'length'}) ? $obj->{'length'} : 0;
163 =head2 seq
165 Title : seq
166 Usage :
167 Function:
168 Example :
169 Returns :
170 Args :
173 =cut
175 sub seq {
176 my ($self, $data) = @_;
177 if( defined $data ) {
178 if( $self->length() == 0) {
179 $self->add_sequence_as_string($data);
180 } else {
181 $self->warn("Trying to reset the seq string, cannot do this with a LargeLocatableSeq - must allocate a new object");
184 return $self->subseq(1,$self->length);
188 =head2 subseq
190 Title : subseq
191 Usage :
192 Function:
193 Example :
194 Returns :
195 Args :
198 =cut
200 sub subseq{
201 my ($self,$start,$end) = @_;
202 my $string;
203 my $fh = $self->_fh();
205 if( ref($start) && $start->isa('Bio::LocationI') ) {
206 my $loc = $start;
207 if( $loc->length == 0 ) {
208 $self->warn("Expect location lengths to be > 0");
209 return '';
210 } elsif( $loc->end < $loc->start ) {
211 # what about circular seqs
212 $self->warn("Expect location start to come before location end");
214 my $seq = '';
215 if( $loc->isa('Bio::Location::SplitLocationI') ) {
216 foreach my $subloc ( $loc->sub_Location ) {
217 if(! seek($fh,$subloc->start() - 1,0)) {
218 $self->throw("Unable to seek on file $start:$end $!");
220 my $ret = read($fh, $string, $subloc->length());
221 if( !defined $ret ) {
222 $self->throw("Unable to read $start:$end $!");
224 if( $subloc->strand < 0 ) {
225 # $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
226 $string = Bio::Seq::LargePrimarySeq->new(-seq => $string)->revcom()->seq();
228 $seq .= $string;
230 } else {
231 if(! seek($fh,$loc->start()-1,0)) {
232 $self->throw("Unable to seek on file ".$loc->start.":".
233 $loc->end ." $!");
235 my $ret = read($fh, $string, $loc->length());
236 if( !defined $ret ) {
237 $self->throw("Unable to read ".$loc->start.":".
238 $loc->end ." $!");
240 $seq = $string;
242 if( defined $loc->strand &&
243 $loc->strand < 0 ) {
244 # $seq = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
245 $seq = Bio::Seq::LargePrimarySeq->new(-seq => $seq)->revcom()->seq();
247 return $seq;
249 if( $start <= 0 || $end > $self->length ) {
250 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
251 $self->length);
253 if( $end < $start ) {
254 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
257 if(! seek($fh,$start-1,0)) {
258 $self->throw("Unable to seek on file $start:$end $!");
260 my $ret = read($fh, $string, $end-$start+1);
261 if( !defined $ret ) {
262 $self->throw("Unable to read $start:$end $!");
264 return $string;
268 =head2 add_sequence_as_string
270 Title : add_sequence_as_string
271 Usage : $seq->add_sequence_as_string("CATGAT");
272 Function: Appends additional residues to an existing LargeLocatableSeq object.
273 This allows one to build up a large sequence without storing
274 entire object in memory.
275 Returns : Current length of sequence
276 Args : string to append
278 =cut
280 sub add_sequence_as_string{
281 my ($self,$str) = @_;
282 my $len = $self->length + CORE::length($str);
283 my $fh = $self->_fh();
284 if(! seek($fh,0,2)) {
285 $self->throw("Unable to seek end of file: $!");
287 $self->_print($str);
288 $self->length($len);
292 =head2 _filename
294 Title : _filename
295 Usage : $obj->_filename($newval)
296 Function:
297 Example :
298 Returns : value of _filename
299 Args : newvalue (optional)
302 =cut
304 sub _filename{
305 my ($obj,$value) = @_;
306 if( defined $value) {
307 $obj->{'_filename'} = $value;
309 return $obj->{'_filename'};
314 =head2 alphabet
316 Title : alphabet
317 Usage : $obj->alphabet($newval)
318 Function:
319 Example :
320 Returns : value of alphabet
321 Args : newvalue (optional)
324 =cut
326 sub alphabet{
327 my ($self,$value) = @_;
328 if( defined $value) {
329 $self->SUPER::alphabet($value);
331 return $self->SUPER::alphabet() || 'dna';
335 sub DESTROY {
336 my $self = shift;
337 my $fh = $self->_fh();
338 close($fh) if( defined $fh );
339 # this should be handled by Tempfile removal, but we'll unlink anyways.
340 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename;
341 $self->SUPER::DESTROY();