2 # BioPerl module for Bio::Location::Fuzzy
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Jason Stajich <jason@bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
14 Bio::Location::Fuzzy - Implementation of a Location on a Sequence
15 which has unclear start and/or end locations
19 use Bio::Location::Fuzzy;
20 my $fuzzylocation = Bio::Location::Fuzzy->new(
23 -location_type => '..');
25 print "location string is ", $fuzzylocation->to_FTstring(), "\n";
26 print "location is of the type ", $fuzzylocation->location_type, "\n";
30 This module contains the necessary methods for representing a
31 Fuzzy Location, one that does not have clear start and/or end points.
32 This will initially serve to handle features from Genbank/EMBL feature
33 tables that are written as 1^100 meaning between bases 1 and 100 or
34 E<lt>100..300 meaning it starts somewhere before 100. Advanced
35 implementations of this interface may be able to handle the necessary
36 logic of overlaps/intersection/contains/union. It was constructed to
37 handle fuzzy locations that can be represented in Genbank/EMBL and
42 User feedback is an integral part of the evolution of this and other
43 Bioperl modules. Send your comments and suggestions preferably to one
44 of the Bioperl mailing lists. Your participation is much appreciated.
46 bioperl-l@bioperl.org - General discussion
47 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51 Please direct usage questions or support issues to the mailing list:
53 I<bioperl-l@bioperl.org>
55 rather than to the module maintainer directly. Many experienced and
56 reponsive experts will be able look at the problem and quickly
57 address it. Please include a thorough description of the problem
58 with code and data examples if at all possible.
62 Report bugs to the Bioperl bug tracking system to help us keep track
63 the bugs and their resolution. Bug reports can be submitted via the
66 https://github.com/bioperl/bioperl-live/issues
68 =head1 AUTHOR - Jason Stajich
70 Email jason-at-bioperl-dot-org
74 The rest of the documentation details each of the object
75 methods. Internal methods are usually preceded with a _
79 # Let the code begin...
81 package Bio
::Location
::Fuzzy
;
84 use base
qw(Bio::Location::Atomic Bio::Location::FuzzyLocationI);
86 our @LOCATIONCODESBSANE = (undef, 'EXACT', 'WITHIN', 'BETWEEN', 'UNCERTAIN',
89 our %FUZZYCODES = ( 'EXACT' => '..', # Position is 'exact
90 # Exact position is unknown, but is within the range specified, ((1.2)..100)
101 # The following regular expressions map to fuzzy location types. Every
102 # expression must match the complete encoded point string, and must
103 # contain two groups identifying min and max. Empty matches are automatic.
104 # converted to undef, except for 'EXACT', for which max is set to equal
107 our %FUZZYPOINTENCODE = (
108 '\>(\d+)(.{0})' => 'AFTER',
109 '\<(.{0})(\d+)' => 'BEFORE',
111 '\?(\d*)' => 'UNCERTAIN',
112 '(\d+)(.{0})\>' => 'AFTER',
113 '(.{0})(\d+)\<' => 'BEFORE',
114 '(\d+)\.(\d+)' => 'WITHIN',
115 '(\d+)\^(\d+)' => 'BETWEEN',
118 our %FUZZYRANGEENCODE = ( '\.' => 'WITHIN',
120 '\^' => 'IN-BETWEEN' );
125 Usage : my $fuzzyloc = Bio::Location::Fuzzy->new( @args);
128 Args : -start => value for start (initialize by superclass)
129 -end => value for end (initialize by superclass)
130 -strand => value for strand (initialize by superclass)
131 -location_type => either ('EXACT','WITHIN','IN-BETWEEN',
132 'UNCERTAIN') OR ( 1,2,3,4)
133 -start_ext=> extension for start - defaults to 0,
134 -start_fuz= fuzzy code for start can be
135 ('EXACT','WITHIN','BETWEEN','BEFORE','AFTER',
137 a value 1 - 5 corresponding to index+1 above
138 -end_ext=> extension for end - defaults to 0,
139 -end_fuz= fuzzy code for end can be
140 ('EXACT','WITHIN','BETWEEN','BEFORE','AFTER',
142 a value 1 - 5 corresponding to index+1 above
147 my ($class, @args) = @_;
148 my $self = $class->SUPER::new
(@args);
149 my ($location_type, $start_ext, $start_fuz, $end_ext, $end_fuz) =
150 $self->_rearrange([ qw(LOCATION_TYPE START_EXT START_FUZ
154 $location_type && $self->location_type($location_type);
155 $start_ext && $self->max_start($self->min_start + $start_ext);
156 $end_ext && $self->max_end($self->min_end + $end_ext);
157 $start_fuz && $self->start_pos_type($start_fuz);
158 $end_fuz && $self->end_pos_type($end_fuz);
165 Title : location_type
166 Usage : my $location_type = $location->location_type();
167 Function: Get location type encoded as text
168 Returns : string ('EXACT', 'WITHIN', 'IN-BETWEEN', 'UNCERTAIN')
174 my ($self,$value) = @_;
175 if( defined $value || ! defined $self->{'_location_type'} ) {
176 $value = 'EXACT' unless defined $value;
177 if(! defined $FUZZYCODES{$value} ) {
179 if( $value =~ /\.\./ ) {
181 } elsif( $value =~ /^\.$/ ) {
183 } elsif( $value =~ /\^/ ) {
184 $value = 'IN-BETWEEN';
185 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [".
186 $self->start. "] and [". $self->end. "]")
187 if defined $self->start && defined $self->end &&
188 ($self->end - 1 == $self->start);
189 } elsif( $value =~ /\?/ ) {
190 $value = 'UNCERTAIN';
191 } elsif( $value ne 'EXACT' && $value ne 'WITHIN' &&
192 $value ne 'IN-BETWEEN' ) {
193 $self->throw("Did not specify a valid location type");
196 $self->{'_location_type'} = $value;
198 return $self->{'_location_type'};
201 =head1 LocationI methods
206 Usage : $length = $fuzzy_loc->length();
207 Function: Get the length of this location.
209 Note that the length of a fuzzy location will always depend
210 on the currently active interpretation of start and end. The
211 result will therefore vary for different CoordinatePolicy objects.
220 # return $self->SUPER::length() if( !$self->start || !$self->end);
221 # $self->warn('Length is not valid for a FuzzyLocation');
228 Usage : $start = $fuzzy->start();
229 Function: get/set start of this range, handling fuzzy_starts
230 Returns : a positive integer representing the start of the location
231 Args : start location on set (can be fuzzy point string)
236 my($self,$value) = @_;
237 if( defined $value ) {
238 my ($encode,$min,$max) = $self->_fuzzypointdecode($value);
239 $self->start_pos_type($encode);
240 $self->min_start($min);
241 $self->max_start($max);
244 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations ["
245 . $self->SUPER::start
. "] and [". $self->SUPER::end
. "]")
246 if $self->location_type eq 'IN-BETWEEN' && defined $self->SUPER::end
&&
247 ($self->SUPER::end
- 1 == $self->SUPER::start
);
249 return $self->SUPER::start
();
255 Usage : $end = $fuzzy->end();
256 Function: get/set end of this range, handling fuzzy_ends
257 Returns : a positive integer representing the end of the range
258 Args : end location on set (can be fuzzy string)
263 my($self,$value) = @_;
264 if( defined $value ) {
265 my ($encode,$min,$max) = $self->_fuzzypointdecode($value);
266 $self->end_pos_type($encode);
267 $self->min_end($min);
268 $self->max_end($max);
271 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [".
272 $self->SUPER::start
. "] and [". $self->SUPER::end
. "]")
273 if $self->location_type eq 'IN-BETWEEN' && defined $self->SUPER::start
&&
274 ($self->SUPER::end
- 1 == $self->SUPER::start
);
276 return $self->SUPER::end
();
282 Usage : $min_start = $fuzzy->min_start();
283 Function: get/set the minimum starting point
284 Returns : the minimum starting point from the contained sublocations
285 Args : integer or undef on set
290 my ($self,@args) = @_;
293 $self->{'_min_start'} = $args[0]; # the value may be undef!
295 return $self->{'_min_start'};
301 Usage : my $maxstart = $location->max_start();
302 Function: Get/set maximum starting location of feature startpoint
303 Returns : integer or undef if no maximum starting point.
304 Args : integer or undef on set
309 my ($self,@args) = @_;
312 $self->{'_max_start'} = $args[0]; # the value may be undef!
314 return $self->{'_max_start'};
317 =head2 start_pos_type
319 Title : start_pos_type
320 Usage : my $start_pos_type = $location->start_pos_type();
321 Function: Get/set start position type.
322 Returns : type of position coded as text
323 ('BEFORE','AFTER','EXACT','WITHIN','BETWEEN','UNCERTAIN')
324 Args : a string on set
329 my ($self,$value) = @_;
330 if(defined $value && $value =~ /^\d+$/ ) {
331 if( $value == 0 ) { $value = 'EXACT'; }
333 my $v = $LOCATIONCODESBSANE[$value];
335 $self->warn("Provided value $value which I don't understand,".
336 " reverting to 'EXACT'");
342 if(defined($value)) {
343 $self->{'_start_pos_type'} = $value;
345 return $self->{'_start_pos_type'};
351 Usage : my $minend = $location->min_end();
352 Function: Get/set minimum ending location of feature endpoint
353 Returns : integer or undef if no minimum ending point.
354 Args : integer or undef on set
359 my ($self,@args) = @_;
362 $self->{'_min_end'} = $args[0]; # the value may be undef!
364 return $self->{'_min_end'};
370 Usage : my $maxend = $location->max_end();
371 Function: Get/set maximum ending location of feature endpoint
372 Returns : integer or undef if no maximum ending point.
373 Args : integer or undef on set
378 my ($self,@args) = @_;
381 $self->{'_max_end'} = $args[0]; # the value may be undef!
383 return $self->{'_max_end'};
389 Usage : my $end_pos_type = $location->end_pos_type();
390 Function: Get/set end position type.
391 Returns : type of position coded as text
392 ('BEFORE','AFTER','EXACT','WITHIN','BETWEEN','UNCERTAIN')
393 Args : a string on set
398 my ($self,$value) = @_;
399 if( defined $value && $value =~ /^\d+$/ ) {
400 if( $value == 0 ) { $value = 'EXACT'; }
402 my $v = $LOCATIONCODESBSANE[$value];
404 $self->warn("Provided value $value which I don't understand,".
405 " reverting to 'EXACT'");
412 if(defined($value)) {
413 $self->{'_end_pos_type'} = $value;
415 return $self->{'_end_pos_type'};
421 Usage : my $seqid = $location->seq_id();
422 Function: Get/Set seq_id that location refers to
424 Args : [optional] seq_id value to set
428 =head2 coordinate_policy
430 Title : coordinate_policy
432 Usage : $policy = $location->coordinate_policy();
433 $location->coordinate_policy($mypolicy); # set may not be possible
434 Function: Get the coordinate computing policy employed by this object.
436 See Bio::Location::CoordinatePolicyI for documentation about
437 the policy object and its use.
439 The interface *does not* require implementing classes to accept
440 setting of a different policy. The implementation provided here
441 does, however, allow one to do so.
443 Implementors of this interface are expected to initialize every
444 new instance with a CoordinatePolicyI object. The implementation
445 provided here will return a default policy object if none has
446 been set yet. To change this default policy object call this
447 method as a class method with an appropriate argument. Note that
448 in this case only subsequently created Location objects will be
451 Returns : A Bio::Location::CoordinatePolicyI implementing object.
452 Args : On set, a Bio::Location::CoordinatePolicyI implementing object.
454 See L<Bio::Location::CoordinatePolicyI>
461 Usage : my $locstr = $location->to_FTstring()
462 Function: Get/Set seq_id that location refers to
464 Args : [optional] seq_id value to set
470 my (%vals) = ( 'start' => $self->start,
471 'min_start' => $self->min_start,
472 'max_start' => $self->max_start,
473 'start_code' => $self->start_pos_type,
475 'min_end' => $self->min_end,
476 'max_end' => $self->max_end,
477 'end_code' => $self->end_pos_type );
479 my (%strs) = ( 'start' => '',
481 my ($delimiter) = $FUZZYCODES{$self->location_type};
482 $delimiter = $FUZZYCODES{'EXACT'} if ($self->location_type eq 'UNCERTAIN');
484 my $policy = ref($self->coordinate_policy);
486 # I'm lazy, lets do this in a loop since behaviour will be the same for
488 # The CoordinatePolicy now dictates start/end data here (bug 992) - cjf
489 foreach my $point ( qw(start end) ) {
490 if( ($vals{$point."_code"} ne 'EXACT') &&
491 ($vals{$point."_code"} ne 'UNCERTAIN') ) {
493 # must have max and min defined to use 'WITHIN', 'BETWEEN'
494 if ((!defined $vals{"min_$point"} ||
495 !defined $vals{"max_$point"}) &&
496 ( $vals{$point."_code"} eq 'WITHIN' ||
497 $vals{$point."_code"} eq 'BETWEEN'))
499 $vals{"min_$point"} = '' unless defined $vals{"min_$point"};
500 $vals{"max_$point"} = '' unless defined $vals{"max_$point"};
502 $self->warn("Fuzzy codes for start are in a strange state, (".
503 join(",", ($vals{"min_$point"},
505 $vals{$point."_code"})). ")");
509 if (defined $vals{$point."_code"} &&
510 ($vals{$point."_code"} eq 'BEFORE' ||
511 $vals{$point."_code"} eq 'AFTER'))
513 $strs{$point} .= $FUZZYCODES{$vals{$point."_code"}};
514 $strs{$point} .= $vals{"$point"};
517 if( defined $vals{$point."_code"} &&
518 ($vals{$point."_code"} eq 'WITHIN' ||
519 $vals{$point."_code"} eq 'BETWEEN'))
521 # Expect odd results with anything but WidestCoordPolicy for now
522 $strs{$point} .= ($point eq 'start') ?
524 $FUZZYCODES{$vals{$point."_code"}}.
527 $vals{'min_'.$point}.
528 $FUZZYCODES{$vals{$point."_code"}}.
530 $strs{$point} = "(".$strs{$point}.")";
533 } elsif ($vals{$point."_code"} eq 'UNCERTAIN') {
534 $strs{$point} = $FUZZYCODES{$vals{$point."_code"}};
535 $strs{$point} .= $vals{$point} if defined $vals{$point};
537 $strs{$point} = $vals{$point};
541 my $str = $strs{'start'} . $delimiter . $strs{'end'};
542 if($self->is_remote() && $self->seq_id()) {
543 $str = $self->seq_id() . ":" . $str;
545 if( defined $self->strand &&
546 $self->strand == -1 &&
547 $self->location_type() ne "UNCERTAIN") {
548 $str = "complement(" . $str . ")";
549 } elsif($self->location_type() eq "WITHIN") {
555 =head2 valid_Location
557 Title : valid_Location
558 Usage : if ($location->valid_location) {...};
559 Function: boolean method to determine whether location is considered valid
560 (has minimum requirements for Simple implementation)
561 Returns : Boolean value: true if location is valid, false otherwise
566 =head2 _fuzzypointdecode
568 Title : _fuzzypointdecode
569 Usage : ($type,$min,$max) = $self->_fuzzypointdecode('<5');
570 Function: Decode a fuzzy string.
571 Returns : A 3-element array consisting of the type of location, the
572 minimum integer, and the maximum integer describing the range
573 of coordinates this start or endpoint refers to. Minimum or
574 maximum coordinate may be undefined.
575 : Returns empty array on fail.
576 Args : fuzzypoint string
580 sub _fuzzypointdecode
{
581 my ($self, $string) = @_;
582 return () if( !defined $string);
583 # strip off leading and trailing space
584 $string =~ s/^\s*(\S+)\s*/$1/;
585 foreach my $pattern ( keys %FUZZYPOINTENCODE ) {
586 if( $string =~ /^$pattern$/ ) {
587 my ($min,$max) = ($1,$2) unless (($1 eq '') && (!defined $2));
588 if( ($FUZZYPOINTENCODE{$pattern} eq 'EXACT') ||
589 ($FUZZYPOINTENCODE{$pattern} eq 'UNCERTAIN')
593 $max = undef if((defined $max) && (length($max) == 0));
594 $min = undef if((defined $min) && (length($min) == 0));
596 return ($FUZZYPOINTENCODE{$pattern},$min,$max);
599 if( $self->verbose >= 1 ) {
600 $self->warn("could not find a valid fuzzy encoding for $string");