2 # BioPerl module for Bio::Map::CytoPosition
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Heikki Lehvaslaiho
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Map::CytoPosition - Marker class with cytogenetic band storing attributes
20 $m1 = Bio::Map::CytoPosition->new ( '-id' => 'A1',
23 $m2 = Bio::Map::CytoPosition->new ( '-id' => 'A2',
27 if ($m1->cytorange->overlaps($m2->cytorange)) {
28 print "Makers overlap\n";
34 CytoPosition is marker (Bio::Map::MarkerI compliant) with a location in a
35 cytogenetic map. See L<Bio::Map::MarkerI> for more information.
37 Cytogenetic locations are names of bands visible in stained mitotic
38 eucaryotic chromosomes. The naming follows strict rules which are
39 consistent at least in higher vertebates, e.g. mammals. The chromosome
40 name preceds the band names.
42 The shorter arm of the chromosome is called 'p' ('petit') and usually
43 drawn pointing up. The lower arm is called 'q' ('queue'). The bands
44 are named from the region separating these, a centromere (cen), towards
45 the tips or telomeric regions (ter) counting from 1 upwards. Depending
46 of the resolution used the bands are identified with one or more
47 digit. The first digit determines the major band and subsequent digits
48 sub bands: p1 band can be divided into subbands p11, p12 and 13 and
49 p11 can furter be divided into subbands p11.1 and p11.2. The dot after
50 second digit makes it easier to read the values. A region between ands
51 is given from the centromere outwards towards the telomere (e.g. 2p2-5
52 or 3p21-35) or from a band in the p arm to a band in the q arm.
58 User feedback is an integral part of the evolution of this and other
59 Bioperl modules. Send your comments and suggestions preferably to the
60 Bioperl mailing lists Your participation is much appreciated.
62 bioperl-l@bioperl.org - General discussion
63 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
67 Please direct usage questions or support issues to the mailing list:
69 I<bioperl-l@bioperl.org>
71 rather than to the module maintainer directly. Many experienced and
72 reponsive experts will be able look at the problem and quickly
73 address it. Please include a thorough description of the problem
74 with code and data examples if at all possible.
78 report bugs to the Bioperl bug tracking system to help us keep track
79 the bugs and their resolution. Bug reports can be submitted via the
82 https://github.com/bioperl/bioperl-live/issues
84 =head1 AUTHOR - Heikki Lehvaslaiho
86 Email: heikki-at-bioperl-dot-org
90 Sendu Bala bix@sendu.me.uk
94 The rest of the documentation details each of the object
95 methods. Internal methods are usually preceded with a _
99 # Let the code begin...
101 package Bio
::Map
::CytoPosition
;
107 use base
qw(Bio::Map::Position);
112 Usage : my $range = $obj->cytorange();
114 Converts cytogenetic location set by value method into
115 an integer range. The chromosome number determines the
116 "millions" in the values. Human X and Y chromosome
117 symbols are represented by values 100 and 101.
119 The localization within chromosomes are converted into
120 values between the range of 0 and 200,000:
123 |------------------------|-------------------------|
126 The values between -100,000 through 0 for centromere to
127 100,000 would have reflected the band numbering better but
128 use of positive integers was chosen since the
129 transformation is very easy. These values are not metric.
131 Each band defines a range in a chromosome. A band string
132 is converted into a range by padding it with lower and and
133 higher end digits (for q arm: '0' and '9') to the length
134 of five. The arm and chromosome values are added to these:
135 e.g. 21000 & 21999 (band 21) + 100,000 (q arm) + 2,000,000
136 (chromosome 2) => 2q21 : 2,121,000 .. 2,121,999. Note that
137 this notation breaks down if there is a band or a subband
138 using digit 9 in its name! This is not the case in human
141 The full algorithm used for bands:
144 pad char for start is '0', for end '9'
145 range is chromosome + 100,000 + padded range start or end
146 elsif arm is 'p' then
147 pad char for start is '9', for end '0'
148 range is chromosome + 100,000 - padded range start or end
150 Returns : Bio::Range object or undef
157 my ($chr, $r, $band, $band2, $arm, $arm2, $lc, $uc, $lcchar, $ucchar);
159 return $r if not defined $self->{_value
}; # returns undef
161 # -----1----- --------2--------- -----3----- -------4------- ---6---
162 m/([XY]|[0-9]+)(cen|qcen|pcen|[pq])?(ter|[.0-9]+)?-?([pq]?(cen|ter)?)?([.0-9]+)?/;
163 $self->warn("Not a valid value: ". $self->{_value
}), return $r
164 if not defined $1 ; # returns undef
169 $chr = 100 if $chr eq 'X';
170 $chr = 101 if $chr eq 'Y';
173 $r = Bio
::Range
->new();
177 $2 || $self->throw("$& does not make sense: 'arm' or 'cen' missing");
183 $arm2 = $2 if $4 eq ''; # it is not necessary to repeat the arm [p|q]
187 #find the correct order
188 #print STDERR "-|$&|----2|$2|-----3|$band|---4|$4|--------arm2|$arm2|-------------\n";
189 if ($band ne '' and (defined $arm2 and $2 ne $arm2 and $arm2 eq 'q') ) {
190 $lc = 'start'; $lcchar = '9';
191 $uc = 'end'; $ucchar = '9';
193 elsif ($band ne 'ter' and $2 ne $arm2 and $arm2 eq 'p') {
194 $lc = 'end'; $lcchar = '9';
195 $uc = 'start'; $ucchar = '9';
197 elsif ($band eq 'ter' and $arm2 eq 'p') {
198 $uc = 'start'; $ucchar = '9';
200 elsif ($arm2 eq 'q') {
201 if (_pad
($band, 5, '0') < _pad
($band2, 5, '0')) {
202 $lc = 'start'; $lcchar = '0';
203 $uc = 'end'; $ucchar = '9';
205 $lc = 'end'; $lcchar = '9';
206 $uc = 'start'; $ucchar = '0';
209 elsif ($arm2 eq 'p') {
210 if (_pad
($band, 5, '0') < _pad
($band2, 5, '0')) {
211 $lc = 'end'; $lcchar = '0';
212 $uc = 'start'; $ucchar = '9';
214 $lc = 'start'; $lcchar = '9';
215 $uc = 'end'; $ucchar = '0';
219 $self->throw("How did you end up here? $&");
222 #print STDERR "-------$arm2--------$band2---------$ucchar--------------\n";
223 if ( (defined $arm2 and $arm2 eq 'p') or (defined $arm2 and $arm2 eq 'p') ) {
224 $r->$uc(-(_pad
($band2, 5, $ucchar)) + 100000 + $chr );
225 if (defined $3 and $3 eq 'ter') {
226 $r->end(200000 + $chr);
228 elsif ($2 eq 'cen' or $2 eq 'qcen' or $2 eq 'pcen'){
229 $r->$lc(100000 + $chr);
232 $r->$lc(_pad
($band, 5, $lcchar) + 100000 + $chr );
234 $r->$lc(-(_pad
($band, 5, $lcchar)) + 100000 + $chr );
236 } else { #if:$arm2=q e.g. 9p22-q32
237 #print STDERR "-------$arm2--------$band2---------$ucchar--------------\n";
238 $r->$uc(_pad
($band2, 5, $ucchar) + 100000 + $chr);
239 if ($2 eq 'cen' or $2 eq 'pcen') {
240 $r->$lc(100000 + $chr);
244 $r->$lc(200000 + $chr);
246 $r->$lc(-(_pad
($band, 5, $lcchar)) + 100000 + $chr);
249 $r->$lc(_pad
($band, 5, $lcchar) + 100000 + $chr);
256 elsif (defined $4 and $4 ne '') {
257 #print STDERR "$4-----$&----\n";
258 if ($4 eq 'cen' || $4 eq 'qcen' || $4 eq 'pcen') { # e.g. 10p22.1-cen;
259 # '10pcen-qter' does not really make sense but lets have it in anyway
260 $r->end(100000 + $chr);
265 $r->start(_pad
($band, 5, '9') + $chr);
268 elsif ($2 eq 'cen') {
269 $self->throw("'cen-cen' does not make sense: $&");
271 $self->throw("Only order p-cen is valid: $&");
274 elsif ($4 eq 'qter' || $4 eq 'ter') { # e.g. 10p22.1-qter, 1p21-qter, 10pcen-qter, 7q34-qter
275 $r->end(200000 + $chr);
277 $r->start(-(_pad
($band, 5, '9')) + 100000 + $chr); #??? OK?
280 $r->start(_pad
($band, 5, '0') + 100000 + $chr);
282 elsif ($2 eq 'cen' || $2 eq 'qcen' || $2 eq 'pcen' ) {
283 $r->start(100000 + $chr);
286 elsif ($4 eq 'pter' ) {
287 #print STDERR "$2,$3--$4-----$&----\n";
290 $r->end(-(_pad
($band, 5, '0')) + 100000 + $chr);
293 $r->end(_pad
($band, 5, '9') + 100000 + $chr);
295 elsif ($2 eq 'cen' || $2 eq 'qcen' || $2 eq 'pcen' ) {
296 $r->end(100000 + $chr);
298 } else { # -p or -q at the end of the range
299 $self->throw("lone '$4' in $& does not make sense");
303 # e.g 10p22.1, 10pter
305 elsif (defined $3 ) {
307 if ($3 eq 'ter') { # e.g. 10pter
308 $r = Bio
::Range
->new('-start' => $chr,
311 } else { # e.g 10p22.1
312 $r = Bio
::Range
->new('-start' => -(_pad
($band, 5, '9')) + 100000 + $chr,
313 '-end' => -(_pad
($band, 5, '0')) + 100000 + $chr,
316 } elsif ($2 eq 'q') {
317 if ($3 eq 'ter') { # e.g. 10qter
318 $r = Bio
::Range
->new('-start' => 200000 + $chr,
319 '-end' => 200000 + $chr,
321 } else { # e.g 10q22.1
322 $r = Bio
::Range
->new('-start' => _pad
($band, 5, '0') + 100000 + $chr,
323 '-end' => _pad
($band, 5, '9') + 100000 + $chr,
326 } else { # e.g. 10qcen1.1 !
327 $self->throw("'cen' in $& does not make sense");
333 elsif (defined $2 ) { # e.g. 10p
335 $r = Bio
::Range
->new('-start' => $chr,
336 '-end' => 100000 + $chr
340 $r = Bio
::Range
->new('-start' => 100000 + $chr,
341 '-end' => 200000 + $chr
343 } else { # $2 eq 'cen' || 'qcen'
344 $r = Bio
::Range
->new('-start' => 100000 + $chr,
345 '-end' => 100000 + $chr
353 $r = Bio
::Range
->new('-start' => $chr,
354 '-end' => 200000 + $chr
359 $self->start($r->start);
367 my ($string, $len, $pad_char) = @_;
368 __PACKAGE__
->throw("function _pad needs a positive integer length, not [$len]")
369 unless $len =~ /^\+?\d+$/;
370 __PACKAGE__
->throw("function _pad needs a single character pad_char, not [$pad_char]")
371 unless length $pad_char == 1;
373 return $string . $pad_char x
( $len - length( $string ) );
379 Usage : my $value = $obj->range2value($range);
380 Function: Sets and returns the value string based on start and end values of
381 the Bio::Range object passes as an argument.
382 Returns : string or false
383 Args : Bio::Range object
388 my ($self,$value) = @_;
389 if( defined $value) {
390 if( ! $value->isa('Bio::Range') ) {
391 $self->throw("Is not a Bio::Range object but a [$value]");
394 if( ! $value->start ) {
395 $self->throw("Start is not defined in [$value]");
398 if( ! $value->end ) {
399 $self->throw("End is not defined in [$value]");
402 if( $value->start < 100000 ) {
403 $self->throw("Start value has to be in millions, not ". $value->start);
406 if( $value->end < 100000 ) {
407 $self->throw("End value has to be in millions, not ". $value->end);
411 my ($chr, $arm, $band) = $value->start =~ /(\d+)(\d)(\d{5})/;
412 my ($chr2, $arm2, $band2) = $value->end =~ /(\d+)(\d)(\d{5})/;
414 my ($chrS, $armS, $bandS, $arm2S, $band2S, $sep) = ('', '', '', '', '', '' );
422 elsif ($chr == 100) {
427 last LOC
if $arm == 0 and $arm2 == 2 and $band == 0 and $band2 == 0 ;
431 if ($arm == $arm2 ) {
434 #$armS = 'pter' if $band == 0 and $band2 == 0;
435 $bandS = 'ter' if $band == 0;
440 $bandS = 'ter' if $band == 0;
444 $armS = 'cen', if $band == 0;# and $band2 == 0;
450 $arm2S = '' if $band == 0 and $band2 == 0;
454 $arm2S = '' if $arm2 == 2 and $band == 0 and $band2 == 0;
457 last LOC
if $band == $band2 ;
460 # first band (ter is hadled with the arm)
462 if ($bandS ne 'ter') {
464 $band = 100000 - $band;
471 $bandS = substr($band, 0, 2). '.'. substr($band, 2) if length $band > 2;
473 last LOC
unless $band2;
479 $band2 = 100000 - $band2;
494 last LOC
if $band eq $band2 and $arm == $arm2;
498 $band2S = substr($band2, 0, 2). '.'. substr($band2, 2) if length $band2 > 2;
502 if ($armS eq 'p' and $arm2S eq 'p') {
507 $band2S = '' if $bandS eq $band2S ;
508 $armS = '' if $bandS eq 'cen';
509 $arm2S = '' if $armS eq $arm2S and $band2S ne 'ter';
510 $sep = '-' if $arm2S || $band2S;
511 $self->value( $chrS. $armS. $bandS. $sep. $arm2S. $band2S);
519 Usage : my $pos = $position->value;
520 Function: Get/Set the value for this position
521 Returns : scalar, value
522 Args : none to get, OR scalar to set
527 my ($self,$value) = @_;
528 if( defined $value ) {
529 $self->{'_value'} = $value;
532 return $self->{'_value'};
538 Usage : my $num = $position->numeric;
539 Function: Read-only method that is guarantied to return a numeric
540 representation of the start of this position.
541 Returns : int (the start of the range)
542 Args : optional Bio::RangeI object
548 return $self->start(@_);
554 Usage : my $mychr = $position->chr();
555 Function: Get/Set method for the chromosome string of the location.
556 Returns : chromosome value
557 Args : none to get, OR scalar to set
562 my ($self,$chr) = @_;
564 $self->{'_chr'} = $chr;
566 return $self->{'_chr'};