3 # bioperl module for Bio::LiveSeq::Chain
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
9 # Copyright Joseph Insana
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
18 Bio::LiveSeq::Chain - DoubleChain DataStructure for Perl
26 This is a general purpose module (that's why it's not in object-oriented
27 form) that introduces a novel datastructure in PERL. It implements
28 the "double linked chain". The elements of the chain can contain basically
29 everything. From chars to strings, from object references to arrays or hashes.
30 It is used in the LiveSequence project to create a dynamical DNA sequence,
31 easier to manipulate and change. It's use is mainly for sequence variation
32 analysis but it could be used - for example - in e-cell projects.
33 The Chain module in itself doesn't have any biological bias, so can be
34 used for any programming purpose.
36 Each element of the chain (with the exclusion of the first and the last of the
37 chain) is connected to other two elements (the PREVious and the NEXT one).
38 There is no absolute position (like in an array), hence if positions are
39 important, they need to be computed (methods are provided).
40 Otherwise it's easy to keep track of the elements with their "LABELs".
41 There is one LABEL (think of it as a pointer) to each ELEMENT. The labels
42 won't change after insertions or deletions of the chain. So it's
43 always possible to retrieve an element even if the chain has been
44 modified by successive insertions or deletions.
45 From this the high potential profit for bioinformatics: dealing with
46 sequences in a way that doesn't have to rely on positions, without
47 the need of constantly updating them if the sequence changes, even
50 =head1 AUTHOR - Joseph A.L. Insana
52 Email: Insana@ebi.ac.uk, jinsana@gmx.net
56 The rest of the documentation details each of the object
57 methods. Internal methods are usually preceded with a _
61 # Let the code begin...
63 # DoubleChain Data Structure for PERL
64 # by Joseph A.L. Insana - Deathson - Filius Mortis - Fal Mortais
65 # insana@ebi.ac.uk, jinsana@gmx.net
67 package Bio
::LiveSeq
::Chain
;
70 # **** performance concerns
71 # *??* create hash2dchain ???? (with hashkeys used for label)
72 # **????** how about using array of arrays instead than hash of arrays??
74 # further strict complaints:
75 # in verbose $string assignment around line 721 ???
77 # TERMINOLOGY update, naming convention:
78 # "chain" the datastructure
79 # "element" the individual units that compose a chain
80 # "label" the unique name of a single element
81 # "position" the position of an element into the chain according to a
82 # particular coordinate system (e.g. counting from the start)
83 # "value" what is stored in a single element
85 use Carp
qw(croak cluck carp);
86 use Bio
::Root
::Version
;
88 use integer
; # WARNING: this is to increase performance
89 # a little bit of attention has to be given if float need to
90 # be stored as elements of the array
91 # the use of this "integer" affects all operations but not
92 # assignments. So float CAN be assigned as elements of the chain
93 # BUT, if you assign $z=-1.8;, $z will be equal to -1 because
94 # "-" counts as a unary operation!
96 =head2 _updown_chain2string
99 Usage : $string = Bio::LiveSeq::Chain::chain2string("down",$chain,6,9)
100 Function: reads the contents of the chain, outputting a string
103 : down_chain2string($chain) -> all the chain from begin to end
104 : down_chain2string($chain,6) -> from 6 to the end
105 : down_chain2string($chain,6,4) -> from 6, going on 4 elements
106 : down_chain2string($chain,6,"",10) -> from 6 to 10
107 : up_chain2string($chain,10,"",6) -> from 10 to 6 upstream
108 Defaults: start=first element; if len undef, goes to last
109 if last undef, goes to end
110 if last defined, it overrides len (undefining it)
112 Args : "up"||"down" as first argument to specify the reading direction
113 reference (to the chain)
114 [first] [len] [last] optional integer arguments to specify how
115 much and from (and to) where to read
119 # methods rewritten 2.61
120 sub up_chain2string
{
121 _updown_chain2string
("up",@_);
123 sub down_chain2string
{
124 _updown_chain2string
("down",@_);
127 sub _updown_chain2string
{
128 my ($direction,$chain,$first,$len,$last)=@_;
129 unless($chain) { cluck
"no chain input"; return (-1); }
130 my $begin=$chain->{'begin'}; # the label of the BEGIN element
131 my $end=$chain->{'end'}; # the label of the END element
134 if ($direction eq "up") {
135 $flow=2; # used to determine the direction of chain navigation
136 unless ($first) { $first=$end; } # if undef or 0, use $end
137 } else { # defaults to "down"
138 $flow=1; # used to determine the direction of chain navigation
139 unless ($first) { $first=$begin; } # if undef or 0, use $begin
142 unless($chain->{$first}) {
143 cluck
"label for first not defined"; return (-1); }
144 if ($last) { # if last is defined, it gets priority and len is not used
145 unless($chain->{$last}) {
146 cluck
"label for last not defined"; return (-1); }
148 warn "Warning chain2string: argument LAST:$last overriding LEN:$len!";
152 if ($direction eq "up") {
153 $last=$begin; # if last not defined, go 'till begin (or upto len elements)
155 $last=$end; # if last not defined, go 'till end (or upto len elements)
160 my $label=$first; my $i=1;
161 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef
162 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
164 # proceed for len elements or until last, whichever comes first
165 # if $len undef goes till end
166 while (($label) && ($label != $afterlast) && ($i <= ($len || $i + 1))) {
167 @array=@
{$chain->{$label}};
168 $string .= $array[0];
169 $label = $array[$flow];
172 return ($string); # if chain is interrupted $string won't be complete
175 =head2 _updown_labels
178 Usage : @labels = Bio::LiveSeq::Chain::_updown_labels("down",$chain,4,16)
179 Function: returns all the labels in a chain or those between two
180 specified ones (termed "first" and "last")
181 Returns : a reference to an array containing the labels
182 Args : "up"||"down" as first argument to specify the reading direction
183 reference (to the chain)
184 [first] [last] (integer for the starting and eneding labels)
189 # arguments: CHAIN_REF [FIRSTLABEL] [LASTLABEL]
190 # returns: reference to array containing the labels
192 my ($chain,$first,$last)=@_;
193 _updown_labels
("down",$chain,$first,$last);
196 my ($chain,$first,$last)=@_;
197 _updown_labels
("up",$chain,$first,$last);
199 # arguments: "up"||"down" CHAIN_REF [FIRSTLABEL] [LASTLABEL]
200 # returns: reference to array containing the labels
202 my ($direction,$chain,$first,$last)=@_;
203 unless($chain) { cluck
"no chain input"; return (0); }
204 my $begin=$chain->{'begin'}; # the label of the BEGIN element
205 my $end=$chain->{'end'}; # the label of the END element
207 if ($direction eq "up") { $flow=2;
208 unless ($first) { $first=$end; }
209 unless ($last) { $last=$begin; }
211 unless ($last) { $last=$end; }
212 unless ($first) { $first=$begin; }
214 unless($chain->{$first}) { warn "not existing label $first"; return (0); }
215 unless($chain->{$last}) { warn "not existing label $last"; return (0); }
217 my $label=$first; my @labels;
218 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef
219 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
221 while (($label)&&($label != $afterlast)) {
222 push(@labels,$label);
223 $label=$chain->{$label}[$flow];
225 return (\
@labels); # if chain is interrupted @labels won't be complete
232 Usage : $start = Bio::LiveSeq::Chain::start()
233 Returns : the label marking the start of the chain
241 unless($chain) { cluck
"no chain input"; return (-1); }
242 return ($chain->{'begin'});
248 Usage : $end = Bio::LiveSeq::Chain::end()
249 Returns : the label marking the end of the chain
257 unless($chain) { cluck
"no chain input"; return (-1); }
258 return ($chain->{'end'});
264 Usage : $check = Bio::LiveSeq::Chain::label_exists($chain,$label)
265 Function: It checks if a label is defined, i.e. if an element is there or
267 Returns : 1 if the label exists, 0 if it is not there, -1 error
269 Args : reference to the chain, integer
274 my ($chain,$label)=@_;
275 unless($chain) { cluck
"no chain input"; return (-1); }
276 if ($label && $chain->{$label}) { return (1); } else { return (0) };
280 =head2 down_get_pos_of_label
282 Title : down_get_pos_of_label
283 Usage : $position = Bio::LiveSeq::Chain::down_get_pos_of_label($chain,$label,$first)
284 Function: returns the position of $label counting from $first, i.e. taking
285 $first as 1 of coordinate system. If $first is not specified it will
286 count from the start of the chain.
289 Args : reference to the chain, integer (the label of interest)
290 optional: integer (a different label that will be taken as the
291 first one, i.e. the one to count from)
292 Note: It counts "downstream". To proceed backward use up_get_pos_of_label
296 sub down_get_pos_of_label
{
297 #down_chain2string($_[0],$_[2],undef,$_[1],"counting");
298 my ($chain,$label,$first)=@_;
299 _updown_count
("down",$chain,$first,$label);
301 sub up_get_pos_of_label
{
302 #up_chain2string($_[0],$_[2],undef,$_[1],"counting");
303 my ($chain,$label,$first)=@_;
304 _updown_count
("up",$chain,$first,$label);
307 =head2 down_subchain_length
309 Title : down_subchain_length
310 Usage : $length = Bio::LiveSeq::Chain::down_subchain_length($chain,$first,$last)
311 Function: returns the length of the chain between the labels "first" and "last", included
314 Args : reference to the chain, integer, integer
315 Note: It counts "downstream". To proceed backward use up_subchain_length
319 # arguments: chain_ref [first] [last]
320 # returns the length of the chain between first and last (included)
321 sub down_subchain_length
{
322 #down_chain2string($_[0],$_[1],undef,$_[2],"counting");
323 my ($chain,$first,$last)=@_;
324 _updown_count
("down",$chain,$first,$last);
326 sub up_subchain_length
{
327 #up_chain2string($_[0],$_[1],undef,$_[2],"counting");
328 my ($chain,$first,$last)=@_;
329 _updown_count
("up",$chain,$first,$last);
332 # arguments: DIRECTION CHAIN_REF FIRSTLABEL LASTLABEL
335 my ($direction,$chain,$first,$last)=@_;
336 unless($chain) { cluck
"no chain input"; return (0); }
337 my $begin=$chain->{'begin'}; # the label of the BEGIN element
338 my $end=$chain->{'end'}; # the label of the END element
340 if ($direction eq "up") { $flow=2;
341 unless ($first) { $first=$end; }
342 unless ($last) { $last=$begin; }
344 unless ($last) { $last=$end; }
345 unless ($first) { $first=$begin; }
347 unless($chain->{$first}) { warn "not existing label $first"; return (0); }
348 unless($chain->{$last}) { warn "not existing label $last"; return (0); }
350 my $label=$first; my $count;
351 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef
352 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
354 while (($label)&&($label != $afterlast)) {
356 $label=$chain->{$label}[$flow];
358 return ($count); # if chain is interrupted, $i will be up to the breaking point
364 Usage : $errorcode=Bio::LiveSeq::Chain::invert_chain($chain)
365 Function: completely inverts the order of the chain elements; begin is swapped with end and all links updated (PREV&NEXT fields swapped)
366 Returns : 1 if all OK, 0 if errors
368 Args : reference to the chain
374 unless($chain) { cluck
"no chain input"; return (0); }
375 my $begin=$chain->{'begin'}; # the name of the first element
376 my $end=$chain->{'end'}; # the name of the last element
378 $label=$begin; # starts from the beginning
379 while ($label) { # proceed with linked elements, swapping PREV and NEXT
380 @array=@
{$chain->{$label}};
381 ($chain->{$label}[1],$chain->{$label}[2])=($array[2],$array[1]); # swap
382 $label = $array[1]; # go to the next one
384 # now swap begin and end fields
385 ($chain->{'begin'},$chain->{'end'})=($end,$begin);
386 return (1); # that's it
389 # warning that method has changed name
390 #sub mutate_element {
391 #croak "Warning: old method name. Please update code to 'set_value_at_label'\n";
392 # &set_value_at_label;
395 =head2 down_get_value_at_pos
397 Title : down_get_value_at_pos
398 Usage : $value = Bio::LiveSeq::Chain::down_get_value_at_pos($chain,$position,$first)
399 Function: used to access the value of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified
400 Returns : whatever is stored in the element of the chain
402 Args : reference to the chain, integer, [integer]
403 Note: It works "downstream". To proceed backward use up_get_value_at_pos
407 #sub get_value_at_pos {
408 #croak "Please use instead: down_get_value_at_pos";
409 ##&down_get_value_at_pos;
411 sub down_get_value_at_pos
{
412 my ($chain,$position,$first)=@_;
413 my $label=down_get_label_at_pos
($chain,$position,$first);
414 # check place of change
415 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
416 warn "not existing element $label"; return (0); }
417 return _get_value
($chain,$label);
419 sub up_get_value_at_pos
{
420 my ($chain,$position,$first)=@_;
421 my $label=up_get_label_at_pos
($chain,$position,$first);
422 # check place of change
423 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
424 warn "not existing element $label"; return (0); }
425 return _get_value
($chain,$label);
428 =head2 down_set_value_at_pos
430 Title : down_set_value_at_pos
431 Usage : $errorcode = Bio::LiveSeq::Chain::down_set_value_at_pos($chain,$newvalue,$position,$first)
432 Function: used to store a new value inside an element of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified
435 Args : reference to the chain, newvalue, integer, [integer]
436 (newvalue can be: integer, string, object reference, hash ref)
437 Note: It works "downstream". To proceed backward use up_set_value_at_pos
438 Note2: If the $newvalue is undef, it will delete the contents of the
439 element but it won't remove the element from the chain.
443 #sub set_value_at_pos {
444 #croak "Please use instead: down_set_value_at_pos";
445 ##&down_set_value_at_pos;
447 sub down_set_value_at_pos
{
448 my ($chain,$value,$position,$first)=@_;
449 my $label=down_get_label_at_pos
($chain,$position,$first);
450 # check place of change
451 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
452 warn "not existing element $label"; return (0); }
453 _set_value
($chain,$label,$value);
456 sub up_set_value_at_pos
{
457 my ($chain,$value,$position,$first)=@_;
458 my $label=up_get_label_at_pos
($chain,$position,$first);
459 # check place of change
460 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
461 warn "not existing element $label"; return (0); }
462 _set_value
($chain,$label,$value);
467 =head2 down_set_value_at_label
469 Title : down_set_value_at_label
470 Usage : $errorcode = Bio::LiveSeq::Chain::down_set_value_at_label($chain,$newvalue,$label)
471 Function: used to store a new value inside an element of the chain defined by its label.
474 Args : reference to the chain, newvalue, integer
475 (newvalue can be: integer, string, object reference, hash ref)
476 Note: It works "downstream". To proceed backward use up_set_value_at_label
477 Note2: If the $newvalue is undef, it will delete the contents of the
478 element but it won't remove the element from the chain.
482 sub set_value_at_label
{
483 my ($chain,$value,$label)=@_;
484 unless($chain) { cluck
"no chain input"; return (0); }
486 # check place of change
487 unless($chain->{$label}) { # complain if label doesn't exist
488 warn "not existing element $label"; return (0); }
489 _set_value
($chain,$label,$value);
493 =head2 down_get_value_at_label
495 Title : down_get_value_at_label
496 Usage : $value = Bio::LiveSeq::Chain::down_get_value_at_label($chain,$label)
497 Function: used to access the value of the chain from one element defined by its label.
498 Returns : whatever is stored in the element of the chain
500 Args : reference to the chain, integer
501 Note: It works "downstream". To proceed backward use up_get_value_at_label
505 sub get_value_at_label
{
507 unless($chain) { cluck
"no chain input"; return (0); }
508 my $label = $_[1]; # the name of the element
510 # check place of change
511 unless($chain->{$label}) { # complain if label doesn't exist
512 warn "not existing label $label"; return (0); }
513 return _get_value
($chain,$label);
516 # arguments: CHAIN_REF LABEL VALUE
518 my ($chain,$label,$value)=@_;
519 $chain->{$label}[0]=$value;
521 # arguments: CHAIN_REF LABEL
523 my ($chain,$label)=@_;
524 return $chain->{$label}[0];
527 =head2 down_get_label_at_pos
529 Title : down_get_label_at_pos
530 Usage : $label = Bio::LiveSeq::Chain::down_get_label_at_pos($chain,$position,$first)
531 Function: used to retrieve the label of an an element of the chain at a particular position. It will count the position from the start of the chain or from the label $first, if $first is specified
534 Args : reference to the chain, integer, [integer]
535 Note: It works "downstream". To proceed backward use up_get_label_at_pos
539 # arguments: CHAIN_REF POSITION [FIRST]
540 # returns: LABEL of element found counting from FIRST
541 sub down_get_label_at_pos
{
542 _updown_get_label_at_pos
("down",@_);
544 sub up_get_label_at_pos
{
545 _updown_get_label_at_pos
("up",@_);
548 # arguments: [DIRECTION] CHAIN_REF POSITION [FIRST]
549 # Default DIRECTION="down"
550 # if FIRST is undefined, FIRST=START (if DIRECTION=down) or FIRST=END (up)
552 sub _updown_get_label_at_pos
{
553 my ($direction,$chain,$position,$first)=@_;
554 unless($chain) { cluck
"no chain input"; return (0); }
555 my $begin=$chain->{'begin'}; # the label of the BEGIN element
556 my $end=$chain->{'end'}; # the label of the END element
558 if ($direction eq "up") { $flow=2; unless ($first) { $first=$end; }
559 } else { $flow=1; unless ($first) { $first=$begin; } }
560 unless($chain->{$first}) { warn "not existing label $first"; return (0); }
564 while ($i < $position) {
565 $label=$chain->{$label}[$flow];
567 unless ($label) { return (0); } # chain ended before position reached
572 # for english_concerned, latin_unconcerned people
573 sub preinsert_string
{ &praeinsert_string
}
574 sub preinsert_array
{ &praeinsert_array
}
576 # praeinsert_string CHAIN_REF STRING [POSITION]
577 # the chars of STRING are passed to praeinsert_array
578 # the chars are inserted in CHAIN, before POSITION
579 # if POSITION is undef, default is to prepend the string to the beginning
580 # i.e. POSITION is START of CHAIN
581 sub praeinsert_string
{
582 my @string=split(//,$_[1]);
583 praeinsert_array
($_[0],\
@string,$_[2]);
586 # postinsert_string CHAIN_REF STRING [POSITION]
587 # the chars of STRING are passed to postinsert_array
588 # the chars are inserted in CHAIN, after POSITION
589 # if POSITION is undef, default is to append the string to the end
590 # i.e. POSITION is END of CHAIN
591 sub postinsert_string
{
592 my @string=split(//,$_[1]);
593 postinsert_array
($_[0],\
@string,$_[2]);
596 # praeinsert_array CHAIN_REF ARRAY_REF [POSITION]
597 # the elements of ARRAY are inserted in CHAIN, before POSITION
598 # if POSITION is undef, default is to prepend the elements to the beginning
599 # i.e. POSITION is START of CHAIN
600 sub praeinsert_array
{
601 _praepostinsert_array
($_[0],"prae",$_[1],$_[2]);
604 # postinsert_array CHAIN_REF ARRAY_REF [POSITION]
605 # the elements of ARRAY are inserted in CHAIN, after POSITION
606 # if POSITION is undef, default is to append the elements to the end
607 # i.e. POSITION is END of CHAIN
608 sub postinsert_array
{
609 _praepostinsert_array
($_[0],"post",$_[1],$_[2]);
613 =head2 _praepostinsert_array
615 Title : _praepostinsert_array
616 Usage : ($insbegin,$insend) = Bio::LiveSeq::Chain::_praepostinsert_array($chainref,"post",$arrayref,$position)
617 Function: the elements of the array specified by $arrayref are inserted (creating a new subchain) in the chain specified by $chainref, before or after (depending on the "prae"||"post" keyword passed as second argument) the specified position.
618 Returns : two labels: the first and the last of the inserted subchain
619 Defaults: if no position is specified, the new chain will be inserted after
620 (post) the first element of the chain
622 Args : chainref, "prae"||"post", arrayref, integer (position)
626 # returns: 0 if errors, otherwise returns references of begin and end of
628 sub _praepostinsert_array
{
630 unless($chain) { cluck
"no chain input"; return (0); }
631 my $praepost=$_[1] || "post"; # defaults to post
634 my $begin=$chain->{'begin'}; # the name of the first element of the chain
635 my $end=$chain->{'end'}; # the name of the the last element of the chain
636 # check if prae or post insertion and prepare accordingly
637 if ($praepost eq "prae") {
639 unless (($position eq 0)||($position)) { $position=$begin; } # if undef, use $begin
642 unless (($position eq 0)||($position)) { $position=$end; } # if undef, use $end
644 # check place of insertion
645 unless($chain->{$position}) { # complain if position doesn't exist
646 warn ("Warning _praepostinsert_array: not existing element $position");
650 # check if there are elements to insert
651 my $elements=$_[2]; # reference to the array containing the new elements
652 my $elements_count=scalar(@
{$elements});
653 unless ($elements_count) {
654 warn ("Warning _praepostinsert_array: no elements input"); return (0); }
656 # create new chainelements with offset=firstfree(chain)
657 my ($insertbegin,$insertend)=_create_chain_elements
($chain,$elements);
660 #print "Executing ${praepost}insertion of $elements_count elements ('@{$elements}') at position: $position\n";
662 # attach the new chain to the old chain
663 # 4 cases: prae@begin, prae@middle, post@middle, post@end
664 # NOTE: in case of double joinings always join wisely so not to
665 # delete the PREV/NEXT attribute before it is needed
668 if ($position==$begin) { # 1st case: prae@begin
669 $noerror=_join_chain_elements
($chain,$insertend,$begin);
670 $chain->{'begin'}=$insertbegin;
671 } else { # 2nd case: prae@middle
672 $noerror=_join_chain_elements
($chain,up_element
($chain,$position),$insertbegin);
673 $noerror=_join_chain_elements
($chain,$insertend,$position);
676 if ($position==$end) { # 4th case: post@end
677 $noerror=_join_chain_elements
($chain,$end,$insertbegin);
678 $chain->{'end'}=$insertend;
679 } else { # 3rd case: post@middle # note the order of joins (important)
680 $noerror=_join_chain_elements
($chain,$insertend,down_element
($chain,$position));
681 $noerror=_join_chain_elements
($chain,$position,$insertbegin);
683 } else { # this should never happen
684 die "_praepostinsert_array: Something went very wrong";
687 # check for errors and return begin,end of insertion
689 return ($insertbegin,$insertend);
690 } else { # something went wrong with the joinings
691 warn "Warning _praepostinsert_array: Joining of insertion failed";
696 # create new chain elements with offset=firstfree
697 # arguments: CHAIN_REF ARRAY_REF
698 # returns: pointers to BEGIN and END of new chained elements created
699 # returns 0 if error(s) encountered
700 sub _create_chain_elements
{
703 warn ("Warning _create_chain_elements: no chain input"); return (0); }
705 my $array_count=scalar(@
{$arrayref});
706 unless ($array_count) {
707 warn ("Warning _create_chain_elements: no elements input"); return (0); }
708 my $begin=$chain->{'firstfree'};
711 foreach $element (@
{$arrayref}) {
713 $chain->{$i}=[$element,$i+1,$i-1];
716 $chain->{'firstfree'}=$i+1; # what a new added element should be called
717 $chain->{'size'} += $end-$begin+1; # increase size of chain
718 # leave sticky edges (to be joined by whoever called this subroutine)
719 $chain->{$begin}[2]=undef;
720 $chain->{$end}[1]=undef;
721 return ($begin,$end); # return pointers to first and last of the newelements
724 # argument: CHAIN_REF ELEMENT
725 # returns: name of DOWN/NEXT element (the downstream one)
726 # returns -1 if error encountered (e.g. chain or elements undefined)
727 # returns 0 if there's no DOWN element
729 _updown_element
("down",@_);
731 # argument: CHAIN_REF ELEMENT
732 # returns: name of UP/PREV element (the upstream one)
733 # returns -1 if error encountered (e.g. chain or elements undefined)
734 # returns 0 if there's no UP element
736 _updown_element
("up",@_);
739 # used by both is_up_element and down_element
740 sub _updown_element
{
741 my $direction=$_[0] || "down"; # defaults to downstream
743 if ($direction eq "up") {
744 $flow=2; # used to determine the direction of chain navigation
746 $flow=1; # used to determine the direction of chain navigation
750 warn ("Warning ${direction}_element: no chain input"); return (-1); }
751 my $me = $_[2]; # the name of the element
752 my $it = $chain->{$me}[$flow]; # the prev||next one, upstream||downstream
754 return ($it); # return the name of prev||next element
756 return (0); # there is no prev||next element ($it is undef)
760 # used by both is_downstream and is_upstream
761 sub _is_updownstream
{
762 my $direction=$_[0] || "down"; # defaults to downstream
764 if ($direction eq "up") {
765 $flow=2; # used to determine the direction of chain navigation
767 $flow=1; # used to determine the direction of chain navigation
771 warn ("Warning is_${direction}stream: no chain input"); return (-1); }
772 my $first=$_[2]; # the name of the first element
773 my $second=$_[3]; # the name of the first element
774 if ($first==$second) {
775 warn ("Warning is_${direction}stream: first==second!!"); return (0); }
776 unless($chain->{$first}) {
777 warn ("Warning is_${direction}stream: first element not defined"); return (-1); }
778 unless($chain->{$second}) {
779 warn ("Warning is_${direction}stream: second element not defined"); return (-1); }
783 while (($label)&&(!($found))) { # searches till the end or till found
784 if ($label==$second) {
787 @array=@
{$chain->{$label}};
788 $label = $array[$flow]; # go to the prev||next one, upstream||downstream
795 Title : is_downstream
796 Usage : Bio::LiveSeq::Chain::is_downstream($chainref,$firstlabel,$secondlabel)
797 Function: checks if SECONDlabel follows FIRSTlabel
798 It runs downstream the elements of the chain from FIRST searching
800 Returns : 1 if SECOND is found /after/ FIRST; 0 otherwise (i.e. if it
801 reaches the end of the chain without having found it)
803 Args : two labels (integer)
808 _is_updownstream
("down",@_);
814 Usage : Bio::LiveSeq::Chain::is_upstream($chainref,$firstlabel,$secondlabel)
815 Function: checks if SECONDlabel follows FIRSTlabel
816 It runs upstream the elements of the chain from FIRST searching
818 Returns : 1 if SECOND is found /after/ FIRST; 0 otherwise (i.e. if it
819 reaches the end of the chain without having found it)
821 Args : two labels (integer)
826 _is_updownstream
("up",@_);
832 Usage : @errorcodes = Bio::LiveSeq::Chain::check_chain()
833 Function: a wraparound to a series of check for consistency of the chain
834 It will check for boundaries, size, backlinking and forwardlinking
835 Returns : array of 4 warn codes, each can be 1 (all ok) or 0 (something wrong)
838 Note : this is slow and through. It is not really needed. It is mostly
839 a code-developer tool.
846 warn ("Warning check_chain: no chain input"); return (-1); }
847 my ($warnbound,$warnsize,$warnbacklink,$warnforlink);
848 $warnbound=&_boundcheck
; # passes on the arguments of the subroutine
849 $warnsize=&_sizecheck
;
850 $warnbacklink=&_downlinkcheck
;
851 $warnforlink=&_uplinkcheck
;
852 return ($warnbound,$warnsize,$warnbacklink,$warnforlink);
855 # consistency check for forwardlinks walking upstream
856 # argument: a chain reference
857 # returns: 1 all OK 0 problems
859 _updownlinkcheck
("up",@_);
862 # consistency check for backlinks walking downstream
863 # argument: a chain reference
864 # returns: 1 all OK 0 problems
866 _updownlinkcheck
("down",@_);
869 # consistency check for links, common to _uplinkcheck and _downlinkcheck
870 # argument: "up"||"down", check_ref
871 # returns: 1 all OK 0 problems
872 sub _updownlinkcheck
{
873 my $direction=$_[0] || "down"; # defaults to downstream
877 warn ("Warning _${direction}linkcheck: no chain input"); return (0); }
878 my $begin=$chain->{'begin'}; # the name of the first element
879 my $end=$chain->{'end'}; # the name of the last element
880 my ($label,@array,$me,$it,$itpoints);
881 if ($direction eq "up") {
882 $flow=2; # used to determine the direction of chain navigation
884 $label=$end; # start from end
886 $flow=1; # used to determine the direction of chain navigation
888 $label=$begin; # start from beginning
892 while ($label) { # proceed with linked elements, checking neighbours
894 @array=@
{$chain->{$label}};
895 $label = $array[$flow]; # go to the next one
897 if ($it) { # no sense in checking if next one not defined (END element)
898 @array=@
{$chain->{$label}};
899 $itpoints=$array[$wolf];
900 unless ($me==$itpoints) {
901 warn "Warning: ${direction}LinkCheck: LINK wrong in $it, that doesn't point back to me ($me). It points to $itpoints\n";
909 # consistency check for size of chain
910 # argument: a chain reference
911 # returns: 1 all OK 0 wrong size
915 warn ("Warning _sizecheck: no chain input"); return (0); }
916 my $begin=$chain->{'begin'}; # the name of the first element
919 my $size=$chain->{'size'};
922 while ($label) { # proceed with linked elements, counting
923 @array=@
{$chain->{$label}};
924 $label = $array[1]; # go to the next one
927 if ($size != $count) {
928 warn "Size check reports error: assumed size: $size, real size: $count ";
935 # consistency check for begin and end (boundaries)
936 # argument: a chain reference
937 # returns: 1 all OK 0 problems
941 warn ("Warning _boundcheck: no chain input"); return (0); }
942 my $begin=$chain->{'begin'}; # the name of the first element
943 my $end=$chain->{'end'}; # the name of the (supposedly) last element
946 # check SYNC of beginning
947 if (($begin)&&($chain->{$begin})) { # if the BEGIN points to existing element
948 if ($chain->{$begin}[2]) { # if BEGIN element has PREV not undef
949 warn "Warning: BEGIN element has PREV field defined \n";
950 warn "\tWDEBUG begin: $begin\t";
951 warn "\tWDEBUG begin's PREV: $chain->{$begin}[2] \n";
955 warn "Warning: BEGIN key of chain does not point to existing element!\n";
956 warn "\tWDEBUG begin: $begin\n";
960 if (($end)&&($chain->{$end})) { # if the END points to an existing element
961 if ($chain->{$end}[1]) { # if END element has NEXT not undef
962 warn "Warning: END element has NEXT field defined \n";
963 warn "\tWDEBUG end: $end\t";
964 warn "\tWDEBUG end's NEXT: $chain->{$end}[1] \n";
968 warn "Warning: END key of chain does not point to existing element!\n";
969 warn "\tWDEBUG end: $end\n";
975 # arguments: chain_ref
976 # returns: the size of the chain (the number of elements)
977 # return code -1: unexistant chain, errors...
981 warn ("Warning chain_length: no chain input"); return (-1); }
982 my $size=$chain->{'size'};
990 # arguments: chain ref, first element name, second element name
991 # returns: 1 or 0 (1 ok, 0 errors)
992 sub _join_chain_elements
{
995 warn ("Warning _join_chain_elements: no chain input"); return (0); }
998 unless(($leftelem)&&($rightelem)) {
999 warn ("Warning _join_chain_elements: element arguments??"); return (0); }
1000 if (($chain->{$leftelem})&&($chain->{$rightelem})) { # if the elements exist
1001 $chain->{$leftelem}[1]=$rightelem;
1002 $chain->{$rightelem}[2]=$leftelem;
1005 warn ("Warning _join_chain_elements: elements not defined");
1012 Title : splice_chain
1013 Usage : @errorcodes = Bio::LiveSeq::Chain::splice_chain($chainref,$first,$length,$last)
1014 Function: removes the elements designated by FIRST and LENGTH from a chain.
1015 The chain shrinks accordingly. If LENGTH is omitted, removes
1016 everything from FIRST onward.
1017 If END is specified, LENGTH is ignored and instead the removal
1018 occurs from FIRST to LAST.
1019 Returns : the elements removed as a string
1021 Args : chainref, integer, integer, integer
1028 warn ("Warning splice_chain: no chain input"); return (-1); }
1029 my $begin=$chain->{'begin'}; # the name of the first element
1030 my $end=$chain->{'end'}; # the name of the (supposedly) last element
1032 unless (($first eq 0)||($first)) { $first=$begin; } # if undef, use $begin
1035 my (@array, $string);
1036 my ($beforecut,$aftercut);
1038 unless($chain->{$first}) {
1039 warn ("Warning splice_chain: first element not defined"); return (-1); }
1040 if ($last) { # if last is defined, it gets priority and len is not used
1041 unless($chain->{$last}) {
1042 warn ("Warning splice_chain: last element not defined"); return (-1); }
1044 warn ("Warning splice_chain: argument LAST:$last overriding LEN:$len!");
1048 $last=$end; # if last not defined, go 'till end (or to len, whichever 1st)
1051 $beforecut=$chain->{$first}[2]; # what's the element before 1st deleted?
1052 # if it is undef then it means we are splicing since the beginning
1056 my $afterlast=$chain->{$last}[1]; # if $last=$end $afterlast should be undef
1057 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
1059 # proceed for len elements or until the end, whichever comes first
1060 # if len undef goes till last
1061 while (($label)&&($label != $afterlast) && ($i <= ($len || $i + 1))) {
1062 @array=@
{$chain->{$label}};
1063 $string .= $array[0];
1064 $aftercut = $array[1]; # what's the element next last deleted?
1065 # also used as savevar to change label posdeletion
1066 delete $chain->{$label}; # this can be deleted now
1067 $label=$aftercut; # label is updated using the savevar
1071 # Now fix the chain (sticky edges, fields)
1072 # 4 cases: cut in the middle, cut from beginning, cut till end, cut all
1073 #print "\n\tstickyDEBUG beforecut: $beforecut "; # DEBUG
1074 #print "\taftercut: $aftercut \n"; # DEBUG
1076 if ($aftercut) { # 1st case, middle cut
1077 _join_chain_elements
($chain,$beforecut,$aftercut);
1078 } else { # 3rd case, end cut
1079 $chain->{'end'}=$beforecut; # update the END field
1080 $chain->{$beforecut}[1]=undef; # since we cut till the end
1083 if ($aftercut) { # 2nd case, begin cut
1084 $chain->{'begin'}=$aftercut; # update the BEGIN field
1085 $chain->{$aftercut}[2]=undef; # since we cut from beginning
1086 } else { # 4th case, all has been cut
1087 $chain->{'begin'}=undef;
1088 $chain->{'end'}=undef;
1091 $chain->{'size'}=($chain->{'size'}) - $i + 1; # update the SIZE field
1097 # arguments: CHAIN_REF POSITION [FIRST]
1098 # returns: element counting POSITION from FIRST or from START if FIRST undef
1099 # i.e. returns the element at POSITION counting from FIRST
1100 #sub element_at_pos {
1101 #croak "Warning: old method name. Please update code to 'down_get_label_at_position'\n";
1102 ##&down_element_at_pos;
1104 #sub up_element_at_pos {
1106 ##my @array=up_chain2string($_[0],$_[2],$_[1],undef,"elements");
1107 ##return $array[-1];
1108 #croak "old method name. Update code to: up_get_label_at_position";
1109 ##&up_get_label_at_pos;
1111 #sub down_element_at_pos {
1113 ##my @array=down_chain2string($_[0],$_[2],$_[1],undef,"elements");
1114 ##return $array[-1];
1115 #croak "old method name. Update code to: down_get_label_at_position";
1116 ##&down_get_label_at_pos;
1119 # arguments: CHAIN_REF ELEMENT [FIRST]
1120 # returns: the position of ELEMENT counting from FIRST or from START
1121 #i if FIRST is undef
1122 # i.e. returns the Number of elements between FIRST and ELEMENT
1123 # i.e. returns the position of element taking FIRST as 1 of coordinate system
1124 #sub pos_of_element {
1125 #croak ("Warning: old and ambiguous method name. Please update code to 'down_get_pos_of_label'\n");
1126 ##&down_pos_of_element;
1128 #sub up_pos_of_element {
1129 #croak ("Warning: old method name. Please update code to 'up_get_pos_of_label'\n");
1130 ##up_chain2string($_[0],$_[2],undef,$_[1],"counting");
1132 #sub down_pos_of_element {
1133 #croak ("Warning: old method name. Please update code to 'down_get_pos_of_label'\n");
1134 ##down_chain2string($_[0],$_[2],undef,$_[1],"counting");
1137 # wraparounds to calculate length of subchain from first to last
1138 # arguments: chain_ref [first] [last]
1139 #sub subchain_length {
1140 #croak "Warning: old method name. Please update code to 'down_subchain_length'\n";
1141 ##&down_subchain_length;
1144 # wraparounds to have elements output
1145 # same arguments as chain2string
1146 # returns label|name of every element
1148 #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
1152 #croak ("Warning: method no more supported. Please update code to 'up_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
1153 ##up_chain2string($_[0],$_[1],$_[2],$_[3],"elements");
1155 #sub down_elements {
1156 #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
1157 ##down_chain2string($_[0],$_[1],$_[2],$_[3],"elements");
1160 # wraparounds to have verbose output
1161 # same arguments as chain2string
1162 # returns the chain in a very verbose way
1163 sub chain2string_verbose
{
1164 carp
"Warning: method no more supported.\n";
1165 &old_down_chain2string_verbose
;
1167 sub up_chain2string_verbose
{
1168 carp
"Warning: method no more supported.\n";
1169 old_up_chain2string
($_[0],$_[1],$_[2],$_[3],"verbose");
1171 sub down_chain2string_verbose
{
1172 carp
"Warning: method no more supported.\n";
1173 old_down_chain2string
($_[0],$_[1],$_[2],$_[3],"verbose");
1177 #croak ("Warning: old method name. Please update code to 'down_chain2string'\n");
1178 ##&down_chain2string;
1180 sub old_up_chain2string
{
1181 old_updown_chain2string
("up",@_);
1183 sub old_down_chain2string
{
1184 old_updown_chain2string
("down",@_);
1187 # common to up_chain2string and down_chain2string
1188 # arguments: "up"||"down" chain_ref [first] [len] [last] [option]
1189 # [option] can be any of "verbose", "counting", "elements"
1191 # defaults: start = first element; if len undef, goes to last
1192 # if last undef, goes to end
1193 # if last def it overrides len (that gets undef)
1195 # example usage: down_chain2string($chain) -> all the chain from begin to end
1196 # example usage: down_chain2string($chain,6) -> from 6 to the end
1197 # example usage: down_chain2string($chain,6,4) -> from 6, going on 4 elements
1198 # example usage: down_chain2string($chain,6,"",10) -> from 6 to 10
1199 # example usage: up_chain2string($chain,10,"",6) -> from 10 to 6 upstream
1200 sub old_updown_chain2string
{
1201 my ($direction,$chain,$first,$len,$last,$option)=@_;
1203 warn ("Warning chain2string: no chain input"); return (-1); }
1204 my $begin=$chain->{'begin'}; # the name of the BEGIN element
1205 my $end=$chain->{'end'}; # the name of the END element
1207 if ($direction eq "up") {
1208 $flow=2; # used to determine the direction of chain navigation
1209 unless ($first) { $first=$end; } # if undef or 0, use $end
1210 } else { # defaults to "down"
1211 $flow=1; # used to determine the direction of chain navigation
1212 unless ($first) { $first=$begin; } # if undef or 0, use $begin
1215 unless($chain->{$first}) {
1216 warn ("Warning chain2string: first element not defined"); return (-1); }
1217 if ($last) { # if last is defined, it gets priority and len is not used
1218 unless($chain->{$last}) {
1219 warn ("Warning chain2string: last element not defined"); return (-1); }
1221 warn ("Warning chain2string: argument LAST:$last overriding LEN:$len!");
1225 if ($direction eq "up") {
1226 $last=$begin; # if last not defined, go 'till begin (or upto len elements)
1228 $last=$end; # if last not defined, go 'till end (or upto len elements)
1231 my (@array, $string, $count);
1232 # call for verbosity (by way of chain2string_verbose);
1233 my $verbose=0; my $elements=0; my @elements; my $counting=0;
1234 if ($option) { # keep strict happy
1235 if ($option eq "verbose") { $verbose=1; }
1236 if ($option eq "elements") { $elements=1; }
1237 if ($option eq "counting") { $counting=1; }
1241 print "BEGIN=$begin"; print " END=$end"; print " SIZE=$chain->{'size'}";
1242 print " FIRSTFREE=$chain->{'firstfree'} \n";
1247 my $afterlast=$chain->{$last}[$flow]; # if $last=$end $afterlast should be undef
1248 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
1250 # proceed for len elements or until last, whichever comes first
1251 # if $len undef goes till end
1252 while (($label)&&($label != $afterlast) && ($i <= ($len || $i + 1))) {
1253 @array=@
{$chain->{$label}};
1255 $string .= "$array[2]_${label}_$array[1]=$array[0] ";
1257 } elsif ($elements) {
1258 push (@elements,$label); # returning element names/references/identifiers
1259 } elsif ($counting) {
1262 $string .= $array[0]; # returning element content
1264 $label = $array[$flow]; # go to next||prev i.e. downstream||upstream
1267 #DEBUG#print "len: $len, first: $first, last: $last, afterlast=$afterlast \n";
1268 if ($verbose) { print "TOTALprinted: $count\n"; }
1271 } elsif ($elements) {
1279 # --------> deleted, no more supported <--------
1280 # creation of a single linked list/chain from a string
1281 # basically could be recreated by taking the *2chain methods and
1282 # omitting to set the 3rd field (label 2) containing the back links
1285 # creation of a double linked list/chain from a string
1286 # returns reference to a hash containing the chain
1287 # arguments: STRING [OFFSET]
1288 # defaults: OFFSET defaults to 1 if undef
1289 # the chain will contain as elements the single characters in the string
1291 my @string=split(//,$_[0]);
1292 array2chain
(\
@string,$_[1]);
1298 Usage : $chainref = Bio::LiveSeq::Chain::array2chain($arrayref,$offset)
1299 Function: creation of a double linked chain from an array
1300 Returns : reference to a hash containing the chain
1301 Defaults: OFFSET defaults to 1 if undef
1303 Args : a reference to an array containing the elements to be chainlinked
1304 an optional integer > 0 (this will be the starting count for
1305 the chain labels instead than having them begin from "1")
1311 my $array_count=scalar(@
{$arrayref});
1312 unless ($array_count) {
1313 warn ("Warning array2chain: no elements input"); return (0); }
1315 if (defined $begin) {
1317 warn "Warning array2chain: Zero or Negative offsets not allowed"; return (0); }
1321 my ($element,%hash);
1322 $hash{'begin'}=$begin;
1324 foreach $element (@
{$arrayref}) {
1326 # hash with keys begin..end pointing to the arrays
1327 $hash{$i}=[$element,$i+1,$i-1];
1331 $hash{firstfree
}=$i+1; # what a new added element should be called
1332 $hash{size
}=$end-$begin+1; # how many elements in the chain
1334 # eliminate pointers to unexisting elements
1335 $hash{$begin}[2]=undef;
1336 $hash{$end}[1]=undef;