fixed recursive_children cvterm function, and added tests for parents and children
[cxgn-corelibs.git] / lib / CXGN / Alignment.pm
blob183415e101fbdcb7e6171d3488676d2f44e15a95
1 #!/usr/bin/perl -w
2 #################################General Description#########################################
4 =head1 Name and Description
6 Alignment -- packages for analyzing, optimizing and displaying sequence alignments
8 =cut
10 =head1 Author
12 Chenwei Lin (cl295@cornell.edu)
14 =cut
16 =head1 Packages
18 align, image_object, align_seq, ruler, chart
20 Packages align_seq, ruler and chart inherit from image_object
22 =cut
24 ##############################End of General Description######################################
28 use strict;
29 use GD;
30 #use GD::Image;
31 use File::Temp;
36 ################################Package align###############################################
37 =head1 Package CXGN::Alignment::align
39 The basic element of the align object is an array of align_seq.
41 Its attributes include: align_name, width (pixel), height(pixel), image, align_seqs, ruler, chart, conserved_seq, sv_overlap, sv_identtity, start_value and end_value
43 It functionality includes:
45 1. Image display.
47 2. Calculation and output of pairwise similaity and putative splice variant pairs based on similarity.
49 3. Hide some alignment sequences so that they are not included in the analysis.
51 4. Select a range of sequences to be analyzed.
53 5. Asses how a align_seq member overlap with other align_seq members.
55 6. Calculate the non-gap mid point of each align sequence and group the sequences according to their overlap.
58 =cut
60 package CXGN::Alignment::align;
62 =head2 Constructer new()
64 =cut
66 =head3
68 Synopsis: my $align = CXGN::Alignment::align->new(
69 align_name=>$name,
70 width=>$width,
71 height=>$height,
72 type=>$type, #'nt' or 'pep'
75 Description: Upon constructing a align object, it sets align_name, width and height using arguments. It also genertes a image object of {length} and {height} and sets the default value of sv_overlap, sv_idnentity (the minimum number overlapping aa and percentage identity for two align_seq to be considered as putattive splice variant), sv_indel_limit (the min aa indel length for two sequences to be condiered as splice variants, instead of allele) and start_value (the start value of the ruler and align_seq objects(s))
77 Returns: an CXGN::Alignment::align object
79 =cut
81 sub new {
82 my $class = shift;
83 my %args = @_;
84 my $self = bless {}, $class;
86 ##################set attibutes from parameter
87 $self->{align_name} = $args{align_name};
88 $self->{width} = $args{width};
89 $self->{height} = $args{height};
90 $self->{type} = $args{type};
92 ##################set defaults
93 #splice variants criteria
94 $self->{sv_overlap} = 20; #set for amino acid
95 $self->{sv_identity} = 95;
96 $self->{indel_limit} = 4; #set for amono acid
97 $self->{start_value} = 1; #the end_value is initiated when the first align_seq is added
99 #image offsets
100 $self->{horizontal_offset} = 30;
101 $self->{vertical_offset} = 30;
103 ##################define some 'empty' attributes that will be asigned later
104 @{$self->{align_seqs}} = ();
105 $self->{ruler} = ();
106 $self->{chart} = ();
107 $self->{conserved_seq} = ();
108 $self->{seq_length} = ();
109 $self->{image} = ();
111 return $self;
114 =head2 Setters and getters
116 get_align_name(), set_align_name()
118 get_image(), set_image()
120 get_width(), set_width()
122 get_height(), set_height()
124 get_seq_length()
126 get_sv_criteria(), set_sv_criteria()
128 get_start_value(), set_start_value(), check_start_value()
130 get_end_value(), set_end_value(), check_end_value()
132 get_horizontal_offset(), set_horizontal_offset(), get_vertical_offset(), set_vertical_offset()
134 Those for align_name, image, sv_overlap and sv_identity, height, width, horizontal_offset, vertical_offset are straightforward, while the setters for start_value, end_value are not simple, since these attributes are related to and/or restricted by other attibutes. seq_length is determined by the members of @align_seqs therefore can not be reset.
136 =cut
138 sub get_align_name {
139 my $self= shift;
140 return $self->{align_name};
143 sub set_align_name {
144 my $self = shift;
145 my $name = shift;
146 $self->{align_name} = $name;
149 sub get_image {
150 my $self = shift;
151 return $self->{image};
154 sub set_image {
155 my $self = shift;
156 my $image = new GD::Image($self->{width}, $self->{height});#generate a new image object using the current width and height attributes of the align object
157 $self->{image} = $image;
160 sub get_width {
161 my $self = shift;
162 return $self->{width};
165 sub get_height {
166 my $self = shift;
167 return $self->{height};
170 =head3 set_width(), set_height()
172 Synopsis: set_width($x), set_height($x)
174 Description: sets the attributes {width} and {height}. Since the {iamge} attribute's width and height is related to {width} and {height}, when {width} and {height} are set, a {iamge} is re-generated. Otherwise, setting the {width} and {height} won't have any real effect.
176 Returns:
178 =cut
180 sub set_width {
181 my $self = shift;
182 $self->{width} = shift;
183 #$self->set_image();
186 sub set_height {
187 my $self = shift;
188 $self->{height} = shift;
189 #$self->set_image;
192 =head3 set_sv_criteria()
194 Synopsis: $align->set_sv_criteria($x, $y, $z), while $x is minimum oberlap, $y is a percentage similarity and $z is the minimal amino acid indel length to be considered as splice variant
196 Description: set the putative splice variants standard, the minimum overlapping bases and percentage identity (sv_overlap and sv_identity). The sub checks if the values are correct before setting the arributes
198 Returns:
200 =cut
202 sub set_sv_criteria {
203 my $self = shift;
204 my ($overlap, $identity, $indel) = @_;
206 if ($overlap <= 0) {
207 die "Overlap must be greater than 0!\n";
209 elsif (($identity < 0) || ($identity > 100)){
210 die "Percentage identity must be greater than 0 and less than 100\n";
212 elsif ($indel < 0) {
213 die "Indel limit must be greater than 0!\n";
215 else {
216 $self->{sv_overlap} = $overlap;
217 $self->{sv_identity} = $identity;
218 $self->{indel_limit} = $indel;
222 sub get_sv_criteria {
223 my $self = shift;
224 return $self->{sv_overlap}, $self->{sv_identity}, $self->{sv_indel_limit};
227 sub set_horizontal_offset {
228 my $self = shift;
229 $self->{horizontal_offset} = shift;
232 sub get_horizontal_offset {
233 my $self = shift;
234 return $self->{horizontal_offset};
237 sub set_vertical_offset {
238 my $self = shift;
239 $self->{vertical_offset} = shift;
242 sub get_vertical_offset {
243 my $self = shift;
244 return $self->{vertical_offset};
247 sub get_start_value {
248 my $self = shift;
249 return $self->{start_value};
252 sub get_end_value {
253 my $self = shift;
254 return $self->{end_value};
257 sub check_start_value {
258 my $self = shift;
259 my $value = shift;
260 if ($value < 0){
261 $value = 0;
262 print "$self->{id} start_value less than 0, reset to 0\n";
264 return $value;
267 sub check_end_value {
268 my $self = shift;
269 my $value = shift;
270 if ($value > length $self->{align_seqs}[0]->{seq}){
271 $value = length $self->{align_seqs}[0];
272 print "value greater than sequence length, reset to sequence length\n";
274 return $value;
277 sub get_seq_length {
278 my $self = shift;
279 return $self->{seq_length};
282 =head3 set_start_value(), set_end_value()
284 Synopsis: set_start_value($x), set_end_value($x)
286 Description: set the start_value and end_value. Check if the input value is correct before setting the attributes. Since the {start_value} and {end_value} attributes of the {ruler} and {align_seqs} members must be the same as those of the align object, the set subs call the set_start_value and set_end_value of the {ruler} and all members of {align_seqs}
288 Returns:
290 =cut
292 sub set_start_value {
293 my $self = shift;
294 my $value = shift;
295 $value = $self->check_start_value($value);#check if the start value is correct
296 $self->{start_value} = $value; #set the start_value of the included ruler
298 foreach (@{$self->{align_seqs}}) { #set the start_value of each included align_seq
299 $_->set_start_value($value);
303 sub set_end_value {
304 my $self = shift;
305 my $value = shift;
306 #$value = $self->check_end_value($value);#check if the end value is correct
307 $self->{end_value} = $value;
309 foreach (@{$self->{align_seqs}}) { #set the start_value of each included align_seq
310 $_->set_end_value($value);
315 =head2 Subroutines to add attributes to align object
317 add_align_seq(), _add_ruler(), _add_cvg_chart, _add_conserved_seq_obj()
319 =cut
321 =head3 add_align_seq()
323 Synopsis: $align->add_align_seq($align_seq), while align_seq is an align_seq object
325 Description: Add align_seq to align object. The align_seq objects are stored in an array align_seqs. Once the first align_seq object is added, nly align_seq object of the same length can be further added. At the same time, the end_value is set to be the same as the sequence length of the first align_seqs member added.
327 Returns:
329 =cut
331 sub add_align_seq {
332 my $self = shift;
333 my $member = shift;
334 if (!defined @{$self->{align_seqs}}){
336 #if there is no members in @align_seqs, reset the end_value of overall alignment to the length of this sequence
337 $self->set_end_value($member->get_end_value());
338 $self->{seq_length} = length ($member->get_select_seq());
340 #adjust the vertical and horizontal offsets and length
341 $member->set_horizontal_offset($self->{horizontal_offset});
342 #$member->set_vertical_offset($self->{vertical_offset} + 50);
343 $member->set_length($self->{width} - 250);
344 #add the align_seq to @align_seqs
345 push @{$self->{align_seqs}}, $member;
347 else {
349 #the length of the align_seq must the the same as the first member in @align_seqs, otherwise it won't be added
351 if ( length($member->get_seq()) == $self->{seq_length}){
353 #set the start_value and end_value is set to the current values of overall align and adjust the image length of sequence according to the width of overall alignment
354 $member->set_start_value($self->get_start_value());
355 $member->set_end_value($self->get_end_value());
356 $member->set_length($self->{width} - 250);
358 #adjust the vertical and horizontal offsets
359 my $last_member = int @{$self->{align_seqs}} - 1;
360 my $last_v_offset = $self->{align_seqs}[$last_member]->get_vertical_offset();
361 $member->set_horizontal_offset($self->{horizontal_offset});
362 #$member->set_vertical_offset($last_v_offset + 15);
364 push @{$self->{align_seqs}}, $member;
366 else {
367 my $id = $member->get_id();
369 return;
370 # die "error in adding sequence $id, length not the same as overall alignment, skip\n";
375 =head3 _add_ruler()
377 Synopsis: $align->_add_ruler($x,$y), while $x is the vertical offset and $y is the height of the ruler.
379 Description: Add a ruler to the align object, the start_value and end_value are set to the same as those of the align. If no align_seq has been added to the align object, the seq_length, start_value and end_value of the align are not set (see sub add_align_seq), then a ruler can not be added.
381 Returns:
383 =cut
385 sub _add_ruler {
386 my $self = shift;
387 my $v_offset = shift;
388 my $hi = shift;
390 my $ruler = CXGN::Alignment::ruler->new (
391 horizontal_offset=>$self->{horizontal_offset},
392 vertical_offset=>$v_offset,
393 length=>($self->{width} - 250),
394 height=>$hi,
395 start_value=>$self->{start_value},
396 end_value=>$self->{end_value}
398 ($self->{type} eq 'pep') and ($ruler->set_unit('aa'));
400 $self->{ruler} = $ruler;
403 =head3 _add_cvg_chart()
405 Synopsis: $align->_add_ruler($x,$y,$z), while $x is the vertical offset, $y is the id and $z is a hash reference whose key is a interger (a position) and value is a percentage
407 Description: Add a chart representing coverage by member align_seq. The start_value and end_value are set to the same as those of the align. The coverage of each align postion is repreesnted by a hash reference passed to the subroutine. The key of the hash is the align postion and the values are percentage converage.
409 Returns:
411 =cut
414 sub _add_cvg_chart {
415 my $self = shift;
416 my $v_offset = shift; #vertical offset
417 my $id = shift;
418 my $hash_ref = shift;
420 my $chart = CXGN::Alignment::chart->new (
421 horizontal_offset=>$self->{horizontal_offset},
422 vertical_offset=>$v_offset,
423 length=>($self->{width} - 250),
424 height=>50,
425 start_value=>$self->{start_value},
426 end_value=>$self->{end_value},
427 id=>$id,
428 hash_ref=>$hash_ref
430 $self->{chart} = $chart;
434 =head3 _add_conserved_seq_obj()
436 Synopsis: $align->_add_conserved_seq_obj($x), while $x is the vertical offset.
438 Description: Add a align_seq object representing the conserved sequence of the @align_seqs. The seq of this object is generated by another subroutine get_conserved_seq. If the sequence at a position is not conserved among all present members, it is repreesnted by - in conserved_seq. This object is NOT a member of @align_seqs.
440 Returns:
442 =cut
444 sub _add_conserved_seq_obj {
445 my $self = shift;
446 my $v_offset = shift;
448 my $seq = $self->get_conserved_seq();
449 my $seq_obj = CXGN::Alignment::align_seq->new (
450 horizontal_offset=>$self->{horizontal_offset},
451 vertical_offset=>$v_offset,
452 length=>($self->{width} - 250),
453 height=>15,
454 start_value=>$self->{start_value},
455 end_value=>$self->{end_value},
456 id=>'Overall Conserved Sequence',
457 seq=>$seq,
458 species=>' ',
460 $seq_obj->set_color(0,0,122);
461 $self->{conserved_seq} = $seq_obj;
464 =head2 Subroutines to search and ouput ids of @align_seq members
466 is_id_member(), is_member(), id_to_member(), get_member_ids(), get_nonhidden_member_ids, get_hidden_member_ids(), get_member_species(), get_member_urls()
468 =cut
470 =head3 is_id_member()
472 Synopsis: is_id_member($id)
474 Description: Does any of the align_seqs member have the same id as $id?
476 Returns: 0 for true and -1 for false
478 =cut
480 sub is_id_member {
481 my $self = shift;
482 my $id = shift;
483 foreach (@{$self->{align_seqs}}) {
484 if (($_->{id}) eq $id) {
485 return 0;
486 exit;
489 return -1;
492 =head3 is_member()
494 Synopsis: is_member($align_seq), while $align_seq is an align_seq object
496 Description: is $align_seq already a align_seqs member?
498 Returns: 0 for true and -1 for false
500 =cut
502 sub is_member {
503 my $self = shift;
504 my $member = shift;
505 foreach (@{$self->{align_seqs}}) {
506 if ($member == $_) {
507 return 0;
508 exit;
511 return -1;
513 =head3 id_to_member()
515 Synopsis: $align->id_to_member($id);
517 Description: check if a align member has the id $id and return the align member
519 Returns: an align object
521 =cut
523 sub id_to_member {
524 my $self = shift;
525 my $id = shift;
526 foreach (@{$self->{align_seqs}}) {
527 if (($_->{id}) eq $id) {
528 return $_;
529 exit;
534 =head3 get_member_ids()
536 Synopsis: $align->get_member_ids()
538 Description: Returns ids of all align_seqs members
540 Returns: an array of ids of all non-hidden @align_seq members
542 =cut
544 sub get_member_ids {
545 my $self = shift;
547 my @members = ();
548 foreach (@{$self->{align_seqs}}){
549 my $id = $_->get_id();
550 push @members, $id;
552 return \@members;
555 sub get_member_nr {
556 my $self = shift;
558 my $number = int (@{$self->{align_seqs}});
559 return $number;
563 =head3 get_nonhidden_member_ids()
565 Synopsis: $align->get_nonhidden_member_ids()
567 Description: Returns ids of align_seqs members that are not hidden
569 Returns: an array of ids of all non-hidden @align_seq members
571 =cut
573 sub get_nonhidden_member_ids {
574 my $self = shift;
576 my @members = ();
577 foreach (@{$self->{align_seqs}}){
578 if ($_->is_hidden() ne 'yes') {
579 my $id = $_->get_id();
580 push @members, $id;
583 return \@members;
586 sub get_nonhidden_member_nr {
587 my $self = shift;
588 my $number = 0;
589 foreach (@{$self->{align_seqs}}){
590 ($_->is_hidden() ne 'yes') and $number++;
592 return $number;
595 =head3 get_hidden_member_ids()
597 Synopsis: $align->get_hidden_member_ids()
599 Description: Returns ids of align_seqs members that are hidden
601 Returns: an array of ids of all hidden @align_seq members
603 =cut
606 sub get_hidden_member_ids {
607 my $self = shift;
609 my @members = ();
610 foreach (@{$self->{align_seqs}}){
611 if ($_->is_hidden() eq 'yes') {
612 my $id = $_->get_id();
613 push @members, $id;
616 return \@members;
619 sub get_hidden_member_nr {
620 my $self = shift;
622 my $number = 0;
623 foreach (@{$self->{align_seqs}}){
624 ($_->is_hidden() eq 'yes') and $number++;
627 return $number;
630 =head3 get_member_species()
632 Synopsis: $align->get_member_species()
634 Description: Return the species of each member of @align_seqs
636 Returns: A hash reference whose keys are ids and values are species
638 =cut
641 sub get_member_species {
642 my $self = shift;
644 my %member_species = ();
645 foreach (@{$self->{align_seqs}}){
646 my $id = $_->get_id;
647 my $species = $_->get_species();
648 $member_species{$id} = $species;
650 return \%member_species;
653 sub get_member_urls {
654 my $self = shift;
656 my %member_url = ();
657 foreach (@{$self->{align_seqs}}) {
658 my $id = $_->get_id;
659 my $url = $_ ->get_url();
660 $member_url{$id} = $url;
663 return \%member_url;
666 =head2 Image processing subs of the package
668 render(), render_png(), render_jpg(), render_png_file(), render_jpg_file(), write_image_map()
670 =cut
672 =head3 render()
674 Synopsis: $align->render($o) where $o represnts option, 'c' for complete, 'a' for alignment only and 's' for simple (only the ruler, coverage chart and conserved sequence, no individual members of @align_seqs)
676 Description: it does the following
677 1. Generage, set and render a ruler
678 2. Generate, set and render a chart representing coverge
679 3. Generate, set and render a align_seq object representing conserved sequence
680 4. Render all non-hidden members of the @aign_seqs
683 Returns:
685 =cut
687 sub render {
688 my $self = shift;
689 my $option = shift;#'c' for complete, 's' for simple, only chart and conserved seq, 'a' for alignment oly
691 #check the option
692 ($option eq 'a' || $option eq 'c' || $option eq 's') or die "Please enter correct option! 'a' for alignment only, 'c' for complate and 's' for chart and conserved seq only!\n";
694 #adjust the image height according to @align_seqs
695 if ($option eq 'c' || $option eq 'a') {
696 my $nr_member = $self->get_nonhidden_member_nr;
698 ($nr_member == 0) and exit "There is no sequence in align!\n";
699 ($option eq 'c') and $self->set_height($nr_member * 15 + 200);
700 ($option eq 'a') and $self->set_height($nr_member * 15 + 100);
702 else {
703 $self->set_height(150);
706 #Generate a image object for the image_object (align_seq, ruler and chart) to render
707 $self->{image} = GD::Image->new(
708 $self->{width},
709 $self->{height})
710 or die "Can't generate imag\n";
712 # the first color located is the background color, white
713 $self->{white} = $self->{image}->colorResolve(255,255,255);
715 $self->{image}->filledRectangle(0,0 ,$self->{width}, $self->{height}, $self->{white});
717 #add and render a ruler
718 $self->_add_ruler(50, $self->{height} - 20);
720 $self->{ruler}->render($self->{image});
722 #add and render a chart to indicate percentage coverage
723 if ($option eq 'c' || $option eq 's') {
724 my $hash_ref = $self->get_ngap_pct();
725 $self->_add_cvg_chart(70, "Coverage %", $hash_ref);
726 $self->{chart}->render($self->{image});
730 #add a sequence represnting the conserved region and render it
731 if ($option eq 'c' || $option eq 's') {
732 $self->_add_conserved_seq_obj(120);
733 $self->{conserved_seq}->render($self->{image});
736 #adjust vertical offset and height of each non hidden align_seqs member and render the member
737 if ($option eq 'c' ||$option eq 'a') {
738 my $align_v_offset;
739 ($option eq 'c') and $align_v_offset = 170;
740 ($option eq 'a') and $align_v_offset = 70;
741 foreach my $as (@{$self->{align_seqs}}) {
742 if ($as->is_hidden() ne 'yes') {
743 $as->set_vertical_offset($align_v_offset);
744 $as->set_height(15);
745 $as->render($self->{image});
746 $align_v_offset += 15;
753 =head3 render_png(), render_jpg()
755 Synopsis: $align->render_jpg(), $align->render_png
757 Description: Render itself and convert print out png or jpg
759 Returns:
761 =cut
763 sub render_separate_png_files {
764 my $self = shift;
765 my $background_filepath = shift;
767 $self->{display_type} = "separate";
768 $self->render();
770 open(WF, ">", $background_filepath);
771 print WF $self->{image}->png();
772 close WF;
774 my $mfp = $background_filepath;
775 $mfp =~ s/\.[^\.]+$//;
777 my @member_imgs = ();
778 foreach(@{$self->{members}}){
779 $_->set_left_margin($self->get_left_margin());
780 $_->set_top_margin($_->get_height()/2);
782 my $id = $_->get_id();
783 next unless $id;
785 my $img_path = $mfp . "." . md5_hex($id) . ".png";
786 push(@member_imgs, $img_path);
787 open(WF, ">$img_path" );
788 my $w = $_->get_width()
789 + $self->get_label_gap()
790 + $self->get_right_margin();
791 + $self->get_left_margin();
793 my $h = $_->get_height();
795 my $image = GD::Image->new($w, $h)
796 or die "Can't generate image\n";
797 my $white = $image->colorAllocate(255,255,255);
798 $image->filledRectangle(0,0,$w,$h,$white);
799 $image->transparent($white);
801 $_->render($image);
802 print WF $image->png();
803 close WF;
805 return @member_imgs;
808 sub render_png {
809 my $self = shift;
810 my $option = shift;
812 $self->render($option);
813 print $self->{image}->png();
816 sub render_jpg {
817 my $self = shift;
818 my $option = shift;
820 $self->render($option);
821 print $self->{image}->jpeg();
824 =head3 render_png_file(), render_jpg_file()
826 SYnopsis: $align->render_png_file($file_name, $option), $align->render_jpg_file($file_name, $option)
828 Description: take a filename as arguments, render itself and output pgn or jpg image to the file.
830 Returns:
832 =cut
834 sub render_png_file {
835 my $self = shift;
836 my $filename = shift;
837 my $option = shift;
839 $self->render($option);
840 open (F, ">$filename") || die "Can't open $filename for writing!!! Check write permission in dest directory.";
841 print F $self->{image}->png();
842 close(F);
846 sub render_jpg_file {
847 my $self = shift;
848 my $filename = shift;
849 my $option = shift;
851 $self ->render($option);
852 open (F, ">$filename") || die "Can't open $filename for writing!!! Check write permission in dest directory.";
853 print F $self->{image}->jpeg();
854 close(F);
859 =head3 write_image_map()
861 Synopsis: $align->write_image_map()
863 Description: get the image map string of each non-hidden @align_seqs, concat them and return as a single string
865 Returns: a string
867 =cut
869 sub write_image_map {
870 my $self = shift;
871 my $map_content;
873 $map_content = "<map name=\"align_image_map\" id=\"align_image_map\">\n"; #XHTML 1.0+ requires id; name is for backward compatibility -- Evan, 1/8/07
874 foreach (@{$self->{align_seqs}}) {
875 ($_->is_hidden() eq 'yes') and next;
876 my $string = $_->get_image_string();
877 $map_content .= $string . "\n";
879 $map_content .= "</map>\n";
880 return $map_content;
883 =head2 Subroutines to analyze sequences of @align_seq and output result
885 get_member_similarity(), get_sv_candidates(), get_allele_candidates(), get_overlap_score(), get_all_overlap_score(), get_all_medium(), get_all_range(), get_seqs(), get_nopad_seqs(), get_overlap_seqs(), get_overlap_nums(), get_ngap_pct(), get_all_ngap_length, get_conserved_seq_obj()
887 =cut
889 =head3 get_member_similarity()
891 Sysopsis: $align->get_member_similarity($al_sq) where $al_sq is an object of of algn_seq and member of @align_seqs
893 Description: To output pair-wise similarities (overlap base, percentage indentity)of the member which is specified as argument between other members of @align_seq.
895 Returns: two hash references, one for overlap bases and the other for percentage indentity. The key of both hashes are the ids of other non hidden members of @align_seqs
897 =cut
899 sub get_member_similarity {
900 my $self = shift;
901 my $al_sq = shift;
902 my %member_ol = ();
903 my %member_ip = ();
905 ($self->is_member($al_sq) != 0) and exit "Not a member!\n";
907 foreach (@{$self->{align_seqs}}) {
908 ($_ == $al_sq) and next;
909 my ($ol, $ip) = $al_sq->calculate_similarity($_);
910 my $other_id = $_->get_id();
911 $member_ol{$other_id} = $ol;
912 $member_ip{$other_id} = $ip;
915 return \%member_ol, \%member_ip;
918 =head3 get_sv_candidates()
920 Synopsis: $align->get_sv_candidates()
921 Description: make pairwise comparison between members of @align_seq of the same species. If the pair have enough overlap, and the percentage indentity is high enough, and they have enough insertion-deletion (specified as parameter), they are considered as putative splice variant pair
923 Returns: 3 hash references
924 1. for overlap bases, a 2-D hash, the two keys are the ids of putative pslice variant pair.
925 2. for indentity percentage, also 2-D
926 3. for species, the key is the species of the putative splice variant pair.
928 =cut
930 sub get_sv_candidates {
931 my $self = shift;
933 my ($indel, $overlap);
934 ($self->{type} eq 'pep') ? ($indel = '-' x $self->{indel_limit}) : ($indel = '---' x $self->{indel_limit});
935 ($self->{type} eq 'pep') ? ($overlap = $self->{sv_overlap}) : ($overlap = $self->{sv_overlap} * 3);
937 my %sv_candidate_ob = ();
938 my %sv_candidate_pi = ();
939 my %sv_candidate_sp = ();
941 foreach (my $i = 0; $i < @{$self->{align_seqs}}; $i++){
942 foreach (my $j = $i + 1; $j < @{$self->{align_seqs}}; $j++){
943 (($self->{align_seqs}[$i]->get_species()) ne ($self->{align_seqs}[$j]->get_species())) and next;
944 my ($ol_seq1, $ol_seq2) = $self->{align_seqs}[$i]->get_clean_align_seq($self->{align_seqs}[$j]);
946 (!(($ol_seq1 =~ /$indel/) || ($ol_seq2 =~ /$indel/))) and next;
948 my ($self_id, $other_id) = ($self->{align_seqs}[$i]->get_id(), $self->{align_seqs}[$j]->get_id());
949 my ($ob, $pi) = $self->{align_seqs}[$i]->calculate_similarity($self->{align_seqs}[$j]);
950 if ( ($ob >= $overlap) && ($pi >= $self->{sv_identity})){
951 $sv_candidate_ob{$self_id}{$other_id} = $ob;
952 $pi = sprintf("%.2f", $pi);#truncate the number to two digits after the decimal point
953 $sv_candidate_pi{$self_id}{$other_id} = $pi;
954 $sv_candidate_sp{$self_id} = $self->{align_seqs}[$i]->get_species;
958 return \%sv_candidate_ob, \%sv_candidate_pi, \%sv_candidate_sp;
961 =head3 get_allele_candidates()
963 Synopsis: $align->get_allele_candidates()
965 Description: make pairwise comparison between members of @align_seq of the same species. If the pair have enough overlap, and the percentage indentity is high enough, and they only have short insertion-deletion (specified as parameter), they are considered as putative allele pair
967 Returns: 3 hash references
968 1. for overlap bases, a 2-D hash, the two keys are the ids of putative alllele pair.
969 2. for indentity percentage, also 2-D
970 3. for species, the key is the species of the putative allele pair.
972 =cut
974 sub get_allele_candidates {
975 my $self = shift;
977 my ($indel, $overlap);
978 ($self->{type} eq 'pep') ? ($indel = '-' x $self->{indel_limit}) : ($indel = '---' x $self->{indel_limit});
979 ($self->{type} eq 'pep') ? ($overlap = $self->{sv_overlap}) : ($overlap = $self->{sv_overlap} * 3);
981 my %al_candidate_ob = ();
982 my %al_candidate_pi = ();
983 my %al_candidate_sp = ();
984 foreach (my $i = 0; $i < @{$self->{align_seqs}}; $i++){
985 foreach (my $j = $i + 1; $j < @{$self->{align_seqs}}; $j++){
986 (($self->{align_seqs}[$i]->get_species()) ne ($self->{align_seqs}[$j]->get_species())) and next;
987 my ($ol_seq1, $ol_seq2) = $self->{align_seqs}[$i]->get_clean_align_seq($self->{align_seqs}[$j]);
989 (($ol_seq1 =~ /$indel/) || ($ol_seq2 =~ /$indel/)) and next; #skip if the aequence pair have long indel
991 my ($self_id, $other_id) = ($self->{align_seqs}[$i]->get_id(), $self->{align_seqs}[$j]->get_id());
992 my ($ob, $pi) = $self->{align_seqs}[$i]->calculate_similarity($self->{align_seqs}[$j]);
993 if ( $ob >= $overlap && ($pi >= $self->{sv_identity})){
994 $al_candidate_ob{$self_id}{$other_id} = $ob;
995 $pi = sprintf("%.2f", $pi);#truncate the number to two digits after the decimal point
996 $al_candidate_pi{$self_id}{$other_id} = $pi;
997 $al_candidate_sp{$self_id} = $self->{align_seqs}[$i]->get_species;
1001 return \%al_candidate_ob, \%al_candidate_pi, \%al_candidate_sp;
1005 =head3 get_overlap_score();
1007 Synopsis: $align->get_overlap_score($align) where $align_seq is an object of align
1009 Description: Calculate the overlap score of a member of @ align_seqs, which is specified as parameter. At a particular position of the target sequence(s1) has an overlap (not a gap) in another sequence (s2), s1 gets 1 point for alignment score. The total alignment score of s1 is the sum of all its non-gap positions.
1011 Returns: an integer
1013 =cut
1015 sub get_overlap_score {
1016 my $self = shift;
1017 my $al_sq = shift;
1019 $self->is_member($al_sq) != 0 and exit "$al_sq->{id} is NOT a member!\n";
1020 $al_sq->{hide} eq 'yes' and exit "$self->{id} is hiden!\n";
1021 my $score = 0;
1022 foreach ( my $i = $self->{start_value}-1; $i < $self->{end_value}-1; $i++){
1023 my $base = substr($al_sq->get_seq(), $i, 1);
1024 $base eq '-' and next;
1025 foreach (@{$self->{align_seqs}}) {
1026 $_ == $al_sq and next;
1027 $_->{hide} eq 'yes' and next;
1028 my $other_base = substr ($_->get_seq(), $i, 1);
1029 if ($other_base ne '-') {
1030 $score++;
1034 return $score;
1037 =head3 get_all_overlap_score()
1039 Synopsis: $align->get_all_overlap_score()
1041 Description: score of all the non-hiden members in @align_seq
1043 Returns: a hash reference whose key is the id of a @align_seq member and the value is the overlap score
1045 =cut
1047 sub get_all_overlap_score {
1048 my $self = shift;
1050 my %member_score = ();
1051 foreach (@{$self->{align_seqs}}){
1052 ($_->is_hidden() eq 'yes') and next;
1053 my $score = $self->get_overlap_score($_);
1054 my $id = $_->get_id();
1055 $member_score{$id} = $score;
1057 return \%member_score;
1061 =head3 get_all_medium
1063 Synopsis: $align->get_all_medium()
1065 Description: Output the medium position of each alignment sequence
1067 Returns: a hash reference whose key is the id of a @align_seq member and the value is the medoium
1069 =cut
1071 sub get_all_medium {
1072 my $self = shift;
1074 my %member_medium = ();
1075 foreach (@{$self->{align_seqs}}){
1076 if ($_->is_hidden() ne 'yes') {
1077 my $medium = $_->get_medium();
1078 my $id = $_->get_id();
1079 $member_medium{$id} = $medium;
1082 return \%member_medium;
1085 =head3 get_all_range
1087 Synopsis: $align->get_all_range()
1089 Description: Output the start and end of characters of each @align_seqs member
1091 Returns: two hash references whose keys are the id of a @align_seq member and the value is the start and end position respectively
1093 =cut
1096 sub get_all_range {
1097 my $self = shift;
1099 my %member_head = ();
1100 my %member_end = ();
1101 foreach (@{$self->{align_seqs}}) {
1102 if (($_->is_hidden()) ne 'yes'){
1103 my ($head, $end) = $_->get_range();
1104 my $id = $_->get_id();
1105 $member_head{$id} = $head;
1106 $member_end{$id} = $end;
1109 return \%member_head, \%member_end;
1113 =head3 get_seqs()
1115 Synopsis: $align->get_seqs()
1117 Description: Output the alignment sequences (padded with gaps) of @align_seqs which are are not hidden, in the range specified by start_value and end_value
1119 Returns: a hash reference whose key is the id of @align_seqs members and the value is the alignment sequence
1121 =cut
1123 sub get_seqs {
1124 my $self = shift;
1125 (!defined @{$self->{align_seqs}}) and exit "No align_seqs member.\n";
1127 my %member_seqs = ();
1128 foreach (@{$self->{align_seqs}}) {
1129 if ($_->{hide} eq 'no') {
1130 my $id = $_->get_id();
1131 my $seq = $_->get_select_seq();
1132 $member_seqs{$id} = $seq;
1135 return \%member_seqs;
1139 =head3 get_nopad_seqs()
1141 Synopsis: $align->get_nopad_seqs()
1143 Description: Output the 'original' sequences (with gaps removed) of @align_seqs which are are not hidden, in the range specified by start_value and end_value
1145 Returns: a hash reference whose key is the id of @align_seqs members and the value is the sequence
1147 =cut
1149 sub get_nopad_seqs {
1150 my $self = shift;
1151 (!defined @{$self->{align_seqs}}) and exit "No align_seqs member.\n";
1153 my %member_seqs = ();
1154 foreach (@{$self->{align_seqs}}) {
1155 if ($_->{hide} eq 'no') {
1156 my $id = $_->get_id();
1157 my $seq = $_->get_select_seq();
1158 $seq =~ s/-//g;
1159 $member_seqs{$id} = $seq;
1162 return \%member_seqs;
1165 =head3 get_overlap_seqs()
1167 Sysnopsis: $align-> get_overlap_seqs()
1169 Description: for each non hidden member of @align_seqs, get the sequences that overlap with all the other non hiden member of @align_seqs, in the range from start_value to end_value
1171 Returns: a hash reference whose jkey ois the id of @align_seqs member and the value is the overlap sequence
1173 =cut
1175 sub get_overlap_seqs {
1176 my $self = shift;
1177 (!defined @{$self->{align_seqs}}) and exit "No align_seqs member.\n";
1178 my %overlap_seqs;
1180 foreach (my $i = $self->{start_value} - 1; $i < $self->{end_value} - 1; $i++){
1181 my %single_base = ();
1182 my $pause = 0;
1183 foreach (@{$self->{align_seqs}}) {
1184 $_->is_hidden() eq 'yes' and next;
1185 my $base = substr($_->get_seq(), $i, 1);
1186 if ($base eq '-') {
1187 $pause = 1;
1188 last;
1190 else {
1191 $single_base{$_->get_id()} = $base;
1194 if ($pause == 0) {
1195 foreach (keys %single_base) {
1196 if (!defined $overlap_seqs{$_}) {
1197 $overlap_seqs{$_} = $single_base{$_};
1199 else {
1200 $overlap_seqs{$_} .= $single_base{$_};
1205 return \%overlap_seqs;
1208 =head3 get_overlap_num()
1210 Synopsis: $align->get_overlap_num()
1212 Description: count the number of bases that overlap between all the non hidden @align_seqs members, in the range from start_value to end)_value
1214 Returns: an integer
1216 =cut
1218 sub get_overlap_num {
1219 my $self = shift;
1220 my $overlap_count = 0;
1221 (!defined @{$self->{align_seqs}}) and exit "No align_seqs member.\n";
1223 #look for the first non-hiden member in @align_seqs
1224 my $select;
1225 foreach (@{$self->{align_seqs}}) {
1226 ($_->is_hidden() eq 'yes') and next;
1227 $select = $_;
1228 last;
1231 foreach (my $i = $self->{start_value} - 1; $i < $self->{end_value} - 1; $i++){
1232 my $base = substr($select->get_seq(), $i, 1);
1233 $base eq '-' and next;
1234 my $pause = 0;
1235 foreach (@{$self->{align_seqs}}) {
1236 ($_ == $select) and next;
1237 ($_->is_hidden() eq 'yes') and next;
1238 my $other_base = substr($_->get_seq(), $i, 1);
1239 if ($other_base eq '-') {
1240 $pause = 1;
1241 last;
1244 if ($pause == 0) {
1245 $overlap_count++;
1248 return $overlap_count;
1251 =head3 get_ngap_pct()
1253 Synopsis: $align->get_ngap_pct()
1255 Description: go from start_value to end_value, get the percentage coverage by @align_seqs. A position is covered by a member when it has a non gap at the position.
1257 Returns: a hash reference whose key is the position and values are the percentage coverage
1259 =cut
1262 sub get_ngap_pct {
1263 my $self = shift;
1264 my %value_hash = ();
1266 (!defined @{$self->{align_seqs}}) and exit "No align_seqs member.\n";
1268 my $total_nhidden_member = 0;
1269 foreach (@{$self->{align_seqs}}) {
1270 ($_->is_hidden() eq 'yes') and next;
1271 $total_nhidden_member++;
1274 foreach (my $i = $self->{start_value} - 1; $i < $self->{end_value}; $i++){
1275 my $ngap_count = 0;
1276 foreach (@{$self->{align_seqs}}) {
1277 $_->is_hidden() eq 'yes' and next;
1278 my $seq = $_->get_seq();
1279 my $base = substr($seq, $i, 1);
1280 ($base ne '-') and $ngap_count++;
1282 my $pct = $ngap_count / $total_nhidden_member * 100;
1283 $value_hash{$i} = sprintf("%.2f", $pct);
1286 return \%value_hash;
1290 =head3 get_all_nogap_length()
1292 Synopsis: $align->get_all_nogap_length()
1294 Description: go from start_value to end_value, get the sequence length without gap of @align_seqs member..
1296 Returns: a hash reference whose key is the id and value is the length
1298 =cut
1302 sub get_all_nogap_length {
1303 my $self = shift;
1305 my %member_ng_len = ();
1306 foreach (@{$self->{align_seqs}}) {
1307 $_->is_hidden() eq 'yes' and next;
1308 $member_ng_len{$_->get_id()} = $_->get_nogap_length();
1311 return \%member_ng_len;
1316 =head3 get_conserved_seq()
1318 Synopsis: $align->get_conserved_seq()
1320 Description: go through each postion from start_value to end_value of non hidden member of @align_seqs. If all members have the same seq at the position, get the seq, otherwise put a gap (-) in the position.
1322 Returns: s string of sequence
1324 =cut
1326 sub get_conserved_seq {
1327 my $self = shift;
1328 my $seq;
1330 (!defined @{$self->{align_seqs}}) and exit "No align_seqs member.\n";
1332 my $total_nhidden_member = 0;
1333 foreach (@{$self->{align_seqs}}) {
1334 ($_->is_hidden() eq 'yes') and next;
1335 $total_nhidden_member++;
1337 ($total_nhidden_member == 0) and exit "All members are hidden!\n";
1339 foreach (my $i = $self->{start_value}-1; $i < $self->{end_value}; $i++){
1340 my %na_count = ();
1341 my $base_count = 0;
1342 my $conserved_base = '-';
1343 foreach (@{$self->{align_seqs}}) {
1344 $_->is_hidden() eq 'yes' and next;
1345 my $base = substr($_->get_seq(), $i, 1);
1346 if ($base ne '-') {
1347 $base_count++;
1348 if (!defined $na_count{$base}) {
1349 $na_count{$base} = 1;
1351 else {
1352 $na_count{$base}++;
1356 if ((int keys %na_count == 1) && ($base_count > 1)) {
1357 foreach (keys %na_count) {
1358 $conserved_base = $_;
1361 $seq .= $conserved_base;
1364 $seq = '-' x ($self->{start_value} - 1) . $seq; #fill in the position before start_value
1365 return $seq;
1370 ################################End of Package align#####################################
1377 ###############################Package image_object#######################################
1379 =head1 Package CXGN::Alignment::image_object
1381 The base class for align_seq and ruler.
1383 Its attributes include: horizontal_offset, vertical_offset, length (pixel), height (pixel), horizontal_offset, vertical_offset, color, label_corlor
1385 =cut
1387 package CXGN::Alignment::image_object;
1389 =head2 Constructer new()
1391 Synopsis: my $img_obj = Alignment::image_object->new(
1392 horizontal_offset=>$x,
1393 vertical_offset=>$y,
1394 length=>$z,
1395 height=>$h,
1396 start_value=>$start_value,
1397 end_value=>$end_value
1400 Returns: a Alignment::image_object object
1402 =cut
1404 sub new {
1405 my $class = shift;
1406 my %args = @_;
1407 my $self = bless {}, $class;
1409 $self->{horizontal_offset} = $args{horizontal_offset};
1410 $self->{vertical_offset} = $args{vertical_offset};
1411 $self->{length} = $args{length};
1412 $self->{height} = $args{height};
1413 $self->{start_value} = $args{start_value};
1414 $self->{end_value} = $args{end_value};
1416 return $self;
1420 =head2 Setters and getters
1422 set_horizontal_offset(), set_vertical_offset, set_color(), set_label_color(), set_length(), set_start_value
1424 get_horizontal_offset(), get_vertical_offset(), get_label_color(), set_label_color(), get_length(), get_start_value()
1426 =cut
1428 sub set_color {
1429 my $self = shift;
1430 ($self->{color}[0], $self->{color}[1], $self->{color}[2]) = @_;
1433 sub set_label_color {
1434 my $self = shift;
1435 ($self->{label_color}[0], $self->{label_color}[1], $self->{label_color}[2]) = @_;
1438 sub set_start_value {
1439 my $self = shift;
1440 $self->{start_value} = shift;
1443 sub set_end_value {
1444 my $self = shift;
1445 $self->{end_value} = shift;
1448 sub get_start_value {
1449 my $self = shift;
1450 return $self->{start_value};
1453 sub get_end_value {
1454 my $self = shift;
1455 return $self->{end_value};
1458 sub get_horizontal_offset {
1459 my $self = shift;
1460 if (!exists($self->{horizontal_offset})) { $self->{horizontal_offset} = 0; }
1461 return $self->{horizontal_offset};
1464 sub set_horizontal_offset {
1465 my $self = shift;
1466 $self->{horizontal_offset} = shift;
1469 sub get_vertical_offset {
1470 my $self = shift;
1471 return $self->{vertical_offset};
1474 sub set_vertical_offset {
1475 my $self = shift;
1476 $self->{vertical_offset} = shift;
1479 sub get_length {
1480 my $self = shift;
1481 if (!exists($self->{length})) { $self->{length} = 0; }
1482 return $self->{length};
1486 sub set_length {
1487 my $self = shift;
1488 $self->{length} = shift;
1491 sub get_height {
1492 my $self = shift;
1493 return $self->{height};
1496 sub set_height {
1497 my $self = shift;
1498 $self->{height} = shift;
1502 =head2 Subs for image display
1504 set_enclosing_rect(), get_enclosing_rect, render(), get_image_string()
1506 =cut
1509 sub set_enclosing_rect {
1510 my $self = shift;
1511 ($self->get_horizontal_offset(), $self->get_vertical_offset(), $self->{width}, $self->{height}) = @_;
1514 sub get_enclosing_rect {
1515 my $self = shift;
1516 return ($self->get_horizontal_offset(), $self->get_vertical_offset(), $self->get_horizontal_offset() + $self->get_length() + 150, $self->get_vertical_offset() + $self->get_height);#to include the label space, $x plus 150
1519 sub render {
1520 my $self = shift;
1521 # does nothing
1524 sub get_image_string {
1525 my $self = shift;
1527 my $coords = join ",", ($self->get_enclosing_rect());
1528 my $string;
1529 if ($self->get_url()) {
1530 $string = "<area shape=\"rect\" coords=\"".$coords."\" href=\"".$self->get_url()."\" alt=\"".$self->get_id()."\" />";
1532 return $string;
1536 =head3 _calculate scaling_factor()
1538 Synopsis: $self->_calculate_scaling_factor()
1540 Description: calculate the scaling factor, set the scaling_factor attribute and return the scaling factor. private, called by render
1542 Returns: a number, scaling factor.
1544 =cut
1546 sub _calculate_scaling_factor {
1547 my $self = shift;
1548 my $dist = ($self->{end_value} - $self->{start_value}) + 1;
1549 if ($dist ==0) { return 0; }
1550 $self->{scaling_factor} = $self->{length}/$dist;
1551 return $self->{scaling_factor};
1556 ##############################End of Package image_object###################################
1563 ##############################Package align_seq#############################################
1565 =head1 Package Alignment::align_seq
1567 Inherit from image_object. Its special attributes include id, seq, species, seq_line_width, label_spacer, hide and url
1569 =cut
1571 package CXGN::Alignment::align_seq;
1572 use base qw( CXGN::Alignment::image_object );
1574 =head2 COnstructer new()
1576 Synopsis: my $al_sq = Alignment::align_seq->new(
1577 horizontal_offset=>$x,
1578 vertical_offset=>$y,
1579 length=>$z,
1580 height=>$h,
1581 start_value=>$start_value,
1582 end_value=>$end_value
1583 id=>$id,
1584 seq=>seq,
1585 species=>$species
1588 Returns: a CXGN::Alignment::align_seq object
1590 =cut
1592 sub new {
1593 my $class = shift;
1594 my %args = @_;
1595 my $self = $class->SUPER::new(@_);
1597 $self->{id} = $args{id};
1598 $self->{seq} = $args{seq};
1599 $self->{species} = $args{species};
1601 #set defaults
1602 $self->{font} = GD::Font->Small();
1603 $self->set_color (0, 0, 255);
1604 $self->set_label_color(0, 0, 0);
1605 $self->set_seq_line_width(8);
1606 $self->{label_spacer} = 20;
1607 $self->{hide} = 'no';
1609 #define an empty attributes
1610 $self->{url} = ();
1612 return $self;
1615 =head2 Setters and getters
1617 set_seq_line_width(), get_seq_lien_width(), set_label_spacer(), get_label_spacer(), is_hide(), hide_seq(), unhide_seq(), set_url(), get_url(), set_id(), get_id(), set_species(), get_species(), set_seq(), get_seq(), get_select_seq()
1619 =cut
1621 sub set_label_spacer{
1622 my $self = shift;
1623 $self ->{label_spacer} = shift;
1626 sub get_label_spacer{
1627 my $self = shift;
1628 if (!exists $self->{label_spacer}){
1629 $self->{label_spacer} = 0;
1631 return $self->{label_spacer};
1634 sub set_seq_line_width{
1635 my $self = shift;
1636 $self->{seq_line_width} = shift;
1638 sub get_seq_line_width{
1639 my $self = shift;
1640 if (!exists $self->{seq_line_width}){
1641 $self->{seq_line_width} = 1;
1643 return $self->{seq_line_width};
1646 sub set_id {
1647 my $self = shift;
1648 $self->{id} = shift;
1651 sub get_id {
1652 my $self = shift;
1653 return $self->{id};
1656 sub set_species {
1657 my $self = shift;
1658 $self->{species} = shift;
1661 sub get_species {
1662 my $self = shift;
1663 return $self->{species};
1666 sub set_seq {
1667 my $self = shift;
1668 $self->{seq} = shift;
1671 sub get_seq {
1672 my $self = shift;
1673 return $self->{seq};
1676 sub get_select_seq {
1677 my $self = shift;
1678 my $id = $self->{id};
1679 my $seq = $self->{seq};
1680 $seq = substr($seq, $self->{start_value} - 1, $self->{end_value} - $self->{start_value} + 1);
1681 return $seq;
1685 =head3 is_hidden()
1687 Synopsis: align_seq->is_hidden();
1689 Description: check if itself is hidden
1691 Returns: 'yes' if the sequence is hidden and 'n' if not. It is set to 'no' by default when the object is constructed.
1693 =cut
1695 sub is_hidden {
1696 my $self = shift;
1697 return $self->{hide};
1701 sub set_url {
1702 my $self = shift;
1703 $self->{url} = shift;
1706 sub get_url {
1707 my $self = shift;
1708 return $self->{url};
1711 sub hide_seq {
1712 my $self = shift;
1713 $self->{hide} = 'yes';
1716 sub unhide_seq {
1717 my $self = shift;
1718 $self->{hide} = 'no';
1722 =head2 Image display subs
1724 render()
1726 =cut
1728 =head3 render()
1730 Synopsis: align_seq->render($image) where $iamge is an image object
1732 Description: render the align_seq object
1734 Returns:
1736 =cut
1738 sub render {
1739 my $self = shift;
1740 my $image = shift;
1741 my $color = $image->colorResolve($self->{color}[0], $self->{color}[1], $self->{color}[2]);
1742 my $label_color = $image->colorResolve($self->{label_color}[0], $self->{label_color}[1], $self->{label_color}[2]);
1743 my $align_seq = substr ($self->{seq}, $self->{start_value}-1, $self->{end_value}-$self->{start_value}+1);
1744 my $seq_id = $self->{id};
1745 $self->_calculate_scaling_factor();
1746 my $i;
1747 for ($i = 0; $i < length ($align_seq); $i++){
1749 #draw sequence, a line if the base is a gap and a rectangle otherwise
1750 my $base = substr($align_seq, $i, 1);
1751 my $width;
1752 if ($base eq '-'){
1753 $image->line($i*$self->{scaling_factor}+$self->{horizontal_offset}, $self->{vertical_offset}, ($i+1)*$self->{scaling_factor}+$self->{horizontal_offset}, $self->{vertical_offset}, $color);
1755 else {
1756 $image->filledRectangle($i*$self->{scaling_factor}+$self->{horizontal_offset}, $self->{vertical_offset}-$self->{seq_line_width}/2, ($i+1)*$self->{scaling_factor}+$self->{horizontal_offset}, $self->{vertical_offset}+ $self->{seq_line_width}/2, $color);
1760 #add sequence name, first chop the sequence name to up to 30 characters. add '..' if the name is longer
1761 my $show_id;
1762 my $full_id = $self->{id} . ' ' . $self->{species};
1763 if ((length $full_id) >= 30) {
1764 $show_id = substr ($full_id, 0, 30);
1765 $show_id .= '..';
1767 else {
1768 $show_id = $full_id;
1771 $image->string($self->{font}, $i*$self->{scaling_factor}+ $self->{horizontal_offset} + $self->{label_spacer}, $self->{vertical_offset} - $self->{seq_line_width} / 2, $show_id, $label_color);
1775 =head2 Sequence analyzing subs
1777 get_nogap_length(), calculate_similarity(), get_medium(), get_range()
1779 All analysis are in the range from start_value to end_value
1781 =cut
1783 sub get_nogap_length {
1784 my $self = shift;
1785 my $no_gap_seq = $self->get_select_seq();
1786 $no_gap_seq =~ s/-//g;
1787 return length ($no_gap_seq);
1790 =head3 calculate_similarity()
1792 Synopsis: $align_seq->calculate_similarity($other_align_seq) where $other_align_seq is another align_seq object
1794 Description: get the overlap base number and idnetical percentage of the seq of two align_seq objects.
1796 Returns: an integer (number of overlap base) and a float (percentage indentity)
1798 =cut
1800 sub calculate_similarity {
1801 my $self = shift;
1802 my $other_seq = shift;
1803 ($self->get_length() != $other_seq->get_length()) and exit "$self->{id} and $other_seq->{id} sequences are not of the same length, align them first!\n";
1804 my ($overlap_base, $identical_base) = (0, 0);
1805 for (my $i = $self->{start_value} - 1; $i < $self->{end_value}-1; $i++){
1806 my $self_base = substr ($self->{seq}, $i, 1);
1807 my $other_base = substr($other_seq->{seq}, $i, 1);
1808 (($self_base eq '-') || ($other_base eq '-')) and next;
1809 $overlap_base++;
1810 ($self_base eq $other_base) and $identical_base++;
1812 if ($overlap_base != 0){
1813 return $overlap_base, $identical_base/ $overlap_base * 100;
1815 else {
1816 return $overlap_base, $identical_base;
1820 =head3 $align_seq->get_clean_align_seq()
1822 Synopsis: $align_seq->get_clean_align_seq($align_seq2) where $align_seq2 is another align_seq object
1824 Description: goes through the two alignment sequences, in the conmmon range, and leave out common gaps.
1826 Returns: two 'clean' overlap sequences, with common gaps removed.
1828 =cut
1830 sub get_clean_align_seq {
1831 my $self = shift;
1832 my $other_seq = shift;
1834 ($self->get_length() != $other_seq->get_length()) and exit "$self->{id} and $other_seq->{id} sequences are not of the same length, align them first!\n";
1835 my ($start1, $end1) = $self->get_range();##Takes care of start_value and end_value
1836 my ($start2, $end2) = $other_seq->get_range();
1837 my ($start, $end);
1839 ($start1 > $start2) ? ($start = $start1) : ($start = $start2);
1840 ($end1 < $end2) ? ($end = $end1) : ($end = $end2);
1842 my $id1 = $self->get_id();
1843 my $id2 = $other_seq->get_id();
1845 my ($seq1, $seq2);
1846 for (my $i=$start-1; $i<$end-1; $i++) {
1847 my $base1 = substr ($self->get_seq(), $i, 1);
1848 my $base2 = substr ($other_seq->get_seq(), $i, 1);
1850 ($base1 eq '-' && $base2 eq '-') and next;
1851 $seq1 .= $base1;
1852 $seq2 .= $base2;
1855 return $seq1, $seq2;
1858 =head3 get_medium()
1860 Synopsis: $align_seq->get_medium()
1862 Description: calculate the middle point of non-gap bases of the align_seq
1864 Returns: an integer
1866 =cut
1868 sub get_medium {
1869 my $self = shift;
1871 my $non_gap_len = 0;
1872 my $seq = $self->get_select_seq();
1873 while ($seq =~ /[A-Z]/gi) {
1874 $non_gap_len++;
1877 my $non_gap_mid = int ($non_gap_len / 2);
1878 my $non_gap_count = 0;
1880 my $mid;
1881 foreach ( my $i = $self->{start_value}-1; $i < $self->{end_value}; $i++){
1882 my $base = substr($self->{seq}, $i, 1);
1883 ($base ne '-') and $non_gap_count++;
1884 if ($non_gap_count > $non_gap_mid) {
1885 $mid = $i;
1886 last;
1890 $mid = $mid + $self->{start_value};
1891 return $mid;
1894 =head3 get_range()
1896 Synopsis: $align_seq->get_range()
1898 Description: get the postion of the first non gap character and the last non gap character, from start_value to end_value.
1900 Returns: two intgers representing two positions.
1902 =cut
1904 sub get_range {
1905 my $self = shift;
1906 my ($base_start, $base_end);
1908 my $seq = $self->get_select_seq();
1910 my ($head_gaps) = $seq =~ /^(-+)/;
1911 my ($tail_gaps) = $seq =~ /(-+)$/;
1912 (!defined $head_gaps) and $head_gaps = '';
1913 (!defined $tail_gaps) and $tail_gaps = '';
1914 $base_start = $self->get_start_value() + length ($head_gaps);
1915 $base_end = $self->get_end_value() - length ($tail_gaps);
1917 return $base_start, $base_end;
1921 ###################################End of Package align_seq####################################
1928 ##################################Package ruler###############################################
1930 =head1 Package Alignment::ruler
1932 This class is inherited from image_object. Its special attributes include label_side, unit, label_spacing and tick_spacing. The ruler is horizontal only.
1934 =cut
1936 package CXGN::Alignment::ruler;
1938 use base qw( CXGN::Alignment::image_object);
1939 =head2 Constructor new()
1941 Synopsis: my $ruler = Alignment::ruler->new(
1942 horizontal_offset=>$x,
1943 vertical_offset=>$y,
1944 length=>$z,
1945 height=>$h,
1946 start_value=>$a,
1947 end_value=>$b
1949 Returns: a Alignment::ruler object
1951 =cut
1953 sub new {
1954 my $class = shift;
1955 my %args = @_;
1956 my $self = $class->SUPER::new(@_);
1958 #set defaults
1959 $self->{font} = GD::Font->Small();
1960 $self->set_color (127, 127, 127);
1961 $self->set_label_color (0,0, 0);
1962 $self->{label_side} = "up";
1963 $self->{unit} = "bp";
1964 $self->{label_spacing} = 50;
1965 $self->{tick_spacing} = 50;
1967 return $self;
1970 =head2 Setters and getters
1972 set_labels_up(), set_labels_down(), set_unit("my_unit"), get_unit(), set_label_spacing, get_label_spacing, set_tick_spacing, get_tick_spacing
1974 =cut
1976 sub set_labels_up {
1977 my $self = shift;
1978 $self->{label_side} = "up";
1981 sub set_labels_down {
1982 my $self = shift;
1983 $self ->{label_side} = "down";
1986 sub set_unit {
1987 my $self = shift;
1988 $self->{unit} = shift;
1991 sub get_unit {
1992 my $self = shift;
1993 return $self->{unit};
1996 sub set_label_spacing {
1997 my $self = shift;
1998 $self->{label_spacing} = shift;
2001 sub get_label_spacing {
2002 my $self = shift;
2003 return $self->{label_spacing};
2006 sub set_tick_spacing {
2007 my $self = shift;
2008 $self->{tick_spacing} = shift;
2011 sub get_tick_spacing {
2012 my $self = shift;
2013 return $self->{tick_spacing};
2016 =head2 Image display sub render()
2018 Synopsis: $ruler->render($img) where $img is a image object
2020 Description: draws ruler line, ticks (goes to near the bottom of the image), label and unit,
2022 =cut
2024 sub render {
2025 my $self = shift;
2026 my $image = shift;
2028 my $color = $image->colorResolve($self->{color}[0], $self->{color}[1], $self->{color}[2]);
2030 #####################Draw ruler line
2031 $image->line($self->{horizontal_offset}, $self->{vertical_offset}, $self->{horizontal_offset} + $self->{length}, $self->{vertical_offset}, $color);
2033 #####################Draw ticks and tick labels
2034 #Reset tick and spacing, depending on the length
2035 $self->{tick_spacing} = ((int (($self->{end_value} - $self->{start_value} + 1) / 1000))+ 1) * 100;
2036 #$self->{label_spacing} = ($self->{tick_spacing}) * 2;
2038 #####################Determine the scaling factor. Increment label spaing by 10 until the longest label (maxim value) is shorter than label spacing
2039 $self->_calculate_scaling_factor();
2040 if ($self->{scaling_factor}) {
2041 #otherwise this is an infinite loop....
2042 while (($self->{label_spacing} * $self->{scaling_factor}) < ($self->{font}->width() * length ($self->{end_value})+2)) { $self->{label_spacing} +=50; }
2045 my $tick_number = int($self->{end_value}-$self->{start_value})/$self->{tick_spacing} + 1;
2046 for (my $i = 0; $i < $tick_number - 1; $i++) {
2047 my $x = $self->{horizontal_offset} + (($i*$self->{tick_spacing})*$self->{scaling_factor});
2048 $image->dashedLine($x, $self->{vertical_offset}-2, $x, $self->{height}, $color); #draw the tick
2049 if ( $i*$self->{tick_spacing} % $self->{label_spacing} == 0){#Draw tick label
2050 my $tick_label = $i*$self->{tick_spacing} + $self->{start_value} - 1;
2051 my $horizontal_adjust = $self->{font}->width * length ($tick_label)/2;
2052 my $tick_label_x = $x - $horizontal_adjust;
2053 my $tick_label_y;
2054 if ($self->{label_side} eq 'down'){
2055 $tick_label_y = $self->{vertical_offset} + 1;
2057 else {
2058 $tick_label_y = $self->{vertical_offset} - 1 - $self->{font}->height;
2060 $image->string($self->{font}, $tick_label_x, $tick_label_y, $tick_label, $color);
2064 #Write unit
2065 my $unit_label = "[".$self->{unit}."]";
2066 my $unit_label_x = $self->{horizontal_offset} + ($self->{length}- $self->{font}->width() * length($unit_label))/2;
2067 my $unit_label_y;
2068 if ($self->{label_side} eq 'down'){
2069 $unit_label_y = $self->{horizontal_offset} + 1 + $self->{font}->height;
2071 else {
2072 $unit_label_y = $self->{horizontal_offset} - 1 - $self->{font}->height*2;
2074 $image->string($self->{font}, $unit_label_x, $unit_label_y, $unit_label, $color);
2078 ###################################End of Package ruler#####################################
2085 ##################################Package chart#############################################
2086 =head1 Package CXGN::Alignment::chart
2088 Inherit from image_object. Its special attributes include id and hash_ref. The keys of the hash is position and the vaule is a percentage.
2090 =cut
2092 package CXGN::Alignment::chart;
2094 use base qw( CXGN::Alignment::image_object);
2096 =head2 Constructer new()
2098 Synopsis: my $chart= Alignment::chart->new(
2099 horizontal_offset=>$x,
2100 vertical_offset=>$y,
2101 length=>$z,
2102 height=>$h,
2103 start_value=>$start_value,
2104 end_value=>$end_value
2105 id=>id,
2106 hash_ref=>$ref
2108 Returns: a Alignment::chart object
2110 =cut
2112 sub new {
2113 my $class = shift;
2114 my %args = @_;
2115 my $self = $class->SUPER::new(@_);
2117 $self->{id} = $args{id};
2118 $self->{hash_ref} = $args{hash_ref};
2120 #set defaults
2121 $self->{font} = GD::Font->Small();
2122 $self->set_color (0, 0, 122);
2123 $self->set_label_color(0, 0, 0);
2124 $self->{label_spacer} = 20;
2125 $self->{hide} = 'no';
2126 return $self;
2129 =head2 Setters and getters
2131 set_id(), get_id(), set_hash_ref(), get_hash_ref()
2133 Nothing special
2135 =cut
2137 sub set_id {
2138 my $self = shift;
2139 $self->{id} = shift;
2142 sub get_id {
2143 my $self = shift;
2144 return $self->{id};
2147 sub set_hash_ref {
2148 my $self = shift;
2149 $self->{hash_ref} = shift;
2152 sub get_hash_ref {
2153 my $self = shift;
2154 return $self->{hash_ref};
2157 =head2 Image displaying sub
2159 render()
2161 =cut
2163 =head 3 render()
2165 Synopsis: $chart->render($img) where $img is an image object
2167 Description: it draws a base line and a 100% line from start_value to end_value. Then it get all the hash elements whose keys are in the reange from strt_value to end_value and draw a rectangle reprensting the vlaue.
2169 Returns:
2171 =cut
2173 sub render {
2174 my $self = shift;
2175 my $image = shift;
2177 my $ref = $self->{hash_ref};
2178 my %chart_hash = %$ref;
2180 my $color = $image->colorResolve($self->{color}[0], $self->{color}[1], $self->{color}[2]);
2181 my $label_color = $image->colorResolve($self->{label_color}[0], $self->{label_color}[1], $self->{label_color}[2]);
2183 my $seq_id = $self->{id};
2184 $self->_calculate_scaling_factor();
2186 $image->line($self->{horizontal_offset}, $self->{vertical_offset}, $self->{horizontal_offset} + $self->{length}, $self->{vertical_offset}, $color); #Draw the 0% line
2187 $image->line($self->{horizontal_offset}, $self->{vertical_offset} + 33.3, $self->{horizontal_offset} + $self->{length}, $self->{vertical_offset}+ 33.3, $color); #Draw the 100% line
2189 my ($i, $adjust_i);
2190 for ($i = $self->{start_value} - 1; $i < $self->{end_value}; $i++){
2192 (!defined $chart_hash{$i}) and ($chart_hash{$i} = 0);
2193 $adjust_i = $i - $self->{start_value} + 1; #adjust horizontal offset according to start_value
2195 $image->filledRectangle($adjust_i*$self->{scaling_factor}+$self->{horizontal_offset}, $self->{vertical_offset}, ($adjust_i+1)*$self->{scaling_factor}+$self->{horizontal_offset}, $self->{vertical_offset}+$chart_hash{$i}/3, $color);
2199 #add sequence name, first chop the sequence name to up to 30 characters. add '..' if the name is longer
2200 my $show_id; ;
2201 if ((length ($self->{id})) >= 30) {
2202 $show_id = substr ($self->{id}, 0, 30);
2203 $show_id .= '..';
2205 else {
2206 $show_id = $self->{id};
2209 $image->string($self->{font}, ($adjust_i + 1) *$self->{scaling_factor}+ $self->{horizontal_offset} + $self->{label_spacer}, $self->{vertical_offset} - $self->{seq_line_width} / 2, $show_id, $label_color);
2212 ################################End of Package chart###########################################