1 package CXGN
::Phylo
::Alignment
;
8 use Digest
::MD5 qw
/md5_hex/;
9 use CXGN
::Phylo
::Alignment
::Ruler
;
10 use CXGN
::Phylo
::Alignment
::Chart
;
11 use CXGN
::Phylo
::Alignment
::Member
;
12 use CXGN
::Phylo
::Alignment
::Legend
;
13 use CXGN
::Tools
::Identifiers qw
/identifier_url identifier_namespace/;
14 use CXGN
::Tools
::Param qw
/ hash2param /;
15 use CXGN
::Tools
::Parse
::Fasta
;
16 use CXGN
::Tools
::Gene
;
17 use CXGN
::DB
::Connection
;
19 =head1 CXGN::Phylo::Alignment
21 Alignment -- packages for analyzing, optimizing and displaying sequence alignments
25 Chenwei Lin (cl295@cornell.edu)
26 Refactoring and lingual adjustments provided by Chris Carpita <csc32@cornell.edu>
30 CXGN::Phylo::Alignment
31 CXGN::Phylo::Alignment::ImageObject
32 CXGN::Phylo::Alignment::Member
33 CXGN::Phylo::Alignment::Ruler
34 CXGN::Phylo::Alignment::Chart
36 Packages Member, Ruler and Chart inherit from ImageObject
38 =head1 Package CXGN::Phylo::Alignment
40 The basic element of the alignment object is an array of member.
41 Its attributes include: name, width (pixel), height(pixel),
42 image, members, ruler, chart, conserved_seq, sv_overlap, sv_identtity,
43 start_value and end_value
45 Its functionality includes:
47 2. Calculation and output of pairwise similaity and putative splice variant pairs based on similarity.
48 3. Hide some alignment sequences so that they are not included in the analysis.
49 4. Select a range of sequences to be analyzed.
50 5. Asses how a set of members overlaps with another set.
51 6. Calculate the non-gap mid point of each alignment sequence and group the sequences according to their overlap.
53 =head2 Constructer new()
55 Create a new alignment, returns an alignment object
59 Synopsis: my $alignment = CXGN::Phylo::Alignment->new(
63 type=>$type, #'nt' or 'pep'
66 Description: Upon constructing an alignment object, it sets name,
67 width and height using arguments. It also generates an image object of
68 {width} and {height} and sets the default value of sv_overlap,
69 sv_identity (the minimum number overlapping aa and percentage
70 identity for two members to be considered as putative splice variants),
71 sv_indel_limit (the min aa indel length for two sequences to be considered
72 as splice variants, instead of alleles) and start_value (the start value
73 of the ruler and member objects(s))
75 Returns: A CXGN::Phylo::Alignment object
82 if ( ref( $_[0] ) eq "HASH" ) { #forward compatibility
86 my $self = bless {}, $class;
87 ##################set attibutes from parameter
88 $self->{name
} = $args{name
};
90 ## Default constraints for all member ImageObjects, unless their display
91 ## properties are set explicitly. If you don't use set_image, this will
92 ## determine the height and width of the created image
93 $self->{width
} = $args{width
} || 600;
94 $self->{height
} = $args{height
} || 400;
95 $self->{left_margin
} = 30;
96 $self->{top_margin
} = 30;
97 $self->{right_margin
} = 10;
99 $self->{type
} = $args{type
}; # 'pep' or 'nt' or 'cds'
101 $self->{display_type
} = "complete";
103 #also: "alignment" (only), "stats" (chart and conserved sequence only).
104 #You can use 'c','a', and 's' as shorthand.
106 ##################set defaults
107 #splice variants criteria
108 $self->{sv_overlap
} = 20; #default in case we are using peptides
109 $self->{indel_limit
} = 4; #(ditto)
110 $self->{sv_identity
} = 95;
111 $self->{start_value
} =
112 1; #the end_value is initiated when the first member is added
113 $self->{end_value
} = 1;
115 ## define some 'empty' attributes that will be asigned later
116 @
{ $self->{members
} } = ();
117 $self->{ruler
} = undef;
118 $self->{chart
} = undef $self->{image
} = undef;
119 $self->{conserved_seq
} = undef;
120 $self->{seq_length
} = 0;
122 #generally don't need to set this, since it can be inferred based on content
123 $self->{unaligned
} = 0;
124 $self->{muscle
} = {}; #muscle parameters for running alignment
125 $self->{from_file
} = undef;
127 foreach (qw
/ from_file unaligned from_ids muscle tmp_dir /) {
128 $self->{$_} = $args{$_} if exists( $args{$_} );
131 if ( defined $self->{from_file
} and -f
$self->{from_file
} ) {
134 elsif ( $self->{from_file
} ) {
135 warn "Alignment file not found: " . $self->{from_file
} . "\n";
138 if ( ref( $self->{from_ids
} ) eq "ARRAY" ) {
139 my @ids = @
{ $self->{from_ids
} };
140 foreach my $id (@ids) {
144 $g = CXGN
::Tools
::Gene
->new($id);
146 $seq = $g->get_sequence('protein');
149 my $m = CXGN
::Phylo
::Alignment
::Member
->new(
155 $self->add_member($m);
164 # get the cols with <= $max_gaps gaps in them (non-hidden sequences only), make new alignment object
165 # with just these columns of the non-hidden sequences only.
167 sub remove_gappy_cols
{
170 $mxgps = 0 unless ($mxgps); #default is 0
171 my $oseqs = $self->get_overlap_seqs($mxgps)
172 ; # ref to hash with id/seq pairs. These are seqs with only the ungappy columns
174 if ( ( scalar keys %$oseqs ) ) {
175 $cols_left = length $oseqs->{ ( keys %$oseqs )[0] };
177 # print STDERR "After winnowing cols with max_gaps = $mxgps. Cols remaining: ", $cols_left, "\n";
182 return undef if ( $cols_left == 0 ); # in case of empty sequences
183 my $alignment = CXGN
::Phylo
::Alignment
->new();
184 foreach my $id ( keys %$oseqs ) {
185 my $seq = $oseqs->{$id};
187 CXGN
::Phylo
::Alignment
::Member
->new( id
=> $id, seq
=> $seq );
188 $alignment->add_member($member);
193 =head2 Setters and getters
195 get_name(), set_name()
197 get_image(), set_image()
199 get_width(), set_width()
201 get_height(), set_height()
205 get_sv_criteria(), set_sv_criteria()
207 get_start_value(), set_start_value(), check_start_value()
209 get_end_value(), set_end_value(), check_end_value()
211 get_left_margin(), set_left_margin(), get_top_margin(), set_top_margin()
213 Those for name, image, sv_overlap and sv_identity, height, width,
214 left_margin, top_margin are straightforward, while the
215 setters for start_value, end_value are not simple, since these attributes
216 are related to and/or restricted by other attributes. seq_length is
217 determined by the first member added and therefore can not be reset.
223 return $self->{name
};
229 $self->{name
} = $name;
234 return $self->{image
};
239 $self->{image
} = shift;
244 return $self->{width
};
249 return $self->{height
};
256 $self->{tmp_dir
} = $dir;
259 die "Directory does not exist: $dir\n";
262 my $vhost = SGN
::Context
->new();
264 $vhost->get_conf('basepath')
265 . $vhost->get_conf('tempfiles_subdir')
267 unless ( -d
$self->{tmp_dir
} ) {
268 warn "Temporary directory does not exist: "
270 . "; setting to current directory";
271 $self->{tmp_dir
} = ".";
274 return $self->{tmp_dir
};
279 if ( !$self->{tmp_dir
} ) {
280 $self->set_tmp_dir();
282 return $self->{tmp_dir
};
285 =head3 set_width(), set_height()
287 Synopsis: set_width($x), set_height($x)
289 Description: sets the attributes {width} and {height}.
295 $self->{width
} = shift;
300 $self->{height
} = shift;
303 =head3 set_sv_criteria()
305 Synopsis: $alignment->set_sv_criteria($x, $y, $z), while $x is
306 the minimum overlap, $y is a percentage similarity and $z is the minimal
307 amino acid indel length to be considered as a splice variant
309 Description: Set the putative splice variants standard, the minimum
310 overlapping bases and percentage identity (sv_overlap and sv_identity).
311 The sub checks if the values are correct before setting the attributes
315 sub set_sv_criteria
{
317 my ( $overlap, $identity, $indel ) = @_;
319 if ( $overlap <= 0 ) {
320 die "Overlap must be greater than 0!\n";
322 elsif ( ( $identity < 0 ) || ( $identity > 100 ) ) {
323 die "Percentage identity must be greater than 0 and less than 100\n";
325 elsif ( $indel < 0 ) {
326 die "Indel limit must be greater than 0!\n";
329 $self->{sv_overlap
} = $overlap;
330 $self->{sv_identity
} = $identity;
331 $self->{indel_limit
} = $indel;
335 sub get_sv_criteria
{
337 return ( $self->{sv_overlap
}, $self->{sv_identity
}, $self->{indel_limit
} );
340 sub set_left_margin
{
342 $self->{left_margin
} = shift;
345 sub get_left_margin
{
347 return $self->{left_margin
};
352 $self->{top_margin
} = shift;
357 return $self->{top_margin
};
360 sub get_right_margin
{
362 return $self->{right_margin
};
365 sub set_right_margin
{
367 $self->{right_margin
} = shift;
372 return $self->{label_gap
};
377 $self->{label_gap
} = shift;
380 sub _calculate_label_gap
{
383 foreach my $m ( @
{ $self->{members
} } ) {
384 my $ls = $m->get_label_spacer();
386 my $labelwidth = $m->{font
}->width() * length( $m->get_id() ) + $ls;
387 $maxwidth = $labelwidth if ( $labelwidth > $maxwidth );
389 if ( $self->{chart
} ) {
390 my $labelwidth = $self->{chart
}->get_longest_label_width();
391 $maxwidth = $labelwidth if ( $labelwidth > $maxwidth );
393 $self->{label_gap
} = $maxwidth;
397 sub get_start_value
{
399 return $self->{start_value
};
404 return $self->{end_value
};
407 sub check_start_value
{
412 print "$self->{id} start_value less than 0, reset to 0\n";
417 sub check_end_value
{
420 if ( $value > length $self->{members
}[0]->{seq
} ) {
421 $value = length $self->{members
}[0];
422 print "value greater than sequence length, reset to sequence length\n";
430 return $self->{seq_length
};
433 =head3 set_start_value(), set_end_value()
435 Synopsis: set_start_value($x), set_end_value($x)
437 Description: set the start_value and end_value. Check if the input value is correct before setting the attributes. Since the {start_value} and {end_value} attributes of the {ruler} and {members} must be the same as those of the alignment object, the set subs call the set_start_value and set_end_value of the {ruler} and all members of {members}
443 sub set_start_value
{
447 $self->check_start_value($value); #check if the start value is correct
448 $self->{start_value
} = $value; #the ruler will use this start_value
450 foreach ( @
{ $self->{members
} } )
451 { #set the start_value of each included member
452 $_->set_start_value($value);
460 #$value = $self->check_end_value($value);#check if the end value is correct
461 $self->{end_value
} = $value;
463 foreach ( @
{ $self->{members
} } )
464 { #set the start_value of each included member
465 $_->set_end_value($value);
471 return $self->{type
};
476 $self->{type
} = shift; #'nt' or 'pep'
479 sub set_display_type
{
481 my $display_type = shift;
482 die "Please set display type to c(omplete), a(lignment), or s(tats)"
483 unless $display_type =~ /^(a|c|s)/i;
484 $self->{display_type
} = $display_type;
487 sub get_display_type
{
489 return $self->{display_type
};
494 $self->{label_shown
} = 1;
499 $self->{label_shown
} = 0;
502 sub get_combined_member_height
{
504 my $total_height = 0;
505 foreach my $m ( @
{ $self->{members
} } ) {
506 next if $m->is_hidden;
507 $total_height += $m->get_height;
509 return $total_height;
512 sub set_fasta_temp_file
{
514 $self->{fasta_temp_file
} = shift;
517 sub get_fasta_temp_file
{
519 return $self->{fasta_temp_file
};
522 sub set_cds_temp_file
{
524 $self->{cds_temp_file
} = shift;
527 sub get_cds_temp_file
{
529 return $self->{cds_temp_file
};
532 =head1 Subroutines to add ImageObjects to Alignment object
534 add_member(), _add_ruler(), _add_cvg_chart, _add_conserved_seq_obj()
540 my $from_file = shift;
541 $from_file ||= $self->{from_file
};
542 my $parser = CXGN
::Tools
::Parse
::Fasta
->new($from_file);
545 while ( my $entry = $parser->next() ) {
546 my ( $id, $seq, $species ) =
547 ( $entry->{id
}, $entry->{seq
}, $entry->{species
} );
549 my $len = length($seq);
552 $member = CXGN
::Phylo
::Alignment
::Member
->new(
556 type
=> $self->{type
}
558 my $r = $self->add_member($member);
567 Synopsis: $alignment->add_member($member), while member is an member object
569 Description: Add member to alignment object. The member objects are
570 stored in an array members. Once the first member object is added,
571 only member objects of the same length can be further added. At the
572 same time, the end_value is set to be the same as the sequence length
573 of the first member added.
580 ref $member && $member->isa( __PACKAGE__
. '::Member' )
581 or croak
"invalid member '$member'";
583 if ( ! @
{ $self->{members
} } ) {
585 #if there are no members in @members, reset the end_value of overall
586 #alignment to the length of this sequence
587 $self->set_end_value( $member->get_end_value() );
588 $self->{seq_length
} = length( $member->get_select_seq() );
590 #adjust the vertical and horizontal offsets and length
591 #add the member to @members
592 push @
{ $self->{members
} }, $member;
595 my $len = length( $member->get_seq() );
597 #the length of the member must the the same as the first member in
598 #@members, otherwise it won't be added
599 if ( $len == $self->{seq_length
} ) {
601 # Set the start_value and end_value of the member to the current
602 # values of the overall alignment
603 $member->set_start_value( $self->get_start_value() );
604 $member->set_end_value( $self->get_end_value() );
606 push @
{ $self->{members
} }, $member;
608 elsif ( !$self->{unaligned
} ) {
609 my $id = $member->get_id();
610 my $seq = $member->get_select_seq();
611 unless ( $seq =~ /-/ ) {
613 "Sequence w/ id $id does not have same length, assuming unaligned sequences are provided";
614 $self->{unaligned
} = 1;
616 #don't need to set start/end values, since those are meaningless in the unaligned context
617 #we are assuming that muscle will be run via $self->run_muscle();
618 push( @
{ $self->{members
} }, $member );
622 #if we thought we were well-aligned, then we're dead wrong
624 "Error in adding pre-aligned sequence $id, length ($len) not the same as overall alignment ("
625 . $self->{seq_length
} . ")\n";
630 #We know the family is yet-to-be aligned
631 push( @
{ $self->{members
} }, $member );
637 { # this returns a ref to a hash with ids as keys, numbers of nongaps as values
640 foreach ( @
{ $self->{members
} } ) {
641 next if ( $_->is_hidden() );
642 $nongaps{ $_->get_id() } = $_->get_nongaps()
643 ; # this get_nongaps is a method of Member, returns # of nongaps
649 { # this returns just the total number of nongap characters in the alignment's non-hidden members.
652 foreach ( @
{ $self->{members
} } ) {
653 next if ( $_->is_hidden() );
654 $nongaps += $_->get_nongaps()
655 ; # this get_nongaps is a method of Member, returns # of nongaps
658 # print "in get_gaps_nongaps: ", $self->get_seq_length()*$self->get_visible_member_nr() - $nongaps, " ", $nongaps, "\n";
660 $self->get_seq_length() * $self->get_visible_member_nr() - $nongaps,
664 #sub get_gaps{ # get total gap characters in non-hidden members
666 # my $n_cols = $self->get_seq_length();
667 # my $n_seq = $self->get_visible_member_nr();
668 # return $n_cols*$n_seq - $self->get_nongaps();
671 =head3 hide_gappy_members
673 Hide members (sequences) if they have fewer than the specified number of non-gap characters
674 Arg: Minimum number of non-gap characters in sequence for it to be kept.
675 Ret: Number of members hidden by this pass through subroutine, others may have been already hidden.
679 sub hide_gappy_members
{
681 my $min_nongaps = shift;
682 my $n_hide = 0; # counts the number of sequences hidden by this call
683 foreach ( @
{ $self->{members
} } ) {
684 next if ( $_->is_hidden() ); #
685 # my ($gaps, $nongaps) = $_->get_nongaps();
686 if ( $_->get_nongaps() < $min_nongaps ) {
690 # print "hiding id: ", $_->get_id(), ". nongaps: ", $_->get_nongaps(), "\n";
698 Get the member objects of the alignment
706 return @
{ $self->{members
} };
709 sub add_legend_item
{
711 my ( $name, $color, $url, $tooltip ) = @_;
714 $self->_add_legend() unless $self->{legend
};
715 $self->{legend
}->add_item( $name, $color, $url, $tooltip );
720 Synopsis: $alignment->_add_ruler($x,$y), where $x is the top margin
721 and $y is the height of the ruler.
723 Description: Add a ruler to the alignment object, the start_value and
724 end_value are set to the same as those of the alignment. If no member
725 has been added to the alignment object, the seq_length, start_value and
726 end_value of the alignment are not set (see sub add_member), then a ruler
734 my $ruler = CXGN
::Phylo
::Alignment
::Ruler
->new(
735 start_value
=> $self->{start_value
},
736 end_value
=> $self->{end_value
}
738 ( $self->{type
} eq 'pep' ) and ( $ruler->set_unit('aa') );
739 ( $self->{type
} eq 'nt' ) and ( $ruler->set_unit('bp') );
740 $self->{ruler
} = $ruler;
743 =head3 _add_cvg_chart()
745 Synopsis: $alignment->_add_ruler($x,$y,$z), while $x is the vertical
746 offset, $y is the id and $z is a hash reference whose key is an integer
747 (a position) and value is a percentage
749 Description: Add a chart representing coverage by a member. The
750 start_value and end_value are set to the same as those of the alignment.
751 The coverage of each alignment postion is repreesnted by a hash reference
752 passed to the subroutine. The key of the hash is the alignment postion
753 and the values are percentage converage.
760 my $similarity_hash = shift;
761 my $conservation_hash = shift;
762 my $type_hash = shift;
764 my $chart = CXGN
::Phylo
::Alignment
::Chart
->new(
765 start_value
=> $self->{start_value
},
766 end_value
=> $self->{end_value
},
768 type
=> $self->{type
},
769 similarity_hash
=> $similarity_hash,
770 conservation_hash
=> $conservation_hash,
771 type_hash
=> $type_hash,
773 $self->{chart
} = $chart;
779 $self->{legend
} = CXGN
::Phylo
::Alignment
::Legend
->new(
780 left_margin
=> $self->{left_margin
},
781 top_margin
=> $self->{height
} - 100,
785 =head3 _add_conserved_seq_obj()
787 Synopsis: $alignment->_add_conserved_seq_obj($x), while $x is the vertical offset.
789 Description: Add a member object representing the conserved sequence of
790 the @members. The seq of this object is generated by another subroutine
791 get_conserved_seq. If the sequence at a position is not conserved among
792 all present members, it is repreesnted by - in conserved_seq. This
793 object is NOT a member of @members.
797 sub _add_conserved_seq_obj
{
800 $title ||= "Overall Conserved Sequence";
801 my $seq = $self->get_conserved_seq();
802 my $seq_obj = CXGN
::Phylo
::Alignment
::Member
->new(
803 start_value
=> $self->{start_value
},
804 end_value
=> $self->{end_value
},
809 $self->{conserved_seq
} = $seq_obj;
812 =head1 Subroutines to search and ouput ids of @members
814 is_id_member(), is_member(), id_to_member(), get_member_ids(),
815 get_nonhidden_member_ids, get_hidden_member_ids(), get_member_species(),
820 =head3 is_id_member()
822 Synopsis: is_id_member($id)
824 Description: Do any of the members have the same id as $id?
826 Returns: 1 for true and 0 for false
833 foreach ( @
{ $self->{members
} } ) {
834 return 1 if $_->{id
} eq $id;
841 Synopsis: is_member($member), while $member is an member object
843 Description: Is $member already a member?
845 Returns: 1 for true and 0 for false
852 foreach ( @
{ $self->{members
} } ) {
853 return 1 if $member == $_;
858 =head3 id_to_member()
860 Synopsis: $alignment->id_to_member($id);
862 Description: check if a alignment member has the id $id and return the alignment member
864 Returns: an alignment object
871 foreach ( @
{ $self->{members
} } ) {
872 return $_ if $_->{id
} eq $id;
876 =head3 get_member_ids()
878 Synopsis: $alignment->get_member_ids()
880 Description: Returns ids of all members
882 Returns: an array of ids of all non-hidden @members elements
890 push( @member_ids, $_->get_id ) foreach ( @
{ $self->{members
} } );
897 my $number = int( @
{ $self->{members
} } );
901 =head3 get_nonhidden_member_ids()
903 Synopsis: $alignment->get_nonhidden_member_ids()
905 Description: Returns ids of members that are not hidden
907 Returns: an array of ids of all non-hidden @members elements
911 sub get_nonhidden_member_ids
{
915 foreach ( @
{ $self->{members
} } ) {
916 unless ( $_->is_hidden ) {
917 my $id = $_->get_id();
924 sub get_nonhidden_member_nr
{
927 foreach ( @
{ $self->{members
} } ) {
928 $number++ unless $_->is_hidden;
933 =head3 get_hidden_member_ids()
935 Synopsis: $alignment->get_hidden_member_ids()
937 Description: Returns ids of members that are hidden
939 Returns: an array of ids of all hidden @members elements
943 sub get_hidden_member_ids
{
946 foreach ( @
{ $self->{members
} } ) {
947 push( @members, $_->get_id() ) if ( $_->is_hidden() );
952 sub get_hidden_member_nr
{
955 foreach ( @
{ $self->{members
} } ) {
956 $number++ if $_->is_hidden;
961 sub get_visible_member_nr
{
963 my $hidden_number = 0;
964 foreach ( @
{ $self->{members
} } ) {
965 $hidden_number++ if $_->is_hidden;
967 return $self->get_member_nr - $hidden_number;
970 =head3 get_member_species()
972 Synopsis: $alignment->get_member_species()
974 Description: Return the species of each member of @members
976 Returns: A hash reference whose keys are ids and values are species
980 sub get_member_species
{
983 my %member_species = ();
984 foreach ( @
{ $self->{members
} } ) {
986 my $species = $_->get_species();
987 $member_species{$id} = $species;
989 return \
%member_species;
992 sub get_member_urls
{
996 foreach ( @
{ $self->{members
} } ) {
998 my $url = $_->get_url();
999 $member_url{$id} = $url;
1002 return \
%member_url;
1005 =head2 Image processing subs of the package
1007 render(), render_png(), render_jpg(), render_png_file(), render_jpg_file(), get_image_map()
1013 my ($option) = $self->{display_type
} =~ /^(\w)/;
1014 $option = lc($option);
1017 $self->_add_ruler() unless $self->{ruler
};
1018 $self->_add_cvg_chart( "Coverage %", $self->get_ngap_pct )
1019 if ( $option =~ /^(c|s)/i && !$self->{chart
} );
1021 #$self->_add_conserved_seq_obj() if ($option =~ /^(c|s)/i && !$self->{conserved_seq});
1023 my $label_gap = $self->_calculate_label_gap();
1025 #The ImageObjects: Chart, Ruler, Member must all be width-adjusted to account for
1026 #label spacing and alignment image margins. This is an example where ImageObjects
1027 #should be subclassed and rendered properly in the first place, taking label spacing
1028 #into account, but this is much easier for now:
1029 my $width_adjustment =
1030 $label_gap + $self->get_left_margin + $self->get_right_margin;
1031 $width_adjustment = $self->{width_adjustment
}
1032 if exists $self->{width_adjustment
};
1034 my $ruler = $self->{ruler
};
1035 my $ruler_top_margin = 0;
1037 $ruler->layout_top_margin( $self->get_top_margin );
1038 $ruler->layout_left_margin( $self->get_left_margin );
1039 $ruler->layout_width( $self->get_width - $width_adjustment );
1040 $ruler->layout_label_spacer(20);
1041 $ruler->layout_color( 140, 140, 140 );
1042 $ruler->layout_label_color( 120, 120, 120 );
1044 # $ruler->layout_height($self->get_height);
1045 $ruler_top_margin = $ruler->get_top_margin;
1047 my $chart = $self->{chart
};
1048 my $chart_height = 0;
1050 $chart->layout_top_margin( $ruler_top_margin + 20 );
1051 $chart->layout_left_margin( $self->get_left_margin );
1052 $chart->layout_width( $self->get_width - $width_adjustment );
1053 $chart->layout_label_spacer(20);
1054 $chart->layout_color( 90, 90, 150 );
1055 $chart->layout_height(40);
1056 $chart_height = $chart->get_height;
1059 if ( $option eq 'c' || $option eq 'a' || $option eq 's' )
1060 { #alignment shown, default layout if member values not set
1062 ( $option eq 'c' || $option eq 's' )
1063 and $align_v_offset = $ruler_top_margin + $chart_height + 40;
1064 ( $option eq 'a' ) and $align_v_offset = $ruler_top_margin + 20;
1065 foreach my $m ( @
{ $self->{members
} } ) {
1066 next if ( $m->is_hidden() );
1067 $m->layout_top_margin($align_v_offset);
1068 $m->set_left_margin( $self->get_left_margin() );
1069 $m->layout_width( $self->get_width() - $width_adjustment );
1070 $m->layout_label_spacer(20);
1071 $m->layout_height(15);
1072 $m->layout_color( 20, 20, 160 );
1073 $m->layout_label_color( 90, 90, 90 );
1074 $align_v_offset += $m->get_height;
1076 my $members_height = $align_v_offset;
1077 $self->set_height( $members_height + 20 );
1080 $self->set_height( $ruler_top_margin + $chart_height + 40 );
1083 $ruler->layout_height( $self->get_height - 20 ) if $ruler;
1085 my $legend = $self->{legend
};
1087 $legend->{width
} = $self->{width
} - $self->{left_margin
} - $label_gap;
1089 $self->{height
} = ( $self->{height
} + $legend->{height
} + 10 );
1090 $legend->{top_margin
} = ( $self->{height
} - $legend->{height
} - 20 );
1091 $self->{height
} -= 5;
1092 $legend->{left_margin
} += 5;
1099 Synopsis: $alignment->render($o) where $o represnts option, 'c' for complete, 'a' for alignment only and 's' for simple (only the ruler, coverage chart and conserved sequence, no individual members of @members)
1101 Description: it does the following
1102 1. Generage, set and render a ruler
1103 2. Generate, set and render a chart representing coverge
1104 3. Generate, set and render a member object representing conserved sequence
1105 4. Render all non-hidden members of the @aign_seqs
1117 #Generate a image object for the ImageObject (member, ruler and chart) to render
1118 unless ( $self->{image
} ) {
1120 GD
::Image
->new( $self->get_width(), $self->get_height() )
1121 or die "Can't generate image\n";
1124 # the first color located is the background color, white
1125 $self->{white
} = $self->{image
}->colorResolve( 255, 255, 255 );
1127 $self->{image
}->filledRectangle( 0, 0, $self->{width
}, $self->{height
},
1130 $self->{chart
}->render( $self->{image
} ) if ( $self->{chart
} );
1132 $self->{ruler
}->render( $self->{image
}, $self ) if ( $self->{ruler
} );
1133 $self->{conserved_seq
}->render( $self->{image
} )
1134 if ( $self->{conserved_seq
} );
1136 my ($option) = $self->{display_type
} =~ /^(\w)/;
1137 $option = lc($option);
1139 if ( $option eq 'a' || $option eq 'c' ) {
1140 foreach ( @
{ $self->{members
} } ) {
1141 $_->render( $self->{image
} ) unless ( $_->is_hidden );
1146 #Send one member to legend renderer as sample for base_showing mode,
1147 #where colors must be lightened. Easiest if member has already been
1148 #rendered: otherwise, scaling factor and text width must be calculated
1150 $self->{legend
}->render( $self->{image
}, $self, $m )
1151 if ( $self->{legend
} && ( $option eq 'c' || $option eq 's' ) );
1155 =head3 render_png(), render_jpg()
1157 Synopsis: $alignment->render_jpg(), $alignment->render_png
1159 Description: Render itself and convert print out png or jpg
1165 sub render_separate_png_files
{
1167 my $background_filepath = shift;
1169 $self->{display_type
} = "separate";
1172 open( WF
, ">", $background_filepath );
1173 print WF
$self->{image
}->png();
1176 my $mfp = $background_filepath;
1177 $mfp =~ s/\.[^\.]+$//;
1179 my @member_imgs = ();
1180 foreach ( @
{ $self->{members
} } ) {
1181 $_->set_left_margin( $self->get_left_margin() );
1182 $_->set_top_margin( $_->get_height() / 2 );
1184 my $id = $_->get_id();
1187 my $img_path = $mfp . "." . md5_hex
($id) . ".png";
1188 push( @member_imgs, $img_path );
1189 open( WF
, ">$img_path" );
1191 $_->get_width() + $self->get_label_gap() + $self->get_right_margin();
1192 +$self->get_left_margin();
1194 my $h = $_->get_height();
1196 my $image = GD
::Image
->new( $w, $h )
1197 or die "Can't generate image\n";
1198 my $white = $image->colorAllocate( 255, 255, 255 );
1199 $image->filledRectangle( 0, 0, $w, $h, $white );
1200 $image->transparent($white);
1203 print WF
$image->png();
1206 return @member_imgs;
1212 print $self->{image
}->png();
1218 print $self->{image
}->jpeg();
1221 =head3 render_png_file(), render_jpg_file()
1223 SYnopsis: $alignment->render_png_file($file_name, $option), $alignment->render_jpg_file($file_name, $option)
1225 Description: take a filename as arguments, render itself and output pgn or jpg image to the file.
1231 sub render_png_file
{
1233 my $filename = shift;
1235 open( F
, ">$filename" )
1237 "Can't open $filename for writing!!! Check write permission in dest directory.";
1238 print F
$self->{image
}->png;
1242 sub render_jpg_file
{
1244 my $filename = shift;
1246 open( F
, ">$filename" )
1248 "Can't open $filename for writing!!! Check write permission in dest directory.";
1249 print F
$self->{image
}->jpeg;
1253 =head3 write_image_map()
1255 Synopsis: $alignment->write_image_map()
1257 Description: get the image map string of each non-hidden @members, concat them and return as a single string
1265 return $self->{param
};
1270 my $hashref = shift;
1271 die "Not hashref" unless ( ref $hashref eq "HASH" );
1272 $self->{param
} = $hashref;
1277 my ( $show_sv, $show_breakdown ) = @_;
1280 #XHTML 1.0+ requires id; name is for backward compatibility -- Evan, 1/8/07
1281 $map_content = "<map name='align_image_map' id='align_image_map'>\n";
1282 foreach ( @
{ $self->{members
} } ) {
1283 unless ( ref && $_->isa( __PACKAGE__
. '::Member' ) ) {
1284 require Data
::Dumper
;
1285 warn "members list is:\n"
1286 . Data
::Dumper
::Dumper
( $self->{members
} );
1287 confess
"invalid member '$_'";
1289 next if ( $_->is_hidden );
1290 $_->set_url( identifier_url
( $_->get_id ) );
1291 my $string = $_->get_imagemap_string();
1292 $map_content .= $string . "\n";
1294 if ( $self->{ruler
} && $self->get_fasta_temp_file ) {
1295 my ( $sv_shared, $sv_similarity, $sv_indel ) = $self->get_sv_criteria;
1296 my $hide_seq_string = join( "%20", @
{ $self->get_hidden_member_ids } );
1298 # my $extra_url_vars = hash2param($self->get_param);
1300 $self->{ruler
}->get_imagemap_string( $self, $self->get_param );
1302 $map_content .= $self->{legend
}->get_imagemap_string() if $self->{legend
};
1303 $map_content .= "</map>\n";
1304 return $map_content;
1307 =head1 Subroutines to analyze sequences of @members and output result
1309 get_member_similarity(), get_sv_candidates(), get_allele_candidates(),
1310 get_overlap_score(), get_all_overlap_score(), get_all_medium(),
1311 get_all_range(), get_seqs(), get_nopad_seqs(), get_overlap_seqs(),
1312 get_overlap_nums(), get_ngap_pct(), get_all_ngap_length(),
1313 get_conserved_seq_obj()
1317 =head3 get_member_similarity()
1319 Sysopsis: $alignment->get_member_similarity($al_sq) where $al_sq is an object of of algn_seq and member of @members
1321 Description: To output pair-wise similarities (overlap base, percentage indentity)of the member which is specified as argument between other members of @members.
1323 Returns: two hash references, one for overlap bases and the other for percentage indentity. The key of both hashes are the ids of other non hidden members of @members
1327 sub get_member_similarity
{
1333 foreach ( @
{ $self->{members
} } ) {
1334 next if $_ == $al_sq;
1335 my ( $overlapping, $percent_ident ) = $al_sq->calculate_similarity($_);
1336 my $other_id = $_->get_id();
1337 $member_ol{$other_id} = $overlapping;
1338 $member_pi{$other_id} = $percent_ident;
1340 return \
%member_ol, \
%member_pi;
1343 =head3 get_sv_candidates()
1345 Synopsis: $alignment->get_sv_candidates()
1346 Description: make pairwise comparison between members of @members of the
1347 same species. If the pair have enough overlap, and the percentage
1348 indentity is high enough, and they have enough insertion-deletion (
1349 specified as parameter), they are considered as putative splice variant pair
1351 Returns: 3 hash references
1352 1. for overlap bases, a 2-D hash, the two keys are the ids of putative pslice variant pair.
1353 2. for indentity percentage, also 2-D
1354 3. for species, the key is the species of the putative splice variant pair.
1358 sub get_sv_candidates
{
1361 my ( $indel, $overlap );
1362 ( $self->{type
} eq 'pep' )
1363 ?
( $indel = '-' x
$self->{indel_limit
} )
1364 : ( $indel = '---' x
$self->{indel_limit
} );
1365 ( $self->{type
} eq 'pep' )
1366 ?
( $overlap = $self->{sv_overlap
} )
1367 : ( $overlap = $self->{sv_overlap
} * 3 );
1369 my %sv_candidate_ob = ();
1370 my %sv_candidate_pi = ();
1371 my %sv_candidate_sp = ();
1373 foreach ( my $i = 0 ; $i < @
{ $self->{members
} } ; $i++ ) {
1374 foreach ( my $j = $i + 1 ; $j < @
{ $self->{members
} } ; $j++ ) {
1375 ( ( $self->{members
}[$i]->get_species() ) ne
1376 ( $self->{members
}[$j]->get_species() ) )
1378 my ( $ol_seq1, $ol_seq2 ) =
1379 $self->{members
}[$i]->get_clean_member( $self->{members
}[$j] );
1381 ( !( ( $ol_seq1 =~ /$indel/ ) || ( $ol_seq2 =~ /$indel/ ) ) )
1384 my ( $self_id, $other_id ) =
1385 ( $self->{members
}[$i]->get_id(),
1386 $self->{members
}[$j]->get_id() );
1388 $self->{members
}[$i]
1389 ->calculate_similarity( $self->{members
}[$j] );
1390 if ( ( $ob >= $overlap ) && ( $pi >= $self->{sv_identity
} ) ) {
1391 $sv_candidate_ob{$self_id}{$other_id} = $ob;
1392 $pi = sprintf( "%.2f", $pi )
1393 ; #truncate the number to two digits after the decimal point
1394 $sv_candidate_pi{$self_id}{$other_id} = $pi;
1395 $sv_candidate_sp{$self_id} = $self->{members
}[$i]->get_species;
1399 return \
%sv_candidate_ob, \
%sv_candidate_pi, \
%sv_candidate_sp;
1402 =head3 get_allele_candidates()
1404 Synopsis: $alignment->get_allele_candidates()
1406 Description: make pairwise comparison between members of @members of the
1407 same species. If the pair have enough overlap, and the percentage
1408 indentity is high enough, and they only have short insertion-deletion
1409 (specified as parameter), they are considered as putative allele pair
1411 Returns: 3 hash references
1412 1. for overlap bases, a 2-D hash, the two keys are the ids of putative alllele pair.
1413 2. for indentity percentage, also 2-D
1414 3. for species, the key is the species of the putative allele pair.
1418 sub get_allele_candidates
{
1421 my ( $indel, $overlap );
1422 ( $self->{type
} eq 'pep' )
1423 ?
( $indel = '-' x
$self->{indel_limit
} )
1424 : ( $indel = '---' x
$self->{indel_limit
} );
1425 ( $self->{type
} eq 'pep' )
1426 ?
( $overlap = $self->{sv_overlap
} )
1427 : ( $overlap = $self->{sv_overlap
} * 3 );
1429 my %al_candidate_ob = ();
1430 my %al_candidate_pi = ();
1431 my %al_candidate_sp = ();
1432 foreach ( my $i = 0 ; $i < @
{ $self->{members
} } ; $i++ ) {
1433 foreach ( my $j = $i + 1 ; $j < @
{ $self->{members
} } ; $j++ ) {
1434 ( ( $self->{members
}[$i]->get_species() ) ne
1435 ( $self->{members
}[$j]->get_species() ) )
1437 my ( $ol_seq1, $ol_seq2 ) =
1438 $self->{members
}[$i]->get_clean_member( $self->{members
}[$j] );
1440 ( ( $ol_seq1 =~ /$indel/ ) || ( $ol_seq2 =~ /$indel/ ) )
1441 and next; #skip if the aequence pair have long indel
1443 my ( $self_id, $other_id ) =
1444 ( $self->{members
}[$i]->get_id(),
1445 $self->{members
}[$j]->get_id() );
1447 $self->{members
}[$i]
1448 ->calculate_similarity( $self->{members
}[$j] );
1449 if ( $ob >= $overlap && ( $pi >= $self->{sv_identity
} ) ) {
1450 $al_candidate_ob{$self_id}{$other_id} = $ob;
1451 $pi = sprintf( "%.2f", $pi )
1452 ; #truncate the number to two digits after the decimal point
1453 $al_candidate_pi{$self_id}{$other_id} = $pi;
1454 $al_candidate_sp{$self_id} = $self->{members
}[$i]->get_species;
1458 return \
%al_candidate_ob, \
%al_candidate_pi, \
%al_candidate_sp;
1461 =head3 get_overlap_score();
1463 Synopsis: $alignment->get_overlap_score($alignment) where $member is an object of align
1465 Description: Calculate the overlap score of a member of @ members, which is specified as parameter. At a particular position of the target sequence(s1) has an overlap (not a gap) in another sequence (s2), s1 gets 1 point for alignment score. The total alignment score of s1 is the sum of all its non-gap positions.
1471 sub get_overlap_score
{
1475 return unless $self->is_member($al_sq);
1476 return if $al_sq->is_hidden;
1478 foreach ( my $i = $self->{start_value
} - 1 ;
1479 $i < $self->{end_value
} - 1 ; $i++ )
1481 my $base = substr( $al_sq->get_seq(), $i, 1 );
1482 $base eq '-' and next; # if a gap, next
1484 # char in al_sq is not a gap
1485 foreach ( @
{ $self->{members
} } ) {
1486 $_ == $al_sq and next; # do all sequences except skip $al_sq
1487 next if $_->is_hidden;
1488 my $other_base = substr( $_->get_seq(), $i, 1 );
1489 if ( $other_base ne '-' ) {
1497 =head3 get_all_overlap_score()
1499 Synopsis: $alignment->get_all_overlap_score()
1501 Description: score of all the non-hiden members in @members
1503 Returns: A hash reference whose key is the id of a @members elements and the value is the overlap score
1507 sub get_all_overlap_score
{
1510 my %member_score = ();
1511 foreach ( @
{ $self->{members
} } ) {
1512 next if $_->is_hidden;
1513 my $score = $self->get_overlap_score($_);
1514 my $id = $_->get_id();
1515 $member_score{$id} = $score;
1517 return \
%member_score;
1520 =head3 get_all_medium
1522 Synopsis: $alignment->get_all_medium()
1524 Description: Returns the medium position of each alignment sequence, in hash form
1526 Returns: A hash reference whose key is the id of a @members elements and the value is the medium position
1530 sub get_all_medium
{
1533 my %member_medium = ();
1534 foreach ( @
{ $self->{members
} } ) {
1535 unless ( $_->is_hidden ) {
1536 my $medium = $_->get_medium();
1537 my $id = $_->get_id();
1538 $member_medium{$id} = $medium;
1541 return \
%member_medium;
1544 =head3 get_all_range
1546 Synopsis: $alignment->get_all_range()
1548 Description: Output the start and end of characters of each member sequence
1550 Returns: Two hash references whose keys are the id of a @members element
1551 and whose values are the start and end positions, respectively
1552 ($start_pos, $end_pos) = ($start_ref->{$id}, $end_ref->{$id})
1559 my %member_head = ();
1560 my %member_end = ();
1561 foreach ( @
{ $self->{members
} } ) {
1562 unless ( $_->is_hidden ) {
1563 my ( $head, $end ) = $_->get_range();
1564 my $id = $_->get_id();
1565 $member_head{$id} = $head;
1566 $member_end{$id} = $end;
1569 return \
%member_head, \
%member_end;
1574 Synopsis: $alignment->get_seqs()
1576 Description: Output the alignment sequences (padded with gaps) of @members which are are not hidden, in the range specified by start_value and end_value
1578 Returns: a hash reference whose key is the id of each member and the value is the alignment sequence
1584 ( ! @
{ $self->{members
} } ) and return;
1586 my %member_seqs = ();
1587 foreach ( @
{ $self->{members
} } ) {
1588 unless ( $_->is_hidden ) {
1589 my $id = $_->get_id();
1590 my $seq = $_->get_select_seq();
1591 $member_seqs{$id} = $seq;
1594 return \
%member_seqs;
1597 =head3 get_nopad_seqs()
1599 Synopsis: $alignment->get_nopad_seqs()
1600 Description: Output the 'original' sequences (with gaps removed) of @members
1601 which are are not hidden, in the range specified by start_value and end_value
1602 Returns: a hash reference whose key is the id of each members and the value is the sequence
1606 sub get_nopad_seqs
{
1608 ( ! @
{ $self->{members
} } ) and return;
1610 my %member_seqs = ();
1611 foreach ( @
{ $self->{members
} } ) {
1612 unless ( $_->is_hidden ) {
1613 my $id = $_->get_id();
1614 my $seq = $_->get_select_seq();
1616 $member_seqs{$id} = $seq;
1619 return \
%member_seqs;
1622 =head3 get_overlap_seqs()
1624 Synopsis: $alignment-> get_overlap_seqs($max_gaps)
1625 Arguments: $max_gaps specifies a maximum number of gaps allowed in kept columns. Default is zero.
1626 Description: for each non-hidden @members, get the sequences that overlap with all
1627 the other non-hidden @members, in the range from start_value to end_value
1628 Returns: a hash reference whose key is the id of each member and the value is the overlap sequence
1632 sub get_overlap_seqs
{
1634 my $max_gaps = shift;
1636 return unless ( @
{ $self->{members
} } );
1637 my $cols_removed = 0;
1641 my $i = $self->{start_value
} - 1 ;
1642 $i <= $self->{end_value
} - 1 ;
1646 my %single_base = ();
1647 my $gaps_in_col = 0;
1648 foreach ( @
{ $self->{members
} } ) {
1649 next if $_->is_hidden; # skip hidden ones
1650 my $id = $_->get_id();
1651 $overlap_seqs{$id} = ""
1652 unless ( $overlap_seqs{$id} )
1653 ; # make sure there is a key/val pair for each id (with val empty string).
1654 my $base = substr( $_->get_seq(), $i, 1 );
1655 $single_base{ $_->get_id() } = $base;
1656 if ( $base eq '-' ) {
1658 last if ( $gaps_in_col > $max_gaps );
1662 # $overlap_seqs{$_} = "" unless (defined $overlap_seqs{$_});
1663 if ( $gaps_in_col <= $max_gaps ) {
1665 foreach ( keys %single_base ) {
1667 # $overlap_seqs{$_} = "" unless ($overlap_seqs{$_});
1668 $overlap_seqs{$_} .= $single_base{$_};
1674 # $overlap_seqs{$_} = "" unless (defined $overlap_seqs{$_});
1678 # print "in get_overlap_seqs. cols kept, removed: ", $cols_kept, " ", $cols_removed, "\n";
1679 return \
%overlap_seqs;
1682 =head3 get_overlap_num()
1684 Synopsis: $alignment->get_overlap_num()
1686 Description: count the number of bases that overlap between all the non-
1687 hidden members, in the range from start_value to end_value
1693 sub get_overlap_num
{
1695 if (0) { # old way. works
1696 my $overlap_count = 0;
1697 return unless @
{ $self->{members
} };
1699 #Get the first non-hidden member in @members as a basis for comparison
1701 foreach ( @
{ $self->{members
} } ) {
1702 next if $_->is_hidden;
1708 my $i = $self->{start_value
} - 1 ;
1709 $i <= $self->{end_value
} - 1 ;
1713 my $base = substr( $select->get_seq(), $i, 1 );
1714 next if $base eq '-';
1716 foreach ( @
{ $self->{members
} } ) {
1717 next if $_ == $select; # skip $select
1718 next if $_->is_hidden;
1719 my $other_base = substr( $_->get_seq(), $i, 1 );
1720 if ( $other_base eq '-' ) {
1725 $overlap_count++ if ( $pause == 0 );
1727 return $overlap_count;
1730 { # new way. also works but uses get_overlap_cols, which is more general as can allow >0 gaps in a col.
1731 my $overlap_cols = $self->get_overlap_cols();
1732 return $overlap_cols;
1736 =head3 get_overlap_cols()
1738 Synopsis: $alignment->get_overlap_cols($max_gaps)
1740 Description: Like get_overlap_num, but can allow some number ($max_gaps) of gaps in a column (counting non-hidden members only).,
1741 instead of requiring this be strictly zero. ($max_gaps is 0 by default).
1743 Returns: The number of columns with no more than $max_gaps gaps in them.
1746 sub get_overlap_cols
{
1748 my $max_gaps = shift
1749 ; # this is the max number of gaps allowed in a column which is kept in the overlap
1750 $max_gaps = 0 unless ( defined $max_gaps );
1751 return unless @
{ $self->{members
} }; # return if no sequences
1753 my $ncols = $self->{end_value
} - $self->{start_value
} + 1;
1756 my $i = $self->{start_value
} - 1 ;
1757 $i <= $self->{end_value
} - 1 ;
1762 foreach ( @
{ $self->{members
} } ) {
1763 next if $_->is_hidden;
1764 my $base = substr( $_->get_seq(), $i, 1 );
1765 if ( $base eq '-' ) {
1767 if ( $col_gaps > $max_gaps ) {
1774 return $ncols - $gappy_cols;
1777 =head3 get_overlap_cols_nongapchars()
1779 Synopsis: $alignment->get_overlap_cols_nongapchars($max_gaps)
1781 Returns: A list of (The number of columns with no more than $max_gaps gaps in them, the number of non-gap characters in those columns)
1784 sub get_overlap_cols_nongapchars{ # gets the number of columns with <= max_gaps, and also the number of nongap chars in those columns.
1786 my $max_gaps = shift; # this is the max number of gaps allowed in a column which is kept in the overlap
1787 $max_gaps = 0 unless(defined $max_gaps);
1788 my $overlap_cols = 0;
1789 my $overlap_nongap_chars = 0;
1790 return unless @{$self->{members}}; # return if no sequences
1792 foreach (my $i = $self->{start_value} - 1; $i <= $self->{end_value} - 1; $i++) {
1794 my $n_seq_nonhidden = 0;
1795 foreach (@{$self->{members}}) {
1796 next if $_->is_hidden;
1798 my $base = substr($_->get_seq(), $i, 1);
1803 if ($col_gaps <= $max_gaps) {
1805 $overlap_nongap_chars += $n_seq_nonhidden - $col_gaps;
1808 return ($overlap_cols, $overlap_nongap_chars);
1811 =head3 get_ngap_pct()
1813 Synopsis: $alignment->get_ngap_pct()
1815 Description: go from start_value to end_value, get the percentage
1816 coverage by @members. A position is covered by a member when it has a
1817 non gap at the position.
1819 Returns: a hash reference whose key is the position and values are the
1826 my %value_hash = ();
1827 my %conservation_hash = ();
1830 ( !@
{ $self->{members
} } ) and return;
1832 my $total_nhidden_member = 0;
1833 foreach ( @
{ $self->{members
} } ) {
1834 next if $_->is_hidden;
1835 $total_nhidden_member++;
1838 foreach ( my $i = $self->{start_value
} - 1 ;
1839 $i < $self->{end_value
} ; $i++ )
1842 foreach ( @
{ $self->{members
} } ) {
1843 next if $_->is_hidden;
1844 my $seq = $_->get_seq();
1845 my $base = substr( $seq, $i, 1 );
1846 ( $base ne '-' ) and $ngap_count++;
1848 my $pct = $ngap_count / $total_nhidden_member * 100;
1849 $value_hash{$i} = sprintf( "%.2f", $pct );
1853 my %type_count = ();
1855 foreach ( @
{ $self->{members
} } ) {
1856 next if $_->is_hidden();
1857 my $base = substr( $_->get_seq(), $i, 1 );
1859 unless ( $base eq '-' ) {
1861 my $type = $self->_get_aa_type($base);
1862 $type_count{$type}++;
1867 while ( my ( $base, $count ) = each %na_count ) {
1868 $max_ind = $count if ( $count > $max_ind );
1871 while ( my ( $type, $count ) = each %type_count ) {
1872 $max_type = $count if ( $count > $max_type );
1874 if ( $max_ind > 1 ) {
1875 $conservation_hash{$i} =
1876 sprintf( "%.2f", ( $max_ind / $base_count ) * 100 );
1878 if ( $max_type > 1 && $self->{type
} eq 'pep' ) {
1880 sprintf( "%.2f", ( $max_type / $base_count ) * 100 );
1884 $conservation_hash{$i} = 0;
1888 return \
%value_hash, \
%conservation_hash, \
%type_hash;
1891 =head3 get_all_nogap_length()
1893 Synopsis: $alignment->get_all_nogap_length()
1895 Description: Go from start_value to end_value, get the sequence length
1896 without gap of @members...
1898 Returns: A hash reference whose key is the id and value is the length
1902 sub get_all_nogap_length
{
1904 my %member_ng_len = ();
1905 foreach ( @
{ $self->{members
} } ) {
1906 next if $_->is_hidden();
1907 $member_ng_len{ $_->get_id() } = $_->get_nogap_length();
1909 return \
%member_ng_len;
1912 =head3 get_conserved_seq()
1914 Synopsis: $alignment->get_conserved_seq()
1916 Description: go through each postion from start_value to end_value of
1917 non-hidden member of @members. If all members have the same seq at the
1918 position, get the seq, otherwise put a gap (-) in the position.
1920 Returns: s string of sequence
1924 sub get_conserved_seq
{
1928 return unless ( @
{ $self->{members
} } );
1930 my $total_nhidden_member = 0;
1931 foreach ( @
{ $self->{members
} } ) {
1932 next if $_->is_hidden;
1933 $total_nhidden_member++;
1935 return if ( $total_nhidden_member == 0 );
1937 foreach ( my $i = $self->{start_value
} - 1 ;
1938 $i < $self->{end_value
} ; $i++ )
1942 my $conserved_base = '-';
1943 foreach ( @
{ $self->{members
} } ) {
1944 next if $_->is_hidden();
1945 my $base = substr( $_->get_seq(), $i, 1 );
1946 unless ( $base eq '-' ) {
1951 ($conserved_base) = ( keys %na_count )
1952 if ( ( int( keys %na_count ) == 1 ) && ( $base_count > 1 ) );
1953 $seq .= $conserved_base;
1956 #fill in the positions before the start value with gap characters
1957 $seq = '-' x
( $self->{start_value
} - 1 ) . $seq;
1962 my ( $self, $aa ) = @_;
1964 return unless ( length($aa) == 1 );
1965 return "acidic" if ( $aa =~ /D|E/ );
1966 return "basic" if ( $aa =~ /R|H|K/ );
1967 return "polar" if ( $aa =~ /N|C|Q|S|T/ );
1968 return "nonpolar" if ( $aa =~ /A|G|I|L|M|F|P|W|Y|V/ );
1971 sub highlight_domains
{
1974 #Retrieved from storable hash of gene objects, saving resources when possible:
1979 my ( $start_value, $end_value ) =
1980 ( $self->{start_value
}, $self->{end_value
} );
1981 return unless $self->{type
} eq "pep";
1982 my %domains; #general domain information
1984 my $sigpos_found = 0; #flag for displaying legend item
1986 my %member_domains; # member_id => array_ref of DB-derived domains
1988 foreach my $m ( @
{ $self->{members
} } ) {
1989 my $id = $m->get_id();
1990 my $gene = $genes->{$id};
1993 $gene = CXGN
::Tools
::Gene
->new($id);
1994 $gene->fetch("protein_length");
1995 $gene->fetch_sigp();
1998 $self->{page
}->log($@
) if $@
&& ref( $self->{page
} );
1999 $genes->{$id} = $gene;
2003 #Test the input sequences against protein lengths in the database.
2004 #If they're not the same, the user put in a fragment, or incorrect
2005 #data. Thus, we won't display misleading domain information.
2006 foreach my $m ( @
{ $self->{members
} } ) {
2007 my $id = $m->get_id();
2009 my $gene = $genes->{$id};
2011 $db_seq_len = length( $gene->get_sequence('protein') );
2012 my $prot_seq = $m->get_ungapped_seq;
2013 $prot_seq =~ s/X$//; #stop codon may be translated to X
2014 ( length($prot_seq) == $db_seq_len )
2015 ?
( $correct_length{$id} = 1 )
2016 : ( $correct_length{$id} = 0 );
2019 #Iterate through members, collecting domain information that will determine
2020 #the predominant domains for which to show colors. Also, determine signal
2021 #peptide and cleavage point.
2022 foreach my $member ( @
{ $self->{members
} } ) {
2023 next if $member->is_hidden();
2025 my $id = $member->{id
};
2027 next unless $correct_length{$id};
2028 my $gene = $genes->{$id};
2031 #Grab signal peptide
2032 my ( $sigpos, $cleavage );
2033 $sigpos = $gene->isSignalPositive();
2034 $cleavage = $gene->getCleavagePosition();
2036 $member->translate_positions_to_seq( $start_value, $end_value );
2037 unless ( !$sigpos || $lb > ( $cleavage + 1 ) ) {
2038 my @pepcolor = ( 40, 180, 40 );
2039 $member->add_region( "Signal Peptide",
2040 1, $cleavage - 1, \
@pepcolor )
2046 my @domains = $gene->getDomains;
2047 $member_domains{$id} = \
@domains;
2049 foreach (@domains) {
2051 $member->translate_positions_to_seq( $start_value, $end_value );
2052 unless ( ( defined $rb && $_->{dom_start
} > $rb )
2053 || ( $_->{dom_end
} < $lb ) )
2055 $domains{ $_->{dom_desc
} }->{count
}++;
2056 $domains{ $_->{dom_desc
} }->{length_tally
} +=
2057 ( $_->{dom_end
} - $_->{dom_start
} );
2058 $domains{ $_->{dom_desc
} }->{tooltip
} = $_->{dom_full_desc
}
2059 if $_->{dom_full_desc
};
2060 $domains{ $_->{dom_desc
} }->{url
} =
2061 identifier_url
( $_->{dom_interpro_id
} );
2066 . $_->{dom_start
} . "\n"
2069 . "\nRb: $rb\nLb: $lb";
2076 $self->add_legend_item( "Signal Peptide", [ 40, 180, 40 ] )
2079 my @limited_domain_colors = (
2092 my $limited_count = scalar @limited_domain_colors;
2094 my $other_domain_color = [ 140, 140, 140 ]; #everything else
2096 my $color_limit_reached = 0;
2097 foreach my $domain (
2098 sort { $domains{$b}->{length_tally
} <=> $domains{$a}->{length_tally
} }
2101 if (@limited_domain_colors) {
2102 $domain_color{$domain} = shift @limited_domain_colors;
2105 $color_limit_reached = 1;
2109 #Second domain iteration. This time, we know which domains are assigned to
2110 #which colors, so we can set the highlighting.
2111 foreach my $m ( @
{ $self->{members
} } ) {
2112 next if $m->is_hidden();
2114 next unless $correct_length{$id};
2116 my @domains = @
{ $member_domains{$id} };
2117 foreach (@domains) {
2118 my $desc = $_->{dom_desc
};
2119 my $color = $domain_color{$desc};
2120 $color ||= $other_domain_color;
2121 $m->add_region( $desc, $_->{dom_start
}, $_->{dom_end
}, $color );
2125 while ( my ( $desc, $color ) = each %domain_color ) {
2126 $self->add_legend_item(
2128 $domains{$desc}->{url
},
2129 $domains{$desc}->{tooltip
}
2132 if ($color_limit_reached) {
2133 my $overage = ( ( scalar keys %domains ) - $limited_count );
2134 $self->add_legend_item( "Other ($overage)", $other_domain_color );
2136 return $genes; #return hashref to be stored by script, optionally
2142 open( WF
, ">$file" ) or die("Can't open file '$file' for writing");
2143 print WF
$self->to_fasta();
2146 sub print_to_stdout
{
2148 print $self->to_fasta();
2154 foreach my $m ( @
{ $self->{members
} } ) {
2155 $string .= $m->to_fasta . "\n";
2163 my @local_run_output = "";
2164 my $command_line = "";
2165 my $path = $self->get_tmp_dir();
2170 $maxiters = $self->{muscle
}->{maxiters
} if ref $self->{muscle
};
2173 if ( $run eq "local" ) {
2174 my @t = `which muscle`;
2179 "Program 'muscle' not available, alignment will not be performed.";
2182 my $wd = &Cwd
::cwd
();
2184 my $temp_file = File
::Temp
->new(
2185 TEMPLATE
=> 'unaligned-XXXXXXX',
2189 $self->print_to_file( $temp_file->filename );
2190 my $result_file = $temp_file . ".aligned.fasta";
2193 "muscle -in $temp_file -out $result_file -maxiters $maxiters";
2194 print STDERR
"Running: $command_line\n";
2195 @local_run_output = `$command_line `;
2197 my $aligned = __PACKAGE__
->new( { from_file
=> $result_file } );
2199 $self->{members
} = $aligned->{members
};
2200 $self->{seq_length
} = $aligned->{seq_length
};
2202 system "rm $result_file";
2206 if ( $run eq "cluster" ) {
2207 die 'cluster runs no longer supported in this module';