5 use CXGN
::Page
::FormattingHelpers qw
/ page_title_html
7 use CXGN
::DB
::Connection
;
12 use CatalystX
::GlobalContext
'$c';
14 our $page = CXGN
::Page
->new( "SGN Gene Family Alignment", "Chenwei Lin");
16 our $family_size_limit = 100;
18 my ($family_id) = $page->get_arguments("family_id");
19 if ($family_id eq ""){
23 ##Connect to database and define queries
25 my $dbh = CXGN
::DB
::Connection
->new();
27 my $at_family_member_q = $dbh->prepare("select sequence_name from family_member where family_id = ? and database_name like 'Arabidopsis'");
29 my $sgn_family_member_q = $dbh->prepare("select unigene_id, family_member.cds_id, organism_group_id from family_member left join cds using (cds_id) where family_id = ? and (database_name like'SGN' or database_name like 'CGN')");
31 my $organism_group_q = $dbh->prepare("select comment from groups where group_id = ?");
33 my $sgn_align_seq_q = $dbh->prepare("select alignment_seq from family_member left join cds using (cds_id) where unigene_id = ? and family_id = ?");
35 my $at_align_seq_q = $dbh->prepare("select alignment_seq from family_member where sequence_name like ? and database_name like 'Arabidopsis' and family_id = ?");
37 my $family_tree_log_q = $dbh->prepare("select tree_taxa_number, tree_overlap_length, tree_log_file_location from family where family_id = ? and tree_taxa_number > 3");
39 my $family_tree_nw_q = $dbh->prepare("select tree_nr, newick_unigene from family_tree where family_id = ?");
41 ##Variables for each display sections. There are five sections: align_image, summary, splice variants, sequence_analyisis, sequence_output.
42 my ($sum_content, $align_content, $sv_content, $al_content, $seq_sum_content, $seq_output_content, $gene_tree_content);
46 ####################################################
47 #Get family member information.
51 my @all_sgn_member = ();
52 my %group_comment = ();
54 ##Retrieve Arabidoposis family members
56 $at_family_member_q->execute($family_id);
57 while( my ($member) = $at_family_member_q->fetchrow_array()){
58 my $locus = substr ($member, 0,9);
59 push @at_member, $member;
62 ##Retrieve SGN family members, group them by organisms
64 $sgn_family_member_q->execute($family_id);
65 while( my ($member, $cds_id, $organism_group_id) = $sgn_family_member_q->fetchrow_array()){
66 push @
{$sgn_member{$organism_group_id}}, $member;
67 push @all_sgn_member, $cds_id;
70 foreach (keys %sgn_member){
71 $organism_group_q->execute($_);
72 if (my ($comment) = $organism_group_q->fetchrow_array()){
73 $group_comment{$_} = $comment;
77 ######################################################
78 #Retrieve alignment sequences and store them in a align object $family_align
81 ##First check family size
82 my $total_member_nr = int (@all_sgn_member) + int (@at_member);
83 ($total_member_nr <2 ) and ¬_applicable
($page, $family_id, $total_member_nr);
84 ($total_member_nr >= $family_size_limit) and large_size
($page, $family_id, $total_member_nr);
87 ##Retrieve alignment sequences, SGN and Arabidopsis seperately
89 foreach (keys %sgn_member) {
91 foreach (@
{$sgn_member{$organism}}) {
93 $sgn_align_seq_q->execute($unigene_id, $family_id);
94 if (my ($alignment_seq) = $sgn_align_seq_q->fetchrow_array()){
96 $align_seq{$unigene_id} = $alignment_seq;
101 foreach (@at_member) {
102 $at_align_seq_q->execute($_, $family_id);
103 if (my ($alignment_seq) = $at_align_seq_q->fetchrow_array()){
104 chomp $alignment_seq;
105 $align_seq{$_} = $alignment_seq;
109 #warn Dumper \%align_seq;#for debug purpose
111 #############################
112 #Create a family_align object and save the data in it
114 $family_align = CXGN
::Alignment
::align
->new(align_name
=>$family_id,
120 my ($my_align_seq, $len);
122 #Adding align_seqs members this way, instead of by going through keys of %align_seq, so that the align_seqs members are grouped by their species
123 foreach (keys %sgn_member) {
125 foreach (@
{$sgn_member{$organism}}) {
126 my $organism_name = $group_comment{$organism};
127 if (defined $align_seq{$_}) {
128 $len = length ($align_seq{$_});
129 $my_align_seq = CXGN
::Alignment
::align_seq
->new(horizontal_offset
=>0,
137 species
=>$organism_name
139 $my_align_seq->set_url("unigene.pl?unigene_id=$_");
141 $family_align->add_align_seq($my_align_seq);
146 foreach (@at_member) {
147 if (defined $align_seq{$_}) {
148 $len = length $align_seq{$_};
149 my $my_align_seq = CXGN
::Alignment
::align_seq
->new(horizontal_offset
=>0,
157 species
=>'Arabidopsis'
159 my $locus = substr ($_, 0,9);
160 $my_align_seq->set_url("http://www.arabidopsis.org/servlets/TairObject?type=locus&name=$locus");
162 $family_align -> add_align_seq
($my_align_seq);
167 #########################################################
168 #Retrieve selected range, overlap length, no hidden member and so on
169 #Store the numbers in $sum_content
171 my $range_start = $family_align -> get_start_value
();
172 my $range_end = $family_align -> get_end_value
();
174 my $overlap_len = $family_align -> get_overlap_num
();
176 #this number might be different from $total_member_nr, since some sequences failed when being converted from peptide to nucleotide and were left out
177 my @all_align_members = $family_align -> get_nonhidden_member_ids
();
178 my $total_align_member = int (@all_align_members);
180 #This number might be different from $total_align_member since some mebers might be hidden
181 my @nonhidden_align_members = $family_align->get_nonhidden_member_ids();
182 my $total_show_align_member = int (@nonhidden_align_members);
184 $sum_content = "<tr><td colspan=\"2\"><a target=\"blank\" href=\"/about/fam_align_analysis.pl\">For help with how we made the alignment, please click here.</a></td></tr>";
185 $sum_content .="<tr><th>Sequence Type</th><td>Nucleotide</td></tr>";
186 $sum_content .= "<tr><th>Total Family Members</th><td>$total_member_nr</td></tr>";
187 $sum_content .= "<tr><th>Available Member Sequences</th><td>$total_align_member</td></tr>";
188 $sum_content .= "<tr><th>Show Member Sequences</th><td>$total_show_align_member</td></tr>";
189 $sum_content .= "<tr><th>Range</th><td>$range_start - $range_end bp</td></tr>";
190 $sum_content .= "<tr><th>Overlap</th><td>$overlap_len bp</td></tr>";
192 $sum_content .= "<tr><td colspan=\"2\" align=\"center\"><a target=\"blank\" href=\"/about/align_term.pl\">For an explanation of the terms, plase click here.</a></td></tr>";
194 #############################################################
195 #Draw family_alignment image
197 #Generate temp file handles
198 my $html_root_path = $c->config->{'basepath'};
199 my $doc_path = $c->config->{'tempfiles_subdir'}.'/align_viewer';
200 my $path = $html_root_path . $doc_path;
202 my $tmp_image = new File
::Temp
(
209 $family_align -> render_png_file
($tmp_image, 'c');
211 $tmp_image =~ s/$html_root_path//;
213 $align_content = "<tr><td><center><img src=\"$tmp_image\" usemap=\"#align_image_map\" alt=\"\" /></center></td></tr>";
214 $align_content .= "<tr><td>To view details for a particular family member, click the member image or the identifier.</td></tr>";
217 my $map_string = $family_align->write_image_map();
218 $align_content .= "<tr><td>$map_string</td></tr>";
220 ######################################################
221 #Find putative splice variants
222 my ($ob_ref, $pi_ref, $sp_ref) = $family_align -> get_sv_candidates
();
225 my %species = %$sp_ref;
226 $sv_content = "<tr><th>Species</th><th>Sequence id</th><th>Sequence id</th><th>Overlap Bases</th><th>%Identical</th></tr>";
227 foreach my $first_key (keys %ob) {
228 foreach my $second_key (keys %{$ob{$first_key}}) {
229 $sv_content .= "<tr><td>$species{$first_key}</td><td>$first_key</td><td>$second_key</td><td>$ob{$first_key}{$second_key}</td><td>$pi{$first_key}{$second_key}</td></tr>";
234 ######################################################
235 #Find putative allele pairs
236 my ($al_ob_ref, $al_pi_ref, $al_sp_ref) = $family_align -> get_allele_candidates
();
237 my %al_ob = %$al_ob_ref;
238 my %al_pi = %$al_pi_ref;
239 my %al_species = %$al_sp_ref;
240 $al_content = "<tr><th>Species</th><th>Sequence id</th><th>Sequence id</th><th>Overlap Bases</th><th>%Identical</th></tr>";
241 foreach my $first_key (keys %al_ob) {
242 foreach my $second_key (keys %{$al_ob{$first_key}}) {
243 $al_content .= "<tr><td>$al_species{$first_key}</td><td>$first_key</td><td>$second_key</td><td>$al_ob{$first_key}{$second_key}</td><td>$al_pi{$first_key}{$second_key}</td></tr>";
248 ######################################################
250 my $family_member_ids_ref = $family_align->get_member_ids();
251 my $ov_score_ref = $family_align -> get_all_overlap_score
();
252 my $medium_ref = $family_align -> get_all_medium
();
253 my ($head_ref, $tail_ref) = $family_align -> get_all_range
();
254 my $ng_ref = $family_align -> get_all_nogap_length
();
255 $sp_ref = $family_align->get_member_species();
256 my $url_ref = $family_align->get_member_urls();
258 my @family_member_ids = @
$family_member_ids_ref;
260 my %ov_score = %$ov_score_ref;
261 my %medium = %$medium_ref;
262 my %head = %$head_ref;
263 my %tail = %$tail_ref;
269 $gap{$_} = $tail{$_} - $head{$_} + 1 - $ng{$_};
273 $seq_sum_content = "<tr><th>Sequence id</th><th>Species</th><th>Cover Range</th><th>Bases</th><th>Gaps</th><th>Medium</th><th>Overlap Score</th></tr>";
275 foreach (@family_member_ids) { ##Access the align_seqs members this way, instead of by the keys of the hashes, so that the sequences are grouped together
276 $seq_sum_content .= "<tr><td><a target=\"blank\" href=\"$url{$)}\">$_</a></td><td>$species{$_}</td><td>$head{$_} - $tail{$_}</td><td>$ng{$_}</td><td>$gap{$_}</td><td>$medium{$_}</td><td>$ov_score{$_}</td></tr>";
279 #####################################################
280 #Write sequence output content
282 my $seq_ref = $family_align->get_nopad_seqs();
283 my $align_seq_ref = $family_align -> get_seqs
();
284 my $ol_seq_ref = $family_align -> get_overlap_seqs
();
286 if (defined $seq_ref) {
288 my $tmp_seq_file = new File
::Temp
(
294 my %seqs = %$seq_ref;
296 my $seq_io_obj = Bio
::SeqIO
->new('-fh' => $tmp_seq_file,
299 foreach (@family_member_ids) {
300 my $comp_id = $_ . '-' . $species{$_};
301 $comp_id =~ s/\s/_/g;
302 my $seq_obj = Bio
::Seq
->new('-seq' => $seqs{$_},
305 $seq_io_obj->write_seq($seq_obj);
309 $tmp_seq_file =~ s/$html_root_path//;
311 $seq_output_content = "<tr><td><a target=\"blank\" href=\"$tmp_seq_file\">All Sequences (No gap)</a></td></tr>";
315 if (defined $align_seq_ref) {
317 my $tmp_align_seq_file = new File
::Temp
(
323 my %alignment_seqs = %$align_seq_ref;
324 my $seq_io_obj = Bio
::SeqIO
->new('-fh' => $tmp_align_seq_file,
325 '-format' => 'fasta');
326 foreach (@family_member_ids) {
327 my $comp_id = $_ . '-' . $species{$_};
328 $comp_id =~ s/\s/_/g;
329 my $seq_obj = Bio
::Seq
->new(
330 '-seq' => $alignment_seqs{$_},
333 $seq_io_obj->write_seq($seq_obj);
335 close $tmp_align_seq_file;
337 $tmp_align_seq_file =~ s/$html_root_path//;
339 my $pass_tmp_align_seq_file = $html_root_path . $tmp_align_seq_file;
341 $seq_output_content .= "<tr><td><a target=\"blank\" href=\"$tmp_align_seq_file\">All Alignment Sequences (Padded with gap)</a></td>";
343 my $title = 'Family ' . $family_id;
344 $seq_output_content .= "<td><a target=\"blank\" href=\"/tools/align_viewer/show_align.pl?format='fasta'&temp_file=$pass_tmp_align_seq_file&type=nt&start_value=1&end_value=10000&title=$title\">Analyze and Optimize with Alignment Viewer.</a></td></tr>";
347 if ($overlap_len > 0) {
348 my %ol_seqs = %$ol_seq_ref;
350 my $tmp_ol_seq_file = new File
::Temp
(
356 my $ol_seq_io_obj = Bio
::SeqIO
->new(
357 '-fh' => $tmp_ol_seq_file,
361 foreach (@family_member_ids) {
362 my $comp_id = $_ . '_' . $species{$_};
363 my $seq_obj = Bio
::Seq
->new(
364 '-seq' => $ol_seqs{$_},
367 $ol_seq_io_obj->write_seq($seq_obj);
369 close $tmp_ol_seq_file;
371 $tmp_ol_seq_file =~ s/$html_root_path//;
372 $seq_output_content .= "<tr><td><a target=\"blank\" href=$tmp_ol_seq_file>Overlap Sequences</a></td></tr>";
377 #####################################################
378 #Retrieve tree information
379 $family_tree_log_q->execute($family_id);
380 if (my ($taxa_num, $ol_len, $lf_loc) = $family_tree_log_q->fetchrow_array()) {
381 $gene_tree_content = "<tr><td colspan=\"2\" align=\"center\"><a target=\"blank\" href=\"/about/fam_tree_analysis.pl\">For help with gene tree construction, please click here.</a></td></tr>";
382 $gene_tree_content .= "<tr><th>No. of Taxa</th><td>$taxa_num</td></tr>";
383 $gene_tree_content .= "<tr><th>No. of Characters</th><td>$ol_len</td></tr>";
384 $gene_tree_content .= "<tr><th>Log File</th><td><a href=\"$lf_loc\">View</a></td></tr>";
386 $family_tree_nw_q->execute($family_id);
387 while (my ($tree_nr, $nw) = $family_tree_nw_q->fetchrow_array()) {
388 my $tree_title = 'Family ' . $family_id . ' Tree No. ' . $tree_nr;
389 $gene_tree_content .= "<tr><th>Tree No. $tree_nr</th><td>$nw<br><br><a target=\"blanc\" href=\"/tools/tree_browser/?tree_string=$nw&title=$tree_title\">View in Tree Browser</a><br><br></td></tr>";
393 $gene_tree_content = "<tr><td>No tree available</td></tr>";
397 ######################################################
400 print page_title_html
("Gene Family $family_id Alignment Details");
402 print blue_section_html
('Family Sequence Alignment','<table width="100%" cellpadding="5" cellspacing="0" border="0">'.$align_content.'</table>');
404 print blue_section_html
('Summary','<table width="80%" cellpadding="5" cellspacing="0" border="0">'.$sum_content.'</table>');
406 print blue_section_html
('Putative Splice Variant Pairs','<table width="100%" cellpadding="5" cellspacing="0" border="0">'.$sv_content.'</table>');
408 print blue_section_html
('Putative Allele Pairs','<table width="100%" cellpadding="5" cellspacing="0" border="0">'.$al_content.'</table>');
410 print blue_section_html
('Aligment Analysis','<table width="100%" cellpadding="5" cellspacing="0" border="1">'.$seq_sum_content.'</table>');
412 print blue_section_html
('Output Sequences','<table width="100%" cellpadding="5" cellspacing="0" border="0">'.$seq_output_content.'</table>');
414 print blue_section_html
('Family Gene Tree','<table width="100%" cellpadding="5" cellspacing="0" border="1">'.$gene_tree_content.'</table>');
420 my ($page, $family_id) = @_;
426 <b>No family id specified</b>
435 my ($page, $family_id, $num) = @_;
441 <b>The specified $family_id has only $num valid member so that alignment is not applicable.</b>
450 my ($page, $family_id, $num) = @_;
456 <b>The specified $family_id has $num valid members. Alignment is available for families having up to $family_size_limit members.</b>