Merge pull request #5134 from solgenomics/topic/fix_seedlot_search
[sgn.git] / cgi-bin / search / family.pl
blobfdbcf8917a18849c8894fecad76425dabbe3c23b
1 #!/usr/bin/perl -w
2 use strict;
3 use warnings;
4 use CXGN::Page;
5 use CXGN::Page::FormattingHelpers qw/ page_title_html
6 info_section_html
7 blue_section_html /;
8 use CXGN::DB::Connection;
9 use CXGN::Phylo::Alignment;
10 use CXGN::Tools::Identifiers;
11 use CXGN::Tools::WebImageCache;
12 use CatalystX::GlobalContext '$c';
13 use File::Temp;
# Page object for this request; it is also passed to empty_search() below.
our $page = CXGN::Page->new( "SGN Gene Family", "Chenwei Lin");

# Statement handles used throughout the script.  $sgn_align_seq_q and
# $at_align_seq_q are declared but not prepared in the code visible here.
our ($sum_q, $at_family_member_q, $sgn_family_member_q, $organism_group_q, $other_family_q, $sgn_align_seq_q, $at_align_seq_q);

my $dbh = CXGN::DB::Connection->new("sgn");

# Summary row for one family: build data, member count, annotation and the
# comment of the group (data set) the build belongs to.
$sum_q = $dbh->prepare(q{
    SELECT
    family_build.family_build_id,
    family_build.status,
    count(family_member_id),
    build_date,
    build_nr,
    i_value,
    family_annotation,
    family_build.group_id,
    comment,
    family.family_nr
    FROM sgn.family_member
    LEFT JOIN sgn.family USING (family_id)
    LEFT JOIN sgn.family_build USING (family_build_id)
    LEFT JOIN sgn.groups USING (group_id)
    WHERE family.family_id = ?
    GROUP BY family.family_id, family.family_nr, family_build.family_build_id, family_build.status, build_date, build_nr, i_value, family_annotation, group_id, comment});

# Sequence names of the Arabidopsis members of a family.
$at_family_member_q = $dbh->prepare(q{
    SELECT sequence_name
    FROM family_member
    WHERE family_id = ?
    AND database_name LIKE 'Arabidopsis'});

# Unigene id, cds id and organism group of the SGN/CGN members of a family.
$sgn_family_member_q = $dbh->prepare(q{
    SELECT unigene_id, family_member.cds_id, organism_group_id
    FROM sgn.family_member
    LEFT JOIN sgn.cds USING (cds_id)
    WHERE family_id = ?
    AND ( database_name LIKE 'SGN'
    OR database_name LIKE 'CGN'
    )
});

# Comment text stored for a group id.
$organism_group_q = $dbh->prepare(q{
    SELECT comment
    FROM sgn.groups
    WHERE group_id = ?});

# Other families, in current builds (status 'C') of the same group, that
# contain a given cds.
$other_family_q = $dbh->prepare(q{
    SELECT family_member.family_id, build_nr, i_value, member_count, family_nr
    FROM sgn.family_member
    LEFT JOIN sgn.family USING (family_id)
    LEFT JOIN sgn.family_build USING (family_build_id)
    WHERE cds_id = ?
    AND family_member.family_id != ?
    AND group_id = ?
    AND status='C'
});
# Families with more members than this are not drawn inline; the page links
# to the alignment viewer instead (see the alignment section below).
my $family_size_align_limit = 200;

# A family is addressed either directly by family_id, or by the pair
# (family_nr, i_value), which is resolved to a family_id here.
my ($family_id, $family_nr, $i_value) = $page->get_arguments(qw/ family_id family_nr i_value/ );

# Whether get_arguments() yields undef or '' for an absent parameter is not
# visible from this file, so both are treated as "not supplied".  Testing
# defined-ness first avoids an "uninitialized value" warning under -w.
if (!defined $family_id || $family_id eq "") {
    empty_search($page) unless ($family_nr && $i_value);

    my $family_id_q = $dbh->prepare("
        SELECT family_id
        FROM sgn.family
        LEFT JOIN sgn.family_build USING(family_build_id)
        WHERE
        family_nr=?
        AND family_build.i_value=?");
    $family_id_q->execute($family_nr, $i_value);
    ($family_id) = $family_id_q->fetchrow_array();

    # No family matches that number / i value combination.
    empty_search($page) unless $family_id;
}
my ($sum_content, $member_content, $link_content, $align_content);

##################################
#Get information for the summary section

my ($annotation_content, $family_nr_content, $date_content, $i_value_content, $bn_content, $group_content, $total_gene_content);
my ($family_build_id, $family_build_status, $total_gene, $date, $build_nr, $annotation, $group_id, $group_comment);

$sum_q->execute($family_id);
my @summary_row = $sum_q->fetchrow_array();

# An unknown family_id yields no row.  Report that instead of rendering a
# page built from undefined values.  invalid_search() ends the request
# (it calls exit), so nothing below runs in that case.  Only a purely
# numeric id is echoed back, so raw request input never reaches the HTML.
if (!@summary_row) {
    invalid_search($page, $family_id =~ /\A\d+\z/ ? $family_id : 'non-numeric value');
}

# The database values replace any family_nr / i_value taken from the request.
($family_build_id, $family_build_status, $total_gene, $date, $build_nr, $i_value, $annotation, $group_id, $group_comment, $family_nr) = @summary_row;

# Only the first ten characters of the build date are displayed.
my $date_display = substr ($date, 0,10);
$total_gene_content = "<tr><th>Total Genes</th><td>" . $total_gene . "</td></tr>";
$date_content = "<tr><th>Build Date</th><td>" . $date_display . "</td></tr>";
$family_nr_content = "<tr><th>Family&nbsp;Number</th><td>" . $family_nr . "</td></tr>";

# Describe how strict the grouping was for this i value.
if ($i_value < 2) {
    $i_value_content = $i_value . " (Low stringency in grouping genes together)";
}
elsif ($i_value < 5) {
    $i_value_content = $i_value . " (Normal stringency in grouping genes together)";
}
else {
    $i_value_content = $i_value . " (High stringency in grouping genes together)";
}

$i_value_content = "<tr><th>i Value</th><td> ". $i_value_content . "</td></tr>";
$bn_content = "<tr><th>Build</th><td>" . $build_nr . " <a href=\"family_build.pl?family_build_id=$family_build_id\">[Details of the Overall Family Build]</a>" . "</td></tr>";
$annotation_content = "<tr><th valign=\"top\">Annotation</th><td>" . $annotation . "</td></tr>";
$group_content = "<tr><th>Data Set</th><td>" . $group_comment . "</td></tr>";

# Banner shown above the summary when the build is not current (status 'C').
my $deprecated_content = '';
if ($family_build_status ne 'C') {
    $deprecated_content .= "<center><div class=\"deprecated\" style=\"width:550px\">This family is from an out-of-date build. <br />";
    $deprecated_content .= "<span style=\"color:black;font-size:0.9em\">See below for relations to current builds</span></div></center>";
}

$sum_content = $family_nr_content . $total_gene_content . $date_content .$i_value_content . $bn_content . $annotation_content . $group_content;
$sum_content .= "<tr><td colspan=\"2\" align=\"center\"><a href=\"/about/family_analysis.pl\" target=\"blank\">
                 Explanation of family analysis and terms used on this page
                 </a></td></tr>";
####################################################
#Get family member information.
my $at_member_nr;
my @at_member_url = ();
my @at_member = ();
my %sgn_member = ();        # organism group id => list of unigene ids
my @all_sgn_member = ();    # cds ids of all SGN/CGN members; reused by later sections
my %group_comment = ();     # organism group id => label shown in the table
my %group_member_nr = ();   # organism group id => number of members
my $family_member_content;

# Arabidopsis members: the link is built from the first nine characters of
# the sequence name.
$at_family_member_q->execute($family_id);
while (my ($member) = $at_family_member_q->fetchrow_array()) {
    my $locus = substr ($member, 0,9);
    push @at_member_url, CXGN::Tools::Identifiers::link_identifier($locus);
    push @at_member, $member;
}
$at_member_nr = scalar @at_member;

# SGN/CGN members, bucketed by organism group.
$sgn_family_member_q->execute($family_id);
while (my ($member, $cds_id, $organism_group_id) = $sgn_family_member_q->fetchrow_array()) {
    # The LEFT JOIN on sgn.cds can leave organism_group_id NULL.  File those
    # members under '' explicitly rather than using undef as a hash key.
    my $group_key = defined $organism_group_id ? $organism_group_id : '';
    push @{ $sgn_member{$group_key} }, $member;
    push @all_sgn_member, $cds_id;
}

for my $group_key (keys %sgn_member) {
    # The count does not depend on whether a group label can be found.
    $group_member_nr{$group_key} = scalar @{ $sgn_member{$group_key} };

    # Look up the label, skipping the query for the "no group" bucket.
    # A missing label is shown as an empty cell.
    my $comment;
    if ($group_key ne '') {
        $organism_group_q->execute($group_key);
        ($comment) = $organism_group_q->fetchrow_array();
    }
    $group_comment{$group_key} = defined $comment ? $comment : '';
}

# One string of space-separated unigene links per organism group.
my %sgn_member_content = ();
for my $group_key (sort keys %sgn_member) {
    $sgn_member_content{$group_key} = join '',
        map { CXGN::Tools::Identifiers::link_identifier("SGN-U" . $_) . " " }
        @{ $sgn_member{$group_key} };
}

$family_member_content = "<tr><th>Organism</th><th># of Members</th><th>Member Id</th></tr>";
$family_member_content .= "<tr><td>Arabidopsis</td>" . "<td>$at_member_nr</td>" . "<td>@at_member_url</td></tr>";
for my $group_key (sort keys %sgn_member) {
    $family_member_content .= "<tr><td>$group_comment{$group_key}</td>" . "<td>$group_member_nr{$group_key}</td>" . "<td>$sgn_member_content{$group_key}</td></tr>";
}
######################################################
#Link to other families
my %other_family = ();   # build nr  => list of family ids found in that build
my %other_i = ();        # build nr  => i value of that build
my %other_size = ();     # family id => member count
my %other_num = ();      # family id => family number
my $other_family_content;

#use the @all_sgn_member and $group from previous sections
my %family_regist = ();  # family ids already listed, so each appears once
for my $cds_id (@all_sgn_member) {
    $other_family_q->execute($cds_id, $family_id, $group_id);
    while (my ($other_family_id, $other_build, $other_i_value, $other_size, $other_family_num) = $other_family_q->fetchrow_array()) {
        $other_i{$other_build} = $other_i_value;
        $other_size{$other_family_id} = $other_size;
        $other_num{$other_family_id} = $other_family_num;
        next if defined $family_regist{$other_family_id};
        push @{ $other_family{$other_build} }, $other_family_id;
        $family_regist{$other_family_id} = 1;
    }
}

# Order builds numerically, so that build 10 follows build 9 instead of
# sorting between 1 and 2.  The string comparison is a tie-breaker and keeps
# the order deterministic should a key not be numeric.
my @build_order = do {
    no warnings 'numeric';
    sort { $a <=> $b || $a cmp $b } keys %other_family;
};

# For each build, links of the form "family number(size)".
my %other_family_content_family = ();
for my $build (@build_order) {
    for my $other_family_id (@{ $other_family{$build} }) {
        my $content = $other_num{$other_family_id} . "(" . $other_size{$other_family_id} . ")";
        $other_family_content_family{$build} .= qq|
                                                <a href="family.pl?family_id=$other_family_id">$content</a>&nbsp;
                                                |;
    }
}

$other_family_content = "<tr><th>Build id</th><th>i Value</th><th>Family Number (Size)</th></tr>";

for my $build (@build_order) {
    $other_family_content .= "<tr><td>$build</td>" . "<td>$other_i{$build}</td>" . "<td>$other_family_content_family{$build}</td></tr>";
}
######################################################
#Retrieve alignment sequences and draw an alignment image

##First check family size

my $pep_align_file = "/data/prod/public/family/$i_value/pep_align/$family_nr.pep.aligned.fasta";
my $newick_file = "/data/prod/public/family/$i_value/newick/$family_nr.newick";

my $total_member_nr = int (@all_sgn_member) + $at_member_nr;
if ($total_member_nr == 1 || $family_build_status ne 'C') {
    # Single-member families and families from non-current builds get no alignment.
    $align_content = "<tr><td>Not applicable.</td></tr>";
}
elsif ( ! -f $pep_align_file ) {
    my $extra = " due to its large size";
    $extra = "" unless ($total_member_nr > 100); #actual limit is 200, usually
    $align_content = "<tr><td>Alignment not available for this family$extra.</td></tr>";
}
elsif ($total_member_nr > $family_size_align_limit) {
    $align_content = "<tr><td>Family size too large for alignment on this page.";
    $align_content .= qq|&nbsp;&nbsp;<a href="/tools/align_viewer/show_align.pl?family_id=$family_id">See Alignment in Viewer</a> |;
    $align_content .= "</td></tr>";
}
else {

    my $img_height = $total_member_nr * 20;
    my $img_width = 700;

    # With a Newick file the image is rendered from the tree object and the
    # link goes to the tree browser; without one only the alignment is drawn
    # and the link goes to the alignment viewer.  This is decided before the
    # cache check so a cached image gets the same link as a fresh one.
    my $alignment_only = (-f $newick_file) ? 0 : 1;
    my $tool_link = $alignment_only
        ? 'align_viewer/show_align.pl?'
        : 'tree_browser/index.pl?&align_type=pep';

    my $cache = CXGN::Tools::WebImageCache->new();
    $cache->set_key($i_value . '_' . $family_nr);
    $cache->set_expiration_time(1);
    $cache->set_map_name("family_alignment_tree");
    $cache->set_basedir($c->config->{"basepath"});
    $cache->set_temp_dir("/documents/tempfiles/family");

    # Stays true when a valid cached image exists or a new one is rendered.
    my $have_image = 1;

    if (!$cache->is_valid()) {
        my $treealign = undef;
        my $alignment = undef;

        unless ($alignment_only) {
            # NOTE: 'use' takes effect at compile time, so this module is
            # loaded whether or not this branch runs.
            use CXGN::Phylo::Tree;
            $treealign = CXGN::Phylo::Tree->new({
                from_files => {
                    newick => $newick_file,
                    alignment => $pep_align_file
                }
            });
            $treealign->get_layout()->set_image_width($img_width);
            $treealign->get_layout()->set_image_height($img_height);
            $alignment = $treealign->get_alignment();
        }
        else {
            $alignment = CXGN::Phylo::Alignment->new(
                width=>$img_width,
                height=>$img_height,
                type=>'pep',
                from_file=>$pep_align_file
            );
        }

        ##Draw family_alignment image and write map file
        my $show_num = $alignment->get_nonhidden_member_nr();
        if ($show_num > 1) {
            # "c" is used only for alignment-only images with fewer than 60
            # visible members; everything else is drawn in mode "s".
            my $image_mode = "s";
            $image_mode = "c" if ($show_num < 60 && $alignment_only);
            $alignment->set_display_type($image_mode);

            unless ($alignment_only) {
                $cache->set_image_data($treealign->render_png());
            }
            else {
                $alignment->render();
                $cache->set_image_data($alignment->{image}->png());
            }
        }
        else {
            $have_image = 0;
        }
    }

    if ($have_image) {
        $align_content = "<tr><td><center><a target=\"new_tab\" href=\"../tools/${tool_link}&family_nr=$family_nr&i_value=$i_value\">";
        $align_content .= $cache->get_image_html();
        $align_content .= "</a></center></td></tr>";
        $align_content .= "<tr><td align=\"center\">Click on the image above to view the detailed alignment</td></tr>";
    }
    else {
        # Wrapped in a row and cell because this text is placed inside a <table>.
        $align_content = "<tr><td>No alignment image available.</td></tr>";
    }
}
######################################################
#Page output

# Every section is wrapped in a table; only the member table has a border.
my $plain_table_open    = '<table width="100%" cellpadding="5" cellspacing="0" border="0">';
my $bordered_table_open = '<table width="100%" cellpadding="5" cellspacing="0" border="1">';
my $table_close         = '</table>';

$page->header();
print page_title_html("SGN Gene Family $family_id");

# The out-of-date banner is empty for current builds.
if ($deprecated_content) {
    print $deprecated_content;
}

print info_section_html(
    title    => 'Summary',
    contents => $plain_table_open . $sum_content . $table_close,
);
print info_section_html(
    title    => 'Relations to Other Builds',
    contents => $plain_table_open . $other_family_content . $table_close,
);
print info_section_html(
    title    => 'Family Sequence Alignment',
    contents => $plain_table_open . $align_content . $table_close,
);
print info_section_html(
    title       => 'Family Members',
    contents    => $bordered_table_open . $family_member_content . $table_close,
    collapsible => 1,
);

$page->footer();
# Print a complete page saying that no family was specified, then end the
# request.  This sub does not return.
#
#   $page      - page object providing header() and footer()
#   $family_id - accepted but not used
sub empty_search {
    my ($page, $family_id) = @_;

    $page->header();

    # NOTE(review): the blank lines around the message are assumed; verify
    # against the original heredoc.
    print <<EOF;

  <b>No family id specified</b>

EOF

    $page->footer();
    exit 0;
}
# Print a complete page saying that the given family identifier did not
# lead to a valid search, then end the request.  This sub does not return.
#
#   $page      - page object providing header() and footer()
#   $family_id - the identifier to report; it may come straight from the
#                request, so it is HTML-escaped before being printed.
#                undef is shown as an empty string.
sub invalid_search {
    my ($page, $family_id) = @_;

    # Escape the characters that are significant in HTML so a crafted
    # identifier cannot inject markup into the page.
    my $shown_id = defined $family_id ? $family_id : '';
    $shown_id =~ s/&/&amp;/g;
    $shown_id =~ s/</&lt;/g;
    $shown_id =~ s/>/&gt;/g;
    $shown_id =~ s/"/&quot;/g;
    $shown_id =~ s/'/&#39;/g;

    $page->header();

    # NOTE(review): the blank lines around the message are assumed; verify
    # against the original heredoc.
    print <<EOF;

  <b>The specified family identifer ($shown_id) does not result in a valid search.</b>

EOF

    $page->footer();
    exit 0;
}