moved old file to Genefamily dir
[sgn.git] / cgi-bin / search / family_build.pl
blob636fd067f393ba744de8c7eb7d0becf1559facca
2 #!/usr/bin/perl -w
3 use strict;
4 use warnings;
5 use CXGN::Page;
6 use CXGN::Page::FormattingHelpers qw/ page_title_html
7 blue_section_html /;
8 use GD::Graph::bars;
9 use GD::Text;
10 use GD;
11 use File::Temp;
12 use CXGN::DB::Connection;
13 use CatalystX::GlobalContext '$c';
# Page object, request arguments and database handle used by every section
# of this script.
my $page = CXGN::Page->new( "SGN Gene Family Build", "Chenwei Lin");
my ($family_build_id, $family_build_nr) = $page->get_arguments("family_build_id", "family_build_nr");
my $dbh = CXGN::DB::Connection->new("public");

# A build can be addressed by its build number instead of its id; when only
# the number was supplied, look the id up first.
if (!$family_build_id && $family_build_nr) {
    my $build_id_lookup = $dbh->prepare("SELECT family_build_id FROM family_build WHERE build_nr=?");
    $build_id_lookup->execute($family_build_nr);
    ($family_build_id) = $build_id_lookup->fetchrow_array();
}

# Without a build id there is nothing to show; empty_search() prints its own
# page and exits.
if (!$family_build_id) {
    empty_search($page);
}

# One summary row for the requested build.
my $family_build_sum_q = $dbh->prepare(
    "select family_build.group_id, i_value, build_date, comment, build_nr from family_build left join sgn.groups using (group_id) where family_build_id = ?"
);

# Number of families in the build.
my $total_family_q = $dbh->prepare(
    "select count(family_id) from family where family_build_id = ?"
);

# Number of family members in the build.
my $total_gene_q = $dbh->prepare(
    "select count(family_member_id) from family left join family_member using (family_id) where family_build_id = ?"
);

# Other builds of the same group with status 'C', excluding the build number
# passed as the second placeholder.
my $other_build_q = $dbh->prepare("
    SELECT family_build_id, build_nr, i_value
    FROM family_build
    WHERE group_id = ?
    AND build_nr != ?
    AND status='C'
");

# Declared for the rest of the file but never prepared in this section.
my $organism_member_q;

# One row per family member: its family, its organism group and the group's
# comment.
my $family_organism_q = $dbh->prepare(
    "select family_member.family_id,organism_group_id, comment from sgn.groups left join family_member on (family_member.organism_group_id = sgn.groups.group_id) left join family using (family_id) left join family_build using (family_build_id) where family_build.family_build_id = ?"
);

# Member count of every family in the build (one row per family).
my $family_size_q = $dbh->prepare(
    "select count(family_member_id) from family_member left join family using (family_id) where family_build_id = ? group by family_member.family_id"
);
my ($sum_content, $member_content, $other_build_content, $dist_content);
###############################################
#Family build summary
#
# Fetches the summary row of the requested build and renders it, together
# with the family and gene totals, as the table rows held in $sum_content.
# $group_id, $build_nr and $total_gene stay at file scope because later
# sections read them.
$family_build_sum_q->execute($family_build_id);
my @summary_row = $family_build_sum_q->fetchrow_array();

# No row means the id does not name a build.  The page object and the id are
# passed explicitly: invalid_search() unpacks both from its argument list, so
# the former bare "&invalid_search;" left its $page undefined and died on
# $page->header() instead of showing the error page.  invalid_search() exits.
invalid_search($page, $family_build_id) unless @summary_row;

my ($group_id, $i_value, $build_date, $group_comment, $build_nr) = @summary_row;

# Annotate the i value with the stringency class it falls into.
my $i_value_label = $i_value;
if ($i_value < 2) {
    $i_value_label .= " (Low stringency in grouping genes together)";
}
elsif ($i_value < 5) {
    $i_value_label .= " (Normal stringency in grouping genes together)";
}
elsif ($i_value >= 5) {
    $i_value_label .= " (High stringency in grouping genes together)";
}

$total_family_q->execute($family_build_id);
my ($total_family) = $total_family_q->fetchrow_array();
$total_gene_q->execute($family_build_id);
my ($total_gene) = $total_gene_q->fetchrow_array();

# Only the first ten characters of the build date are displayed.
my $build_day = substr($build_date, 0, 10);

$sum_content =
      "<tr><th>Build Date</th><td>" . $build_day . "</td></tr>"
    . "<tr><th>i Value</th><td> " . $i_value_label . "</td></tr>"
    . "<tr><th>Data Set</th><td>" . $group_comment . "</td></tr>"
    . "<tr><th>Build number</th><td>" . $build_nr . "</td></tr>"
    . "<tr><th>Total Families</th><td>" . $total_family . "</td></tr>"
    . "<tr><th>Total Genes/Unigenes</th><td>" . $total_gene . "</td></tr>";
$sum_content .= q{<tr><td colspan="2" align="center" bgcolor="gray"><a href="/about/family_analysis.pl" target="blank">For help with gene family analysis, please click here.</a></td></tr>};
###############################################
#Member datasets
#
# Builds $member_data_content: one table row per organism group giving its
# member count, the number of families it appears in, and the number of
# families that contain members of that organism only.
my %organism_gene_count = ();
my %organism_family = ();
my %family_organism = ();
my %organism_uniq_family_count = ();
my %organism_comment = ();
my %organism_family_count = ();

$family_organism_q->execute($family_build_id);
while (my ($family_id, $organism_group_id, $organism_group_comment) = $family_organism_q->fetchrow_array()) {
    $organism_comment{$organism_group_id} = $organism_group_comment;
    $organism_gene_count{$organism_group_id}++;
    $organism_family{$organism_group_id}{$family_id} = 1;
    $family_organism{$family_id}{$organism_group_id} = 1;
}

foreach my $organism (keys %organism_family) {
    $organism_family_count{$organism} = scalar keys %{ $organism_family{$organism} };

    # Start every organism at zero so that one without any unique family is
    # shown as "0" rather than as an undefined value (empty cell plus an
    # "uninitialized" warning).
    $organism_uniq_family_count{$organism} = 0;
}

# A family is unique to an organism when that organism is its only member
# organism.
foreach my $family_id (keys %family_organism) {
    my @member_organisms = keys %{ $family_organism{$family_id} };
    next unless @member_organisms == 1;
    $organism_uniq_family_count{ $member_organisms[0] }++;
}

# Rows are ordered by descending member count.
my $member_data_content = "<tr><th>Species</th><th># Genes or Unigenes</th><th># Families</th><th># Unique Families</th></tr>";
foreach my $organism (sort { $organism_gene_count{$b} <=> $organism_gene_count{$a} } keys %organism_gene_count) {
    $member_data_content .= "<tr><td>$organism_comment{$organism}</td><td>$organism_gene_count{$organism}</td><td>$organism_family_count{$organism}</td><td>$organism_uniq_family_count{$organism}</td></tr>";
}
###############################################
#Other builds of the same group
#
# Fills $other_build_content (declared earlier in the file) with one linked
# row per other build returned by $other_build_q, keyed by build number.
my %other_build = ();
my %other_build_id = ();

$other_build_q->execute($group_id, $build_nr);
while (my ($other_build_id, $other_build_nr, $other_i_value) = $other_build_q->fetchrow_array()) {
    my $stringency = $other_i_value < 2 ? "Low"
                   : $other_i_value < 5 ? "Normal"
                   :                      "High";
    $other_build{$other_build_nr} = $other_i_value . " ($stringency stringency in grouping genes together)";
    $other_build_id{$other_build_nr} = $other_build_id;
}

$other_build_content = "<tr><th>Build number</th><th>i Value</th></tr>";

# Build numbers are compared numerically; a plain string sort would list
# build 10 before build 2.
foreach my $nr (sort { $a <=> $b } keys %other_build) {
    $other_build_content .= "<tr><td><a href=\"family_build.pl?family_build_id=$other_build_id{$nr}\">$nr</a></td><td>$other_build{$nr}</td></tr>";
}
###############################################
#Size distribution
#
# Adds every family's member count to the size class the family falls into,
# so %size_gene_count maps a size-class label to the number of members that
# live in families of that size.
my %size_gene_count = ();
my %size_family_count = ();

# Size classes in test order: label plus the condition a member count must
# satisfy.  Anything that matches none of them is counted under "100 up".
my @size_classes = (
    [ "1",      sub { $_[0] == 1 } ],
    [ "2",      sub { $_[0] == 2 } ],
    [ "3",      sub { $_[0] == 3 } ],
    [ "4-20",   sub { $_[0] > 3  && $_[0] <= 20 } ],
    [ "21-40",  sub { $_[0] > 20 && $_[0] <= 40 } ],
    [ "41-100", sub { $_[0] > 40 && $_[0] <= 100 } ],
);

$family_size_q->execute($family_build_id);
while (my ($members) = $family_size_q->fetchrow_array()) {
    my $label = "100 up";
    foreach my $class (@size_classes) {
        my ($name, $matches) = @{$class};
        if ($matches->($members)) {
            $label = $name;
            last;
        }
    }
    $size_gene_count{$label} += $members;
}
###############################################
#Draw bar chart
#
# Plots the percentage of all members that fall into each family-size class,
# writes the chart as a PNG into the temp-file directory below the site's
# base path, and leaves the <img> row for the page in $size_content.

#First generate a uniquely named file.
my $html_root_path = $c->config->{'basepath'};
my $doc_path = $c->tempfiles_subdir('family_images');
my $path = $html_root_path . $doc_path;
my $tmp = File::Temp->new(
    DIR    => $path,
    SUFFIX => '.png',
    UNLINK => 0,    # the file must still exist when the browser requests it
);

#Draw the bar chart
my $graph = GD::Graph::bars->new(700, 400);

$graph->set(
    x_label           => 'Family Size',
    y_label           => '%',
    dclrs             => [ qw(lblue) ],
    bar_spacing       => 10,
    x_labels_vertical => 1,
    x_label_position  => 0.5,
    text_space        => 16,
    two_axes          => 1,
    bar_width         => 5,
);

$graph->set_x_label_font(gdGiantFont);
$graph->set_y_label_font(gdGiantFont);
$graph->set_x_axis_font(gdMediumBoldFont);
$graph->set_y_axis_font(gdMediumBoldFont);
$graph->set_legend_font(gdGiantFont);

# The size-class labels are not numbers ("4-20", "100 up"), so they are put
# in order with an explicit list instead of a numeric sort, which only worked
# through the leading digits and warned under -w.  Classes without any
# family are left out.
my @x_values = grep { exists $size_gene_count{$_} }
    ("1", "2", "3", "4-20", "21-40", "41-100", "100 up");
my @y_values = map { $size_gene_count{$_} / $total_gene * 100 } @x_values;

my $gd = $graph->plot([\@x_values, \@y_values])
    or die "Could not plot family size chart: " . $graph->error;

# Finish writing through the File::Temp object before deriving the URL.
# The previous code ran s/// on $tmp itself, which replaced the object with
# its stringified name while the handle was still needed for close().
binmode $tmp;
print {$tmp} $gd->png;
close($tmp) or warn "Could not finish writing chart image: $!";

# The image URL is the file name with the site base path stripped from the
# front; \Q..\E keeps characters in the path from acting as regex operators.
my $image_url = $tmp->filename;
$image_url =~ s/\A\Q$html_root_path\E//;

my $size_content = "<tr><td><center><img src=\"$image_url\" alt=\"\" /></center></td></tr>";
###############################################
#Page printout
#
# Emits the page: header, title, the four content sections in display order,
# then the footer.  Every section is a table wrapped by blue_section_html().
$page->header();
print page_title_html("SGN Gene Family Build $family_build_id");

# Each entry: section heading, opening <table> tag, table rows.
my @sections = (
    [ 'Summary',
      '<table width="100%" cellpadding="5" cellspacing="0" border="0">',
      $sum_content ],
    [ 'Member Data Sets',
      '<table width="100%" cellpadding="5" cellspacing="0" border="1">',
      $member_data_content ],
    [ 'Other Builds with Different Inflation Factor',
      '<table width="100%" cellpadding="5" cellspacing="0" border="0">',
      $other_build_content ],
    [ 'Unigene Family Size Distribution',
      '<table width="100%" cellpadding="5" cellspacing="0" border="0" align="center">',
      $size_content ],
);

foreach my $section (@sections) {
    my ($heading, $table_open, $rows) = @{$section};
    print blue_section_html($heading, $table_open . $rows . '</table>');
}

$page->footer();
# empty_search($page, $family_id)
#
# Error page for a request that did not identify a family build.
# Calls header() on the page object it is given, prints a message containing
# "No family id specified", calls footer() on the same object, then ends the
# script with exit status 0, so it never returns to its caller.
#
#   $page      - page object; must be passed in, because the "my" below
#                shadows the file-level $page inside this sub.
#   $family_id - unpacked from the arguments but not used in the lines
#                visible here.
250 sub empty_search {
251 my ($page, $family_id) = @_;
253 $page->header();
255 print <<EOF;
257 <b>No family id specified</b>
261 $page->footer();
262 exit 0;
# invalid_search($page, $family_build_id)
#
# Error page for an identifier that does not lead to a valid search.
# Calls header() on the page object it is given, prints a message that
# interpolates $family_build_id, calls footer() on the same object, then ends
# the script with exit status 0, so it never returns to its caller.
#
#   $page            - page object; must be passed in, because the "my" below
#                      shadows the file-level $page inside this sub.
#   $family_build_id - identifier echoed in the message.
265 sub invalid_search {
266 my ($page, $family_build_id) = @_;
268 $page->header();
270 print <<EOF;
272 <b>The specified family identifer ($family_build_id) does not result in a valid search.</b>
276 $page->footer();
277 exit 0;