Merge branch 'master' into topic/tracking_transformation
[sgn.git] / cgi-bin / maps / physical / overgo_stats.pl
blob2e7a889e42d1dc71db7db32d4800c1d08c78c360
1 #!/usr/bin/perl -w
3 ######################################################################
5 # This program displays statistics on the progress of the overgo
6 # plating project as reflected by the physical database.
8 # As of version 1.2 this program consults both the database and the
9 # summary report files written to the directory
11 # ~/sgn/pwd/support_data/physicalmapping/report
13 # in order to determine the necessary statistics.
14 # However, in some cases it still cheats by just statically setting
15 # values.
17 ######################################################################
19 use strict;
20 use CXGN::Page;
21 use CXGN::DB::Physical;
22 use CXGN::DB::Connection;
24 use CXGN::Page::FormattingHelpers qw/blue_section_html columnar_table_html info_table_html/;
26 my $page = CXGN::Page->new('Overgo Stats', 'Robert and friends');
27 my $map_id = 9; # always, for now.
30 # Definitions;
31 our $plausible_bacs = 1;
32 our $genetic_threshold = "5.0";
33 our $number_chromos = 12;
34 our $preset_date = "20030909";
35 our $sgnblue = '#ccccff';
36 our $arizona_tom_fpc_page = 'http://www.genome.arizona.edu/fpc/tomato/';
37 our $physical_abstract_page = '/cview/map.pl?map_id=1&physical=1';
38 our $physical_overview_page = '/cview/map.pl?map_id=1&physical=1';
39 our $plate_design_page = '/maps/physical/list_overgo_plate_probes.pl?plate_no=';
40 our $list_bacs_by_plate = '/maps/physical/list_bacs_by_plate.pl?by_plate=';
41 our $explanation_page = 'overgo_process_explained.pl';
42 our %months = ('01' => 'January', '02' => 'February', '03' => 'March',
43 '04' => 'April', '05' => 'May', '06' => 'June',
44 '07' => 'July', '08' => 'August', '09' => 'September',
45 '10' => 'October', '11' => 'November', '12' => 'December');
47 # Connect to the database.
48 my $dbh = CXGN::DB::Connection->new('physical');
50 ######################################################################
52 # This is where we should pull all of the above fresh from the DB.
54 ######################################################################
56 # Figure out the last date on which these data were updated.
57 my ($overgo_version, $overgo_date) = CXGN::DB::Physical::get_current_overgo_version_and_updated_on($dbh);
58 if ($overgo_date =~ /^(\d\d\d\d-\d\d-\d\d)/) {
59 $overgo_date = $1;
60 } else {
61 $page->message_page( "error","<I>ERROR: Incorrectly formatted overgo plating date.</I>\n" );
64 my ($fpc_version, $fpc_date) = CXGN::DB::Physical::get_current_fpc_version_and_date($dbh);
66 #warn "got dates '$overgo_date' and '$fpc_date'\n";
67 my @dates = sort ($overgo_date, $fpc_date);
68 my $most_recent_date = pop @dates;
69 $most_recent_date ||= $preset_date;
71 my $last_date;
72 if ($most_recent_date =~ /^(\d\d\d\d)-?(\d\d)-?(\d\d)/) {
73 $last_date = $months{$2} . " " . $3 . ", " . $1;
74 } else {
75 $page->message_page("error", "<I>ERROR: Incorrectly formatted string $most_recent_date.</I>\n" );
78 #warn "hi";
80 # Get the total number of BACs considered.
81 my $number_of_bacs = CXGN::DB::Physical::get_total_number_of_bacs($dbh);
83 # BACs which hit the plate at all (ie. have a match to at least one pool.
84 my $bac_partial_hits_sth = $dbh->prepare("SELECT COUNT(DISTINCT bac_id) FROM overgo_results");
85 $bac_partial_hits_sth->execute;
86 my ($bac_partial_hits) = $bac_partial_hits_sth->fetchrow_array;
87 $bac_partial_hits_sth->finish;
89 # Get all plates numbers.
90 my $plate_numbers_sth = $dbh->prepare("SELECT DISTINCT plate_number FROM overgo_plates");
91 $plate_numbers_sth->execute;
92 my %unprocessed_plates;
93 while (my ($pn) = $plate_numbers_sth->fetchrow_array) {
94 $unprocessed_plates{$pn} = 1;
96 $plate_numbers_sth->finish;
98 # List all plates processed to date.
99 my $proc_plate_sth = $dbh->prepare("SELECT DISTINCT op.plate_number, op.plate_size, op.empty_wells FROM overgo_plates AS op INNER JOIN overgo_results AS ores ON op.plate_id=ores.overgo_plate_id ORDER BY op.plate_number");
100 my %processed_plates;
101 $proc_plate_sth->execute;
102 my ($total_wells, $total_empty, $total_matching_wells, $total_matching_bacs);
103 while (my ($pn, $wells, $empty) = $proc_plate_sth->fetchrow_array) {
104 delete $unprocessed_plates{$pn};
105 my $wells_with_hits = CXGN::DB::Physical::count_wells_with_plausible_hits_on_plate_n($dbh, $pn, $overgo_version, $map_id);
106 my $bac_hits_on_plate = CXGN::DB::Physical::count_all_bacs_which_hit_plate_n($dbh, $pn, $overgo_version, $map_id);
107 $processed_plates{$pn} = [ qq|<a href="$plate_design_page$pn">Plate $pn</a>|,
108 ($wells - $empty),
109 $empty,
110 $wells_with_hits,
111 qq|<a href="$list_bacs_by_plate$pn">$bac_hits_on_plate</a>|,
113 $total_wells += $wells;
114 $total_empty += $empty;
115 $total_matching_wells += $wells_with_hits;
116 $total_matching_bacs += $bac_hits_on_plate;
118 $proc_plate_sth->finish;
120 #warn "hello there";
122 # Prepare a listing of unprocessed plates.
123 my $unprocessed_plates_table =
124 do {
125 my $unproc_width = 5;
126 my $unproc_row = 0;
127 my @unproc_list = (sort {$a <=> $b} keys %unprocessed_plates);
128 my @table;
129 while(my @row = splice @unproc_list,0,$unproc_width) {
130 push @table,\@row;
132 columnar_table_html(data=>\@table);
133 } if %unprocessed_plates;
135 # Count the total number of ambiguous bacs.
137 my $ambig_sth = $dbh->prepare("SELECT COUNT(DISTINCT bac_id) FROM tentative_overgo_associations WHERE overgo_version=?");
138 $ambig_sth->execute($overgo_version);
139 my ($ambiguous_bacs) = $ambig_sth->fetchrow_array;
140 $ambig_sth->finish;
142 #warn "one";
144 # Now work out FPC stats.
145 my $total_contigged_bacs_sth = $dbh->prepare("SELECT COUNT(DISTINCT ba.bac_id) FROM bac_contigs AS bc INNER JOIN bac_associations AS ba ON bc.bac_contig_id=ba.bac_contig_id INNER JOIN ba_plausibility AS bap using(bac_assoc_id) WHERE bc.fpc_version=? AND bap.map_id=?");
146 $total_contigged_bacs_sth->execute($fpc_version, $map_id);
147 my ($total_contigged_bacs) = $total_contigged_bacs_sth->fetchrow_array;
148 $total_contigged_bacs_sth->finish;
150 my $total_bac_singletons = $number_of_bacs - $total_contigged_bacs;
152 #warn "two";
154 # Stats for deconvolution.
155 my $plausible_bacs_sth = $dbh->prepare("SELECT COUNT(DISTINCT bac_id) FROM overgo_associations INNER JOIN oa_plausibility AS oap USING(overgo_assoc_id) WHERE overgo_version=? AND oap.plausible=1 AND oap.map_id=?");
156 $plausible_bacs_sth->execute($overgo_version, $map_id);
157 my ($plausible_bacs_count) = $plausible_bacs_sth->fetchrow_array;
158 $plausible_bacs_sth->finish;
160 #warn "three";
162 my $distinct_contigs_sth = $dbh->prepare("select count(distinct bac_contig_id) from plausible_bacs_in_all_contigs");
163 $distinct_contigs_sth->execute();
164 #warn "how are you doing today?";
166 my ($distinct_contigs_count) = $distinct_contigs_sth->fetchrow_array;
167 $distinct_contigs_sth->finish;
169 #warn "overgo_version $overgo_version, fpc_version $fpc_version, map_id $map_id";
170 my $bacs_in_distinct_contigs_sth = $dbh->prepare("select count(distinct bac_id) from plausible_bacs_in_all_contigs;");
171 $bacs_in_distinct_contigs_sth->execute();
172 my ($bacs_in_distinct_contigs_count) = $bacs_in_distinct_contigs_sth->fetchrow_array;
173 $bacs_in_distinct_contigs_sth->finish;
175 #warn "four";
177 my $plausible_contigs_sth = $dbh->prepare("select count(distinct bac_contig_id) from plausible_bacs_in_all_contigs where bac_plausible = 1;");
178 $plausible_contigs_sth->execute();
179 my ($plausible_contigs_count) = $plausible_contigs_sth->fetchrow_array;
180 $plausible_contigs_sth->finish;
182 #warn "five";
187 ######################################################################
189 # And this is where we display the page.
191 ######################################################################
193 #warn "nice day to print some html, isn't it?";
195 $page->header("Overgo plating results as of " . $last_date,"Overgo plating results as of " . $last_date);
197 my $number_of_plates = (scalar (keys %processed_plates));
199 # Overview of the Overgo Plating done to date.
200 my @bleh = ( qq|Total number of <a href="$explanation_page#bacs">BACs</a>| => $number_of_bacs,
201 qq|Total number of <a href='$explanation_page#probes'>probes</a>| => $total_wells,
202 qq|Number of <a href='$explanation_page#plates'>overgo plates</a> processed so far| => $number_of_plates,
203 qq|Number of BACs which <a href='$explanation_page#hittheplates'>hit the plates</a> one or more times| => $bac_partial_hits,
204 qq|Number of BACs which <a href='$explanation_page#ambiguity'>unambiguously</a> matched one or more plates with a row, column match| => $total_matching_bacs,
205 qq|Number of BACs which <a href='$explanation_page#ambiguity'>ambiguously</a> matched one or more plates| => $ambiguous_bacs,
206 qq|Average number of <a href='$explanation_page#ambiguity'>unambiguous</a> BACs matching each probe| => sprintf("%.2f", ($total_matching_bacs / $total_wells)),
209 sub gen_summary {
210 my $html;
211 while(my ($field,$value) = splice @_,0,2) {
212 $html .= "<b>$field:</b> $value<br />\n";
214 $html;
217 print blue_section_html('Summary',gen_summary(@bleh));
219 my @headings = ( qq|<a href="$explanation_page#plates">Overgo plate</a>|,
220 qq|<a href="$explanation_page#probes">Probes</a>|,
221 qq|<a href="$explanation_page#emptywells">Empty wells</a>|,
222 qq|<a href="$explanation_page#anchorpoint">Anchor points</a>|,
223 qq|<a href="$explanation_page#bacs">BACs</a> matching plate|,
227 print blue_section_html('Plates overview',
228 info_table_html('Processed plates' =>
229 columnar_table_html(headings => \@headings,
230 data => [@processed_plates{sort {$a <=> $b} keys %processed_plates},
231 ['<b>Total</b>',$total_wells,$total_empty,$total_matching_wells,$total_matching_bacs],
233 __border => 1,
235 'Unprocessed plates' => $unprocessed_plates_table || '<span class="ghosted">All plates processed.</span>',
236 __border => 0,
240 print blue_section_html(qq|Overview of <a href="$explanation_page#fpc_contigging">BAC contigging</a>|,<<EOH);
241 <b>NB. -</b> These data are taken from the <a href='$arizona_tom_fpc_page' target='ARIZONA'>Tomato Physical Mapping Project</a> pages of the Arizona Genomics Institute, who are using the FPC process to assemble BACs into contigs. <br />
242 <b>Number of BACs Contigged:</b> $total_contigged_bacs <br />
243 <b>Number of BAC Singletons:</b> $total_bac_singletons
246 @bleh = (qq|Number of unique <a href="$explanation_page#anchorpoint">anchor points</a> for BAC <-> genetic map associations|
247 => $total_matching_wells,
248 qq|Number of BACs which <a href='$explanation_page#plausible'>plausibly</a> matched one chromosome/position|
249 => $plausible_bacs_count,
250 qq|Contigs|
251 => "$distinct_contigs_count contigs made of $bacs_in_distinct_contigs_count plausible BACs",
252 qq|Mean number of anchor points per chromosome|
253 => sprintf("%.2f", ($total_matching_wells / $number_chromos)),
254 qq|Mean number of BACs per chromosome|
255 => sprintf("%.2f", ($total_matching_bacs / $number_chromos)),
256 qq|Mean number of BAC contigs per chromosome|
257 => sprintf("%.2f", ($plausible_contigs_count / $number_chromos)),
258 qq|Mean number of BACs per anchor point|
259 => sprintf("%.2f", ($total_matching_bacs / $total_matching_wells)),
262 print blue_section_html('Physical map statistics',<<EOH.gen_summary(@bleh));
263 $total_matching_bacs BACs matched well to one or more probe markers on the F2-2000 Genetic map.<br />
264 These BACs were then screened to find which ones only matched to markers within a small area of one chromosome - no more than <b>$genetic_threshold cM</b> across.<br />
266 # Commented out at Eileen's request.
267 #print "<b>Number of Contigs where all BACs fell on the same chromosome:</b> " . $plausible_contigs_count . "<br />\n";
269 print blue_section_html
270 ('Links',
271 "<ul>".join("\n",
272 map {"<li>$_</li>"} ( qq|<a href='$physical_abstract_page'>View the Abstract for the physical F2-2000 map.</a>|,
273 qq|<a href='$explanation_page'>Explanation of the Overgo Plating process</a>|,
274 qq|<a href='$physical_overview_page'>View an overview of all chromosomes of the physical map.</a>|,
275 qq|<a href='$arizona_tom_fpc_page' target='ARIZONA'>View the FPC pages for the physical mapping project.</a>|,
278 ."</ul>\n"
282 $page->footer;