3 ######################################################################
5 # This program displays statistics on the progress of the overgo
6 # plating project as reflected by the physical database.
8 # As of version 1.2 this program consults both the database and the
9 # summary report files written to the directory
11 # ~/sgn/pwd/support_data/physicalmapping/report
13 # in order to determine the necessary statistics.
14 # However, in some cases it still cheats by just statically setting
17 ######################################################################
21 use CXGN
::DB
::Physical
;
22 use CXGN
::DB
::Connection
;
24 use CXGN
::Page
::FormattingHelpers qw
/blue_section_html columnar_table_html info_table_html/;
26 my $page = CXGN
::Page
->new('Overgo Stats', 'Robert and friends');
27 my $map_id = 9; # always, for now.
31 our $plausible_bacs = 1;
32 our $genetic_threshold = "5.0";
33 our $number_chromos = 12;
34 our $preset_date = "20030909";
35 our $sgnblue = '#ccccff';
36 our $arizona_tom_fpc_page = 'http://www.genome.arizona.edu/fpc/tomato/';
37 our $physical_abstract_page = '/cview/map.pl?map_id=1&physical=1';
38 our $physical_overview_page = '/cview/map.pl?map_id=1&physical=1';
39 our $plate_design_page = '/maps/physical/list_overgo_plate_probes.pl?plate_no=';
40 our $list_bacs_by_plate = '/maps/physical/list_bacs_by_plate.pl?by_plate=';
41 our $explanation_page = 'overgo_process_explained.pl';
42 our %months = ('01' => 'January', '02' => 'February', '03' => 'March',
43 '04' => 'April', '05' => 'May', '06' => 'June',
44 '07' => 'July', '08' => 'August', '09' => 'September',
45 '10' => 'October', '11' => 'November', '12' => 'December');
47 # Connect to the database.
48 my $dbh = CXGN
::DB
::Connection
->new('physical');
50 ######################################################################
52 # This is where we should pull all of the above fresh from the DB.
54 ######################################################################
56 # Figure out the last date on which these data were updated.
57 my ($overgo_version, $overgo_date) = CXGN
::DB
::Physical
::get_current_overgo_version_and_updated_on
($dbh);
58 if ($overgo_date =~ /^(\d\d\d\d-\d\d-\d\d)/) {
61 $page->message_page( "error","<I>ERROR: Incorrectly formatted overgo plating date.</I>\n" );
64 my ($fpc_version, $fpc_date) = CXGN
::DB
::Physical
::get_current_fpc_version_and_date
($dbh);
66 #warn "got dates '$overgo_date' and '$fpc_date'\n";
67 my @dates = sort ($overgo_date, $fpc_date);
68 my $most_recent_date = pop @dates;
69 $most_recent_date ||= $preset_date;
72 if ($most_recent_date =~ /^(\d\d\d\d)-?(\d\d)-?(\d\d)/) {
73 $last_date = $months{$2} . " " . $3 . ", " . $1;
75 $page->message_page("error", "<I>ERROR: Incorrectly formatted string $most_recent_date.</I>\n" );
80 # Get the total number of BACs considered.
81 my $number_of_bacs = CXGN
::DB
::Physical
::get_total_number_of_bacs
($dbh);
83 # BACs which hit the plate at all (ie. have a match to at least one pool.
84 my $bac_partial_hits_sth = $dbh->prepare("SELECT COUNT(DISTINCT bac_id) FROM overgo_results");
85 $bac_partial_hits_sth->execute;
86 my ($bac_partial_hits) = $bac_partial_hits_sth->fetchrow_array;
87 $bac_partial_hits_sth->finish;
89 # Get all plates numbers.
90 my $plate_numbers_sth = $dbh->prepare("SELECT DISTINCT plate_number FROM overgo_plates");
91 $plate_numbers_sth->execute;
92 my %unprocessed_plates;
93 while (my ($pn) = $plate_numbers_sth->fetchrow_array) {
94 $unprocessed_plates{$pn} = 1;
96 $plate_numbers_sth->finish;
98 # List all plates processed to date.
99 my $proc_plate_sth = $dbh->prepare("SELECT DISTINCT op.plate_number, op.plate_size, op.empty_wells FROM overgo_plates AS op INNER JOIN overgo_results AS ores ON op.plate_id=ores.overgo_plate_id ORDER BY op.plate_number");
100 my %processed_plates;
101 $proc_plate_sth->execute;
102 my ($total_wells, $total_empty, $total_matching_wells, $total_matching_bacs);
103 while (my ($pn, $wells, $empty) = $proc_plate_sth->fetchrow_array) {
104 delete $unprocessed_plates{$pn};
105 my $wells_with_hits = CXGN
::DB
::Physical
::count_wells_with_plausible_hits_on_plate_n
($dbh, $pn, $overgo_version, $map_id);
106 my $bac_hits_on_plate = CXGN
::DB
::Physical
::count_all_bacs_which_hit_plate_n
($dbh, $pn, $overgo_version, $map_id);
107 $processed_plates{$pn} = [ qq|<a href
="$plate_design_page$pn">Plate
$pn</a
>|,
111 qq|<a href
="$list_bacs_by_plate$pn">$bac_hits_on_plate</a
>|,
113 $total_wells += $wells;
114 $total_empty += $empty;
115 $total_matching_wells += $wells_with_hits;
116 $total_matching_bacs += $bac_hits_on_plate;
118 $proc_plate_sth->finish;
122 # Prepare a listing of unprocessed plates.
123 my $unprocessed_plates_table =
125 my $unproc_width = 5;
127 my @unproc_list = (sort {$a <=> $b} keys %unprocessed_plates);
129 while(my @row = splice @unproc_list,0,$unproc_width) {
132 columnar_table_html
(data
=>\
@table);
133 } if %unprocessed_plates;
135 # Count the total number of ambiguous bacs.
137 my $ambig_sth = $dbh->prepare("SELECT COUNT(DISTINCT bac_id) FROM tentative_overgo_associations WHERE overgo_version=?");
138 $ambig_sth->execute($overgo_version);
139 my ($ambiguous_bacs) = $ambig_sth->fetchrow_array;
144 # Now work out FPC stats.
145 my $total_contigged_bacs_sth = $dbh->prepare("SELECT COUNT(DISTINCT ba.bac_id) FROM bac_contigs AS bc INNER JOIN bac_associations AS ba ON bc.bac_contig_id=ba.bac_contig_id INNER JOIN ba_plausibility AS bap using(bac_assoc_id) WHERE bc.fpc_version=? AND bap.map_id=?");
146 $total_contigged_bacs_sth->execute($fpc_version, $map_id);
147 my ($total_contigged_bacs) = $total_contigged_bacs_sth->fetchrow_array;
148 $total_contigged_bacs_sth->finish;
150 my $total_bac_singletons = $number_of_bacs - $total_contigged_bacs;
154 # Stats for deconvolution.
155 my $plausible_bacs_sth = $dbh->prepare("SELECT COUNT(DISTINCT bac_id) FROM overgo_associations INNER JOIN oa_plausibility AS oap USING(overgo_assoc_id) WHERE overgo_version=? AND oap.plausible=1 AND oap.map_id=?");
156 $plausible_bacs_sth->execute($overgo_version, $map_id);
157 my ($plausible_bacs_count) = $plausible_bacs_sth->fetchrow_array;
158 $plausible_bacs_sth->finish;
162 my $distinct_contigs_sth = $dbh->prepare("select count(distinct bac_contig_id) from plausible_bacs_in_all_contigs");
163 $distinct_contigs_sth->execute();
164 #warn "how are you doing today?";
166 my ($distinct_contigs_count) = $distinct_contigs_sth->fetchrow_array;
167 $distinct_contigs_sth->finish;
169 #warn "overgo_version $overgo_version, fpc_version $fpc_version, map_id $map_id";
170 my $bacs_in_distinct_contigs_sth = $dbh->prepare("select count(distinct bac_id) from plausible_bacs_in_all_contigs;");
171 $bacs_in_distinct_contigs_sth->execute();
172 my ($bacs_in_distinct_contigs_count) = $bacs_in_distinct_contigs_sth->fetchrow_array;
173 $bacs_in_distinct_contigs_sth->finish;
177 my $plausible_contigs_sth = $dbh->prepare("select count(distinct bac_contig_id) from plausible_bacs_in_all_contigs where bac_plausible = 1;");
178 $plausible_contigs_sth->execute();
179 my ($plausible_contigs_count) = $plausible_contigs_sth->fetchrow_array;
180 $plausible_contigs_sth->finish;
187 ######################################################################
189 # And this is where we display the page.
191 ######################################################################
193 #warn "nice day to print some html, isn't it?";
195 $page->header("Overgo plating results as of " . $last_date,"Overgo plating results as of " . $last_date);
197 my $number_of_plates = (scalar (keys %processed_plates));
199 # Overview of the Overgo Plating done to date.
200 my @bleh = ( qq|Total number of
<a href
="$explanation_page#bacs">BACs
</a
>| => $number_of_bacs,
201 qq|Total number of
<a href
='$explanation_page#probes'>probes
</a
>| => $total_wells,
202 qq|Number of
<a href
='$explanation_page#plates'>overgo plates
</a
> processed so far
| => $number_of_plates,
203 qq|Number of BACs which
<a href
='$explanation_page#hittheplates'>hit the plates
</a
> one
or more
times| => $bac_partial_hits,
204 qq|Number of BACs which
<a href
='$explanation_page#ambiguity'>unambiguously
</a
> matched one
or more plates with a row
, column match
| => $total_matching_bacs,
205 qq|Number of BACs which
<a href
='$explanation_page#ambiguity'>ambiguously
</a
> matched one
or more plates
| => $ambiguous_bacs,
206 qq|Average number of
<a href
='$explanation_page#ambiguity'>unambiguous
</a> BACs matching each probe| => sprintf("%.2f", ($total_matching_bacs / $total_wells)),
211 while(my ($field,$value) = splice @_,0,2) {
212 $html .= "<b>$field:</b> $value<br />\n";
217 print blue_section_html
('Summary',gen_summary
(@bleh));
219 my @headings = ( qq|<a href
="$explanation_page#plates">Overgo plate
</a
>|,
220 qq|<a href
="$explanation_page#probes">Probes
</a
>|,
221 qq|<a href
="$explanation_page#emptywells">Empty wells
</a
>|,
222 qq|<a href
="$explanation_page#anchorpoint">Anchor points
</a
>|,
223 qq|<a href
="$explanation_page#bacs">BACs
</a
> matching plate
|,
227 print blue_section_html
('Plates overview',
228 info_table_html
('Processed plates' =>
229 columnar_table_html
(headings
=> \
@headings,
230 data
=> [@processed_plates{sort {$a <=> $b} keys %processed_plates},
231 ['<b>Total</b>',$total_wells,$total_empty,$total_matching_wells,$total_matching_bacs],
235 'Unprocessed plates' => $unprocessed_plates_table || '<span class="ghosted">All plates processed.</span>',
240 print blue_section_html
(qq|Overview of
<a href
="$explanation_page#fpc_contigging">BAC contigging
</a
>|,<<EOH);
241 <b>NB. -</b> These data are taken from the <a href='$arizona_tom_fpc_page' target='ARIZONA'>Tomato Physical Mapping Project</a> pages of the Arizona Genomics Institute, who are using the FPC process to assemble BACs into contigs. <br />
242 <b>Number of BACs Contigged:</b> $total_contigged_bacs <br />
243 <b>Number of BAC Singletons:</b> $total_bac_singletons
246 @bleh = (qq|Number of unique
<a href
="$explanation_page#anchorpoint">anchor points
</a
> for BAC
<-> genetic
map associations
|
247 => $total_matching_wells,
248 qq|Number of BACs which
<a href
='$explanation_page#plausible'>plausibly
</a> matched one chromosome/position
|
249 => $plausible_bacs_count,
251 => "$distinct_contigs_count contigs made of $bacs_in_distinct_contigs_count plausible BACs",
252 qq|Mean number of anchor points per chromosome
|
253 => sprintf("%.2f", ($total_matching_wells / $number_chromos)),
254 qq|Mean number of BACs per chromosome
|
255 => sprintf("%.2f", ($total_matching_bacs / $number_chromos)),
256 qq|Mean number of BAC contigs per chromosome
|
257 => sprintf("%.2f", ($plausible_contigs_count / $number_chromos)),
258 qq|Mean number of BACs per anchor point
|
259 => sprintf("%.2f", ($total_matching_bacs / $total_matching_wells)),
262 print blue_section_html
('Physical map statistics',<<EOH.gen_summary(@bleh));
263 $total_matching_bacs BACs matched well to one or more probe markers on the F2-2000 Genetic map.<br />
264 These BACs were then screened to find which ones only matched to markers within a small area of one chromosome - no more than <b>$genetic_threshold cM</b> across.<br />
266 # Commented out at Eileen's request.
267 #print "<b>Number of Contigs where all BACs fell on the same chromosome:</b> " . $plausible_contigs_count . "<br />\n";
269 print blue_section_html
272 map {"<li>$_</li>"} ( qq|<a href
='$physical_abstract_page'>View the Abstract
for the physical F2
-2000 map.</a
>|,
273 qq|<a href
='$explanation_page'>Explanation of the Overgo Plating process
</a
>|,
274 qq|<a href
='$physical_overview_page'>View an overview of all chromosomes of the physical
map.</a
>|,
275 qq|<a href
='$arizona_tom_fpc_page' target
='ARIZONA'>View the FPC pages
for the physical mapping project
.</a
>|,