Merge pull request #42 from solgenomics/topic/duplicate_image_warning
[cxgn-corelibs.git] / lib / CXGN / DB / Physical.pm
blob70f807f64b84476a7755ec597348a5a8f3db8aaa
1 package CXGN::DB::Physical;
3 ######################################################################
5 # Name : $Name: $
6 # Author : $Author: rob $
8 # This module exists to serve the needs of scripts for the second
9 # generation Physical Mapping database. It replaces the old
10 # modules PhysicalDB.pm and BACDB.pm, both of which can now be
11 # considered outmoded.
13 ######################################################################
15 =head1 physical_db_tools.pm
17 This module is designed to provide all the necessary re-usable functionality
18 for support of the physical database. Scripts relating to this database
19 - be they script for its update and maintenance, or cgi scripts to
20 allow users to navigate and peruse it, should make use of this module
21 to connect to and query the db. Additionally, any functionality
22 which you notice as being used in multiple scripts is probably best
23 bundled and added to this module. This will make it easier to keep
24 things synchronized.
26 Note that this module replaces two older modules which used to
27 service the physical db -- Physical.pm and BACDB.pm. Neither of
28 there modules should be used any more and calls to them should be weeded
29 out of functional code wherever they are found. (I don't think that
30 there are too many of them left but you should remove them if you do come
31 across them.)
33 I am off for a beer now.
35 =cut
37 use strict;
38 use DBI;
39 use CXGN::DB::Connection;
41 ######################################################################
43 # Global variables.
45 ######################################################################
46 # New stuff.
47 my $physical_db_name = 'Physical2';
48 my %hosts = ('henbane' => {
49 'user' => 'robert',
50 'password' => 'jasminesaville',
51 'db' => 'Physical2'
53 'amatxu' => {
54 'user' => 'koni',
55 'password' => 'bitchbadass',
56 'db' => 'physical'
58 'zamolxis' => {
59 'user' => 'web_usr',
60 'password' => 'tomato',
61 'db' => 'physical'
63 'siren' => {
64 'user' => 'web_usr',
65 'password' => 'tomato',
66 'db' => 'physical'
68 'toblerone' => {
69 'user' => 'web_usr',
70 'password' => 'tomato',
71 'db' => 'physical'
73 'sunshine' => {
74 'user' => 'web_usr',
75 'password' => 'tomato',
76 'db' => 'physical'
78 'sabazius' => {
79 'user' => 'web_usr',
80 'password' => 'tomato',
81 'db' => 'physical'
82 });
84 my %initial_version = ('updated_by' => 1,
85 'comments' => 'This is the original, unexpurgated version containing all data from the lab. No user modifications have been made to it.');
86 # Stuff originally from PhysicalDB.pm
87 my $users = {'ra97' => 'Robert Ahrens',
88 'eas68' => 'Beth Skwarecki',
89 'yx25' => 'Yimin Xu',
90 'yw84' => 'Eileen Wang'};
91 my $species = {'tomato' => 'Whatever Rod\'s BAC library is.'};
92 my $plate_summary = 'plate_summary_version_';
93 my $plate_report = 'deconvolution_report_plate_';
94 my $all_reports = 'all_deconvolution_report_version_';
95 # Stuff originally from BACDB.pm
96 my $cornell_prefix = 'P';
97 my $arizona_prefix = 'LE_HBa';
98 my $stop_col = 'I';
99 my $stop_filter = 'h';
100 my $dbh;
101 my $bac_sth = {};
102 my $konnex = {'db' => 'physical', 'user' => 'robert'};
103 my $filter;
104 my $filter_shift;
105 my $map_id = 9;
108 ######################################################################
110 # Static data accessor methods.
112 ######################################################################
116 sub get_current_map_id {
117 return $map_id;
120 sub get_physical_db_name () {
121 return $physical_db_name;
125 sub get_hosts_table () {
126 return \%hosts;
130 sub cornell_prefix () {
131 return $cornell_prefix;
135 sub arizona_prefix () {
136 return $arizona_prefix;
140 sub get_filter_matrices () {
141 # If the filter hashes are not already defined then define them now.
142 if (!$filter || !$filter_shift) {
143 ($filter, $filter_shift) = &initialize_filters();
145 # Now return the hash references.
146 return ($filter, $filter_shift);
150 sub get_users_hashref () {
151 return $users;
155 sub get_species_hashref () {
156 return $species;
160 sub get_initial_version_hashref () {
161 return \%initial_version;
165 sub get_last_row () {
166 # Yes, this is right. Don't ask me WHY it's called $stop_col, not
167 # $stop_row. Yes, I gaffed. If you feel like wading through ALL
168 # physical_db code (NOT just this module, but ALL SCRIPTS which
169 # might reference this module and this method...) and ensuring
170 # compliance with this then, by all means, be my guest.
171 return $stop_col;
175 ######################################################################
177 # Database query methods.
179 ######################################################################
182 sub connect_physical_db {
184 my $dbh = CXGN::DB::Connection->new('physical');
185 return $dbh;
190 sub disconnect_physical_db ($) {
192 return; # bah!
197 sub get_user_id ($$) {
199 # Queries the database to find the userid for a given user's net-id.
200 my ($dbh, $net_id) = @_;
201 my $user_sth = $dbh->prepare("SELECT user_id FROM users WHERE net_id=?");
202 $user_sth->execute($net_id);
203 my $userid = $user_sth->fetchrow_array;
204 $user_sth->finish;
205 return ($userid || 0);
210 sub get_current_overgo_version ($) {
212 # Returns the value of overgo_version listed as current=1 in the
213 # table overgo_version.
214 my ($dbh) = @_;
215 my $stm = "SELECT overgo_version FROM physical.overgo_version WHERE current=1";
216 my $sth = $dbh->prepare($stm);
217 $sth->execute;
218 my $version = $sth->fetchrow_array;
219 $sth->finish();
220 return ($version || 0);
225 sub get_current_overgo_version_and_updated_on ($) {
227 # As get_current_overgo_version, above, but also returns the udpated_on datetime.
228 my ($dbh) = @_;
229 my ($overgo_v, $updated) = $dbh->selectrow_array("SELECT overgo_version, updated_on FROM physical.overgo_version WHERE current=1");
230 return ($overgo_v, $updated);
235 sub get_latest_overgo_version ($;$) {
237 # Returns the latest (that is, highest numbered) version stored
238 # in the table physical.overgo_version.
239 # If the second argument supplied is an integer then it will be
240 # used as a user_id to pare down the choices. Otherwise, the
241 # highest version number in the table will be returned.
242 my ($dbh, $userid) = @_;
243 my $stm = $userid ? "SELECT overgo_version FROM physical.overgo_version WHERE updated_by=$userid ORDER BY overgo_version DESC" : "SELECT overgo_version FROM overgo_version ORDER BY overgo_version DESC";
244 my $sth = $dbh->prepare($stm);
245 $sth->execute;
246 my $version = $sth->fetchrow_array;
247 $sth->finish;
248 return ($version || 0);
253 sub get_current_fpc_version ($) {
255 # Returns the fpc_version.fpc_version marked current in the db.
256 my ($dbh) = @_;
257 my $sth = $dbh->prepare("SELECT fpc_version FROM physical.fpc_version WHERE current=1");
258 $sth->execute;
259 my $fpc_version = $sth->fetchrow_array;
260 $sth->finish;
261 return ($fpc_version || 0);
266 sub get_current_fpc_version_and_date ($) {
268 # Same as above, save that it returns a date as well.
269 my ($dbh) = @_;
270 my ($fpc_version, $date) = $dbh->selectrow_array("SELECT fpc_version, updated_on FROM physical.fpc_version WHERE current=1");
271 return ($fpc_version, $date);
276 sub get_plate_id ($$) {
278 my ($dbh, $plateno) = @_;
279 my $sth = $dbh->prepare("SELECT plate_id FROM physical.overgo_plates WHERE plate_number=?");
280 $sth->execute($plateno);
281 my $plate_id = $sth->fetchrow_array;
282 $sth->finish;
283 #print STDERR "Got plate_id $plate_id with plate number $plateno.\n";
284 return ($plate_id || 0);
289 sub get_plate_number_by_plate_id ($$) {
291 my ($dbh, $plate_id) = @_;
292 my $sth = $dbh->prepare("SELECT plate_number FROM physical.overgo_plates WHERE plate_id=?");
293 $sth->execute($plate_id);
294 my $plate_number = $sth->fetchrow_array;
295 $sth->finish;
296 return ($plate_number || 0);
301 sub get_total_number_of_bacs ($) {
303 my ($dbh) = @_;
304 my $sth = $dbh->prepare("SELECT COUNT(bac_id) FROM physical.bacs WHERE bad_clone!=1");
305 $sth->execute;
306 my $total_bacs = $sth->fetchrow_array;
307 $sth->finish;
308 return ($total_bacs || 0);
313 sub count_all_bacs_which_hit_all_plates {
315 my ($dbh, $overgo_version) = @_;
316 $overgo_version ||= &get_current_overgo_version($dbh);
317 my $sth = $dbh->prepare("SELECT COUNT(DISTINCT b.bac_id) FROM physical.bacs AS b INNER JOIN overgo_associations AS oa ON b.bac_id=oa.bac_id INNER JOIN probe_markers AS pm ON oa.overgo_probe_id=pm.overgo_probe_id WHERE oa.overgo_version=?");
318 $sth->execute($overgo_version);
319 my $bac_count = $sth->fetchrow_array;
320 $sth->finish;
321 return ($bac_count || 0);
326 sub count_all_bacs_which_hit_plate_n {
328 my ($dbh, $plateno, $overgo_version) = @_;
329 $overgo_version ||= &get_current_overgo_version($dbh);
330 my $sth = $dbh->prepare("SELECT COUNT(DISTINCT b.bac_id) FROM physical.bacs AS b INNER JOIN overgo_associations AS oa ON b.bac_id=oa.bac_id INNER JOIN probe_markers AS pm ON oa.overgo_probe_id=pm.overgo_probe_id INNER JOIN overgo_plates AS op ON pm.overgo_plate_id=op.plate_id WHERE oa.overgo_version=? AND op.plate_number=?");
331 $sth->execute($overgo_version, $plateno,);
332 my $bac_count = $sth->fetchrow_array;
333 $sth->finish;
334 return ($bac_count || 0);
339 sub count_all_bacs_which_plausibly_hit_all_plates {
341 my ($dbh, $overgo_version, $map_id) = @_;
342 $overgo_version ||= &get_current_overgo_version($dbh);
343 my $sth = $dbh->prepare("SELECT COUNT(DISTINCT b.bac_id) FROM physical.bacs AS b INNER JOIN overgo_associations AS oa ON b.bac_id=oa.bac_id INNER JOIN oa_plausibility AS oap USING(overgo_assoc_id) INNER JOIN probe_markers AS pm ON oa.overgo_probe_id=pm.overgo_probe_id WHERE oa.overgo_version=? AND oap.plausible=1 AND oap.map_id=$map_id");
344 $sth->execute($overgo_version);
345 my $bac_count = $sth->fetchrow_array;
346 $sth->finish;
347 return ($bac_count || 0);
352 sub count_all_bacs_which_plausibly_hit_plate_n {
354 my ($dbh, $plateno, $overgo_version, $map_id) = @_;
355 $overgo_version ||= &get_current_overgo_version($dbh);
356 my $sth = $dbh->prepare("SELECT COUNT(DISTINCT b.bac_id) FROM physical.bacs AS b INNER JOIN physical.overgo_associations AS oa ON b.bac_id=oa.bac_id INNER JOIN oa_plausibility AS oap USING(overgo_assoc_id) INNER JOIN physical.probe_markers AS pm ON oa.overgo_probe_id=pm.overgo_probe_id INNER JOIN physical.overgo_plates AS op ON pm.overgo_plate_id=op.plate_id WHERE oa.overgo_version=? AND oap.plausible=1 AND op.plate_number=? AND oap.map_id=$map_id");
357 $sth->execute($overgo_version, $plateno);
358 my $bac_count = $sth->fetchrow_array;
359 $sth->finish;
360 return ($bac_count || 0);
365 sub count_wells_with_plausible_hits_on_plate_n {
367 my ($dbh, $plateno, $overgo_version, $map_id) = @_;
368 $overgo_version ||= &get_current_overgo_version($dbh);
369 my $sth = $dbh->prepare("SELECT COUNT(DISTINCT oa.overgo_probe_id) FROM physical.overgo_plates AS op INNER JOIN probe_markers AS pm ON op.plate_id=pm.overgo_plate_id INNER JOIN overgo_associations AS oa ON pm.overgo_probe_id=oa.overgo_probe_id INNER JOIN physical.oa_plausibility oap USING(overgo_assoc_id) WHERE op.plate_number=? AND oa.overgo_version=? AND oap.plausible=1 AND oap.map_id=?");
370 $sth->execute($plateno, $overgo_version, $map_id);
371 my $wellswithhits = $sth->fetchrow_array;
372 $sth->finish;
373 return $wellswithhits;
378 sub count_distinct_anchor_points_on_map_chromosome {
380 #<<<<<<< .mine
381 # my ($dbh, $map_id, $chromonum, $overgo_version) = @_;
382 #=======
383 my ($dbh, $map_id, $chromonum, $overgo_version, $map_id2) = @_;
384 #>>>>>>> .r635
385 $overgo_version ||= &get_current_overgo_version($dbh);
386 my $sth = $dbh->prepare("SELECT COUNT(DISTINCT md.loc_id) FROM physical.overgo_associations AS oa INNER JOIN physical.oa_plausibility AS oap USING(overgo_assoc_id) INNER JOIN physical.probe_markers AS pm ON oa.overgo_probe_id=pm.overgo_probe_id INNER JOIN sgn.marker_locations AS ml ON pm.marker_id=ml.marker_id INNER JOIN sgn.mapdata AS md ON ml.loc_id=md.loc_id INNER JOIN sgn.linkage_groups USING(lg_id) WHERE oa.overgo_version=? AND md.map_id=? AND lg.lg_name=? AND oap.map_id=?");
387 $sth->execute($overgo_version, $map_id, $chromonum, $map_id2);
388 my $count = $sth->fetchrow_array;
389 $sth->finish;
390 return ($count || 0);
395 sub get_plate_as_hash ($$;$) {
396 # Return the names of the markers on the plate in a hash whose keys are the letter names
397 # of the plate rows and whose values are arrays corresponding to the row contents.
399 # N.B. - In the database we count columns on the plates from 1 to 12, whereas the
400 # resulting platehashes count columns CS style - from 0 to 11. This is a probable
401 # source of errors which you should try to be aware of.
403 my ($dbh, $plateno, $overgo_version) = @_;
404 # Check this plate exists.
405 my $plate_stats_sth = $dbh->prepare("SELECT plate_id, row_max, col_max FROM physical.overgo_plates WHERE plate_number=?");
406 $plate_stats_sth->execute($plateno);
407 my ($plate_id, $row_max, $col_max) = $plate_stats_sth->fetchrow_array;
408 $plate_stats_sth->finish;
409 $plate_id || return 0;
410 # Prepare a blank plate hash.
411 my %thisplate=();
412 $row_max ++;
413 for (my $row='A'; $row ne $row_max; $row ++) {
414 for (my $col=0; $col<$col_max; $col ++) {
415 $thisplate{$row}[$col] = '_';
418 # If given an overgo version, exclude probe_markers deprecated in that version.
419 my $platemarkers_stm = $overgo_version ? "SELECT alias, pm.overgo_plate_row, pm.overgo_plate_col FROM physical.probe_markers AS pm LEFT JOIN sgn.marker AS m ON pm.marker_id=m.marker_id LEFT JOIN sgn.marker_alias AS ma ON (m.marker_id = ma.marker_id) LEFT JOIN physical.deprecated_probes AS dp ON (pm.overgo_probe_id=dp.overgo_probe_id AND dp.overgo_version=$overgo_version) WHERE pm.overgo_plate_id=? AND dp.dp_id IS NULL ORDER BY pm.overgo_plate_row, pm.overgo_plate_col" : "SELECT m.marker_name, pm.overgo_plate_row, pm.overgo_plate_col FROM physical.probe_markers AS pm LEFT JOIN sgn.marker AS m ON pm.marker_id=m.marker_id LEFT JOIN sgn.marker_alias AS ma ON (m.marker_id = ma.marker_id) WHERE pm.overgo_plate_id=? ORDER BY pm.overgo_plate_row, pm.overgo_plate_col";
420 my $platemarkers_sth = $dbh->prepare($platemarkers_stm);
421 $platemarkers_sth->execute($plate_id);
422 # Now insert the retrieved markers into the hash.
423 while (my ($mrkr, $row, $col) = $platemarkers_sth->fetchrow_array) {
424 $col --; # This is a bit hacky. Should have a better policy than this.
425 $thisplate{$row}[$col] = $mrkr;
427 $platemarkers_sth->finish;
428 return \%thisplate;
433 ######################################################################
435 # Database modification methods.
437 ######################################################################
440 sub new_overgo_version ($$) {
442 # This subroutine requires the DBH and a registered user's net-id.
443 # On a successful operation it creates a new version entry in the
444 # overgo_version table and returns that version's overgo_version.
445 my ($dbh, $user) = @_;
446 my $userid = &get_user_id($user);
447 $userid || die "ERROR: User with net id $user is not authorized to use this database.\n";
448 my $current_datetime = &get_and_format_current_datetime();
449 my $version_sth = $dbh->prepare("INSERT INTO physical.overgo_version SET updated_by=?, updated_on=?");
450 $version_sth->execute($userid, $current_datetime);
451 my $version = $version_sth->{'mysql_insertid'};
452 $version_sth->finish;
453 return $version;
458 sub new_fpc_version ($$$$;$) {
460 # This subroutine requires the DBH and a registered user's net-id.
461 # On a successful operation it created a new fpc version entry in
462 # the fpc_version table and returns that version's fpc_version.
463 # Any comments on this FPC version may optionally be added as a fifth
464 # argument.
465 my ($dbh, $user, $date, $path, $comments) = @_;
466 my $userid = &get_user_id($user);
467 $userid || die "physical_db_tools ERROR: User $user is not authorized to use this database.\n";
468 my $version_sth = $dbh->prepare("INSERT INTO physical.fpc_version SET updated_on=?, updated_by=?, current=0, fpcfile=?, comments=?");
469 $version_sth->execute($date, $userid, $path, $comments);
470 my $version = $version_sth->{'mysql_insertid'};
471 $version_sth->finish;
472 return $version;
477 sub set_current_fpc_version ($$) {
479 # This subroutine takes in an INT which is an fpc_version value
480 # and sets it as the current one, clearing all non-current values
481 # en route.
482 # To set no version as "current" pass in a $new_cv value of 0.
483 my ($dbh, $new_cv) = @_;
484 $new_cv =~ /^\d+$/ || return 0;
485 $dbh->do("UPDATE physical.fpc_version SET current=0");
486 $dbh->do("UPDATE physical.fpc_version SET current=1 WHERE fpc_version=$new_cv");
487 return $new_cv;
492 sub remove_plate_by_plate_id ($$) {
494 my ($dbh, $pid) = @_;
495 # Get the ids for all the overgo probe_markers on this plate.
496 my @probemarkers;
497 my $probe_sth = $dbh->prepare("SELECT overgo_probe_id overgo_probe_id FROM physical.probe_markers WHERE overgo_plate_id=?");
498 $probe_sth->execute($pid);
499 while (my $pm = $probe_sth->fetchrow_array) {
500 push @probemarkers, $pm;
502 $probe_sth->finish;
503 # Remove BAC associations to these probes from overgo_associations
504 # and tentative overgo_associations.
505 my $clear_overgo_assocs_sth = $dbh->prepare("DELETE FROM physical.overgo_associations WHERE overgo_probe_id=?");
506 my $clear_tentative_overgo_assocs_sth = $dbh->prepare("DELETE FROM physical.tentative_overgo_associations WHERE overgo_probe_id=?");
507 foreach my $pm (@probemarkers) {
508 $clear_overgo_assocs_sth->execute($pm);
509 $clear_tentative_overgo_assocs_sth->execute($pm);
511 $clear_overgo_assocs_sth->finish;
512 $clear_tentative_overgo_assocs_sth->finish;
513 # Remove the probe markers which are on this plate.
514 $dbh->do("DELETE FROM probe_markers WHERE overgo_plate_id=$pid");
515 # Remove the plate iteslf from overgo_plates.
516 $dbh->do("DELETE FROM overgo_plates WHERE plate_id=$pid");
521 sub remove_overgo_plate_number ($$) {
523 # This subroutine removes an overgo plate from the database, entirely.
524 # It removes the plate itself from overgo_plates.
525 # It removes its probes from probe_markers.
526 # It removes associations to those probes from overgo_associations
527 # and tentative_overgo_associations.
528 # Use it wisely.
529 my ($dbh, $plateno) = @_;
530 my $plate_id = &get_plate_id($dbh, $plateno);
531 if ($plate_id) {
532 &remove_plate_by_plate_id($dbh, $plate_id);
533 } else {
534 print STDERR "physical_db_tools::remove_overgo_plate_number WARNING: overgo plate $plateno is not found in the physical database.\n";
540 sub clear_overgo_associations {
542 # This doesn't care about map_id. It clears the associations for ALL
543 # map_id's.
545 # Clears out the OVERGO_ASSOCIATIONS and TENTATIVE_OVERGO_ASSOCIATIONS
546 # tables of data for a given overgo version
547 my ($dbh, $overgo_version) = @_;
548 # Get tentative_association ids from tentative_overgo_associations.
549 my @toa=();
550 my $get_toa_ids_sth = $dbh->prepare("SELECT tentative_assoc_id FROM physical.tentative_overgo_associations WHERE overgo_version=?");
551 $get_toa_ids_sth->execute($overgo_version);
552 while (my $toa_id = $get_toa_ids_sth->fetchrow_array) {
553 push @toa, $toa_id;
555 $get_toa_ids_sth->finish;
556 # Now clear out groups from tentative_association_conflict_groups that match those toa_ids.
557 my $clear_tacg_sth = $dbh->prepare("DELETE FROM physical.tentative_association_conflict_groups WHERE tentative_assoc_id=?");
558 foreach (@toa) {
559 $clear_tacg_sth->execute($_);
561 $clear_tacg_sth->finish;
563 # delete plausibilities for associations in this overgo version
564 my $overgo_assocs = $dbh->selectcol_arrayref("SELECT overgo_assoc_id FROM physical.overgo_associations WHERE overgo_version=$overgo_version");
565 my $delete_oap = $dbh->prepare("DELETE FROM physical.oa_plausibility WHERE overgo_assoc_id=?");
566 for (@$overgo_assocs){
567 $delete_oap->execute($_);
570 # Now bulk DELETE from overgo_associations and tentative_overgo_associations.
571 $dbh->do("DELETE FROM physical.overgo_associations WHERE overgo_version=$overgo_version");
572 $dbh->do("DELETE FROM physical.tentative_overgo_associations WHERE overgo_version=$overgo_version");
577 ######################################################################
579 # Support methods.
581 ######################################################################
584 sub get_and_format_current_datetime () {
586 # Uses a system call to get the current date and time and format
587 # them appropriately for insertion into a mysql DATETIME field.
588 my $system_dt = `date +%Y%m%d%T`;
589 if ($system_dt =~ /(\d\d\d\d)(\d\d)(\d\d)(\d\d:\d\d:\d\d)/) {
590 return $1 . "-" . $2 . "-" . $3 . " " . $4;
591 } else {
592 die "physical_db_tools ERROR: Ill-formatted date retrieved from system: $system_dt\n";
598 sub auto_configure_db_settings () {
600 # Determines the appropriate settings with which to connect to
601 # the database, based on which machine we are working on.
602 my %hosts = %{ &get_hosts_table() };
603 my $hostname = `hostname`;
604 chomp $hostname;
605 if ($hostname =~ /^([^\.]+)\./) { $hostname = $1; }
606 if ($hosts{$hostname}) {
607 return $hosts{$hostname};
608 } else {
609 die "physical_db_tools ERROR: Auto-configuration information not known for host $hostname.\n";
615 sub infer_all_BACs_from_filters ($$;$) {
617 # This subroutine generates a load file for the physical.bacs table.
618 # It does this by iterating through all of the BAC filters used in
619 # generating this library.
620 # The first argument is the Database Handle ($dbh).
621 # The second argument must be the path to a file containing a "\n"
622 # delimited list of bad clones. If no list is available then this
623 # argument should be set to 'nobad'.
624 # The third argument is the (optional) path to the output file to
625 # be written. If this is absent then a file named bacs.load will
626 # be generated in the current working directory.
628 my ($dbh, $badclones, $outfile) = @_;
629 $outfile ||= 'bacs.load';
630 my @rows = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P');
631 my ($filter, $filter_shift) = &get_filter_matrices();
633 # Read in the list of bad clones from the file $badclones.
634 my %badclones=();
635 if ($badclones ne 'nobad') {
636 open BAD, "<$badclones"
637 or die "ERROR: Can't read from $badclones.\n";
638 my @bc = <BAD>;
639 close BAD;
640 chomp @bc;
641 my $pfx = &cornell_prefix();
642 foreach (@bc) {
643 $badclones{($pfx . $_)} = 1;
647 # Get the species ID from the DB.
648 my $species_sth = $dbh->prepare("SELECT species_id FROM physical.species WHERE short_name='tomato'");
649 $species_sth->execute();
650 my $species_id = $species_sth->fetchrow_array;
651 $species_sth->finish;
652 $species_id || die "physical_db_tools ERROR: No species_id found in database.\n";
654 # Open the file to write this data to.
655 open BACS, ">$outfile"
656 or die "ERROR: Can't write to file $outfile.\n";
658 # Work through the filters and extrapolate the name of every possible BAC.
659 my $bac_id=0;
660 foreach my $fltr_code (sort keys %$filter_shift) {
661 my $shift = $$filter_shift{$fltr_code};
662 foreach my $spot (keys %$filter) {
663 my $spot_code = ($$filter{$spot} + $shift);
664 foreach my $row (@rows) {
665 for (my $col=1; $col<=24; $col++) {
666 $bac_id ++;
667 my $cu_name = $cornell_prefix . sprintf("%03d", $spot_code) . $row . sprintf("%02d", $col);
668 my $az_name = $arizona_prefix . sprintf("%04d", $spot_code) . $row . sprintf("%02d", $col);
669 my $sp6_end_seq_id=0;
670 my $t7_end_seq_id=0;
671 my $genbank_accession="";
672 my $estimated_length=0;
673 print BACS "$bac_id\t$cu_name\t$az_name\t$species_id\t$sp6_end_seq_id\t$t7_end_seq_id\t$genbank_accession\t" . ($badclones{$cu_name} || "0") . "\t$estimated_length\n";
679 close BACS;
684 sub initialize_filters () {
686 # This subroutine populates a pair of hashes which contain information
687 # about the BAC filters used in the overgo mapping experiments.
689 # Prepare the data for filter A:
690 my %filter = ();
691 my $val = 0;
692 for (my $row=1; $row<7; $row ++) {
693 my $col='A';
694 while ($col ne $stop_col) {
695 $filter{($row . $col)} = ++ $val;
696 $col ++;
700 # Handle other filters.
701 my %filter_shift = ();
702 my $shift = 0;
703 my $f='a';
704 while ($f ne $stop_filter) {
705 $filter_shift{$f} = $shift;
706 $shift += 48;
707 $f ++;
710 return (\%filter, \%filter_shift);
715 sub BAC_CUID_from_filter ($$$) {
717 my ($f_code, $spot, $position) = @_;
718 my ($filter, $filter_shift) = &get_filter_matrices();
719 $f_code = lc $f_code;
720 if (not defined $$filter_shift{$f_code}) {
721 print STDERR "WARNING: Filter $f_code not defined.\n";
722 return "";
724 if (not $$filter{$spot}) {
725 print STDERR "WARNING: Spot position $spot unknown.\n";
726 return "";
728 my $cuname;
729 if ($position =~ /(\w)(\d+)/) {
730 my ($p_prefix, $p_suffix) = ($1, $2);
731 $cuname = $cornell_prefix .
732 sprintf("%03d", ($$filter{$spot} + $$filter_shift{$f_code})) .
733 $p_prefix . sprintf("%02d", $p_suffix);
734 } else {
735 print STDERR "WARNING: Badly formed position spec $position.\n";
736 $cuname = "";
738 return $cuname;
743 sub BAC_AZID_from_filter ($$$) {
745 my ($f_code, $spot, $position) = @_;
746 my ($filter, $filter_shift) = &get_filter_matrices();
747 $f_code = lc $f_code;
748 defined ($$filter_shift{$f_code})
749 or die "ERROR: Filter $f_code not defined.\n";
750 $$filter{$spot} or die "ERROR: Spot position $spot unknown.\n";
751 my $azname;
752 if ($position =~ /(\w)(\d+)/) {
753 my ($p_prefix, $p_suffix) = ($1, $2);
754 $azname = $arizona_prefix .
755 sprintf("%04d", ($$filter{$spot} + $$filter_shift{$f_code})) .
756 $p_prefix . sprintf("%02d", $p_suffix);
757 } else {
758 print STDERR "WARNING: Badly formed position spec $position. Skipping this BAC.\n";
759 $azname = "";
761 return $azname;
766 sub print_plate_from_hash ($;*) {
768 # Takes in a reference to a hash and prints the plate found therein.
769 # If given a STREAM as a second argument prints the hash to that stream.
770 # Otherwise, prints to STDOUT.
772 my ($plate, $stream) = @_;
773 (ref $plate) or die "physical_db_tools::print_plate_from_hash ERROR: Must reference a plate-hash in order to print it.\n";
774 $stream ||= *STDOUT;
776 for (my $row='A'; $row ne $stop_col; $row++) {
777 print $stream "" . join("\t", @{$$plate{$row}}) . "\n";
783 return 1;