1 package CXGN
::Garbage
::BACDB
;
4 use CXGN
::Garbage
::local_db_link
;
5 use CXGN
::Garbage
::PhysicalDB
;
7 my $cornell_prefix = 'P';
8 my $arizona_prefix = 'LE_HBa';
10 my $stop_filter = 'h';
13 my $konnex = {'db' => 'physical', 'user' => 'robert'};
15 my ($filter, $filter_shift) = &initialize_filters
();
18 sub connect_BAC_db
() {
20 $dbh = local_db_link
::connect_db
($konnex);
21 $$bac_sth{'user_id_by_net_id'} = $dbh->prepare("SELECT user_id FROM users WHERE net_id=?");
22 $$bac_sth{'cu_name_from_id'} = $dbh->prepare("SELECT cornell_clone_name FROM bacs WHERE bac_id=?");
23 $$bac_sth{'id_from_cu_name'} = $dbh->prepare("SELECT bac_id FROM bacs WHERE cornell_clone_name=?");
24 $$bac_sth{'id_from_az_name'} = $dbh->prepare("SELECT bac_id FROM bacs WHERE arizona_clone_name=?");
25 $$bac_sth{'insert_bac'} = $dbh->prepare("INSERT INTO bacs SET cornell_clone_name=?, arizona_clone_name=?");
26 $$bac_sth{'set_bad_clone'} = $dbh->prepare("UPDATE bacs SET bad_clone=1 WHERE bac_id=?");
27 $$bac_sth{'repeal_bad_clone'} = $dbh->prepare("UPDATE bacs SET bad_clone=0 WHERE bac_id=?");
28 $$bac_sth{'bac_ctg_id_from_name'} = $dbh->prepare("SELECT contig_id FROM bac_contigs WHERE contig_name=? AND fpc_version=?");
29 $$bac_sth{'insert_bac_ctg'} = $dbh->prepare("INSERT INTO bac_contigs SET contig_name=?, fpc_version=?");
30 $$bac_sth{'insert_bac_assoc'} = $dbh->prepare("INSERT INTO bac_associations SET bac_id=?, contig_id=?");
31 $$bac_sth{'current_fpc_version'} = $dbh->prepare("SELECT fpc_version FROM fpc_version ORDER BY fpc_version DESC");
39 foreach (keys %$bac_sth) {
40 $$bac_sth{$_}->finish;
42 local_db_link
::disconnect_db
($dbh);
47 sub initialize_filters
() {
49 # Prepare the data for filter A:
52 for (my $row=1; $row<7; $row ++) {
54 while ($col ne $stop_col) {
55 $filter{($row . $col)} = ++ $val;
60 # Handle other filters.
61 my %filter_shift = ();
64 while ($f ne $stop_filter) {
65 $filter_shift{$f} = $shift;
70 return (\
%filter, \
%filter_shift);
79 print "BACDB::insert_BAC WARNING: Attempting to insert a BAC, $name, which is muy suspicious, given that we're now inserting them en masse.\n";
82 my ($cu_name, $az_name);
83 if ($name =~ /^$cornell_prefix(\d\d\d)(\w\d\d)$/) {
85 $az_name = $arizona_prefix . sprintf("%4d", $1) . $2;
86 } elsif ($name =~ /^$arizona_prefix(\d\d\d\d)(\w\d\d)$/) {
87 $cu_name = $cornell_prefix . sprintf("%3d", $1) . $2;
90 print STDERR
"WARNING: BAC name $name not of CU or AZ type. Ignored.\n";
93 $$bac_sth{'insert_bac'}->execute($cu_name, $az_name);
94 return $$bac_sth{'insert_bac'}->{'mysql_insertid'};
103 if ($name =~ /^$cornell_prefix\S+/) {
104 $$bac_sth{'id_from_cu_name'}->execute($name);
105 $bac_id = $$bac_sth{'id_from_cu_name'}->fetchrow_array;
106 } elsif ($name =~ /^$arizona_prefix\S+/) {
107 $$bac_sth{'id_from_az_name'}->execute($name);
108 $bac_id = $$bac_sth{'id_from_az_name'}->fetchrow_array;
110 print STDERR
"WARNING: Trying to get ill-named BAC: $name.\n";
112 $bac_id ||= &insert_BAC
($name);
118 sub BAC_CUID_from_filter
($$$) {
120 my ($f_code, $spot, $position) = @_;
121 $f_code = lc $f_code;
122 if (not defined $$filter_shift{$f_code}) {
123 print STDERR
"WARNING: Filter $f_code not defined.\n";
126 if (not $$filter{$spot}) {
127 print STDERR
"WARNING: Spot position $spot unknown.\n";
131 if ($position =~ /(\w)(\d+)/) {
132 my ($p_prefix, $p_suffix) = ($1, $2);
133 $cuname = $cornell_prefix .
134 sprintf("%03d", ($$filter{$spot} + $$filter_shift{$f_code})) .
135 $p_prefix . sprintf("%02d", $p_suffix);
137 print STDERR
"WARNING: Badly formed position spec $position.\n";
145 sub BAC_AZID_from_filter
($$$) {
147 my ($f_code, $spot, $position) = @_;
148 $f_code = lc $f_code;
149 defined ($$filter_shift{$f_code})
150 or die "ERROR: Filter $f_code not defined.\n";
151 $$filter{$spot} or die "ERROR: Spot position $spot unknown.\n";
153 if ($position =~ /(\w)(\d+)/) {
154 my ($p_prefix, $p_suffix) = ($1, $2);
155 $azname = $arizona_prefix .
156 sprintf("%04d", ($$filter{$spot} + $$filter_shift{$f_code})) .
157 $p_prefix . sprintf("%02d", $p_suffix);
159 print STDERR
"WARNING: Badly formed position spec $position. Skipping this BAC.\n";
167 sub dump_filter
($) {
170 foreach (keys %$filter) {
171 print $_ . ":" . ($$filter{$_} + $$filter_shift{$fcode}) . "\n";
177 sub BAC_CUID_from_ID
($) {
180 $$bac_sth{'cu_name_from_id'}->execute($id);
181 my $cuname = $$bac_sth{'cu_name_from_id'}->fetchrow_array;
187 sub BAC_AZID_from_ID
($) {
190 $$bac_sth{'az_name_from_id'}->execute($id);
191 my $azname = $$bac_sth{'az_name_from_id'}->fetchrow_array;
197 sub set_bad_clone
($) {
201 if ($ident =~ /^\d+$/) {
204 $id = &get_BAC_id
($ident);
207 $$bac_sth{'set_bad_clone'}->execute($id);
209 print STDERR
"set_bad_clone WARNING: No ID found for BAC with identifier $ident.\n";
215 sub get_bac_ctg_id
($$) {
217 my ($ctg_name, $fpc_version) = @_;
218 $$bac_sth{'bac_ctg_id_from_name'}->execute($ctg_name, $fpc_version);
219 my $ctg_id = $$bac_sth{'bac_ctg_id_from_name'}->fetchrow_array;
223 $$bac_sth{'insert_bac_ctg'}->execute($ctg_name, $fpc_version);
224 return $$bac_sth{'insert_bac_ctg'}->{'mysql_insertid'};
230 sub insert_bac_association
($$) {
232 my ($bac_id, $ctg_id) = @_;
233 $$bac_sth{'insert_bac_assoc'}->execute($bac_id, $ctg_id);
238 sub get_fpc_version
(;$$) {
240 my ($user, $date, $path) = @_;
243 if ($user && $date) {
244 my $this_fpc_sth = $dbh->prepare("SELECT fpc_version FROM fpc_version WHERE updated_on=? AND updated_by=?");
245 $this_fpc_sth->execute($date, $user);
246 $fpcversion = $this_fpc_sth->fetchrow_array || "0";
247 $this_fpc_sth->finish;
249 $$bac_sth{'current_fpc_version'}->execute();
250 $fpcversion = $$bac_sth{'current_fpc_version'}->fetchrow_array || "0";
257 sub new_fpc_version
($$$) {
259 my ($date, $user, $fpcfile) = @_;
261 $$bac_sth{'user_id_by_net_id'}->execute($user);
262 my $userid = $$bac_sth{'user_id_by_net_id'}->fetchrow_array;
263 $userid || die "ERROR: User with net id $user not found in table USERS.\n";
264 my $query_sth = $dbh->prepare("SELECT fpc_version FROM fpc_version WHERE updated_on=? AND updated_by=? AND fpcfile=?");
265 $query_sth->execute($date, $userid, $fpcfile);
266 if (my $prev_id = $query_sth->fetchrow_array) {
267 print STDERR
"WARNING: FPC version $prev_id already describes this file.\n";
269 #print STDERR "WARNING: FPC version $prev_id already describes this file.\n";
273 my $new_fpcv_sth = $dbh->prepare("INSERT INTO fpc_version SET updated_on=$date, updated_by=?, fpcfile=?");
274 $new_fpcv_sth->execute($userid, $fpcfile);
275 my $new_version_no = $new_fpcv_sth->{'mysql_insertid'};
276 $new_fpcv_sth->finish;
277 return $new_version_no;
282 sub infer_all_BACs_from_filters
($;$) {
284 my ($badclones, $outfile) = @_;
285 $outfile ||= 'bacs.load';
286 my @rows = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P');
289 if ($badclones ne 'nobad') {
290 open BAD
, "<$badclones"
291 or die "ERROR: Can't read from $badclones.\n";
295 my $pfx = &cornell_prefix
();
297 $badclones{($pfx . $_)} = 1;
301 # Get the species ID from the DB.
302 my $species_sth = $dbh->prepare("SELECT species_id FROM species WHERE short_name='tomato'");
303 $species_sth->execute();
304 my $species_id = $species_sth->fetchrow_array;
305 $species_sth->finish;
307 # Open the file to write this data to.
308 open BACS
, ">$outfile"
309 or die "ERROR: Can't write to file $outfile.\n";
311 # Work through the filters and extrapolate the name of every possible BAC.
313 foreach my $fltr_code (sort keys %$filter_shift) {
314 my $shift = $$filter_shift{$fltr_code};
315 foreach my $spot (keys %$filter) {
316 my $spot_code = ($$filter{$spot} + $shift);
317 foreach my $row (@rows) {
318 for (my $col=1; $col<=24; $col++) {
320 my $cu_name = $cornell_prefix . sprintf("%03d", $spot_code) . $row . sprintf("%02d", $col);
321 my $az_name = $arizona_prefix . sprintf("%04d", $spot_code) . $row . sprintf("%02d", $col);
322 my $sp6_end_seq_id=0;
324 my $genbank_accession="";
325 my $estimated_length=0;
326 print BACS
"$bac_id\t$cu_name\t$az_name\t$species_id\t$sp6_end_seq_id\t$t7_end_seq_id\t$genbank_accession\t" . ($badclones{$cu_name} || "0") . "\t$estimated_length\n";
337 sub cornell_prefix
() {
338 return $cornell_prefix;
342 sub arizona_prefix
() {
343 return $arizona_prefix;