3 # basic script to load pcr marker information
6 # usage: load_marker_data.pl -H hostname D dbname -i infile -m map_id
8 # copy and edit this file as necessary
9 # common changes include the following:
12 # different column headings
18 load_solcap_markers.pl - a script to load markers into the SGN database.
22 usage: load_solcap_markers.pl
30 The hostname of the server hosting the database.
34 the name of the database
38 (optional) test mode. Rollback after the script terminates. Database should not be affected. Good for test runs.
42 protocol (e.g. SSR, SNP, Indel, dCAPs, GG, etc)
46 infile with the marker info
55 The tab-delimited map file has the following columns:
58 Temp (annealing temperature)
62 (optional columns, depending on the protocol)
63 pd (an additional primer seq, if protocol is dCAPS)
64 Indel (additional primer seq, if protocol is Indel)
65 ASPE1 (additional primer seq, if protocol is SNP)
66 ASPE2 (additional primer seq, if protocol is SNP)
67 seq3 (3' flanking sequence, if protocol is SNP or Indel)
68 seq5 (5' flanking sequence, if protocol is SNP or Indel)
70 <accession name a> provide band sizes for accession name a
72 <accession name b> provide band sizes for accession name b
77 Naama Menda <nm249@cornell.edu>
85 use CXGN
::Tools
::File
::Spreadsheet
;
86 use CXGN
::Tools
::Text
;
87 use CXGN
::Marker
::Modifiable
;
88 use CXGN
::Marker
::Tools
;
89 use CXGN
::Marker
::Location
;
90 use CXGN
::Marker
::PCR
::Experiment
;
91 use CXGN
::Map
::Version
;
92 use CXGN
::DB
::Connection
;
93 use CXGN
::DB
::InsertDBH
;
95 use CXGN
::DB
::SQLWrappers
;
96 use CXGN
::Cview
::Map
::Tools
;
101 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_p, $opt_m, $opt_a, $opt_b);
103 getopts
('H:D:i:tp:m:a:b:');
106 my $map_id = $opt_m || die "Must pass a -m option with a valid sgn map_id!\n";
108 my $protocol = $opt_p || die "ERROR: No -p option passed for protocol name (SolCap markers are loaded with one file per protocol: SNP, Indel, SSR, CAPS) \n";
110 my $dbh = CXGN
::DB
::InsertDBH
->new({
113 dbargs
=> {AutoCommit
=> 0,
117 my $sql=CXGN
::DB
::SQLWrappers
->new($dbh);
119 my $map_version_id = CXGN
::Cview
::Map
::Tools
::find_current_version
($dbh, $map_id);
123 # parameters for this specific instance
124 my $experiment_type_id = 1; # 'mapping'
126 my $stock_id ;#= '88x87'; # parent1 x parent2
128 # make an object to give us the values from the spreadsheet
129 my $ss = CXGN
::Tools
::File
::Spreadsheet
->new($opt_i);
130 my @markers = $ss->row_labels(); # row labels are the marker names
131 my @columns = $ss->column_labels(); # column labels are the headings for the data columns
133 # make sure the spreadsheet is how we expect it to be
134 @columns = qw
| marker protocol temp
| #fwd rev |
135 or die"Column headings are not as expected";
137 for my $marker_name (@markers) {
139 print "\n\nMARKER: $marker_name\n";
141 my @marker_ids = CXGN
::Marker
::Tools
::marker_name_to_ids
($dbh,$marker_name);
142 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
143 # just get the first ID in the list (if the list is longer than 1, we've already died)
144 my $marker_id = $marker_ids[0];
147 print STDERR
"Marker $marker_name does not exist in database. SKIPPING!\n";
149 #$marker_id = CXGN::Marker::Tools::insert_marker($dbh,$marker_name);
150 #print "marker added: $marker_id\n";
152 else { print "marker_id found: $marker_id\n" }
156 ((grep {/temp/} @columns) && ($ss->value_at($marker_name,'temp')))
157 ?
$ss->value_at($marker_name,'temp') : 55;
158 print "temp: $annealing_temp\n";
160 my ($primer_id_fwd, $primer_id_rev, $fwd, $rev);
161 if ($protocol ne 'SNP') { # SNP only has seq5 and seq3
162 $fwd = $ss->value_at($marker_name,'fwd')
163 or die "No foward primer found for $marker_name";
166 $primer_id_fwd = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$fwd);
167 $primer_id_fwd = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($fwd)) if !$primer_id_fwd;
168 print "primer_id_fwd: $primer_id_fwd\n";
170 $rev=$ss->value_at($marker_name,'rev')
171 or warn "No reverse primer found for $marker_name";
173 $primer_id_rev = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$rev);
174 $primer_id_rev = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($rev)) if !$primer_id_rev;
175 print "primer_id_rev: $primer_id_rev\n";
177 #my $protocol = $ss->value_at($marker_name,'protocol') ;
179 my ($pd, $primer_id_pd, $indel, $indel_id, $snp, $snp_id, $aspe1, $aspe2, $aspe1_id, $aspe2_id, $seq5, $seq5_id, $seq3, $seq3_id);
181 if (($protocol eq 'dCAPS') && ($pd = $ss->value_at($marker_name,'pd'))) {
183 $primer_id_pd = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$pd);
184 $primer_id_pd = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($pd)) if !$primer_id_pd;
185 print "primer_id_pd: $primer_id_pd\n";
186 }############## add here sequences for different protocols
187 if (($protocol eq 'Indel') && ($indel = $ss->value_at($marker_name,'Indel'))) {
188 print "indel: $indel\n";
189 $indel_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$indel);
190 $indel_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($indel)) if !$indel_id;
191 print "indel: $indel_id\n";
193 if (($protocol eq 'ASPE') && ($snp = $ss->value_at($marker_name,'ASPE'))) {
195 $snp_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$snp);
196 $snp_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($snp)) if !$snp_id;
197 print "snp_id: $snp_id\n";
199 $aspe1 = $ss->value_at($marker_name,'ASPE1');
200 print "aspe1: $aspe1\n";
202 $aspe1_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$aspe1);
203 $aspe1_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($aspe1)) if !$aspe1_id;
204 print "aspe1_id: $aspe1_id\n";
206 $aspe2 = $ss->value_at($marker_name,'ASPE2');
208 print "aspe2: $aspe2\n";
209 $aspe2_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$aspe2);
210 $aspe2_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($aspe2)) if !$aspe2_id;
211 print "aspe2_id: $aspe2_id\n";
214 if ($protocol eq 'ASPE' || $protocol eq 'Indel') {
215 $seq3 = $ss->value_at($marker_name,'seq3');
216 print "seq3: $seq3\n";
217 $seq3_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$seq3);
218 $seq3_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($seq3)) if !$seq3_id;
219 print "seq3_id: $seq3_id\n";
221 $seq5 = $ss->value_at($marker_name,'seq5');
222 print "seq5: $seq5\n";
223 $seq5_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$seq5);
224 $seq5_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($seq5)) if !$seq5_id;
225 print "seq5_id: $seq5_id\n";
228 if (($protocol eq 'SNP') && ($snp = $ss->value_at($marker_name,'SNP'))) {
230 $snp_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$snp);
231 $snp_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($snp)) if !$snp_id;
232 print "snp_id: $snp_id\n";
234 # $aspe1 = $ss->value_at($marker_name,'ASPE1');
235 # print "aspe1: $aspe1\n";
237 # $aspe1_id = CXGN::Marker::Tools::get_sequence_id($dbh,$aspe1);
238 # $aspe1_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($aspe1)) if !$aspe1_id;
239 # print "aspe1_id: $aspe1_id\n";
241 # $aspe2 = $ss->value_at($marker_name,'ASPE2');
243 # print "aspe2: $aspe2\n";
244 # $aspe2_id = CXGN::Marker::Tools::get_sequence_id($dbh,$aspe2);
245 # $aspe2_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($aspe2)) if !$aspe2_id;
246 # print "aspe2_id: $aspe2_id\n";
249 if ($protocol eq 'SNP' || $protocol eq 'Indel') {
250 $seq3 = $ss->value_at($marker_name,'seq3');
251 print "seq3: $seq3\n";
252 $seq3_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$seq3);
253 $seq3_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($seq3)) if !$seq3_id;
254 print "seq3_id: $seq3_id\n";
256 $seq5 = $ss->value_at($marker_name,'seq5');
257 print "seq5: $seq5\n";
258 $seq5_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$seq5);
259 $seq5_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($seq5)) if !$seq5_id;
260 print "seq5_id: $seq5_id\n";
267 $band_size_a = $ss->value_at($marker_name, $opt_a);
270 $band_size_b = $ss->value_at($marker_name, $opt_b);
273 my $enzyme = $ss->value_at($marker_name, "enzyme");
275 # check if data already in pcr_experiment and marker_experiment, and if not, add it
276 # there's a lot of stuff to check here.. I know these aren't in the database so will come back later
278 my $names = ["marker_id", "annealing_temp", "primer_id_fwd",
279 "primer_id_rev", "experiment_type_id", "map_id","primer_id_pd","stock_id"];
280 my @fields = ($marker_id,$annealing_temp,$primer_id_fwd,
281 $primer_id_rev,$experiment_type_id,$map_id,$primer_id_pd,$stock_id);
283 # does this check if pcr_experiment already exists?
284 #NOW it does!! Should not have 2 rows with the same pcr_experiment data!
286 my $q = "SELECT marker_experiment.marker_experiment_id, pcr_experiment_id FROM sgn.marker_experiment JOIN sgn.marker_location using(location_id) join sgn.map_version using(map_version_id) WHERE map_id=? and marker_id=?";
287 my $s = $dbh->prepare($q);
288 $s->execute($opt_m, $marker_id);
289 my ($marker_experiment_id, $pcr_experiment_id) = $s->fetchrow_array();
291 print STDERR
"This marker ($marker_id) has marker_experiment_id $marker_experiment_id,pcr_experiment_id $pcr_experiment_id on $map_id\n";
293 print STDERR
"marker_id: $marker_id, map_id=$map_id, stock_id=$stock_id\n";
294 my $pcr_exp_info=$sql->insert_unless_exists('pcr_experiment',{marker_id
=>$marker_id,annealing_temp
=>$annealing_temp,primer_id_fwd
=>$primer_id_fwd, primer_id_rev
=>$primer_id_rev, experiment_type_id
=>$experiment_type_id,map_id
=>$map_id,primer_id_pd
=>$primer_id_pd, additional_enzymes
=> $enzyme }); #,stock_id => $stock_id } );
298 if($pcr_exp_info->{inserted
}) { print "INSERTED NEW pcr_experiment\n" ; }
299 if($pcr_exp_info->{exists}) { print "EXISTING pcr_experiment\n" ; }
303 $pcr_experiment_id = $pcr_exp_info->{id
};
305 if ($marker_experiment_id) {
306 $q = "UPDATE sgn.marker_experiment set pcr_experiment_id=? WHERE marker_experiment_id=?";
307 my $s = $dbh->prepare($q);
308 $s->execute($pcr_experiment_id,$marker_experiment_id);
313 #THIS DOES NOT CHECK FOR EXISTING ID
314 #CXGN::Marker::Tools::insert($dbh,"pcr_experiment","pcr_experiment_id",$names,@fields);
315 print STDERR
"pcr experiment added: $pcr_experiment_id\n";
317 print STDERR
"Instantiating the PCR::Experiment object...\n";
319 my $pcr_ex = CXGN
::Marker
::PCR
::Experiment
->new($dbh, $pcr_experiment_id);
320 # set the sequence types
322 print STDERR
"MARKER_ID = $marker_id\n";
325 # print STDERR "The marker $marker_name exists, but is not on this map...\n"; next();
329 warn "marker $marker_name is not in the database!!!!\n";
332 $pcr_ex->store_sequence('forward_primer', $fwd) if ($protocol ne 'SNP');
333 $pcr_ex->store_sequence('reverse_primer', $rev) if ($protocol ne 'SNP');
334 $pcr_ex->store_sequence('aspe_primer', $aspe1) if $aspe1;
335 $pcr_ex->store_sequence('aspe_primer', $aspe2) if $aspe2;
336 $pcr_ex->store_sequence('indel', $indel) if $indel;
337 $pcr_ex->store_sequence('SNP', $snp) if $snp;
338 $pcr_ex->store_sequence('five_prime_flanking_region', $seq5) if $seq5;
339 $pcr_ex->store_sequence('three_prime_flanking_region', $seq3) if $seq3;
342 $pcr_ex->add_pcr_bands_for_stock($band_size_a, $opt_a) if $band_size_a;
343 $pcr_ex->add_pcr_bands_for_stock($band_size_b, $opt_b) if $band_size_b;
344 print STDERR
"Storing pcr_experiment_id $pcr_experiment_id for marker $marker_id ($pcr_ex->{marker_id})\n";
346 $pcr_ex->store_unless_exists();
349 print STDERR
"Checking if map_version_id=$map_version_id, map_id=$map_id , marker $marker_id and protocol $protocol exist in marker_experiment\n";
350 # check for existing marker_experiment and update if found
351 my $q = "SELECT marker_experiment_id FROM marker_experiment "
352 . "JOIN marker_location USING (location_id) JOIN map_version "
353 . "USING (map_version_id) WHERE rflp_experiment_id is null "
354 . "AND map_version_id = ? AND marker_id = ? AND protocol ilike ?";
356 my $sth = $dbh->prepare($q);
357 $sth->execute($map_version_id,$marker_id,$protocol);
359 while (my ($id) = $sth->fetchrow_array()) {
360 print "Found experiment id $id\n";
364 # load the first experiment (several occurences here means multiple placed markers).
365 if (my $exp_id = shift(@exp_id)) {
366 #if (@exp_id > 1) { print STDERR join(', ', @exp_id)."\n\n"; }
367 # this really should not be the case
369 my $marker_experiment_id = $exp_id;
370 print STDERR
"Updating marker_experiment $marker_experiment_id\n";
371 my $u = "UPDATE marker_experiment set pcr_experiment_id = ? where marker_experiment_id = ?";
372 $sth = $dbh->prepare($u);
373 $sth->execute($pcr_experiment_id,$marker_experiment_id);
374 print STDERR
"UPDATED pcr_experiment_id = $pcr_experiment_id for marker_experiment $marker_experiment_id\n";
377 # if not loading map and experiments together, may want to match other protocols
379 # if not, insert new marker_experiment
381 print "No experiment_id found for marker $marker_name. SKIPPING!!!\n";
383 $names = ["marker_id", "pcr_experiment_id", "protocol"];
384 my @fields = ($marker_id, $pcr_experiment_id, $protocol);
385 # 'SSR' or 'unknown'?
387 #my $marker_experiment_id = CXGN::Marker::Tools::insert
388 # ($dbh,"marker_experiment","marker_experiment_id",$names,@fields);
389 # print "marker experiment added: $marker_experiment_id\n";
390 #print "ADD $marker_experiment_id\n";
398 print "Failed; rolling back.\n";
404 print"Rolling back.\n";
408 print"Committing.\n";