3 # basic script to load pcr marker information
6 # usage: load_marker_data.pl -H hostname D dbname -i infile -m map_id
8 # copy and edit this file as necessary
9 # common changes include the following:
12 # different column headings
18 load_solcap_markers.pl - a script to load markers into the SGN database.
22 usage: load_solcap_markers.pl
30 The hostname of the server hosting the database.
34 the name of the database
38 (optional) test mode. Rollback after the script terminates. Database should not be affected. Good for test runs.
42 protocol (e.g. SSR, SNP, Indel, dCAPs, etc)
46 infile with the marker info
55 The tab-delimited map file has the following columns:
58 Temp (annealing temperature)
62 (optional columns, depending on the protocol)
63 pd (an additional primer seq, if protocol is dCAPS)
64 Indel (additional primer seq, if protocol is Indel)
65 ASPE1 (additional primer seq, if protocol is SNP)
66 ASPE2 (additional primer seq, if protocol is SNP)
67 seq3 (3' flanking sequence, if protocol is SNP or Indel)
68 seq5 (5' flanking sequence, if protocol is SNP or Indel)
70 <accession name a> provide band sizes for accession name a
72 <accession name b> provide band sizes for accession name b
77 Naama Menda <nm249@cornell.edu>
85 use CXGN
::Tools
::File
::Spreadsheet
;
86 use CXGN
::Tools
::Text
;
87 use CXGN
::Marker
::Modifiable
;
88 use CXGN
::Marker
::Tools
;
89 use CXGN
::Marker
::Location
;
90 use CXGN
::Marker
::PCR
::Experiment
;
91 use CXGN
::Map
::Version
;
92 use CXGN
::DB
::Connection
;
93 use CXGN
::DB
::InsertDBH
;
95 use CXGN
::DB
::SQLWrappers
;
100 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_p, $opt_m, $opt_a, $opt_b);
102 getopts
('H:D:i:tp:m:');
105 my $map_id = $opt_m || die "Must pass a -m option with a valid sgn map_id!\n";
106 my $protocol = $opt_p || die "ERROR: No -p option passed for protocol name (SolCap markers are loaded with one file per protocol: SNP, Indel, SSR, CAPS) \n";
108 my $dbh = CXGN
::DB
::InsertDBH
->new({
111 dbargs
=> {AutoCommit
=> 0,
115 my $sql=CXGN
::DB
::SQLWrappers
->new($dbh);
119 # parameters for this specific instance
120 my $experiment_type_id = 1; # 'mapping'
122 my $accession_id ;#= '88x87'; # parent1 x parent2
124 # make an object to give us the values from the spreadsheet
125 my $ss = CXGN
::Tools
::File
::Spreadsheet
->new($opt_i);
126 my @markers = $ss->row_labels(); # row labels are the marker names
127 my @columns = $ss->column_labels(); # column labels are the headings for the data columns
129 # make sure the spreadsheet is how we expect it to be
130 @columns = qw
| marker protocol Temp fwd rev
|
131 or die"Column headings are not as expected";
133 for my $marker_name (@markers) {
135 print "marker: $marker_name\n";
137 my @marker_ids = CXGN
::Marker
::Tools
::marker_name_to_ids
($dbh,$marker_name);
138 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
139 # just get the first ID in the list (if the list is longer than 1, we've already died)
140 my $marker_id = $marker_ids[0];
143 $marker_id = CXGN
::Marker
::Tools
::insert_marker
($dbh,$marker_name);
144 print "marker added: $marker_id\n";
146 else { print "marker_id found: $marker_id\n" }
150 ((grep {/temp/} @columns) && ($ss->value_at($marker_name,'temp')))
151 ?
$ss->value_at($marker_name,'temp') : 55;
152 print "temp: $annealing_temp\n";
154 my $fwd = $ss->value_at($marker_name,'fwd')
155 or die "No foward primer found for $marker_name";
158 my $primer_id_fwd = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$fwd);
159 $primer_id_fwd = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($fwd)) if !$primer_id_fwd;
160 print "primer_id_fwd: $primer_id_fwd\n";
162 my $rev=$ss->value_at($marker_name,'rev')
163 or die"No reverse primer found for $marker_name";
165 my $primer_id_rev = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$rev);
166 $primer_id_rev = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($rev)) if !$primer_id_rev;
167 print "primer_id_rev: $primer_id_rev\n";
169 #my $protocol = $ss->value_at($marker_name,'protocol') ;
170 my ($pd, $primer_id_pd, $indel, $indel_id, $snp, $snp_id, $aspe1, $aspe2, $aspe1_id, $aspe2_id, $seq5, $seq5_id, $seq3, $seq3_id);
172 if (($protocol eq 'dCAPS') && ($pd = $ss->value_at($marker_name,'pd'))) {
174 $primer_id_pd = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$pd);
175 $primer_id_pd = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($pd)) if !$primer_id_pd;
176 print "primer_id_pd: $primer_id_pd\n";
177 }############## add here sequences for different protocols
178 if (($protocol eq 'Indel') && ($indel = $ss->value_at($marker_name,'Indel'))) {
179 print "indel: $indel\n";
180 $indel_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$indel);
181 $indel_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($indel)) if !$indel_id;
182 print "indel: $indel_id\n";
184 if (($protocol eq 'SNP') && ($snp = $ss->value_at($marker_name,'SNP'))) {
186 $snp_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$snp);
187 $snp_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($snp)) if !$snp_id;
188 print "snp_id: $snp_id\n";
190 $aspe1 = $ss->value_at($marker_name,'ASPE1');
191 print "aspe1: $aspe1\n";
193 $aspe1_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$aspe1);
194 $aspe1_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($aspe1)) if !$aspe1_id;
195 print "aspe1_id: $aspe1_id\n";
197 $aspe2 = $ss->value_at($marker_name,'ASPE2');
199 print "aspe2: $aspe2\n";
200 $aspe2_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$aspe2);
201 $aspe2_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($aspe2)) if !$aspe2_id;
202 print "aspe2_id: $aspe2_id\n";
205 if ($protocol eq 'SNP' || $protocol eq 'Indel') {
206 $seq3 = $ss->value_at($marker_name,'seq3');
207 print "seq3: $seq3\n";
208 $seq3_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$seq3);
209 $seq3_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($seq3)) if !$seq3_id;
210 print "seq3_id: $seq3_id\n";
212 $seq5 = $ss->value_at($marker_name,'seq5');
213 print "seq5: $seq5\n";
214 $seq5_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$seq5);
215 $seq5_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($seq5)) if !$seq5_id;
216 print "seq5_id: $seq5_id\n";
219 # check if data already in pcr_experiment and marker_experiment, and if not, add it
220 # there's a lot of stuff to check here.. I know these aren't in the database so will come back later
222 my $names = ["marker_id", "annealing_temp", "primer_id_fwd",
223 "primer_id_rev", "experiment_type_id", "map_id","primer_id_pd","accession_id"];
224 my @fields = ($marker_id,$annealing_temp,$primer_id_fwd,
225 $primer_id_rev,$experiment_type_id,$map_id,$primer_id_pd,$accession_id);
227 # does this check if pcr_experiment already exists?
228 #NOW it does!! Should not have 2 rows with the same pcr_experiment data!
230 my $pcr_experiment=$sql->insert_unless_exists('pcr_experiment',{marker_id
=>$marker_id, ,annealing_temp
=>$annealing_temp,primer_id_fwd
=>$primer_id_fwd, primer_id_rev
=>$primer_id_rev, experiment_type_id
=>$experiment_type_id,map_id
=>$map_id,primer_id_pd
=>$primer_id_pd,accession_id
=>$accession_id } );
232 if($pcr_experiment->{inserted
}) { print "INSERTED NEW pcr_experiment" ; }
233 if($pcr_experiment->{exists}) { print "EXISTING pcr_experiment" ; }
235 my $pcr_experiment_id = $pcr_experiment->{id
};
236 #THIS DOES NOT CHECK FOR EXISTING ID
237 #CXGN::Marker::Tools::insert($dbh,"pcr_experiment","pcr_experiment_id",$names,@fields);
238 print "pcr experiment added: $pcr_experiment_id\n";
240 #new pcr_experiment object
241 my $pcr_ex = CXGN
::Marker
::PCR
::Experiment
->new($dbh, $pcr_experiment_id);
242 # set the sequence types
243 $pcr_ex->store_sequence('forward_primer', $fwd);
244 $pcr_ex->store_sequence('reverse_primer', $rev);
245 $pcr_ex->store_sequence('aspe_primer', $aspe1) if $aspe1;
246 $pcr_ex->store_sequence('aspe_primer', $aspe2) if $aspe2;
247 $pcr_ex->store_sequence('indel', $indel) if $indel;
248 $pcr_ex->store_sequence('SNP', $snp) if $snp;
249 $pcr_ex->store_sequence('five_prime_flanking_region', $seq5) if $seq5;
250 $pcr_ex->store_sequence('three_prime_flanking_region', $seq3) if $seq3;
251 print "Checking if map $map_id , marker $marker_id and protocol $protocol exist in marker_experiment\n";
252 # check for existing marker_experiment and update if found
253 my $q = "SELECT marker_experiment_id FROM marker_experiment "
254 . "JOIN marker_location USING (location_id) JOIN map_version "
255 . "USING (map_version_id) WHERE rflp_experiment_id is null "
256 . "AND map_id = ? AND marker_id = ? AND protocol ilike ?";
258 my $sth = $dbh->prepare($q);
259 $sth->execute($map_id,$marker_id,$protocol);
261 while (my ($id) = $sth->fetchrow_array()) {
262 print "Found experiment id $id\n";
267 if (@exp_id > 1) { print join(', ', @exp_id) and exit() }
268 # this really should not be the case
270 my $marker_experiment_id = $exp_id[0];
271 print "Updating marker_experiment $marker_experiment_id\n";
272 my $u = "UPDATE marker_experiment set pcr_experiment_id = ? where marker_experiment_id = ?";
273 $sth = $dbh->prepare($u);
274 $sth->execute($pcr_experiment_id,$marker_experiment_id);
275 print "UPDATED pcr_experiment_id = $pcr_experiment_id for marker_experiment $marker_experiment_id\n";
278 # if not loading map and experiments together, may want to match other protocols
280 # if not, insert new marker_experiment
282 print "No experiment_id found for marker $marker_name. SKIPPING!!!\n";
284 my $names = ["marker_id", "pcr_experiment_id", "protocol"];
285 my @fields = ($marker_id, $pcr_experiment_id, $protocol);
286 # 'SSR' or 'unknown'?
288 #my $marker_experiment_id = CXGN::Marker::Tools::insert
289 # ($dbh,"marker_experiment","marker_experiment_id",$names,@fields);
290 # print "marker experiment added: $marker_experiment_id\n";
291 #print "ADD $marker_experiment_id\n";
299 print "Failed; rolling back.\n";
305 print"Rolling back.\n";
309 print"Committing.\n";