3 # basic script to load pcr marker information
6 # usage: load_marker_data.pl -H hostname D dbname -i infile -m map_id
8 # copy and edit this file as necessary
9 # common changes include the following:
12 # different column headings
18 load_solcap_markers.pl - a script to load markers into the SGN database.
22 usage: load_solcap_markers.pl
30 The hostname of the server hosting the database.
34 the name of the database
38 (optional) test mode. Rollback after the script terminates. Database should not be affected. Good for test runs.
42 protocol (e.g. SSR, SNP, Indel, dCAPs, etc)
46 infile with the marker info
55 The tab-delimited map file has the following columns:
58 Temp (annealing temperature)
66 Naama Menda <nm249@cornell.edu>
74 use CXGN
::Tools
::File
::Spreadsheet
;
75 use CXGN
::Tools
::Text
;
76 use CXGN
::Marker
::Modifiable
;
77 use CXGN
::Marker
::Tools
;
78 use CXGN
::Marker
::Location
;
79 use CXGN
::Marker
::PCR
::Experiment
;
80 use CXGN
::Map
::Version
;
81 use CXGN
::DB
::Connection
;
82 use CXGN
::DB
::InsertDBH
;
84 use CXGN
::DB
::SQLWrappers
;
89 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_p, $opt_m);
91 getopts
('H:D:i:tp:m:');
94 my $map_id = $opt_m || die "Must pass a -m option with a valid sgn map_id!\n";
95 my $protocol = $opt_p || die "ERROR: No -p option passed for protocol name (SolCap markers are loaded with one file per protocol: SNP, Indel, SSR, CAPS) \n";
97 my $dbh = CXGN
::DB
::InsertDBH
->new({
100 dbargs
=> {AutoCommit
=> 0,
104 my $sql=CXGN
::DB
::SQLWrappers
->new($dbh);
108 # parameters for this specific instance
109 my $experiment_type_id = 1; # 'mapping'
111 my $accession_id ;#= '88x87'; # parent1 x parent2
113 # make an object to give us the values from the spreadsheet
114 my $ss = CXGN
::Tools
::File
::Spreadsheet
->new($opt_i);
115 my @markers = $ss->row_labels(); # row labels are the marker names
116 my @columns = $ss->column_labels(); # column labels are the headings for the data columns
118 # make sure the spreadsheet is how we expect it to be
119 @columns = qw
| marker protocol Temp fwd rev
|
120 or die"Column headings are not as expected";
122 for my $marker_name (@markers) {
124 print "marker: $marker_name\n";
126 my @marker_ids = CXGN
::Marker
::Tools
::marker_name_to_ids
($dbh,$marker_name);
127 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
128 # just get the first ID in the list (if the list is longer than 1, we've already died)
129 my $marker_id = $marker_ids[0];
132 $marker_id = CXGN
::Marker
::Tools
::insert_marker
($dbh,$marker_name);
133 print "marker added: $marker_id\n";
135 else { print "marker_id found: $marker_id\n" }
139 ((grep {/temp/} @columns) && ($ss->value_at($marker_name,'temp')))
140 ?
$ss->value_at($marker_name,'temp') : 55;
141 print "temp: $annealing_temp\n";
143 my $fwd = $ss->value_at($marker_name,'fwd')
144 or die "No foward primer found for $marker_name";
147 my $primer_id_fwd = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$fwd);
148 $primer_id_fwd = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($fwd)) if !$primer_id_fwd;
149 print "primer_id_fwd: $primer_id_fwd\n";
151 my $rev=$ss->value_at($marker_name,'rev')
152 or die"No reverse primer found for $marker_name";
154 my $primer_id_rev = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$rev);
155 $primer_id_rev = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($rev)) if !$primer_id_rev;
156 print "primer_id_rev: $primer_id_rev\n";
158 #my $protocol = $ss->value_at($marker_name,'protocol') ;
159 my ($pd, $primer_id_pd, $indel, $indel_id, $snp, $snp_id, $aspe1, $aspe2, $aspe1_id, $aspe2_id, $seq5, $seq5_id, $seq3, $seq3_id);
161 if (($protocol eq 'dCAPS') && ($pd = $ss->value_at($marker_name,'pd'))) {
163 $primer_id_pd = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$pd);
164 $primer_id_pd = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($pd)) if !$primer_id_pd;
165 print "primer_id_pd: $primer_id_pd\n";
166 }############## add here sequences for different protocols
167 if (($protocol eq 'Indel') && ($indel = $ss->value_at($marker_name,'Indel'))) {
168 print "indel: $indel\n";
169 $indel_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$indel);
170 $indel_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($indel)) if !$indel_id;
171 print "indel: $indel_id\n";
173 if (($protocol eq 'SNP') && ($snp = $ss->value_at($marker_name,'SNP'))) {
175 $snp_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$snp);
176 $snp_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($snp)) if !$snp_id;
177 print "snp_id: $snp_id\n";
179 $aspe1 = $ss->value_at($marker_name,'ASPE1');
180 print "aspe1: $aspe1\n";
182 $aspe1_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$aspe1);
183 $aspe1_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($aspe1)) if !$aspe1_id;
184 print "aspe1_id: $aspe1_id\n";
186 $aspe2 = $ss->value_at($marker_name,'ASPE2');
188 print "aspe2: $aspe2\n";
189 $aspe2_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$aspe2);
190 $aspe2_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($aspe2)) if !$aspe2_id;
191 print "aspe2_id: $aspe2_id\n";
194 if ($protocol eq 'SNP' || $protocol eq 'Indel') {
195 $seq3 = $ss->value_at($marker_name,'seq3');
196 print "seq3: $seq3\n";
197 $seq3_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$seq3);
198 $seq3_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($seq3)) if !$seq3_id;
199 print "seq3_id: $seq3_id\n";
201 $seq5 = $ss->value_at($marker_name,'seq5');
202 print "seq5: $seq5\n";
203 $seq5_id = CXGN
::Marker
::Tools
::get_sequence_id
($dbh,$seq5);
204 $seq5_id = CXGN
::Marker
::Tools
::insert
($dbh,"sequence","sequence_id",['sequence'], ($seq5)) if !$seq5_id;
205 print "seq5_id: $seq5_id\n";
208 # check if data already in pcr_experiment and marker_experiment, and if not, add it
209 # there's a lot of stuff to check here.. I know these aren't in the database so will come back later
211 my $names = ["marker_id", "annealing_temp", "primer_id_fwd",
212 "primer_id_rev", "experiment_type_id", "map_id","primer_id_pd","accession_id"];
213 my @fields = ($marker_id,$annealing_temp,$primer_id_fwd,
214 $primer_id_rev,$experiment_type_id,$map_id,$primer_id_pd,$accession_id);
216 # does this check if pcr_experiment already exists?
217 #NOW it does!! Should not have 2 rows with the same pcr_experiment data!
219 my $pcr_experiment=$sql->insert_unless_exists('pcr_experiment',{marker_id
=>$marker_id, ,annealing_temp
=>$annealing_temp,primer_id_fwd
=>$primer_id_fwd, primer_id_rev
=>$primer_id_rev, experiment_type_id
=>$experiment_type_id,map_id
=>$map_id,primer_id_pd
=>$primer_id_pd,accession_id
=>$accession_id } );
221 if($pcr_experiment->{inserted
}) { print "INSERTED NEW pcr_experiment" ; }
222 if($pcr_experiment->{exists}) { print "EXISTING pcr_experiment" ; }
224 my $pcr_experiment_id = $pcr_experiment->{id
};
225 #THIS DOES NOT CHECK FOR EXISTING ID
226 #CXGN::Marker::Tools::insert($dbh,"pcr_experiment","pcr_experiment_id",$names,@fields);
227 print "pcr experiment added: $pcr_experiment_id\n";
229 #new pcr_experiment object
230 my $pcr_ex = CXGN
::Marker
::PCR
::Experiment
->new($dbh, $pcr_experiment_id);
231 # set the sequence types
232 $pcr_ex->store_sequence('forward_primer', $fwd);
233 $pcr_ex->store_sequence('reverse_primer', $rev);
234 $pcr_ex->store_sequence('aspe_primer', $aspe1) if $aspe1;
235 $pcr_ex->store_sequence('aspe_primer', $aspe2) if $aspe2;
236 $pcr_ex->store_sequence('indel', $indel) if $indel;
237 $pcr_ex->store_sequence('SNP', $snp) if $snp;
238 $pcr_ex->store_sequence('five_prime_flanking_region', $seq5) if $seq5;
239 $pcr_ex->store_sequence('three_prime_flanking_region', $seq3) if $seq3;
240 print "Checking if map $map_id , marker $marker_id and protocol $protocol exist in marker_experiment\n";
241 # check for existing marker_experiment and update if found
242 my $q = "SELECT marker_experiment_id FROM marker_experiment "
243 . "JOIN marker_location USING (location_id) JOIN map_version "
244 . "USING (map_version_id) WHERE rflp_experiment_id is null "
245 . "AND map_id = ? AND marker_id = ? AND protocol ilike ?";
247 my $sth = $dbh->prepare($q);
248 $sth->execute($map_id,$marker_id,$protocol);
250 while (my ($id) = $sth->fetchrow_array()) {
251 print "Found experiment id $id\n";
256 if (@exp_id > 1) { print join(', ', @exp_id) and exit() }
257 # this really should not be the case
259 my $marker_experiment_id = $exp_id[0];
260 print "Updating marker_experiment $marker_experiment_id\n";
261 my $u = "UPDATE marker_experiment set pcr_experiment_id = ? where marker_experiment_id = ?";
262 $sth = $dbh->prepare($u);
263 $sth->execute($pcr_experiment_id,$marker_experiment_id);
264 print "UPDATED pcr_experiment_id = $pcr_experiment_id for marker_experiment $marker_experiment_id\n";
267 # if not loading map and experiments together, may want to match other protocols
269 # if not, insert new marker_experiment
271 print "No experiment_id found for marker $marker_name. SKIPPING!!!\n";
273 my $names = ["marker_id", "pcr_experiment_id", "protocol"];
274 my @fields = ($marker_id, $pcr_experiment_id, $protocol);
275 # 'SSR' or 'unknown'?
277 #my $marker_experiment_id = CXGN::Marker::Tools::insert
278 # ($dbh,"marker_experiment","marker_experiment_id",$names,@fields);
279 # print "marker experiment added: $marker_experiment_id\n";
280 #print "ADD $marker_experiment_id\n";
288 print "Failed; rolling back.\n";
294 print"Rolling back.\n";
298 print"Committing.\n";