adding the product to the loading script
[sgn.git] / bin / load_solcap_markers.pl
blobc418aaedcd194e94f0d81d85cb2db5a6b878e455
1 #!/usr/bin/perl
3 # basic script to load pcr marker information
6 # usage: load_marker_data.pl -H hostname D dbname -i infile -m map_id
8 # copy and edit this file as necessary
9 # common changes include the following:
10 # experiment_type_id
11 # accession_id
12 # different column headings
14 =head1
16 NAME
18 load_solcap_markers.pl - a script to load markers into the SGN database.
20 =head1 DESCRIPTION
22 usage: load_solcap_markers.pl
24 Options:
26 =over 6
28 =item -H
30 The hostname of the server hosting the database.
32 =item -D
34 the name of the database
36 =item -t
38 (optional) test mode. Rollback after the script terminates. Database should not be affected. Good for test runs.
40 =item p
42 protocol (e.g. SSR, SNP, Indel, dCAPs, etc)
44 =item -i
46 infile with the marker info
48 =item -m
50 sgn map_id
53 =back
55 The tab-delimited map file has the following columns:
57 Marker
58 Temp (annealing temperature)
59 fwd (forward primer)
60 rev (reverse primer)
62 (optional columns, depending on the protocol)
63 pd (an additional primer seq, if protocol is dCAPS)
64 Indel (additional primer seq, if protocol is Indel)
65 ASPE1 (additional primer seq, if protocol is SNP)
66 ASPE2 (additional primer seq, if protocol is SNP)
67 seq3 (3' flanking sequence, if protocol is SNP or Indel)
68 seq5 (5' flanking sequence, if protocol is SNP or Indel)
70 <accession name a> provide band sizes for accession name a
71 (provided using -a)
72 <accession name b> provide band sizes for accession name b
73 (provided using -b)
75 =head1 AUTHORS
77 Naama Menda <nm249@cornell.edu>
80 =cut
82 use strict;
83 use warnings;
85 use CXGN::Tools::File::Spreadsheet;
86 use CXGN::Tools::Text;
87 use CXGN::Marker::Modifiable;
88 use CXGN::Marker::Tools;
89 use CXGN::Marker::Location;
90 use CXGN::Marker::PCR::Experiment;
91 use CXGN::Map::Version;
92 use CXGN::DB::Connection;
93 use CXGN::DB::InsertDBH;
94 use Data::Dumper;
95 use CXGN::DB::SQLWrappers;
97 use Getopt::Std;
100 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_p, $opt_m, $opt_a, $opt_b);
102 getopts('H:D:i:tp:m:');
105 my $map_id = $opt_m || die "Must pass a -m option with a valid sgn map_id!\n";
106 my $protocol = $opt_p || die "ERROR: No -p option passed for protocol name (SolCap markers are loaded with one file per protocol: SNP, Indel, SSR, CAPS) \n";
108 my $dbh = CXGN::DB::InsertDBH->new({
109 dbname => $opt_D,
110 dbhost => $opt_H,
111 dbargs => {AutoCommit => 0,
112 RaiseError => 1}
115 my $sql=CXGN::DB::SQLWrappers->new($dbh);
117 eval {
119 # parameters for this specific instance
120 my $experiment_type_id = 1; # 'mapping'
122 my $accession_id ;#= '88x87'; # parent1 x parent2
124 # make an object to give us the values from the spreadsheet
125 my $ss = CXGN::Tools::File::Spreadsheet->new($opt_i);
126 my @markers = $ss->row_labels(); # row labels are the marker names
127 my @columns = $ss->column_labels(); # column labels are the headings for the data columns
129 # make sure the spreadsheet is how we expect it to be
130 @columns = qw | marker protocol Temp fwd rev |
131 or die"Column headings are not as expected";
133 for my $marker_name (@markers) {
135 print "marker: $marker_name\n";
137 my @marker_ids = CXGN::Marker::Tools::marker_name_to_ids($dbh,$marker_name);
138 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
139 # just get the first ID in the list (if the list is longer than 1, we've already died)
140 my $marker_id = $marker_ids[0];
142 if(!$marker_id) {
143 $marker_id = CXGN::Marker::Tools::insert_marker($dbh,$marker_name);
144 print "marker added: $marker_id\n";
146 else { print "marker_id found: $marker_id\n" }
148 # clean this up?
149 my $annealing_temp =
150 ((grep {/temp/} @columns) && ($ss->value_at($marker_name,'temp')))
151 ? $ss->value_at($marker_name,'temp') : 55;
152 print "temp: $annealing_temp\n";
154 my $fwd = $ss->value_at($marker_name,'fwd')
155 or die "No foward primer found for $marker_name";
156 print "fwd: $fwd\n";
158 my $primer_id_fwd = CXGN::Marker::Tools::get_sequence_id($dbh,$fwd);
159 $primer_id_fwd = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($fwd)) if !$primer_id_fwd;
160 print "primer_id_fwd: $primer_id_fwd\n";
162 my $rev=$ss->value_at($marker_name,'rev')
163 or die"No reverse primer found for $marker_name";
164 print "rev: $rev\n";
165 my $primer_id_rev = CXGN::Marker::Tools::get_sequence_id($dbh,$rev);
166 $primer_id_rev = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($rev)) if !$primer_id_rev;
167 print "primer_id_rev: $primer_id_rev\n";
169 #my $protocol = $ss->value_at($marker_name,'protocol') ;
170 my ($pd, $primer_id_pd, $indel, $indel_id, $snp, $snp_id, $aspe1, $aspe2, $aspe1_id, $aspe2_id, $seq5, $seq5_id, $seq3, $seq3_id);
172 if (($protocol eq 'dCAPS') && ($pd = $ss->value_at($marker_name,'pd'))) {
173 print "pd: $pd\n";
174 $primer_id_pd = CXGN::Marker::Tools::get_sequence_id($dbh,$pd);
175 $primer_id_pd = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($pd)) if !$primer_id_pd;
176 print "primer_id_pd: $primer_id_pd\n";
177 }############## add here sequences for different protocols
178 if (($protocol eq 'Indel') && ($indel = $ss->value_at($marker_name,'Indel'))) {
179 print "indel: $indel\n";
180 $indel_id = CXGN::Marker::Tools::get_sequence_id($dbh,$indel);
181 $indel_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($indel)) if !$indel_id;
182 print "indel: $indel_id\n";
184 if (($protocol eq 'SNP') && ($snp = $ss->value_at($marker_name,'SNP'))) {
185 print "snp: $snp\n";
186 $snp_id = CXGN::Marker::Tools::get_sequence_id($dbh,$snp);
187 $snp_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($snp)) if !$snp_id;
188 print "snp_id: $snp_id\n";
190 $aspe1 = $ss->value_at($marker_name,'ASPE1');
191 print "aspe1: $aspe1\n";
192 if ($aspe1) {
193 $aspe1_id = CXGN::Marker::Tools::get_sequence_id($dbh,$aspe1);
194 $aspe1_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($aspe1)) if !$aspe1_id;
195 print "aspe1_id: $aspe1_id\n";
197 $aspe2 = $ss->value_at($marker_name,'ASPE2');
198 if ($aspe2) {
199 print "aspe2: $aspe2\n";
200 $aspe2_id = CXGN::Marker::Tools::get_sequence_id($dbh,$aspe2);
201 $aspe2_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($aspe2)) if !$aspe2_id;
202 print "aspe2_id: $aspe2_id\n";
205 if ($protocol eq 'SNP' || $protocol eq 'Indel') {
206 $seq3 = $ss->value_at($marker_name,'seq3');
207 print "seq3: $seq3\n";
208 $seq3_id = CXGN::Marker::Tools::get_sequence_id($dbh,$seq3);
209 $seq3_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($seq3)) if !$seq3_id;
210 print "seq3_id: $seq3_id\n";
212 $seq5 = $ss->value_at($marker_name,'seq5');
213 print "seq5: $seq5\n";
214 $seq5_id = CXGN::Marker::Tools::get_sequence_id($dbh,$seq5);
215 $seq5_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($seq5)) if !$seq5_id;
216 print "seq5_id: $seq5_id\n";
219 # check if data already in pcr_experiment and marker_experiment, and if not, add it
220 # there's a lot of stuff to check here.. I know these aren't in the database so will come back later
222 my $names = ["marker_id", "annealing_temp", "primer_id_fwd",
223 "primer_id_rev", "experiment_type_id", "map_id","primer_id_pd","accession_id"];
224 my @fields = ($marker_id,$annealing_temp,$primer_id_fwd,
225 $primer_id_rev,$experiment_type_id,$map_id,$primer_id_pd,$accession_id);
227 # does this check if pcr_experiment already exists?
228 #NOW it does!! Should not have 2 rows with the same pcr_experiment data!
230 my $pcr_experiment=$sql->insert_unless_exists('pcr_experiment',{marker_id=>$marker_id, ,annealing_temp=>$annealing_temp,primer_id_fwd=>$primer_id_fwd, primer_id_rev=>$primer_id_rev, experiment_type_id=>$experiment_type_id,map_id=>$map_id,primer_id_pd=>$primer_id_pd,accession_id=>$accession_id } );
232 if($pcr_experiment->{inserted}) { print "INSERTED NEW pcr_experiment" ; }
233 if($pcr_experiment->{exists}) { print "EXISTING pcr_experiment" ; }
235 my $pcr_experiment_id = $pcr_experiment->{id};
236 #THIS DOES NOT CHECK FOR EXISTING ID
237 #CXGN::Marker::Tools::insert($dbh,"pcr_experiment","pcr_experiment_id",$names,@fields);
238 print "pcr experiment added: $pcr_experiment_id\n";
240 #new pcr_experiment object
241 my $pcr_ex = CXGN::Marker::PCR::Experiment->new($dbh, $pcr_experiment_id);
242 # set the sequence types
243 $pcr_ex->store_sequence('forward_primer', $fwd);
244 $pcr_ex->store_sequence('reverse_primer', $rev);
245 $pcr_ex->store_sequence('aspe_primer', $aspe1) if $aspe1;
246 $pcr_ex->store_sequence('aspe_primer', $aspe2) if $aspe2;
247 $pcr_ex->store_sequence('indel', $indel) if $indel;
248 $pcr_ex->store_sequence('SNP', $snp) if $snp;
249 $pcr_ex->store_sequence('five_prime_flanking_region', $seq5) if $seq5;
250 $pcr_ex->store_sequence('three_prime_flanking_region', $seq3) if $seq3;
251 print "Checking if map $map_id , marker $marker_id and protocol $protocol exist in marker_experiment\n";
252 # check for existing marker_experiment and update if found
253 my $q = "SELECT marker_experiment_id FROM marker_experiment "
254 . "JOIN marker_location USING (location_id) JOIN map_version "
255 . "USING (map_version_id) WHERE rflp_experiment_id is null "
256 . "AND map_id = ? AND marker_id = ? AND protocol ilike ?";
258 my $sth = $dbh->prepare($q);
259 $sth->execute($map_id,$marker_id,$protocol);
260 my @exp_id;
261 while (my ($id) = $sth->fetchrow_array()) {
262 print "Found experiment id $id\n";
263 push (@exp_id,$id);
266 if (@exp_id) {
267 if (@exp_id > 1) { print join(', ', @exp_id) and exit() }
268 # this really should not be the case
269 # update
270 my $marker_experiment_id = $exp_id[0];
271 print "Updating marker_experiment $marker_experiment_id\n";
272 my $u = "UPDATE marker_experiment set pcr_experiment_id = ? where marker_experiment_id = ?";
273 $sth = $dbh->prepare($u);
274 $sth->execute($pcr_experiment_id,$marker_experiment_id);
275 print "UPDATED pcr_experiment_id = $pcr_experiment_id for marker_experiment $marker_experiment_id\n";
278 # if not loading map and experiments together, may want to match other protocols
280 # if not, insert new marker_experiment
281 else {
282 print "No experiment_id found for marker $marker_name. SKIPPING!!!\n";
283 next();
284 my $names = ["marker_id", "pcr_experiment_id", "protocol"];
285 my @fields = ($marker_id, $pcr_experiment_id, $protocol);
286 # 'SSR' or 'unknown'?
288 #my $marker_experiment_id = CXGN::Marker::Tools::insert
289 # ($dbh,"marker_experiment","marker_experiment_id",$names,@fields);
290 # print "marker experiment added: $marker_experiment_id\n";
291 #print "ADD $marker_experiment_id\n";
297 if ($@) {
298 print $@;
299 print "Failed; rolling back.\n";
300 $dbh->rollback();
302 else {
303 print"Succeeded.\n";
304 if ($opt_t) {
305 print"Rolling back.\n";
306 $dbh->rollback();
308 else {
309 print"Committing.\n";
310 $dbh->commit();