make QTL section on cvterm page open by default
[sgn.git] / bin / load_solcap_markers.pl
blob7ffa8d2374918cbf7413e340b1ba4ec3d2d89158
1 #!/usr/bin/perl
3 # basic script to load pcr marker information
6 # usage: load_marker_data.pl -H hostname D dbname -i infile -m map_id
8 # copy and edit this file as necessary
9 # common changes include the following:
10 # experiment_type_id
11 # accession_id
12 # different column headings
14 =head1
16 NAME
18 load_solcap_markers.pl - a script to load markers into the SGN database.
20 =head1 DESCRIPTION
22 usage: load_solcap_markers.pl
24 Options:
26 =over 6
28 =item -H
30 The hostname of the server hosting the database.
32 =item -D
34 the name of the database
36 =item -t
38 (optional) test mode. Rollback after the script terminates. Database should not be affected. Good for test runs.
40 =item p
42 protocol (e.g. SSR, SNP, Indel, dCAPs, etc)
44 =item -i
46 infile with the marker info
48 =item -m
50 sgn map_id
53 =back
55 The tab-delimited map file has the following columns:
57 Marker
58 Temp (annealing temperature)
59 fwd (forward primer)
60 rev (reverse primer)
64 =head1 AUTHORS
66 Naama Menda <nm249@cornell.edu>
69 =cut
71 use strict;
72 use warnings;
74 use CXGN::Tools::File::Spreadsheet;
75 use CXGN::Tools::Text;
76 use CXGN::Marker::Modifiable;
77 use CXGN::Marker::Tools;
78 use CXGN::Marker::Location;
79 use CXGN::Marker::PCR::Experiment;
80 use CXGN::Map::Version;
81 use CXGN::DB::Connection;
82 use CXGN::DB::InsertDBH;
83 use Data::Dumper;
84 use CXGN::DB::SQLWrappers;
86 use Getopt::Std;
89 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_p, $opt_m);
91 getopts('H:D:i:tp:m:');
94 my $map_id = $opt_m || die "Must pass a -m option with a valid sgn map_id!\n";
95 my $protocol = $opt_p || die "ERROR: No -p option passed for protocol name (SolCap markers are loaded with one file per protocol: SNP, Indel, SSR, CAPS) \n";
97 my $dbh = CXGN::DB::InsertDBH->new({
98 dbname => $opt_D,
99 dbhost => $opt_H,
100 dbargs => {AutoCommit => 0,
101 RaiseError => 1}
104 my $sql=CXGN::DB::SQLWrappers->new($dbh);
106 eval {
108 # parameters for this specific instance
109 my $experiment_type_id = 1; # 'mapping'
111 my $accession_id ;#= '88x87'; # parent1 x parent2
113 # make an object to give us the values from the spreadsheet
114 my $ss = CXGN::Tools::File::Spreadsheet->new($opt_i);
115 my @markers = $ss->row_labels(); # row labels are the marker names
116 my @columns = $ss->column_labels(); # column labels are the headings for the data columns
118 # make sure the spreadsheet is how we expect it to be
119 @columns = qw | marker protocol Temp fwd rev |
120 or die"Column headings are not as expected";
122 for my $marker_name (@markers) {
124 print "marker: $marker_name\n";
126 my @marker_ids = CXGN::Marker::Tools::marker_name_to_ids($dbh,$marker_name);
127 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
128 # just get the first ID in the list (if the list is longer than 1, we've already died)
129 my $marker_id = $marker_ids[0];
131 if(!$marker_id) {
132 $marker_id = CXGN::Marker::Tools::insert_marker($dbh,$marker_name);
133 print "marker added: $marker_id\n";
135 else { print "marker_id found: $marker_id\n" }
137 # clean this up?
138 my $annealing_temp =
139 ((grep {/temp/} @columns) && ($ss->value_at($marker_name,'temp')))
140 ? $ss->value_at($marker_name,'temp') : 55;
141 print "temp: $annealing_temp\n";
143 my $fwd = $ss->value_at($marker_name,'fwd')
144 or die "No foward primer found for $marker_name";
145 print "fwd: $fwd\n";
147 my $primer_id_fwd = CXGN::Marker::Tools::get_sequence_id($dbh,$fwd);
148 $primer_id_fwd = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($fwd)) if !$primer_id_fwd;
149 print "primer_id_fwd: $primer_id_fwd\n";
151 my $rev=$ss->value_at($marker_name,'rev')
152 or die"No reverse primer found for $marker_name";
153 print "rev: $rev\n";
154 my $primer_id_rev = CXGN::Marker::Tools::get_sequence_id($dbh,$rev);
155 $primer_id_rev = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($rev)) if !$primer_id_rev;
156 print "primer_id_rev: $primer_id_rev\n";
158 #my $protocol = $ss->value_at($marker_name,'protocol') ;
159 my ($pd, $primer_id_pd, $indel, $indel_id, $snp, $snp_id, $aspe1, $aspe2, $aspe1_id, $aspe2_id, $seq5, $seq5_id, $seq3, $seq3_id);
161 if (($protocol eq 'dCAPS') && ($pd = $ss->value_at($marker_name,'pd'))) {
162 print "pd: $pd\n";
163 $primer_id_pd = CXGN::Marker::Tools::get_sequence_id($dbh,$pd);
164 $primer_id_pd = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($pd)) if !$primer_id_pd;
165 print "primer_id_pd: $primer_id_pd\n";
166 }############## add here sequences for different protocols
167 if (($protocol eq 'Indel') && ($indel = $ss->value_at($marker_name,'Indel'))) {
168 print "indel: $indel\n";
169 $indel_id = CXGN::Marker::Tools::get_sequence_id($dbh,$indel);
170 $indel_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($indel)) if !$indel_id;
171 print "indel: $indel_id\n";
173 if (($protocol eq 'SNP') && ($snp = $ss->value_at($marker_name,'SNP'))) {
174 print "snp: $snp\n";
175 $snp_id = CXGN::Marker::Tools::get_sequence_id($dbh,$snp);
176 $snp_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($snp)) if !$snp_id;
177 print "snp_id: $snp_id\n";
179 $aspe1 = $ss->value_at($marker_name,'ASPE1');
180 print "aspe1: $aspe1\n";
181 if ($aspe1) {
182 $aspe1_id = CXGN::Marker::Tools::get_sequence_id($dbh,$aspe1);
183 $aspe1_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($aspe1)) if !$aspe1_id;
184 print "aspe1_id: $aspe1_id\n";
186 $aspe2 = $ss->value_at($marker_name,'ASPE2');
187 if ($aspe2) {
188 print "aspe2: $aspe2\n";
189 $aspe2_id = CXGN::Marker::Tools::get_sequence_id($dbh,$aspe2);
190 $aspe2_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($aspe2)) if !$aspe2_id;
191 print "aspe2_id: $aspe2_id\n";
194 if ($protocol eq 'SNP' || $protocol eq 'Indel') {
195 $seq3 = $ss->value_at($marker_name,'seq3');
196 print "seq3: $seq3\n";
197 $seq3_id = CXGN::Marker::Tools::get_sequence_id($dbh,$seq3);
198 $seq3_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($seq3)) if !$seq3_id;
199 print "seq3_id: $seq3_id\n";
201 $seq5 = $ss->value_at($marker_name,'seq5');
202 print "seq5: $seq5\n";
203 $seq5_id = CXGN::Marker::Tools::get_sequence_id($dbh,$seq5);
204 $seq5_id = CXGN::Marker::Tools::insert($dbh,"sequence","sequence_id",['sequence'], ($seq5)) if !$seq5_id;
205 print "seq5_id: $seq5_id\n";
208 # check if data already in pcr_experiment and marker_experiment, and if not, add it
209 # there's a lot of stuff to check here.. I know these aren't in the database so will come back later
211 my $names = ["marker_id", "annealing_temp", "primer_id_fwd",
212 "primer_id_rev", "experiment_type_id", "map_id","primer_id_pd","accession_id"];
213 my @fields = ($marker_id,$annealing_temp,$primer_id_fwd,
214 $primer_id_rev,$experiment_type_id,$map_id,$primer_id_pd,$accession_id);
216 # does this check if pcr_experiment already exists?
217 #NOW it does!! Should not have 2 rows with the same pcr_experiment data!
219 my $pcr_experiment=$sql->insert_unless_exists('pcr_experiment',{marker_id=>$marker_id, ,annealing_temp=>$annealing_temp,primer_id_fwd=>$primer_id_fwd, primer_id_rev=>$primer_id_rev, experiment_type_id=>$experiment_type_id,map_id=>$map_id,primer_id_pd=>$primer_id_pd,accession_id=>$accession_id } );
221 if($pcr_experiment->{inserted}) { print "INSERTED NEW pcr_experiment" ; }
222 if($pcr_experiment->{exists}) { print "EXISTING pcr_experiment" ; }
224 my $pcr_experiment_id = $pcr_experiment->{id};
225 #THIS DOES NOT CHECK FOR EXISTING ID
226 #CXGN::Marker::Tools::insert($dbh,"pcr_experiment","pcr_experiment_id",$names,@fields);
227 print "pcr experiment added: $pcr_experiment_id\n";
229 #new pcr_experiment object
230 my $pcr_ex = CXGN::Marker::PCR::Experiment->new($dbh, $pcr_experiment_id);
231 # set the sequence types
232 $pcr_ex->store_sequence('forward_primer', $fwd);
233 $pcr_ex->store_sequence('reverse_primer', $rev);
234 $pcr_ex->store_sequence('aspe_primer', $aspe1) if $aspe1;
235 $pcr_ex->store_sequence('aspe_primer', $aspe2) if $aspe2;
236 $pcr_ex->store_sequence('indel', $indel) if $indel;
237 $pcr_ex->store_sequence('SNP', $snp) if $snp;
238 $pcr_ex->store_sequence('five_prime_flanking_region', $seq5) if $seq5;
239 $pcr_ex->store_sequence('three_prime_flanking_region', $seq3) if $seq3;
240 print "Checking if map $map_id , marker $marker_id and protocol $protocol exist in marker_experiment\n";
241 # check for existing marker_experiment and update if found
242 my $q = "SELECT marker_experiment_id FROM marker_experiment "
243 . "JOIN marker_location USING (location_id) JOIN map_version "
244 . "USING (map_version_id) WHERE rflp_experiment_id is null "
245 . "AND map_id = ? AND marker_id = ? AND protocol ilike ?";
247 my $sth = $dbh->prepare($q);
248 $sth->execute($map_id,$marker_id,$protocol);
249 my @exp_id;
250 while (my ($id) = $sth->fetchrow_array()) {
251 print "Found experiment id $id\n";
252 push (@exp_id,$id);
255 if (@exp_id) {
256 if (@exp_id > 1) { print join(', ', @exp_id) and exit() }
257 # this really should not be the case
258 # update
259 my $marker_experiment_id = $exp_id[0];
260 print "Updating marker_experiment $marker_experiment_id\n";
261 my $u = "UPDATE marker_experiment set pcr_experiment_id = ? where marker_experiment_id = ?";
262 $sth = $dbh->prepare($u);
263 $sth->execute($pcr_experiment_id,$marker_experiment_id);
264 print "UPDATED pcr_experiment_id = $pcr_experiment_id for marker_experiment $marker_experiment_id\n";
267 # if not loading map and experiments together, may want to match other protocols
269 # if not, insert new marker_experiment
270 else {
271 print "No experiment_id found for marker $marker_name. SKIPPING!!!\n";
272 next();
273 my $names = ["marker_id", "pcr_experiment_id", "protocol"];
274 my @fields = ($marker_id, $pcr_experiment_id, $protocol);
275 # 'SSR' or 'unknown'?
277 #my $marker_experiment_id = CXGN::Marker::Tools::insert
278 # ($dbh,"marker_experiment","marker_experiment_id",$names,@fields);
279 # print "marker experiment added: $marker_experiment_id\n";
280 #print "ADD $marker_experiment_id\n";
286 if ($@) {
287 print $@;
288 print "Failed; rolling back.\n";
289 $dbh->rollback();
291 else {
292 print"Succeeded.\n";
293 if ($opt_t) {
294 print"Rolling back.\n";
295 $dbh->rollback();
297 else {
298 print"Committing.\n";
299 $dbh->commit();