Merge pull request #3948 from solgenomics/lukasmueller-patch-1
[sgn.git] / bin / old_load_markers.pl
blob516f152daf153542d29575a8dbf047c1114fb965
1 #!/usr/bin/perl
3 # basic script to load pcr marker information
5 # usage: load_marker_data <dbhost> <dbname> <COMMIT|ROLLBACK> <map_id> <map file>
6 # example: load_marker_data.pl db-devel sandbox COMMIT 9 marker-file.csv
8 # copy and edit this file as necessary
9 # common changes include the following:
10 # experiment_type_id
11 # accession_id
12 # different column headings
14 use strict;
16 use CXGN::Tools::File::Spreadsheet;
17 use CXGN::Tools::Text;
18 use CXGN::Marker::Modifiable;
19 use CXGN::Marker::Tools;
20 use CXGN::Marker::Location;
21 use CXGN::Marker::PCR::Experiment;
22 use CXGN::Map::Version;
23 use CXGN::DB::Connection;
24 use CXGN::DB::InsertDBH;
25 use Data::Dumper;
26 #CXGN::DB::Connection->verbose(0);
28 my ($map_id, $map_file);
30 unless ($ARGV[0]) {
31 print "Usage: load_marker_data <dbhost> <dbname> "
32 . "<COMMIT|ROLLBACK> <map_id> <marker file>\n" and exit();
34 unless ($ARGV[0] eq 'db-devel' or $ARGV[0] eq 'db') {
35 die "First argument must be valid database host";
37 unless ($ARGV[1] eq 'sandbox' or $ARGV[1] eq 'cxgn') {
38 die "Second argument must be valid database name";
40 unless ($ARGV[2] eq 'COMMIT' or $ARGV[2] eq 'ROLLBACK') {
41 die "Third argument must be either COMMIT or ROLLBACK";
43 unless ($map_id=$ARGV[3]) {
44 die "Fourth argument must be the map_id";
46 unless ($map_file=$ARGV[4]) {
47 die "Fifth argument must be the marker file";
50 my $dbh=CXGN::DB::InsertDBH::connect
51 ({dbhost=>$ARGV[0],dbname=>$ARGV[1],dbschema=>'sgn'});
53 eval {
55 # parameters for this specific instance
56 my $experiment_type_id = 1; # 'mapping'
58 # my $accession_id = '88x87'; # parent1 x parent2
60 # make an object to give us the values from the spreadsheet
61 my $ss = CXGN::Tools::File::Spreadsheet->new($map_file);
62 my @markers = $ss->row_labels(); # row labels are the marker names
63 my @columns = $ss->column_labels(); # column labels are the headings for the data columns
65 # make sure the spreadsheet is how we expect it to be
66 @columns = qw | marker protocol temp fwd rev pd |
67 or die"Column headings are not as expected";
69 for my $marker_name (@markers) {
71 print "marker: $marker_name\n";
73 my @marker_ids = CXGN::Marker::Tools::marker_name_to_ids($dbh,$marker_name);
74 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
75 # just get the first ID in the list (if the list is longer than 1, we've already died)
76 my $marker_id = @marker_ids[0];
78 if(!$marker_id) {
79 $marker_id = CXGN::Marker::Tools::insert_marker($dbh,$marker_name);
80 print "marker added: $marker_id\n";
82 else { print "marker_id found: $marker_id\n" }
84 # clean this up?
85 my $annealing_temp =
86 ((grep {/temp/} @columns) && ($ss->value_at($marker_name,'temp')))
87 ? $ss->value_at($marker_name,'temp') : 55;
88 print "temp: $annealing_temp\n";
90 my $fwd = $ss->value_at($marker_name,'fwd')
91 or die "No foward primer found for $marker_name";
92 print "fwd: $fwd\n";
94 my $primer_id_fwd = CXGN::Marker::Tools::get_sequence_id($dbh,$fwd);
96 print "primer_id_fwd: $primer_id_fwd\n";
98 my $rev=$ss->value_at($marker_name,'rev')
99 or die"No reverse primer found for $marker_name";
100 print "rev: $rev\n";
101 my $primer_id_rev = CXGN::Marker::Tools::get_sequence_id($dbh,$rev);
102 print "primer_id_rev: $primer_id_rev\n";
104 my $protocol = $ss->value_at($marker_name,'protocol');
105 my ($pd, $primer_id_pd);
106 if (($protocol eq 'dCAPS') && ($pd = $ss->value_at($marker_name,'pd'))) {
107 print "pd: $pd\n";
108 $primer_id_pd = CXGN::Marker::Tools::get_sequence_id($dbh,$pd);
109 print "primer_id_pd: $primer_id_pd\n";
112 # check if data already in pcr_experiment and marker_experiment, and if not, add it
113 # there's a lot of stuff to check here.. I know these aren't in the database so will come back later
115 my $names = ["marker_id", "annealing_temp", "primer_id_fwd",
116 "primer_id_rev", "experiment_type_id", "map_id","primer_id_pd","accession_id"];
117 my @fields = ($marker_id,$annealing_temp,$primer_id_fwd,
118 $primer_id_rev,$experiment_type_id,$map_id,$primer_id_pd,$accession_id);
121 # does this check if pcr_experiment already exists?
122 my $pcr_experiment_id = CXGN::Marker::Tools::insert($dbh,"pcr_experiment","pcr_experiment_id",$names,@fields);
123 print "pcr experiment added: $pcr_experiment_id\n";
125 # check for existing marker_experiment and update if found
126 my $q = "SELECT marker_experiment_id FROM marker_experiment "
127 . "JOIN marker_location USING (location_id) JOIN map_version "
128 . "USING (map_version_id) WHERE rflp_experiment_id is null "
129 . "AND map_id = ? AND marker_id = ? AND protocol = ?";
131 my $sth = $dbh->prepare($q);
132 $sth->execute($map_id,$marker_id,$protocol);
133 my @exp_id;
134 while (my $id = $sth->fetchrow_array()) { push (@exp_id,$id) }
136 if (@exp_id) {
137 if (@exp_id > 1) { print join(', ', @exp_id) and exit() }
138 # this really should not be the case
139 # update
140 my $marker_experiment_id = $exp_id[0];
142 my $u = "UPDATE marker_experiment set pcr_experiment_id = ? where marker_experiment_id = ?";
143 $sth = $dbh->prepare($u);
144 $sth->execute($pcr_experiment_id,$marker_experiment_id);
146 print "UPDATE $marker_experiment_id\n";
149 # if not loading map and experiments together, may want to match other protocols
151 # if not, insert new marker_experiment
152 else {
153 my $names = ["marker_id", "pcr_experiment_id", "protocol"];
154 my @fields = ($marker_id, $pcr_experiment_id, $protocol);
155 # 'SSR' or 'unknown'?
157 my $marker_experiment_id = CXGN::Marker::Tools::insert
158 ($dbh,"marker_experiment","marker_experiment_id",$names,@fields);
159 # print "marker experiment added: $marker_experiment_id\n";
160 print "ADD $marker_experiment_id\n";
166 if ($@) {
167 print $@;
168 print "Failed; rolling back.\n";
169 $dbh->rollback();
171 else {
172 print"Succeeded.\n";
173 if ($ARGV[2] eq 'COMMIT') {
174 print"Committing.\n";
175 $dbh->commit();
177 else {
178 print"Rolling back.\n";
179 $dbh->rollback();