Merge pull request #5199 from solgenomics/topic/tracking_transformation
[sgn.git] / bin / update_marker_synonyms.pl
blobc67994a799edbed0ea953f9ff78b08972a1c17b7
1 #!/usr/bin/perl
3 =head1
5 NAME
7 load_solcap_markers.pl
9 =head1 DESCRIPTION
11 basic script to add marker synonyms (aliased)
12 usage: update_marker_synonyms.pl -H hostname D dbname -i infile
15 Options:
17 =over 5
19 =item -H
21 The hostname of the server hosting the database.
23 =item -D
25 the name of the database
27 =item -t
29 (optional) test mode. Rollback after the script terminates. Database should not be affected. Good for test runs.
32 =item -i
34 infile with the marker synonyms
36 =item -u
38 update existing markers to new one.
39 BE CAREFUL with this option. It will DELETE existing marker id and will update it to
40 the new marker (as designated in the file row header. This was introduced since we loaded the Yellow Stuffer map with SNP markers, which were late used for the solcap chip, but loaded as new markers with different name. The old name will be kept as a marker alias)
42 =back
44 The tab-delimited file
46 Row headers should be the marker name
47 subsequenct columns should have as many synonyms as wanted.
48 Synonyms can also be "|" delimited
50 =head1 AUTHORS
52 Naama Menda <nm249@cornell.edu>
55 =cut
57 use strict;
58 use warnings;
59 use Carp;
61 use CXGN::Tools::File::Spreadsheet;
62 use CXGN::Tools::Text;
63 use CXGN::Marker;
64 use CXGN::Marker::Modifiable;
65 use CXGN::Marker::Tools;
66 use CXGN::DB::InsertDBH;
67 use Data::Dumper;
68 use CXGN::DB::SQLWrappers;
70 use Getopt::Std;
73 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_u);
75 getopts('H:D:tui:');
77 my $dbh = CXGN::DB::InsertDBH->new({
78 dbname => $opt_D,
79 dbhost => $opt_H,
80 dbargs => {AutoCommit => 0,
81 RaiseError => 1}
82 });
83 $dbh->do("SET search_path TO sgn");
85 my $sql=CXGN::DB::SQLWrappers->new($dbh);
87 eval {
88 # make an object to give us the values from the spreadsheet
89 my $ss = CXGN::Tools::File::Spreadsheet->new($opt_i);
90 my @markers = $ss->row_labels(); # row labels are the marker names
91 my @columns = $ss->column_labels(); # column labels are the headings for the data columns
93 # make sure the spreadsheet is how we expect it to be
94 for my $marker_name (@markers) {
96 print STDERR "\n\nMARKER: $marker_name\n";
98 my @marker_ids = CXGN::Marker::Tools::marker_name_to_ids($dbh,$marker_name);
99 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
100 # just get the first ID in the list (if the list is longer than 1, we've already died)
101 my $marker_id = $marker_ids[0];
103 if(!$marker_id) {
104 print STDERR "Marker $marker_name does not exist in database. SKIPPING!\n";
105 next();
107 else { print STDERR "marker_id found: $marker_id\n" }
108 my @non_unique_aliases;
109 foreach my $column (@columns) {
110 my $alias_names = $ss->value_at($marker_name, $column);
111 push (@non_unique_aliases , (split('\|', $alias_names)) ) unless $alias_names eq '0';
113 my %seen =() ;
114 my @aliases = grep { ! $seen{$_}++ } @non_unique_aliases ;
116 #see if all our aliases exist. if not, enter them.
117 SYNONYM: for my $alias(@aliases) {
118 my $q = $dbh->prepare('select marker_id from marker_alias where alias ilike ?');
119 $q->execute($alias);
120 if(my($id)=$q->fetchrow_array()) {
121 unless($id==$marker_id) {
122 if ($opt_u) { #update marker $id to $marker_id
123 #check if the marker is a SNP marker
124 my $protocol_q = $dbh->prepare("SELECT distinct protocol FROM sgn.marker_experiment WHERE marker_id = ?" ) ;
125 $protocol_q->execute($id);
126 my ($protocol) = $protocol_q->fetchrow_array;
127 if ($protocol eq 'SNP') {
128 print STDERR "UPDATING marker_id $id to new marker $marker_id!\n";
129 $dbh->do("update marker_experiment set marker_id = $marker_id where marker_id = $id");
130 $dbh->do("update pcr_experiment set marker_id = $marker_id where marker_id = $id");
131 $dbh->do("update snp set marker_id = $marker_id where marker_id = $id");
132 $dbh->do("delete from marker_alias where marker_id = $id");
133 $dbh->do("delete from marker where marker_id = $id");
134 } else { print STDERR "PROTOCOL for marker $id is $protocol! Not updating to marker_id $marker_id!!\n"; }
135 } else {
136 warn "Alias '$alias' found, but associated with marker ID '$id' instead of our ID ($marker_id). SKIPPING\n\n";
137 next SYNONYM;
141 else {
142 my $alias_id=$sql->insert("marker_alias",{alias=>$alias,marker_id=>$marker_id,preferred=>'f'});
143 print STDERR "INSERTING other alias '$alias'.\n";
150 if ($@) {
151 print $@;
152 print "Failed; rolling back.\n";
153 $dbh->rollback();
155 else {
156 print"Succeeded.\n";
157 if ($opt_t) {
158 print"Rolling back.\n";
159 $dbh->rollback();
161 else {
162 print"Committing.\n";
163 $dbh->commit();