11 basic script to add marker synonyms (aliased)
12 usage: update_marker_synonyms.pl -H hostname D dbname -i infile
21 The hostname of the server hosting the database.
25 the name of the database
29 (optional) test mode. Rollback after the script terminates. Database should not be affected. Good for test runs.
34 infile with the marker synonyms
38 update existing markers to new one.
39 BE CAREFUL with this option. It will DELETE existing marker id and will update it to
40 the new marker (as designated in the file row header. This was introduced since we loaded the Yellow Stuffer map with SNP markers, which were late used for the solcap chip, but loaded as new markers with different name. The old name will be kept as a marker alias)
44 The tab-delimited file
46 Row headers should be the marker name
47 subsequenct columns should have as many synonyms as wanted.
48 Synonyms can also be "|" delimited
52 Naama Menda <nm249@cornell.edu>
61 use CXGN
::Tools
::File
::Spreadsheet
;
62 use CXGN
::Tools
::Text
;
64 use CXGN
::Marker
::Modifiable
;
65 use CXGN
::Marker
::Tools
;
66 use CXGN
::DB
::InsertDBH
;
68 use CXGN
::DB
::SQLWrappers
;
73 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_u);
77 my $dbh = CXGN
::DB
::InsertDBH
->new({
80 dbargs
=> {AutoCommit
=> 0,
83 $dbh->do("SET search_path TO sgn");
85 my $sql=CXGN
::DB
::SQLWrappers
->new($dbh);
88 # make an object to give us the values from the spreadsheet
89 my $ss = CXGN
::Tools
::File
::Spreadsheet
->new($opt_i);
90 my @markers = $ss->row_labels(); # row labels are the marker names
91 my @columns = $ss->column_labels(); # column labels are the headings for the data columns
93 # make sure the spreadsheet is how we expect it to be
94 for my $marker_name (@markers) {
96 print STDERR
"\n\nMARKER: $marker_name\n";
98 my @marker_ids = CXGN
::Marker
::Tools
::marker_name_to_ids
($dbh,$marker_name);
99 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
100 # just get the first ID in the list (if the list is longer than 1, we've already died)
101 my $marker_id = $marker_ids[0];
104 print STDERR
"Marker $marker_name does not exist in database. SKIPPING!\n";
107 else { print STDERR
"marker_id found: $marker_id\n" }
108 my @non_unique_aliases;
109 foreach my $column (@columns) {
110 my $alias_names = $ss->value_at($marker_name, $column);
111 push (@non_unique_aliases , (split('\|', $alias_names)) ) unless $alias_names eq '0';
114 my @aliases = grep { ! $seen{$_}++ } @non_unique_aliases ;
116 #see if all our aliases exist. if not, enter them.
117 SYNONYM
: for my $alias(@aliases) {
118 my $q = $dbh->prepare('select marker_id from marker_alias where alias ilike ?');
120 if(my($id)=$q->fetchrow_array()) {
121 unless($id==$marker_id) {
122 if ($opt_u) { #update marker $id to $marker_id
123 #check if the marker is a SNP marker
124 my $protocol_q = $dbh->prepare("SELECT distinct protocol FROM sgn.marker_experiment WHERE marker_id = ?" ) ;
125 $protocol_q->execute($id);
126 my ($protocol) = $protocol_q->fetchrow_array;
127 if ($protocol eq 'SNP') {
128 print STDERR
"UPDATING marker_id $id to new marker $marker_id!\n";
129 $dbh->do("update marker_experiment set marker_id = $marker_id where marker_id = $id");
130 $dbh->do("update pcr_experiment set marker_id = $marker_id where marker_id = $id");
131 $dbh->do("update snp set marker_id = $marker_id where marker_id = $id");
132 $dbh->do("delete from marker_alias where marker_id = $id");
133 $dbh->do("delete from marker where marker_id = $id");
134 } else { print STDERR
"PROTOCOL for marker $id is $protocol! Not updating to marker_id $marker_id!!\n"; }
136 warn "Alias '$alias' found, but associated with marker ID '$id' instead of our ID ($marker_id). SKIPPING\n\n";
142 my $alias_id=$sql->insert("marker_alias",{alias
=>$alias,marker_id
=>$marker_id,preferred
=>'f'});
143 print STDERR
"INSERTING other alias '$alias'.\n";
152 print "Failed; rolling back.\n";
158 print"Rolling back.\n";
162 print"Committing.\n";