minor fixes
[sgn.git] / bin / load_stock_data.pl
blob5f691c11726a27f6ff654392f9251c25a5c9f064
2 =head1
4 load_cassava_clone_names.pl
6 =head1 SYNOPSIS
8 $load_stock_data.pl -H [dbhost] -D [dbname] [-t]
10 =head1 COMMAND-LINE OPTIONS
12 -H host name
13 -D database name
14 -i infile
15 -t Test run . Rolling back at the end.
18 =head2 DESCRIPTION
20 Updated script for loading and adding cassava clone names and synonyms.
21 The owners of the stock accession are not stored in stockprop, but in phenome.stock_owner.
23 Naama Menda (nm249@cornell.edu)
25 April 2013
27 =cut
30 #!/usr/bin/perl
31 use strict;
32 use Getopt::Std;
33 use CXGN::Tools::File::Spreadsheet;
35 use CXGN::Phenome::Schema;
36 use Bio::Chado::Schema;
37 use CXGN::DB::InsertDBH;
38 use Carp qw /croak/ ;
40 use CXGN::Chado::Dbxref;
41 use CXGN::Chado::Phenotype;
42 use CXGN::People::Person;
43 use Try::Tiny;
44 use SGN::Model::Cvterm;
46 our ($opt_H, $opt_D, $opt_i, $opt_t);
48 getopts('H:i:tD:');
50 my $dbhost = $opt_H;
51 my $dbname = $opt_D;
52 my $file = $opt_i;
54 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
55 dbname=>$dbname,
56 dbargs => {AutoCommit => 1,
57 RaiseError => 1}
60 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do => ['SET search_path TO public;'] }
62 my $phenome_schema= CXGN::Phenome::Schema->connect( sub { $dbh->get_actual_dbh } , { on_connect_do => ['set search_path to public,phenome;'] } );
65 #getting the last database ids for resetting at the end in case of rolling back
66 # my $last_stockprop_id= $schema->resultset('Stock::Stockprop')->get_column('stockprop_id')->max;
67 # my $last_stock_id= $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
68 # my $last_stockrel_id= $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
69 # my $last_cvterm_id= $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
70 # my $last_cv_id= $schema->resultset('Cv::Cv')->get_column('cv_id')->max;
71 # my $last_db_id= $schema->resultset('General::Db')->get_column('db_id')->max;
72 # my $last_dbxref_id= $schema->resultset('General::Dbxref')->get_column('dbxref_id')->max;
73 # my $last_organism_id = $schema->resultset('Organism::Organism')->get_column('organism_id')->max;
75 # my %seq = (
76 # 'db_db_id_seq' => $last_db_id,
77 # 'dbxref_dbxref_id_seq' => $last_dbxref_id,
78 # 'cv_cv_id_seq' => $last_cv_id,
79 # 'cvterm_cvterm_id_seq' => $last_cvterm_id,
80 # 'stock_stock_id_seq' => $last_stock_id,
81 # 'stockprop_stockprop_id_seq' => $last_stockprop_id,
82 # 'stock_relationship_stock_relationship_id_seq' => $last_stockrel_id,
83 # 'organism_organism_id_seq' => $last_organism_id,
84 # );
86 #new spreadsheet, skip first column
87 my $spreadsheet=CXGN::Tools::File::Spreadsheet->new($file,1);
89 ##############
90 ##parse first the file with the clone names and synonyms. Load is into stock, and stockprop
91 #############
92 # population for grouping the clones
94 my $population_name = 'Cassava clones';
96 my $species = 'Manihot esculenta'; #
98 my $sp_person_id = CXGN::People::Person->get_person_by_username($dbh, 'pkulakow'); #add person id as an option.
99 die "Need to have a user pre-loaded in cassavabase! " if !$sp_person_id;
101 my $organism = $schema->resultset("Organism::Organism")->find_or_create( {
102 species => $species } );
103 my $organism_id = $organism->organism_id();
105 my $stock_rs = $schema->resultset("Stock::Stock");
108 #the cvterm for the population
109 print "Finding/creating cvterm for population\n";
110 my $population_cvterm = SGN::Model::Cvterm->get_cvterm_row($schema, 'population', 'stock_type');
112 print "Creating a stock for population $population_name (cvterm = " . $population_cvterm->name . ")\n";
113 my $population = $stock_rs->find_or_create(
115 'me.name' => $population_name,
116 'me.uniquename' => $population_name,
117 'me.organism_id' => $organism_id,
118 type_id => $population_cvterm->cvterm_id,
120 { join => 'type' }
123 #the cvterm for the accession
124 print "Finding/creating cvtem for 'stock type' \n";
125 my $accession_cvterm = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type');
127 #the cvterm for the relationship type
128 print "Finding/creating cvtem for stock relationship 'member_of' \n";
130 my $member_of = my $accession_cvterm = SGN::Model::Cvterm->get_cvterm_row($schema, 'member_of', 'stock_relationship');
132 ## For the stock module
133 ################################
135 print "parsing spreadsheet... \n";
136 my @rows = $spreadsheet->row_labels();
137 my @columns = $spreadsheet->column_labels();
139 #Contributing Organization Official IITA Clone Name IITA prefix Three letter Location code notation Preferred cassavabase clone name synonym1_oldclonename synonym2 synonym3 synonym4 synonym5 error synonym1 error synonym2 error synonym3 error synonym4 error synonym5 icass icass corrected
141 #11 IITA IITA-TMS-BI090563-1 IITA-TMS IBA TMS-BI090563-1 BI09/0563-1
143 my ($new_count,$existing, $count, $syn_count, $merge);
144 my $coderef= sub {
145 foreach my $num (@rows ) {
146 my $accession = $spreadsheet->value_at($num, 'Preferred cassavabase clone name');
147 if (!$accession) { next; }
148 print "\nCassava clone name is '" . $accession . "'\n";
149 $count++;
150 my $organization = $spreadsheet->value_at($num, 'Contributing Organization');
151 my $location_code = $spreadsheet->value_at($num, 'Three letter Location code');
153 my $iita_clone_name = $spreadsheet->value_at($num, "Official IITA Clone Name");
154 my $syn1 = $spreadsheet->value_at($num, "synonym1_oldclonename");
155 my $syn2 = $spreadsheet->value_at($num, "synonym2");
156 my $syn3 = $spreadsheet->value_at($num, "synonym3");
157 my $syn4 = $spreadsheet->value_at($num, "synonym4");
158 my $syn5 = $spreadsheet->value_at($num, "synonym5");
159 my $icass = $spreadsheet->value_at($num, "icass");
160 my $ploidy = $spreadsheet -> value_at($num, "PLOIDY");
161 my $genome_structure = $spreadsheet->value_at($num, "GENOME");
163 # see if a stock exists with any of the synonyms
164 my @stocks = $stock_rs->search( {
165 -or => [
166 uniquename => $accession,
167 uniquename => $iita_clone_name,
168 uniquename => $syn1,
169 uniquename => $syn2,
170 uniquename => $syn3,
171 uniquename => $syn4,
172 uniquename => $syn5,
173 uniquename => $icass,
174 ], }, );
175 my $existing_stock = $stock_rs->search( { uniquename => $accession } )->single;
176 foreach my $s(@stocks) {
177 print "Looking at accession $accession, Found stock '" . $s->uniquename . "(stock_id = " . $s->stock_id . ")'\n";
178 $existing++;
181 if (!@stocks) {
182 print "NEW stock: $accession\n";
183 $new_count++;
184 }elsif (!$existing_stock) {
186 my %stock_hash = map { $_->stock_id => $_ } @stocks;
187 my @keys = keys %stock_hash;
188 my @sorted = sort { $a <=> $b } @keys;
189 print "Existing stock_id is " . $sorted[0] . " name = " . ($stock_hash{$sorted[0]})->uniquename . "\n";
190 $existing_stock = $stock_hash{$sorted[0]};
191 $existing_stock->uniquename($accession);
192 $existing_stock->name($accession);
193 $existing_stock->update;
196 if (scalar(@stocks) >1) {
197 my @stock_names = map( $_->uniquename , @stocks );
198 my @stock_ids = map ($_->stock_id, @stocks);
199 print "MERGE: stocks " . join (", " , @stock_names) . "need to be merged\n";
200 $merge .= "$accession : merge stock_ids : " .join (", " , @stock_ids) . "( names: " . join (" | " , @stock_names) . ")\n";
202 my $stock = $existing_stock ? $existing_stock :
203 $schema->resultset("Stock::Stock")->find_or_create(
204 { organism_id => $organism_id,
205 name => $accession,
206 uniquename => $accession,
207 type_id => $accession_cvterm->cvterm_id(),
208 #description => '',
210 my $stock_id = $stock->stock_id;
211 print "Adding owner $sp_person_id \n";
212 #add the owner for this stock
213 $phenome_schema->resultset("StockOwner")->find_or_create(
215 stock_id => $stock->stock_id,
216 sp_person_id => $sp_person_id,
218 #####################
219 if ($organization) { $stock->create_stockprops( { organization => $organization }, { autocreate => 1 } ); }
220 if ($location_code) { $stock->create_stockprops( { location_code => $location_code }, { autocreate => 1 } ) } ;
221 if ($ploidy) { $stock->create_stockprops( { ploidy_level => $ploidy }, { autocreate => 1 }) };
222 if ($genome_structure) { $stock->create_stockprops( { genome_structure => $genome_structure }, { autocreate=>1})};
224 #the stock belongs to the population:
225 #add new stock_relationship
227 $population->find_or_create_related('stock_relationship_objects', {
228 type_id => $member_of->cvterm_id(),
229 subject_id => $stock->stock_id(),
230 } );
231 print "Adding synonyms #\n";
232 my @synonyms = ($iita_clone_name,$syn1, $syn2, $syn3, $syn4, $syn5, $icass);
233 foreach my $syn (@synonyms) {
234 if ($syn && defined($syn) && ($syn ne $accession) ) {
235 my $existing_synonym = $stock->search_related(
236 'stockprops' , {
237 'me.value' => $syn,
238 'type.name' => { ilike => '%synonym%' }
240 { join => 'type' }
241 )->single;
242 if (!$existing_synonym) {
243 $syn_count++;
244 print STDOUT "Adding synonym: $syn \n" ;
245 #add the synonym as a stockprop
246 $stock->create_stockprops({ stock_synonym => $syn},
247 {autocreate => 1,
248 #cv_name => 'local', #use the default stock_property cv
249 allow_duplicate_values=> 1,
255 my @props = $stock->search_related('stockprops');
256 foreach my $p ( @props ) {
257 print "**the prop value for stock " . $stock->name() . " is " . $p->value() . "\n" if $p;
259 #########
261 print "TOTAL: \n $count rows \n $new_count new accessions \n $existing existing stocks \n $syn_count new synonyms \n MERGE :\n $merge\n";
262 if ($opt_t) {
263 die "TEST RUN! rolling back\n";
268 try {
269 $schema->txn_do($coderef);
270 if (!$opt_t) { print "Transaction succeeded! Commiting stocks and their properties! \n\n"; }
271 } catch {
272 # Transaction failed
273 # foreach my $value ( keys %seq ) {
274 # my $maxval= $seq{$value} || 0;
275 # if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
276 # else { $dbh->do("SELECT setval ('$value', 1, false)"); }
278 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";