7 $load_stock_data.pl -H [dbhost] -D [dbname] [-t]
9 =head1 COMMAND-LINE OPTIONS
14 -t Test run . Rolling back at the end.
19 Updated script for loading and adding banana accession names and synonyms.
20 The owners of the stock accession are not stored in stockprop, but in phenome.stock_owner.
22 Naama Menda (nm249@cornell.edu)
23 Guillaume Bauchet (gjb99@cornell.edu)
35 use CXGN
::Tools
::File
::Spreadsheet
;
37 use CXGN
::Phenome
::Schema
;
39 use Bio
::Chado
::Schema
;
40 use CXGN
::DB
::InsertDBH
;
43 use CXGN
::Chado
::Dbxref
;
44 use CXGN
::Chado
::Phenotype
;
45 use CXGN
::People
::Person
;
47 use SGN
::Model
::Cvterm
;
49 our ($opt_H, $opt_D, $opt_i, $opt_t);
57 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
59 dbargs
=> {AutoCommit
=> 0,
63 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do
=> ['SET search_path TO public;'] }
65 my $phenome_schema= CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh } , { on_connect_do
=> ['set search_path to public,phenome;'] } );
68 #getting the last database ids for resetting at the end in case of rolling back
69 # my $last_stockprop_id= $schema->resultset('Stock::Stockprop')->get_column('stockprop_id')->max;
70 # my $last_stock_id= $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
71 # my $last_stockrel_id= $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
72 # my $last_cvterm_id= $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
73 # my $last_cv_id= $schema->resultset('Cv::Cv')->get_column('cv_id')->max;
74 # my $last_db_id= $schema->resultset('General::Db')->get_column('db_id')->max;
75 # my $last_dbxref_id= $schema->resultset('General::Dbxref')->get_column('dbxref_id')->max;
76 # my $last_organism_id = $schema->resultset('Organism::Organism')->get_column('organism_id')->max;
79 # 'db_db_id_seq' => $last_db_id,
80 # 'dbxref_dbxref_id_seq' => $last_dbxref_id,
81 # 'cv_cv_id_seq' => $last_cv_id,
82 # 'cvterm_cvterm_id_seq' => $last_cvterm_id,
83 # 'stock_stock_id_seq' => $last_stock_id,
84 # 'stockprop_stockprop_id_seq' => $last_stockprop_id,
85 # 'stock_relationship_stock_relationship_id_seq' => $last_stockrel_id,
86 # 'organism_organism_id_seq' => $last_organism_id,
89 #new spreadsheet, skip first column
90 my $spreadsheet=CXGN
::Tools
::File
::Spreadsheet
->new($file); # 0 or 1
93 ##parse first the file with the clone names and synonyms. Load is into stock, and stockprop
95 # population for grouping the clones
97 my $population_name = 'Banana accessions';
101 my $organism = $schema->resultset("Organism::Organism")->find_or_create( {
102 species
=> $species } );
103 my $organism_id = $organism->organism_id();
105 my $stock_rs = $schema->resultset("Stock::Stock");
109 #the cvterm for the accession
110 print "Finding/creating cvterm for 'stock type' \n";
111 my $accession_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'accession', 'stock_type');
113 #the cvterm for the relationship type
114 print "Finding/creating cvterm for stock relationship 'member_of' \n";
116 my $member_of = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'member_of', 'stock_relationship');
118 #the cvterm for the population
119 print "Finding/creating cvterm for population\n";
120 my $population_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'population', 'stock_type');
122 print "Creating a stock for population $population_name (cvterm = " . $population_cvterm->name . ")\n";
124 my $population = $stock_rs->find_or_create(
126 'me.name' => $population_name,
127 'me.uniquename' => $population_name,
128 'me.organism_id' => $organism_id,
129 type_id
=> $population_cvterm->cvterm_id,
134 # link to MGIS database
136 my $mg_row = $schema->resultset("General::Db")->find( { name
=>'MGIS' });
138 my $new_mg_row = $schema->resultset("General::Db")->create( {
142 $mg_dbid = $new_mg_row->db_id();
145 $mg_dbid = $mg_row->db_id();
151 ## For the stock module
152 ################################
154 print "parsing spreadsheet... \n";
155 my @rows = $spreadsheet->row_labels();
156 my @columns = $spreadsheet->column_labels();
158 ## LABEL GENOTYPE SYNONYM1 SYNONYM2 SYNONYM3 SYNONYM4 PLOIDY GERMPLASM_GROUP CATEGORY INSTITUTE LOCATION LOCATION_CODE CURATOR NAME PROJECT_DESCRIPTION PROJECT_TYPE YEAR
159 # LABEL NAME UNIQUENAME GENOTYPE SYNONYM1 SYNONYM2 PLOIDY GERMPLASM_GROUP CATEGORY INSTITUTE LOCATION LOCATION_CODE CURATOR PROJECT_NAME PROJECT_DESCRIPTION PROJECT_TYPE YEAR
160 # 1 Calcutta-4 Calcutta-4_ITC0249_2015_Collection_IITA_SEN Calcutta-4 ITC0249 2X AA Collection IITA Sendusu SEN mibatte 2015_Collection_IITA_SEN Collection, IITA-Sendusu, 2015 2015_banana_germplasm 2015
163 my ($new_count,$existing, $count, $syn_count, $merge);
165 foreach my $num (@rows ) {
166 my $accession = $spreadsheet->value_at($num,'NAME');
167 if (!$accession) { next; }
168 print "\nBanana accession name is '" . $accession . "'\n";
170 my $organization = $spreadsheet->value_at($num, 'INSTITUTE');
171 my $location_code = $spreadsheet->value_at($num, 'LOCATION_CODE');
172 my $location = $spreadsheet->value_at($num, 'LOCATION');
173 my $ITC_name = $spreadsheet->value_at($num, "SYNONYM1");
174 #my $original_name = $spreadsheet->value_at($num, "GENOTYPE_NAME_ORIGINAL");
175 my $syn1 = $spreadsheet->value_at($num, "UNIQUENAME");
176 my $syn2 = $spreadsheet->value_at($num, "GENOTYPE");
177 my $syn3 = $spreadsheet->value_at($num, "SYNONYM1");
178 #my $syn4 = $spreadsheet->value_at($num, "SYNONYM4");
179 my $ploidy = $spreadsheet->value_at($num, "PLOIDY");
180 my $germplasm_group = $spreadsheet->value_at($num, "GERMPLASM_GROUP");
181 my $curator = $spreadsheet->value_at($num, "CURATOR");
182 my $name = $spreadsheet->value_at($num, "PROJECT_NAME");
183 my $category = $spreadsheet->value_at($num, "CATEGORY");
184 my $project_type = $spreadsheet->value_at($num, "PROJECT_TYPE");
185 my $year = $spreadsheet->value_at($num, "YEAR");
186 my $project_description = $spreadsheet->value_at($num, "PROJECT_DESCRIPTION");
187 #my $project_description = "$name $project_type ($year) $location";
189 print"project descr is $project_description /n";
191 # see if a stock exists with any of the synonyms
192 my @stocks = $stock_rs->search( {
194 uniquename
=> $accession,
198 #uniquename => $syn4,
201 my $existing_stock = $stock_rs->search( { uniquename
=> $accession } )->single;
202 foreach my $s(@stocks) {
203 print "Looking at accession $accession, Found stock '" . $s->uniquename . "(stock_id = " . $s->stock_id . ")'\n";
210 print "NEW stock: $accession\n";
212 }elsif (!$existing_stock) {
214 my %stock_hash = map { $_->stock_id => $_ } @stocks;
215 my @keys = keys %stock_hash;
216 my @sorted = sort { $a <=> $b } @keys;
217 print "Existing stock_id is " . $sorted[0] . " name = " . ($stock_hash{$sorted[0]})->uniquename . "\n";
218 $existing_stock = $stock_hash{$sorted[0]};
219 $existing_stock->uniquename($accession);
220 $existing_stock->name($accession);
221 $existing_stock->update;
224 if (scalar(@stocks) >1) {
225 my @stock_names = map( $_->uniquename , @stocks );
226 my @stock_ids = map ($_->stock_id, @stocks);
227 print "MERGE: stocks " . join (", " , @stock_names) . "need to be merged\n";
228 $merge .= "$accession : merge stock_ids : " .join (", " , @stock_ids) . "( names: " . join (" | " , @stock_names) . ")\n";
230 my $stock = $existing_stock ?
$existing_stock :
231 $schema->resultset("Stock::Stock")->find_or_create(
232 { organism_id
=> $organism_id,
234 uniquename
=> $accession,
235 type_id
=> $accession_cvterm->cvterm_id(),
238 my $stock_id = $stock->stock_id;
239 print "Adding owner $curator\n";
241 my $curator_person_id = CXGN
::People
::Person
->get_person_by_username($dbh, $curator); #add person id as an option.
242 if (!$curator_person_id) { die "Person $curator does not exist in the database! Please add this user before continuing \n";}
243 #add owner for this stock
244 $phenome_schema->resultset("StockOwner")->find_or_create(
246 stock_id
=> $stock->stock_id,
247 sp_person_id
=> $curator_person_id,
251 my $row = $schema -> resultset
("General::Dbxref")->find_or_create(
253 accession
=> $ITC_name, db_id
=>$mg_dbid,
255 my $link_row = $schema -> resultset
("Stock::StockDbxref")->find_or_create(
256 { stock_id
=> $stock_id,
257 dbxref_id
=> $row->dbxref_id(),
260 print "Creating a MGIS link for accession $accession (stock_id = $stock_id dbxref_id= " . $row->dbxref_id . ")\n";
262 # you store the location in nd_geolocation
263 # link it with each experiment
264 # the nd_experiment table has nd_geolocation_id column
266 # you store a project name for your group of phenotyping /genotyping experiments
267 # call it something like "2015 phenotyping trials in location ABC..."
268 # and link that project with the project_id for the breeding program using project_relationship
269 # if you don't have a project_id for the breeding program you create a new one in the project table
270 # and add to it a projectprop woth type_id of the "breeding_program" cvterm
271 # select * from stock join nd_experiment_stock ON nd_experiment_stock.nd_experiment_stock_id = stock.stock_id join nd_experiment on nd_experiment.nd_experiment_id = nd_experiment_stock.nd_experiment_id join nd_experiment_project on nd_experiment_project.nd_experiment_id = nd_experiment_stock.nd_experiment_id ;
272 my $banana_project = $schema->resultset("Project::Project")->find_or_create(
274 name
=> "2015_banana_germplasm_IITA_NARO",
276 description
=> $project_type,
279 my $project_description = "$name $project_type ($year) $location";
281 #print"project descr is $project_description /n";
283 my $project = $schema->resultset("Project::Project")->find_or_create(
286 description
=> $project_description,
289 #associate the new trial with breeding program
290 my $trial = CXGN
::Trial
->new({ schema
=> $schema, trial_id
=> $project->project_id() });
291 $trial->set_breeding_program($banana_project->project_id);
293 print "banana id = " . $banana_project->project_id . " project_id = " . $project->project_id . "\n";
294 #store the geolocation data and props:
295 my $geo_description = $location;
298 #####################
299 if ($organization) { $stock->create_stockprops( { organization
=> $organization }, { autocreate
=> 1 } ); }
300 if ($location_code) { $stock->create_stockprops( { location_code
=> $location_code }, { autocreate
=> 1 } ) } ;
301 if ($ploidy) { $stock->create_stockprops( { ploidy_level
=> $ploidy }, { autocreate
=> 1 }) };
302 if ($germplasm_group) { $stock->create_stockprops( { genome_structure
=> $germplasm_group }, { autocreate
=>1})};
304 #the stock belongs to the population:
305 #add new stock_relationship
307 $population->find_or_create_related('stock_relationship_objects', {
308 type_id
=> $member_of->cvterm_id(),
309 subject_id
=> $stock->stock_id(),
311 print "Adding synonyms #\n";
312 my @synonyms = ($accession,$syn2, $syn3);
313 foreach my $syn (@synonyms) {
314 if ($syn && defined($syn) && ($syn ne $accession) ) {
315 my $existing_synonym = $stock->search_related(
318 'type.name' => { ilike
=> '%synonym' }
322 if (!$existing_synonym) {
324 print STDOUT
"Adding synonym: $syn \n" ;
325 #add the synonym as a stockprop
326 $stock->create_stockprops({ stock_synonym
=> $syn},
328 #cv_name => 'local', #use default stock_property cv
329 allow_duplicate_values
=> 1,
335 my @props = $stock->search_related('stockprops');
336 foreach my $p ( @props ) {
337 print "**the prop value for stock " . $stock->name() . " is " . $p->value() . "\n" if $p;
349 print "TOTAL: \n $count rows \n $new_count new accessions \n $existing existing stocks \n $syn_count new synonyms \n MERGE :\n $merge\n";
350 #if ($opt_t) { die "test rolling back";}
355 $schema->txn_do($coderef);
357 print "Transaction succeeded! Commiting stocks and their properties! \n\n";
358 $schema->txn_commit();
361 $schema->txn_rollback();
362 die "TEST RUN! rolling back\n";
366 # foreach my $value ( keys %seq ) {
367 # my $maxval= $seq{$value} || 0;
368 # if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
369 # else { $dbh->do("SELECT setval ('$value', 1, false)"); }
371 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";