carousel bordeaux image credit
[sgn.git] / bin / load_stock_banana.pl
blob1ebcafd25400314c7905d93bae5e8e3cb15f6392
1 =head1
3 load_stock_data.pl
5 =head1 SYNOPSIS
7 $load_stock_data.pl -H [dbhost] -D [dbname] [-t]
9 =head1 COMMAND-LINE OPTIONS
11 -H host name
12 -D database name
13 -i infile
14 -t Test run . Rolling back at the end.
17 =head2 DESCRIPTION
19 Updated script for loading and adding banana accession names and synonyms.
20 The owners of the stock accession are not stored in stockprop, but in phenome.stock_owner.
22 Naama Menda (nm249@cornell.edu)
23 Guillaume Bauchet (gjb99@cornell.edu)
25 July 2015
29 =cut
32 #!/usr/bin/perl
33 use strict;
34 use Getopt::Std;
35 use CXGN::Tools::File::Spreadsheet;
37 use CXGN::Phenome::Schema;
38 use CXGN::BreedersToolbox::Projects;
39 use Bio::Chado::Schema;
40 use CXGN::DB::InsertDBH;
41 use Carp qw /croak/ ;
43 use CXGN::Chado::Dbxref;
44 use CXGN::Chado::Phenotype;
45 use CXGN::People::Person;
46 use Try::Tiny;
48 our ($opt_H, $opt_D, $opt_i, $opt_t);
50 getopts('H:i:tD:');
52 my $dbhost = $opt_H;
53 my $dbname = $opt_D;
54 my $file = $opt_i;
56 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
57 dbname=>$dbname,
58 dbargs => {AutoCommit => 0,
59 RaiseError => 1}
62 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do => ['SET search_path TO public;'] }
64 my $phenome_schema= CXGN::Phenome::Schema->connect( sub { $dbh->get_actual_dbh } , { on_connect_do => ['set search_path to public,phenome;'] } );
67 #getting the last database ids for resetting at the end in case of rolling back
68 # my $last_stockprop_id= $schema->resultset('Stock::Stockprop')->get_column('stockprop_id')->max;
69 # my $last_stock_id= $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
70 # my $last_stockrel_id= $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
71 # my $last_cvterm_id= $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
72 # my $last_cv_id= $schema->resultset('Cv::Cv')->get_column('cv_id')->max;
73 # my $last_db_id= $schema->resultset('General::Db')->get_column('db_id')->max;
74 # my $last_dbxref_id= $schema->resultset('General::Dbxref')->get_column('dbxref_id')->max;
75 # my $last_organism_id = $schema->resultset('Organism::Organism')->get_column('organism_id')->max;
77 # my %seq = (
78 # 'db_db_id_seq' => $last_db_id,
79 # 'dbxref_dbxref_id_seq' => $last_dbxref_id,
80 # 'cv_cv_id_seq' => $last_cv_id,
81 # 'cvterm_cvterm_id_seq' => $last_cvterm_id,
82 # 'stock_stock_id_seq' => $last_stock_id,
83 # 'stockprop_stockprop_id_seq' => $last_stockprop_id,
84 # 'stock_relationship_stock_relationship_id_seq' => $last_stockrel_id,
85 # 'organism_organism_id_seq' => $last_organism_id,
86 # );
88 #new spreadsheet, skip first column
89 my $spreadsheet=CXGN::Tools::File::Spreadsheet->new($file); # 0 or 1
91 ##############
92 ##parse first the file with the clone names and synonyms. Load is into stock, and stockprop
93 #############
94 # population for grouping the clones
96 my $population_name = 'Banana accessions';
98 my $species = 'Musa';
100 my $organism = $schema->resultset("Organism::Organism")->find_or_create( {
101 species => $species } );
102 my $organism_id = $organism->organism_id();
104 my $stock_rs = $schema->resultset("Stock::Stock");
108 #the cvterm for the accession
109 print "Finding/creating cvterm for 'stock type' \n";
110 my $accession_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
111 { name => 'accession',
112 cv => 'stock type',
113 db => 'null',
114 dbxref => 'accession',
117 #the cvterm for the relationship type
118 print "Finding/creating cvterm for stock relationship 'member_of' \n";
120 my $member_of = $schema->resultset("Cv::Cvterm")->create_with(
121 { name => 'member_of',
122 cv => 'stock relationship',
123 db => 'null',
124 dbxref => 'member_of',
127 #the cvterm for the population
128 print "Finding/creating cvterm for population\n";
129 my $population_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
130 { name => 'population',
131 cv => 'stock type',
132 db => 'null',
133 dbxref => 'population',
136 print "Creating a stock for population $population_name (cvterm = " . $population_cvterm->name . ")\n";
138 my $population = $stock_rs->find_or_create(
140 'me.name' => $population_name,
141 'me.uniquename' => $population_name,
142 'me.organism_id' => $organism_id,
143 type_id => $population_cvterm->cvterm_id,
145 { join => 'type' }
148 # link to MGIS database
149 my $mg_dbid;
150 my $mg_row = $schema->resultset("General::Db")->find( { name=>'MGIS' });
151 if (!$mg_row) {
152 my $new_mg_row = $schema->resultset("General::Db")->create( {
153 name => "MGIS"
156 $mg_dbid = $new_mg_row->db_id();
158 else {
159 $mg_dbid = $mg_row->db_id();
165 ## For the stock module
166 ################################
168 print "parsing spreadsheet... \n";
169 my @rows = $spreadsheet->row_labels();
170 my @columns = $spreadsheet->column_labels();
172 ## LABEL GENOTYPE SYNONYM1 SYNONYM2 SYNONYM3 SYNONYM4 PLOIDY GERMPLASM_GROUP CATEGORY INSTITUTE LOCATION LOCATION_CODE CURATOR NAME PROJECT_DESCRIPTION PROJECT_TYPE YEAR
173 # LABEL NAME UNIQUENAME GENOTYPE SYNONYM1 SYNONYM2 PLOIDY GERMPLASM_GROUP CATEGORY INSTITUTE LOCATION LOCATION_CODE CURATOR PROJECT_NAME PROJECT_DESCRIPTION PROJECT_TYPE YEAR
174 # 1 Calcutta-4 Calcutta-4_ITC0249_2015_Collection_IITA_SEN Calcutta-4 ITC0249 2X AA Collection IITA Sendusu SEN mibatte 2015_Collection_IITA_SEN Collection, IITA-Sendusu, 2015 2015_banana_germplasm 2015
177 my ($new_count,$existing, $count, $syn_count, $merge);
178 my $coderef= sub {
179 foreach my $num (@rows ) {
180 my $accession = $spreadsheet->value_at($num,'NAME');
181 if (!$accession) { next; }
182 print "\nBanana accession name is '" . $accession . "'\n";
183 $count++;
184 my $organization = $spreadsheet->value_at($num, 'INSTITUTE');
185 my $location_code = $spreadsheet->value_at($num, 'LOCATION_CODE');
186 my $location = $spreadsheet->value_at($num, 'LOCATION');
187 my $ITC_name = $spreadsheet->value_at($num, "SYNONYM1");
188 #my $original_name = $spreadsheet->value_at($num, "GENOTYPE_NAME_ORIGINAL");
189 my $syn1 = $spreadsheet->value_at($num, "UNIQUENAME");
190 my $syn2 = $spreadsheet->value_at($num, "GENOTYPE");
191 my $syn3 = $spreadsheet->value_at($num, "SYNONYM1");
192 #my $syn4 = $spreadsheet->value_at($num, "SYNONYM4");
193 my $ploidy = $spreadsheet->value_at($num, "PLOIDY");
194 my $germplasm_group = $spreadsheet->value_at($num, "GERMPLASM_GROUP");
195 my $curator = $spreadsheet->value_at($num, "CURATOR");
196 my $name = $spreadsheet->value_at($num, "PROJECT_NAME");
197 my $category = $spreadsheet->value_at($num, "CATEGORY");
198 my $project_type = $spreadsheet->value_at($num, "PROJECT_TYPE");
199 my $year = $spreadsheet->value_at($num, "YEAR");
200 my $project_description = $spreadsheet->value_at($num, "PROJECT_DESCRIPTION");
201 #my $project_description = "$name $project_type ($year) $location";
203 print"project descr is $project_description /n";
205 # see if a stock exists with any of the synonyms
206 my @stocks = $stock_rs->search( {
207 -or => [
208 uniquename => $accession,
209 uniquename => $syn1,
210 uniquename => $syn2,
211 uniquename => $syn3,
212 #uniquename => $syn4,
213 ], }, );
215 my $existing_stock = $stock_rs->search( { uniquename => $accession } )->single;
216 foreach my $s(@stocks) {
217 print "Looking at accession $accession, Found stock '" . $s->uniquename . "(stock_id = " . $s->stock_id . ")'\n";
218 $existing++;
223 if (!@stocks) {
224 print "NEW stock: $accession\n";
225 $new_count++;
226 }elsif (!$existing_stock) {
228 my %stock_hash = map { $_->stock_id => $_ } @stocks;
229 my @keys = keys %stock_hash;
230 my @sorted = sort { $a <=> $b } @keys;
231 print "Existing stock_id is " . $sorted[0] . " name = " . ($stock_hash{$sorted[0]})->uniquename . "\n";
232 $existing_stock = $stock_hash{$sorted[0]};
233 $existing_stock->uniquename($accession);
234 $existing_stock->name($accession);
235 $existing_stock->update;
238 if (scalar(@stocks) >1) {
239 my @stock_names = map( $_->uniquename , @stocks );
240 my @stock_ids = map ($_->stock_id, @stocks);
241 print "MERGE: stocks " . join (", " , @stock_names) . "need to be merged\n";
242 $merge .= "$accession : merge stock_ids : " .join (", " , @stock_ids) . "( names: " . join (" | " , @stock_names) . ")\n";
244 my $stock = $existing_stock ? $existing_stock :
245 $schema->resultset("Stock::Stock")->find_or_create(
246 { organism_id => $organism_id,
247 name => $accession,
248 uniquename => $accession,
249 type_id => $accession_cvterm->cvterm_id(),
250 #description => '',
252 my $stock_id = $stock->stock_id;
253 print "Adding owner $curator\n";
255 my $curator_person_id = CXGN::People::Person->get_person_by_username($dbh, $curator); #add person id as an option.
256 if (!$curator_person_id) { die "Person $curator does not exist in the database! Please add this user before continuing \n";}
257 #add owner for this stock
258 $phenome_schema->resultset("StockOwner")->find_or_create(
260 stock_id => $stock->stock_id,
261 sp_person_id => $curator_person_id,
264 if ($ITC_name) {
265 my $row = $schema -> resultset("General::Dbxref")->find_or_create(
267 accession => $ITC_name, db_id=>$mg_dbid,
269 my $link_row = $schema -> resultset("Stock::StockDbxref")->find_or_create(
270 { stock_id => $stock_id,
271 dbxref_id => $row->dbxref_id(),
273 $link_row->update();
274 print "Creating a MGIS link for accession $accession (stock_id = $stock_id dbxref_id= " . $row->dbxref_id . ")\n";
276 # you store the location in nd_geolocation
277 # link it with each experiment
278 # the nd_experiment table has nd_geolocation_id column
280 # you store a project name for your group of phenotyping /genotyping experiments
281 # call it something like "2015 phenotyping trials in location ABC..."
282 # and link that project with the project_id for the breeding program using project_relationship
283 # if you don't have a project_id for the breeding program you create a new one in the project table
284 # and add to it a projectprop woth type_id of the "breeding_program" cvterm
285 # select * from stock join nd_experiment_stock ON nd_experiment_stock.nd_experiment_stock_id = stock.stock_id join nd_experiment on nd_experiment.nd_experiment_id = nd_experiment_stock.nd_experiment_id join nd_experiment_project on nd_experiment_project.nd_experiment_id = nd_experiment_stock.nd_experiment_id ;
286 my $banana_project = $schema->resultset("Project::Project")->find_or_create(
288 name => "2015_banana_germplasm_IITA_NARO",
289 #name => $name,
290 description => $project_type,
291 } ) ;
293 my $project_description = "$name $project_type ($year) $location";
295 #print"project descr is $project_description /n";
297 my $project = $schema->resultset("Project::Project")->find_or_create(
299 name => $name,
300 description => $project_description,
301 } ) ;
303 #associate the new project with breeding program
304 my $cxgn_project = CXGN::BreedersToolbox::Projects->new( { schema => $schema } ) ;
305 $cxgn_project->associate_breeding_program_with_trial( $banana_project->project_id, $project->project_id);
307 print "banana id = " . $banana_project->project_id . " project_id = " . $project->project_id . "\n";
308 #store the geolocation data and props:
309 my $geo_description = $location;
312 #####################
313 if ($organization) { $stock->create_stockprops( { organization => $organization }, { autocreate => 1 } ); }
314 if ($location_code) { $stock->create_stockprops( { location_code => $location_code }, { autocreate => 1 } ) } ;
315 if ($ploidy) { $stock->create_stockprops( { ploidy_level => $ploidy }, { autocreate => 1 }) };
316 if ($germplasm_group) { $stock->create_stockprops( { genome_structure => $germplasm_group }, { autocreate=>1})};
318 #the stock belongs to the population:
319 #add new stock_relationship
321 $population->find_or_create_related('stock_relationship_objects', {
322 type_id => $member_of->cvterm_id(),
323 subject_id => $stock->stock_id(),
324 } );
325 print "Adding synonyms #\n";
326 my @synonyms = ($accession,$syn2, $syn3);
327 foreach my $syn (@synonyms) {
328 if ($syn && defined($syn) && ($syn ne $accession) ) {
329 my $existing_synonym = $stock->search_related(
330 'stockprops' , {
331 'me.value' => $syn,
332 'type.name' => 'synonym'
334 { join => 'type' }
335 )->single;
336 if (!$existing_synonym) {
337 $syn_count++;
338 print STDOUT "Adding synonym: $syn \n" ;
339 #add the synonym as a stockprop
340 $stock->create_stockprops({ synonym => $syn},
341 {autocreate => 1,
342 cv_name => 'local',
343 allow_duplicate_values=> 1,
349 my @props = $stock->search_related('stockprops');
350 foreach my $p ( @props ) {
351 print "**the prop value for stock " . $stock->name() . " is " . $p->value() . "\n" if $p;
355 #########
363 print "TOTAL: \n $count rows \n $new_count new accessions \n $existing existing stocks \n $syn_count new synonyms \n MERGE :\n $merge\n";
364 #if ($opt_t) { die "test rolling back";}
368 try {
369 $schema->txn_do($coderef);
370 if (!$opt_t) {
371 print "Transaction succeeded! Commiting stocks and their properties! \n\n";
372 $schema->txn_commit();
374 else {
375 $schema->txn_rollback();
376 die "TEST RUN! rolling back\n";
378 } catch {
379 # Transaction failed
380 # foreach my $value ( keys %seq ) {
381 # my $maxval= $seq{$value} || 0;
382 # if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
383 # else { $dbh->do("SELECT setval ('$value', 1, false)"); }
385 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";