4 load_cassava_clone_names.pl
8 $load_stock_data.pl -H [dbhost] -D [dbname] [-t]
10 =head1 COMMAND-LINE OPTIONS
15 -t Test run . Rolling back at the end.
20 Updated script for loading and adding cassava clone names and synonyms.
21 The owners of the stock accession are not stored in stockprop, but in phenome.stock_owner.
23 Naama Menda (nm249@cornell.edu)
33 use CXGN
::Tools
::File
::Spreadsheet
;
35 use CXGN
::Phenome
::Schema
;
36 use Bio
::Chado
::Schema
;
37 use CXGN
::DB
::InsertDBH
;
40 use CXGN
::Chado
::Dbxref
;
41 use CXGN
::Chado
::Phenotype
;
42 use CXGN
::People
::Person
;
44 use SGN
::Model
::Cvterm
;
46 our ($opt_H, $opt_D, $opt_i, $opt_t);
54 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
56 dbargs
=> {AutoCommit
=> 1,
60 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do
=> ['SET search_path TO public;'] }
62 my $phenome_schema= CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh } , { on_connect_do
=> ['set search_path to public,phenome;'] } );
65 #getting the last database ids for resetting at the end in case of rolling back
66 # my $last_stockprop_id= $schema->resultset('Stock::Stockprop')->get_column('stockprop_id')->max;
67 # my $last_stock_id= $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
68 # my $last_stockrel_id= $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
69 # my $last_cvterm_id= $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
70 # my $last_cv_id= $schema->resultset('Cv::Cv')->get_column('cv_id')->max;
71 # my $last_db_id= $schema->resultset('General::Db')->get_column('db_id')->max;
72 # my $last_dbxref_id= $schema->resultset('General::Dbxref')->get_column('dbxref_id')->max;
73 # my $last_organism_id = $schema->resultset('Organism::Organism')->get_column('organism_id')->max;
76 # 'db_db_id_seq' => $last_db_id,
77 # 'dbxref_dbxref_id_seq' => $last_dbxref_id,
78 # 'cv_cv_id_seq' => $last_cv_id,
79 # 'cvterm_cvterm_id_seq' => $last_cvterm_id,
80 # 'stock_stock_id_seq' => $last_stock_id,
81 # 'stockprop_stockprop_id_seq' => $last_stockprop_id,
82 # 'stock_relationship_stock_relationship_id_seq' => $last_stockrel_id,
83 # 'organism_organism_id_seq' => $last_organism_id,
86 #new spreadsheet, skip first column
87 my $spreadsheet=CXGN
::Tools
::File
::Spreadsheet
->new($file,1);
90 ##parse first the file with the clone names and synonyms. Load is into stock, and stockprop
92 # population for grouping the clones
94 my $population_name = 'Cassava clones';
96 my $species = 'Manihot esculenta'; #
98 my $sp_person_id = CXGN
::People
::Person
->get_person_by_username($dbh, 'pkulakow'); #add person id as an option.
99 die "Need to have a user pre-loaded in cassavabase! " if !$sp_person_id;
101 my $organism = $schema->resultset("Organism::Organism")->find_or_create( {
102 species
=> $species } );
103 my $organism_id = $organism->organism_id();
105 my $stock_rs = $schema->resultset("Stock::Stock");
108 #the cvterm for the population
109 print "Finding/creating cvterm for population\n";
110 my $population_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'population', 'stock_type');
112 print "Creating a stock for population $population_name (cvterm = " . $population_cvterm->name . ")\n";
113 my $population = $stock_rs->find_or_create(
115 'me.name' => $population_name,
116 'me.uniquename' => $population_name,
117 'me.organism_id' => $organism_id,
118 type_id
=> $population_cvterm->cvterm_id,
123 #the cvterm for the accession
124 print "Finding/creating cvtem for 'stock type' \n";
125 my $accession_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'accession', 'stock_type');
127 #the cvterm for the relationship type
128 print "Finding/creating cvtem for stock relationship 'member_of' \n";
130 my $member_of = my $accession_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'member_of', 'stock_relationship');
132 ## For the stock module
133 ################################
135 print "parsing spreadsheet... \n";
136 my @rows = $spreadsheet->row_labels();
137 my @columns = $spreadsheet->column_labels();
139 #Contributing Organization Official IITA Clone Name IITA prefix Three letter Location code notation Preferred cassavabase clone name synonym1_oldclonename synonym2 synonym3 synonym4 synonym5 error synonym1 error synonym2 error synonym3 error synonym4 error synonym5 icass icass corrected
141 #11 IITA IITA-TMS-BI090563-1 IITA-TMS IBA TMS-BI090563-1 BI09/0563-1
143 my ($new_count,$existing, $count, $syn_count, $merge);
145 foreach my $num (@rows ) {
146 my $accession = $spreadsheet->value_at($num, 'Preferred cassavabase clone name');
147 if (!$accession) { next; }
148 print "\nCassava clone name is '" . $accession . "'\n";
150 my $organization = $spreadsheet->value_at($num, 'Contributing Organization');
151 my $location_code = $spreadsheet->value_at($num, 'Three letter Location code');
153 my $iita_clone_name = $spreadsheet->value_at($num, "Official IITA Clone Name");
154 my $syn1 = $spreadsheet->value_at($num, "synonym1_oldclonename");
155 my $syn2 = $spreadsheet->value_at($num, "synonym2");
156 my $syn3 = $spreadsheet->value_at($num, "synonym3");
157 my $syn4 = $spreadsheet->value_at($num, "synonym4");
158 my $syn5 = $spreadsheet->value_at($num, "synonym5");
159 my $icass = $spreadsheet->value_at($num, "icass");
160 my $ploidy = $spreadsheet -> value_at
($num, "PLOIDY");
161 my $genome_structure = $spreadsheet->value_at($num, "GENOME");
163 # see if a stock exists with any of the synonyms
164 my @stocks = $stock_rs->search( {
166 uniquename
=> $accession,
167 uniquename
=> $iita_clone_name,
173 uniquename
=> $icass,
175 my $existing_stock = $stock_rs->search( { uniquename
=> $accession } )->single;
176 foreach my $s(@stocks) {
177 print "Looking at accession $accession, Found stock '" . $s->uniquename . "(stock_id = " . $s->stock_id . ")'\n";
182 print "NEW stock: $accession\n";
184 }elsif (!$existing_stock) {
186 my %stock_hash = map { $_->stock_id => $_ } @stocks;
187 my @keys = keys %stock_hash;
188 my @sorted = sort { $a <=> $b } @keys;
189 print "Existing stock_id is " . $sorted[0] . " name = " . ($stock_hash{$sorted[0]})->uniquename . "\n";
190 $existing_stock = $stock_hash{$sorted[0]};
191 $existing_stock->uniquename($accession);
192 $existing_stock->name($accession);
193 $existing_stock->update;
196 if (scalar(@stocks) >1) {
197 my @stock_names = map( $_->uniquename , @stocks );
198 my @stock_ids = map ($_->stock_id, @stocks);
199 print "MERGE: stocks " . join (", " , @stock_names) . "need to be merged\n";
200 $merge .= "$accession : merge stock_ids : " .join (", " , @stock_ids) . "( names: " . join (" | " , @stock_names) . ")\n";
202 my $stock = $existing_stock ?
$existing_stock :
203 $schema->resultset("Stock::Stock")->find_or_create(
204 { organism_id
=> $organism_id,
206 uniquename
=> $accession,
207 type_id
=> $accession_cvterm->cvterm_id(),
210 my $stock_id = $stock->stock_id;
211 print "Adding owner $sp_person_id \n";
212 #add the owner for this stock
213 $phenome_schema->resultset("StockOwner")->find_or_create(
215 stock_id
=> $stock->stock_id,
216 sp_person_id
=> $sp_person_id,
218 #####################
219 if ($organization) { $stock->create_stockprops( { organization
=> $organization }, { autocreate
=> 1 } ); }
220 if ($location_code) { $stock->create_stockprops( { location_code
=> $location_code }, { autocreate
=> 1 } ) } ;
221 if ($ploidy) { $stock->create_stockprops( { ploidy_level
=> $ploidy }, { autocreate
=> 1 }) };
222 if ($genome_structure) { $stock->create_stockprops( { genome_structure
=> $genome_structure }, { autocreate
=>1})};
224 #the stock belongs to the population:
225 #add new stock_relationship
227 $population->find_or_create_related('stock_relationship_objects', {
228 type_id
=> $member_of->cvterm_id(),
229 subject_id
=> $stock->stock_id(),
231 print "Adding synonyms #\n";
232 my @synonyms = ($iita_clone_name,$syn1, $syn2, $syn3, $syn4, $syn5, $icass);
233 foreach my $syn (@synonyms) {
234 if ($syn && defined($syn) && ($syn ne $accession) ) {
235 my $existing_synonym = $stock->search_related(
238 'type.name' => { ilike
=> '%synonym%' }
242 if (!$existing_synonym) {
244 print STDOUT
"Adding synonym: $syn \n" ;
245 #add the synonym as a stockprop
246 $stock->create_stockprops({ stock_synonym
=> $syn},
248 #cv_name => 'local', #use the default stock_property cv
249 allow_duplicate_values
=> 1,
255 my @props = $stock->search_related('stockprops');
256 foreach my $p ( @props ) {
257 print "**the prop value for stock " . $stock->name() . " is " . $p->value() . "\n" if $p;
261 print "TOTAL: \n $count rows \n $new_count new accessions \n $existing existing stocks \n $syn_count new synonyms \n MERGE :\n $merge\n";
263 die "TEST RUN! rolling back\n";
269 $schema->txn_do($coderef);
270 if (!$opt_t) { print "Transaction succeeded! Commiting stocks and their properties! \n\n"; }
273 # foreach my $value ( keys %seq ) {
274 # my $maxval= $seq{$value} || 0;
275 # if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
276 # else { $dbh->do("SELECT setval ('$value', 1, false)"); }
278 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";