5 load_stock_data.pl - a script to load stock data
9 load_stock_data.pl -H [dbhost] -D [dbname] [-t] [-s species name ] [-p stock population name]
11 =head1 COMMAND-LINE OPTIONS
16 -u username for associating the new stocks
17 -s species name - must be in the database. Can also be read from the input file
18 -p population name - will create a new stock of type 'population' if doesn't exist.
19 -t Test run . Rolling back at the end.
23 Updated script for loading and adding stock names and synonyms.
24 The owners of the stock accession are not stored in stockprop, but in phenome.stock_owner.
26 File format for infile (tab delimited):
28 accession genus species population_name synonyms
32 Naama Menda (nm249@cornell.edu)
41 use CXGN
::Tools
::File
::Spreadsheet
;
43 use CXGN
::Phenome
::Schema
;
44 use Bio
::Chado
::Schema
;
45 use CXGN
::DB
::InsertDBH
;
48 use CXGN
::Chado
::Dbxref
;
49 use CXGN
::Chado
::Phenotype
;
50 use CXGN
::People
::Person
;
52 use SGN
::Model
::Cvterm
;
55 my ( $dbhost, $dbname, $file, $population_name, $species, $username, $test );
58 'p=s' => \
$population_name,
62 'dbname|D=s' => \
$dbname,
63 'dbhost|H=s' => \
$dbhost,
67 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
69 dbargs
=> {AutoCommit
=> 1,
73 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do
=> ['SET search_path TO public;'] }
75 my $phenome_schema= CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh } , { on_connect_do
=> ['set search_path to public,phenome;'] } );
78 #getting the last database ids for resetting at the end in case of rolling back
79 my $last_stockprop_id= $schema->resultset('Stock::Stockprop')->get_column('stockprop_id')->max;
80 my $last_stock_id= $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
81 my $last_stockrel_id= $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
82 my $last_cvterm_id= $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
83 my $last_cv_id= $schema->resultset('Cv::Cv')->get_column('cv_id')->max;
84 my $last_db_id= $schema->resultset('General::Db')->get_column('db_id')->max;
85 my $last_dbxref_id= $schema->resultset('General::Dbxref')->get_column('dbxref_id')->max;
89 'db_db_id_seq' => $last_db_id,
90 'dbxref_dbxref_id_seq' => $last_dbxref_id,
91 'cv_cv_id_seq' => $last_cv_id,
92 'cvterm_cvterm_id_seq' => $last_cvterm_id,
93 'stock_stock_id_seq' => $last_stock_id,
94 'stockprop_stockprop_id_seq' => $last_stockprop_id,
95 'stock_relationship_stock_relationship_id_seq' => $last_stockrel_id,
99 my $spreadsheet=CXGN
::Tools
::File
::Spreadsheet
->new($file);
102 ##parse first the file with the clone names and synonyms. Load into stock, and stockprop
104 # population for grouping the clones
107 my $sp_person_id = CXGN
::People
::Person
->get_person_by_username($dbh, $username);
108 die "Need to have a user pre-loaded in the database! " if !$sp_person_id;
112 if ($species) { ## can also read species name from the input file
113 my $organism = $schema->resultset("Organism::Organism")->find( {
114 species
=> $species } );
115 $organism_id = $organism->organism_id();
116 die "Species $species does not exist in the database! " if !$organism_id;
117 } #check this again if species name is provided in the file
119 my $stock_rs = $schema->resultset("Stock::Stock");
121 #the cvterm for the population
122 print "Finding/creating cvterm for population\n";
123 my $population_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'population', 'stock_type');
127 #the cvterm for the accession
128 my $accession_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'accession', 'stock_type');
130 #the cvterm for the relationship type
131 my $member_of = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'member_of', 'stock_relationship');
133 ## For the stock module
134 ################################
136 print "parsing spreadsheet... \n";
137 my @rows = $spreadsheet->row_labels();
138 my @columns = $spreadsheet->column_labels();
141 #accession genus species population_name synonyms
143 foreach my $accession (@rows ) {
144 #remove spaces from accession name
145 $accession=~s/\s+//g;
147 my $species_name = $spreadsheet->value_at($accession, "species");
149 my $organism = $schema->resultset("Organism::Organism")->find( {
150 species
=> $species_name } );
151 $organism_id = $organism->organism_id();
152 die "Species $species_name does not exist in the database! " if !$organism_id;
155 my $population_name = $spreadsheet->value_at($accession, "population_name");
156 my $synonym_string = $spreadsheet->value_at($accession, "synonyms");
157 my @synonyms = split /\|/ , $synonym_string;
158 # see if a stock exists with any of the synonyms
159 #my @stocks = $stock_rs->search( {
161 # uniquename => $accession,
162 # uniquename => $iita_clone_name,
163 # uniquename => $syn1,
164 # uniquename => $syn2,
165 # uniquename => $syn3,
166 # uniquename => $syn4,
167 # uniquename => $syn5,
168 # uniquename => $icass,
172 print "Creating a stock for population $population_name (cvterm = " . $population_cvterm->name . ")\n";
173 my $population = $stock_rs->find_or_create(
175 'me.name' => $population_name,
176 'me.uniquename' => $population_name,
177 'me.organism_id' => $organism_id,
178 type_id
=> $population_cvterm->cvterm_id,
183 print "Find or create stock for accesssion $accession\n";
184 my $stock = $schema->resultset("Stock::Stock")->find_or_create(
185 { organism_id
=> $organism_id,
187 uniquename
=> $accession,
188 type_id
=> $accession_cvterm->cvterm_id(),
190 my $stock_id = $stock->stock_id;
191 print "Adding owner $sp_person_id \n";
192 #add the owner for this stock
193 $phenome_schema->resultset("StockOwner")->find_or_create(
195 stock_id
=> $stock->stock_id,
196 sp_person_id
=> $sp_person_id,
198 #####################
200 #the stock belongs to the population:
201 #add new stock_relationship
202 print "Accession $accession is member_of population $population_name \n";
203 $population->find_or_create_related('stock_relationship_objects', {
204 type_id
=> $member_of->cvterm_id(),
205 subject_id
=> $stock->stock_id(),
207 if ($synonym_string) {print "Adding synonyms #" . scalar(@synonyms) . "\n"; }
208 foreach my $syn (@synonyms) {
209 if ($syn && defined($syn) && ($syn ne $accession) ) {
210 my $existing_synonym = $stock->search_related(
213 'type.name' => { ilike
=> '%synonym%' }
217 if (!$existing_synonym) {
219 print STDOUT
"Adding synonym: $syn \n" ;
220 #add the synonym as a stockprop
221 $stock->create_stockprops({ stock_synonym
=> $syn},
223 allow_duplicate_values
=> 1,
232 die "TEST RUN! rolling back\n";
238 $schema->txn_do($coderef);
239 if (!$test) { print "Transaction succeeded! Commiting stocks and their properties! \n\n"; }
242 foreach my $value ( keys %seq ) {
243 my $maxval= $seq{$value} || 0;
244 if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
245 else { $dbh->do("SELECT setval ('$value', 1, false)"); }
247 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";