5 load_stock_data.pl - a script to load stock data
9 load_stock_data.pl -H [dbhost] -D [dbname] [-t] [-s species name ] [-p stock population name]
11 =head1 COMMAND-LINE OPTIONS
16 -u username for associating the new stocks
17 -s species name - must be in the database. Can also be read from the input file
18 -p population name - will create a new stock of type 'population' if doesn't exist.
19 -t Test run . Rolling back at the end.
23 Updated script for loading and adding stock names and synonyms.
24 The owners of the stock accession are not stored in stockprop, but in phenome.stock_owner
26 All other stockproperties can be given as additional columns and will be loaded automatically;
27 if the corresponding stock_property does not exist in the database it will be added.
29 File format for infile (tab delimited):
31 accession genus species_name population_name synonyms other_stock_props ...
33 Multiple synonyms can be specified, separated by the | symbol
37 Naama Menda (nm249@cornell.edu) - April 2013
38 Lukas Mueller (lam87@cornell.edu) - minor edits, November 2022
46 use CXGN
::Tools
::File
::Spreadsheet
;
48 use CXGN
::Phenome
::Schema
;
49 use Bio
::Chado
::Schema
;
50 use CXGN
::DB
::InsertDBH
;
53 use CXGN
::Chado
::Dbxref
;
54 use CXGN
::Chado
::Phenotype
;
55 use CXGN
::People
::Person
;
57 use SGN
::Model
::Cvterm
;
60 my ( $dbhost, $dbname, $file, $population_name, $species, $username, $test );
63 'p=s' => \
$population_name,
67 'dbname|D=s' => \
$dbname,
68 'dbhost|H=s' => \
$dbhost,
72 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
74 dbargs
=> {AutoCommit
=> 1,
78 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do
=> ['SET search_path TO public;'] }
80 my $phenome_schema= CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh } , { on_connect_do
=> ['set search_path to public,phenome;'] } );
84 my $spreadsheet=CXGN
::Tools
::File
::Spreadsheet
->new($file);
87 # parse first the file with the clone names and synonyms. Load into stock,
88 # and stockprop population for grouping the clones
90 my $sp_person_id = CXGN
::People
::Person
->get_person_by_username($dbh, $username);
91 die "Need to have a user pre-loaded in the database! " if !$sp_person_id;
95 if ($species) { ## can also read species name from the input file
96 my $organism = $schema->resultset("Organism::Organism")->find( {
97 species
=> $species } );
98 $organism_id = $organism->organism_id();
99 die "Species $species does not exist in the database! " if !$organism_id;
100 } #check this again if species name is provided in the file
102 my $stock_rs = $schema->resultset("Stock::Stock");
104 my $stock_property_cv_id = $schema->resultset("Cv::Cv")->find( { name
=> 'stock_property' })->cv_id();
106 print STDERR
"Stock property CV ID = $stock_property_cv_id\n";
109 # the cvterm for the population
111 print "Finding/creating cvterm for population\n";
112 my $population_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'population', 'stock_type');
116 # the cvterm for the accession
118 my $accession_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'accession', 'stock_type');
120 # the cvterm for the relationship type
122 my $member_of = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'member_of', 'stock_relationship');
124 # for the stock module
126 print "parsing spreadsheet... \n";
127 my @rows = $spreadsheet->row_labels();
128 my @columns = $spreadsheet->column_labels();
132 # accession genus species population_name synonyms
135 foreach my $accession (@rows ) {
136 # remove spaces from accession name
137 $accession=~s/\s+//g;
139 my $species_name = $spreadsheet->value_at($accession, "species_name");
141 my $organism = $schema->resultset("Organism::Organism")->find( {
142 species
=> $species_name } );
143 $organism_id = $organism->organism_id();
144 die "Species $species_name does not exist in the database! " if !$organism_id;
147 my $population_name = $spreadsheet->value_at($accession, "population_name");
148 my $synonym_string = $spreadsheet->value_at($accession, "synonyms");
149 my @synonyms = split /\|/ , $synonym_string;
152 if ($population_name) {
153 print "Creating a stock for population $population_name (cvterm = " . $population_cvterm->name . ")\n";
154 $population = $stock_rs->find_or_create(
156 'me.name' => $population_name,
157 'me.uniquename' => $population_name,
158 'me.organism_id' => $organism_id,
159 type_id
=> $population_cvterm->cvterm_id,
165 print "Find or create stock for accesssion $accession\n";
166 my $stock = $schema->resultset("Stock::Stock")->find_or_create(
167 { organism_id
=> $organism_id,
169 uniquename
=> $accession,
170 type_id
=> $accession_cvterm->cvterm_id(),
172 my $stock_id = $stock->stock_id;
173 print "Adding owner $sp_person_id \n";
175 # add the owner for this stock
177 $phenome_schema->resultset("StockOwner")->find_or_create(
179 stock_id
=> $stock->stock_id,
180 sp_person_id
=> $sp_person_id,
183 # the stock belongs to the population:
184 # add new stock_relationship
186 if ($population_name) {
187 print "Accession $accession is member_of population $population_name \n";
188 $population->find_or_create_related('stock_relationship_objects', {
189 type_id
=> $member_of->cvterm_id(),
190 subject_id
=> $stock->stock_id(),
193 if ($synonym_string) {print "Adding synonyms #" . scalar(@synonyms) . "\n"; }
194 foreach my $syn (@synonyms) {
195 if ($syn && defined($syn) && ($syn ne $accession) ) {
196 my $existing_synonym = $stock->search_related(
199 'type.name' => { ilike
=> '%synonym%' }
203 if (!$existing_synonym) {
205 print STDOUT
"Adding synonym: $syn \n" ;
207 # add the synonym as a stockprop
209 $stock->create_stockprops({ stock_synonym
=> $syn},
211 allow_duplicate_values
=> 1,
217 print STDERR
"Parsing ".scalar(@columns)." columns...\n";
219 for(my $n = 5; $n<@columns; $n++) {
220 print STDERR
"Retrieving value at $accession / $columns[$n]...\n";
221 my $value = $spreadsheet->value_at($accession, $columns[$n]);
222 print STDERR
"value is $value\n";
224 my $type_rs = $schema->resultset("Cv::Cvterm")->find_or_create(
226 name
=> $columns[$n],
227 cv_id
=> $stock_property_cv_id
229 print STDERR
"TYPE ID IS ".$type_rs->cvterm_id."\n";
230 my $stockprop = $schema->resultset("Stock::Stockprop")->find_or_create(
232 stock_id
=> $stock->stock_id,
234 type_id
=> $type_rs->cvterm_id,
241 die "TEST RUN! rolling back\n";
247 $schema->txn_do($coderef);
248 if (!$test) { print "Transaction succeeded! Commiting stocks and their properties! \n\n"; }
250 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";