5 load_stock_data.pl - a script to load stock data
9 load_stock_data.pl -H [dbhost] -D [dbname] [-t] [-s species name ] [-p stock population name]
11 =head1 COMMAND-LINE OPTIONS
16 -u username for associating the new stocks
17 -s species name - must be in the database. Can also be read from the input file
18 -p population name - will create a new stock of type 'population' if doesn't exist.
19 -t Test run . Rolling back at the end.
23 Updated script for loading and adding stock names and synonyms.
24 The owners of the stock accession are not stored in stockprop, but in phenome.stock_owner
26 All other stockproperties can be given as additional columns and will be loaded automatically;
27 if the corresponding stock_property does not exist in the database it will be added.
29 File format for infile (tab delimited):
31 accession species_name population_name synonyms description other_stock_props ...
33 Multiple synonyms can be specified, separated by the | symbol
37 Naama Menda (nm249@cornell.edu) - April 2013
39 Lukas Mueller (lam87@cornell.edu) - minor edits, November 2022
47 use CXGN
::Tools
::File
::Spreadsheet
;
49 use CXGN
::Phenome
::Schema
;
50 use Bio
::Chado
::Schema
;
51 use CXGN
::DB
::InsertDBH
;
54 use CXGN
::Chado
::Dbxref
;
55 use CXGN
::Chado
::Phenotype
;
56 use CXGN
::People
::Person
;
58 use SGN
::Model
::Cvterm
;
61 my ( $dbhost, $dbname, $file, $population_name, $species, $username, $password, $test );
64 'p=s' => \
$population_name,
69 'dbname|D=s' => \
$dbname,
70 'dbhost|H=s' => \
$dbhost,
74 my $dbh = CXGN
::DB
::Connection
->new( { dbhost
=>$dbhost,
78 dbargs
=> {AutoCommit
=> 1,
82 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do
=> ['SET search_path TO public;'] }
84 my $phenome_schema= CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh } , { on_connect_do
=> ['set search_path to public,phenome;'] } );
88 my $spreadsheet=CXGN
::Tools
::File
::Spreadsheet
->new($file);
91 # parse first the file with the clone names and synonyms. Load into stock,
92 # and stockprop population for grouping the clones
94 my $sp_person_id = CXGN
::People
::Person
->get_person_by_username($dbh, $username);
95 die "Need to have a user pre-loaded in the database! " if !$sp_person_id;
99 if ($species) { ## can also read species name from the input file
100 my $organism = $schema->resultset("Organism::Organism")->find( {
101 species
=> $species } );
102 $organism_id = $organism->organism_id();
103 die "Species $species does not exist in the database! " if !$organism_id;
104 } #check this again if species name is provided in the file
106 my $stock_rs = $schema->resultset("Stock::Stock");
108 my $stock_property_cv_id = $schema->resultset("Cv::Cv")->find( { name
=> 'stock_property' })->cv_id();
110 print STDERR
"Stock property CV ID = $stock_property_cv_id\n";
113 # the cvterm for the population
115 print "Finding/creating cvterm for population\n";
116 my $population_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'population', 'stock_type');
118 # the cvterm for the accession
120 my $accession_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'accession', 'stock_type');
122 # the cvterm for the relationship type
124 my $member_of = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'member_of', 'stock_relationship');
126 # for the stock module
128 print "parsing spreadsheet... \n";
129 my @rows = $spreadsheet->row_labels();
130 my @columns = $spreadsheet->column_labels();
134 print STDERR
"COLUMN LABELS = ".join(", ", @columns)."\n";
136 # accession species population_name synonyms
139 foreach my $accession (@rows ) {
140 # remove spaces from accession name
141 $accession=~s/\s+//g;
143 my $species_name = $spreadsheet->value_at($accession, "species_name");
145 my $organism = $schema->resultset("Organism::Organism")->find( {
146 species
=> $species_name } );
147 $organism_id = $organism->organism_id();
148 die "Species $species_name does not exist in the database! " if !$organism_id;
151 my $population_names = $spreadsheet->value_at($accession, "population_name"); # new: can be more than one, | separated
152 my $synonym_string = $spreadsheet->value_at($accession, "synonyms");
153 my $description = $spreadsheet->value_at($accession, "description");
155 my @synonyms = split /\|/ , $synonym_string;
158 if ($population_names) {
159 my @populations = split /\|/, $population_names;
161 foreach my $name (@populations) {
162 print "Creating a stock for population $population_name (cvterm = " . $population_cvterm->name . ")\n";
163 my $row = $stock_rs->find_or_create( {
165 'me.uniquename' => $name,
166 'me.organism_id' => $organism_id,
167 type_id
=> $population_cvterm->cvterm_id, }, { join => 'type' }
169 push @population_rows, $row;
173 print "Find or create stock for accesssion $accession\n";
174 my $stock = $schema->resultset("Stock::Stock")->find_or_create(
175 { organism_id
=> $organism_id,
177 description
=> $description,
178 uniquename
=> $accession,
179 type_id
=> $accession_cvterm->cvterm_id(),
181 my $stock_id = $stock->stock_id;
182 print "Adding owner $sp_person_id \n";
184 # add the owner for this stock
186 $phenome_schema->resultset("StockOwner")->find_or_create(
188 stock_id
=> $stock->stock_id,
189 sp_person_id
=> $sp_person_id,
192 # the stock belongs to population(s):
193 # add new stock_relationship(s)
195 if ($population_names) {
196 foreach my $row (@population_rows) {
197 print "Accession $accession is member_of population ".$row->uniquename();
198 $row->find_or_create_related('stock_relationship_objects', {
199 type_id
=> $member_of->cvterm_id(),
200 subject_id
=> $stock->stock_id(),
205 if ($synonym_string) {print "Adding synonyms #" . scalar(@synonyms) . "\n"; }
206 foreach my $syn (@synonyms) {
207 if ($syn && defined($syn) && ($syn ne $accession) ) {
208 my $existing_synonym = $stock->search_related(
211 'type.name' => { ilike
=> '%synonym%' }
215 if (!$existing_synonym) {
217 print STDOUT
"Adding synonym: $syn \n" ;
219 # add the synonym as a stockprop
221 $stock->create_stockprops({ stock_synonym
=> $syn},
223 allow_duplicate_values
=> 1,
229 print STDERR
"Parsing ".scalar(@columns)." columns...\n";
231 for(my $n = 5; $n<@columns; $n++) {
232 print STDERR
"Retrieving value at $accession / $columns[$n]...\n";
233 my $value = $spreadsheet->value_at($accession, $columns[$n]);
234 print STDERR
"value is $value\n";
236 my $type_rs = $schema->resultset("Cv::Cvterm")->find_or_create(
238 name
=> $columns[$n],
239 cv_id
=> $stock_property_cv_id
241 print STDERR
"TYPE ID IS ".$type_rs->cvterm_id."\n";
242 my $stockprop = $schema->resultset("Stock::Stockprop")->find_or_create(
244 stock_id
=> $stock->stock_id,
246 type_id
=> $type_rs->cvterm_id,
253 die "TEST RUN! rolling back\n";
259 $schema->txn_do($coderef);
260 if (!$test) { print "Transaction succeeded! Commiting stocks and their properties! \n\n"; }
262 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";