Merge pull request #5186 from solgenomics/topic/bulk_search_lists
[sgn.git] / bin / load_stocks.pl
blob848ba718d08237ef34d5a03aa2ff7807af7c9e36
1 #!/usr/bin/perl
3 =head1
5 load_stock_data.pl - a script to load stock data
7 =head1 SYNOPSIS
9 load_stock_data.pl -H [dbhost] -D [dbname] [-t] [-s species name ] [-p stock population name]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name
14 -D database name
15 -i infile
16 -u username for associating the new stocks
17 -s species name - must be in the database. Can also be read from the input file
18 -p population name - will create a new stock of type 'population' if doesn't exist.
19 -t Test run . Rolling back at the end.
21 =head1 DESCRIPTION
23 Updated script for loading and adding stock names and synonyms.
24 The owners of the stock accession are not stored in stockprop, but in phenome.stock_owner
26 All other stockproperties can be given as additional columns and will be loaded automatically;
27 if the corresponding stock_property does not exist in the database it will be added.
29 File format for infile (tab delimited):
31 accession species_name population_name synonyms description other_stock_props ...
33 Multiple synonyms can be specified, separated by the | symbol
35 =head1 AUTHORS
37 Naama Menda (nm249@cornell.edu) - April 2013
39 Lukas Mueller (lam87@cornell.edu) - minor edits, November 2022
41 =cut
44 use strict;
45 use warnings;
46 use Getopt::Std;
47 use CXGN::Tools::File::Spreadsheet;
49 use CXGN::Phenome::Schema;
50 use Bio::Chado::Schema;
51 use CXGN::DB::InsertDBH;
52 use Carp qw /croak/ ;
54 use CXGN::Chado::Dbxref;
55 use CXGN::Chado::Phenotype;
56 use CXGN::People::Person;
57 use Try::Tiny;
58 use SGN::Model::Cvterm;
59 use Getopt::Long;
61 my ( $dbhost, $dbname, $file, $population_name, $species, $username, $password, $test );
62 GetOptions(
63 'i=s' => \$file,
64 'p=s' => \$population_name,
65 's=s' => \$species,
66 'u=s' => \$username,
67 'P=s' => \$password,
68 't' => \$test,
69 'dbname|D=s' => \$dbname,
70 'dbhost|H=s' => \$dbhost,
74 my $dbh = CXGN::DB::Connection->new( { dbhost=>$dbhost,
75 dbname=>$dbname,
76 dbuser=>'postgres',
77 dbpass=>$password,
78 dbargs => {AutoCommit => 1,
79 RaiseError => 1}
82 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do => ['SET search_path TO public;'] }
84 my $phenome_schema= CXGN::Phenome::Schema->connect( sub { $dbh->get_actual_dbh } , { on_connect_do => ['set search_path to public,phenome;'] } );
86 # new spreadsheet
88 my $spreadsheet=CXGN::Tools::File::Spreadsheet->new($file);
91 # parse first the file with the clone names and synonyms. Load into stock,
92 # and stockprop population for grouping the clones
94 my $sp_person_id = CXGN::People::Person->get_person_by_username($dbh, $username);
95 die "Need to have a user pre-loaded in the database! " if !$sp_person_id;
97 my $organism_id ;
99 if ($species) { ## can also read species name from the input file
100 my $organism = $schema->resultset("Organism::Organism")->find( {
101 species => $species } );
102 $organism_id = $organism->organism_id();
103 die "Species $species does not exist in the database! " if !$organism_id;
104 } #check this again if species name is provided in the file
106 my $stock_rs = $schema->resultset("Stock::Stock");
108 my $stock_property_cv_id = $schema->resultset("Cv::Cv")->find( { name => 'stock_property' })->cv_id();
110 print STDERR "Stock property CV ID = $stock_property_cv_id\n";
113 # the cvterm for the population
115 print "Finding/creating cvterm for population\n";
116 my $population_cvterm = SGN::Model::Cvterm->get_cvterm_row($schema, 'population', 'stock_type');
118 # the cvterm for the accession
120 my $accession_cvterm = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type');
122 # the cvterm for the relationship type
124 my $member_of = SGN::Model::Cvterm->get_cvterm_row($schema, 'member_of', 'stock_relationship');
126 # for the stock module
128 print "parsing spreadsheet... \n";
129 my @rows = $spreadsheet->row_labels();
130 my @columns = $spreadsheet->column_labels();
132 my $syn_count;
134 print STDERR "COLUMN LABELS = ".join(", ", @columns)."\n";
136 # accession species population_name synonyms
138 my $coderef= sub {
139 foreach my $accession (@rows ) {
140 # remove spaces from accession name
141 $accession=~s/\s+//g;
143 my $species_name = $spreadsheet->value_at($accession, "species_name");
144 if (!$species) {
145 my $organism = $schema->resultset("Organism::Organism")->find( {
146 species => $species_name } );
147 $organism_id = $organism->organism_id();
148 die "Species $species_name does not exist in the database! " if !$organism_id;
151 my $population_names = $spreadsheet->value_at($accession, "population_name"); # new: can be more than one, | separated
152 my $synonym_string = $spreadsheet->value_at($accession, "synonyms");
153 my $description = $spreadsheet->value_at($accession, "description");
155 my @synonyms = split /\|/ , $synonym_string;
157 my @population_rows;
158 if ($population_names) {
159 my @populations = split /\|/, $population_names;
161 foreach my $name (@populations) {
162 print "Creating a stock for population $population_name (cvterm = " . $population_cvterm->name . ")\n";
163 my $row = $stock_rs->find_or_create( {
164 'me.name' => $name,
165 'me.uniquename' => $name,
166 'me.organism_id' => $organism_id,
167 type_id => $population_cvterm->cvterm_id, }, { join => 'type' }
169 push @population_rows, $row;
173 print "Find or create stock for accesssion $accession\n";
174 my $stock = $schema->resultset("Stock::Stock")->find_or_create(
175 { organism_id => $organism_id,
176 name => $accession,
177 description => $description,
178 uniquename => $accession,
179 type_id => $accession_cvterm->cvterm_id(),
181 my $stock_id = $stock->stock_id;
182 print "Adding owner $sp_person_id \n";
184 # add the owner for this stock
186 $phenome_schema->resultset("StockOwner")->find_or_create(
188 stock_id => $stock->stock_id,
189 sp_person_id => $sp_person_id,
192 # the stock belongs to population(s):
193 # add new stock_relationship(s)
195 if ($population_names) {
196 foreach my $row (@population_rows) {
197 print "Accession $accession is member_of population ".$row->uniquename();
198 $row->find_or_create_related('stock_relationship_objects', {
199 type_id => $member_of->cvterm_id(),
200 subject_id => $stock->stock_id(),
201 } );
205 if ($synonym_string) {print "Adding synonyms #" . scalar(@synonyms) . "\n"; }
206 foreach my $syn (@synonyms) {
207 if ($syn && defined($syn) && ($syn ne $accession) ) {
208 my $existing_synonym = $stock->search_related(
209 'stockprops' , {
210 'me.value' => $syn,
211 'type.name' => { ilike => '%synonym%' }
213 { join => 'type' }
214 )->single;
215 if (!$existing_synonym) {
216 $syn_count++;
217 print STDOUT "Adding synonym: $syn \n" ;
219 # add the synonym as a stockprop
221 $stock->create_stockprops({ stock_synonym => $syn},
222 {autocreate => 0,
223 allow_duplicate_values=> 1,
229 print STDERR "Parsing ".scalar(@columns)." columns...\n";
231 for(my $n = 5; $n<@columns; $n++) {
232 print STDERR "Retrieving value at $accession / $columns[$n]...\n";
233 my $value = $spreadsheet->value_at($accession, $columns[$n]);
234 print STDERR "value is $value\n";
235 if ($value) {
236 my $type_rs = $schema->resultset("Cv::Cvterm")->find_or_create(
238 name => $columns[$n],
239 cv_id => $stock_property_cv_id
241 print STDERR "TYPE ID IS ".$type_rs->cvterm_id."\n";
242 my $stockprop = $schema->resultset("Stock::Stockprop")->find_or_create(
244 stock_id => $stock->stock_id,
245 value => $value,
246 type_id => $type_rs->cvterm_id,
252 if ($test) {
253 die "TEST RUN! rolling back\n";
258 try {
259 $schema->txn_do($coderef);
260 if (!$test) { print "Transaction succeeded! Commiting stocks and their properties! \n\n"; }
261 } catch {
262 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";