make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / load_stocks.pl
blobd15e02e7e63f7fa18b03722f986ec10ea9010b2d
1 #!/usr/bin/perl
3 =head1
5 load_stock_data.pl - a script to load stock data
7 =head1 SYNOPSIS
9 load_stock_data.pl -H [dbhost] -D [dbname] [-t] [-s species name ] [-p stock population name]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name
14 -D database name
15 -i infile
16 -u username for associating the new stocks
17 -s species name - must be in the database. Can also be read from the input file
18 -p population name - will create a new stock of type 'population' if doesn't exist.
19 -t Test run . Rolling back at the end.
21 =head1 DESCRIPTION
23 Updated script for loading and adding stock names and synonyms.
24 The owners of the stock accession are not stored in stockprop, but in phenome.stock_owner
26 All other stockproperties can be given as additional columns and will be loaded automatically;
27 if the corresponding stock_property does not exist in the database it will be added.
29 File format for infile (tab delimited):
31 accession genus species_name population_name synonyms other_stock_props ...
33 Multiple synonyms can be specified, separated by the | symbol
35 =head1 AUTHORS
37 Naama Menda (nm249@cornell.edu) - April 2013
38 Lukas Mueller (lam87@cornell.edu) - minor edits, November 2022
40 =cut
43 use strict;
44 use warnings;
45 use Getopt::Std;
46 use CXGN::Tools::File::Spreadsheet;
48 use CXGN::Phenome::Schema;
49 use Bio::Chado::Schema;
50 use CXGN::DB::InsertDBH;
51 use Carp qw /croak/ ;
53 use CXGN::Chado::Dbxref;
54 use CXGN::Chado::Phenotype;
55 use CXGN::People::Person;
56 use Try::Tiny;
57 use SGN::Model::Cvterm;
58 use Getopt::Long;
60 my ( $dbhost, $dbname, $file, $population_name, $species, $username, $test );
61 GetOptions(
62 'i=s' => \$file,
63 'p=s' => \$population_name,
64 's=s' => \$species,
65 'u=s' => \$username,
66 't' => \$test,
67 'dbname|D=s' => \$dbname,
68 'dbhost|H=s' => \$dbhost,
72 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
73 dbname=>$dbname,
74 dbargs => {AutoCommit => 1,
75 RaiseError => 1}
78 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do => ['SET search_path TO public;'] }
80 my $phenome_schema= CXGN::Phenome::Schema->connect( sub { $dbh->get_actual_dbh } , { on_connect_do => ['set search_path to public,phenome;'] } );
82 # new spreadsheet
84 my $spreadsheet=CXGN::Tools::File::Spreadsheet->new($file);
87 # parse first the file with the clone names and synonyms. Load into stock,
88 # and stockprop population for grouping the clones
90 my $sp_person_id = CXGN::People::Person->get_person_by_username($dbh, $username);
91 die "Need to have a user pre-loaded in the database! " if !$sp_person_id;
93 my $organism_id ;
95 if ($species) { ## can also read species name from the input file
96 my $organism = $schema->resultset("Organism::Organism")->find( {
97 species => $species } );
98 $organism_id = $organism->organism_id();
99 die "Species $species does not exist in the database! " if !$organism_id;
100 } #check this again if species name is provided in the file
102 my $stock_rs = $schema->resultset("Stock::Stock");
104 my $stock_property_cv_id = $schema->resultset("Cv::Cv")->find( { name => 'stock_property' })->cv_id();
106 print STDERR "Stock property CV ID = $stock_property_cv_id\n";
109 # the cvterm for the population
111 print "Finding/creating cvterm for population\n";
112 my $population_cvterm = SGN::Model::Cvterm->get_cvterm_row($schema, 'population', 'stock_type');
116 # the cvterm for the accession
118 my $accession_cvterm = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type');
120 # the cvterm for the relationship type
122 my $member_of = SGN::Model::Cvterm->get_cvterm_row($schema, 'member_of', 'stock_relationship');
124 # for the stock module
126 print "parsing spreadsheet... \n";
127 my @rows = $spreadsheet->row_labels();
128 my @columns = $spreadsheet->column_labels();
130 my $syn_count;
132 # accession genus species population_name synonyms
134 my $coderef= sub {
135 foreach my $accession (@rows ) {
136 # remove spaces from accession name
137 $accession=~s/\s+//g;
139 my $species_name = $spreadsheet->value_at($accession, "species_name");
140 if (!$species) {
141 my $organism = $schema->resultset("Organism::Organism")->find( {
142 species => $species_name } );
143 $organism_id = $organism->organism_id();
144 die "Species $species_name does not exist in the database! " if !$organism_id;
147 my $population_name = $spreadsheet->value_at($accession, "population_name");
148 my $synonym_string = $spreadsheet->value_at($accession, "synonyms");
149 my @synonyms = split /\|/ , $synonym_string;
151 my $population;
152 if ($population_name) {
153 print "Creating a stock for population $population_name (cvterm = " . $population_cvterm->name . ")\n";
154 $population = $stock_rs->find_or_create(
156 'me.name' => $population_name,
157 'me.uniquename' => $population_name,
158 'me.organism_id' => $organism_id,
159 type_id => $population_cvterm->cvterm_id,
161 { join => 'type' }
165 print "Find or create stock for accesssion $accession\n";
166 my $stock = $schema->resultset("Stock::Stock")->find_or_create(
167 { organism_id => $organism_id,
168 name => $accession,
169 uniquename => $accession,
170 type_id => $accession_cvterm->cvterm_id(),
172 my $stock_id = $stock->stock_id;
173 print "Adding owner $sp_person_id \n";
175 # add the owner for this stock
177 $phenome_schema->resultset("StockOwner")->find_or_create(
179 stock_id => $stock->stock_id,
180 sp_person_id => $sp_person_id,
183 # the stock belongs to the population:
184 # add new stock_relationship
186 if ($population_name) {
187 print "Accession $accession is member_of population $population_name \n";
188 $population->find_or_create_related('stock_relationship_objects', {
189 type_id => $member_of->cvterm_id(),
190 subject_id => $stock->stock_id(),
191 } );
193 if ($synonym_string) {print "Adding synonyms #" . scalar(@synonyms) . "\n"; }
194 foreach my $syn (@synonyms) {
195 if ($syn && defined($syn) && ($syn ne $accession) ) {
196 my $existing_synonym = $stock->search_related(
197 'stockprops' , {
198 'me.value' => $syn,
199 'type.name' => { ilike => '%synonym%' }
201 { join => 'type' }
202 )->single;
203 if (!$existing_synonym) {
204 $syn_count++;
205 print STDOUT "Adding synonym: $syn \n" ;
207 # add the synonym as a stockprop
209 $stock->create_stockprops({ stock_synonym => $syn},
210 {autocreate => 0,
211 allow_duplicate_values=> 1,
217 print STDERR "Parsing ".scalar(@columns)." columns...\n";
219 for(my $n = 5; $n<@columns; $n++) {
220 print STDERR "Retrieving value at $accession / $columns[$n]...\n";
221 my $value = $spreadsheet->value_at($accession, $columns[$n]);
222 print STDERR "value is $value\n";
223 if ($value) {
224 my $type_rs = $schema->resultset("Cv::Cvterm")->find_or_create(
226 name => $columns[$n],
227 cv_id => $stock_property_cv_id
229 print STDERR "TYPE ID IS ".$type_rs->cvterm_id."\n";
230 my $stockprop = $schema->resultset("Stock::Stockprop")->find_or_create(
232 stock_id => $stock->stock_id,
233 value => $value,
234 type_id => $type_rs->cvterm_id,
240 if ($test) {
241 die "TEST RUN! rolling back\n";
246 try {
247 $schema->txn_do($coderef);
248 if (!$test) { print "Transaction succeeded! Commiting stocks and their properties! \n\n"; }
249 } catch {
250 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";