Merge pull request #2222 from solgenomics/topic/solgs/cluster-fix-171018
[sgn.git] / bin / load_phenotypes_spreadsheet_csv.pl
blob5a9822280ee2cd50f9fbf379f70c1de938aeaa71
1 #!/usr/bin/perl
3 =head1
5 load_phenotypes_spreadsheet_csv.pl - backend script for loading phenotypes into cxgn databases from a spreadsheet csv file. uses same process as online interface.
7 =head1 SYNOPSIS
9 load_phenotypes_spreadsheet_csv.pl -H [dbhost] -D [dbname] -U [dbuser] -P [dbpass] -i [infile] -a [archive path on server] -d [datalevel] -u [username] -t [timestamps included] -o [overwrite previous values]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -U database username (required)
16 -P database userpass (required)
17 -i path to infile (required)
18 -a archive path (required) e.g. /export/prod/archive/
19 -d datalevel (required) must be plots or plants
20 -u username (required) username in database of peron uploading phenotypes
21 -t timestamps included (optional) 1 or 0
22 -o overwrite previous values (optional) 1 or 0
24 =head1 DESCRIPTION
26 perl bin/load_phenotypes_spreadsheet_csv.pl -D cass -H localhost -U postgres -P postgres -u nmorales -i ~/Downloads/combined_counts.csv -a /export/prod/archive/ -d plants
28 This script will parse and validate the input file. If there are any warnings or errors during validation it will die.
29 If there are no warnings or errors during validation it will then store the data.
31 input file should be a spreadsheet csv file. All fields should be quoted.
32 header line should be: "studyYear","studyDbId","studyName","studyDesign","locationDbId","locationName","germplasmDbId","germplasmName","germplasmSynonyms","observationLevel","observationUnitDbId","observationUnitName","replicate","blockNumber","plotNumber" followed by all measured traits
34 To include timestamps, values should be "value,YYYY-MM-DD 13:45:01+0004".
36 =head1 AUTHOR
38 Nicolas Morales (nm529@cornell.edu)
40 =cut
42 use strict;
44 use Getopt::Std;
45 use Data::Dumper;
46 use Carp qw /croak/ ;
47 use Pod::Usage;
48 use DateTime;
49 use Bio::Chado::Schema;
50 use CXGN::Metadata::Schema;
51 use CXGN::Phenome::Schema;
52 use CXGN::DB::InsertDBH;
53 use CXGN::Phenotypes::StorePhenotypes;
54 use CXGN::Phenotypes::ParseUpload;
55 use CXGN::UploadFile;
56 use File::Basename;
58 our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_i, $opt_a, $opt_d, $opt_u, $opt_t, $opt_o);
60 getopts('H:D:U:P:i:a:d:u:t:o:');
62 if (!$opt_H || !$opt_D || !$opt_U ||!$opt_P || !$opt_i || !$opt_a || !$opt_d || !$opt_u ) {
63 die "Must provide options -H (hostname), -D (database name), -U (database user), -P (database password), -i (input file), -a (archive path), -d (datalevel), -u (username in db)\n";
66 my $schema = Bio::Chado::Schema->connect(
67 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
68 $opt_U, # Username
69 $opt_P # Password
71 my $metadata_schema = CXGN::Metadata::Schema->connect(
72 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
73 $opt_U, # Username
74 $opt_P # Password
76 $metadata_schema->storage->dbh->do('SET search_path TO metadata');
78 my $phenome_schema = CXGN::Phenome::Schema->connect(
79 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
80 $opt_U, # Username
81 $opt_P # Password
83 $phenome_schema->storage->dbh->do('SET search_path TO phenome');
85 my $dbh = CXGN::DB::InsertDBH->new({
86 dbhost=>$opt_H,
87 dbname=>$opt_D,
88 dbargs => {AutoCommit => 1, RaiseError => 1}
89 });
90 $dbh->do('SET search_path TO public,sgn');
92 my $q = "SELECT sp_person_id from sgn_people.sp_person where username = '$opt_u';";
93 my $h = $dbh->prepare($q);
94 $h->execute();
95 my ($sp_person_id) = $h->fetchrow_array();
96 if (!$sp_person_id){
97 die "Not a valid -u\n";
100 my $timestamp_included;
101 if ($opt_t == 1){
102 $timestamp_included = 1;
105 my $parser = CXGN::Phenotypes::ParseUpload->new();
106 my $subdirectory = "spreadsheet_csv_phenotype_upload";
107 my $validate_type = "phenotype spreadsheet csv";
108 my $metadata_file_type = "spreadsheet csv phenotype file";
109 my $upload = $opt_i;
110 my $data_level = $opt_d;
112 my $time = DateTime->now();
113 my $timestamp = $time->ymd()."_".$time->hms();
115 my $uploader = CXGN::UploadFile->new({
116 tempfile => $upload,
117 subdirectory => $subdirectory,
118 archive_path => $opt_a,
119 archive_filename => basename($upload),
120 timestamp => $timestamp,
121 user_id => $sp_person_id,
122 user_role => 'curator'
124 my $archived_filename_with_path = $uploader->archive();
125 my $md5 = $uploader->get_md5($archived_filename_with_path);
126 if (!$archived_filename_with_path) {
127 die "Could not archive file!\n";
128 } else {
129 print STDERR "File saved in archive.\n";
132 my %phenotype_metadata;
133 $phenotype_metadata{'archived_file'} = $archived_filename_with_path;
134 $phenotype_metadata{'archived_file_type'} = $metadata_file_type;
135 $phenotype_metadata{'operator'} = $opt_u;
136 $phenotype_metadata{'date'} = $timestamp;
138 my $validate_file = $parser->validate($validate_type, $archived_filename_with_path, $timestamp_included, $data_level, $schema);
139 if (!$validate_file) {
140 die "Input file itself not valid.\n";
142 if ($validate_file == 1){
143 print STDERR "File itself valid. Will now parse.\n";
144 } else {
145 if ($validate_file->{'error'}) {
146 die $validate_file->{'error'}."\n";
150 my $parsed_file = $parser->parse($validate_type, $archived_filename_with_path, $timestamp_included, $data_level, $schema);
151 if (!$parsed_file) {
152 die "Error parsing file.\n";
154 if ($parsed_file->{'error'}) {
155 die $parsed_file->{'error'},"\n";
158 print STDERR "File parsed. Will now validate contents.\n";
160 my %parsed_data;
161 my @plots;
162 my @traits;
163 if ($parsed_file && !$parsed_file->{'error'}) {
164 %parsed_data = %{$parsed_file->{'data'}};
165 @plots = @{$parsed_file->{'units'}};
166 @traits = @{$parsed_file->{'variables'}};
169 my $store_phenotypes = CXGN::Phenotypes::StorePhenotypes->new(
170 bcs_schema=>$schema,
171 metadata_schema=>$metadata_schema,
172 phenome_schema=>$phenome_schema,
173 user_id=>$sp_person_id,
174 stock_list=>\@plots,
175 trait_list=>\@traits,
176 values_hash=>\%parsed_data,
177 has_timestamps=>$timestamp_included,
178 metadata_hash=>\%phenotype_metadata,
181 my ($verified_warning, $verified_error) = $store_phenotypes->verify();
182 if ($verified_error) {
183 die $verified_error."\n";
185 if ($verified_warning && !$opt_o) {
186 die $verified_warning."\n";
189 print STDERR "Done validating. Now storing\n";
191 my ($stored_phenotype_error, $stored_Phenotype_success) = $store_phenotypes->store();
192 if ($stored_phenotype_error) {
193 die $stored_phenotype_error."\n";
195 print STDERR $stored_Phenotype_success."\n";
196 print STDERR "Script Complete.\n";