5 load_phenotypes_spreadsheet_csv.pl - backend script for loading phenotypes into cxgn databases from a spreadsheet csv file. uses same process as online interface.
9 load_phenotypes_spreadsheet_csv.pl -H [dbhost] -D [dbname] -U [dbuser] -P [dbpass] -b [basepath] -i [infile] -a [archive path on server] -d [datalevel] -u [username] -t [timestamps included] -o [overwrite previous values] -r [temp_file_nd_experiment_id]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -U database username (required)
16 -P database userpass (required)
17 -b basepath (required) e.g. "/home/me/cxgn/sgn"
18 -r temp_file_nd_experiment_id (required) e.g. "/tmp/delete_nd_experiment_ids.txt"
19 -i path to infile (required)
20 -a archive path (required) e.g. /export/prod/archive/
21 -d datalevel (required) must be plots or plants
22 -u username (required) username in database of peron uploading phenotypes
23 -t timestamps included (optional) 1 or 0
24 -o overwrite previous values (optional) 1 or 0
28 perl bin/load_phenotypes_spreadsheet_csv.pl -D cass -H localhost -U postgres -P postgres -b /home/me/cxgn/sgn -u nmorales -i ~/Downloads/combined_counts.csv -a /export/prod/archive/ -d plants -r /tmp/delete_nd_experiment_ids.txt
30 This script will parse and validate the input file. If there are any warnings or errors during validation it will die.
31 If there are no warnings or errors during validation it will then store the data.
33 input file should be a spreadsheet csv file. All fields should be quoted.
34 header line should be: "studyYear","studyDbId","studyName","studyDesign","locationDbId","locationName","germplasmDbId","germplasmName","germplasmSynonyms","observationLevel","observationUnitDbId","observationUnitName","replicate","blockNumber","plotNumber" followed by all measured traits
36 To include timestamps, values should be "value,YYYY-MM-DD 13:45:01+0004".
40 Nicolas Morales (nm529@cornell.edu)
51 use Bio
::Chado
::Schema
;
52 use CXGN
::Metadata
::Schema
;
53 use CXGN
::Phenome
::Schema
;
54 use CXGN
::DB
::InsertDBH
;
55 use CXGN
::Phenotypes
::StorePhenotypes
;
56 use CXGN
::Phenotypes
::ParseUpload
;
60 our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_b, $opt_i, $opt_a, $opt_d, $opt_u, $opt_t, $opt_o, $opt_r);
62 getopts
('H:D:U:P:b:i:a:d:u:t:o:r:');
64 if (!$opt_H || !$opt_D || !$opt_U ||!$opt_P || !$opt_b || !$opt_i || !$opt_a || !$opt_d || !$opt_u || !$opt_r) {
65 die "Must provide options -H (hostname), -D (database name), -U (database user), -P (database password), -b (basepath), -i (input file), -a (archive path), -d (datalevel), -u (username in db), -r (temp_file_nd_experiment_id)\n";
68 my $schema = Bio
::Chado
::Schema
->connect(
69 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
73 my $metadata_schema = CXGN
::Metadata
::Schema
->connect(
74 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
78 $metadata_schema->storage->dbh->do('SET search_path TO metadata');
80 my $phenome_schema = CXGN
::Phenome
::Schema
->connect(
81 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
85 $phenome_schema->storage->dbh->do('SET search_path TO phenome');
87 my $dbh = CXGN
::DB
::InsertDBH
->new({
90 dbargs
=> {AutoCommit
=> 1, RaiseError
=> 1}
92 $dbh->do('SET search_path TO public,sgn');
94 my $q = "SELECT sp_person_id from sgn_people.sp_person where username = '$opt_u';";
95 my $h = $dbh->prepare($q);
97 my ($sp_person_id) = $h->fetchrow_array();
99 die "Not a valid -u\n";
102 my $timestamp_included;
104 $timestamp_included = 1;
107 my $parser = CXGN
::Phenotypes
::ParseUpload
->new();
108 my $subdirectory = "spreadsheet_csv_phenotype_upload";
109 my $validate_type = "phenotype spreadsheet csv";
110 my $metadata_file_type = "spreadsheet csv phenotype file";
112 my $data_level = $opt_d;
114 my $time = DateTime
->now();
115 my $timestamp = $time->ymd()."_".$time->hms();
117 my $uploader = CXGN
::UploadFile
->new({
119 subdirectory
=> $subdirectory,
120 archive_path
=> $opt_a,
121 archive_filename
=> basename
($upload),
122 timestamp
=> $timestamp,
123 user_id
=> $sp_person_id,
124 user_role
=> 'curator'
126 my $archived_filename_with_path = $uploader->archive();
127 my $md5 = $uploader->get_md5($archived_filename_with_path);
128 if (!$archived_filename_with_path) {
129 die "Could not archive file!\n";
131 print STDERR
"File saved in archive.\n";
134 my %phenotype_metadata;
135 $phenotype_metadata{'archived_file'} = $archived_filename_with_path;
136 $phenotype_metadata{'archived_file_type'} = $metadata_file_type;
137 $phenotype_metadata{'operator'} = $opt_u;
138 $phenotype_metadata{'date'} = $timestamp;
140 my $validate_file = $parser->validate($validate_type, $archived_filename_with_path, $timestamp_included, $data_level, $schema);
141 if (!$validate_file) {
142 die "Input file itself not valid.\n";
144 if ($validate_file == 1){
145 print STDERR
"File itself valid. Will now parse.\n";
147 if ($validate_file->{'error'}) {
148 die $validate_file->{'error'}."\n";
152 my $parsed_file = $parser->parse($validate_type, $archived_filename_with_path, $timestamp_included, $data_level, $schema);
154 die "Error parsing file.\n";
156 if ($parsed_file->{'error'}) {
157 die $parsed_file->{'error'},"\n";
160 print STDERR
"File parsed. Will now validate contents.\n";
165 if ($parsed_file && !$parsed_file->{'error'}) {
166 %parsed_data = %{$parsed_file->{'data'}};
167 @plots = @
{$parsed_file->{'units'}};
168 @traits = @
{$parsed_file->{'variables'}};
171 my $store_phenotypes = CXGN
::Phenotypes
::StorePhenotypes
->new(
177 temp_file_nd_experiment_id
=>$opt_r,
179 metadata_schema
=>$metadata_schema,
180 phenome_schema
=>$phenome_schema,
181 user_id
=>$sp_person_id,
183 trait_list
=>\
@traits,
184 values_hash
=>\
%parsed_data,
185 has_timestamps
=>$timestamp_included,
186 metadata_hash
=>\
%phenotype_metadata,
189 my ($verified_warning, $verified_error) = $store_phenotypes->verify();
190 if ($verified_error) {
191 die $verified_error."\n";
193 if ($verified_warning && !$opt_o) {
194 die $verified_warning."\n";
197 print STDERR
"Done validating. Now storing\n";
199 my ($stored_phenotype_error, $stored_Phenotype_success) = $store_phenotypes->store();
200 if ($stored_phenotype_error) {
201 die $stored_phenotype_error."\n";
203 print STDERR
$stored_Phenotype_success."\n";
204 print STDERR
"Script Complete.\n";