5 load_fieldbook_phenotypes.pl - backend script for loading phenotypes into cxgn databases from a fieldbook csv file. uses same process as online interface.
9 load_fieldbook_phenotypes.pl -H [dbhost] -D [dbname] -U [dbuser] -P [dbpass] -b [basepath] -i [infile] -a [archive_path] -d [datalevel] -u [username] -r [temp_file_nd_experiment_id] [-o 0|1]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -U database username (required)
16 -P database userpass (required)
17 -b basepath (required) e.g. "/home/me/cxgn/sgn"
18 -r temp_file_nd_experiment_id (required) e.g. "/tmp/delete_nd_experiment_ids.txt"
19 -i path to infile (required)
20 -a archive path (required) e.g. /export/prod/archive/
21 -d datalevel (required) must be plots or plants
22 -u username (required) username in database of person uploading phenotypes
23 -o overwrite previous values (optional) 1 or 0
27 perl bin/load_fieldbook_phenotypes.pl -D cass -H localhost -U postgres -P postgres -b /home/me/cxgn/sgn -u nmorales -i ~/Downloads/combined_counts.csv -a /export/prod/archive/ -d plants -r /tmp/delete_nd_experiment_ids.txt
29 This script will parse and validate the input file. If there are any errors during validation it will die; if there are any warnings it will die unless -o is set.
30 If validation passes it will then store the data.
32 input file should be a fieldbook csv file. All fields should be quoted.
33 "plot_id","range","plot","rep","accession","is_a_control","trait","value","timestamp","person","location","number"
37 Nicolas Morales (nm529@cornell.edu)
48 use Bio
::Chado
::Schema
;
49 use CXGN
::Metadata
::Schema
;
50 use CXGN
::Phenome
::Schema
;
51 use CXGN
::DB
::InsertDBH
;
52 use CXGN
::Phenotypes
::StorePhenotypes
;
53 use CXGN
::Phenotypes
::ParseUpload
;
# Command-line options filled in by getopts; every flag in the spec takes a value.
57 our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_b, $opt_i, $opt_a, $opt_d, $opt_u, $opt_o, $opt_r);
59 getopts
('H:D:U:P:b:i:a:d:u:o:r:');
# Every option except -o is mandatory. The truthiness test also rejects
# an empty string or a literal "0" as a value.
61 if (!$opt_H || !$opt_D || !$opt_U ||!$opt_P || !$opt_b || !$opt_i || !$opt_a || !$opt_d || !$opt_u || !$opt_r) {
62 die "Must provide options -H (hostname), -D (database name), -U (database user), -P (database password), -b (basepath), -i (input file), -a (archive path), -d (datalevel), -u (username in db) -r (temp_file_nd_experiment_id)\n";
# Three schema handles are opened with the same DSN, built from the
# -D (database name) and -H (host) options.
65 my $schema = Bio
::Chado
::Schema
->connect(
66 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
70 my $metadata_schema = CXGN
::Metadata
::Schema
->connect(
71 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
75 my $phenome_schema = CXGN
::Phenome
::Schema
->connect(
76 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
# Separate handle used for the direct SQL below; AutoCommit and
# RaiseError are both switched on.
80 my $dbh = CXGN
::DB
::InsertDBH
->new({
83 dbargs
=> {AutoCommit
=> 1, RaiseError
=> 1}
# Let unqualified table names resolve in the public and sgn schemas.
85 $dbh->do('SET search_path TO public,sgn');
# Resolve the -u username to its sp_person_id. The username comes straight
# from the command line, so it is bound through a placeholder instead of
# being interpolated into the SQL text (a quote in the name would otherwise
# break or alter the query).
my $q = "SELECT sp_person_id from sgn_people.sp_person where username = ?;";
my $h = $dbh->prepare($q);
$h->execute($opt_u);
my ($sp_person_id) = $h->fetchrow_array();
# Abort only when no matching account was found.
if (!$sp_person_id) {
    die "Not a valid -u\n";
}
# Parser object whose validate() and parse() methods are called on the
# archived file later in the script.
95 my $parser = CXGN
::Phenotypes
::ParseUpload
->new();
# Fixed settings for this upload: the archive subdirectory name, the type
# string passed as the first argument to validate()/parse(), and the file
# type recorded in the phenotype metadata.
96 my $subdirectory = "tablet_phenotype_upload";
97 my $validate_type = "field book";
98 my $metadata_file_type = "tablet phenotype file";
# Passed to validate()/parse() and to the store step as has_timestamps.
99 my $timestamp_included = 1;
# Data level comes directly from the -d option.
101 my $data_level = $opt_d;
# Timestamp string built as "<ymd>_<hms>" from the current time; it is
# given to the uploader and recorded as the metadata date.
103 my $time = DateTime
->now();
104 my $timestamp = $time->ymd()."_".$time->hms();
# Uploader configured with the -a archive path, the upload subdirectory,
# the timestamp, and the id of the user resolved from -u.
# NOTE(review): $upload is not assigned anywhere in the visible code;
# presumably it holds the -i input path - confirm.
# The user role is hard-coded to 'curator'.
106 my $uploader = CXGN
::UploadFile
->new({
108 subdirectory
=> $subdirectory,
109 archive_path
=> $opt_a,
110 archive_filename
=> basename
($upload),
111 timestamp
=> $timestamp,
112 user_id
=> $sp_person_id,
113 user_role
=> 'curator'
# Archive the input file. archive() returns the archived path; a false
# value means archiving failed.
my $archived_filename_with_path = $uploader->archive();

# Check the result before using it, so a failed archive produces the
# intended message instead of passing an undefined path to get_md5().
if (!$archived_filename_with_path) {
    die "Could not archive file!\n";
}

my $md5 = $uploader->get_md5($archived_filename_with_path);
print STDERR "File saved in archive.\n";
# Metadata describing this upload: the archived file and its type, the
# -u username as operator, and the upload timestamp.
my %phenotype_metadata = (
    'archived_file'      => $archived_filename_with_path,
    'archived_file_type' => $metadata_file_type,
    'operator'           => $opt_u,
    'date'               => $timestamp,
);
# Validate the archived file before parsing it.
129 my $validate_file = $parser->validate($validate_type, $archived_filename_with_path, $timestamp_included, $data_level, $schema);
# A false return is treated as an invalid file and is fatal.
130 if (!$validate_file) {
131 die "Input file itself not valid.\n";
# A return equal to 1 is reported as a valid file.
133 if ($validate_file == 1){
134 print STDERR
"File itself valid. Will now parse.\n";
# The result is also read as a hash; an 'error' entry is fatal.
136 if ($validate_file->{'error'}) {
137 die $validate_file->{'error'}."\n";
# Parse the archived file using the same arguments that were passed to
# validate().
141 my $parsed_file = $parser->parse($validate_type, $archived_filename_with_path, $timestamp_included, $data_level, $schema);
# NOTE(review): no guard is visible around this die - confirm it only
# runs when parse() returned nothing.
143 die "Error parsing file.\n";
# An 'error' entry in the parse result is fatal.
145 if ($parsed_file->{'error'}) {
146 die $parsed_file->{'error'},"\n";
149 print STDERR
"File parsed. Will now validate contents.\n";
# On a successful parse, copy the results out of the returned structure:
# 'data' into %parsed_data, 'units' into @plots, 'variables' into @traits.
# NOTE(review): the declarations of these three variables are not visible
# here - confirm they are declared before this point.
154 if ($parsed_file && !$parsed_file->{'error'}) {
155 %parsed_data = %{$parsed_file->{'data'}};
156 @plots = @
{$parsed_file->{'units'}};
157 @traits = @
{$parsed_file->{'variables'}};
# Build the object whose verify() and store() methods are called below.
# It receives the -r temp file path, the metadata and phenome schema
# handles, the uploading user's id, the parsed traits and values, the
# timestamp flag, and the upload metadata.
160 my $store_phenotypes = CXGN
::Phenotypes
::StorePhenotypes
->new(
166 temp_file_nd_experiment_id
=>$opt_r,
168 metadata_schema
=>$metadata_schema,
169 phenome_schema
=>$phenome_schema,
170 user_id
=>$sp_person_id,
172 trait_list
=>\
@traits,
173 values_hash
=>\
%parsed_data,
174 has_timestamps
=>$timestamp_included,
175 metadata_hash
=>\
%phenotype_metadata,
# verify() returns a (warning, error) pair.
178 my ($verified_warning, $verified_error) = $store_phenotypes->verify();
# Any verification error is fatal.
179 if ($verified_error) {
180 die $verified_error."\n";
# A warning is fatal only when -o was not given (or is false).
182 if ($verified_warning && !$opt_o) {
183 die $verified_warning."\n";
186 print STDERR
"Done validating. Now storing\n";
# store() returns an (error, success message) pair.
188 my ($stored_phenotype_error, $stored_Phenotype_success) = $store_phenotypes->store();
# A storage error is fatal.
189 if ($stored_phenotype_error) {
190 die $stored_phenotype_error."\n";
# Report the success message and completion on STDERR.
192 print STDERR
$stored_Phenotype_success."\n";
193 print STDERR
"Script Complete.\n";