Merge pull request #3255 from solgenomics/topic/add_accessions_to_trial_detail
[sgn.git] / bin / load_phenotypes_fieldbook.pl
blob27bffdd65c3782f69f103cf8546e1a6361898371
1 #!/usr/bin/perl
3 =head1 NAME
5 load_phenotypes_fieldbook.pl - backend script for loading phenotypes into cxgn databases from a fieldbook csv file. Uses the same process as the online interface.
7 =head1 SYNOPSIS
9 load_phenotypes_fieldbook.pl -H [dbhost] -D [dbname] -U [dbuser] -P [dbpass] -b [basepath] -i [infile] -a [archive path] -d [datalevel] -u [username] -r [temp_file_nd_experiment_id] [-o overwrite]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -U database username (required)
16 -P database userpass (required)
17 -b basepath (required) e.g. "/home/me/cxgn/sgn"
18 -r temp_file_nd_experiment_id (required) e.g. "/tmp/delete_nd_experiment_ids.txt"
19 -i path to infile (required)
20 -a archive path (required) e.g. /export/prod/archive/
21 -d datalevel (required) must be plots or plants
22 -u username (required) username in database of person uploading phenotypes
23 -o overwrite previous values (optional) 1 or 0
25 =head1 DESCRIPTION
27 perl bin/load_phenotypes_fieldbook.pl -D cass -H localhost -U postgres -P postgres -b /home/me/cxgn/sgn -u nmorales -i ~/Downloads/combined_counts.csv -a /export/prod/archive/ -d plants -r /tmp/delete_nd_experiment_ids.txt
29 This script will parse and validate the input file. If there are any warnings or errors during validation it will die.
30 If there are no warnings or errors during validation it will then store the data.
32 input file should be a fieldbook csv file. All fields should be quoted.
33 "plot_id","range","plot","rep","accession","is_a_control","trait","value","timestamp","person","location","number"
35 =head1 AUTHOR
37 Nicolas Morales (nm529@cornell.edu)
39 =cut
41 use strict;
43 use Getopt::Std;
44 use Data::Dumper;
45 use Carp qw /croak/ ;
46 use Pod::Usage;
47 use DateTime;
48 use Bio::Chado::Schema;
49 use CXGN::Metadata::Schema;
50 use CXGN::Phenome::Schema;
51 use CXGN::DB::InsertDBH;
52 use CXGN::Phenotypes::StorePhenotypes;
53 use CXGN::Phenotypes::ParseUpload;
54 use CXGN::UploadFile;
55 use File::Basename;
# Option variables filled in by Getopt::Std::getopts(). Every switch in the
# spec below is followed by ':' and therefore takes a value.
our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_b, $opt_i, $opt_a, $opt_d, $opt_u, $opt_o, $opt_r);

getopts('H:D:U:P:b:i:a:d:u:o:r:');

# Stop before any database or file work when a mandatory switch is missing.
# -o is the only switch not checked here. Plain truthiness is tested, so an
# empty string or "0" counts as missing.
if (!$opt_H || !$opt_D || !$opt_U || !$opt_P || !$opt_b || !$opt_i || !$opt_a || !$opt_d || !$opt_u || !$opt_r) {
    die "Must provide options -H (hostname), -D (database name), -U (database user), -P (database password), -b (basepath), -i (input file), -a (archive path), -d (datalevel), -u (username in db) -r (temp_file_nd_experiment_id)\n";
}
# All three schema objects connect to the same database with the same
# credentials, so the DSN is built once from -D and -H.
my $dsn = "dbi:Pg:database=$opt_D;host=$opt_H";

# Chado schema; later handed to the parser and to the store object.
my $schema = Bio::Chado::Schema->connect(
    $dsn,      # DSN Line
    $opt_U,    # Username
    $opt_P     # Password
);

# Metadata schema; later handed to the store object.
my $metadata_schema = CXGN::Metadata::Schema->connect(
    $dsn,      # DSN Line
    $opt_U,    # Username
    $opt_P     # Password
);

# Phenome schema; later handed to the store object.
my $phenome_schema = CXGN::Phenome::Schema->connect(
    $dsn,      # DSN Line
    $opt_U,    # Username
    $opt_P     # Password
);
# Plain database handle used for the raw SQL lookup of the uploading user.
# It is created with AutoCommit and RaiseError switched on.
my %dbh_settings = (
    dbhost => $opt_H,
    dbname => $opt_D,
    dbargs => { AutoCommit => 1, RaiseError => 1 },
);
my $dbh = CXGN::DB::InsertDBH->new({ %dbh_settings });

# Resolve unqualified table names against the public and sgn schemas.
$dbh->do('SET search_path TO public,sgn');
# Look up the sp_person_id of the user named by -u. The username is passed as
# a bound parameter rather than interpolated into the SQL text, so quotes or
# other SQL metacharacters in -u cannot alter the statement.
my $q = "SELECT sp_person_id from sgn_people.sp_person where username = ?;";
my $h = $dbh->prepare($q);
$h->execute($opt_u);
my ($sp_person_id) = $h->fetchrow_array();

# No matching row (or a false id) means -u does not name a known user.
if (!$sp_person_id) {
    die "Not a valid -u\n";
}
# Input file and data level come straight from the command line.
my $upload     = $opt_i;
my $data_level = $opt_d;

# Fixed settings for this kind of upload; they are passed to the parser,
# the archive step and the stored metadata further down.
my $subdirectory       = 'tablet_phenotype_upload';
my $validate_type      = 'field book';
my $metadata_file_type = 'tablet phenotype file';
my $timestamp_included = 1;

my $parser = CXGN::Phenotypes::ParseUpload->new();

# Timestamp string built as "<ymd>_<hms>" from the current DateTime.
my $now       = DateTime->now();
my $timestamp = join('_', $now->ymd(), $now->hms());
# Copy the input file into the archive under the uploading user's id.
my $uploader = CXGN::UploadFile->new({
    tempfile         => $upload,
    subdirectory     => $subdirectory,
    archive_path     => $opt_a,
    archive_filename => basename($upload),
    timestamp        => $timestamp,
    user_id          => $sp_person_id,
    user_role        => 'curator'
});
my $archived_filename_with_path = $uploader->archive();

# Check the archive result first: computing a checksum for a path that was
# never produced makes no sense and could hide the real failure.
if (!$archived_filename_with_path) {
    die "Could not archive file!\n";
}
print STDERR "File saved in archive.\n";

# NOTE(review): $md5 is not used anywhere else in this script; the call is
# kept in case get_md5 is relied on for a side effect - confirm and drop if not.
my $md5 = $uploader->get_md5($archived_filename_with_path);
# Metadata describing this upload; passed to the store object as metadata_hash.
my %phenotype_metadata = (
    'archived_file'      => $archived_filename_with_path,
    'archived_file_type' => $metadata_file_type,
    'operator'           => $opt_u,
    'date'               => $timestamp,
);
# First pass: let the parser check the archived file itself.
my $validate_file = $parser->validate($validate_type, $archived_filename_with_path, $timestamp_included, $data_level, $schema);

# A false result means the file was rejected without further detail.
if (!$validate_file) {
    die "Input file itself not valid.\n";
}

# A result of 1 means the file is valid. Any other true value is treated as
# a hash reference and its 'error' entry, when set, is fatal.
if ($validate_file == 1) {
    print STDERR "File itself valid. Will now parse.\n";
}
elsif ($validate_file->{'error'}) {
    die $validate_file->{'error'}."\n";
}
# Second pass: parse the archived file into a hash reference.
my $parsed_file = $parser->parse($validate_type, $archived_filename_with_path, $timestamp_included, $data_level, $schema);

# No result at all, or a result carrying an 'error' entry, aborts the run.
if (!$parsed_file) {
    die "Error parsing file.\n";
}
if ($parsed_file->{'error'}) {
    die $parsed_file->{'error'},"\n";
}

print STDERR "File parsed. Will now validate contents.\n";
# Copy the parser output into plain variables for the store step:
# 'data' -> %parsed_data, 'units' -> @plots, 'variables' -> @traits.
my %parsed_data;
my @plots;
my @traits;

# Only unpack when a result exists and carries no error.
if ($parsed_file && !$parsed_file->{'error'}) {
    %parsed_data = %{ $parsed_file->{'data'} };
    @plots       = @{ $parsed_file->{'units'} };
    @traits      = @{ $parsed_file->{'variables'} };
}
# Build the object that verifies and stores the parsed phenotypes. It receives
# the connection settings, the three schema objects, the uploading user's id,
# the parsed units/variables/values and the upload metadata.
# NOTE(review): -o ($opt_o) is not forwarded here; in this script it only
# decides whether verification warnings abort the run - confirm that is intended.
my $store_phenotypes = CXGN::Phenotypes::StorePhenotypes->new(
    basepath                   => $opt_b,
    dbhost                     => $opt_H,
    dbname                     => $opt_D,
    dbuser                     => $opt_U,
    dbpass                     => $opt_P,
    temp_file_nd_experiment_id => $opt_r,
    bcs_schema                 => $schema,
    metadata_schema            => $metadata_schema,
    phenome_schema             => $phenome_schema,
    user_id                    => $sp_person_id,
    stock_list                 => \@plots,
    trait_list                 => \@traits,
    values_hash                => \%parsed_data,
    has_timestamps             => $timestamp_included,
    metadata_hash              => \%phenotype_metadata,
);
# Verify the parsed contents before anything is written.
my ($verified_warning, $verified_error) = $store_phenotypes->verify();

# Errors are always fatal.
if ($verified_error) {
    die $verified_error."\n";
}

# Warnings are fatal unless -o was given with a true value.
if ($verified_warning && !$opt_o) {
    die $verified_warning."\n";
}

print STDERR "Done validating. Now storing\n";

# Store the phenotypes; an error message from the store step aborts the run.
my ($stored_phenotype_error, $stored_phenotype_success) = $store_phenotypes->store();
if ($stored_phenotype_error) {
    die $stored_phenotype_error."\n";
}

print STDERR $stored_phenotype_success."\n";
print STDERR "Script Complete.\n";