Merge pull request #2479 from solgenomics/topic/fix_blast_js_problem
[sgn.git] / bin / load_phenotypes_fieldbook.pl
blob08978e8c853c67b3b65c81a7139c9a581a28976c
1 #!/usr/bin/perl
3 =head1 NAME
5 load_phenotypes_fieldbook.pl - Backend script for loading phenotypes into CXGN databases from a Fieldbook CSV file. Uses the same process as the online interface.
7 =head1 SYNOPSIS
9 load_phenotypes_fieldbook.pl -H [dbhost] -D [dbname] -U [dbuser] -P [dbpass] -i [infile] -a [archive path] -d [datalevel] -u [username] [-o 0|1]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -U database username (required)
16 -P database userpass (required)
17 -i path to infile (required)
18 -a archive path (required) e.g. /export/prod/archive/
19 -d datalevel (required) must be plots or plants
20 -u username (required) username in database of person uploading phenotypes
21 -o overwrite previous values (optional) 1 or 0
23 =head1 DESCRIPTION
25 perl bin/load_phenotypes_fieldbook.pl -D cass -H localhost -U postgres -P postgres -u nmorales -i ~/Downloads/combined_counts.csv -a /export/prod/archive/ -d plants
27 This script will parse and validate the input file. If there are any warnings or errors during validation it will die.
28 If there are no warnings or errors during validation it will then store the data.
30 The input file should be a Fieldbook CSV file. All fields should be quoted.
31 "plot_id","range","plot","rep","accession","is_a_control","trait","value","timestamp","person","location","number"
33 =head1 AUTHOR
35 Nicolas Morales (nm529@cornell.edu)
37 =cut
39 use strict;
41 use Getopt::Std;
42 use Data::Dumper;
43 use Carp qw /croak/ ;
44 use Pod::Usage;
45 use DateTime;
46 use Bio::Chado::Schema;
47 use CXGN::Metadata::Schema;
48 use CXGN::Phenome::Schema;
49 use CXGN::DB::InsertDBH;
50 use CXGN::Phenotypes::StorePhenotypes;
51 use CXGN::Phenotypes::ParseUpload;
52 use CXGN::UploadFile;
53 use File::Basename;
# Command-line switches. Getopt::Std::getopts fills one package variable per
# switch, so these must be declared with 'our' rather than 'my'.
our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_i, $opt_a, $opt_d, $opt_u, $opt_o);

my $usage_message = "Must provide options -H (hostname), -D (database name), -U (database user), -P (database password), -i (input file), -a (archive path), -d (datalevel), -u (username in db)\n";

# getopts returns false when it meets a switch it does not know. Stop here
# instead of continuing with a partly parsed command line.
getopts('H:D:U:P:i:a:d:u:o:')
    or die $usage_message;

# Every switch except -o (overwrite) is mandatory.
if (!$opt_H || !$opt_D || !$opt_U || !$opt_P || !$opt_i || !$opt_a || !$opt_d || !$opt_u) {
    die $usage_message;
}
# The three schema objects all connect to the same database with the same
# credentials, so the connection arguments are assembled once and reused.
my @connect_args = (
    "dbi:Pg:database=$opt_D;host=$opt_H",    # DSN
    $opt_U,                                  # database username
    $opt_P,                                  # database password
);

my $schema          = Bio::Chado::Schema->connect(@connect_args);
my $metadata_schema = CXGN::Metadata::Schema->connect(@connect_args);
my $phenome_schema  = CXGN::Phenome::Schema->connect(@connect_args);
# Separate database handle used for the hand-written SQL in this script.
# Only the host and database name are passed here; statements auto-commit
# and database errors raise exceptions.
my %dbh_settings = (
    dbhost => $opt_H,
    dbname => $opt_D,
    dbargs => { AutoCommit => 1, RaiseError => 1 },
);
my $dbh = CXGN::DB::InsertDBH->new(\%dbh_settings);

# Make unqualified table names resolve against public first, then sgn.
$dbh->do('SET search_path TO public,sgn');
# Resolve the uploader's username (-u) to its sp_person_id.
# The username comes from the command line, so it is bound through a
# placeholder rather than interpolated into the SQL text; this avoids SQL
# injection and also keeps usernames containing a quote character working.
my $q = "SELECT sp_person_id from sgn_people.sp_person where username = ?";
my $h = $dbh->prepare($q);
$h->execute($opt_u);
my ($sp_person_id) = $h->fetchrow_array();

# No matching row means the username does not exist in the database.
if (!$sp_person_id) {
    die "Not a valid -u\n";
}
# Parser shared by the validation and parsing steps.
my $parser = CXGN::Phenotypes::ParseUpload->new();

# Inputs taken from the command line.
my $upload     = $opt_i;    # path of the fieldbook csv file
my $data_level = $opt_d;    # data level given with -d

# Fixed settings for this upload type.
my $validate_type      = "field book";
my $metadata_file_type = "tablet phenotype file";
my $subdirectory       = "tablet_phenotype_upload";
my $timestamp_included = 1;

# Timestamp of this run, built as "<ymd>_<hms>".
my $time       = DateTime->now();
my $date_part  = $time->ymd();
my $clock_part = $time->hms();
my $timestamp  = "${date_part}_${clock_part}";
# Copy the input file into the archive on behalf of the uploading user.
my $uploader = CXGN::UploadFile->new({
    tempfile         => $upload,
    subdirectory     => $subdirectory,
    archive_path     => $opt_a,
    archive_filename => basename($upload),
    timestamp        => $timestamp,
    user_id          => $sp_person_id,
    user_role        => 'curator'
});

my $archived_filename_with_path = $uploader->archive();

# Check the archive result BEFORE using the path. Previously the md5 was
# computed first, so a failed archive could die inside get_md5 and never
# reach the intended error message.
if (!$archived_filename_with_path) {
    die "Could not archive file!\n";
}
print STDERR "File saved in archive.\n";

my $md5 = $uploader->get_md5($archived_filename_with_path);
# Metadata describing this upload; handed to the phenotype store as
# metadata_hash.
my %phenotype_metadata = (
    'archived_file'      => $archived_filename_with_path,
    'archived_file_type' => $metadata_file_type,
    'operator'           => $opt_u,
    'date'               => $timestamp,
);
# Step 1: validate the archived file itself. The result is treated as
# false (invalid), 1 (valid), or a hash reference that may carry an
# 'error' message.
my $validate_file = $parser->validate(
    $validate_type,
    $archived_filename_with_path,
    $timestamp_included,
    $data_level,
    $schema
);

die "Input file itself not valid.\n" unless $validate_file;

if ($validate_file != 1) {
    # Anything other than 1 is handled as a hash reference.
    my $validation_error = $validate_file->{'error'};
    die $validation_error."\n" if $validation_error;
}
else {
    print STDERR "File itself valid. Will now parse.\n";
}
# Step 2: parse the archived file. The script stops if the parser returns
# nothing or reports an error.
my $parsed_file = $parser->parse(
    $validate_type,
    $archived_filename_with_path,
    $timestamp_included,
    $data_level,
    $schema
);

$parsed_file or die "Error parsing file.\n";

if (my $parse_error = $parsed_file->{'error'}) {
    die $parse_error, "\n";
}

print STDERR "File parsed. Will now validate contents.\n";
# Unpack the parser result into the structures passed to the phenotype
# store: the values hash, the list of units and the list of variables.
my (%parsed_data, @plots, @traits);
if ($parsed_file and not $parsed_file->{'error'}) {
    my ($data_ref, $units_ref, $variables_ref) =
        @{$parsed_file}{ 'data', 'units', 'variables' };
    %parsed_data = %{$data_ref};
    @plots       = @{$units_ref};
    @traits      = @{$variables_ref};
}
# Step 3: build the object that verifies and stores the parsed phenotypes.
# NOTE(review): -o is only consulted when deciding whether verification
# warnings are fatal; it is not passed to this constructor. Confirm whether
# StorePhenotypes needs an explicit overwrite setting.
my %store_args = (
    bcs_schema      => $schema,
    metadata_schema => $metadata_schema,
    phenome_schema  => $phenome_schema,
    user_id         => $sp_person_id,
    stock_list      => \@plots,
    trait_list      => \@traits,
    values_hash     => \%parsed_data,
    has_timestamps  => $timestamp_included,
    metadata_hash   => \%phenotype_metadata,
);
my $store_phenotypes = CXGN::Phenotypes::StorePhenotypes->new(%store_args);
# Step 4: verify the parsed contents before anything is stored.
# An error always stops the script; a warning stops it unless -o was given.
my ($verified_warning, $verified_error) = $store_phenotypes->verify();

die $verified_error."\n" if $verified_error;
die $verified_warning."\n" if $verified_warning && !$opt_o;

print STDERR "Done validating. Now storing\n";
# Step 5: store the phenotypes. Stop on a store error; otherwise report the
# success message returned by the store on STDERR.
my ($stored_phenotype_error, $stored_phenotype_success) = $store_phenotypes->store();

die $stored_phenotype_error."\n" if $stored_phenotype_error;

print STDERR "$stored_phenotype_success\n";
print STDERR "Script Complete.\n";