add POD to the load_trait_props.pl script.
[sgn.git] / bin / load_trait_props.pl
blobe8469f33b3e1f175b8de0f9482bde9f9d8dfd2e9
1 #!/usr/bin/env perl
3 =head1
5 load_trait_props.pl
7 =head1 SYNOPSIS
9 $load_trait_props.pl -H [dbhost] -D [dbname] -I [input file] -o [ontology] -w
11 =head1 COMMAND-LINE OPTIONS
13 -H host name
14 -D database name
15 -w overwrite
16 -t Test run . Rolling back at the end.
17 -o Ontology name (from db table, e.g. "GO")
18 -I input file, either .xls or .xlsx format
20 =head2 DESCRIPTION
22 The input file should have the following column headers:
23 trait_name
24 trait_format
25 trait_default_value
26 trait_minimum
27 trait_maximum
28 trait_categories
29 trait_details
31 trait_name: the name of the variable human readable form (e.g., "plant height in cm")
32 trait_format can be numeric, qualitative, date or boolean
33 trait_default_value is the value if no value is given
34 trait_categories are the different possible names of the categories, separated by /, for example "1/2/3/4/5"
35 trait_details string describing the trait categories
37 =head2 AUTHOR
39 Jeremy D. Edwards (jde22@cornell.edu)
41 April 2014
43 =head2 TODO
45 Add support for other spreadsheet formats
47 =cut
49 use strict;
50 use warnings;
52 use lib 'lib';
53 use Getopt::Std;
54 use Bio::Chado::Schema;
55 use Spreadsheet::ParseExcel;
56 use Spreadsheet::ParseXLSX;
57 use CXGN::DB::InsertDBH;
58 use CXGN::DB::Connection;
59 use CXGN::Fieldbook::TraitProps;
62 our ($opt_H, $opt_D, $opt_I, $opt_o, $opt_w, $opt_t);
63 getopts('H:D:I:o:wt');
66 sub print_help {
67 print STDERR "A script to load trait properties\nUsage: load_trait_props.pl -D [database name] -H [database host, e.g., localhost] -I [input file] -o [ontology namespace, e.g., CO] -w\n\t-w\toverwrite existing trait properties if they exist (optional)\n\t-t\ttest run. roll back at the end\n";
71 if (!$opt_D || !$opt_H || !$opt_I || !$opt_o) {
72 print_help();
73 die("Exiting: options missing\n");
76 # Match a dot, extension .xls / .xlsx
77 my ($extension) = $opt_I =~ /(\.[^.]+)$/;
78 my $parser;
80 if ($extension eq '.xlsx') {
81 $parser = Spreadsheet::ParseXLSX->new();
83 else {
84 $parser = Spreadsheet::ParseExcel->new();
87 #try to open the excel file and report any errors
88 my $excel_obj = $parser->parse($opt_I);
90 if ( !$excel_obj ) {
91 die "Input file error: ".$parser->error()."\n";
94 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
95 my ( $row_min, $row_max ) = $worksheet->row_range();
96 my ( $col_min, $col_max ) = $worksheet->col_range();
98 if (($col_max - $col_min) < 1 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of phenotypes
99 die "Input file error: spreadsheet is missing header\n";
102 my $trait_name_head;
104 if ($worksheet->get_cell(0,0)) {
105 $trait_name_head = $worksheet->get_cell(0,0)->value();
108 if (!$trait_name_head || $trait_name_head ne 'trait_name') {
109 die "Input file error: no \"trait_name\" in header\n";
112 my @trait_property_names = qw(
113 trait_format
114 trait_default_value
115 trait_minimum
116 trait_maximum
117 trait_categories
118 trait_details
121 #check header for property names
122 for (my $column_number = 1; $column_number <= scalar @trait_property_names; $column_number++) {
123 my $property_name = $trait_property_names[$column_number-1];
124 if ( !($worksheet->get_cell(0,$column_number)) || !($worksheet->get_cell(0,$column_number)->value() eq $property_name) ) {
125 die "Input file error: no \"$property_name\" in header\n";
129 my @trait_props_data;
132 for my $row ( 1 .. $row_max ) {
133 my %trait_props;
134 my $trait_name;
135 my $current_row = $row+1;
138 if ($worksheet->get_cell($row,0)) {
139 $trait_name = $worksheet->get_cell($row,0)->value();
140 $trait_props{'trait_name'}=$trait_name;
141 } else {
142 next; #skip blank lines
145 my $prop_column = 1;
146 foreach my $property_name (@trait_property_names) {
147 if ($worksheet->get_cell($row,$prop_column)) {
148 if (($worksheet->get_cell($row,$prop_column)->value()) && ($worksheet->get_cell($row,$prop_column)->value() ne '')) {
149 $trait_props{$property_name}=$worksheet->get_cell($row,$prop_column)->value();
152 $prop_column++;
155 push @trait_props_data, \%trait_props;
159 my $dbh = CXGN::DB::InsertDBH
160 ->new({
161 dbname => $opt_D,
162 dbhost => $opt_H,
163 dbargs => {AutoCommit => 1,
164 RaiseError => 1},
167 my $overwrite_existing_props = 0;
169 if ($opt_w){
170 $overwrite_existing_props = 1;
173 my $is_test_run = 0;
175 if ($opt_t){
176 $is_test_run = 1;
179 my $chado_schema = Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
181 my $db_name = $opt_o;
183 my $trait_props = CXGN::Fieldbook::TraitProps->new({ chado_schema => $chado_schema, db_name => $db_name, trait_names_and_props => \@trait_props_data, overwrite => $overwrite_existing_props, is_test_run => $is_test_run});
185 print STDERR "Validating data...\t";
186 my $validate=$trait_props->validate();
188 if (!$validate) {
189 die("input data is not valid\n");
190 } else {
191 print STDERR "input data is valid\n";
194 print STDERR "Storing data...\t\t";
195 my $store = $trait_props->store();
197 if (!$store){
198 if (!$is_test_run) {
199 die("\n\nerror storing data\n");
201 } else {
202 print STDERR "successfully stored data\n";