9 $load_trait_props.pl -H [dbhost] -D [dbname] -I [input file] -o [ontology] -w
11 =head1 COMMAND-LINE OPTIONS
16 -t Test run . Rolling back at the end.
17 -o Ontology name (from db table, e.g. "GO")
18 -I input file, either .xls or .xlsx format
22 The input file should have the following column headers:
32 trait_name: the name of the variable human readable form (e.g., "plant height in cm")
33 trait_format: can be numeric, qualitative, date or boolean
34 trait_default_value: is the value if no value is given
35 trait_categories: are the different possible names of the categories, separated by /, for example "1/2/3/4/5"
36 trait_details: string describing the trait categories
40 Jeremy D. Edwards (jde22@cornell.edu)
46 Add support for other spreadsheet formats
55 use Bio
::Chado
::Schema
;
56 use Spreadsheet
::ParseExcel
;
57 use Spreadsheet
::ParseXLSX
;
58 use CXGN
::DB
::InsertDBH
;
59 use CXGN
::DB
::Connection
;
60 use CXGN
::Fieldbook
::TraitProps
;
63 our ($opt_H, $opt_D, $opt_I, $opt_o, $opt_w, $opt_t);
64 getopts
('H:D:I:o:wt');
68 print STDERR
"A script to load trait properties\nUsage: load_trait_props.pl -D [database name] -H [database host, e.g., localhost] -I [input file] -o [ontology namespace, e.g., CO] -w\n\t-w\toverwrite existing trait properties if they exist (optional)\n\t-t\ttest run. roll back at the end\n";
72 if (!$opt_D || !$opt_H || !$opt_I || !$opt_o) {
74 die("Exiting: options missing\n");
77 # Match a dot, extension .xls / .xlsx
78 my ($extension) = $opt_I =~ /(\.[^.]+)$/;
81 if ($extension eq '.xlsx') {
82 $parser = Spreadsheet
::ParseXLSX
->new();
85 $parser = Spreadsheet
::ParseExcel
->new();
88 #try to open the excel file and report any errors
89 my $excel_obj = $parser->parse($opt_I);
92 die "Input file error: ".$parser->error()."\n";
95 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
96 my ( $row_min, $row_max ) = $worksheet->row_range();
97 my ( $col_min, $col_max ) = $worksheet->col_range();
99 if (($col_max - $col_min) < 1 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of phenotypes
100 die "Input file error: spreadsheet is missing header\n";
105 if ($worksheet->get_cell(0,0)) {
106 $trait_name_head = $worksheet->get_cell(0,0)->value();
109 if (!$trait_name_head || $trait_name_head ne 'trait_name') {
110 die "Input file error: no \"trait_name\" in header\n";
113 my @trait_property_names = qw(
122 #check header for property names
123 for (my $column_number = 1; $column_number <= scalar @trait_property_names; $column_number++) {
124 my $property_name = $trait_property_names[$column_number-1];
125 if ( !($worksheet->get_cell(0,$column_number)) || !($worksheet->get_cell(0,$column_number)->value() eq $property_name) ) {
126 die "Input file error: no \"$property_name\" in header\n";
130 my @trait_props_data;
133 for my $row ( 1 .. $row_max ) {
136 my $current_row = $row+1;
139 if ($worksheet->get_cell($row,0)) {
140 $trait_name = $worksheet->get_cell($row,0)->value();
141 $trait_props{'trait_name'}=$trait_name;
143 next; #skip blank lines
147 foreach my $property_name (@trait_property_names) {
148 if ($worksheet->get_cell($row,$prop_column)) {
149 my $value = $worksheet->get_cell($row,$prop_column)->value();
150 if (defined($value) && ($value ne '')) {
151 $trait_props{$property_name}=$worksheet->get_cell($row,$prop_column)->value();
157 push @trait_props_data, \
%trait_props;
161 my $dbh = CXGN
::DB
::InsertDBH
165 dbargs
=> {AutoCommit
=> 1,
169 my $overwrite_existing_props = 0;
172 $overwrite_existing_props = 1;
181 my $chado_schema = Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
183 my $db_name = $opt_o;
185 my $trait_props = CXGN
::Fieldbook
::TraitProps
->new({ chado_schema
=> $chado_schema, db_name
=> $db_name, trait_names_and_props
=> \
@trait_props_data, overwrite
=> $overwrite_existing_props, is_test_run
=> $is_test_run});
187 print STDERR
"Validating data...\t";
188 my $validate=$trait_props->validate();
191 die("input data is not valid\n");
193 print STDERR
"input data is valid\n";
196 print STDERR
"Storing data...\t\t";
197 my $store = $trait_props->store();
201 die("\n\nerror storing data\n");
204 print STDERR
"successfully stored data\n";