1 package CXGN
::Trial
::ParseUpload
::Plugin
::TrialExcelFormat
;
4 use Spreadsheet
::ParseExcel
;
5 use CXGN
::Stock
::StockLookup
;
6 use SGN
::Model
::Cvterm
;
8 use CXGN
::List
::Validate
;
10 sub _validate_with_plugin
{
11 print STDERR
"Check 3.1.1 ".localtime();
13 my $filename = $self->get_filename();
14 my $schema = $self->get_chado_schema();
17 my %missing_accessions;
18 my %supported_trial_types;
19 my $parser = Spreadsheet
::ParseExcel
->new();
23 my %seen_accession_names;
24 my %seen_seedlot_names;
26 #currently supported trial types
27 $supported_trial_types{'biparental'} = 1; #both parents required
28 $supported_trial_types{'self'} = 1; #only female parent required
29 $supported_trial_types{'open'} = 1; #only female parent required
31 #try to open the excel file and report any errors
32 $excel_obj = $parser->parse($filename);
34 push @error_messages, $parser->error();
35 $errors{'error_messages'} = \
@error_messages;
36 $self->_set_parse_errors(\
%errors);
40 print STDERR
"Check 3.1.2 ".localtime();
42 $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
44 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
45 $errors{'error_messages'} = \
@error_messages;
46 $self->_set_parse_errors(\
%errors);
49 my ( $row_min, $row_max ) = $worksheet->row_range();
50 my ( $col_min, $col_max ) = $worksheet->col_range();
51 if (($col_max - $col_min) < 2 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of plot data
52 push @error_messages, "Spreadsheet is missing header or contains no rows";
53 $errors{'error_messages'} = \
@error_messages;
54 $self->_set_parse_errors(\
%errors);
60 my $accession_name_head;
61 my $seedlot_name_head;
62 my $num_seed_per_plot_head;
64 my $block_number_head;
65 my $is_a_control_head;
67 my $range_number_head;
71 if ($worksheet->get_cell(0,0)) {
72 $plot_name_head = $worksheet->get_cell(0,0)->value();
74 if ($worksheet->get_cell(0,1)) {
75 $accession_name_head = $worksheet->get_cell(0,1)->value();
77 if ($worksheet->get_cell(0,2)) {
78 $plot_number_head = $worksheet->get_cell(0,2)->value();
80 if ($worksheet->get_cell(0,3)) {
81 $block_number_head = $worksheet->get_cell(0,3)->value();
83 if ($worksheet->get_cell(0,4)) {
84 $is_a_control_head = $worksheet->get_cell(0,4)->value();
86 if ($worksheet->get_cell(0,5)) {
87 $rep_number_head = $worksheet->get_cell(0,5)->value();
89 if ($worksheet->get_cell(0,6)) {
90 $range_number_head = $worksheet->get_cell(0,6)->value();
92 if ($worksheet->get_cell(0,7)) {
93 $row_number_head = $worksheet->get_cell(0,7)->value();
95 if ($worksheet->get_cell(0,8)) {
96 $col_number_head = $worksheet->get_cell(0,8)->value();
98 if ($worksheet->get_cell(0,9)) {
99 $seedlot_name_head = $worksheet->get_cell(0,9)->value();
101 if ($worksheet->get_cell(0,10)) {
102 $num_seed_per_plot_head = $worksheet->get_cell(0,10)->value();
106 for (11 .. $col_max){
107 if ($worksheet->get_cell(0,$_)){
108 push @treatment_names, $worksheet->get_cell(0,$_)->value();
112 if (!$plot_name_head || $plot_name_head ne 'plot_name' ) {
113 push @error_messages, "Cell A1: plot_name is missing from the header";
115 if (!$accession_name_head || $accession_name_head ne 'accession_name') {
116 push @error_messages, "Cell B1: accession_name is missing from the header";
118 if (!$plot_number_head || $plot_number_head ne 'plot_number') {
119 push @error_messages, "Cell C1: plot_number is missing from the header";
121 if (!$block_number_head || $block_number_head ne 'block_number') {
122 push @error_messages, "Cell D1: block_number is missing from the header";
124 if (!$is_a_control_head || $is_a_control_head ne 'is_a_control') {
125 push @error_messages, "Cell E1: is_a_control is missing from the header. (Header is required, but values are optional)";
127 if (!$rep_number_head || $rep_number_head ne 'rep_number') {
128 push @error_messages, "Cell F1: rep_number is missing from the header. (Header is required, but values are optional)";
130 if (!$range_number_head || $range_number_head ne 'range_number') {
131 push @error_messages, "Cell G1: range_number is missing from the header. (Header is required, but values are optional)";
133 if (!$row_number_head || $row_number_head ne 'row_number') {
134 push @error_messages, "Cell H1: row_number is missing from the header. (Header is required, but values are optional)";
136 if (!$col_number_head || $col_number_head ne 'col_number') {
137 push @error_messages, "Cell I1: col_number is missing from the header. (Header is required, but values are optional)";
139 if (!$seedlot_name_head || $seedlot_name_head ne 'seedlot_name') {
140 push @error_messages, "Cell J1: seedlot_name is missing from the header. (Header is required, but values are optional)";
142 if (!$num_seed_per_plot_head || $num_seed_per_plot_head ne 'num_seed_per_plot') {
143 push @error_messages, "Cell K1: num_seed_per_plot is missing from the header. (Header is required, but values are optional)";
146 for my $row ( 1 .. $row_max ) {
147 #print STDERR "Check 01 ".localtime();
148 my $row_name = $row+1;
152 my $num_seed_per_plot = 0;
161 if ($worksheet->get_cell($row,0)) {
162 $plot_name = $worksheet->get_cell($row,0)->value();
164 if ($worksheet->get_cell($row,1)) {
165 $accession_name = $worksheet->get_cell($row,1)->value();
167 if ($worksheet->get_cell($row,2)) {
168 $plot_number = $worksheet->get_cell($row,2)->value();
170 if ($worksheet->get_cell($row,3)) {
171 $block_number = $worksheet->get_cell($row,3)->value();
173 if ($worksheet->get_cell($row,4)) {
174 $is_a_control = $worksheet->get_cell($row,4)->value();
176 if ($worksheet->get_cell($row,5)) {
177 $rep_number = $worksheet->get_cell($row,5)->value();
179 if ($worksheet->get_cell($row,6)) {
180 $range_number = $worksheet->get_cell($row,6)->value();
182 if ($worksheet->get_cell($row, 7)) {
183 $row_number = $worksheet->get_cell($row, 7)->value();
185 if ($worksheet->get_cell($row, 8)) {
186 $col_number = $worksheet->get_cell($row, 8)->value();
188 if ($worksheet->get_cell($row,9)) {
189 $seedlot_name = $worksheet->get_cell($row,9)->value();
191 if ($worksheet->get_cell($row,10)) {
192 $num_seed_per_plot = $worksheet->get_cell($row,10)->value();
196 if (!$plot_name && !$accession_name && !$plot_number && !$block_number) {
200 #print STDERR "Check 02 ".localtime();
202 #plot_name must not be blank
203 if (!$plot_name || $plot_name eq '' ) {
204 push @error_messages, "Cell A$row_name: plot name missing.";
206 elsif ($plot_name =~ /\s/ || $plot_name =~ /\// || $plot_name =~ /\\/ ) {
207 push @error_messages, "Cell A$row_name: plot name must not contain spaces or slashes.";
210 #file must not contain duplicate plot names
211 if ($seen_plot_names{$plot_name}) {
212 push @error_messages, "Cell A$row_name: duplicate plot name at cell A".$seen_plot_names{$plot_name}.": $plot_name";
214 $seen_plot_names{$plot_name}=$row_name;
217 #print STDERR "Check 03 ".localtime();
219 #accession name must not be blank
220 if (!$accession_name || $accession_name eq '') {
221 push @error_messages, "Cell B$row_name: accession name missing";
223 #accession name must exist in the database
224 $seen_accession_names{$accession_name}++;
227 #print STDERR "Check 04 ".localtime();
229 #plot number must not be blank
230 if (!$plot_number || $plot_number eq '') {
231 push @error_messages, "Cell C$row_name: plot number missing";
233 #plot number must be a positive integer
234 if (!($plot_number =~ /^\d+?$/)) {
235 push @error_messages, "Cell C$row_name: plot number is not a positive integer: $plot_number";
237 #block number must not be blank
238 if (!$block_number || $block_number eq '') {
239 push @error_messages, "Cell D$row_name: block number missing";
241 #block number must be a positive integer
242 if (!($block_number =~ /^\d+?$/)) {
243 push @error_messages, "Cell D$row_name: block number is not a positive integer: $block_number";
246 #is_a_control must be either yes, no 1, 0, or blank
247 if (!($is_a_control eq "yes" || $is_a_control eq "no" || $is_a_control eq "1" ||$is_a_control eq "0" || $is_a_control eq '')) {
248 push @error_messages, "Cell E$row_name: is_a_control is not either yes, no 1, 0, or blank: $is_a_control";
251 if ($rep_number && !($rep_number =~ /^\d+?$/)){
252 push @error_messages, "Cell F$row_name: rep_number must be a positive integer: $rep_number";
254 if ($range_number && !($range_number =~ /^\d+?$/)){
255 push @error_messages, "Cell G$row_name: range_number must be a positive integer: $range_number";
257 if ($row_number && !($row_number =~ /^\d+?$/)){
258 push @error_messages, "Cell H$row_name: row_number must be a positive integer: $row_number";
260 if ($col_number && !($col_number =~ /^\d+?$/)){
261 push @error_messages, "Cell I$row_name: col_number must be a positive integer: $col_number";
265 $seen_seedlot_names{$seedlot_name}++;
267 if (defined($num_seed_per_plot) && !($num_seed_per_plot =~ /^\d+?$/)){
268 push @error_messages, "Cell K$row_name: num_seed_per_plot must be a positive integer: $num_seed_per_plot";
271 my $treatment_col = 10;
272 foreach my $treatment_name (@treatment_names){
273 if($worksheet->get_cell($row,$treatment_col)){
274 my $apply_treatment = $worksheet->get_cell($row,$treatment_col);
275 if (defined($apply_treatment) && $apply_treatment ne '1'){
276 push @error_messages, "Treatment value in row $row_name should be either 1 or empty";
284 my @accessions = keys %seen_accession_names;
285 my $accession_validator = CXGN
::List
::Validate
->new();
286 my @accessions_missing = @
{$accession_validator->validate($schema,'accessions',\
@accessions)->{'missing'}};
288 if (scalar(@accessions_missing) > 0) {
289 $errors{'missing_accessions'} = \
@accessions_missing;
290 push @error_messages, "The following accessions are not in the database as uniquenames or synonyms: ".join(',',@accessions_missing);
293 my @seedlot_names = keys %seen_seedlot_names;
294 if (scalar(@seedlot_names)>0){
295 my $seedlot_validator = CXGN
::List
::Validate
->new();
296 my @seedlots_missing = @
{$seedlot_validator->validate($schema,'seedlots',\
@seedlot_names)->{'missing'}};
298 if (scalar(@seedlots_missing) > 0) {
299 $errors{'missing_seedlots'} = \
@seedlots_missing;
300 push @error_messages, "The following seedlots are not in the database: ".join(',',@seedlots_missing);
304 my @plots = keys %seen_plot_names;
305 my $rs = $schema->resultset("Stock::Stock")->search({
306 'is_obsolete' => { '!=' => 't' },
307 'uniquename' => { -in => \
@plots }
309 while (my $r=$rs->next){
310 push @error_messages, "Cell A".$seen_plot_names{$r->uniquename}.": plot name already exists: ".$r->uniquename;
313 #store any errors found in the parsed file to parse_errors accessor
314 if (scalar(@error_messages) >= 1) {
315 $errors{'error_messages'} = \
@error_messages;
316 $self->_set_parse_errors(\
%errors);
320 print STDERR
"Check 3.1.3 ".localtime();
322 return 1; #returns true if validation is passed
327 sub _parse_with_plugin
{
329 my $filename = $self->get_filename();
330 my $schema = $self->get_chado_schema();
331 my $parser = Spreadsheet
::ParseExcel
->new();
336 $excel_obj = $parser->parse($filename);
341 $worksheet = ( $excel_obj->worksheets() )[0];
342 my ( $row_min, $row_max ) = $worksheet->row_range();
343 my ( $col_min, $col_max ) = $worksheet->col_range();
346 for (10 .. $col_max){
347 if ($worksheet->get_cell(0,$_)){
348 push @treatment_names, $worksheet->get_cell(0,$_)->value();
352 for my $row ( 1 .. $row_max ) {
363 my $num_seed_per_plot = 0;
365 if ($worksheet->get_cell($row,0)) {
366 $plot_name = $worksheet->get_cell($row,0)->value();
368 if ($worksheet->get_cell($row,1)) {
369 $accession_name = $worksheet->get_cell($row,1)->value();
371 if ($worksheet->get_cell($row,2)) {
372 $plot_number = $worksheet->get_cell($row,2)->value();
374 if ($worksheet->get_cell($row,3)) {
375 $block_number = $worksheet->get_cell($row,3)->value();
377 if ($worksheet->get_cell($row,4)) {
378 $is_a_control = $worksheet->get_cell($row,4)->value();
380 if ($worksheet->get_cell($row,5)) {
381 $rep_number = $worksheet->get_cell($row,5)->value();
383 if ($worksheet->get_cell($row,6)) {
384 $range_number = $worksheet->get_cell($row,6)->value();
386 if ($worksheet->get_cell($row,7)) {
387 $row_number = $worksheet->get_cell($row, 7)->value();
389 if ($worksheet->get_cell($row,8)) {
390 $col_number = $worksheet->get_cell($row, 8)->value();
392 if ($worksheet->get_cell($row,9)) {
393 $seedlot_name = $worksheet->get_cell($row, 9)->value();
395 if ($worksheet->get_cell($row,10)) {
396 $num_seed_per_plot = $worksheet->get_cell($row, 10)->value();
400 if (!$plot_name && !$accession_name && !$plot_number && !$block_number) {
404 my $treatment_col = 10;
405 foreach my $treatment_name (@treatment_names){
406 if($worksheet->get_cell($row,$treatment_col)){
407 push @
{$design{treatments
}->{$treatment_name}}, $plot_name;
413 $design{$key}->{plot_name
} = $plot_name;
414 $design{$key}->{stock_name
} = $accession_name;
415 $design{$key}->{plot_number
} = $plot_number;
416 $design{$key}->{block_number
} = $block_number;
418 $design{$key}->{is_a_control
} = 1;
420 $design{$key}->{is_a_control
} = 0;
423 $design{$key}->{rep_number
} = $rep_number;
426 $design{$key}->{range_number
} = $range_number;
429 $design{$key}->{row_number
} = $row_number;
432 $design{$key}->{col_number
} = $col_number;
435 $design{$key}->{seedlot_name
} = $seedlot_name;
436 $design{$key}->{num_seed_per_plot
} = $num_seed_per_plot;
440 #print STDERR Dumper \%design;
441 $self->_set_parsed_data(\
%design);