Merge pull request #5230 from solgenomics/topic/open_pollinated
[sgn.git] / lib / CXGN / Pedigree / ParseUpload / Plugin / CrossInfoExcel.pm
blobcfeb3d848d1270b1bd498d9f4f05eec75d955292
1 package CXGN::Pedigree::ParseUpload::Plugin::CrossInfoExcel;
3 use Moose::Role;
4 use Spreadsheet::ParseExcel;
5 use Spreadsheet::ParseXLSX;
6 use CXGN::Stock::StockLookup;
7 use SGN::Model::Cvterm;
8 use Data::Dumper;
9 use CXGN::List::Validate;
11 sub _validate_with_plugin {
12 my $self = shift;
13 my $filename = $self->get_filename();
14 my $schema = $self->get_chado_schema();
15 my $cross_properties = $self->get_cross_properties();
16 my @error_messages;
17 my %errors;
19 # Match a dot, extension .xls / .xlsx
20 my ($extension) = $filename =~ /(\.[^.]+)$/;
21 my $parser;
23 if ($extension eq '.xlsx') {
24 $parser = Spreadsheet::ParseXLSX->new();
26 else {
27 $parser = Spreadsheet::ParseExcel->new();
30 my $excel_obj;
31 my $worksheet;
33 #try to open the excel file and report any errors
34 $excel_obj = $parser->parse($filename);
35 if (!$excel_obj){
36 push @error_messages, $parser->error();
37 $errors{'error_messages'} = \@error_messages;
38 $self->_set_parse_errors(\%errors);
39 return;
42 $worksheet = ($excel_obj->worksheets())[0]; #support only one worksheet
43 if (!$worksheet){
44 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
45 $errors{'error_messages'} = \@error_messages;
46 $self->_set_parse_errors(\%errors);
47 return;
50 my ($row_min, $row_max) = $worksheet->row_range();
51 my ($col_min, $col_max) = $worksheet->col_range();
52 if (($col_max - $col_min) < 1 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of progeny
53 push @error_messages, "Spreadsheet is missing header or no cross info data";
54 $errors{'error_messages'} = \@error_messages;
55 $self->_set_parse_errors(\%errors);
56 return;
59 #get column headers
60 my $cross_name_head;
62 if ($worksheet->get_cell(0,0)) {
63 $cross_name_head = $worksheet->get_cell(0,0)->value();
64 $cross_name_head =~ s/^\s+|\s+$//g;
67 if (!$cross_name_head || $cross_name_head ne 'cross_unique_id' ) {
68 push @error_messages, "Cell A1: cross_unique_id is missing from the header";
71 my %valid_properties;
72 my @properties = @{$cross_properties};
73 foreach my $property(@properties){
74 $valid_properties{$property} = 1;
77 for my $column (1 .. $col_max){
78 my $header_string = $worksheet->get_cell(0,$column)->value();
79 $header_string =~ s/^\s+|\s+$//g;
81 if (!$valid_properties{$header_string}){
82 push @error_messages, "Invalid info type: $header_string";
86 my %seen_cross_names;
88 for my $row (1 .. $row_max){
89 my $row_name = $row+1;
90 my $cross_name;
92 if ($worksheet->get_cell($row,0)) {
93 $cross_name = $worksheet->get_cell($row,0)->value();
96 if (!$cross_name || $cross_name eq '') {
97 push @error_messages, "Cell A$row_name: cross unique id missing";
98 } elsif ($seen_cross_names{$cross_name}) {
99 push @error_messages, "Duplicate cross unique id at cell A$row_name".": $cross_name";
100 } else {
101 $cross_name =~ s/^\s+|\s+$//g;
102 $seen_cross_names{$cross_name}++;
105 for my $column (1 .. $col_max) {
106 if ($worksheet->get_cell($row,$column)) {
107 my $info_value = $worksheet->get_cell($row,$column)->value();
108 my $info_type = $worksheet->get_cell(0,$column)->value();
109 if ( ($info_type =~ m/days/ || $info_type =~ m/number/) && !($info_value =~ /^\d+?$/) ) {
110 push @error_messages, "Cell $info_type:$row_name: is not a positive integer: $info_value";
112 elsif ( $info_type =~ m/date/ && !($info_value =~ m/(\d{4})\/(\d{2})\/(\d{2})/) ) {
113 push @error_messages, "Cell $info_type:$row_name: is not a valid date: $info_value. Dates need to be of form YYYY/MM/DD";
119 my @crosses = keys %seen_cross_names;
120 my $cross_validator = CXGN::List::Validate->new();
121 my @crosses_missing = @{$cross_validator->validate($schema,'crosses',\@crosses)->{'missing'}};
123 if (scalar(@crosses_missing) > 0){
124 push @error_messages, "The following cross unique ids are not in the database as uniquenames or synonyms: ".join(',',@crosses_missing);
125 $errors{'missing_crosses'} = \@crosses_missing;
128 #store any errors found in the parsed file to parse_errors accessor
129 if (scalar(@error_messages) >= 1) {
130 $errors{'error_messages'} = \@error_messages;
131 $self->_set_parse_errors(\%errors);
132 return;
135 return 1; #returns true if validation is passed
139 sub _parse_with_plugin {
140 my $self = shift;
141 my $filename = $self->get_filename();
142 my $schema = $self->get_chado_schema();
144 # Match a dot, extension .xls / .xlsx
145 my ($extension) = $filename =~ /(\.[^.]+)$/;
146 my $parser;
148 if ($extension eq '.xlsx') {
149 $parser = Spreadsheet::ParseXLSX->new();
151 else {
152 $parser = Spreadsheet::ParseExcel->new();
155 my $excel_obj;
156 my $worksheet;
157 my %parsed_result;
159 $excel_obj = $parser->parse($filename);
160 if (!$excel_obj){
161 return;
164 $worksheet = ($excel_obj->worksheets())[0];
165 my ($row_min, $row_max) = $worksheet->row_range();
166 my ($col_min, $col_max) = $worksheet->col_range();
168 for my $row (1 .. $row_max){
169 my $cross_name;
171 if ($worksheet->get_cell($row,0)){
172 $cross_name = $worksheet->get_cell($row,0)->value();
173 $cross_name =~ s/^\s+|\s+$//g;
176 #skip blank lines or lines with no name, type and parent
177 if (!$cross_name) {
178 next;
181 for my $column ( 1 .. $col_max ) {
182 if ($worksheet->get_cell($row,$column)) {
183 my $info_header = $worksheet->get_cell(0,$column)->value();
184 $info_header =~ s/^\s+|\s+$//g;
185 $parsed_result{$cross_name}{$info_header} = $worksheet->get_cell($row,$column)->value();
189 print STDERR "PARSED RESULT =".Dumper(\%parsed_result)."\n";
191 $self->_set_parsed_data(\%parsed_result);
193 return 1;