1 package CXGN
::Pedigree
::ParseUpload
::Plugin
::ProgeniesExcel
;
4 use Spreadsheet
::ParseExcel
;
5 use Spreadsheet
::ParseXLSX
;
6 use CXGN
::Stock
::StockLookup
;
7 use SGN
::Model
::Cvterm
;
9 use CXGN
::List
::Validate
;
11 sub _validate_with_plugin
{
13 my $filename = $self->get_filename();
14 my $schema = $self->get_chado_schema();
18 # Match a dot, extension .xls / .xlsx
19 my ($extension) = $filename =~ /(\.[^.]+)$/;
22 if ($extension eq '.xlsx') {
23 $parser = Spreadsheet
::ParseXLSX
->new();
26 $parser = Spreadsheet
::ParseExcel
->new();
32 #try to open the excel file and report any errors
33 $excel_obj = $parser->parse($filename);
35 push @error_messages, $parser->error();
36 $errors{'error_messages'} = \
@error_messages;
37 $self->_set_parse_errors(\
%errors);
41 $worksheet = ($excel_obj->worksheets())[0]; #support only one worksheet
43 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
44 $errors{'error_messages'} = \
@error_messages;
45 $self->_set_parse_errors(\
%errors);
49 my ($row_min, $row_max) = $worksheet->row_range();
50 my ($col_min, $col_max) = $worksheet->col_range();
51 if (($col_max - $col_min) < 1 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of progeny
52 push @error_messages, "Spreadsheet is missing header or no progeny data";
53 $errors{'error_messages'} = \
@error_messages;
54 $self->_set_parse_errors(\
%errors);
60 my $progeny_name_head;
62 if ($worksheet->get_cell(0,0)) {
63 $cross_name_head = $worksheet->get_cell(0,0)->value();
64 $cross_name_head =~ s/^\s+|\s+$//g;
66 if ($worksheet->get_cell(0,1)) {
67 $progeny_name_head = $worksheet->get_cell(0,1)->value();
68 $progeny_name_head =~ s/^\s+|\s+$//g;
71 if (!$cross_name_head || $cross_name_head ne 'cross_unique_id' ) {
72 push @error_messages, "Cell A1: cross_unique_id is missing from the header";
74 if (!$progeny_name_head || $progeny_name_head ne 'progeny_name') {
75 push @error_messages, "Cell B1: progeny_name is missing from the header";
79 my %seen_progeny_names;
81 for my $row (1 .. $row_max){
82 my $row_name = $row+1;
86 if ($worksheet->get_cell($row,0)) {
87 $cross_name = $worksheet->get_cell($row,0)->value();
89 if ($worksheet->get_cell($row,1)) {
90 $progeny_name = $worksheet->get_cell($row,1)->value();
93 if (!$cross_name || $cross_name eq '') {
94 push @error_messages, "Cell A$row_name: cross unique id missing";
96 $cross_name =~ s/^\s+|\s+$//g;
97 $seen_cross_names{$cross_name}++;
100 if (!$progeny_name || $progeny_name eq '') {
101 push @error_messages, "Cell B$row_name: progeny name missing";
103 $progeny_name =~ s/^\s+|\s+$//g;
104 $seen_progeny_names{$progeny_name}++;
108 my @crosses = keys %seen_cross_names;
109 my $cross_validator = CXGN
::List
::Validate
->new();
110 my @crosses_missing = @
{$cross_validator->validate($schema,'crosses',\
@crosses)->{'missing'}};
112 if (scalar(@crosses_missing) > 0){
113 push @error_messages, "The following cross unique ids are not in the database as uniquenames or synonyms: ".join(',',@crosses_missing);
116 my @progenies = keys %seen_progeny_names;
117 my $rs = $schema->resultset("Stock::Stock")->search({
118 'is_obsolete' => { '!=' => 't' },
119 'uniquename' => { -in => \
@progenies }
121 while (my $r=$rs->next){
122 push @error_messages, "Progeny name already exists in database: ".$r->uniquename;
125 #store any errors found in the parsed file to parse_errors accessor
126 if (scalar(@error_messages) >= 1) {
127 $errors{'error_messages'} = \
@error_messages;
128 $self->_set_parse_errors(\
%errors);
132 return 1; #returns true if validation is passed
136 sub _parse_with_plugin
{
138 my $filename = $self->get_filename();
139 my $schema = $self->get_chado_schema();
141 # Match a dot, extension .xls / .xlsx
142 my ($extension) = $filename =~ /(\.[^.]+)$/;
145 if ($extension eq '.xlsx') {
146 $parser = Spreadsheet
::ParseXLSX
->new();
149 $parser = Spreadsheet
::ParseExcel
->new();
155 $excel_obj = $parser->parse($filename);
160 $worksheet = ($excel_obj->worksheets())[0];
161 my ($row_min, $row_max) = $worksheet->row_range();
162 my ($col_min, $col_max) = $worksheet->col_range();
164 my %cross_progenies_hash;
166 for my $row (1 .. $row_max){
170 if ($worksheet->get_cell($row,0)){
171 $cross_name = $worksheet->get_cell($row,0)->value();
172 $cross_name =~ s/^\s+|\s+$//g;
174 if ($worksheet->get_cell($row,1)){
175 $progeny_name = $worksheet->get_cell($row,1)->value();
176 $progeny_name =~ s/^\s+|\s+$//g;
178 #skip blank lines or lines with no name, type and parent
179 if (!$cross_name && !$progeny_name) {
183 push @
{$cross_progenies_hash{$cross_name}}, $progeny_name;
186 $self->_set_parsed_data(\
%cross_progenies_hash);