Merge pull request #5230 from solgenomics/topic/open_pollinated
[sgn.git] / lib / CXGN / Pedigree / ParseUpload / Plugin / TargetNumbersExcel.pm
blobb25b6112ced96b77f61b41b7ccfe0788c56d3cec
1 package CXGN::Pedigree::ParseUpload::Plugin::TargetNumbersExcel;
3 use Moose::Role;
4 use Spreadsheet::ParseExcel;
5 use Spreadsheet::ParseXLSX;
6 use Data::Dumper;
7 use CXGN::List::Validate;
9 sub _validate_with_plugin {
10 my $self = shift;
11 my $filename = $self->get_filename();
12 my $schema = $self->get_chado_schema();
13 my @error_messages;
14 my %errors;
16 # Match a dot, extension .xls / .xlsx
17 my ($extension) = $filename =~ /(\.[^.]+)$/;
18 my $parser;
20 if ($extension eq '.xlsx') {
21 $parser = Spreadsheet::ParseXLSX->new();
23 else {
24 $parser = Spreadsheet::ParseExcel->new();
27 my $excel_obj;
28 my $worksheet;
30 #try to open the excel file and report any errors
31 $excel_obj = $parser->parse($filename);
32 if (!$excel_obj){
33 push @error_messages, $parser->error();
34 $errors{'error_messages'} = \@error_messages;
35 $self->_set_parse_errors(\%errors);
36 return;
39 $worksheet = ($excel_obj->worksheets())[0]; #support only one worksheet
40 if (!$worksheet){
41 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
42 $errors{'error_messages'} = \@error_messages;
43 $self->_set_parse_errors(\%errors);
44 return;
47 my ($row_min, $row_max) = $worksheet->row_range();
48 my ($col_min, $col_max) = $worksheet->col_range();
49 if (($col_max - $col_min) < 3 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of target number info
50 push @error_messages, "Spreadsheet is missing header or no target number data";
51 $errors{'error_messages'} = \@error_messages;
52 $self->_set_parse_errors(\%errors);
53 return;
56 #get column headers
57 my $female_accession_head;
58 my $male_accession_head;
59 my $seed_target_number_head;
60 my $progeny_target_number_head;
62 if ($worksheet->get_cell(0,0)) {
63 $female_accession_head = $worksheet->get_cell(0,0)->value();
64 $female_accession_head =~ s/^\s+|\s+$//g;
67 if ($worksheet->get_cell(0,1)) {
68 $male_accession_head = $worksheet->get_cell(0,1)->value();
69 $male_accession_head =~ s/^\s+|\s+$//g;
72 if ($worksheet->get_cell(0,2)) {
73 $seed_target_number_head = $worksheet->get_cell(0,2)->value();
74 $seed_target_number_head =~ s/^\s+|\s+$//g;
77 if ($worksheet->get_cell(0,3)) {
78 $progeny_target_number_head = $worksheet->get_cell(0,3)->value();
79 $progeny_target_number_head =~ s/^\s+|\s+$//g;
82 if (!$female_accession_head || $female_accession_head ne 'female_accession' ) {
83 push @error_messages, "Cell A1: female_accession is missing from the header";
86 if (!$male_accession_head || $male_accession_head ne 'male_accession' ) {
87 push @error_messages, "Cell A2: male_accession is missing from the header";
90 if (!$seed_target_number_head || $seed_target_number_head ne 'seed_target_number' ) {
91 push @error_messages, "Cell A3: seed_target_number is missing from the header";
94 if (!$progeny_target_number_head || $progeny_target_number_head ne 'progeny_target_number' ) {
95 push @error_messages, "Cell A4: progeny_target_number is missing from the header";
99 my %seen_female_accessions;
100 my %seen_male_accessions;
102 for my $row (1 .. $row_max){
103 my $row_name = $row+1;
104 my $female_accession;
105 my $male_accession;
106 my $seed_target_number;
107 my $progeny_target_number;
109 if ($worksheet->get_cell($row,0)) {
110 $female_accession = $worksheet->get_cell($row,0)->value();
112 if ($worksheet->get_cell($row,1)) {
113 $male_accession = $worksheet->get_cell($row,1)->value();
115 if ($worksheet->get_cell($row,2)) {
116 $seed_target_number = $worksheet->get_cell($row,2)->value();
118 if ($worksheet->get_cell($row,3)) {
119 $progeny_target_number = $worksheet->get_cell($row,3)->value();
122 if (!$female_accession || $female_accession eq '') {
123 push @error_messages, "Cell A$row_name: female accession missing";
124 } else {
125 $female_accession =~ s/^\s+|\s+$//g;
126 $seen_female_accessions{$female_accession}++;
129 if (!$male_accession || $male_accession eq '') {
130 push @error_messages, "Cell B$row_name: male accession missing";
131 } else {
132 $male_accession =~ s/^\s+|\s+$//g;
133 $seen_male_accessions{$male_accession}++;
136 if ((!$seed_target_number || $seed_target_number eq '') && (!$progeny_target_number || $progeny_target_number eq '')) {
137 push @error_messages, "Cell C/D$row_name: should have seed target number and/or progeny target number";
141 my @female_accessions = keys %seen_female_accessions;
142 my $female_accession_validator = CXGN::List::Validate->new();
143 my @female_accession_missing = @{$female_accession_validator->validate($schema,'uniquenames',\@female_accessions)->{'missing'}};
145 if (scalar(@female_accession_missing) > 0){
146 push @error_messages, "The following female accessions are not in the database as uniquenames : ".join(',',@female_accession_missing);
149 my @male_accessions = keys %seen_male_accessions;
150 my $male_accession_validator = CXGN::List::Validate->new();
151 my @male_accession_missing = @{$male_accession_validator->validate($schema,'uniquenames',\@male_accessions)->{'missing'}};
153 if (scalar(@male_accession_missing) > 0){
154 push @error_messages, "The following male accessions are not in the database as uniquenames : ".join(',',@male_accession_missing);
157 #store any errors found in the parsed file to parse_errors accessor
158 if (scalar(@error_messages) >= 1) {
159 $errors{'error_messages'} = \@error_messages;
160 $self->_set_parse_errors(\%errors);
161 return;
164 return 1; #returns true if validation is passed
169 sub _parse_with_plugin {
170 my $self = shift;
171 my $filename = $self->get_filename();
172 my $schema = $self->get_chado_schema();
174 # Match a dot, extension .xls / .xlsx
175 my ($extension) = $filename =~ /(\.[^.]+)$/;
176 my $parser;
178 if ($extension eq '.xlsx') {
179 $parser = Spreadsheet::ParseXLSX->new();
181 else {
182 $parser = Spreadsheet::ParseExcel->new();
185 my $excel_obj;
186 my $worksheet;
188 $excel_obj = $parser->parse($filename);
189 if (!$excel_obj){
190 return;
193 $worksheet = ($excel_obj->worksheets())[0];
194 my ($row_min, $row_max) = $worksheet->row_range();
195 my ($col_min, $col_max) = $worksheet->col_range();
197 my %target_number_info;
199 for my $row (1 .. $row_max){
200 my $female_accession;
201 my $male_accession;
202 my $seed_target_number;
203 my $progeny_target_number;
205 if ($worksheet->get_cell($row,0)){
206 $female_accession = $worksheet->get_cell($row,0)->value();
207 $female_accession =~ s/^\s+|\s+$//g;
210 if ($worksheet->get_cell($row,1)){
211 $male_accession = $worksheet->get_cell($row,1)->value();
212 $male_accession =~ s/^\s+|\s+$//g;
215 if ($worksheet->get_cell($row,2)){
216 $seed_target_number = $worksheet->get_cell($row,2)->value();
217 $seed_target_number =~ s/^\s+|\s+$//g;
219 if ($worksheet->get_cell($row,3)){
220 $progeny_target_number = $worksheet->get_cell($row,3)->value();
221 $progeny_target_number =~ s/^\s+|\s+$//g;
224 if ($seed_target_number) {
225 $target_number_info{$female_accession}{$male_accession}{'target_number_of_seeds'} = $seed_target_number;
227 if ($progeny_target_number) {
228 $target_number_info{$female_accession}{$male_accession}{'target_number_of_progenies'} = $progeny_target_number;
233 $self->_set_parsed_data(\%target_number_info);
235 return 1;