Merge pull request #5205 from solgenomics/topic/generic_trial_upload
[sgn.git] / lib / CXGN / Pedigree / ParseUpload / Plugin / ProgeniesExcel.pm
blobcd2ecf15b302b34f41642191a1e57333a8f2651a
1 package CXGN::Pedigree::ParseUpload::Plugin::ProgeniesExcel;
3 use Moose::Role;
4 use Spreadsheet::ParseExcel;
5 use Spreadsheet::ParseXLSX;
6 use CXGN::Stock::StockLookup;
7 use SGN::Model::Cvterm;
8 use Data::Dumper;
9 use CXGN::List::Validate;
11 sub _validate_with_plugin {
12 my $self = shift;
13 my $filename = $self->get_filename();
14 my $schema = $self->get_chado_schema();
15 my @error_messages;
16 my %errors;
18 # Match a dot, extension .xls / .xlsx
19 my ($extension) = $filename =~ /(\.[^.]+)$/;
20 my $parser;
22 if ($extension eq '.xlsx') {
23 $parser = Spreadsheet::ParseXLSX->new();
25 else {
26 $parser = Spreadsheet::ParseExcel->new();
29 my $excel_obj;
30 my $worksheet;
32 #try to open the excel file and report any errors
33 $excel_obj = $parser->parse($filename);
34 if (!$excel_obj){
35 push @error_messages, $parser->error();
36 $errors{'error_messages'} = \@error_messages;
37 $self->_set_parse_errors(\%errors);
38 return;
41 $worksheet = ($excel_obj->worksheets())[0]; #support only one worksheet
42 if (!$worksheet){
43 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
44 $errors{'error_messages'} = \@error_messages;
45 $self->_set_parse_errors(\%errors);
46 return;
49 my ($row_min, $row_max) = $worksheet->row_range();
50 my ($col_min, $col_max) = $worksheet->col_range();
51 if (($col_max - $col_min) < 1 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of progeny
52 push @error_messages, "Spreadsheet is missing header or no progeny data";
53 $errors{'error_messages'} = \@error_messages;
54 $self->_set_parse_errors(\%errors);
55 return;
58 #get column headers
59 my $cross_name_head;
60 my $progeny_name_head;
62 if ($worksheet->get_cell(0,0)) {
63 $cross_name_head = $worksheet->get_cell(0,0)->value();
64 $cross_name_head =~ s/^\s+|\s+$//g;
66 if ($worksheet->get_cell(0,1)) {
67 $progeny_name_head = $worksheet->get_cell(0,1)->value();
68 $progeny_name_head =~ s/^\s+|\s+$//g;
71 if (!$cross_name_head || $cross_name_head ne 'cross_unique_id' ) {
72 push @error_messages, "Cell A1: cross_unique_id is missing from the header";
74 if (!$progeny_name_head || $progeny_name_head ne 'progeny_name') {
75 push @error_messages, "Cell B1: progeny_name is missing from the header";
78 my %seen_cross_names;
79 my %seen_progeny_names;
81 for my $row (1 .. $row_max){
82 my $row_name = $row+1;
83 my $cross_name;
84 my $progeny_name;
86 if ($worksheet->get_cell($row,0)) {
87 $cross_name = $worksheet->get_cell($row,0)->value();
89 if ($worksheet->get_cell($row,1)) {
90 $progeny_name = $worksheet->get_cell($row,1)->value();
93 if (!$cross_name || $cross_name eq '') {
94 push @error_messages, "Cell A$row_name: cross unique id missing";
95 } else {
96 $cross_name =~ s/^\s+|\s+$//g;
97 $seen_cross_names{$cross_name}++;
100 if (!$progeny_name || $progeny_name eq '') {
101 push @error_messages, "Cell B$row_name: progeny name missing";
102 } else {
103 $progeny_name =~ s/^\s+|\s+$//g;
104 $seen_progeny_names{$progeny_name}++;
108 my @crosses = keys %seen_cross_names;
109 my $cross_validator = CXGN::List::Validate->new();
110 my @crosses_missing = @{$cross_validator->validate($schema,'crosses',\@crosses)->{'missing'}};
112 if (scalar(@crosses_missing) > 0){
113 push @error_messages, "The following cross unique ids are not in the database as uniquenames or synonyms: ".join(',',@crosses_missing);
116 my @progenies = keys %seen_progeny_names;
117 my $rs = $schema->resultset("Stock::Stock")->search({
118 'is_obsolete' => { '!=' => 't' },
119 'uniquename' => { -in => \@progenies }
121 while (my $r=$rs->next){
122 push @error_messages, "Progeny name already exists in database: ".$r->uniquename;
125 #store any errors found in the parsed file to parse_errors accessor
126 if (scalar(@error_messages) >= 1) {
127 $errors{'error_messages'} = \@error_messages;
128 $self->_set_parse_errors(\%errors);
129 return;
132 return 1; #returns true if validation is passed
136 sub _parse_with_plugin {
137 my $self = shift;
138 my $filename = $self->get_filename();
139 my $schema = $self->get_chado_schema();
141 # Match a dot, extension .xls / .xlsx
142 my ($extension) = $filename =~ /(\.[^.]+)$/;
143 my $parser;
145 if ($extension eq '.xlsx') {
146 $parser = Spreadsheet::ParseXLSX->new();
148 else {
149 $parser = Spreadsheet::ParseExcel->new();
152 my $excel_obj;
153 my $worksheet;
155 $excel_obj = $parser->parse($filename);
156 if (!$excel_obj){
157 return;
160 $worksheet = ($excel_obj->worksheets())[0];
161 my ($row_min, $row_max) = $worksheet->row_range();
162 my ($col_min, $col_max) = $worksheet->col_range();
164 my %cross_progenies_hash;
166 for my $row (1 .. $row_max){
167 my $cross_name;
168 my $progeny_name;
170 if ($worksheet->get_cell($row,0)){
171 $cross_name = $worksheet->get_cell($row,0)->value();
172 $cross_name =~ s/^\s+|\s+$//g;
174 if ($worksheet->get_cell($row,1)){
175 $progeny_name = $worksheet->get_cell($row,1)->value();
176 $progeny_name =~ s/^\s+|\s+$//g;
178 #skip blank lines or lines with no name, type and parent
179 if (!$cross_name && !$progeny_name) {
180 next;
183 push @{$cross_progenies_hash{$cross_name}}, $progeny_name;
186 $self->_set_parsed_data(\%cross_progenies_hash);
187 return 1;