Merge pull request #5205 from solgenomics/topic/generic_trial_upload
[sgn.git] / lib / CXGN / Pedigree / ParseUpload / Plugin / ValidateExistingProgeniesExcel.pm
blob6f001edca1deaed9eb9b2c6500ea39cbc2d97caf
1 package CXGN::Pedigree::ParseUpload::Plugin::ValidateExistingProgeniesExcel;
3 use Moose::Role;
4 use Spreadsheet::ParseExcel;
5 use Spreadsheet::ParseXLSX;
6 use CXGN::Stock::StockLookup;
7 use SGN::Model::Cvterm;
8 use Data::Dumper;
9 use CXGN::List::Validate;
10 use CXGN::Stock::RelatedStocks;
12 sub _validate_with_plugin {
13 my $self = shift;
14 my $filename = $self->get_filename();
15 my $schema = $self->get_chado_schema();
16 my @error_messages;
17 my @existing_pedigrees;
18 my %errors;
20 # Match a dot, extension .xls / .xlsx
21 my ($extension) = $filename =~ /(\.[^.]+)$/;
22 my $parser;
24 if ($extension eq '.xlsx') {
25 $parser = Spreadsheet::ParseXLSX->new();
27 else {
28 $parser = Spreadsheet::ParseExcel->new();
31 my $excel_obj;
32 my $worksheet;
34 #try to open the excel file and report any errors
35 $excel_obj = $parser->parse($filename);
36 if (!$excel_obj){
37 push @error_messages, $parser->error();
38 $errors{'error_messages'} = \@error_messages;
39 $self->_set_parse_errors(\%errors);
40 return;
43 $worksheet = ($excel_obj->worksheets())[0]; #support only one worksheet
44 if (!$worksheet){
45 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
46 $errors{'error_messages'} = \@error_messages;
47 $self->_set_parse_errors(\%errors);
48 return;
51 my ($row_min, $row_max) = $worksheet->row_range();
52 my ($col_min, $col_max) = $worksheet->col_range();
53 if (($col_max - $col_min) < 1 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of progeny
54 push @error_messages, "Spreadsheet is missing header or no progeny data";
55 $errors{'error_messages'} = \@error_messages;
56 $self->_set_parse_errors(\%errors);
57 return;
60 #get column headers
61 my $cross_name_head;
62 my $progeny_name_head;
64 if ($worksheet->get_cell(0,0)) {
65 $cross_name_head = $worksheet->get_cell(0,0)->value();
66 $cross_name_head =~ s/^\s+|\s+$//g;
68 if ($worksheet->get_cell(0,1)) {
69 $progeny_name_head = $worksheet->get_cell(0,1)->value();
70 $progeny_name_head =~ s/^\s+|\s+$//g;
73 if (!$cross_name_head || $cross_name_head ne 'cross_unique_id' ) {
74 push @error_messages, "Cell A1: cross_unique_id is missing from the header";
76 if (!$progeny_name_head || $progeny_name_head ne 'progeny_name') {
77 push @error_messages, "Cell B1: progeny_name is missing from the header";
80 my %seen_cross_names;
81 my %seen_progeny_names;
83 for my $row (1 .. $row_max){
84 my $row_name = $row+1;
85 my $cross_name;
86 my $progeny_name;
88 if ($worksheet->get_cell($row,0)) {
89 $cross_name = $worksheet->get_cell($row,0)->value();
91 if ($worksheet->get_cell($row,1)) {
92 $progeny_name = $worksheet->get_cell($row,1)->value();
95 if (!$cross_name || $cross_name eq '') {
96 push @error_messages, "Cell A$row_name: cross unique id missing";
97 } else {
98 $cross_name =~ s/^\s+|\s+$//g;
99 $seen_cross_names{$cross_name}++;
102 if (!$progeny_name || $progeny_name eq '') {
103 push @error_messages, "Cell B$row_name: progeny name missing";
104 } else {
105 $progeny_name =~ s/^\s+|\s+$//g;
106 $seen_progeny_names{$progeny_name}++;
110 my @crosses = keys %seen_cross_names;
111 my $cross_validator = CXGN::List::Validate->new();
112 my @crosses_missing = @{$cross_validator->validate($schema,'crosses',\@crosses)->{'missing'}};
114 if (scalar(@crosses_missing) > 0){
115 push @error_messages, "The following cross unique ids are not in the database as uniquenames or synonyms: ".join(',',@crosses_missing);
118 my @progenies = keys %seen_progeny_names;
119 my $progeny_validator = CXGN::List::Validate->new();
120 my @progenies_missing = @{$progeny_validator->validate($schema,'uniquenames',\@progenies)->{'missing'}};
122 if (scalar(@progenies_missing) > 0) {
123 push @error_messages, "The following progeny names are not in the database, or are not in the database as uniquenames: ".join(',',@progenies_missing);
126 if (scalar(@error_messages) > 0) {
127 $errors{'error_messages'} = \@error_messages;
128 $self->_set_parse_errors(\%errors);
129 return;
132 #check if progeny is already associated with cross_unique_id
133 foreach my $progeny_name(@progenies) {
134 my $cross_progeny_linkage = CXGN::Stock::RelatedStocks->get_cross_of_progeny($progeny_name, $schema);
135 my @previous_cross = @$cross_progeny_linkage;
136 if (scalar(@previous_cross) > 0) {
137 push @error_messages, "The following progeny name is already associated with a cross unique id: progeny name ".$progeny_name;
138 $errors{'existing_another_cross_linkage'} = $progeny_name;
142 #check if progeny already has pedigree
143 my %progenies_hash;
144 my @progeny_stock_ids;
145 my %return;
146 foreach my $progeny_name(@progenies) {
147 my $stock_lookup = CXGN::Stock::StockLookup->new(schema => $schema);
148 $stock_lookup->set_stock_name($progeny_name);
149 my $progeny_stock = $stock_lookup->get_stock_exact();
150 my $progeny_stock_id = $progeny_stock->stock_id();
151 push @progeny_stock_ids, $progeny_stock_id;
152 $progenies_hash{$progeny_stock_id} = $progeny_name;
154 my $female_parent_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'female_parent', 'stock_relationship')->cvterm_id;;
155 my $male_parent_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'male_parent', 'stock_relationship')->cvterm_id;;
157 my $progeny_female_parent_search = $schema->resultset('Stock::StockRelationship')->search({
158 type_id => $female_parent_cvterm_id,
159 object_id => { '-in'=>\@progeny_stock_ids },
161 my %progeny_with_female_parent_already;
162 while (my $r=$progeny_female_parent_search->next){
163 $progeny_with_female_parent_already{$r->object_id} = [$r->subject_id, $r->value];
165 my $progeny_male_parent_search = $schema->resultset('Stock::StockRelationship')->search({
166 type_id => $male_parent_cvterm_id,
167 object_id => { '-in'=>\@progeny_stock_ids },
169 my %progeny_with_male_parent_already;
170 while (my $r=$progeny_male_parent_search->next){
171 $progeny_with_male_parent_already{$r->object_id} = $r->subject_id;
174 foreach (@progeny_stock_ids){
175 if (exists($progeny_with_female_parent_already{$_})){
176 push @existing_pedigrees, $progenies_hash{$_}." already has female parent stockID ".$progeny_with_female_parent_already{$_}->[0]." saved with cross type ".$progeny_with_female_parent_already{$_}->[1];
178 if (exists($progeny_with_male_parent_already{$_})){
179 push @existing_pedigrees, $progenies_hash{$_}." already has male parent stockID ".$progeny_with_male_parent_already{$_};
183 #store any errors found in the parsed file to parse_errors accessor
184 $errors{'error_messages'} = \@error_messages;
185 $errors{'existing_pedigrees'} = \@existing_pedigrees;
186 $self->_set_parse_errors(\%errors);
187 return;