Merge pull request #2383 from solgenomics/dauglyon-patch-1
[sgn.git] / lib / CXGN / Pedigree / ParseUpload / Plugin / FamilyNameExcel.pm
blob5f32ace36e08d93856c80ea62324c523fff89204
1 package CXGN::Pedigree::ParseUpload::Plugin::FamilyNameExcel;
3 use Moose::Role;
4 use Spreadsheet::ParseExcel;
5 use CXGN::Stock::StockLookup;
6 use SGN::Model::Cvterm;
7 use Data::Dumper;
8 use CXGN::List::Validate;
10 sub _validate_with_plugin {
11 my $self = shift;
12 my $filename = $self->get_filename();
13 my $schema = $self->get_chado_schema();
14 my @error_messages;
15 my %errors;
16 my $parser = Spreadsheet::ParseExcel->new();
17 my $excel_obj;
18 my $worksheet;
20 #try to open the excel file and report any errors
21 $excel_obj = $parser->parse($filename);
22 if (!$excel_obj){
23 push @error_messages, $parser->error();
24 $errors{'error_messages'} = \@error_messages;
25 $self->_set_parse_errors(\%errors);
26 return;
29 $worksheet = ($excel_obj->worksheets())[0]; #support only one worksheet
30 if (!$worksheet){
31 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
32 $errors{'error_messages'} = \@error_messages;
33 $self->_set_parse_errors(\%errors);
34 return;
37 my ($row_min, $row_max) = $worksheet->row_range();
38 my ($col_min, $col_max) = $worksheet->col_range();
39 if (($col_max - $col_min) < 1 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of family name
40 push @error_messages, "Spreadsheet is missing header or no family name data";
41 $errors{'error_messages'} = \@error_messages;
42 $self->_set_parse_errors(\%errors);
43 return;
46 #get column headers
47 my $cross_name_head;
49 if ($worksheet->get_cell(0,0)) {
50 $cross_name_head = $worksheet->get_cell(0,0)->value();
53 if (!$cross_name_head || $cross_name_head ne 'cross_name' ) {
54 push @error_messages, "Cell A1: cross_name is missing from the header";
57 my %seen_cross_names;
59 for my $row (1 .. $row_max){
60 my $row_name = $row+1;
61 my $cross_name;
62 my $family_name;
64 if ($worksheet->get_cell($row,0)) {
65 $cross_name = $worksheet->get_cell($row,0)->value();
67 if ($worksheet->get_cell($row,1)) {
68 $family_name = $worksheet->get_cell($row,1)->value();
71 if (!$cross_name || $cross_name eq '') {
72 push @error_messages, "Cell A$row_name: cross name missing";
73 } else {
74 $seen_cross_names{$cross_name}++;
77 if (!$family_name || $family_name eq '') {
78 push @error_messages, "Cell B$row_name: family name missing";
83 my @crosses = keys %seen_cross_names;
84 my $cross_validator = CXGN::List::Validate->new();
85 my @crosses_missing = @{$cross_validator->validate($schema,'crosses',\@crosses)->{'missing'}};
87 if (scalar(@crosses_missing) > 0){
88 push @error_messages, "The following crosses are not in the database as uniquenames or synonyms: ".join(',',@crosses_missing);
89 $errors{'missing_crosses'} = \@crosses_missing;
92 #store any errors found in the parsed file to parse_errors accessor
93 if (scalar(@error_messages) >= 1) {
94 $errors{'error_messages'} = \@error_messages;
95 $self->_set_parse_errors(\%errors);
96 return;
99 return 1; #returns true if validation is passed
104 sub _parse_with_plugin {
105 my $self = shift;
106 my $filename = $self->get_filename();
107 my $schema = $self->get_chado_schema();
108 my $parser = Spreadsheet::ParseExcel->new();
109 my $excel_obj;
110 my $worksheet;
112 $excel_obj = $parser->parse($filename);
113 if (!$excel_obj){
114 return;
117 $worksheet = ($excel_obj->worksheets())[0];
118 my ($row_min, $row_max) = $worksheet->row_range();
119 my ($col_min, $col_max) = $worksheet->col_range();
121 my %cross_family_name;
123 for my $row (1 .. $row_max){
124 my $cross_name;
125 my $family_name;
127 if ($worksheet->get_cell($row,0)){
128 $cross_name = $worksheet->get_cell($row,0)->value();
131 if ($worksheet->get_cell($row,1)){
132 $family_name = $worksheet->get_cell($row,1)->value();
135 #skip blank lines or lines with no cross name
136 if (!$cross_name) {
137 next;
140 $cross_family_name{$cross_name} = $family_name;
144 $self->_set_parsed_data(\%cross_family_name);
146 return 1;