Merge pull request #2383 from solgenomics/dauglyon-patch-1
[sgn.git] / lib / CXGN / Pedigree / ParseUpload / Plugin / ProgeniesExcel.pm
blobabd8c97535fe5751305a664d7752d07f83d33656
1 package CXGN::Pedigree::ParseUpload::Plugin::ProgeniesExcel;
3 use Moose::Role;
4 use Spreadsheet::ParseExcel;
5 use CXGN::Stock::StockLookup;
6 use SGN::Model::Cvterm;
7 use Data::Dumper;
8 use CXGN::List::Validate;
10 sub _validate_with_plugin {
11 my $self = shift;
12 my $filename = $self->get_filename();
13 my $schema = $self->get_chado_schema();
14 my @error_messages;
15 my %errors;
16 my $parser = Spreadsheet::ParseExcel->new();
17 my $excel_obj;
18 my $worksheet;
20 #try to open the excel file and report any errors
21 $excel_obj = $parser->parse($filename);
22 if (!$excel_obj){
23 push @error_messages, $parser->error();
24 $errors{'error_messages'} = \@error_messages;
25 $self->_set_parse_errors(\%errors);
26 return;
29 $worksheet = ($excel_obj->worksheets())[0]; #support only one worksheet
30 if (!$worksheet){
31 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
32 $errors{'error_messages'} = \@error_messages;
33 $self->_set_parse_errors(\%errors);
34 return;
37 my ($row_min, $row_max) = $worksheet->row_range();
38 my ($col_min, $col_max) = $worksheet->col_range();
39 if (($col_max - $col_min) < 1 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of progeny
40 push @error_messages, "Spreadsheet is missing header or no progeny data";
41 $errors{'error_messages'} = \@error_messages;
42 $self->_set_parse_errors(\%errors);
43 return;
46 #get column headers
47 my $cross_name_head;
48 my $progeny_name_head;
50 if ($worksheet->get_cell(0,0)) {
51 $cross_name_head = $worksheet->get_cell(0,0)->value();
53 if ($worksheet->get_cell(0,1)) {
54 $progeny_name_head = $worksheet->get_cell(0,1)->value();
57 if (!$cross_name_head || $cross_name_head ne 'cross_name' ) {
58 push @error_messages, "Cell A1: cross_name is missing from the header";
60 if (!$progeny_name_head || $progeny_name_head ne 'progeny_name') {
61 push @error_messages, "Cell B1: progeny_name is missing from the header";
64 my %seen_cross_names;
65 my %seen_progeny_names;
67 for my $row (1 .. $row_max){
68 my $row_name = $row+1;
69 my $cross_name;
70 my $progeny_name;
72 if ($worksheet->get_cell($row,0)) {
73 $cross_name = $worksheet->get_cell($row,0)->value();
75 if ($worksheet->get_cell($row,1)) {
76 $progeny_name = $worksheet->get_cell($row,1)->value();
79 if (!$cross_name || $cross_name eq '') {
80 push @error_messages, "Cell A$row_name: cross name missing";
81 } else {
82 $seen_cross_names{$cross_name}++;
85 if (!$progeny_name || $progeny_name eq '') {
86 push @error_messages, "Cell B$row_name: progeny name missing";
87 } else {
88 $seen_progeny_names{$progeny_name}++;
92 my @crosses = keys %seen_cross_names;
93 my $cross_validator = CXGN::List::Validate->new();
94 my @crosses_missing = @{$cross_validator->validate($schema,'crosses',\@crosses)->{'missing'}};
96 if (scalar(@crosses_missing) > 0){
97 push @error_messages, "The following crosses are not in the database as uniquenames or synonyms: ".join(',',@crosses_missing);
98 $errors{'missing_crosses'} = \@crosses_missing;
101 my @progenies = keys %seen_progeny_names;
102 my $rs = $schema->resultset("Stock::Stock")->search({
103 'is_obsolete' => { '!=' => 't' },
104 'uniquename' => { -in => \@progenies }
106 while (my $r=$rs->next){
107 push @error_messages, "Progeny name already exists in database: ".$r->uniquename;
110 #store any errors found in the parsed file to parse_errors accessor
111 if (scalar(@error_messages) >= 1) {
112 $errors{'error_messages'} = \@error_messages;
113 $self->_set_parse_errors(\%errors);
114 return;
117 return 1; #returns true if validation is passed
121 sub _parse_with_plugin {
122 my $self = shift;
123 my $filename = $self->get_filename();
124 my $schema = $self->get_chado_schema();
125 my $parser = Spreadsheet::ParseExcel->new();
126 my $excel_obj;
127 my $worksheet;
129 $excel_obj = $parser->parse($filename);
130 if (!$excel_obj){
131 return;
134 $worksheet = ($excel_obj->worksheets())[0];
135 my ($row_min, $row_max) = $worksheet->row_range();
136 my ($col_min, $col_max) = $worksheet->col_range();
138 my %cross_progenies_hash;
140 for my $row (1 .. $row_max){
141 my $cross_name;
142 my $progeny_name;
144 if ($worksheet->get_cell($row,0)){
145 $cross_name = $worksheet->get_cell($row,0)->value();
147 if ($worksheet->get_cell($row,1)){
148 $progeny_name = $worksheet->get_cell($row,1)->value();
150 #skip blank lines or lines with no name, type and parent
151 if (!$cross_name && !$progeny_name) {
152 next;
155 push @{$cross_progenies_hash{$cross_name}}, $progeny_name;
158 $self->_set_parsed_data(\%cross_progenies_hash);
159 return 1;