Merge pull request #2383 from solgenomics/dauglyon-patch-1
[sgn.git] / lib / CXGN / Pedigree / ParseUpload / Plugin / CrossesSimpleExcel.pm
blob24b534f0986f33e88437cde6104636a373891e78
1 package CXGN::Pedigree::ParseUpload::Plugin::CrossesSimpleExcel;
3 use Moose::Role;
4 use Spreadsheet::ParseExcel;
5 use CXGN::Stock::StockLookup;
6 use SGN::Model::Cvterm;
7 use Data::Dumper;
8 use CXGN::List::Validate;
10 sub _validate_with_plugin {
11 my $self = shift;
12 my $filename = $self->get_filename();
13 my $schema = $self->get_chado_schema();
14 my $cross_properties = $self->get_cross_properties();
15 my @error_messages;
16 my %errors;
17 my %supported_cross_types;
18 my $parser = Spreadsheet::ParseExcel->new();
19 my $excel_obj;
20 my $worksheet;
22 #currently supported cross types
23 $supported_cross_types{'biparental'} = 1; #both parents required
24 $supported_cross_types{'self'} = 1; #only female parent required
25 $supported_cross_types{'open'} = 1; #only female parent required
26 $supported_cross_types{'bulk'} = 1; #both parents required
27 $supported_cross_types{'bulk_self'} = 1; #only female parent required
28 $supported_cross_types{'bulk_open'} = 1; #only female parent required
29 $supported_cross_types{'doubled_haploid'} = 1; #only female parent required
31 #try to open the excel file and report any errors
32 $excel_obj = $parser->parse($filename);
33 if ( !$excel_obj ) {
34 push @error_messages, $parser->error();
35 $errors{'error_messages'} = \@error_messages;
36 $self->_set_parse_errors(\%errors);
37 return;
40 $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
41 if (!$worksheet) {
42 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
43 $errors{'error_messages'} = \@error_messages;
44 $self->_set_parse_errors(\%errors);
45 return;
47 my ( $row_min, $row_max ) = $worksheet->row_range();
48 my ( $col_min, $col_max ) = $worksheet->col_range();
49 if (($col_max - $col_min) < 3 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of crosses
50 push @error_messages, "Spreadsheet is missing header or contains no row";
51 $errors{'error_messages'} = \@error_messages;
52 $self->_set_parse_errors(\%errors);
53 return;
56 #get column headers
57 my $cross_name_head;
58 my $cross_type_head;
59 my $female_parent_head;
60 my $male_parent_head;
62 if ($worksheet->get_cell(0,0)) {
63 $cross_name_head = $worksheet->get_cell(0,0)->value();
65 if ($worksheet->get_cell(0,1)) {
66 $cross_type_head = $worksheet->get_cell(0,1)->value();
68 if ($worksheet->get_cell(0,2)) {
69 $female_parent_head = $worksheet->get_cell(0,2)->value();
71 if ($worksheet->get_cell(0,3)) {
72 $male_parent_head = $worksheet->get_cell(0,3)->value();
75 if (!$cross_name_head || $cross_name_head ne 'cross_name' ) {
76 push @error_messages, "Cell A1: cross_name is missing from the header";
78 if (!$cross_type_head || $cross_type_head ne 'cross_type') {
79 push @error_messages, "Cell B1: cross_type is missing from the header";
81 if (!$female_parent_head || $female_parent_head ne 'female_parent') {
82 push @error_messages, "Cell C1: female_parent is missing from the header";
84 if (!$male_parent_head || $male_parent_head ne 'male_parent') {
85 push @error_messages, "Cell D1: male_parent is missing from the header";
88 my %valid_properties;
89 my @properties = @{$cross_properties};
90 foreach my $property(@properties){
91 $valid_properties{$property} = 1;
94 for my $column ( 4 .. $col_max ) {
95 my $header_string = $worksheet->get_cell(0,$column)->value();
97 if (!$valid_properties{$header_string}){
98 push @error_messages, "Invalid info type: $header_string";
102 my %seen_cross_names;
103 my %seen_accession_names;
105 for my $row ( 1 .. $row_max ) {
106 my $row_name = $row+1;
107 my $cross_name;
108 my $cross_type;
109 my $female_parent;
110 my $male_parent;
112 if ($worksheet->get_cell($row,0)) {
113 $cross_name = $worksheet->get_cell($row,0)->value();
115 if ($worksheet->get_cell($row,1)) {
116 $cross_type = $worksheet->get_cell($row,1)->value();
118 if ($worksheet->get_cell($row,2)) {
119 $female_parent = $worksheet->get_cell($row,2)->value();
121 #skip blank lines or lines with no name, type and parent
122 if (!$cross_name && !$cross_type && !$female_parent) {
123 next;
125 if ($worksheet->get_cell($row,3)) {
126 $male_parent = $worksheet->get_cell($row,3)->value();
129 for my $column ( 4 .. $col_max ) {
130 if ($worksheet->get_cell($row,$column)) {
131 my $info_value = $worksheet->get_cell($row,$column)->value();
132 my $info_type = $worksheet->get_cell(0,$column)->value();
133 if ( ($info_type =~ m/days/ || $info_type =~ m/number/) && !($info_value =~ /^\d+?$/) ) {
134 push @error_messages, "Cell $info_type:$row_name: is not a positive integer: $info_value";
136 elsif ( $info_type =~ m/date/ && !($info_value =~ m/(\d{4})\/(\d{2})\/(\d{2})/) ) {
137 push @error_messages, "Cell $info_type:$row_name: is not a valid date: $info_value. Dates need to be of form YYYY/MM/DD";
142 #cross name must not be blank
143 if (!$cross_name || $cross_name eq '') {
144 push @error_messages, "Cell A$row_name: cross name missing";
145 } else {
146 $cross_name =~ s/^\s+|\s+$//g; #trim whitespace from front and end.
148 # } elsif ($cross_name =~ /\s/ || $cross_name =~ /\// || $cross_name =~ /\\/ ) {
149 # push @error_messages, "Cell A$row_name: cross_name must not contain spaces or slashes.";
150 if ($seen_cross_names{$cross_name}) {
151 push @error_messages, "Cell A$row_name: duplicate cross name: $cross_name";
154 #cross type must not be blank
155 if (!$cross_type || $cross_type eq '') {
156 push @error_messages, "Cell B$row_name: cross type missing";
157 } elsif (!$supported_cross_types{$cross_type}){
158 push @error_messages, "Cell B$row_name: cross type not supported: $cross_type";
161 #female parent must not be blank
162 if (!$female_parent || $female_parent eq '') {
163 push @error_messages, "Cell C$row_name: female parent missing";
166 #male parent must not be blank if type is biparental or bulk
167 if (!$male_parent || $male_parent eq '') {
168 if ($cross_type eq ( 'biparental' || 'bulk' )) {
169 push @error_messages, "Cell D$row_name: male parent required for biparental and bulk crosses";
173 if ($cross_name){
174 $seen_cross_names{$cross_name}++;
177 if ($female_parent){
178 $seen_accession_names{$female_parent}++;
181 if ($male_parent){
182 $seen_accession_names{$male_parent}++;
187 my @accessions = keys %seen_accession_names;
188 my $accession_validator = CXGN::List::Validate->new();
189 my @accessions_missing = @{$accession_validator->validate($schema,'accessions',\@accessions)->{'missing'}};
191 my $population_validator = CXGN::List::Validate->new();
192 my @parents_missing = @{$population_validator->validate($schema,'populations',\@accessions_missing)->{'missing'}};
194 if (scalar(@parents_missing) > 0) {
195 push @error_messages, "The following accessions or populations are not in the database as uniquenames or synonyms: ".join(',',@parents_missing);
196 $errors{'missing_accessions'} = \@parents_missing;
199 my @crosses = keys %seen_cross_names;
200 my $rs = $schema->resultset("Stock::Stock")->search({
201 'is_obsolete' => { '!=' => 't' },
202 'uniquename' => { -in => \@crosses }
204 while (my $r=$rs->next){
205 push @error_messages, "Cross name already exists in database: ".$r->uniquename;
208 #store any errors found in the parsed file to parse_errors accessor
209 if (scalar(@error_messages) >= 1) {
210 $errors{'error_messages'} = \@error_messages;
211 $self->_set_parse_errors(\%errors);
212 return;
215 return 1; #returns true if validation is passed
219 sub _parse_with_plugin {
220 my $self = shift;
221 my $filename = $self->get_filename();
222 my $schema = $self->get_chado_schema();
223 my $parser = Spreadsheet::ParseExcel->new();
224 my $excel_obj;
225 my $worksheet;
226 my @pedigrees;
227 my %additional_properties;
228 my %properties_columns;
229 my %parsed_result;
231 $excel_obj = $parser->parse($filename);
232 if ( !$excel_obj ) {
233 return;
236 $worksheet = ( $excel_obj->worksheets() )[0];
237 my ( $row_min, $row_max ) = $worksheet->row_range();
238 my ( $col_min, $col_max ) = $worksheet->col_range();
240 for my $column ( 4 .. $col_max ) {
241 my $header_string = $worksheet->get_cell(0,$column)->value();
243 $properties_columns{$column} = $header_string;
244 $additional_properties{$header_string} = ();
247 for my $row ( 1 .. $row_max ) {
248 my $cross_name;
249 my $cross_type;
250 my $female_parent;
251 my $male_parent;
252 my $cross_stock;
254 if ($worksheet->get_cell($row,0)) {
255 $cross_name = $worksheet->get_cell($row,0)->value();
256 $cross_name =~ s/^\s+|\s+$//g; #trim whitespace from front and end.
258 if ($worksheet->get_cell($row,1)) {
259 $cross_type = $worksheet->get_cell($row,1)->value();
261 if ($worksheet->get_cell($row,2)) {
262 $female_parent = $worksheet->get_cell($row,2)->value();
265 #skip blank lines or lines with no name, type and parent
266 if (!$cross_name && !$cross_type && !$female_parent) {
267 next;
270 if ($worksheet->get_cell($row,3)) {
271 $male_parent = $worksheet->get_cell($row,3)->value();
274 for my $column ( 4 .. $col_max ) {
275 if ($worksheet->get_cell($row,$column)) {
276 my $column_property = $properties_columns{$column};
277 $additional_properties{$column_property}{$cross_name} = $worksheet->get_cell($row,$column)->value();
278 if ($row == $row_max) {
279 my $info_type = $worksheet->get_cell(0,$column)->value();
280 $parsed_result{$info_type} = $additional_properties{$column_property};
285 my $pedigree = Bio::GeneticRelationships::Pedigree->new(name=>$cross_name, cross_type=>$cross_type);
286 if ($female_parent) {
287 my $female_parent_individual = Bio::GeneticRelationships::Individual->new(name => $female_parent);
288 $pedigree->set_female_parent($female_parent_individual);
290 if ($male_parent) {
291 my $male_parent_individual = Bio::GeneticRelationships::Individual->new(name => $male_parent);
292 $pedigree->set_male_parent($male_parent_individual);
295 push @pedigrees, $pedigree;
299 $parsed_result{'crosses'} = \@pedigrees;
301 $self->_set_parsed_data(\%parsed_result);
303 return 1;
308 sub _get_accession {
309 my $self = shift;
310 my $accession_name = shift;
311 my $chado_schema = $self->get_chado_schema();
312 my $stock_lookup = CXGN::Stock::StockLookup->new(schema => $chado_schema);
313 my $stock;
314 my $accession_cvterm = SGN::Model::Cvterm->get_cvterm_row($chado_schema, 'accession', 'stock_type');
316 $stock_lookup->set_stock_name($accession_name);
317 $stock = $stock_lookup->get_stock_exact();
319 if (!$stock) {
320 return;
323 if ($stock->type_id() != $accession_cvterm->cvterm_id()) {
324 return;
327 return $stock;
332 sub _get_cross {
333 my $self = shift;
334 my $cross_name = shift;
335 my $chado_schema = $self->get_chado_schema();
336 my $stock_lookup = CXGN::Stock::StockLookup->new(schema => $chado_schema);
337 my $stock;
339 $stock_lookup->set_stock_name($cross_name);
340 $stock = $stock_lookup->get_stock_exact();
342 if (!$stock) {
343 return;
346 return $stock;