Merge pull request #5230 from solgenomics/topic/open_pollinated
[sgn.git] / lib / CXGN / Pedigree / ParseUpload / Plugin / CrossesSimplifiedParentInfoExcel.pm
blobc2a28e7bce1a170a4521e79d0b0ee5fe506a9298
1 package CXGN::Pedigree::ParseUpload::Plugin::CrossesSimplifiedParentInfoExcel;
3 use Moose::Role;
4 use Spreadsheet::ParseExcel;
5 use Spreadsheet::ParseXLSX;
6 use CXGN::Stock::StockLookup;
7 use SGN::Model::Cvterm;
8 use Data::Dumper;
9 use CXGN::List::Validate;
11 # DEPRECATED: This plugin has been replaced by the CrossesGeneric plugin
13 sub _validate_with_plugin {
14 my $self = shift;
15 my $filename = $self->get_filename();
16 my $schema = $self->get_chado_schema();
17 my $cross_additional_info = $self->get_cross_additional_info();
18 my @error_messages;
19 my %errors;
20 my %supported_cross_types;
22 # Match a dot, extension .xls / .xlsx
23 my ($extension) = $filename =~ /(\.[^.]+)$/;
24 my $parser;
26 if ($extension eq '.xlsx') {
27 $parser = Spreadsheet::ParseXLSX->new();
29 else {
30 $parser = Spreadsheet::ParseExcel->new();
33 my $excel_obj;
34 my $worksheet;
36 #currently supported cross types
37 $supported_cross_types{'biparental'} = 1; #both parents required
38 $supported_cross_types{'self'} = 1; #only female parent required
39 $supported_cross_types{'open'} = 1; #only female parent required
40 $supported_cross_types{'sib'} = 1; #both parents required but can be the same.
41 $supported_cross_types{'bulk_self'} = 1; #only female population required
42 $supported_cross_types{'bulk_open'} = 1; #only female population required
43 $supported_cross_types{'bulk'} = 1; #both female population and male accession required
44 $supported_cross_types{'doubled_haploid'} = 1; #only female parent required
45 $supported_cross_types{'dihaploid_induction'} = 1; # ditto
46 $supported_cross_types{'polycross'} = 1; #both parents required
47 $supported_cross_types{'backcross'} = 1; #both parents required, parents can be cross or accession stock type
49 #try to open the excel file and report any errors
50 $excel_obj = $parser->parse($filename);
51 if ( !$excel_obj ) {
52 push @error_messages, $parser->error();
53 $errors{'error_messages'} = \@error_messages;
54 $self->_set_parse_errors(\%errors);
55 return;
58 $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
59 if (!$worksheet) {
60 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
61 $errors{'error_messages'} = \@error_messages;
62 $self->_set_parse_errors(\%errors);
63 return;
65 my ( $row_min, $row_max ) = $worksheet->row_range();
66 my ( $col_min, $col_max ) = $worksheet->col_range();
67 if (($col_max - $col_min) < 4 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of crosses
68 push @error_messages, "Spreadsheet is missing header or contains no row";
69 $errors{'error_messages'} = \@error_messages;
70 $self->_set_parse_errors(\%errors);
71 return;
74 #get column headers
75 my $cross_name_head;
76 my $cross_combination_head;
77 my $cross_type_head;
78 my $female_parent_head;
79 my $male_parent_head;
81 if ($worksheet->get_cell(0,0)) {
82 $cross_name_head = $worksheet->get_cell(0,0)->value();
83 $cross_name_head =~ s/^\s+|\s+$//g;
85 if ($worksheet->get_cell(0,1)) {
86 $cross_combination_head = $worksheet->get_cell(0,1)->value();
87 $cross_combination_head =~ s/^\s+|\s+$//g;
89 if ($worksheet->get_cell(0,2)) {
90 $cross_type_head = $worksheet->get_cell(0,2)->value();
91 $cross_type_head =~ s/^\s+|\s+$//g;
93 if ($worksheet->get_cell(0,3)) {
94 $female_parent_head = $worksheet->get_cell(0,3)->value();
95 $female_parent_head =~ s/^\s+|\s+$//g;
97 if ($worksheet->get_cell(0,4)) {
98 $male_parent_head = $worksheet->get_cell(0,4)->value();
99 $male_parent_head =~ s/^\s+|\s+$//g;
103 if (!$cross_name_head || $cross_name_head ne 'cross_unique_id' ) {
104 push @error_messages, "Cell A1: cross_unique_id is missing from the header";
106 if (!$cross_combination_head || $cross_combination_head ne 'cross_combination') {
107 push @error_messages, "Cell B1: cross_combination is missing from the header";
109 if (!$cross_type_head || $cross_type_head ne 'cross_type') {
110 push @error_messages, "Cell C1: cross_type is missing from the header";
112 if (!$female_parent_head || $female_parent_head ne 'female_parent') {
113 push @error_messages, "Cell D1: female_parent is missing from the header";
115 if (!$male_parent_head || $male_parent_head ne 'male_parent') {
116 push @error_messages, "Cell E1: male_parent is missing from the header";
119 my %valid_additional_info;
120 my @valid_info = @{$cross_additional_info};
121 foreach my $info(@valid_info){
122 $valid_additional_info{$info} = 1;
125 for my $column (5 .. $col_max){
126 if ($worksheet->get_cell(0, $column)) {
127 my $header_string = $worksheet->get_cell(0,$column)->value();
128 $header_string =~ s/^\s+|\s+$//g;
130 if (($header_string) && (!$valid_additional_info{$header_string})){
131 push @error_messages, "Invalid info type: $header_string";
136 my %seen_cross_names;
137 my %seen_parent_names;
139 for my $row ( 1 .. $row_max ) {
140 my $row_name = $row+1;
141 my $cross_name;
142 my $cross_combination;
143 my $cross_type;
144 my $female_parent;
145 my $male_parent;
147 if ($worksheet->get_cell($row,0)) {
148 $cross_name = $worksheet->get_cell($row,0)->value();
150 if ($worksheet->get_cell($row,1)) {
151 $cross_combination = $worksheet->get_cell($row,1)->value();
152 $cross_combination =~ s/^\s+|\s+$//g;
154 if ($worksheet->get_cell($row,2)) {
155 $cross_type = $worksheet->get_cell($row,2)->value();
156 $cross_type =~ s/^\s+|\s+$//g;
158 if ($worksheet->get_cell($row,3)) {
159 $female_parent = $worksheet->get_cell($row,3)->value();
161 if ($worksheet->get_cell($row,4)) {
162 $male_parent = $worksheet->get_cell($row,4)->value();
165 if (!defined $cross_name && !defined $cross_type && !defined $female_parent) {
166 last;
169 #cross name must not be blank
170 if (!$cross_name || $cross_name eq '') {
171 push @error_messages, "Cell A$row_name: cross unique id missing";
172 } else {
173 $cross_name =~ s/^\s+|\s+$//g; #trim whitespace from front and end.
175 # } elsif ($cross_name =~ /\s/ || $cross_name =~ /\// || $cross_name =~ /\\/ ) {
176 # push @error_messages, "Cell A$row_name: cross_name must not contain spaces or slashes.";
177 if ($seen_cross_names{$cross_name}) {
178 push @error_messages, "Cell A$row_name: duplicate cross unique id: $cross_name";
181 #cross type must not be blank
182 if (!$cross_type || $cross_type eq '') {
183 push @error_messages, "Cell C$row_name: cross type missing";
184 } elsif (!$supported_cross_types{$cross_type}){
185 push @error_messages, "Cell C$row_name: cross type not supported: $cross_type";
188 #female parent must not be blank
189 if (!$female_parent || $female_parent eq '') {
190 push @error_messages, "Cell D$row_name: female parent missing";
193 #male parent must not be blank if type is biparental, sib, polycross or bulk
194 if (!$male_parent || $male_parent eq '') {
195 if ($cross_type eq ( 'biparental' || 'bulk' || 'sib' || 'polycross' || 'backcross' )) {
196 push @error_messages, "Cell E$row_name: male parent required for biparental, sib, polycross, backcross and bulk cross types";
200 if ($cross_name){
201 $cross_name =~ s/^\s+|\s+$//g;
202 $seen_cross_names{$cross_name}++;
205 if ($female_parent) {
206 $female_parent =~ s/^\s+|\s+$//g;
207 $seen_parent_names{$female_parent}++;
210 if ($male_parent){
211 $male_parent =~ s/^\s+|\s+$//g;
212 $seen_parent_names{$male_parent}++;
216 my @parent_list = keys %seen_parent_names;
217 my $parent_validator = CXGN::List::Validate->new();
219 my @parents_missing = @{$parent_validator->validate($schema,'accessions_or_populations_or_plots_or_plants',\@parent_list)->{'missing'}};
221 if (scalar(@parents_missing) > 0) {
222 push @error_messages, "The following parents are not in the database, or are not in the database as accession names, plot names or plant names: ".join(',',@parents_missing);
225 my @crosses = keys %seen_cross_names;
226 my $rs = $schema->resultset("Stock::Stock")->search({
227 'is_obsolete' => { '!=' => 't' },
228 'uniquename' => { -in => \@crosses }
230 while (my $r=$rs->next){
231 push @error_messages, "Cross unique id already exists in database: ".$r->uniquename;
234 #store any errors found in the parsed file to parse_errors accessor
235 if (scalar(@error_messages) >= 1) {
236 $errors{'error_messages'} = \@error_messages;
237 $self->_set_parse_errors(\%errors);
238 return;
241 return 1;
245 sub _parse_with_plugin {
246 my $self = shift;
247 my $filename = $self->get_filename();
248 my $schema = $self->get_chado_schema();
250 # Match a dot, extension .xls / .xlsx
251 my ($extension) = $filename =~ /(\.[^.]+)$/;
252 my $parser;
254 if ($extension eq '.xlsx') {
255 $parser = Spreadsheet::ParseXLSX->new();
257 else {
258 $parser = Spreadsheet::ParseExcel->new();
261 my $excel_obj;
262 my $worksheet;
263 my @pedigrees;
264 my %cross_additional_info;
265 my %parsed_result;
267 $excel_obj = $parser->parse($filename);
268 if ( !$excel_obj ) {
269 return;
272 $worksheet = ( $excel_obj->worksheets() )[0];
273 my ( $row_min, $row_max ) = $worksheet->row_range();
274 my ( $col_min, $col_max ) = $worksheet->col_range();
276 my $accession_stock_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id();
277 my $plot_stock_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot', 'stock_type')->cvterm_id();
278 my $plant_stock_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plant', 'stock_type')->cvterm_id();
279 my $plot_of_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot_of', 'stock_relationship')->cvterm_id();
280 my $plant_of_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plant_of', 'stock_relationship')->cvterm_id();
282 for my $row ( 1 .. $row_max ) {
283 my $cross_name;
284 my $cross_combination;
285 my $cross_type;
286 my $female_parent;
287 my $male_parent;
288 my $cross_stock;
290 if ($worksheet->get_cell($row,0)) {
291 $cross_name = $worksheet->get_cell($row,0)->value();
292 $cross_name =~ s/^\s+|\s+$//g;
294 if ($worksheet->get_cell($row,1)) {
295 $cross_combination = $worksheet->get_cell($row,1)->value();
296 $cross_combination =~ s/^\s+|\s+$//g;
298 if ($worksheet->get_cell($row,2)) {
299 $cross_type = $worksheet->get_cell($row,2)->value();
300 $cross_type =~ s/^\s+|\s+$//g;
302 if ($worksheet->get_cell($row,3)) {
303 $female_parent = $worksheet->get_cell($row,3)->value();
304 $female_parent =~ s/^\s+|\s+$//g;
307 if (!defined $cross_name && !defined $cross_type && !defined $female_parent) {
308 last;
311 if ($worksheet->get_cell($row,4)) {
312 $male_parent = $worksheet->get_cell($row,4)->value();
313 $male_parent =~ s/^\s+|\s+$//g;
316 for my $column ( 5 .. $col_max ) {
317 if ($worksheet->get_cell($row,$column)) {
318 my $info_header = $worksheet->get_cell(0,$column)->value();
319 $info_header =~ s/^\s+|\s+$//g;
320 $cross_additional_info{$cross_name}{$info_header} = $worksheet->get_cell($row,$column)->value();
324 my $pedigree = Bio::GeneticRelationships::Pedigree->new(name=>$cross_name, cross_type=>$cross_type, cross_combination=>$cross_combination);
326 my $female_rs = $schema->resultset("Stock::Stock")->find({uniquename => $female_parent});
327 my $female_stock_id = $female_rs->stock_id();
328 my $female_type_id = $female_rs->type_id();
330 my $female_accession_name;
331 my $female_accession_stock_id;
332 if ($female_type_id == $plot_stock_type_id) {
333 $female_accession_stock_id = $schema->resultset("Stock::StockRelationship")->find({subject_id=>$female_stock_id, type_id=>$plot_of_type_id})->object_id();
334 $female_accession_name = $schema->resultset("Stock::Stock")->find({stock_id => $female_accession_stock_id})->uniquename();
335 my $female_plot_individual = Bio::GeneticRelationships::Individual->new(name => $female_parent);
336 $pedigree->set_female_plot($female_plot_individual);
337 } elsif ($female_type_id == $plant_stock_type_id) {
338 $female_accession_stock_id = $schema->resultset("Stock::StockRelationship")->find({subject_id=>$female_stock_id, type_id=>$plant_of_type_id})->object_id();
339 $female_accession_name = $schema->resultset("Stock::Stock")->find({stock_id => $female_accession_stock_id})->uniquename();
340 my $female_plant_individual = Bio::GeneticRelationships::Individual->new(name => $female_parent);
341 $pedigree->set_female_plant($female_plant_individual);
342 } else {
343 $female_accession_name = $female_parent;
346 my $female_parent_individual = Bio::GeneticRelationships::Individual->new(name => $female_accession_name);
347 $pedigree->set_female_parent($female_parent_individual);
349 if ($male_parent) {
350 my $male_accession_stock_id;
351 my $male_accession_name;
352 my $male_rs = $schema->resultset("Stock::Stock")->find({uniquename => $male_parent});
353 my $male_stock_id = $male_rs->stock_id();
354 my $male_type_id = $male_rs->type_id();
356 if ($male_type_id == $plot_stock_type_id) {
357 $male_accession_stock_id = $schema->resultset("Stock::StockRelationship")->find({subject_id=>$male_stock_id, type_id=>$plot_of_type_id})->object_id();
358 $male_accession_name = $schema->resultset("Stock::Stock")->find({stock_id => $male_accession_stock_id})->uniquename();
359 my $male_plot_individual = Bio::GeneticRelationships::Individual->new(name => $male_parent);
360 $pedigree->set_male_plot($male_plot_individual);
361 } elsif ($male_type_id == $plant_stock_type_id) {
362 $male_accession_stock_id = $schema->resultset("Stock::StockRelationship")->find({subject_id=>$male_stock_id, type_id=>$plant_of_type_id})->object_id();
363 $male_accession_name = $schema->resultset("Stock::Stock")->find({stock_id => $male_accession_stock_id})->uniquename();
364 my $male_plant_individual = Bio::GeneticRelationships::Individual->new(name => $male_parent);
365 $pedigree->set_male_plant($male_plant_individual);
366 } else {
367 $male_accession_name = $male_parent
370 my $male_parent_individual = Bio::GeneticRelationships::Individual->new(name => $male_accession_name);
371 $pedigree->set_male_parent($male_parent_individual);
374 push @pedigrees, $pedigree;
378 $parsed_result{'additional_info'} = \%cross_additional_info;
380 $parsed_result{'crosses'} = \@pedigrees;
382 $self->_set_parsed_data(\%parsed_result);
384 return 1;