1 package CXGN
::Pedigree
::ParseUpload
::Plugin
::CrossesSimpleExcel
;
4 use Spreadsheet
::ParseExcel
;
5 use Spreadsheet
::ParseXLSX
;
6 use CXGN
::Stock
::StockLookup
;
7 use SGN
::Model
::Cvterm
;
9 use CXGN
::List
::Validate
;
11 sub _validate_with_plugin
{
13 my $filename = $self->get_filename();
14 my $schema = $self->get_chado_schema();
15 my $cross_additional_info = $self->get_cross_additional_info();
18 my %supported_cross_types;
20 # Match a dot, extension .xls / .xlsx
21 my ($extension) = $filename =~ /(\.[^.]+)$/;
24 if ($extension eq '.xlsx') {
25 $parser = Spreadsheet
::ParseXLSX
->new();
28 $parser = Spreadsheet
::ParseExcel
->new();
33 # print STDERR "ADDITIONAL INFO =".Dumper($cross_additional_info)."\n";
35 #currently supported cross types
36 $supported_cross_types{'biparental'} = 1; #both parents required
37 $supported_cross_types{'self'} = 1; #only female parent required
38 $supported_cross_types{'open'} = 1; #only female parent required
39 $supported_cross_types{'sib'} = 1; #both parents required but can be the same.
40 $supported_cross_types{'bulk_self'} = 1; #only female population required
41 $supported_cross_types{'bulk_open'} = 1; #only female population required
42 $supported_cross_types{'bulk'} = 1; #both female population and male accession required
43 $supported_cross_types{'doubled_haploid'} = 1; #only female parent required
44 $supported_cross_types{'dihaploid_induction'} = 1; # only female parent required
45 $supported_cross_types{'polycross'} = 1; #both parents required
46 $supported_cross_types{'backcross'} = 1; #both parents required, parents can be cross or accession stock type
48 #try to open the excel file and report any errors
49 $excel_obj = $parser->parse($filename);
51 push @error_messages, $parser->error();
52 $errors{'error_messages'} = \
@error_messages;
53 $self->_set_parse_errors(\
%errors);
57 $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
59 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
60 $errors{'error_messages'} = \
@error_messages;
61 $self->_set_parse_errors(\
%errors);
64 my ( $row_min, $row_max ) = $worksheet->row_range();
65 my ( $col_min, $col_max ) = $worksheet->col_range();
66 if (($col_max - $col_min) < 4 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of crosses
67 push @error_messages, "Spreadsheet is missing header or contains no row";
68 $errors{'error_messages'} = \
@error_messages;
69 $self->_set_parse_errors(\
%errors);
75 my $cross_combination_head;
77 my $female_parent_head;
80 if ($worksheet->get_cell(0,0)) {
81 $cross_name_head = $worksheet->get_cell(0,0)->value();
82 $cross_name_head =~ s/^\s+|\s+$//g;
84 if ($worksheet->get_cell(0,1)) {
85 $cross_combination_head = $worksheet->get_cell(0,1)->value();
86 $cross_combination_head =~ s/^\s+|\s+$//g;
88 if ($worksheet->get_cell(0,2)) {
89 $cross_type_head = $worksheet->get_cell(0,2)->value();
90 $cross_type_head =~ s/^\s+|\s+$//g;
92 if ($worksheet->get_cell(0,3)) {
93 $female_parent_head = $worksheet->get_cell(0,3)->value();
94 $female_parent_head =~ s/^\s+|\s+$//g;
96 if ($worksheet->get_cell(0,4)) {
97 $male_parent_head = $worksheet->get_cell(0,4)->value();
98 $male_parent_head =~ s/^\s+|\s+$//g;
102 if (!$cross_name_head || $cross_name_head ne 'cross_unique_id' ) {
103 push @error_messages, "Cell A1: cross_unique_id is missing from the header";
105 if (!$cross_combination_head || $cross_combination_head ne 'cross_combination') {
106 push @error_messages, "Cell B1: cross_combination is missing from the header";
108 if (!$cross_type_head || $cross_type_head ne 'cross_type') {
109 push @error_messages, "Cell C1: cross_type is missing from the header";
111 if (!$female_parent_head || $female_parent_head ne 'female_parent') {
112 push @error_messages, "Cell D1: female_parent is missing from the header";
114 if (!$male_parent_head || $male_parent_head ne 'male_parent') {
115 push @error_messages, "Cell E1: male_parent is missing from the header";
118 my %valid_additional_info;
119 my @valid_info = @
{$cross_additional_info};
120 foreach my $info(@valid_info){
121 $valid_additional_info{$info} = 1;
124 for my $column (5 .. $col_max){
125 if ($worksheet->get_cell(0, $column)) {
126 my $header_string = $worksheet->get_cell(0,$column)->value();
127 $header_string =~ s/^\s+|\s+$//g;
129 if (($header_string) && (!$valid_additional_info{$header_string})){
130 push @error_messages, "Invalid info type: $header_string";
135 my %seen_cross_names;
136 my %seen_accession_names;
137 my %seen_backcross_parents;
138 my %seen_population_names;
140 for my $row ( 1 .. $row_max ) {
141 my $row_name = $row+1;
143 my $cross_combination;
148 if ($worksheet->get_cell($row,0)) {
149 $cross_name = $worksheet->get_cell($row,0)->value();
151 if ($worksheet->get_cell($row,1)) {
152 $cross_combination = $worksheet->get_cell($row,1)->value();
153 $cross_combination =~ s/^\s+|\s+$//g;
155 if ($worksheet->get_cell($row,2)) {
156 $cross_type = $worksheet->get_cell($row,2)->value();
157 $cross_type =~ s/^\s+|\s+$//g;
159 if ($worksheet->get_cell($row,3)) {
160 $female_parent = $worksheet->get_cell($row,3)->value();
163 if (!defined $cross_name && !defined $cross_type && !defined $female_parent) {
167 if ($worksheet->get_cell($row,4)) {
168 $male_parent = $worksheet->get_cell($row,4)->value();
171 $female_parent =~ s/^\s+|\s+$//g;
172 $male_parent =~ s/^\s+|\s+$//g;
174 #cross name must not be blank
175 if (!$cross_name || $cross_name eq '') {
176 push @error_messages, "Cell A$row_name: cross unique id missing";
178 $cross_name =~ s/^\s+|\s+$//g; #trim whitespace from front and end.
180 # } elsif ($cross_name =~ /\s/ || $cross_name =~ /\// || $cross_name =~ /\\/ ) {
181 # push @error_messages, "Cell A$row_name: cross_name must not contain spaces or slashes.";
182 if ($seen_cross_names{$cross_name}) {
183 push @error_messages, "Cell A$row_name: duplicate cross unique id: $cross_name";
186 if (($cross_type eq 'double_haploid') || ($cross_type eq 'dihaploid_induction') || ($cross_type eq 'self')) {
187 if ($female_parent ne $male_parent) {
188 push @error_messages, "For double haploid, dihaploid_induction, and self, female parent needs to be identical to male parent in row $row_name";
194 #cross type must not be blank
195 if (!$cross_type || $cross_type eq '') {
196 push @error_messages, "Cell C$row_name: cross type missing";
197 } elsif (!$supported_cross_types{$cross_type}){
198 push @error_messages, "Cell C$row_name: cross type not supported: $cross_type";
201 #female parent must not be blank
202 if (!$female_parent || $female_parent eq '') {
203 push @error_messages, "Cell D$row_name: female parent missing";
206 #male parent must not be blank if type is biparental, sib, polycross or bulk
207 if (!$male_parent || $male_parent eq '') {
208 if ($cross_type eq ( 'biparental' || 'bulk' || 'sib' || 'polycross' || 'backcross' )) {
209 push @error_messages, "Cell E$row_name: male parent required for biparental, sib, polycross, backcross and bulk cross types";
214 $cross_name =~ s/^\s+|\s+$//g;
215 $seen_cross_names{$cross_name}++;
219 if (($cross_type eq 'bulk') || ($cross_type eq 'bulk_self') || ($cross_type eq 'bulk_open')) {
220 #$female_parent =~ s/^\s+|\s+$//g;
221 $seen_population_names{$female_parent}++;
222 if ($cross_type eq 'bulk_open') {
224 #$male_parent =~ s/^\s+|\s+$//g;
225 $seen_population_names{$male_parent}++;
227 } elsif ($cross_type eq 'bulk') {
228 $male_parent =~ s/^\s+|\s+$//g;
229 $seen_accession_names{$male_parent}++;
231 } elsif (($cross_type eq 'polycross') || ($cross_type eq 'open')) {
232 #$female_parent =~ s/^\s+|\s+$//g;
233 $seen_accession_names{$female_parent}++;
235 # $male_parent =~ s/^\s+|\s+$//g;
236 $seen_population_names{$male_parent}++;
238 } elsif ($cross_type eq 'backcross') {
239 #$female_parent =~ s/^\s+|\s+$//g;
240 $seen_backcross_parents{$female_parent}++;
241 #$male_parent =~ s/^\s+|\s+$//g;
242 $seen_backcross_parents{$male_parent}++;
244 #$female_parent =~ s/^\s+|\s+$//g;
245 $seen_accession_names{$female_parent}++;
248 # $male_parent =~ s/^\s+|\s+$//g;
249 $seen_accession_names{$male_parent}++;
254 my @accessions = keys %seen_accession_names;
255 my $accession_validator = CXGN
::List
::Validate
->new();
256 my @accessions_missing = @
{$accession_validator->validate($schema,'uniquenames',\
@accessions)->{'missing'}};
258 if (scalar(@accessions_missing) > 0) {
259 push @error_messages, "The following parents are not in the database, or are not in the database as accession uniquenames: ".join(',',@accessions_missing);
260 $errors{'missing_accessions'} = \
@accessions_missing;
263 my @populations = keys %seen_population_names;
264 my $population_validator = CXGN
::List
::Validate
->new();
265 my @populations_missing = @
{$population_validator->validate($schema,'populations',\
@populations)->{'missing'}};
267 if (scalar(@populations_missing) > 0) {
268 push @error_messages, "The following parents are not in the database, or are not in the database as population uniquenames: ".join(',',@populations_missing);
271 my @backcross_parents = keys %seen_backcross_parents;
272 my $backcross_parent_validator = CXGN
::List
::Validate
->new();
273 my @backcross_parents_missing = @
{$backcross_parent_validator->validate($schema,'accessions_or_crosses',\
@backcross_parents)->{'missing'}};
275 if (scalar(@backcross_parents_missing) > 0) {
276 push @error_messages, "The following parents are not in the database, or are not in the database as uniquenames: ".join(',',@backcross_parents_missing);
277 $errors{'missing_accessions_or_crosses'} = \
@backcross_parents_missing;
280 my @crosses = keys %seen_cross_names;
281 my $rs = $schema->resultset("Stock::Stock")->search({
282 'is_obsolete' => { '!=' => 't' },
283 'uniquename' => { -in => \
@crosses }
285 while (my $r=$rs->next){
286 push @error_messages, "Cross unique id already exists in database: ".$r->uniquename;
289 #store any errors found in the parsed file to parse_errors accessor
290 if (scalar(@error_messages) >= 1) {
291 $errors{'error_messages'} = \
@error_messages;
292 $self->_set_parse_errors(\
%errors);
296 return 1; #returns true if validation is passed
300 sub _parse_with_plugin
{
302 my $filename = $self->get_filename();
303 my $schema = $self->get_chado_schema();
305 # Match a dot, extension .xls / .xlsx
306 my ($extension) = $filename =~ /(\.[^.]+)$/;
309 if ($extension eq '.xlsx') {
310 $parser = Spreadsheet
::ParseXLSX
->new();
313 $parser = Spreadsheet
::ParseExcel
->new();
319 my %cross_additional_info;
322 $excel_obj = $parser->parse($filename);
327 $worksheet = ( $excel_obj->worksheets() )[0];
328 my ( $row_min, $row_max ) = $worksheet->row_range();
329 my ( $col_min, $col_max ) = $worksheet->col_range();
331 for my $row ( 1 .. $row_max ) {
333 my $cross_combination;
339 if ($worksheet->get_cell($row,0)) {
340 $cross_name = $worksheet->get_cell($row,0)->value();
341 $cross_name =~ s/^\s+|\s+$//g;
343 if ($worksheet->get_cell($row,1)) {
344 $cross_combination = $worksheet->get_cell($row,1)->value();
345 $cross_combination =~ s/^\s+|\s+$//g;
347 if ($worksheet->get_cell($row,2)) {
348 $cross_type = $worksheet->get_cell($row,2)->value();
349 $cross_type =~ s/^\s+|\s+$//g;
351 if ($worksheet->get_cell($row,3)) {
352 $female_parent = $worksheet->get_cell($row,3)->value();
353 $female_parent =~ s/^\s+|\s+$//g;
356 if (!defined $cross_name && !defined $cross_type && !defined $female_parent) {
360 if ($worksheet->get_cell($row,4)) {
361 $male_parent = $worksheet->get_cell($row,4)->value();
362 $male_parent =~ s/^\s+|\s+$//g;
365 for my $column ( 5 .. $col_max ) {
366 if ($worksheet->get_cell($row,$column)) {
367 my $info_header = $worksheet->get_cell(0,$column)->value();
368 $info_header =~ s/^\s+|\s+$//g;
369 $cross_additional_info{$cross_name}{$info_header} = $worksheet->get_cell($row,$column)->value();
373 my $pedigree = Bio
::GeneticRelationships
::Pedigree
->new(name
=>$cross_name, cross_type
=>$cross_type, cross_combination
=>$cross_combination);
374 if ($female_parent) {
375 my $female_parent_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $female_parent);
376 $pedigree->set_female_parent($female_parent_individual);
379 my $male_parent_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $male_parent);
380 $pedigree->set_male_parent($male_parent_individual);
383 push @pedigrees, $pedigree;
387 # print STDERR "ADDITIONAL INFO HASH =".Dumper(\%cross_additional_info)."\n";
388 $parsed_result{'additional_info'} = \
%cross_additional_info;
390 $parsed_result{'crosses'} = \
@pedigrees;
392 $self->_set_parsed_data(\
%parsed_result);
401 my $accession_name = shift;
402 my $chado_schema = $self->get_chado_schema();
403 my $stock_lookup = CXGN
::Stock
::StockLookup
->new(schema
=> $chado_schema);
405 my $accession_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($chado_schema, 'accession', 'stock_type');
407 $stock_lookup->set_stock_name($accession_name);
408 $stock = $stock_lookup->get_stock_exact();
414 if ($stock->type_id() != $accession_cvterm->cvterm_id()) {
425 my $cross_name = shift;
426 my $chado_schema = $self->get_chado_schema();
427 my $stock_lookup = CXGN
::Stock
::StockLookup
->new(schema
=> $chado_schema);
430 $stock_lookup->set_stock_name($cross_name);
431 $stock = $stock_lookup->get_stock_exact();