1 package CXGN
::Pedigree
::ParseUpload
::Plugin
::CrossesSimplifiedParentInfoExcel
;
4 use Spreadsheet
::ParseExcel
;
5 use Spreadsheet
::ParseXLSX
;
6 use CXGN
::Stock
::StockLookup
;
7 use SGN
::Model
::Cvterm
;
9 use CXGN
::List
::Validate
;
11 # DEPRECATED: This plugin has been replaced by the CrossesGeneric plugin
13 sub _validate_with_plugin
{
15 my $filename = $self->get_filename();
16 my $schema = $self->get_chado_schema();
17 my $cross_additional_info = $self->get_cross_additional_info();
20 my %supported_cross_types;
22 # Match a dot, extension .xls / .xlsx
23 my ($extension) = $filename =~ /(\.[^.]+)$/;
26 if ($extension eq '.xlsx') {
27 $parser = Spreadsheet
::ParseXLSX
->new();
30 $parser = Spreadsheet
::ParseExcel
->new();
36 #currently supported cross types
37 $supported_cross_types{'biparental'} = 1; #both parents required
38 $supported_cross_types{'self'} = 1; #only female parent required
39 $supported_cross_types{'open'} = 1; #only female parent required
40 $supported_cross_types{'sib'} = 1; #both parents required but can be the same.
41 $supported_cross_types{'bulk_self'} = 1; #only female population required
42 $supported_cross_types{'bulk_open'} = 1; #only female population required
43 $supported_cross_types{'bulk'} = 1; #both female population and male accession required
44 $supported_cross_types{'doubled_haploid'} = 1; #only female parent required
45 $supported_cross_types{'dihaploid_induction'} = 1; # ditto
46 $supported_cross_types{'polycross'} = 1; #both parents required
47 $supported_cross_types{'backcross'} = 1; #both parents required, parents can be cross or accession stock type
49 #try to open the excel file and report any errors
50 $excel_obj = $parser->parse($filename);
52 push @error_messages, $parser->error();
53 $errors{'error_messages'} = \
@error_messages;
54 $self->_set_parse_errors(\
%errors);
58 $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
60 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
61 $errors{'error_messages'} = \
@error_messages;
62 $self->_set_parse_errors(\
%errors);
65 my ( $row_min, $row_max ) = $worksheet->row_range();
66 my ( $col_min, $col_max ) = $worksheet->col_range();
67 if (($col_max - $col_min) < 4 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of crosses
68 push @error_messages, "Spreadsheet is missing header or contains no row";
69 $errors{'error_messages'} = \
@error_messages;
70 $self->_set_parse_errors(\
%errors);
76 my $cross_combination_head;
78 my $female_parent_head;
81 if ($worksheet->get_cell(0,0)) {
82 $cross_name_head = $worksheet->get_cell(0,0)->value();
83 $cross_name_head =~ s/^\s+|\s+$//g;
85 if ($worksheet->get_cell(0,1)) {
86 $cross_combination_head = $worksheet->get_cell(0,1)->value();
87 $cross_combination_head =~ s/^\s+|\s+$//g;
89 if ($worksheet->get_cell(0,2)) {
90 $cross_type_head = $worksheet->get_cell(0,2)->value();
91 $cross_type_head =~ s/^\s+|\s+$//g;
93 if ($worksheet->get_cell(0,3)) {
94 $female_parent_head = $worksheet->get_cell(0,3)->value();
95 $female_parent_head =~ s/^\s+|\s+$//g;
97 if ($worksheet->get_cell(0,4)) {
98 $male_parent_head = $worksheet->get_cell(0,4)->value();
99 $male_parent_head =~ s/^\s+|\s+$//g;
103 if (!$cross_name_head || $cross_name_head ne 'cross_unique_id' ) {
104 push @error_messages, "Cell A1: cross_unique_id is missing from the header";
106 if (!$cross_combination_head || $cross_combination_head ne 'cross_combination') {
107 push @error_messages, "Cell B1: cross_combination is missing from the header";
109 if (!$cross_type_head || $cross_type_head ne 'cross_type') {
110 push @error_messages, "Cell C1: cross_type is missing from the header";
112 if (!$female_parent_head || $female_parent_head ne 'female_parent') {
113 push @error_messages, "Cell D1: female_parent is missing from the header";
115 if (!$male_parent_head || $male_parent_head ne 'male_parent') {
116 push @error_messages, "Cell E1: male_parent is missing from the header";
119 my %valid_additional_info;
120 my @valid_info = @
{$cross_additional_info};
121 foreach my $info(@valid_info){
122 $valid_additional_info{$info} = 1;
125 for my $column (5 .. $col_max){
126 if ($worksheet->get_cell(0, $column)) {
127 my $header_string = $worksheet->get_cell(0,$column)->value();
128 $header_string =~ s/^\s+|\s+$//g;
130 if (($header_string) && (!$valid_additional_info{$header_string})){
131 push @error_messages, "Invalid info type: $header_string";
136 my %seen_cross_names;
137 my %seen_parent_names;
139 for my $row ( 1 .. $row_max ) {
140 my $row_name = $row+1;
142 my $cross_combination;
147 if ($worksheet->get_cell($row,0)) {
148 $cross_name = $worksheet->get_cell($row,0)->value();
150 if ($worksheet->get_cell($row,1)) {
151 $cross_combination = $worksheet->get_cell($row,1)->value();
152 $cross_combination =~ s/^\s+|\s+$//g;
154 if ($worksheet->get_cell($row,2)) {
155 $cross_type = $worksheet->get_cell($row,2)->value();
156 $cross_type =~ s/^\s+|\s+$//g;
158 if ($worksheet->get_cell($row,3)) {
159 $female_parent = $worksheet->get_cell($row,3)->value();
161 if ($worksheet->get_cell($row,4)) {
162 $male_parent = $worksheet->get_cell($row,4)->value();
165 if (!defined $cross_name && !defined $cross_type && !defined $female_parent) {
169 #cross name must not be blank
170 if (!$cross_name || $cross_name eq '') {
171 push @error_messages, "Cell A$row_name: cross unique id missing";
173 $cross_name =~ s/^\s+|\s+$//g; #trim whitespace from front and end.
175 # } elsif ($cross_name =~ /\s/ || $cross_name =~ /\// || $cross_name =~ /\\/ ) {
176 # push @error_messages, "Cell A$row_name: cross_name must not contain spaces or slashes.";
177 if ($seen_cross_names{$cross_name}) {
178 push @error_messages, "Cell A$row_name: duplicate cross unique id: $cross_name";
181 #cross type must not be blank
182 if (!$cross_type || $cross_type eq '') {
183 push @error_messages, "Cell C$row_name: cross type missing";
184 } elsif (!$supported_cross_types{$cross_type}){
185 push @error_messages, "Cell C$row_name: cross type not supported: $cross_type";
188 #female parent must not be blank
189 if (!$female_parent || $female_parent eq '') {
190 push @error_messages, "Cell D$row_name: female parent missing";
193 #male parent must not be blank if type is biparental, sib, polycross or bulk
194 if (!$male_parent || $male_parent eq '') {
195 if ($cross_type eq ( 'biparental' || 'bulk' || 'sib' || 'polycross' || 'backcross' )) {
196 push @error_messages, "Cell E$row_name: male parent required for biparental, sib, polycross, backcross and bulk cross types";
201 $cross_name =~ s/^\s+|\s+$//g;
202 $seen_cross_names{$cross_name}++;
205 if ($female_parent) {
206 $female_parent =~ s/^\s+|\s+$//g;
207 $seen_parent_names{$female_parent}++;
211 $male_parent =~ s/^\s+|\s+$//g;
212 $seen_parent_names{$male_parent}++;
216 my @parent_list = keys %seen_parent_names;
217 my $parent_validator = CXGN
::List
::Validate
->new();
219 my @parents_missing = @
{$parent_validator->validate($schema,'accessions_or_populations_or_plots_or_plants',\
@parent_list)->{'missing'}};
221 if (scalar(@parents_missing) > 0) {
222 push @error_messages, "The following parents are not in the database, or are not in the database as accession names, plot names or plant names: ".join(',',@parents_missing);
225 my @crosses = keys %seen_cross_names;
226 my $rs = $schema->resultset("Stock::Stock")->search({
227 'is_obsolete' => { '!=' => 't' },
228 'uniquename' => { -in => \
@crosses }
230 while (my $r=$rs->next){
231 push @error_messages, "Cross unique id already exists in database: ".$r->uniquename;
234 #store any errors found in the parsed file to parse_errors accessor
235 if (scalar(@error_messages) >= 1) {
236 $errors{'error_messages'} = \
@error_messages;
237 $self->_set_parse_errors(\
%errors);
245 sub _parse_with_plugin
{
247 my $filename = $self->get_filename();
248 my $schema = $self->get_chado_schema();
250 # Match a dot, extension .xls / .xlsx
251 my ($extension) = $filename =~ /(\.[^.]+)$/;
254 if ($extension eq '.xlsx') {
255 $parser = Spreadsheet
::ParseXLSX
->new();
258 $parser = Spreadsheet
::ParseExcel
->new();
264 my %cross_additional_info;
267 $excel_obj = $parser->parse($filename);
272 $worksheet = ( $excel_obj->worksheets() )[0];
273 my ( $row_min, $row_max ) = $worksheet->row_range();
274 my ( $col_min, $col_max ) = $worksheet->col_range();
276 my $accession_stock_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id();
277 my $plot_stock_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'plot', 'stock_type')->cvterm_id();
278 my $plant_stock_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'plant', 'stock_type')->cvterm_id();
279 my $plot_of_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'plot_of', 'stock_relationship')->cvterm_id();
280 my $plant_of_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'plant_of', 'stock_relationship')->cvterm_id();
282 for my $row ( 1 .. $row_max ) {
284 my $cross_combination;
290 if ($worksheet->get_cell($row,0)) {
291 $cross_name = $worksheet->get_cell($row,0)->value();
292 $cross_name =~ s/^\s+|\s+$//g;
294 if ($worksheet->get_cell($row,1)) {
295 $cross_combination = $worksheet->get_cell($row,1)->value();
296 $cross_combination =~ s/^\s+|\s+$//g;
298 if ($worksheet->get_cell($row,2)) {
299 $cross_type = $worksheet->get_cell($row,2)->value();
300 $cross_type =~ s/^\s+|\s+$//g;
302 if ($worksheet->get_cell($row,3)) {
303 $female_parent = $worksheet->get_cell($row,3)->value();
304 $female_parent =~ s/^\s+|\s+$//g;
307 if (!defined $cross_name && !defined $cross_type && !defined $female_parent) {
311 if ($worksheet->get_cell($row,4)) {
312 $male_parent = $worksheet->get_cell($row,4)->value();
313 $male_parent =~ s/^\s+|\s+$//g;
316 for my $column ( 5 .. $col_max ) {
317 if ($worksheet->get_cell($row,$column)) {
318 my $info_header = $worksheet->get_cell(0,$column)->value();
319 $info_header =~ s/^\s+|\s+$//g;
320 $cross_additional_info{$cross_name}{$info_header} = $worksheet->get_cell($row,$column)->value();
324 my $pedigree = Bio
::GeneticRelationships
::Pedigree
->new(name
=>$cross_name, cross_type
=>$cross_type, cross_combination
=>$cross_combination);
326 my $female_rs = $schema->resultset("Stock::Stock")->find({uniquename
=> $female_parent});
327 my $female_stock_id = $female_rs->stock_id();
328 my $female_type_id = $female_rs->type_id();
330 my $female_accession_name;
331 my $female_accession_stock_id;
332 if ($female_type_id == $plot_stock_type_id) {
333 $female_accession_stock_id = $schema->resultset("Stock::StockRelationship")->find({subject_id
=>$female_stock_id, type_id
=>$plot_of_type_id})->object_id();
334 $female_accession_name = $schema->resultset("Stock::Stock")->find({stock_id
=> $female_accession_stock_id})->uniquename();
335 my $female_plot_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $female_parent);
336 $pedigree->set_female_plot($female_plot_individual);
337 } elsif ($female_type_id == $plant_stock_type_id) {
338 $female_accession_stock_id = $schema->resultset("Stock::StockRelationship")->find({subject_id
=>$female_stock_id, type_id
=>$plant_of_type_id})->object_id();
339 $female_accession_name = $schema->resultset("Stock::Stock")->find({stock_id
=> $female_accession_stock_id})->uniquename();
340 my $female_plant_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $female_parent);
341 $pedigree->set_female_plant($female_plant_individual);
343 $female_accession_name = $female_parent;
346 my $female_parent_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $female_accession_name);
347 $pedigree->set_female_parent($female_parent_individual);
350 my $male_accession_stock_id;
351 my $male_accession_name;
352 my $male_rs = $schema->resultset("Stock::Stock")->find({uniquename
=> $male_parent});
353 my $male_stock_id = $male_rs->stock_id();
354 my $male_type_id = $male_rs->type_id();
356 if ($male_type_id == $plot_stock_type_id) {
357 $male_accession_stock_id = $schema->resultset("Stock::StockRelationship")->find({subject_id
=>$male_stock_id, type_id
=>$plot_of_type_id})->object_id();
358 $male_accession_name = $schema->resultset("Stock::Stock")->find({stock_id
=> $male_accession_stock_id})->uniquename();
359 my $male_plot_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $male_parent);
360 $pedigree->set_male_plot($male_plot_individual);
361 } elsif ($male_type_id == $plant_stock_type_id) {
362 $male_accession_stock_id = $schema->resultset("Stock::StockRelationship")->find({subject_id
=>$male_stock_id, type_id
=>$plant_of_type_id})->object_id();
363 $male_accession_name = $schema->resultset("Stock::Stock")->find({stock_id
=> $male_accession_stock_id})->uniquename();
364 my $male_plant_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $male_parent);
365 $pedigree->set_male_plant($male_plant_individual);
367 $male_accession_name = $male_parent
370 my $male_parent_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $male_accession_name);
371 $pedigree->set_male_parent($male_parent_individual);
374 push @pedigrees, $pedigree;
378 $parsed_result{'additional_info'} = \
%cross_additional_info;
380 $parsed_result{'crosses'} = \
@pedigrees;
382 $self->_set_parsed_data(\
%parsed_result);