1 package CXGN
::Pedigree
::ParseUpload
::Plugin
::CrossesExcelFormat
;
4 use Spreadsheet
::ParseExcel
;
5 use CXGN
::Stock
::StockLookup
;
6 use SGN
::Model
::Cvterm
;
8 use CXGN
::List
::Validate
;
10 sub _validate_with_plugin
{
12 my $filename = $self->get_filename();
13 my $schema = $self->get_chado_schema();
14 my $cross_properties = $self->get_cross_properties();
17 my %supported_cross_types;
18 my $parser = Spreadsheet
::ParseExcel
->new();
22 #currently supported cross types
23 $supported_cross_types{'biparental'} = 1; #both parents required
24 $supported_cross_types{'self'} = 1; #only female parent required
25 $supported_cross_types{'open'} = 1; #only female parent required
26 $supported_cross_types{'bulk'} = 1; #both parents required
27 $supported_cross_types{'bulk_self'} = 1; #only female parent required
28 $supported_cross_types{'bulk_open'} = 1; #only female parent required
29 $supported_cross_types{'doubled_haploid'} = 1; #only female parent required
31 #try to open the excel file and report any errors
32 $excel_obj = $parser->parse($filename);
34 push @error_messages, $parser->error();
35 $errors{'error_messages'} = \
@error_messages;
36 $self->_set_parse_errors(\
%errors);
40 $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
42 push @error_messages, "Spreadsheet must be on 1st tab in Excel (.xls) file";
43 $errors{'error_messages'} = \
@error_messages;
44 $self->_set_parse_errors(\
%errors);
47 my ( $row_min, $row_max ) = $worksheet->row_range();
48 my ( $col_min, $col_max ) = $worksheet->col_range();
49 if (($col_max - $col_min) < 3 || ($row_max - $row_min) < 1 ) { #must have header and at least one row of crosses
50 push @error_messages, "Spreadsheet is missing header or contains no row";
51 $errors{'error_messages'} = \
@error_messages;
52 $self->_set_parse_errors(\
%errors);
57 my $cross_name_header;
58 my $cross_type_header;
59 my $female_parent_header;
60 my $male_parent_header;
61 my $female_plot_plant_header;
62 my $male_plot_plant_header;
64 if ($worksheet->get_cell(0,0)) {
65 $cross_name_header = $worksheet->get_cell(0,0)->value();
67 if ($worksheet->get_cell(0,1)) {
68 $cross_type_header = $worksheet->get_cell(0,1)->value();
70 if ($worksheet->get_cell(0,2)) {
71 $female_parent_header = $worksheet->get_cell(0,2)->value();
73 if ($worksheet->get_cell(0,3)) {
74 $male_parent_header = $worksheet->get_cell(0,3)->value();
76 if ($worksheet->get_cell(0,4)) {
77 $female_plot_plant_header = $worksheet->get_cell(0,4)->value();
79 if ($worksheet->get_cell(0,5)) {
80 $male_plot_plant_header = $worksheet->get_cell(0,5)->value();
83 if (!$cross_name_header || $cross_name_header ne 'cross_name' ) {
84 push @error_messages, "Cell A1: cross_name is missing from the header";
86 if (!$cross_type_header || $cross_type_header ne 'cross_type') {
87 push @error_messages, "Cell B1: cross_type is missing from the header";
89 if (!$female_parent_header || $female_parent_header ne 'female_parent') {
90 push @error_messages, "Cell C1: female_parent is missing from the header";
92 if (!$male_parent_header || $male_parent_header ne 'male_parent') {
93 push @error_messages, "Cell D1: male_parent is missing from the header";
95 if (!$female_plot_plant_header || (($female_plot_plant_header ne 'female_plot') && ($female_plot_plant_header ne 'female_plant'))) {
96 push @error_messages, "Cell E1: female_plot or female_plant is missing from the header";
98 if (!$male_plot_plant_header || (($male_plot_plant_header ne 'male_plot') && ($male_plot_plant_header ne 'male_plant'))) {
99 push @error_messages, "Cell F1: male_plot or male_plant is missing from the header";
102 my %valid_properties;
103 my @properties = @
{$cross_properties};
104 foreach my $property(@properties){
105 $valid_properties{$property} = 1;
108 for my $column ( 6 .. $col_max ) {
109 my $header_string = $worksheet->get_cell(0,$column)->value();
111 if (!$valid_properties{$header_string}){
112 push @error_messages, "Invalid info type: $header_string";
116 my %seen_cross_names;
117 my %seen_accession_names;
118 my %seen_plot_plant_names;
120 for my $row ( 1 .. $row_max ) {
121 my $row_name = $row+1;
126 my $female_plot_plant_name;
127 my $male_plot_plant_name;
129 if ($worksheet->get_cell($row,0)) {
130 $cross_name = $worksheet->get_cell($row,0)->value();
133 if ($worksheet->get_cell($row,1)) {
134 $cross_type = $worksheet->get_cell($row,1)->value();
137 if ($worksheet->get_cell($row,2)) {
138 $female_parent = $worksheet->get_cell($row,2)->value();
141 #skip blank lines or lines with no name, type and parent
142 if (!$cross_name && !$cross_type && !$female_parent) {
146 if ($worksheet->get_cell($row,3)) {
147 $male_parent = $worksheet->get_cell($row,3)->value();
150 if ($worksheet->get_cell($row,4)) {
151 $female_plot_plant_name = $worksheet->get_cell($row,4)->value();
154 if ($worksheet->get_cell($row,5)) {
155 $male_plot_plant_name = $worksheet->get_cell($row,5)->value();
158 for my $column ( 6 .. $col_max ) {
159 if ($worksheet->get_cell($row,$column)) {
160 my $info_value = $worksheet->get_cell($row,$column)->value();
161 my $info_type = $worksheet->get_cell(0,$column)->value();
162 if ( ($info_type =~ m/days/ || $info_type =~ m/number/) && !($info_value =~ /^\d+?$/) ) {
163 push @error_messages, "Cell $info_type:$row_name: is not a positive integer: $info_value";
165 elsif ( $info_type =~ m/date/ && !($info_value =~ m/(\d{4})\/(\d
{2})\
/(\d{2})/) ) {
166 push @error_messages, "Cell $info_type:$row_name: is not a valid date: $info_value. Dates need to be of form YYYY/MM/DD";
171 #cross name must not be blank
172 if (!$cross_name || $cross_name eq '') {
173 push @error_messages, "Cell A$row_name: cross name missing";
175 $cross_name =~ s/^\s+|\s+$//g; #trim whitespace from front and end.
177 # } elsif ($cross_name =~ /\s/ || $cross_name =~ /\// || $cross_name =~ /\\/ ) {
178 # push @error_messages, "Cell A$row_name: cross_name must not contain spaces or slashes.";
179 if ($seen_cross_names{$cross_name}) {
180 push @error_messages, "Cell A$row_name: duplicate cross name: $cross_name";
183 #cross type must not be blank
184 if (!$cross_type || $cross_type eq '') {
185 push @error_messages, "Cell B$row_name: cross type missing";
186 } elsif (!$supported_cross_types{$cross_type}){
187 push @error_messages, "Cell B$row_name: cross type not supported: $cross_type";
190 #female parent must not be blank
191 if (!$female_parent || $female_parent eq '') {
192 push @error_messages, "Cell C$row_name: female parent missing";
195 #male parent must not be blank if type is biparental or bulk
196 if (!$male_parent || $male_parent eq '') {
197 if ($cross_type eq ( 'biparental' || 'bulk' )) {
198 push @error_messages, "Cell D$row_name: male parent required for biparental and bulk crosses";
203 $seen_cross_names{$cross_name}++;
207 $seen_accession_names{$female_parent}++;
211 $seen_accession_names{$male_parent}++;
214 if ($female_plot_plant_name){
215 $seen_plot_plant_names{$female_plot_plant_name}++;
218 if ($male_plot_plant_name){
219 $seen_plot_plant_names{$male_plot_plant_name}++;
223 my @accessions = keys %seen_accession_names;
224 my $accession_validator = CXGN
::List
::Validate
->new();
225 my @accessions_missing = @
{$accession_validator->validate($schema,'accessions',\
@accessions)->{'missing'}};
227 my $population_validator = CXGN
::List
::Validate
->new();
228 my @parents_missing = @
{$population_validator->validate($schema,'populations',\
@accessions_missing)->{'missing'}};
230 if (scalar(@parents_missing) > 0) {
231 push @error_messages, "The following accessions or populations are not in the database as uniquenames or synonyms: ".join(',',@parents_missing);
232 $errors{'missing_accessions'} = \
@parents_missing;
235 if (($female_plot_plant_header eq 'female_plot') && ($male_plot_plant_header eq 'male_plot')) {
236 my @plots = keys %seen_plot_plant_names;
237 my $plot_validator = CXGN
::List
::Validate
->new();
238 my @plots_missing = @
{$plot_validator->validate($schema,'plots',\
@plots)->{'missing'}};
240 if (scalar(@plots_missing) > 0) {
241 push @error_messages, "The following plots are not in the database as uniquenames or synonyms: ".join(',',@plots_missing);
242 $errors{'missing_plots'} = \
@plots_missing;
244 } elsif (($female_plot_plant_header eq 'female_plant') && ($male_plot_plant_header eq 'male_plant')) {
245 my @plants = keys %seen_plot_plant_names;
246 my $plant_validator = CXGN
::List
::Validate
->new();
247 my @plants_missing = @
{$plant_validator->validate($schema,'plants',\
@plants)->{'missing'}};
249 if (scalar(@plants_missing) > 0) {
250 push @error_messages, "The following plants are not in the database as uniquenames or synonyms: ".join(',',@plants_missing);
251 $errors{'missing_plants'} = \
@plants_missing;
255 my @crosses = keys %seen_cross_names;
256 my $rs = $schema->resultset("Stock::Stock")->search({
257 'is_obsolete' => { '!=' => 't' },
258 'uniquename' => { -in => \
@crosses }
260 while (my $r=$rs->next){
261 push @error_messages, "Cross name already exists in database: ".$r->uniquename;
264 #store any errors found in the parsed file to parse_errors accessor
265 if (scalar(@error_messages) >= 1) {
266 $errors{'error_messages'} = \
@error_messages;
267 $self->_set_parse_errors(\
%errors);
271 return 1; #returns true if validation is passed
275 sub _parse_with_plugin
{
277 my $filename = $self->get_filename();
278 my $schema = $self->get_chado_schema();
279 my $parser = Spreadsheet
::ParseExcel
->new();
283 my %additional_properties;
284 my %properties_columns;
287 $excel_obj = $parser->parse($filename);
292 $worksheet = ( $excel_obj->worksheets() )[0];
293 my ( $row_min, $row_max ) = $worksheet->row_range();
294 my ( $col_min, $col_max ) = $worksheet->col_range();
296 my $female_plot_plant_header = $worksheet->get_cell(0,4)->value();
297 my $male_plot_plant_header = $worksheet->get_cell(0,5)->value();
299 for my $column ( 6 .. $col_max ) {
300 my $header_string = $worksheet->get_cell(0,$column)->value();
302 $properties_columns{$column} = $header_string;
303 $additional_properties{$header_string} = ();
306 for my $row ( 1 .. $row_max ) {
317 if ($worksheet->get_cell($row,0)) {
318 $cross_name = $worksheet->get_cell($row,0)->value();
319 $cross_name =~ s/^\s+|\s+$//g; #trim whitespace from front and end...
322 if ($worksheet->get_cell($row,1)) {
323 $cross_type = $worksheet->get_cell($row,1)->value();
326 if ($worksheet->get_cell($row,2)) {
327 $female_parent = $worksheet->get_cell($row,2)->value();
330 #skip blank lines or lines with no name, type and parent
331 if (!$cross_name && !$cross_type && !$female_parent) {
334 if ($worksheet->get_cell($row,3)) {
335 $male_parent = $worksheet->get_cell($row,3)->value();
338 if ($worksheet->get_cell($row,4)) {
339 if ($female_plot_plant_header eq 'female_plot') {
340 $female_plot = $worksheet->get_cell($row,4)->value();
341 } elsif ($female_plot_plant_header eq 'female_plant') {
342 $female_plant = $worksheet->get_cell($row,4)->value();
346 if ($worksheet->get_cell($row,5)) {
347 if ($male_plot_plant_header eq 'male_plot') {
348 $male_plot = $worksheet->get_cell($row,5)->value();
349 } elsif ($male_plot_plant_header eq 'male_plant') {
350 $male_plant = $worksheet->get_cell($row,5)->value();
354 for my $column ( 6 .. $col_max ) {
355 if ($worksheet->get_cell($row,$column)) {
356 my $column_property = $properties_columns{$column};
357 $additional_properties{$column_property}{$cross_name} = $worksheet->get_cell($row,$column)->value();
358 if ($row == $row_max) {
359 my $info_type = $worksheet->get_cell(0,$column)->value();
360 $parsed_result{$info_type} = $additional_properties{$column_property};
365 my $pedigree = Bio
::GeneticRelationships
::Pedigree
->new(name
=>$cross_name, cross_type
=>$cross_type);
366 if ($female_parent) {
367 my $female_parent_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $female_parent);
368 $pedigree->set_female_parent($female_parent_individual);
371 my $male_parent_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $male_parent);
372 $pedigree->set_male_parent($male_parent_individual);
375 my $female_plot_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $female_plot);
376 $pedigree->set_female_plot($female_plot_individual);
379 my $male_plot_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $male_plot);
380 $pedigree->set_male_plot($male_plot_individual);
383 my $female_plant_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $female_plant);
384 $pedigree->set_female_plant($female_plant_individual);
387 my $male_plant_individual = Bio
::GeneticRelationships
::Individual
->new(name
=> $male_plant);
388 $pedigree->set_male_plant($male_plant_individual);
391 push @pedigrees, $pedigree;
395 $parsed_result{'crosses'} = \
@pedigrees;
397 $self->_set_parsed_data(\
%parsed_result);
404 #sub _get_accession {
406 # my $accession_name = shift;
407 # my $chado_schema = $self->get_chado_schema();
408 # my $stock_lookup = CXGN::Stock::StockLookup->new(schema => $chado_schema);
410 # my $accession_cvterm = SGN::Model::Cvterm->get_cvterm_row($chado_schema, 'accession', 'stock_type');
412 # $stock_lookup->set_stock_name($accession_name);
413 # $stock = $stock_lookup->get_stock_exact();
419 # if ($stock->type_id() != $accession_cvterm->cvterm_id()) {