2 package CXGN
::Stock
::StockTemplate
;
7 CXGN::Stock::StockTemplate - an object to handle SGN stock data uploaded from a spreadsheet template
11 my $sbt = CXGN::Stock::Stocktemplate->new({ schema => $schema} );
19 Naama Menda (nm249@cornell.edu)
26 use Bio
::Chado
::Schema
;
29 use File
::Basename qw
| basename dirname
|;
30 use Spreadsheet
::ParseExcel
;
31 use SGN
::Model
::Cvterm
;
36 isa
=> 'DBIx::Class::Schema',
39 has
'metadata_schema' => (
41 isa
=> 'DBIx::Class::Schema',
44 has
'phenome_schema' => (
46 isa
=> 'DBIx::Class::Schema',
49 has
'parsed_data' => (
53 has
'parsed_header' => (
57 has
'parse_errors' => (
61 has
'verify_errors' => (
69 has
'store_error' => (
73 has
'store_message' => (
82 has
'tmp_filename' => (
94 ################################
97 my ($file , $metadata, $identifier_prefix, $db_name ) = @_;
98 my $hashref; #hashref of hashrefs for storing the uploaded data , to be used for checking the fields
102 #A. Spreadsheet identifier: B. $spreadsheet_unique_id
103 #A. trial name B. $name
104 #A. $trial_description
105 #A. plants per plot: B. $plants_per_plot
106 #A. Operator: B. $operator (user input)
107 #A. Date: B. $date (user input)
108 #G... $trait_name [units]
109 #1.. A. $row_number B. $clone_name C. $block D. $plot_id E. $rep F. $number_of_surviving_plants G. $trait_ontology_id
111 my $parser = Spreadsheet
::ParseExcel
->new();
112 my $workbook = $parser->parse($file); # $file is an Excel file
113 if ( !defined $workbook ) {
114 push @errors, $parser->error();
115 $self->parse_errors(\
@errors);
119 my $worksheet = ( $workbook->worksheets() )[0]; #support only one worksheet
120 my ( $row_min, $row_max ) = $worksheet->row_range();
121 my ( $col_min, $col_max ) = $worksheet->col_range();
122 print STDERR
"$row_min $row_max $col_min $col_max \n";
123 if ($col_max < 2 || $row_max < 8 ) {#must have header and at least one row of phenotypes
124 push @errors, "Spreadsheet is missing header";
125 $self->parse_errors(\
@errors);
128 #hash for column headers
130 #metadata is stored in rows 1..6
131 my $spreadsheet_id = $worksheet->get_cell(0,1);
132 my $trial_name = $worksheet->get_cell(1,1);
133 my $trial_desc = $worksheet->get_cell(2,0);
134 my $plants_per_plot = $worksheet->get_cell(3,1);
135 my $operator = $worksheet->get_cell(4,1);
136 my $date = $worksheet->get_cell(5,1);
137 if (!$spreadsheet_id) {
138 push @errors, "Spreadsheet ID is missing from the header";
141 push @errors, "Trial name is missing from the header";
143 #add function to check that the trial name exists?
145 push @errors, "The name of the operator is missing from the header";
147 #add function to check if the operator name exists in the database and matches the logged in user?
149 push @errors, "The date is missing from the header";
151 #add a check to make sure that the header data is valid?
153 $self->parse_errors(\
@errors);
156 $header_hash{'operator'} = $operator;
157 $header_hash{'trial_name'} = $trial_name;
159 # Row #7 can be skipped, as it contains trait names just for human readability
160 # for my $row ( 8 .. $row_max ) {
161 for my $col ( $col_min .. $col_max ) {
163 $cell = $worksheet->get_cell( 7, $col );
165 my $value = $cell->value() ;
166 my $unformatted_value = $cell->unformatted() ;
167 ## read the header row, make a hash of names to column numbers, then in the data rows, get_cell($row, $col_for_header{'SomeHeader'})
168 $headers{$value} = $col;
169 print STDERR
"cell value: $value\n";
171 for my $row ( 8 .. $row_max ) { # phenotypes should be from row 9 and on
172 # got the row number, now look at the column headers
173 my $plot_stock_id = $worksheet->get_cell ($row , $headers{'PLOT'} )->value();
174 my $replicate = $worksheet->get_cell ($row , $headers{'REP'})->value();
175 my $clone_name = $worksheet->get_cell ($row , $headers{'DESIG'})->value();
176 my $block = $worksheet->get_cell ($row , $headers{'BLOCK'})->value();
177 my $planted_plants = $worksheet->get_cell ($row , $headers{'NOPLT'})->value();
178 my $surv_plants = $worksheet->get_cell ($row , $headers{'NOSV'})->value;
179 foreach my $header (keys %headers) {
180 print STDERR
"header loop header: $header\n";
181 if ($header =~ m/^CO_\d\{1-4\}:\d{7}/ ) {
182 print STDERR
"has header: $header row: $row col: ".$headers{$header}."\n";
183 my $value = $worksheet->get_cell( $row , $headers{$header} )->value();
184 print STDERR
"has value: $value\n";
189 my ($full_accession, $full_unit ) = split(/\|/, $header); # do we support units in a spreadsheet upload?
190 my ($value_type, $unit_value) = split(/\:/, $full_unit);
191 my ($db_name, $accession) = split (/\:/ , $full_accession);
192 #print STDERR "db_name = '$db_name' sp_accession = '$sp_accession'\n";
193 next() if (!$accession);
195 #skip non-numeric values
196 if ($value !~ /^\d/) {
197 if ($value eq "\." ) { next; }
198 print STDERR
"measurement: $value \n";
199 #push @errors, "** Found non-numeric value in column $header (value = '" . $value ."') Row = $row.";
202 #####################
203 $hashref->{join("\t", $spreadsheet_id, $operator, $date, $row) }->{$plot_stock_id}->{$full_accession}->{replicate
} = $replicate;
204 $hashref->{join("\t", $spreadsheet_id, $operator, $date, $row) }->{$plot_stock_id}->{$full_accession}->{block
} = $block;
205 $hashref->{join("\t", $spreadsheet_id, $operator, $date, $row) }->{$plot_stock_id}->{$full_accession}->{planted_plants
} = $planted_plants;
206 $hashref->{join("\t", $spreadsheet_id, $operator, $date, $row) }->{$plot_stock_id}->{$full_accession}->{surviving_plants
} = $surv_plants;
207 $hashref->{join("\t", $spreadsheet_id, $operator, $date, $row) }->{$plot_stock_id}->{$full_accession}->{value
} = $value;
208 print STDERR
"Accession: $full_accession Replicate: $replicate Block: $block\n";
212 ##unique# PLOT REP DESIG BLOCK NOPLT NOSV CO_334:0000010 CO_334:0000099|scale:cassavabase CO_334:0000018|unit:cm CO_334:0000039|date:1MAP
214 #################################
215 #my ($op_name, $project_id, $location_id, $stock_id, $cvterm_accession, $value, $date, $count);
216 $self->parsed_data($hashref);
217 $self->parsed_header(\
%header_hash);
218 if (scalar(@errors) >= 1) {
219 $self->parse_errors(\
@errors);
220 print STDERR
"More than one parse error\n";
226 my $schema = $self->schema;
227 #check is stock exists and if cvterm exists.
228 #print error only if stocks do not exist and the cvterms
229 my $hashref = $self->parsed_data;
233 foreach my $key (keys %$hashref) {
234 ##$hashref->{join("\t", $spreadsheet_id, $operator, $date, $row) }->{$plot_stock_id}->{$cvterm_accession}->{replicate} = $replicate;
235 my ($spreadsheet_id, $operator, $date, $row) = split(/\t/, $key);
236 print STDERR
"***** verify found key $key !!!!!!!\n\n";
237 print STDERR
"verify : ... . . . .spreadsheet = $spreadsheet_id, operator = $operator, date = $date, row = $row \n\n";
239 foreach my $plot_stock_id (keys %{$hashref->{$key} } ) {
240 #check if the stock exists
241 print STDERR
"verify .. Looking for stock_id ".$plot_stock_id."\n";
243 $stock = $schema->resultset("Stock::Stock")->find( { stock_id
=> $plot_stock_id } );
244 if (!$stock) { push @errors, "Stock $plot_stock_id does not exist in the database!"; }
245 foreach my $cvterm_accession (keys %{$hashref->{$key}->{$plot_stock_id} } ) {
246 print STDERR
"verify ... Looking for accession $cvterm_accession..\n";
247 my ($db_name, $accession) = split (/:/, $cvterm_accession);
248 if (!$db_name) { push @errors, "could not find valid db_name in accession $cvterm_accession";}
249 if (!$accession) { push @errors, "Could not find valid cvterm accession in $cvterm_accession";}
250 #check if the cvterm exists
251 my $db = $schema->resultset("General::Db")->search(
252 { 'me.name' => $db_name } );
254 my $dbxref = $db->search_related('dbxrefs', { accession
=> $accession } );
256 if ($dbxref->count) {
257 my $cvterm = $dbxref->search_related("cvterm", {} )->single;
258 if (!$cvterm) { push @errors, "NO cvterm found in the database for accession $cvterm_accession!\n db_name = '" . $db_name . "' , accession = '" . $accession . "'";
261 push @errors, "No dbxref found for cvterm accession $accession";
264 push @errors , "db_name $db_name does not exist in the database!";
269 $self->warnings(\
@warnings);
270 foreach my $err (@errors) {
271 print STDERR
" *!*!*!error = $err\n";
273 if (scalar(@errors) >= 1) {
274 $self->verify_errors(\
@errors);
280 my $schema = $self->schema;
281 my $metadata_schema = $self->metadata_schema;
282 my $phenome_schema = $self->phenome_schema;
283 my $hashref = $self->parsed_data;
284 my $filename = $self->filename();
285 my $user_id = $self->user_id();
290 open(my $F, "<", $self->tmp_filename()) || die "Can't open file ".$self->filename();
292 my $md5 = Digest
::MD5
->new();
296 my $md_row = $metadata_schema->resultset("MdMetadata")->create({
297 create_person_id
=> $user_id,
301 my $file_row = $metadata_schema->resultset("MdFiles")->create({
302 basename
=> basename
($self->filename()),
303 dirname
=> dirname
($self->filename()),
304 filetype
=> 'phenotype spreadsheet upload xls',
305 md5checksum
=> $md5->hexdigest(),
306 metadata_id
=> $md_row->metadata_id(),
311 print STDERR
"\nFile id: ".$file_row->file_id()."\n";
322 # find the cvterm for a phenotyping experiment
323 my $pheno_cvterm = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'phenotyping_experiment', 'experiment_type');
325 print STDERR
" ***store: phenotyping experiment cvterm = " . $pheno_cvterm->cvterm_id . "\n";
327 #This has to be stored in the database when adding a new project for these plots
328 my $field_layout_experiment = 'field_layout'; #############################
330 foreach my $key (keys %$hashref) {
331 my ($spreadsheet_id, $operator, $date, $row) = split(/\t/, $key);
332 foreach my $plot_stock_id (keys %{$hashref->{$key} } ) {
333 print STDERR
" *** store: loading information for stock $plot_stock_id \n";
334 foreach my $cvterm_accession (keys %{$hashref->{$key}->{$plot_stock_id} } ) {
335 print STDERR
" ** store: cvterm_accession = $cvterm_accession\n";
336 my $replicate = $hashref->{$key}->{$plot_stock_id}->{$cvterm_accession}->{replicate
};
337 my $block = $hashref->{$key}->{$plot_stock_id}->{$cvterm_accession}->{block
};
338 my $planted_plants = $hashref->{$key}->{$plot_stock_id}->{$cvterm_accession}->{planted_plants
};
339 my $surviving_plants = $hashref->{$key}->{$plot_stock_id}->{$cvterm_accession}->{surviving_plants
};
340 my $value = $hashref->{$key}->{$plot_stock_id}->{$cvterm_accession}->{value
};
342 print STDERR
" ** store: value = $value\n";
343 my ($db_name, $accession) = split (/:/, $cvterm_accession);
344 my $db = $schema->resultset("General::Db")->search(
346 'me.name' => $db_name, } );
347 print STDERR
" ** store: found db $db_name , accession = $accession \n";
349 my $dbxref = $db->search_related("dbxrefs", { accession
=> $accession, });
351 my $cvterm = $dbxref->search_related("cvterm")->single;
352 #now get the value and store the whole thing in the database!
353 my $stock = $self->schema->resultset("Stock::Stock")->find( { stock_id
=> $plot_stock_id});
354 my $stock_name = $stock->name;
356 my $field_exp = $stock->search_related('nd_experiment_stocks')->search_related('nd_experiment')->find({'type.name' => $field_layout_experiment },{ join => 'type' });
357 my $location_id = $field_exp->nd_geolocation_id;
358 my $project = $field_exp->nd_experiment_projects->single ; #there should be one project linked with the field experiment
359 my $project_id = $project->project_id;
361 ###store a new nd_experiment. One phenotyping experiment per upload
362 #find if a phenotyping experiment exists for this location
363 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->find(
365 nd_geolocation_id
=> $location_id,
366 type_id
=> $pheno_cvterm->cvterm_id(),
368 my ($op_prop, $date_prop);
369 ## Find if the experiment has the date and person of this upload, if yes, use the existing one, if no, create a new nd_experiment
371 $op_prop = $experiment->search_related(
372 'nd_experimentprops' , {
373 'type.name' => 'operator',
379 $date_prop = $experiment->search_related(
380 'nd_experimentprops' , {
381 'type.name' => 'date',
388 # Create a new experiment, if one does not exist
389 # or ff operator and date are not linked with the existing experiment
390 if ( !($op_prop && $date_prop) || !$experiment ) {
391 $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->find(
393 nd_geolocation_id
=> $location_id,
394 type_id
=> $pheno_cvterm->cvterm_id(),
396 print STDERR
" ** store: created new experiment " . $experiment->nd_experiment_id . "\n";
397 $experiment->create_nd_experimentprops(
401 autocreate
=> 1 , cv_name
=> 'local' }
403 $experiment->create_nd_experimentprops(
405 operator
=> $operator } ,
407 autocreate
=> 1 , cv_name
=> 'local' }
411 #link the experiment to the project
412 $experiment->find_or_create_related('nd_experiment_projects', {
413 project_id
=> $project_id
415 print STDERR
" ** store: linking experiment " . $experiment->nd_experiment_id . " with project $project_id \n";
416 #link the experiment to the stock
417 $experiment->find_or_create_related('nd_experiment_stocks' , {
418 stock_id
=> $plot_stock_id,
419 type_id
=> $pheno_cvterm->cvterm_id,
421 print STDERR
" ** store: linking experiment " . $experiment->nd_experiment_id . " to stock $plot_stock_id \n";
422 my $uniquename = "Stock: " . $plot_stock_id . ", trait: " . $cvterm->name . " date: $date" . " operator = $operator" ;
423 my $phenotype = $cvterm->find_or_create_related(
424 "phenotype_cvalues", {
425 observable_id
=> $cvterm->cvterm_id,
427 uniquename
=> $uniquename,
429 print STDERR
" ** store: added phenotype value $value , observable = " . $cvterm->name ." uniquename = $uniquename \n";
430 #link the phenotpe to the experiment
431 $experiment->find_or_create_related('nd_experiment_phenotypes' , {
432 phenotype_id
=> $phenotype->phenotype_id });
433 $message .= "Added phenotype: trait= " . $cvterm->name . ", value = $value, to stock " . qq|<a href
="/stock/$plot_stock_id/view">$stock_name</a
><br
/>| ;
435 #link the file to the experiment
436 #$experiment->find_or_create_related('nd_experiment_md_files',{file_id => $file_row->file_id(),});
437 my $experiment_files = $phenome_schema->resultset("NdExperimentMdFiles")->create({
438 nd_experiment_id
=> $experiment->nd_experiment_id(),
439 file_id
=> $file_row->file_id(),
441 $experiment_files->insert();
452 print "STDERR coderef: $coderef\n";
453 print "STDERR schema: $schema\n";
455 $schema->txn_do($coderef);
458 $error = "An error occured! Cannot store data! <br />" . $_ . "\n";
462 $self->store_error($error);
469 $self->store_message($message);