Merge pull request #5177 from solgenomics/topic/solgs/refactor-solgs-pipeline
[sgn.git] / cgi-bin / phenome / qtl_load.pl
blob76c7c95d8e8aa414c26ed855c66e1618ab92766e
1 package CXGN::Phenome::QtlLoadDetailPage;
2 use CatalystX::GlobalContext qw( $c );
4 =head1 DESCRIPTION
5 processes and loads qtl data obtained from the the web forms
6 (qtl_form.pl) on the user specific directory and the database.
9 =head1 AUTHOR
10 Isaak Y Tecle (iyt2@cornell.edu)
12 =cut
14 use strict;
15 use warnings;
17 my $qtl_load_detail_page = CXGN::Phenome::QtlLoadDetailPage->new();
19 use File::Spec;
20 use CXGN::DB::Connection;
21 use CXGN::Phenome::Qtl;
22 use CXGN::Phenome::Qtl::Tools;
23 use CXGN::Phenome::Population;
24 use CXGN::Phenome::UserTrait;
25 use CXGN::Chado::Phenotype;
26 use CXGN::Chado::Cvterm;
27 use CXGN::Chado::Organism;
28 use CXGN::Phenome::Individual;
29 use CXGN::Accession;
30 use CXGN::Map;
31 use CXGN::Map::Version;
32 use CXGN::Map::Tools;
33 use CXGN::LinkageGroup;
34 use List::MoreUtils qw /uniq/;
35 use CXGN::Marker::Modifiable;
36 use CXGN::Marker::Tools;
37 use CXGN::Marker::Location;
38 use CXGN::Phenome::GenotypeExperiment;
39 use CXGN::Phenome::Genotype;
40 use CXGN::Phenome::GenotypeRegion;
41 use CXGN::Login;
42 use CXGN::Contact;
43 use CXGN::People::Person;
44 use CXGN::Page;
45 use Bio::Chado::Schema;
46 use Storable qw /store retrieve/;
47 use CGI;
49 use CatalystX::GlobalContext qw( $c );
51 sub new {
52 my $class = shift;
53 my $self = bless {}, $class;
55 my $dbh = CXGN::DB::Connection->new();
56 my $login = CXGN::Login->new($dbh);
57 my $sp_person_id = $login->verify_session();
59 $self->set_sp_person_id($sp_person_id);
60 $self->set_dbh($dbh);
62 if ($sp_person_id) {
63 $self->process_data();
66 return $self;
69 sub process_data {
70 my $self = shift;
71 my $page = CXGN::Page->new( "SGN", "isaak" );
72 my $dbh = $self->get_dbh();
74 my $login = CXGN::Login->new($dbh);
75 my $sp_person_id = $login->verify_session();
76 my $referring_page = '/qtl/form';
78 my %args = $page->get_all_encoded_arguments();
79 $args{pop_common_name_id} = $self->common_name_id();
81 my $type = $args{type};
82 my $pop_id = $args{pop_id};
83 my $args_ref = \%args;
85 my $qtl_obj = CXGN::Phenome::Qtl->new( $sp_person_id, $args_ref );
86 $qtl_obj->create_user_qtl_dir($c);
87 my $qtl_tools = CXGN::Phenome::Qtl::Tools->new();
89 if ($pop_id) {
90 $self->set_population_id($pop_id);
91 $qtl_obj->set_population_id($pop_id);
94 my ( $pop_name, $desc, $pop_detail, $message );
96 if ( $type eq 'begin' )
98 $self->show_pop_form();
101 elsif ( $type eq 'pop_form' )
103 $self->post_pop_form($qtl_obj, $qtl_tools);
106 elsif ( $type eq 'trait_form' )
108 $self->post_trait_form($qtl_obj, $args{'trait_file'}, $pop_id);
112 elsif ( $type eq 'pheno_form' )
114 $self->post_pheno_form($qtl_obj, $args{'pheno_file'}, $pop_id);
117 elsif ( $type eq 'geno_form' )
119 $self->post_geno_form($qtl_obj, $args{'geno_file'}, $pop_id);
122 elsif ( $type eq 'stat_form' )
124 $self->post_stat_form($args_ref);
128 sub pheno_upload {
129 my $self = shift;
130 my $qtl = shift;
131 my $p_file = shift;
133 my $safe_char = "a-zA-Z0-9_.-";
134 my ( $temp_pheno_file, $name );
136 $p_file =~ tr/ /_/;
137 $p_file =~ s/[^$safe_char]//g;
139 if ( $p_file =~ /^([$safe_char]+)$/ ) {
141 $p_file = $1;
144 else {
145 die "Phenotype file name contains invalid characters";
148 my $phe_upload = $c->req->upload('pheno_file');
150 if ( defined $phe_upload ) {
151 $name = $phe_upload->filename;
153 my ( $qtl_dir, $user_dir ) = $qtl->get_user_qtl_dir($c);
154 my $qtlfiles = retrieve("$user_dir/qtlfiles");
156 my $trait_file = $qtlfiles->{trait_file};
157 $self->compare_file_names( $name, $trait_file );
158 $qtlfiles->{pheno_file} = $name;
159 store $qtlfiles, "$user_dir/qtlfiles";
162 else {
163 die "Catalyst::Request::Upload object for phenotype file not defined.";
167 if ( $p_file eq $name ) {
168 $temp_pheno_file = $qtl->apache_upload_file( $phe_upload, $c );
169 return $temp_pheno_file;
172 else { return 0; }
176 sub geno_upload {
177 my $self = shift;
178 my $qtl = shift;
179 my $g_file = shift;
181 my ( $temp_geno_file, $name );
183 my $safe_char = "a-zA-Z0-9_.-";
185 $g_file =~ tr/ /_/;
186 $g_file =~ s/[^$safe_char]//g;
188 if ( $g_file =~ /^([$safe_char]+)$/ ) {
190 $g_file = $1;
193 else {
194 die "Genotype file name contains invalid characters";
197 my $gen_upload = $c->req->upload('geno_file');
199 if ( defined $gen_upload ) {
200 $name = $gen_upload->filename;
202 my ( $qtl_dir, $user_dir ) = $qtl->get_user_qtl_dir($c);
203 my $qtlfiles = retrieve("$user_dir/qtlfiles");
205 my $trait_file = $qtlfiles->{trait_file};
206 my $pheno_file = $qtlfiles->{pheno_file};
208 $self->compare_file_names( $name, $trait_file );
209 $self->compare_file_names( $name, $pheno_file );
211 $qtlfiles->{geno_file} = $name;
212 store $qtlfiles, "$user_dir/qtlfiles";
215 else {
216 die "Catalyst::Request::Upload object for genotype file not defined.";
220 if ( $g_file eq $name ) {
221 $temp_geno_file = $qtl->apache_upload_file( $gen_upload, $c );
222 return $temp_geno_file;
224 else { return 0; }
227 sub trait_upload {
228 my $self = shift;
229 my $qtl = shift;
230 my $c_file = shift;
232 my ( $temp_trait_file, $name );
234 print STDERR "Trait file: $c_file\n";
235 my $safe_char = "a-zA-Z0-9_.-";
237 $c_file =~ tr/ /_/;
238 $c_file =~ s/[^$safe_char]//g;
240 if ( $c_file =~ /^([$safe_char]+)$/ ) {
242 $c_file = $1;
245 else {
246 die "Trait file name contains invalid characters";
249 my $trait_upload = $c->req->upload('trait_file');
251 if ( defined $trait_upload )
253 $name = $trait_upload->filename;
254 my ( $qtl_dir, $user_dir ) = $qtl->get_user_qtl_dir($c);
255 my $qtlfiles = {};
256 $qtlfiles->{trait_file} = $name;
257 store( $qtlfiles, "$user_dir/qtlfiles" );
260 else
262 die "Catalyst::Request::Upload object for trait file not defined.";
266 if ( $c_file eq $name )
268 $temp_trait_file = $qtl->apache_upload_file( $trait_upload, $c );
269 return $temp_trait_file;
271 else { return 0; }
274 sub load_pop_details {
275 my $self = shift;
276 my $pop_args = shift;
277 my %pop_details = %{$pop_args};
279 my $org = $pop_details{organism};
280 my $name = $pop_details{pop_name};
281 my $desc = $pop_details{pop_desc};
282 my $cross_id = $pop_details{pop_type};
283 my $female = $pop_details{pop_female_parent};
284 my $male = $pop_details{pop_male_parent};
285 my $recurrent = $pop_details{pop_recurrent_parent};
286 my $donor = $pop_details{pop_donor_parent};
287 my $comment = $pop_details{pop_comment};
288 my $is_public = $pop_details{pop_is_public};
289 my $common_name_id = $pop_details{pop_common_name_id};
291 my $dbh = $self->get_dbh();
292 my $login = CXGN::Login->new($dbh);
293 my $sp_person_id = $login->verify_session();
295 my ( $female_id, $male_id, $recurrent_id, $donor_id );
297 $name =~ s/^\s+|\s+$//g;
298 my $population = CXGN::Phenome::Population->new_with_name( $dbh, $name );
299 my $population_id = $population->get_population_id();
300 if ($population_id) {
301 $self->population_exists( $population, $name );
304 print STDERR "storing parental accessions...\n";
306 if ($female) {
307 $female_id = $self->store_accession($female);
308 print STDERR "female: $female_id\n";
311 if ($male) {
312 $male_id = $self->store_accession($male);
313 print STDERR "male: $male_id\n";
316 if ($recurrent) {
317 $recurrent_id = $self->store_accession($recurrent);
320 if ($donor) {
321 $donor_id = $self->store_accession($donor);
324 my $pop = CXGN::Phenome::Population->new($dbh);
325 $pop->set_name($name);
326 $pop->set_description($desc);
327 $pop->set_sp_person_id($sp_person_id);
328 $pop->set_cross_type_id($cross_id);
329 $pop->set_female_parent_id($female_id);
330 $pop->set_male_parent_id($male_id);
331 $pop->set_recurrent_parent_id($recurrent_id);
332 $pop->set_donor_parent_id($donor_id);
333 $pop->set_comment($comment);
334 $pop->set_web_uploaded('t');
335 $pop->set_common_name_id($common_name_id);
336 $pop->store();
338 my $pop_id = $dbh->last_insert_id( "population", "phenome" );
340 $pop = CXGN::Phenome::Population->new( $dbh, $pop_id );
341 $pop->store_data_privacy($is_public);
343 return $pop_id, $name, $desc;
346 sub store_accession {
347 my $self = shift;
348 my $accession = shift;
349 my $dbh = $self->get_dbh();
351 print STDERR "organism_id: $accession\n";
352 my ( $species, $cultivar ) = split( /cv|var|cv\.|var\./, $accession );
353 $species =~ s/^\s+|\s+$//g;
354 $cultivar =~ s/\.//;
355 $cultivar =~ s/^\s+|\s+$//g;
356 $species = ucfirst($species);
358 print STDERR "$accession: species:$species, cultivar:$cultivar\n";
359 my $schema = $c->dbic_schema('Bio::Chado::Schema', 'sgn_chado');
361 my $organism = CXGN::Chado::Organism->new_with_species( $schema, $species );
362 $self->check_organism( $organism, $species, $cultivar );
364 my $existing_organism_id = $organism->get_organism_id();
365 my $organism_name = $organism->get_species();
367 eval {
368 my $sth = $dbh->prepare(
369 "SELECT accession_id, chado_organism_id, common_name
370 FROM sgn.accession
371 WHERE common_name ILIKE ?"
373 $sth->execute($cultivar);
374 my ( $accession_id, $chado_organism_id, $common_name ) =
375 $sth->fetchrow_array();
376 print STDERR
377 "select existing accession: $accession_id, $chado_organism_id, $common_name\n";
379 if ($accession_id) {
380 unless ($chado_organism_id) {
381 $sth = $dbh->prepare(
382 "UPDATE sgn.accession
383 SET chado_organism_id = ?
384 WHERE accession_id = $accession_id"
386 $sth->execute($existing_organism_id);
389 elsif ( !$accession_id ) {
391 $sth = $dbh->prepare(
392 "INSERT INTO sgn.accession
393 (common_name, chado_organism_id)
394 VALUES (?,?)"
396 $sth->execute( $cultivar, $existing_organism_id );
397 $accession_id = $dbh->last_insert_id( "accession", "sgn" );
399 #my $accession = CXGN::Accession->new($dbh, $accession_id);
400 #$common_name = $accession->accession_common_name();
401 print STDERR
402 "inserted: $accession_id, $chado_organism_id, $common_name\n";
405 my ( $accession_names_id, $accession_name );
407 unless ( !$common_name ) {
408 $sth = $dbh->prepare(
409 "SELECT accession_name_id, accession_name
410 FROM sgn.accession_names
411 WHERE accession_name ILIKE ?"
413 $sth->execute($common_name);
415 ( $accession_names_id, $accession_name ) = $sth->fetchrow_array();
416 print STDERR
417 "selected existing accession_names: $accession_names_id, $accession_name\n";
419 unless ($accession_names_id) {
420 $sth = $dbh->prepare(
421 "INSERT INTO sgn.accession_names
422 (accession_name, accession_id)
423 VALUES (?, ?)"
425 $sth->execute( $common_name, $accession_id );
427 $accession_names_id =
428 $dbh->last_insert_id( "accession_names", "sgn" );
429 print STDERR
430 "inserted accession_names : $common_name, $accession_id\n";
434 unless ( !$accession_names_id ) {
435 $sth = $dbh->prepare(
436 "UPDATE sgn.accession
437 SET accession_name_id = ?
438 WHERE accession_id = ?"
440 $sth->execute( $accession_names_id, $accession_id );
441 print STDERR "updated accession: with $accession_names_id\n";
444 if (@_) {
445 print STDERR "@_\n";
446 $dbh->rollback();
447 return 0;
449 else {
450 $dbh->commit();
451 return $accession_id;
457 =head2 store_traits
459 Usage: my ($true_or_false) = $self->store_traits($file);
460 Desc: reads traits, their definition, and unit from
461 user submitted tab-delimited traits file and stores traits
462 that does not exist in the db or exist but with different units
463 Ret: true or false
464 Args: tab delimited trait file, with full path
465 Side Effects: accesses database
466 Example:
468 =cut
470 sub store_traits {
471 my $self = shift;
472 my $file = shift;
473 my $pop_id = $self->get_population_id();
474 my $sp_person_id = $self->get_sp_person_id();
475 my $dbh = $self->get_dbh();
477 open( F, "<$file" ) || die "Can't open file $file.";
479 my $header = <F>;
480 chomp($header);
481 my @fields = split /\t/, $header;
482 @fields = map { lc( $_ ) } @fields;
484 my ( $trait, $trait_id, $trait_name, $unit, $unit_id );
486 if ( $fields[0] !~ /trait|name/
487 || $fields[1] !~ /definition/
488 || $fields[2] !~ /unit/
491 my $error =
492 "Data columns in the traits file need to be in the order of:
493 <b>traits -> definition -> unit</b>. <br/>
494 Now they are in the order of <b><i>$fields[0] -> $fields[1]
495 -> $fields[2]</i></b>.\n";
497 $self->trait_columns($error);
499 else {
501 eval {
502 while (<F>)
504 chomp;
505 my (@values) = split /\t/;
506 print STDERR "\n store traits: $values[0] -- $values[1] ..\n";
507 $trait =
508 CXGN::Phenome::UserTrait->new_with_name( $dbh, $values[0] );
510 if ( !$trait ) {
511 $trait = CXGN::Phenome::UserTrait->new($dbh);
513 $trait->set_cv_id(17);#16 for cassavabase
514 $trait->set_name( $values[0] );
515 $trait->set_definition( $values[1] );
516 $trait->set_sp_person_id($sp_person_id);
517 $trait_id = $trait->store();
519 $trait = CXGN::Phenome::UserTrait->new( $dbh, $trait_id );
520 $trait_id = $trait->get_user_trait_id();
522 unless ( !$values[2] ) {
523 $unit_id = $trait->get_unit_id( $values[2] );
524 if ( !$unit_id ) {
525 $unit_id = $trait->insert_unit( $values[2] );
529 if ( ($trait_id) && ($pop_id) && ($unit_id) ) {
530 $trait->insert_user_trait_unit( $trait_id, $unit_id,
531 $pop_id );
536 else {
538 unless ( !$values[2] ) {
539 $trait_id = $trait->get_user_trait_id();
540 $unit_id = $trait->get_unit_id( $values[2] );
541 if ( !$unit_id ) {
542 $unit_id = $trait->insert_unit( $values[2] );
545 if ( ($trait_id) && ($pop_id) && ($unit_id) ) {
546 $trait->insert_user_trait_unit( $trait_id, $unit_id,
547 $pop_id );
557 if ($@) {
558 print STDERR "An error occurred storing traits: $@\n";
559 $dbh->rollback();
560 return 0;
562 else {
563 print STDERR "Committing...traits\n";
564 return 1;
569 =head2 store_individual
571 Usage: $individual_id = $self->store_individual($dbh, $name, $pop_id, $sp_person_id);
572 Desc: stores individual genotypes is they don't
573 exist in the same pop in the db
574 Ret: individual id
575 Args: db handle, individual name, population id sp_person_id
576 Side Effects: accesses database
577 Example:
579 =cut
581 sub store_individual {
582 my $self = shift;
583 my $ind_name = shift;
584 my $pop_id = $self->get_population_id();
585 my $sp_person_id = $self->get_sp_person_id();
586 my $dbh = $self->get_dbh();
587 my $common_name_id = $self->common_name_id();
589 my ( $individual, $individual_id, $individual_name );
590 my @individuals =
591 CXGN::Phenome::Individual->new_with_name( $dbh, $ind_name, $pop_id );
593 eval {
594 if ( scalar(@individuals) == 0 )
596 $individual = CXGN::Phenome::Individual->new($dbh);
597 $individual->set_name($ind_name);
598 $individual->set_population_id($pop_id);
599 $individual->set_sp_person_id($sp_person_id);
600 $individual->set_common_name_id($common_name_id);
601 $individual_id = $individual->store();
603 $individual_name = $individual->get_name();
606 elsif ( scalar(@individuals) == 1 ) {
608 print STDERR "There is a genotype with name $ind_name
609 in the same population ($pop_id). \n";
610 die "There might be a phenotype data for the same trait
611 for the same genotype $ind_name. I can't store
612 duplicate phenotype data. So I am quitting..\n";
614 elsif ( scalar(@individuals) > 0 ) {
615 die "There are two genotypes with the same name ($ind_name)
616 in the population: $pop_id.\n";
619 if ($@) {
620 $dbh->rollback();
621 print STDERR "An error occurred storing individuals: $@\n";
622 return 0;
625 else {
626 $dbh->commit();
627 print STDERR "STORED individual $individual_name.\n";
628 return $individual;
632 =head2 store_trait_values
634 Usage: my ($true_or_false) = &store_trait_values($dbh, $file, $pop_id, $sp_person_id);
635 Desc: stores phenotype values for traits evaluated for individuals of a population.
636 Ret: true or false
637 Args: db handle, tab delimited phenotype file with full path, population id, sp_person_id
638 Side Effects: accesses database
639 Example:
641 =cut
643 sub store_trait_values {
644 my $self = shift;
645 my $file = shift;
646 my $pop_id = $self->get_population_id();
647 my $sp_person_id = $self->get_sp_person_id();
648 my $dbh = $c->dbc->dbh;
649 open( F, "<$file" ) || die "Can't open file $file.";
651 my $header = <F>;
652 chomp($header);
653 my @fields = split /\t/, $header;
654 print STDERR "\n store phenotype values pop id-- $pop_id : header: $header .. \n";
655 my @trait = ();
656 my ( $trait_name, $trait_id );
658 for ( my $i = 1 ; $i < @fields ; $i++ )
660 my $field_name = $fields[$i];
661 print STDERR "\n store phenotype values: field $i: ..$fields[$i].. ..$field_name.. \n";
662 $field_name =~ s/^\s+|\s+$//g;
663 print STDERR "\n store phenotype values: field $i: ..$fields[$i].. ..$field_name.. \n";
665 $trait[$i] = CXGN::Phenome::UserTrait->new_with_name($dbh, $field_name);
666 print STDERR "\n store phenotype values: get_name --$fields[$i]-- \n";
668 $trait_name = $trait[$i]->get_name();
669 $trait_id = $trait[$i]->get_user_trait_id();
670 print STDERR "\n store phenotype values: GOT trait_name -- $trait_name -- trait id -- $trait_id .. \n";
672 eval {
673 while (<F>)
675 chomp;
676 my (@values) = split /\t/;
677 $values[0] =~ s/^\s+|\s+$//g;
678 print STDERR "\n store individual: $values[0]\n";
679 my $individual = $self->store_individual( $values[0] );
681 die "The genotype does not exist in the database.
682 Therefore, it can not store the associated
683 phenotype data\n"
684 unless ($individual);
686 my $individual_id = $individual->get_individual_id();
687 my $individual_name = $individual->get_name();
689 for ( my $i = 1 ; $i < @values ; $i++ ) {
690 my $phenotype = CXGN::Chado::Phenotype->new($dbh);
691 $phenotype->set_unique_name(
692 qq | $individual_name $pop_id .":". $i |);
694 $phenotype->set_observable_id( $trait[$i]->get_user_trait_id() );
696 if ($values[$i] != 0 && !$values[$i]) {$values[$i] = undef;}
697 if ($values[$i] && $values[$i] =~ /NA|-|^\s+$|^\.+$/ig)
699 $values[$i] = undef;
702 $values[$i] =~ s/^\s+|\s+$//g;
703 my $tr_name = $trait[$i]->get_name();
704 print STDERR "\nstore phenotype values: $individual_name -- $tr_name -- count: $i -- $values[$i] \n";
705 $phenotype->set_value($values[$i]);
706 $phenotype->set_individual_id($individual_id);
707 $phenotype->set_sp_person_id($sp_person_id);
708 my $phenotype_id = $phenotype->store();
710 $trait[$i]->insert_phenotype_user_trait_ids(
711 $trait[$i]->get_user_trait_id(),
712 $phenotype_id );
718 if ($@) {
719 $dbh->rollback();
720 print STDERR "An error occurred storing trait values: $@\n";
721 return 0;
724 else {
725 print STDERR "Committing...trait values to tables public.phenotype
726 and user_trait_id and phenotype_id to phenotype_user_trait\n";
727 return 1;
733 =head2 store_map
735 Usage:
736 Desc:
737 Ret:
738 Args:
739 Side Effects:
740 Example:
742 =cut
744 sub store_map {
745 my $self = shift;
746 my $file = shift;
748 my $dbh = $self->get_dbh();
749 my $pop_id = $self->get_population_id();
751 my $pop = CXGN::Phenome::Population->new( $dbh, $pop_id );
752 my $pop_name = $pop->get_name();
753 my $parent_m = $pop->get_male_parent_id();
754 my $parent_f = $pop->get_female_parent_id();
755 my $desc = $pop->get_description();
757 my $acc = CXGN::Accession->new( $dbh, $parent_f );
758 my $female_name = $acc->accession_common_name();
759 my $chado_org_id_f = $acc->chado_organism_id();
761 $acc = CXGN::Accession->new( $dbh, $parent_m );
762 my $male_name = $acc->accession_common_name();
763 my $chado_org_id_m = $acc->chado_organism_id();
765 my $existing_map_id = CXGN::Map::Tools::population_map( $dbh, $pop_id );
767 my ( $map, $map_id, $map_version_id );
768 if ($existing_map_id) {
769 $map_version_id =
770 CXGN::Map::Version->map_version( $dbh, $existing_map_id );
771 $map = CXGN::Map->new( $dbh, { map_id => $existing_map_id } );
774 else {
775 $map = CXGN::Map->new_map( $dbh, $pop_name );
776 $map_version_id = $map->{map_version_id};
778 $map_id = $map->{map_id};
780 my $species_m = $self->species($chado_org_id_m);
781 my $species_f = $self->species($chado_org_id_f);
783 my $long_name =
784 $species_f . ' cv. '
785 . $female_name . ' x '
786 . $species_m . ' cv. '
787 . $male_name;
788 print STDERR "map long name: $long_name\n";
789 $map->{long_name} = $long_name;
790 $map->{map_type} = 'genetic';
791 $map->{parent_1} = $parent_f;
792 $map->{parent_2} = $parent_m;
793 $map->{abstract} = $desc;
794 $map->{population_id} = $pop_id;
795 $map_id = $map->store();
797 my $lg_result;
798 if ($map_version_id) {
799 $lg_result = $self->store_lg( $map_version_id, $file );
801 if ($lg_result) {
802 print STDERR " STORED LINKAGE GROUPS\n";
804 else {
805 print STDERR "FAILED STORING LINKAGE GROUPS\n";
809 if ( $map_id && $map_version_id && $lg_result ) {
810 return $map_id, $map_version_id;
812 else {
813 print STDERR "Either map or map_version or
814 linkage_groups storing did not work\n";
815 return 0;
820 =head2 species
822 Usage: my $species = $self->species($org_id)
823 Desc: when given the chado.organism_id, it returns the
824 genus and species name (in abbreviated format)
825 Ret: abbreviated species name
826 Args: chado organism id
827 Side Effects: access db
828 Example:
830 =cut
832 sub species {
833 my $self = shift;
834 my $org_id = shift;
835 my $dbh = $self->get_dbh();
837 my $schema = $c->dbic_schema('Bio::Chado::Schema', 'sgn_chado');
839 my $org = CXGN::Chado::Organism->new( $schema, $org_id );
841 return my $species = $org->get_abbreviation();
845 =head2 store_lg
847 Usage:
848 Desc:
849 Ret:
850 Args:
851 Side Effects:
852 Example:
854 =cut
856 sub store_lg {
857 my $self = shift;
858 my ( $map_version_id, $file ) = @_;
859 my $dbh = $self->get_dbh();
861 open F, "<$file" or die "can't open $file\n";
862 my $markers = <F>;
863 my $chr = <F>;
864 chomp($chr);
865 close F;
866 print STDERR "\n chr: $chr \n";
867 my @all_chrs = split "\t", $chr;
868 my $num = scalar(@all_chrs);
869 print STDERR "\n all chr num: $num\n";
871 @all_chrs = uniq @all_chrs;
872 $num = scalar(@all_chrs);
873 print STDERR "\n unique chr numbers: $num\n";
874 foreach my $ch (@all_chrs) { print STDERR "\n chr -- $ch \n ";}
875 my @chrs = grep {$_ =~ /\d+/} @all_chrs;
876 @chrs = uniq(@chrs);
877 $num = scalar(@chrs);
878 print STDERR "\n clean chr numbers: $num\n";
879 my @cleaned_chrs;
881 foreach my $ch (@chrs) { print STDERR "\n cleaned chr -- $ch \n "; $ch =~ s/\s+//g; push @cleaned_chrs, $ch;}
882 @chrs = @cleaned_chrs;
883 @chrs = uniq(@chrs);
884 $num = scalar(@chrs);
885 print STDERR "\n clean chr numbers: $num\n";
886 foreach my $ch (@chrs) { print STDERR "\n final cleaned chr -- $ch \n ";}
895 # die "The first cell of 2nd row must be empty." unless !$chrs[0];
896 # shift(@chrs);
898 my $lg = CXGN::LinkageGroup->new( $dbh, $map_version_id, \@chrs );
899 my $result = $lg->store();
901 if ($result) {
902 print STDERR "Succeeded storing linkage groups
903 on map_version_id $map_version_id\n";
905 else {
906 print STDERR "Failed storing linkage groups
907 on map_version_id $map_version_id\n";
912 =head2 store_markers
914 Usage:
915 Desc:
916 Ret:
917 Args:
918 Side Effects:
919 Example:
921 =cut
923 sub store_marker_and_position {
924 my $self = shift;
925 my ( $file, $map_version_id ) = @_;
926 my $dbh = $self->get_dbh();
927 open F, "<$file" or die "can't open $file\n";
929 my $markers = <F>;
930 my $chrs = <F>;
931 my $positions = <F>;
932 chomp( $markers, $chrs, $positions );
933 close F;
935 my @markers = split /\t/, $markers;
936 shift(@markers);
938 my @positions = split /\t/, $positions;
939 shift(@positions);
941 my @chromosomes = split /\t/, $chrs;
942 shift(@chromosomes);
944 eval {
945 for ( my $i = 0 ; $i < @markers ; $i++ )
947 print STDERR "\nstore marker and position: $markers[$i] -- $positions[$i] \n";
948 $markers[$i] =~ s/^\s+|\s+$//g;
950 my ( $marker_name, $subs ) =
951 CXGN::Marker::Tools::clean_marker_name( $markers[$i] );
953 my @marker_ids =
954 CXGN::Marker::Tools::marker_name_to_ids( $dbh, $marker_name );
955 if ( @marker_ids > 1 ) {
956 die "Too many IDs found for marker '$marker_name'";
958 my ($marker_id) = @marker_ids;
960 my $marker_obj;
961 if ($marker_id) {
962 $marker_obj = CXGN::Marker::Modifiable->new( $dbh, $marker_id );
964 else {
965 $marker_obj = CXGN::Marker::Modifiable->new($dbh);
966 $marker_obj->set_marker_name($marker_name);
967 my $inserts = $marker_obj->store_new_data();
969 if ( $inserts and @{$inserts} ) {
971 else {
972 die "Oops, I thought I was inserting some new data";
974 $marker_id = $marker_obj->marker_id();
977 $positions[$i] =~ s/^\s+|\s+$//g;
978 $chromosomes[$i] =~ s/^\s+|\s+$//g;
979 print STDERR "\nstore marker and position: $markers[$i] --$chromosomes[$i] -- $positions[$i] \n";
981 my $loc = $marker_obj->new_location();
982 my $pos = $positions[$i];
983 my $conf = 'uncalculated';
984 my $protocol = 'unknown';
985 $loc->marker_id($marker_id);
987 $loc->map_version_id($map_version_id);
988 $loc->lg_name( $chromosomes[$i] );
989 $loc->position($pos);
990 $loc->confidence($conf);
991 $loc->subscript($subs);
993 $marker_obj->add_experiment(
994 { location => $loc, protocol => $protocol } );
995 my $inserts = $marker_obj->store_new_data();
997 if ( $inserts and @{$inserts} ) {
1001 else {
1002 die "Oops, I thought I was inserting some new data";
1007 if ($@) {
1008 print STDERR $@;
1009 print STDERR
1010 "Failed loading markers and their positions; rolling back.\n";
1011 $dbh->rollback();
1012 return 0;
1014 else {
1015 print STDERR "Succeeded. loading markers and their position\n";
1017 $dbh->commit();
1018 return 1;
1023 =head2 store_genotype
1025 Usage:
1026 Desc:
1027 Ret:
1028 Args:
1029 Side Effects:
1030 Example:
1032 =cut
1034 sub store_genotype {
1035 my $self = shift;
1036 my ( $file, $map_version_id ) = @_;
1037 my $dbh = $self->get_dbh();
1038 my $pop_id = $self->get_population_id();
1040 open F, "<$file" or die "can't open $file\n";
1042 my $markers = <F>;
1043 my $chrs = <F>;
1044 my $positions = <F>;
1045 chomp( $markers, $chrs, $positions );
1047 my @markers = split /\t/, $markers;
1048 shift(@markers);
1050 my @chrs = split /\t/, $chrs;
1051 shift(@chrs);
1053 my $pop = CXGN::Phenome::Population->new( $dbh, $pop_id );
1054 my $pop_name = $pop->get_name();
1055 my $sp_person_id = $pop->get_sp_person_id();
1057 my $map = CXGN::Map->new( $dbh, { map_version_id => $map_version_id } );
1058 my $map_id = $map->get_map_id();
1060 my $linkage = CXGN::LinkageGroup->new( $dbh, $map_version_id );
1062 unless ($map_id) {
1063 die "I need a valid reference map before I can
1064 start loading the genotype data\n";
1067 eval {
1069 my $experiment = CXGN::Phenome::GenotypeExperiment->new($dbh);
1070 $experiment->set_background_accession_id(100);
1071 $experiment->set_experiment_name($pop_name);
1072 $experiment->set_reference_map_id($map_id);
1073 $experiment->set_sp_person_id($sp_person_id);
1074 $experiment->set_preferred(1);
1075 my $experiment_id = $experiment->store();
1077 while ( my $row = <F> ) {
1078 chomp($row);
1079 my @plant_genotype = split /\t/, $row;
1080 my $plant_name = shift(@plant_genotype);
1081 $plant_name =~ s/^\s+|\s+$//g;
1082 print STDERR "\n storing genotype... individual: $plant_name\n";
1084 my @individual = CXGN::Phenome::Individual->new_with_name( $dbh, $plant_name, $pop_id );
1086 die "There are two genotypes with the same name or no genotypes
1087 in the same population. Can't assign genotype values."
1088 unless ( scalar(@individual) == 1 );
1090 if ( $individual[0] ) {
1092 my $genotype = CXGN::Phenome::Genotype->new($dbh);
1094 $genotype->set_genotype_experiment_id($experiment_id);
1096 my $individual_id = $individual[0]->get_individual_id();
1097 $genotype->set_individual_id($individual_id);
1099 #$genotype->set_experiment_name($pop_name);
1100 #$genotype->set_reference_map_id($map_id);
1101 #$genotype->set_sp_person_id($sp_person_id);
1102 my $genotype_id = $genotype->store();
1104 my $mapmaker_genotype;
1105 for ( my $i = 0 ; $i < @plant_genotype ; $i++ ) {
1107 my $genotype_region = CXGN::Phenome::GenotypeRegion->new($dbh);
1109 $markers[$i] =~ s/^\s+|\s+$//g;
1110 print STDERR "\n marker name: $markers[$i]\n";
1111 $markers[$i] = CXGN::Marker::Tools::clean_marker_name( $markers[$i] );
1112 print STDERR "\n clean marker name: $markers[$i]\n";
1113 my $marker_id;
1115 my $marker = CXGN::Marker->new_with_name( $dbh, $markers[$i] );
1116 if ($marker) {
1117 $marker_id = $marker->marker_id();
1118 } else {
1119 my @marker_ids = CXGN::Marker::Tools::marker_name_to_ids( $dbh, $markers[$i] );
1121 # if ( @marker_ids > 1 ) {
1122 # die "Too many IDs found for marker '$markers[$i]'";
1123 # }
1125 $marker_id = $marker_ids[0];
1128 print STDERR "\n marker id: $markers[$i] -- $marker_id\n";
1129 $chrs[$i] =~ s/^\s+|\s+$//g;
1130 my $c = $chrs[$i];
1131 my $lg_id = $linkage->get_lg_id( $chrs[$i] );
1133 $plant_genotype[$i] =~ s/^\s+|\s+$//g;
1134 print STDERR "\n $markers[$i] -- $marker_id -- $chrs[$i] -- $plant_genotype[$i]\n";
1135 if ( !$plant_genotype[$i]
1136 || ( $plant_genotype[$i] =~ /\-/ ) )
1138 next();
1141 $genotype_region->set_genotype_id($genotype_id);
1142 $genotype_region->set_marker_id_nn( $marker_id );
1143 $genotype_region->set_marker_id_ns( $marker_id );
1144 $genotype_region->set_marker_id_sn( $marker_id );
1145 $genotype_region->set_marker_id_ss( $marker_id );
1146 $genotype_region->set_lg_id($lg_id);
1147 $genotype_region->set_sp_person_id($sp_person_id);
1149 if ( $i == 0 ) {
1150 if ( $plant_genotype[$i] =~ /\d/ ) {
1151 $mapmaker_genotype = 1;
1153 elsif ( $plant_genotype[$i] =~ /\D/ ) {
1154 $mapmaker_genotype = undef;
1158 if ($mapmaker_genotype) {
1159 $genotype_region->set_mapmaker_zygocity_code(
1160 $plant_genotype[$i] );
1162 else {
1163 $genotype_region->set_zygocity_code(
1164 $plant_genotype[$i] );
1166 $genotype_region->set_type("map");
1167 $genotype_region->store();
1171 else {
1173 "There is mismatch between the list of genotypes/lines ($plant_genotype[0]
1174 in your phenotype and genotype datasets\n";
1180 if ($@) {
1181 $dbh->rollback();
1182 print STDERR "An error occurred loading genotype data:
1183 $@. ROLLED BACK CHANGES.\n";
1184 return undef;
1186 else {
1187 print STDERR "All is fine. Committing...genotype data\n";
1188 $dbh->commit();
1189 return 1;
1194 =head2 accessors get_sp_person_id, set_sp_person_id
1196 Usage:
1197 Desc:
1198 Property
1199 Side Effects:
1200 Example:
1202 =cut
1204 sub get_sp_person_id {
1205 my $self = shift;
1206 return $self->{sp_person_id};
1209 sub set_sp_person_id {
1210 my $self = shift;
1211 $self->{sp_person_id} = shift;
1214 =head2 accessors get_population_id, set_population_id
1216 Usage:
1217 Desc:
1218 Property
1219 Side Effects:
1220 Example:
1222 =cut
1224 sub get_population_id {
1225 my $self = shift;
1226 return $self->{population_id};
1229 sub set_population_id {
1230 my $self = shift;
1231 $self->{population_id} = shift;
1234 =head2 accessors get_dbh, set_dbh
1236 Usage:
1237 Desc:
1238 Property
1239 Side Effects:
1240 Example:
1242 =cut
1244 sub get_dbh {
1245 my $self = shift;
1246 return $self->{dbh};
1249 sub set_dbh {
1250 my $self = shift;
1251 $self->{dbh} = shift;
1254 =head2 common_name_id
1256 Usage:
1257 Desc:
1258 Ret:
1259 Args:
1260 Side Effects:
1261 Example:
1263 =cut
1265 sub common_name_id {
1266 my $self = shift;
1267 my $sp_person_id = $self->get_sp_person_id();
1268 my $qtl = CXGN::Phenome::Qtl->new($sp_person_id);
1269 my ( $qtl_dir, $user_qtl_dir ) = $qtl->get_user_qtl_dir($c);
1271 my $id;
1272 if ( -e "$user_qtl_dir/organism.txt" ) {
1273 open C, "<$user_qtl_dir/organism.txt" or die "Can't open file: !$\n";
1275 my $row = <C>;
1276 if ( $row =~ /(\d)/ ) {
1277 $id = $1;
1280 close C;
1282 return $id;
1284 else {
1285 return 0;
1290 =head2 error_page
1292 Usage: $self->error_page(@errors);
1293 Desc: when feed with error messages, it generates an
1294 error page with a list of the messages and a link
1295 back to the previous page where required data field(s)
1296 was/were not properly filled.
1297 Ret: page with the appropriate message
1298 Args: a list of messages to print...
1299 Side Effects:
1300 Example:
1302 =cut
1304 sub error_page {
1305 my $self = shift;
1306 my @error = @_;
1308 $c->forward_to_mason_view('/qtl/qtl_load/missing_data.mas',
1309 missing_data => \@error,
1310 guide => $self->guideline(),
1314 =head2 check_organism
1316 Usage: $self->check_organism($organism);
1317 Desc: checks if organism object is defined and if the
1318 the parental species is supported by sgn (chado.organism).
1319 in case organism object is not defined (a query to the chado.organism
1320 using a species name does not return a value, it generates
1321 a page with advise to check for the spelling of the scientific species name.
1322 Ret: a page with advice
1323 Args: organism object, species name, cultivar name
1324 Side Effects:
1325 Example:
1327 =cut
1329 sub check_organism {
1330 my ($self, $organism, $species, $cultivar) = @_ ;
1332 unless ( !$cultivar ) {
1333 $cultivar = " cv. $cultivar";
1336 if ( !$organism )
1338 $c->forward_to_mason_view('/qtl/qtl_load/check_organism.mas',
1339 species => $species,
1340 cultivar => $cultivar,
1341 guide => $self->guideline(),
1343 } else
1345 #do nothing..relax
1350 =head2 population_exists
1352 Usage: $self->population_exists($population, $population_name);
1353 Desc: checks if there is already a population with the same name
1354 and if so it generates a page with the appropriate advice to the user
1356 Ret: a page with advice
1357 Args: population object, population name
1358 Side Effects:
1359 Example:
1361 =cut
1363 sub population_exists {
1364 my ($self, $pop, $name) = @_;
1366 if ($pop) {
1368 $c->forward_to_mason_view('/qtl/qtl_load/population_exists.mas',
1369 name => $name,
1370 guide => $self->guideline()
1377 sub guideline {
1378 my $self = shift;
1379 return qq |<a href="/qtl/submission/guide">Guidelines</a> |;
1382 =head2 trait_columns
1384 Usage: $self->trait_columns($error);
1385 Desc: checks if the trait file has the right order
1386 of data columns and if not advises the submitter
1387 with the appropriate message
1389 Ret: a page with advice
1390 Args: text message
1391 Side Effects:
1392 Example:
1394 =cut
1396 sub trait_columns {
1397 my ($self, $trait_error) = @_;
1399 if ($trait_error) {
1400 $c->forward_to_mason_view('/qtl/qtl_load/trait_columns.mas',
1401 error => $trait_error,
1402 guide => $self->guideline()
1408 =head2 accessors compare_file_names
1410 Usage: $f = $self->compare_file_names($file1, $file2);
1411 Desc: useful for checking if data files submitted for the traits,
1412 phenotype and genotype are different. helpful to avoid indvertent
1413 uploading of the same file for different fields.
1415 Ret: a page with advice if the same files are uploaded
1416 Args: file names to compare
1417 Side Effects:
1418 Example:
1420 =cut
1422 sub compare_file_names {
1423 my ($self, $file1, $file2) = @_;
1425 unless ( $file1 ne $file2 ) {
1426 $c->forward_to_mason_view('/qtl/qtl_load/compare_file_names.mas',
1427 file1 => $file1,
1428 file2 => $file2,
1429 guide => $self->guideline()
1434 =head2 send_email
1436 Usage: $self->send_email($subj, $message, $pop_id);
1437 Desc: sends email at each step of the qtl data upload
1438 process..
1439 Ret: nothing
1440 Args: subject, message, population_id
1441 Side Effects:
1442 Example:
1444 =cut
1446 sub send_email {
1447 my $self = shift;
1448 my ( $subj, $message, $pop_id ) = @_;
1449 my $dbh = $self->get_dbh();
1450 my $sp_person_id = $self->get_sp_person_id();
1451 my $person = CXGN::People::Person->new( $dbh, $sp_person_id );
1453 my $user_profile =
1454 qq |http://solgenomics.net/solpeople/personal-info.pl?sp_person_id=$sp_person_id |;
1456 my $username = $person->get_first_name() . " " . $person->get_last_name();
1457 $message .=
1458 qq |\nQTL population id: $pop_id \nQTL data owner: $username ($user_profile) |;
1460 print STDERR "\n$subj\n$message\n";
1461 CXGN::Contact::send_email( $subj, $message,
1462 'sgn-db-curation@sgn.cornell.edu' );
1467 sub post_stat_form {
1468 my ($self, $args_ref) = @_;
1470 my $sp_person_id = $self->get_sp_person_id();
1471 my $qtl_obj = CXGN::Phenome::Qtl->new($sp_person_id, $args_ref );
1472 my $qtl_tools = CXGN::Phenome::Qtl::Tools->new();
1473 my $stat_param = $qtl_obj->user_stat_parameters();
1474 my @missing = $qtl_tools->check_stat_fields($stat_param);
1475 my $pop_id = $args_ref->{pop_id};
1477 my ($stat_file, $type);
1478 if (@missing) {
1479 $self->error_page(@missing);
1481 else {
1482 $stat_file = $qtl_obj->user_stat_file( $c, $pop_id );
1483 $type = 'confirm';
1486 if ($type eq 'confirm') {
1487 my $referer = $c->req->base . "qtl/form/stat_form/$pop_id";
1488 if (-e $stat_file && $c->req->referer eq $referer)
1490 my $qtlpage = $c->req->base . "qtl/population/$pop_id";
1491 my $message = qq | QTL statistical parameters set: Step 5 of 5.
1492 QTL data upload for<a href="$qtlpage">population
1493 $pop_id</a> is completed. |;
1495 $self->send_email( '[QTL upload: Step 5]', $message, $pop_id );
1496 $self->redirect_to_next_form("/qtl/form/confirm/$pop_id");
1499 else
1501 $c->res->redirect($c->req->referer);
1502 $c->detach();
1504 } else
1506 $c->res->redirect($c->req->referer);
1507 $c->detach();
1511 sub show_pop_form {
1512 my ( $self ) = @_;
1513 $self->send_email( '[QTL upload: Step 1]', 'A user is at the QTL data upload Step 1 of 5', 'NA' );
1514 $self->redirect_to_next_form("/qtl/form/pop_form");
1517 sub post_pop_form {
1518 my ($self, $qtl_obj, $qtl_tools) = @_;
1520 my $pop_detail = $qtl_obj->user_pop_details();
1521 my @error = $qtl_tools->check_pop_fields($pop_detail);
1523 my ( $pop_id, $pop_name, $desc );
1524 if (@error)
1526 $self->error_page(@error);
1528 else
1530 ( $pop_id, $pop_name, $desc ) =
1531 $self->load_pop_details($pop_detail);
1534 unless ( !$pop_id )
1536 $self->send_email( '[QTL upload: Step 1]', 'QTL population data uploaded: Step 1 of 5 completed', $pop_id );
1537 $self->redirect_to_next_form("/qtl/form/trait_form/$pop_id");
1541 sub post_trait_form {
1542 my ($self, $qtl_obj, $trait_file, $pop_id) = @_;
1544 if (!$trait_file)
1546 $self->error_page("Trait file");
1549 my $uploaded_file = $self->trait_upload($qtl_obj, $trait_file);
1551 my $traits_in_db;
1552 if ($uploaded_file)
1554 $traits_in_db = $self->store_traits($uploaded_file);
1557 if ($pop_id && $traits_in_db)
1559 $self->send_email('[QTL upload: Step 2]', 'QTL traits uploaded: Step 2 of 5', $pop_id);
1560 $self->redirect_to_next_form("/qtl/form/pheno_form/$pop_id");
1567 sub post_pheno_form {
1568 my ($self, $qtl_obj, $pheno_file, $pop_id) = @_;
1570 if (!$pheno_file)
1572 $self->error_page('Phenotype dataset file');
1575 my $uploaded_file = $self->pheno_upload($qtl_obj, $pheno_file);
1577 my $phenotype_in_db;
1578 if ($uploaded_file)
1580 $phenotype_in_db = $self->store_trait_values($uploaded_file);
1583 if ($phenotype_in_db && $pop_id)
1585 $self->send_email('[QTL upload: Step 3]', 'QTL phenotype data uploaded: Step 3 of 5', $pop_id);
1586 $self->redirect_to_next_form("/qtl/form/geno_form/$pop_id");
1591 sub post_geno_form {
1592 my ($self, $qtl_obj, $geno_file, $pop_id) = @_;
1594 if (!$geno_file)
1596 $self->error_page('Genotype dataset file');
1599 my ($map_id, $map_version_id);
1600 my $uploaded_file = $self->geno_upload( $qtl_obj, $geno_file);
1602 if ($uploaded_file)
1604 ( $map_id, $map_version_id ) = $self->store_map($uploaded_file);
1606 else
1608 $self->error_page('Genotype dataset file');
1611 my $genotype_uploaded;
1613 if ($map_version_id)
1615 my $result = $self->store_marker_and_position($uploaded_file, $map_version_id);
1617 unless ($result)
1619 $c->throw_404("Couldn't store markers and position.");
1622 my $genotype_uploaded = $self->store_genotype($uploaded_file, $map_version_id);
1624 if ($genotype_uploaded)
1626 $self->send_email( '[QTL upload: Step 4]', 'QTL genotype data uploaded : Step 4 of 5', $pop_id );
1627 $self->redirect_to_next_form("/qtl/form/stat_form/$pop_id");
1629 else
1631 $c->throw_404("failed storing genotype data.");
1636 sub redirect_to_next_form {
1637 my ($self, $next_form) = @_;
1638 $c->res->redirect("$next_form");
1639 $c->detach();