added a comment section..
[sgn.git] / cgi-bin / phenome / qtl_load.pl
blob86d438d6d24e2db6963391476c64ab004dee6860
1 =head1 DESCRIPTION
2 processes and loads qtl data obtained from the the web forms
3 (qtl_form.pl) on the user specific directory and the database.
6 =head1 AUTHOR
7 Isaak Y Tecle (iyt2@cornell.edu)
9 =cut
12 use strict;
14 my $qtl_load_detail_page = CXGN::Phenome::QtlLoadDetailPage->new();
16 package CXGN::Phenome::QtlLoadDetailPage;
21 use File::Spec;
22 use SGN::Context;
23 use CXGN::Page;
24 use CXGN::Page::FormattingHelpers qw /info_section_html
25 page_title_html
26 columnar_table_html
27 html_optional_show
28 info_table_html
29 tooltipped_text
30 html_alternate_show
32 use CXGN::DB::Connection;
33 use CXGN::Phenome::Qtl;
34 use CXGN::Phenome::Qtl::Tools;
35 use CXGN::Phenome::Population;
36 use CXGN::Phenome::UserTrait;
37 use CXGN::Chado::Phenotype;
38 use CXGN::Chado::Cvterm;
39 use CXGN::Chado::Organism;
40 use CXGN::Phenome::Individual;
41 use CXGN::Accession;
42 use CXGN::Map;
43 use CXGN::Map::Version;
44 use CXGN::Map::Tools;
45 use CXGN::LinkageGroup;
46 use List::MoreUtils qw /uniq/;
47 use CXGN::Marker::Modifiable;
48 use CXGN::Marker::Tools;
49 use CXGN::Marker::Location;
50 use CXGN::Phenome::GenotypeExperiment;
51 use CXGN::Phenome::Genotype;
52 use CXGN::Phenome::GenotypeRegion;
53 use CXGN::Login;
54 use CXGN::Contact;
55 use CXGN::People::Person;
56 use CXGN::Page;
57 use Bio::Chado::Schema;
58 use Storable qw /store retrieve/;
61 sub new {
62 my $class = shift;
63 my $self = bless {}, $class;
65 my $dbh = CXGN::DB::Connection->new();
66 my $login = CXGN::Login->new($dbh);
67 my $sp_person_id = $login->verify_session();
69 $self->set_sp_person_id($sp_person_id);
70 $self->set_dbh($dbh);
72 if ($sp_person_id) {
73 $self->process_data();
76 return $self;
81 sub process_data {
82 my $self=shift;
83 my $page = CXGN::Page->new("SGN", "isaak");
84 my $dbh = $self->get_dbh();
86 my $login = CXGN::Login->new($dbh);
87 my $sp_person_id = $login->verify_session();
89 my $referring_page = "/phenome/qtl_form.pl";
92 my %args = $page->get_all_encoded_arguments();
93 $args{pop_common_name_id} = $self->common_name_id();
95 my $type = $args{type};
96 my $pop_id = $args{pop_id};
97 my $args_ref = \%args;
99 my $qtl_obj = CXGN::Phenome::Qtl->new($sp_person_id, $args_ref);
100 my $c = SGN::Context->new();
101 $qtl_obj->create_user_qtl_dir($c);
102 my $qtl_tools = CXGN::Phenome::Qtl::Tools->new();
103 my $page = CXGN::Page->new("SGN", "Isaak");
105 if ($pop_id) {
106 $self->set_population_id($pop_id);
107 $qtl_obj->set_population_id($pop_id);
110 my ($pop_name, $desc, $pop_detail, $message);
112 if ($type eq 'begin') {
113 $type = 'pop_form';
114 $message = 'A user is at the QTL data upload Step 0 of 5';
115 $self->send_email('[QTL upload: Step 0]', $message, 'NA');
116 $page->client_redirect("$referring_page?type=$type");
119 elsif ($type eq 'pop_form') {
120 $pop_detail = $qtl_obj->user_pop_details();
121 my @error = $qtl_tools->check_pop_fields($pop_detail);
123 if (@error) {
124 $self->error_page(@error);
125 } else {
126 ($pop_id, $pop_name, $desc) = $self->load_pop_details($pop_detail);
128 unless (!$pop_id) {
129 $message = 'QTL population data uploaded: Step 1 of 5';
130 $self->send_email('[QTL upload: Step 1]', $message, $pop_id);
131 $type = 'trait_form';
132 $page->client_redirect("$referring_page?pop_id=$pop_id&type=$type");
136 elsif ($type eq 'trait_form') {
138 my ($trait_file, $trait_to_db);
139 if ($args{'trait_file'}) {
140 $trait_file = $self->trait_upload($qtl_obj, $args{'trait_file'});
141 $trait_to_db = $self->store_traits($trait_file);
142 } else {
143 $self->error_page('Traits file');
145 unless (!-e $trait_file || !$trait_to_db) {
146 $message = 'QTL traits uploaded: Step 2 of 5';
147 $self->send_email('[QTL upload: Step 2]', $message, $pop_id);
148 $type = 'pheno_form';
149 $page->client_redirect("$referring_page?pop_id=$pop_id&type=$type");
152 # else {
153 # print "There is problem with your traits list. <br/>
154 # Make sure (1) it is a tab delimited file,
155 # <br/> (2) The headings are in the order of
156 # trait->definition->unit.";
157 # $page->footer();
162 elsif ($type eq 'pheno_form') {
164 my ($pheno_file, $trait_values_to_db);
165 if ($args{'pheno_file'}) {
166 $pheno_file = $self->pheno_upload ($qtl_obj, $args{'pheno_file'});
167 $trait_values_to_db = $self->store_trait_values($pheno_file);
168 } else {
169 $self->error_page('Phenotype dataset file');
172 unless (!-e $pheno_file || !$trait_values_to_db) {
173 $message = 'QTL phenotype data uploaded : Step 3 of 5';
174 $self->send_email('[QTL upload: Step 3]', $message, $pop_id);
175 $type = 'geno_form';
176 $page->client_redirect("$referring_page?pop_id=$pop_id&amp;type=$type");
180 elsif ($type eq 'geno_form') {
181 my ($geno_file, $map_id, $map_version_id);
182 if ($args{'geno_file'}) {
183 $geno_file = $self->geno_upload($qtl_obj, $args{'geno_file'});
184 ($map_id, $map_version_id) = $self->store_map($geno_file);
185 } else {
186 $self->error_page('Genotype dataset file');
189 unless (!$geno_file || !$map_id || !$map_version_id) {
190 if ($map_version_id) {
191 my $result = $self->store_marker_and_position($geno_file, $map_version_id);
192 unless (!$result) {
193 my $genotype_uploaded = $self->store_genotype($geno_file, $map_version_id);
194 if ($genotype_uploaded) {
195 $message = 'QTL genotype data uploaded : Step 4 of 5';
196 $self->send_email('[QTL upload: Step 4]', $message, $pop_id);
197 $type = 'stat_form';
198 $page->client_redirect("$referring_page?pop_id=$pop_id&amp;type=$type");
200 else {
201 print STDERR "There is problem with your genotype data uploading\n";
204 } else {
205 die "Can't store markers. No map version id\n";
208 } #else { die "Failed storing map data\n";}
211 elsif ($type eq 'stat_form') {
212 my $stat_param = $qtl_obj->user_stat_parameters();
213 my @missing = $qtl_tools->check_stat_fields($stat_param);
215 my $stat_file;
216 if (@missing) {
217 $self->error_page(@missing);
218 } else {
219 my $c = SGN::Context->new();
220 $stat_file = $qtl_obj->get_stat_file($c, $pop_id);
222 unless (!-e $stat_file) {
223 $message = 'QTL statistical parameters set : Step 5 of 5' . qq | \nQTL data upload for http://solgenomics.net/phenome/population.pl?pop_id=$pop_id" is completed|;
224 $self->send_email('[QTL upload: Step 5]', $message, $pop_id);
226 $type = 'confirm';
227 $page->client_redirect("$referring_page?pop_id=$pop_id&amp;type=$type");
234 sub pheno_upload {
235 my $self = shift;
236 my $qtl = shift;
237 my $p_file = shift;
239 my $safe_char = "a-zA-Z0-9_.-";
240 my ($temp_pheno_file, $name);
242 $p_file =~ tr/ /_/;
243 $p_file =~ s/[^$safe_char]//g;
245 if ($p_file =~/^([$safe_char]+)$/) {
247 $p_file = $1;
249 } else {
250 die "Phenotype file name contains invalid characters";
254 my $p= CXGN::Page->new();
255 my $c = SGN::Context->new();
256 my $phe_upload = $p->get_upload();
258 if (defined $phe_upload) {
259 $name = $phe_upload->filename;
261 my ($qtl_dir, $user_dir) = $qtl->get_user_qtl_dir($c);
262 my $qtlfiles = retrieve("$user_dir/qtlfiles");
264 my $trait_file = $qtlfiles->{trait_file};
265 my $f = $self->compare_file_names($name, $trait_file);
266 $qtlfiles->{pheno_file}=$name;
267 store $qtlfiles, "$user_dir/qtlfiles";
269 } else {
270 die "Apache2::Upload object for phenotype file not defined."
274 if ($p_file eq $name ) {
275 #here, call apache_upload_files
276 $temp_pheno_file = $qtl->apache_upload_file($phe_upload, $c);
277 return $temp_pheno_file;
280 else {return 0};
285 sub geno_upload {
286 my $self = shift;
287 my $qtl = shift;
288 my $g_file = shift;
291 my ($temp_geno_file, $name);
293 my $safe_char = "a-zA-Z0-9_.-";
295 $g_file =~ tr/ /_/;
296 $g_file =~ s/[^$safe_char]//g;
298 if ($g_file =~/^([$safe_char]+)$/) {
300 $g_file = $1;
302 } else {
303 die "Genotype file name contains invalid characters";
306 my $p = CXGN::Page->new();
307 my $c = SGN::Context->new();
308 my $gen_upload = $p->get_upload();
310 if (defined $gen_upload) {
311 $name = $gen_upload->filename;
313 my ($qtl_dir, $user_dir) = $qtl->get_user_qtl_dir($c);
314 my $qtlfiles = retrieve("$user_dir/qtlfiles");
316 my $trait_file = $qtlfiles->{trait_file};
317 my $pheno_file = $qtlfiles->{pheno_file};
319 my $f = $self->compare_file_names($name, $trait_file);
320 $f = $self->compare_file_names($name, $pheno_file);
322 $qtlfiles->{geno_file}=$name;
323 store $qtlfiles, "$user_dir/qtlfiles";
325 } else {
326 die "Apache2::Upload object for genotype file not defined."
330 if ($g_file eq $name ) {
331 $temp_geno_file = $qtl->apache_upload_file($gen_upload, $c);
332 return $temp_geno_file;
334 else {return 0};
338 sub trait_upload {
339 my $self = shift;
340 my $qtl = shift;
341 my $c_file = shift;
343 my ($temp_trait_file, $name);
345 print STDERR "Trait file: $c_file\n";
346 my $safe_char = "a-zA-Z0-9_.-";
348 $c_file =~ tr/ /_/;
349 $c_file =~ s/[^$safe_char]//g;
351 if ($c_file =~/^([$safe_char]+)$/) {
353 $c_file = $1;
355 } else {
356 die "Trait file name contains invalid characters";
359 my $p = CXGN::Page->new();
360 my $c = SGN::Context->new();
361 my $trait_upload = $p->get_upload();
363 if (defined $trait_upload) {
364 $name = $trait_upload->filename;
366 my ($qtl_dir, $user_dir) = $qtl->get_user_qtl_dir($c);
367 my $qtlfiles={};
368 $qtlfiles->{trait_file}=$name;
369 store ($qtlfiles, "$user_dir/qtlfiles");
372 } else {
373 die "Apache2::Upload object for trait file not defined."
377 if ($c_file eq $name ) {
378 $temp_trait_file = $qtl->apache_upload_file($trait_upload, $c);
379 return $temp_trait_file;
381 else {return 0};
386 sub load_pop_details {
387 my $self = shift;
388 my $pop_args = shift;
389 my %pop_details = %{$pop_args};
391 my $org = $pop_details{organism};
392 my $name = $pop_details{pop_name};
393 my $desc = $pop_details{pop_desc};
394 my $cross_id = $pop_details{pop_type};
395 my $female = $pop_details{pop_female_parent};
396 my $male = $pop_details{pop_male_parent};
397 my $recurrent= $pop_details{pop_recurrent_parent};
398 my $donor = $pop_details{pop_donor_parent};
399 my $comment = $pop_details{pop_comment};
400 my $is_public = $pop_details{pop_is_public};
401 my $common_name_id = $pop_details{pop_common_name_id};
404 my $dbh = $self->get_dbh();
405 my $login = CXGN::Login->new($dbh);
406 my $sp_person_id = $login->verify_session();
409 my ($female_id, $male_id, $recurrent_id, $donor_id);
411 my $population = CXGN::Phenome::Population->new_with_name($dbh, $name);
412 $self->population_exists($population, $name);
414 print STDERR "storing parental accessions...\n";
416 if ($female) {
417 $female_id = $self->store_accession($female);
418 print STDERR "female: $female_id\n";
422 if ($male) {
423 $male_id = $self->store_accession($male);
424 print STDERR "male: $male_id\n";
427 if ($recurrent) {
428 $recurrent_id = $self->store_accession($recurrent);
431 if ($donor) {
432 $donor_id = $self->store_accession($donor);
436 my $pop = CXGN::Phenome::Population->new($dbh);
437 $pop->set_name($name);
438 $pop->set_description($desc);
439 $pop->set_sp_person_id($sp_person_id);
440 $pop->set_cross_type_id($cross_id);
441 $pop->set_female_parent_id($female_id);
442 $pop->set_male_parent_id($male_id);
443 $pop->set_recurrent_parent_id($recurrent_id);
444 $pop->set_donor_parent_id($donor_id);
445 $pop->set_comment($comment);
446 $pop->set_web_uploaded('t');
447 $pop->set_common_name_id($common_name_id);
448 $pop->store();
450 my $pop_id = $dbh->last_insert_id("population", "phenome");
452 $pop = CXGN::Phenome::Population->new($dbh, $pop_id);
453 $pop->store_data_privacy($is_public);
455 return $pop_id, $name, $desc;
460 sub store_accession {
461 my $self = shift;
462 my $accession = shift;
463 my $dbh = $self->get_dbh();
465 print STDERR "organism_id: $accession\n";
466 my ($species, $cultivar) = split (/cv|var|cv\.|var\./, $accession);
467 $species =~ s/^\s+|\s+$//;
468 $cultivar=~ s/\.//;
469 $cultivar =~ s/^\s+|\s+$//;
470 $species = ucfirst($species);
472 print STDERR "$accession: species:$species, cultivar:$cultivar\n";
473 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() },
474 { on_connect_do => ['SET search_path TO public'],
475 },);
477 my $organism = CXGN::Chado::Organism->new_with_species($schema, $species);
478 $self->check_organism($organism, $species, $cultivar);
480 my $existing_organism_id = $organism->get_organism_id();
481 my $organism_name = $organism->get_species();
483 print STDERR "chado organism: $organism_name\n";
484 eval {
485 my $sth = $dbh->prepare("SELECT accession_id, chado_organism_id, common_name
486 FROM sgn.accession
487 WHERE common_name ILIKE ?"
489 $sth->execute($cultivar);
490 my ($accession_id, $chado_organism_id, $common_name) = $sth->fetchrow_array();
491 print STDERR "select existing accession: $accession_id, $chado_organism_id, $common_name\n";
493 if ($accession_id) {
494 unless ($chado_organism_id) {
495 $sth = $dbh->prepare("UPDATE sgn.accession
496 SET chado_organism_id = ?
497 WHERE accession_id = $accession_id"
499 $sth->execute($existing_organism_id);
501 } elsif (!$accession_id) {
503 $sth = $dbh->prepare("INSERT INTO sgn.accession
504 (common_name, chado_organism_id)
505 VALUES (?,?)"
507 $sth->execute($cultivar, $existing_organism_id);
508 $accession_id = $dbh->last_insert_id("accession", "sgn");
509 #my $accession = CXGN::Accession->new($dbh, $accession_id);
510 #$common_name = $accession->accession_common_name();
511 print STDERR "inserted: $accession_id, $chado_organism_id, $common_name\n";
515 my ($accession_names_id, $accession_name);
518 unless (!$common_name) {
519 $sth = $dbh->prepare("SELECT accession_name_id, accession_name
520 FROM sgn.accession_names
521 WHERE accession_name ILIKE ?"
523 $sth->execute($common_name);
525 ($accession_names_id, $accession_name) = $sth->fetchrow_array();
526 print STDERR "selected existing accession_names: $accession_names_id, $accession_name\n";
528 unless ($accession_names_id) {
529 $sth = $dbh->prepare("INSERT INTO sgn.accession_names
530 (accession_name, accession_id)
531 VALUES (?, ?)"
533 $sth->execute($common_name, $accession_id);
535 $accession_names_id = $dbh->last_insert_id("accession_names", "sgn");
536 print STDERR "inserted accession_names : $common_name, $accession_id\n";
540 unless (!$accession_names_id) {
541 $sth = $dbh->prepare("UPDATE sgn.accession
542 SET accession_name_id = ?
543 WHERE accession_id = ?"
545 $sth->execute($accession_names_id, $accession_id);
546 print STDERR "updated accession: with $accession_names_id\n";
550 if (@_) {
551 print STDERR "@_\n";
552 $dbh->rollback();
553 return 0;
555 else {
556 $dbh->commit();
557 return $accession_id;
564 =head2 store_traits
566 Usage: my ($true_or_false) = $self->store_traits($dbh, $file, $pop_id, $sp_person_id);
567 Desc: reads traits, their definition, and unit from
568 user submitted tab-delimited traits file and stores traits
569 that does not exist in the db or exist but with different units
570 Ret: true or false
571 Args: db handle, tab delimited trait file, with full path, population_id, sp_person_id
572 Side Effects: accesses database
573 Example:
575 =cut
577 sub store_traits {
578 my $self = shift;
579 my $file = shift;
580 my $pop_id = $self->get_population_id();
581 my $sp_person_id = $self->get_sp_person_id();
582 my $dbh = $self->get_dbh();
584 open(F, "<$file") || die "Can't open file $file.";
586 my $header = <F>;
587 chomp($header);
588 my @fields = split /\t/, $header;
590 my ($trait, $trait_id, $trait_name, $unit, $unit_id);
593 if ($fields[0] ne "traits" || $fields[1] ne "definition" || $fields[2] ne "unit") {
594 my $error = "Data columns in the traits file need to be in the order of:
595 <b>trait -> definition -> unit</b>. <br/>
596 Now they are in the order of <b><i>$fields[0] -> $fields[1]
597 -> $fields[2]</i></b>.\n";
599 $self->trait_columns($error);
600 } else {
602 eval {
603 while (<F>) {
604 chomp;
605 my (@values) = split /\t/;
607 $trait = CXGN::Phenome::UserTrait->new_with_name($dbh, $values[0]);
609 if (!$trait) {
610 $trait = CXGN::Phenome::UserTrait->new($dbh);
612 $trait->set_cv_id(17);
613 $trait->set_name($values[0]);
614 $trait->set_definition($values[1]);
615 $trait->set_sp_person_id($sp_person_id);
616 $trait_id = $trait->store();
618 $trait = CXGN::Phenome::UserTrait->new($dbh, $trait_id);
619 $trait_id = $trait->get_user_trait_id();
621 unless (!$values[2]) {
622 $unit_id = $trait->get_unit_id($values[2]);
623 if (!$unit_id) {
624 $unit_id = $trait->insert_unit($values[2]);
628 if (($trait_id) && ($pop_id) && ($unit_id)) {
629 $trait->insert_user_trait_unit($trait_id, $unit_id, $pop_id);
633 } else {
635 unless (!$values[2]) {
636 $trait_id = $trait->get_user_trait_id();
637 $unit_id = $trait->get_unit_id($values[2]);
638 if (!$unit_id) {
639 $unit_id = $trait->insert_unit($values[2]);
642 if (($trait_id) && ($pop_id) && ($unit_id)) {
643 $trait->insert_user_trait_unit($trait_id, $unit_id, $pop_id);
656 if ($@) {
657 $dbh->rollback();
658 print STDERR "An error occurred storing traits: $@\n";
659 return 0;
661 else {
662 print STDERR "Committing...traits\n";
663 return 1;
664 #$dbh->commit();
671 =head2 store_individual
673 Usage: $individual_id = $self->store_individual($dbh, $name, $pop_id, $sp_person_id);
674 Desc: stores individual genotypes is they don't
675 exist in the same pop in the db
676 Ret: individual id
677 Args: db handle, individual name, population id sp_person_id
678 Side Effects: accesses database
679 Example:
681 =cut
683 sub store_individual {
684 my $self = shift;
685 my $ind_name = shift;
686 my $pop_id = $self->get_population_id();
687 my $sp_person_id = $self->get_sp_person_id();
688 my $dbh = $self->get_dbh();
689 my $common_name_id = $self->common_name_id();
691 my ($individual, $individual_id, $individual_name);
692 my @individuals = CXGN::Phenome::Individual->new_with_name($dbh, $ind_name, $pop_id);
694 eval {
695 if (scalar(@individuals) == 0) {
696 # print STDERR "Individual with name $ind_name does not
697 # exist! Storing it now...\n";
698 $individual = CXGN::Phenome::Individual->new($dbh);
699 $individual->set_name($ind_name);
700 $individual->set_population_id($pop_id);
701 $individual->set_sp_person_id($sp_person_id);
702 $individual->set_common_name_id($common_name_id);
703 $individual_id = $individual->store();
705 $individual_name = $individual->get_name();
708 elsif (scalar(@individuals) == 1) {
710 print STDERR "There is a genotype with name $ind_name
711 in the same population ($pop_id). \n";
712 die "There might be a phenotype data for the same trait
713 for the same genotype $ind_name. I can't store
714 duplicate phenotype data. So I am quitting..\n";
716 elsif (scalar(@individuals) > 0) {
717 # print STDERR "There are more than 1 individuals with the name
718 die "There are two genotypes with the same name ($ind_name)
719 in the population: $pop_id.\n";
722 if ($@) {
723 $dbh->rollback();
724 print STDERR "An error occurred storing individuals: $@\n";
725 return 0;
728 else {
729 $dbh->commit();
730 print STDERR "STORED individual $individual_name.\n";
731 return $individual;
735 =head2 store_trait_values
737 Usage: my ($true_or_false) = &store_trait_values($dbh, $file, $pop_id, $sp_person_id);
738 Desc: stores phenotype values for traits evaluated for individuals of a population.
739 Ret: true or false
740 Args: db handle, tab delimited phenotype file with full path, population id, sp_person_id
741 Side Effects: accesses database
742 Example:
744 =cut
746 sub store_trait_values {
747 my $self = shift;
748 my $file = shift;
749 my $pop_id = $self->get_population_id();
750 my $sp_person_id = $self->get_sp_person_id();
751 my $dbh = $self->get_dbh();
753 open(F, "<$file") || die "Can't open file $file.";
755 my $header = <F>;
756 chomp($header);
757 my @fields = split /\t/, $header;
759 my @trait = ();
760 my ($trait_name, $trait_id);
764 for (my $i=1; $i<@fields; $i++) {
766 $trait[$i] = CXGN::Phenome::UserTrait->new_with_name($dbh, $fields[$i]);
767 $trait_name = $trait[$i]->get_name();
768 $trait_id = $trait[$i]->get_user_trait_id();
769 # print STDERR "Created trait object $fields[$i] \n";
770 # print STDERR "Got the user_trait_id: $trait_id\n";
771 # print STDERR "population id: $pop_id\n";
774 eval {
775 while (<F>) {
776 chomp;
777 my (@values) = split /\t/;
779 my $individual = $self->store_individual($values[0]);
781 die "The genotype does not exist in the database.
782 Therefore, it can not store the associated
783 phenotype data\n"
784 unless ($individual);
786 my $individual_id = $individual->get_individual_id();
787 my $individual_name = $individual->get_name();
789 for (my $i=1; $i<@values; $i++) {
790 my $phenotype = CXGN::Chado::Phenotype->new($dbh);
791 $phenotype->set_unique_name(qq | $individual_name $pop_id .":". $i | );
792 $phenotype->set_observable_id($trait[$i]->get_user_trait_id());
793 $phenotype->set_value($values[$i]);
794 $phenotype->set_individual_id($individual_id);
795 $phenotype->set_sp_person_id($sp_person_id);
796 my $phenotype_id = $phenotype->store();
798 $trait[$i]->insert_phenotype_user_trait_ids ($trait[$i]->get_user_trait_id(), $phenotype_id);
804 if ($@) {
805 $dbh->rollback();
806 print STDERR "An error occurred storing trait values: $@\n";
807 return 0;
810 else {
811 print STDERR "Committing...trait values to tables public.phenotype
812 and user_trait_id and phenotype_id to phenotype_user_trait\n";
813 #$dbh->commit();
814 return 1;
820 =head2 store_map
822 Usage:
823 Desc:
824 Ret:
825 Args:
826 Side Effects:
827 Example:
829 =cut
831 sub store_map {
832 my $self = shift;
833 my $file = shift;
835 my $dbh = $self->get_dbh();
836 my $pop_id = $self->get_population_id();
838 my $pop = CXGN::Phenome::Population->new($dbh, $pop_id);
839 my $pop_name = $pop->get_name();
840 my $parent_m = $pop->get_male_parent_id();
841 my $parent_f = $pop->get_female_parent_id();
842 my $desc = $pop->get_description();
845 my $acc = CXGN::Accession->new($dbh, $parent_f);
846 my $female_name = $acc->accession_common_name();
847 my $chado_org_id_f = $acc->chado_organism_id();
849 $acc = CXGN::Accession->new($dbh, $parent_m);
850 my $male_name = $acc->accession_common_name();
851 my $chado_org_id_m = $acc->chado_organism_id();
854 ### put some control here if map already exists...
856 my $existing_map_id = CXGN::Map::Tools::population_map($dbh, $pop_id);
858 my ($map, $map_id, $map_version_id);
859 if ($existing_map_id) {
860 $map_version_id = CXGN::Map::Version->map_version($dbh, $existing_map_id);
861 $map = CXGN::Map->new($dbh, {map_id=>$existing_map_id});
863 } else {
864 $map = CXGN::Map->new_map($dbh, $pop_name);
865 $map_version_id = $map->{map_version_id};
867 $map_id = $map->{map_id};
870 my $species_m = $self->species($chado_org_id_m);
871 my $species_f= $self->species($chado_org_id_f);
874 print STDERR "map_id from the store_map function: $map_id\n";
875 my $long_name = $species_f . ' cv. ' . $female_name . ' x ' . $species_m . ' cv. ' . $male_name;
876 print STDERR "map long name: $long_name\n";
877 $map->{long_name}=$long_name;
878 $map->{map_type}='genetic';
879 $map->{parent_1}=$parent_f;
880 $map->{parent_2}=$parent_m;
881 $map->{abstract}=$desc;
882 $map->{population_id}=$pop_id;
883 $map_id = $map->store();
887 print STDERR " map_version_id: $map_version_id\n";
888 my $lg_result;
889 if ($map_version_id) {
890 $lg_result = $self->store_lg($map_version_id, $file);
892 if ($lg_result) {
893 print STDERR " STORED LINKAGE GROUPS\n";
895 else { print STDERR "FAILED STORING LINKAGE GROUPS\n";
899 if ($map_id && $map_version_id && $lg_result) {
900 return $map_id, $map_version_id;
902 else {
903 print STDERR "Either map or map_version or
904 linkage_groups storing did not work\n";
905 return 0;
910 =head2 species
912 Usage: my $species = $self->species($org_id)
913 Desc: when given the chado.organism_id, it returns the
914 genus and species name (in abbreviated format)
915 Ret: abbreviated species name
916 Args: chado organism id
917 Side Effects: access db
918 Example:
920 =cut
922 sub species {
923 my $self = shift;
924 my $org_id = shift;
925 my $dbh = $self->get_dbh();
927 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() },
928 { on_connect_do => ['SET search_path TO public'],
929 },);
931 my $org = CXGN::Chado::Organism->new($schema, $org_id);
933 return my $species = $org->get_abbreviation();
938 =head2 store_lg
940 Usage:
941 Desc:
942 Ret:
943 Args:
944 Side Effects:
945 Example:
947 =cut
949 sub store_lg {
950 my $self = shift;
951 my ($map_version_id, $file) = @_;
952 my $dbh = $self->get_dbh();
954 open F, "<$file" or die "can't open $file\n";
955 my $markers = <F>;
956 my $chr = <F>;
957 chomp($chr);
958 close F;
960 #print STDERR "chromosomes: $chr\n";
961 my @chrs = split /\t/, $chr;
962 @chrs = uniq @chrs;
965 die "The first cell of 2nd row must be empty." unless !$chrs[0];
966 shift(@chrs);
968 # print STDERR "chromosome 1: $chrs[0]\n";
970 my $lg = CXGN::LinkageGroup->new($dbh, $map_version_id, \@chrs);
971 my $result = $lg->store();
973 if ($result) {
974 print STDERR "Succeeded storing linkage groups
975 on map_version_id $map_version_id\n";
976 } else {
977 print STDERR "Failed storing linkage groups
978 on map_version_id $map_version_id\n";
983 =head2 store_markers
985 Usage:
986 Desc:
987 Ret:
988 Args:
989 Side Effects:
990 Example:
992 =cut
994 sub store_marker_and_position {
995 my $self = shift;
996 my ($file, $map_version_id) = @_;
997 my $dbh = $self->get_dbh();
998 open F, "<$file" or die "can't open $file\n";
1000 my $markers = <F>;
1001 my $chrs = <F>;
1002 my $positions = <F>;
1003 chomp($markers, $chrs, $positions);
1004 close F;
1006 my @markers = split /\t/, $markers;
1007 shift(@markers);
1009 my @positions = split /\t/, $positions;
1010 shift(@positions);
1012 my @chromosomes = split /\t/, $chrs;
1013 shift(@chromosomes);
1015 eval {
1016 for (my $i=0; $i<@markers; $i++) {
1017 print STDERR $markers[$i] . "\t" . $positions[$i] . "\n";
1019 my ($marker_name, $subs) = CXGN::Marker::Tools::clean_marker_name($markers[$i]);
1021 my @marker_ids = CXGN::Marker::Tools::marker_name_to_ids($dbh,$marker_name);
1022 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'"; }
1023 my($marker_id) = @marker_ids;
1025 my $marker_obj;
1026 if($marker_id) {
1027 $marker_obj = CXGN::Marker::Modifiable->new($dbh,$marker_id);
1029 else {
1030 $marker_obj = CXGN::Marker::Modifiable->new($dbh);
1031 $marker_obj->set_marker_name($marker_name);
1032 my $inserts = $marker_obj->store_new_data();
1034 if ($inserts and @{$inserts}) {
1035 # print STDERR "New marker inserted: $marker_name\n"
1037 else {
1038 die "Oops, I thought I was inserting some new data";
1040 $marker_id=$marker_obj->marker_id();
1042 # print STDERR $marker_obj->name_that_marker()."\n";
1043 my $loc=$marker_obj->new_location();
1044 my $pos = $positions[$i];
1045 my $conf ='uncalculated';
1046 my $protocol = 'unknown';
1047 $loc->marker_id($marker_id);
1049 $loc->map_version_id($map_version_id);
1050 $loc->lg_name($chromosomes[$i]);
1051 $loc->position($pos);
1052 $loc->confidence($conf);
1053 $loc->subscript($subs);
1057 $marker_obj->add_experiment({location=>$loc,protocol=>$protocol});
1058 my $inserts = $marker_obj->store_new_data();
1060 if ($inserts and @{$inserts}) {
1064 else {
1065 die "Oops, I thought I was inserting some new data";
1071 if ($@) {
1072 print STDERR $@;
1073 print STDERR "Failed loading markers and their positions; rolling back.\n";
1074 $dbh->rollback();
1075 return 0;
1077 else {
1078 print STDERR "Succeeded. loading markers and their position\n";
1080 $dbh->commit();
1081 return 1;
1087 =head2 store_genotype
1089 Usage:
1090 Desc:
1091 Ret:
1092 Args:
1093 Side Effects:
1094 Example:
1096 =cut
1098 sub store_genotype {
1099 my $self = shift;
1100 my ($file, $map_version_id) = @_;
1101 my $dbh = $self->get_dbh();
1102 my $pop_id = $self->get_population_id();
1104 open F, "<$file" or die "can't open $file\n";
1106 my $markers = <F>;
1107 my $chrs = <F>;
1108 my $positions = <F>;
1109 chomp($markers, $chrs, $positions);
1112 my @markers = split /\t/, $markers;
1113 shift(@markers);
1115 my @chrs = split /\t/, $chrs;
1116 shift(@chrs);
1118 my $pop = CXGN::Phenome::Population->new($dbh, $pop_id);
1119 my $pop_name = $pop->get_name();
1120 my $sp_person_id = $pop->get_sp_person_id();
1122 #print STDERR "sp_person_id: $sp_person_id\n";
1123 my $map = CXGN::Map->new($dbh, {map_version_id=>$map_version_id});
1124 my $map_id = $map->get_map_id();
1126 my $linkage = CXGN::LinkageGroup->new($dbh, $map_version_id);
1128 # if ($linkage) {print STDERR "linkage group... \n";}
1130 # print STDERR " map_id in store_genotype: $map_id\n";
1132 unless ($map_id) {
1133 die "I need a valid reference map before I can
1134 start loading the genotype data\n";
1138 eval {
1140 my $experiment = CXGN::Phenome::GenotypeExperiment->new($dbh);
1141 #print STDERR "store_genotype: created an experiment object!..\n";
1142 $experiment->set_background_accession_id(100);
1143 $experiment->set_experiment_name($pop_name);
1144 $experiment->set_reference_map_id($map_id);
1145 $experiment->set_sp_person_id($sp_person_id);
1146 $experiment->set_preferred(1);
1147 my $experiment_id = $experiment->store();
1148 #print STDERR "store_genotypes: experiment_id: $experiment_id\n";
1152 while (my $row = <F>) {
1153 chomp($row);
1154 my @plant_genotype = split /\t/, $row;
1155 my $plant_name = shift(@plant_genotype);
1156 # print STDERR "plant name: $plant_name\n";
1159 my @individual = CXGN::Phenome::Individual->new_with_name($dbh, $plant_name, $pop_id);
1160 # my $population_id = $individual[0]->get_population_id();
1161 my $individual_id = $individual[0]->get_individual_id();
1162 # print STDERR "plant population id: $pop_id\n";
1164 die "There are two genotypes with the same name or no genotypes
1165 in the same population. Can't assign genotype values."
1166 unless (scalar(@individual) == 1);
1169 if ($individual[0]) {
1170 #if ($population_id != $pop_id) {
1171 # die "Individual $plant_genotype[0] is not from population $pop_id\n";
1173 #print STDERR "Individual $plant_genotype[0] is not from population $pop_id\n";
1174 my $genotype = CXGN::Phenome::Genotype->new($dbh);
1176 # print STDERR "experiment_id is $experiment_id\n";
1177 $genotype->set_genotype_experiment_id($experiment_id);
1178 $genotype->set_individual_id($individual_id);
1179 # print STDERR "individual_id: $individual_id\n";
1180 #$genotype->set_experiment_name($pop_name);
1181 #$genotype->set_reference_map_id($map_id);
1182 #$genotype->set_sp_person_id($sp_person_id);
1183 my $genotype_id = $genotype->store();
1185 #print STDERR "store_genotype: genotype_id: $genotype_id\n";
1187 my $mapmaker_genotype;
1188 for (my $i=0; $i<@plant_genotype; $i++) {
1189 my $genotype_region = CXGN::Phenome::GenotypeRegion->new($dbh);
1190 # print STDERR "counting $i\n";
1191 my $marker_name = CXGN::Marker::Tools::clean_marker_name($markers[$i]);
1192 my $marker = CXGN::Marker->new_with_name($dbh, $marker_name);
1193 #print STDERR "marker: $marker_name\n";
1194 my $c = $chrs[$i];
1195 #print STDERR "linkage_name: $c\n";
1196 my $lg_id = $linkage->get_lg_id($chrs[$i]);
1198 #print STDERR "store_genotype: lg_id: $lg_id\n";
1199 if (!$plant_genotype[$i] || ($plant_genotype[$i] =~/\-/)) {
1200 # print STDERR "No zygocity information for:
1201 # $plant_name\t$markers[$i]\t$chrs[$i]
1202 # .....Skipping...\n";
1203 next();
1207 $genotype_region->set_genotype_id($genotype_id);
1208 $genotype_region->set_marker_id_nn($marker->marker_id());
1209 $genotype_region->set_marker_id_ns($marker->marker_id());
1210 $genotype_region->set_marker_id_sn($marker->marker_id());
1211 $genotype_region->set_marker_id_ss($marker->marker_id());
1212 $genotype_region->set_lg_id($lg_id);
1213 $genotype_region->set_sp_person_id($sp_person_id);
1214 # print STDERR "Setting genotype values: $plant_name\t$markers[$i]\t
1215 # $chrs[$i]\t$plant_genotype[$i]\n";
1217 if ($i == 0) {
1218 if ($plant_genotype[$i] =~/\d/) {
1219 $mapmaker_genotype = 1;
1221 elsif ($plant_genotype[$i] =~/\D/) {
1222 $mapmaker_genotype = undef;
1226 if ($mapmaker_genotype) {
1227 $genotype_region->set_mapmaker_zygocity_code($plant_genotype[$i]);
1229 else {
1230 $genotype_region->set_zygocity_code($plant_genotype[$i]);
1232 $genotype_region->set_type("map");
1233 $genotype_region->store();
1237 else {
1238 die "There is mismatch between the list of genotypes/lines ($plant_genotype[0]
1239 in your phenotype and genotype datasets\n";
1245 if ($@) {
1246 $dbh->rollback();
1247 print STDERR "An error occurred loading genotype data:
1248 $@. ROLLED BACK CHANGES.\n";
1249 return undef;
1251 else {
1252 print STDERR "All is fine. Committing...genotype data\n";
1253 $dbh->commit();
1254 return 1;
1259 =head2 accessors get_sp_person_id, set_sp_person_id
1261 Usage:
1262 Desc:
1263 Property
1264 Side Effects:
1265 Example:
1267 =cut
1269 sub get_sp_person_id {
1270 my $self = shift;
1271 return $self->{sp_person_id};
1274 sub set_sp_person_id {
1275 my $self = shift;
1276 $self->{sp_person_id} = shift;
1279 =head2 accessors get_population_id, set_population_id
1281 Usage:
1282 Desc:
1283 Property
1284 Side Effects:
1285 Example:
1287 =cut
1289 sub get_population_id {
1290 my $self = shift;
1291 return $self->{population_id};
1294 sub set_population_id {
1295 my $self = shift;
1296 $self->{population_id} = shift;
1299 =head2 accessors get_dbh, set_dbh
1301 Usage:
1302 Desc:
1303 Property
1304 Side Effects:
1305 Example:
1307 =cut
1309 sub get_dbh {
1310 my $self = shift;
1311 return $self->{dbh};
1314 sub set_dbh {
1315 my $self = shift;
1316 $self->{dbh} = shift;
1320 =head2 common_name_id
1322 Usage:
1323 Desc:
1324 Ret:
1325 Args:
1326 Side Effects:
1327 Example:
1329 =cut
1331 sub common_name_id {
1332 my $self = shift;
1333 my $sp_person_id = $self->get_sp_person_id();
1334 my $qtl = CXGN::Phenome::Qtl->new($sp_person_id);
1335 my $c = SGN::Context->new();
1336 my ($qtl_dir, $user_qtl_dir) = $qtl->get_user_qtl_dir($c);
1338 my $id;
1339 if (-e "$user_qtl_dir/organism.txt") {
1340 open C, "<$user_qtl_dir/organism.txt" or die "Can't open file: !$\n";
1342 my $row = <C>;
1343 if ($row =~/(\d)/) {
1344 $id = $1;
1347 close C;
1349 return $id;
1350 } else {
1351 return 0;
1356 =head2 error_page
1358 Usage: $self->error_page(@errors);
1359 Desc: when feed with error messages, it generates an
1360 error page with a list of the messages and a link
1361 back to the previous page where required data field(s)
1362 was/were not properly filled.
1363 Ret: page with the appropriate message
1364 Args: a list of messages to print...
1365 Side Effects:
1366 Example:
1368 =cut
1370 sub error_page {
1371 my $self = shift;
1372 my @error = @_;
1373 my $page = CXGN::Page->new();
1374 my ($messages, $count);
1376 my $guide = $self->guideline();
1377 if (@error) {
1378 $page->header();
1380 print page_title_html("Missing Data:");
1381 $messages .="<p>Data for the following field(s) is missing: </p>";
1382 my $count=1;
1383 foreach my $e (@error) {
1385 $messages .= $count . ")" ."\t" . $e ."." ."<br />";
1386 $count++;
1389 $messages .= qq | <p><a href="javascript:history.go(-1)">
1390 Please go back and fill in the missing information.</a> </p>|;
1392 print info_section_html(subtitle=>$guide,
1393 contents =>$messages,
1395 $page->footer();
1402 =head2 check_organism
1404 Usage: $self->check_organism($organism);
1405 Desc: checks if organism object is defined and if the
1406 the parental species is supported by sgn (chado.organism).
1407 in case organism object is not defined (a query to the chado.organism
1408 using a species name does not return a value, it generates
1409 a page with advise to check for the spelling of the scientific species name.
1410 Ret: a page with advice
1411 Args: organism object, species name, cultivar name
1412 Side Effects:
1413 Example:
1415 =cut
1417 sub check_organism {
1418 my $self = shift;
1419 my $organism = shift;
1420 my $species = shift;
1421 my $cultivar = shift;
1423 my $guide = $self->guideline();
1424 unless (!$cultivar) {
1425 $cultivar = " cv. $cultivar";
1428 if (!$organism) {
1429 my $page = CXGN::Page->new("SGN", "Isaak");
1431 $page->header();
1433 print page_title_html("Problem with parental accessions......");
1435 my $messages .= "It appears that SGN currently does not support
1436 this species (<b><i>$species</i>$cultivar</b>).<br/>
1437 As a first step, please make sure you have spelled
1438 the species correctly.</p><p> Read also the guidline for the
1439 nomenclature format you have to use for the parental lines.</p>";
1441 $messages .= qq |<p>
1442 Please go <a href="javascript:history.go(-1)">back</a>
1443 and check its spelling or if you keep having problem
1444 with it contact us.</p>|;
1446 print info_section_html(subtitle=>$guide, contents => $messages,);
1448 $page->footer();
1449 exit ();
1451 else {
1452 #do nothing..relax
1457 =head2 population_exists
1459 Usage: $self->population_exists($population);
1460 Desc: checks if there is already a population with the same name
1461 and if so it generates a page with the appropriate advice to the user
1463 Ret: a page with advice
1464 Args: population object, population name
1465 Side Effects:
1466 Example:
1468 =cut
1470 sub population_exists {
1471 my $self = shift;
1472 my $pop = shift;
1473 my $name = shift;
1474 my $guide = $self->guideline();
1476 if ($pop) {
1477 my $page = CXGN::Page->new("SGN", "Isaak");
1479 $page->header();
1481 print page_title_html("Population name...");
1483 my $messages .= "It appears that a population <b>$name</></b> already
1484 exists in the database. To continue loading QTL data
1485 for a new population, try with a different population
1486 name.</p>
1487 <p>If you are trying to load more or change data to an exising population
1488 contact us.</p>";
1490 $messages .= qq |<p>
1491 Please go <a href="javascript:history.go(-1)">back</a>
1492 and try to use a different name or if you keep having problem
1493 with it contact us.</p>|;
1495 print info_section_html(subtitle=>$guide, contents => $messages,);
1497 $page->footer();
1498 exit ();
1503 sub guideline {
1504 my $self = shift;
1505 return my $guideline = qq |<a href="http://docs.google.com/View?id=dgvczrcd_1c479cgfb">Guidelines</a> |;
1510 =head2 trait_columns
1512 Usage: $self->trait_columns($error);
1513 Desc: checks if the trait file has the right order
1514 of data columns and if not advises the submitter
1515 with the appropriate message
1517 Ret: a page with advice
1518 Args: text message
1519 Side Effects:
1520 Example:
1522 =cut
1523 sub trait_columns {
1524 my $self = shift;
1525 my $trait_error = shift;
1526 my $guide = $self->guideline();
1528 if ($trait_error) {
1529 my $page = CXGN::Page->new("SGN", "Isaak");
1531 $page->header();
1533 print page_title_html("Traits file...");
1535 my $messages .= $trait_error;
1537 $messages .= qq |<p>
1538 Please go <a href="javascript:history.go(-1)"><b>back</b></a>
1539 and rearrange the order of the columns and upload the file
1540 again or if you keep having problem with it contact us.</p>|;
1542 print info_section_html(subtitle=>$guide, contents => $messages,);
1544 $page->footer();
1545 exit ();
1549 =head2 accessors compare_file_names
1551 Usage: $f = $self->compare_file_names($file1, $file2);
1552 Desc: useful for checking if data files submitted for the traits,
1553 phenotype and genotype are different. helpful to avoid indvertent
1554 uploading of the same file for different fields.
1556 Ret: a page with advice if the same files are uploaded
1557 Args: file names to compare
1558 Side Effects:
1559 Example:
1561 =cut
1563 sub compare_file_names {
1564 my $self = shift;
1565 my ($file1, $file2) = @_;
1567 my $guide = $self->guideline();
1569 unless ($file1 ne $file2) {
1571 my $page = CXGN::Page->new("SGN", "Isaak");
1573 $page->header();
1575 print page_title_html("Data files...");
1577 my $messages .= qq |You are trying to upload file(s) with the same name <b>($file1 and $file2)</b>
1578 for this step and one of the steps before it.|;
1580 $messages .= qq |<p>
1581 Please go <a href="javascript:history.go(-1)"><b>back</b></a>
1582 and check the file you are trying to upload
1583 or if you keep having problem with it contact us.</p>|;
1585 print info_section_html(subtitle=>$guide, contents => $messages,);
1587 $page->footer();
1588 exit ();
1591 return 0;
1597 =head2 send_email
1599 Usage: $self->send_email($subj, $message, $pop_id);
1600 Desc: sends email at each step of the qtl data upload
1601 process..
1602 Ret: nothing
1603 Args: subject, message, population_id
1604 Side Effects:
1605 Example:
1607 =cut
1609 sub send_email {
1610 my $self = shift;
1611 my ($subj, $message, $pop_id) = @_;
1612 my $dbh = $self->get_dbh();
1613 my $sp_person_id = $self->get_sp_person_id();
1614 my $person = CXGN::People::Person->new($dbh, $sp_person_id);
1616 my $user_profile = qq |http://solgenomics.net/solpeople/personal-info.pl?sp_person_id=$sp_person_id |;
1618 my $username= $person->get_first_name()." ".$person->get_last_name();
1619 $message .= qq |\nQTL population id: $pop_id \nQTL data owner: $username ($user_profile) |;
1621 print STDERR "\n$subj\n$message\n";
1622 CXGN::Contact::send_email($subj,$message, 'sgn-db-curation@sgn.cornell.edu');