make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / download_trials.pl
blob423198d8db38a313ddecf8f350e743d72b3d90ab
2 =head1 NAME
4 download_trials.pl - script to download trials
6 =head1 DESCRIPTION
8 perl download_trials.pl -i trial_id -H host -D dbname -U dbuser -P dbpass
10 Downloads trials whose ids are provided as a comma separated list for the -i parameter.
12 =head1 AUTHOR
14 Lukas Mueller <lam87@cornell.edu>
16 =cut
18 use strict;
20 use Getopt::Std;
21 use Data::Dumper;
23 use Bio::Chado::Schema;
24 use CXGN::Metadata::Schema;
25 use CXGN::Phenome::Schema;
26 use CXGN::DB::InsertDBH;
27 use CXGN::Trial;
29 our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_b, $opt_i, $opt_n, $opt_t, $opt_r);
31 getopts('H:D:U:P:b:i:t:r:n');
33 my $dbhost = $opt_H;
34 my $dbname = $opt_D;
35 my $dbuser = $opt_U;
36 my $dbpass = $opt_P;
37 my $trial_ids = $opt_i;
38 my $trial_names = $opt_t;
39 my $non_interactive = $opt_n;
41 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
42 dbname=>$dbname,
43 dbargs => {AutoCommit => 0,
44 RaiseError => 1}
48 print STDERR "Connecting to database...\n";
49 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
50 my $metadata_schema = CXGN::Metadata::Schema->connect( sub { $dbh->get_actual_dbh() });
51 my $phenome_schema = CXGN::Phenome::Schema->connect( sub { $dbh->get_actual_dbh() });
53 my @trial_ids = split ",", $trial_ids;
54 my @trial_names = split ",", $trial_names;
56 foreach my $name (@trial_names) {
57 my $trial = $schema->resultset("Project::Project")->find( { name => $name });
58 if (!$trial) { print STDERR "Trial $name not found. Skipping...\n"; next; }
59 push @trial_ids, $trial->project_id();
62 my @spreadsheet;
63 my %trial_data;
64 my %trial_cols;
66 foreach my $trial_id (@trial_ids) {
67 print STDERR "Retrieving trial information for trial $trial_id...\n";
69 my $t = CXGN::Trial->new({
70 bcs_schema => $schema,
71 metadata_schema => $metadata_schema,
72 phenome_schema => $phenome_schema,
73 trial_id => $trial_id
74 });
76 my $location = $t->get_location();
78 my $breeding_programs = $t->get_breeding_programs();
80 my $breeding_program_name = $t->get_breeding_program();
82 my $planting_date = $t->get_planting_date();
84 my $harvest_date = $t->get_harvest_date();
86 my $breeding_program_id;
87 my $breeding_program_description;
89 foreach my $bp (@$breeding_programs) {
90 if ($bp->[1] eq $breeding_program_name) {
91 $breeding_program_id = $bp->[0];
92 $breeding_program_description = $bp->[2];
96 my $trial_name = $t->get_name();
98 my $traits = $t->get_traits_assayed();
100 my $year = $t->get_year();
102 my $trial_id = $t->get_trial_id();
104 my $design_type = $t->get_design_type();
106 my $plot_width = $t->get_plot_width();
108 my $plot_length = $t->get_plot_length();
110 print STDERR "Traits assayed = ".Dumper($traits);
112 my @trait_names = map { $_->[1] } @$traits;
113 my @trait_ids = map { $_->[0] } @$traits;
115 print STDERR "trait_ids = ". Dumper(\@trait_ids);
117 my $data = $t->get_stock_phenotypes_for_traits(\@trait_ids, 'all', ['plot_of','plant_of'], 'accession', 'subject');
119 print STDERR Dumper($data);
122 $trial_data{$trial_id} = $data;
123 $trial_cols{$trial_id} = [ $year, $breeding_program_id, $breeding_program_name, $breeding_program_description, $trial_id, $trial_name, $design_type, $plot_width, $plot_length, '', '', '', $planting_date, $harvest_date, $location->[0], $location->[1] ];
128 my @trial_header = qw | studyYear programDbId breeding_programName programDescription studyDbId studyName studyDesign plotWidth plotLength fieldSize fieldTrialIsPlannedToBeGenotyped fieldTrialIsPlannedToCross plantingDate harvestDate, locationDbId, locationName |;
130 # first organize traits in hash structure
131 my %obs;
132 my %traits;
133 my %plots;
134 my %plot_ids;
135 foreach my $trial_id (keys %trial_data) {
136 foreach my $line (@{$trial_data{$trial_id}}) {
137 # keys: {trial_id} -> {accession}-> {plot} -> {trait} = value
138 $obs{$trial_id}->{$line->[9]}->{$line->[1]}->{$line->[3]} = $line->[7];
139 $traits{$line->[3]}++;
140 $plots{$line->[1]} = $line->[0];
146 # get plot metadata
148 my %plot_data;
149 foreach my $p (keys %plots) {
150 my $rs= $schema->resultset("Stock::Stockprop")->search( { stock_id => $plots{$p} }, { join => 'type', '+select' => 'type.name', '+as'=> 'cvterm_name' });
152 while (my $row = $rs->next()) {
153 print STDERR "stockprop: ".$row->get_column("cvterm_name"). " ".$row->value()."\n";
154 $plot_data{$p}->{$row->get_column("cvterm_name")} = $row->value();
159 print STDERR "observations: ".Dumper(\%obs);
161 print STDERR "Traits: ".Dumper(\%traits);
163 print join("\t", (@trial_header, 'accession', 'plot', 'replicate', 'blockNumber', 'plotNumber', 'rowNumber', 'colNumber', 'entryType', sort(keys(%traits))))."\n";
165 foreach my $trial_id (keys(%obs)) {
167 foreach my $accession (keys %{$obs{$trial_id}}) {
169 foreach my $plot (keys %{$obs{$trial_id}->{$accession}}) {
172 my @out = ( @{$trial_cols{$trial_id}}, $accession, $plot );
174 foreach my $prop (qw| replicate block_number plot_number row_number col_number entry_type |) {
175 push @out, $plot_data{$plot}->{$prop};
179 foreach my $trait (sort(keys %traits)) {
180 push @out, $obs{$trial_id}->{$accession}->{$plot}->{$trait};
182 print join("\t", @out)."\n";
188 #print STDERR "spreadsheet : ". Dumper(\@spreadsheet);