4 download_trials.pl - script to download trials
8 perl download_trials.pl -i trial_id -H host -D dbname -U dbuser -P dbpass
10 Downloads trials whose ids are provided as a comma separated list for the -i parameter.
14 Lukas Mueller <lam87@cornell.edu>
23 use Bio
::Chado
::Schema
;
24 use CXGN
::Metadata
::Schema
;
25 use CXGN
::Phenome
::Schema
;
26 use CXGN
::DB
::InsertDBH
;
29 our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_b, $opt_i, $opt_n, $opt_t, $opt_r);
31 getopts
('H:D:U:P:b:i:t:r:n');
37 my $trial_ids = $opt_i;
38 my $trial_names = $opt_t;
39 my $non_interactive = $opt_n;
41 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
43 dbargs
=> {AutoCommit
=> 0,
48 print STDERR
"Connecting to database...\n";
49 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
50 my $metadata_schema = CXGN
::Metadata
::Schema
->connect( sub { $dbh->get_actual_dbh() });
51 my $phenome_schema = CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh() });
53 my @trial_ids = split ",", $trial_ids;
54 my @trial_names = split ",", $trial_names;
56 foreach my $name (@trial_names) {
57 my $trial = $schema->resultset("Project::Project")->find( { name
=> $name });
58 if (!$trial) { print STDERR
"Trial $name not found. Skipping...\n"; next; }
59 push @trial_ids, $trial->project_id();
66 foreach my $trial_id (@trial_ids) {
67 print STDERR
"Retrieving trial information for trial $trial_id...\n";
69 my $t = CXGN
::Trial
->new({
70 bcs_schema
=> $schema,
71 metadata_schema
=> $metadata_schema,
72 phenome_schema
=> $phenome_schema,
76 my $location = $t->get_location();
78 my $breeding_programs = $t->get_breeding_programs();
80 my $breeding_program_name = $t->get_breeding_program();
82 my $planting_date = $t->get_planting_date();
84 my $harvest_date = $t->get_harvest_date();
86 my $breeding_program_id;
87 my $breeding_program_description;
89 foreach my $bp (@
$breeding_programs) {
90 if ($bp->[1] eq $breeding_program_name) {
91 $breeding_program_id = $bp->[0];
92 $breeding_program_description = $bp->[2];
96 my $trial_name = $t->get_name();
98 my $traits = $t->get_traits_assayed();
100 my $year = $t->get_year();
102 my $trial_id = $t->get_trial_id();
104 my $design_type = $t->get_design_type();
106 my $plot_width = $t->get_plot_width();
108 my $plot_length = $t->get_plot_length();
110 print STDERR
"Traits assayed = ".Dumper
($traits);
112 my @trait_names = map { $_->[1] } @
$traits;
113 my @trait_ids = map { $_->[0] } @
$traits;
115 print STDERR
"trait_ids = ". Dumper
(\
@trait_ids);
117 my $data = $t->get_stock_phenotypes_for_traits(\
@trait_ids, 'all', ['plot_of','plant_of'], 'accession', 'subject');
119 print STDERR Dumper
($data);
122 $trial_data{$trial_id} = $data;
123 $trial_cols{$trial_id} = [ $year, $breeding_program_id, $breeding_program_name, $breeding_program_description, $trial_id, $trial_name, $design_type, $plot_width, $plot_length, '', '', '', $planting_date, $harvest_date, $location->[0], $location->[1] ];
128 my @trial_header = qw
| studyYear programDbId breeding_programName programDescription studyDbId studyName studyDesign plotWidth plotLength fieldSize fieldTrialIsPlannedToBeGenotyped fieldTrialIsPlannedToCross plantingDate harvestDate
, locationDbId
, locationName
|;
130 # first organize traits in hash structure
135 foreach my $trial_id (keys %trial_data) {
136 foreach my $line (@
{$trial_data{$trial_id}}) {
137 # keys: {trial_id} -> {accession}-> {plot} -> {trait} = value
138 $obs{$trial_id}->{$line->[9]}->{$line->[1]}->{$line->[3]} = $line->[7];
139 $traits{$line->[3]}++;
140 $plots{$line->[1]} = $line->[0];
149 foreach my $p (keys %plots) {
150 my $rs= $schema->resultset("Stock::Stockprop")->search( { stock_id
=> $plots{$p} }, { join => 'type', '+select' => 'type.name', '+as'=> 'cvterm_name' });
152 while (my $row = $rs->next()) {
153 print STDERR
"stockprop: ".$row->get_column("cvterm_name"). " ".$row->value()."\n";
154 $plot_data{$p}->{$row->get_column("cvterm_name")} = $row->value();
159 print STDERR
"observations: ".Dumper
(\
%obs);
161 print STDERR
"Traits: ".Dumper
(\
%traits);
163 print join("\t", (@trial_header, 'accession', 'plot', 'replicate', 'blockNumber', 'plotNumber', 'rowNumber', 'colNumber', 'entryType', sort(keys(%traits))))."\n";
165 foreach my $trial_id (keys(%obs)) {
167 foreach my $accession (keys %{$obs{$trial_id}}) {
169 foreach my $plot (keys %{$obs{$trial_id}->{$accession}}) {
172 my @out = ( @
{$trial_cols{$trial_id}}, $accession, $plot );
174 foreach my $prop (qw
| replicate block_number plot_number row_number col_number entry_type
|) {
175 push @out, $plot_data{$plot}->{$prop};
179 foreach my $trait (sort(keys %traits)) {
180 push @out, $obs{$trial_id}->{$accession}->{$plot}->{$trait};
182 print join("\t", @out)."\n";
188 #print STDERR "spreadsheet : ". Dumper(\@spreadsheet);