phenotype download will print 0 values, while leaving empty values empty
[sgn.git] / lib / CXGN / BreederSearch.pm
blob4fade0c0c068e5e1e1b73b24a97fdc75ca036ceb
1 =head1 NAME
3 CXGN::BreederSearch - class for retrieving breeder information for the breeder search wizard
5 =head1 AUTHORS
7 Lukas Mueller <lam87@cornell.edu>
8 Aimin Yan <ay247@cornell.edu>
10 =head1 METHODS
12 =cut
14 package CXGN::BreederSearch;
16 use Moose;
17 use Data::Dumper;
19 has 'dbh' => (
20 is => 'rw',
21 required => 1,
23 has 'dbname' => (
24 is => 'rw',
25 isa => 'Str',
28 =head2 metadata_query
30 Usage: my %info = $bs->metadata_query($criteria_list, $dataref, $queryref);
31 Desc:
32 Ret: returns a hash with a key called results that contains
33 a listref of listrefs specifying the matching list with ids
34 and names.
35 Args: criteria_list: a comma separated string called a criteria_list,
36 listing all the criteria that need to be applied. Possible
37 criteria are trials, years, traits, and locations. The last
38 criteria in the list is the return type.
39 dataref: The dataref is a hashref of hashrefs. The first key
40 is the target of the transformation, and the second is the
41 source type of the transformation, containing comma separated
42 values of the source type.
43 queryref: same structure as dataref, but instead of storing ids it stores a
44 1 if user requested intersect, or 0 for default union
45 Side Effects: will create a materialized view of the ontology corresponding to
46 $db_name
47 Example:
49 =cut
51 sub metadata_query {
52 my $self = shift;
53 my $criteria_list = shift;
54 my $dataref = shift;
55 my $queryref = shift;
56 my $h;
57 print STDERR "criteria_list=" . Dumper($criteria_list);
58 print STDERR "dataref=" . Dumper($dataref);
59 print STDERR "queryref=" . Dumper($queryref);
61 my $target_table = $criteria_list->[-1];
62 print STDERR "target_table=". $target_table . "\n";
63 my $target = $target_table;
64 $target =~ s/s$//;
66 my $select = "SELECT ".$target."_id, ".$target."_name ";
67 my $group = "GROUP BY ".$target."_id, ".$target."_name ";
69 my $full_query;
70 if (!$dataref->{"$target_table"}) {
71 my $from = "FROM public.". $target_table;
72 my $where = " WHERE ".$target."_id IS NOT NULL";
73 $full_query = $select . $from . $where;
75 else {
76 my @queries;
77 foreach my $category (@$criteria_list) {
79 if ($dataref->{$criteria_list->[-1]}->{$category}) {
80 my $query;
81 my @categories = ($target_table, $category);
82 @categories = sort @categories;
83 my $from = "FROM public.". $categories[0] ."x". $categories[1] . " JOIN public." . $target_table . " USING(" . $target."_id) ";
84 my $criterion = $category;
85 $criterion =~ s/s$//;
86 my $intersect = $queryref->{$criteria_list->[-1]}->{$category};
88 if ($intersect) {
89 my @parts;
90 my @ids = split(/,/, $dataref->{$criteria_list->[-1]}->{$category});
91 foreach my $id (@ids) {
92 my $where = "WHERE ". $criterion. "_id IN (". $id .") ";
93 my $statement = $select . $from . $where . $group;
94 push @parts, $statement;
96 $query = join (" INTERSECT ", @parts);
97 push @queries, $query;
99 else {
100 my $where = "WHERE ". $criterion. "_id IN (" . $dataref->{$criteria_list->[-1]}->{$category} . ") ";
101 $query = $select . $from . $where . $group;
102 push @queries, $query;
106 $full_query = join (" INTERSECT ", @queries);
108 $full_query .= " ORDER BY 2";
109 print STDERR "QUERY: $full_query\n";
110 $h = $self->dbh->prepare($full_query);
111 $h->execute();
113 my @results;
114 while (my ($id, $name) = $h->fetchrow_array()) {
115 push @results, [ $id, $name ];
118 if (@results >= 10_000) {
119 return { error => scalar(@results).' matches. Too many results to display' };
121 elsif (@results < 1) {
122 return { error => scalar(@results).' matches. No results to display' };
124 else {
125 return { results => \@results };
129 =head2 refresh_matviews
131 parameters: None.
133 returns: success or error message
135 Side Effects: refreshes matertialized_fullview and all of the smaller materialized views that are based on it and used in the wizard.
137 =cut
139 sub refresh_matviews {
140 my $self = shift;
142 my $q = "SELECT currently_refreshing FROM public.matviews WHERE mv_id=?";
143 my $h = $self->dbh->prepare($q);
144 $h->execute(1);
146 my $refreshing = $h->fetchrow_array();
148 if ($refreshing) {
149 return { error => 'Wizard update already in progress . . . ' };
151 else {
152 my $connect = "SELECT dblink_connect_u('dbname=".$self->dbname."')";
153 my $send_query = "SELECT dblink_send_query('SELECT refresh_materialized_views()')";
155 $self->dbh->do($connect);
156 $h = $self->dbh->do($send_query);
158 if ($h != 1) {
159 return { error => 'Error initiating wizard update.'};
161 else {
162 $q = "UPDATE public.matviews SET currently_refreshing=?";
163 my $true = 'TRUE';
164 my $h = $self->dbh->prepare($q);
165 $h->execute($true);
167 print STDERR "materialized fullview status updated to refreshing\n";
168 return { message => 'Wizard update initiated' };
173 sub matviews_status {
174 my $self = shift;
175 my $q = "SELECT currently_refreshing, last_refresh FROM public.matviews WHERE mv_id=?";
176 my $h = $self->dbh->prepare($q);
177 $h->execute(1);
179 my ($refreshing, $timestamp) = $h->fetchrow_array();
181 if ($refreshing) {
182 print STDERR "Wizard is already refreshing, current status: $refreshing \n";
183 return { refreshing => "<p id='wizard_status'>Wizard update in progress . . . </p>"};
185 else {
186 print STDERR "materialized fullview last updated $timestamp\n";
187 return { timestamp => "<p id='wizard_status'>Wizard last updated: $timestamp</p>" };
191 sub get_phenotype_info {
192 my $self = shift;
193 my $accession_sql = shift;
194 my $trial_sql = shift;
195 my $trait_sql = shift;
197 print STDERR "GET_PHENOTYPE_INFO: $accession_sql - $trial_sql - $trait_sql \n\n";
199 my $rep_type_id = $self->get_stockprop_type_id("replicate");
200 my $block_number_type_id = $self -> get_stockprop_type_id("block");
201 my $year_type_id = $self->get_projectprop_type_id("project year");
204 my @where_clause = ();
205 if ($accession_sql) { push @where_clause, "stock.stock_id in ($accession_sql)"; }
206 if ($trial_sql) { push @where_clause, "project.project_id in ($trial_sql)"; }
207 if ($trait_sql) { push @where_clause, "cvterm.cvterm_id in ($trait_sql)"; }
209 my $where_clause = "";
211 if (@where_clause>0) {
212 $where_clause .= $rep_type_id ? "WHERE (stockprop.type_id = $rep_type_id OR stockprop.type_id IS NULL) " : "WHERE stockprop.type_id IS NULL";
213 $where_clause .= $block_number_type_id ? " AND (block_number.type_id = $block_number_type_id OR block_number.type_id IS NULL)" : " AND block_number.type_id IS NULL";
214 $where_clause .= $year_type_id ? " AND projectprop.type_id = $year_type_id" :"" ;
215 $where_clause .= " AND " . (join (" AND " , @where_clause));
217 #$where_clause = "where (stockprop.type_id=$rep_type_id or stockprop.type_id IS NULL) AND (block_number.type_id=$block_number_type_id or block_number.type_id IS NULL) AND ".(join (" and ", @where_clause));
220 my $order_clause = " order by project.name, plot.uniquename";
221 my $q = "SELECT projectprop.value, project.name, stock.uniquename, nd_geolocation.description, cvterm.name, phenotype.value, plot.uniquename, db.name, db.name || ':' || dbxref.accession AS accession, stockprop.value, block_number.value AS rep, cvterm.cvterm_id, project.project_id, nd_geolocation.nd_geolocation_id, stock.stock_id, plot.stock_id
222 FROM stock as plot JOIN stock_relationship ON (plot.stock_id=subject_id)
223 JOIN stock ON (object_id=stock.stock_id)
224 LEFT JOIN stockprop ON (plot.stock_id=stockprop.stock_id)
225 LEFT JOIN stockprop AS block_number ON (plot.stock_id=block_number.stock_id)
226 JOIN nd_experiment_stock ON(nd_experiment_stock.stock_id=plot.stock_id)
227 JOIN nd_experiment ON (nd_experiment_stock.nd_experiment_id=nd_experiment.nd_experiment_id)
228 JOIN nd_geolocation USING(nd_geolocation_id)
229 JOIN nd_experiment_phenotype ON (nd_experiment_phenotype.nd_experiment_id=nd_experiment.nd_experiment_id)
230 JOIN phenotype USING(phenotype_id) JOIN cvterm ON (phenotype.cvalue_id=cvterm.cvterm_id)
231 JOIN cv USING(cv_id)
232 JOIN dbxref ON (cvterm.dbxref_id = dbxref.dbxref_id)
233 JOIN db USING(db_id)
234 JOIN nd_experiment_project ON (nd_experiment_project.nd_experiment_id=nd_experiment.nd_experiment_id)
235 JOIN project USING(project_id)
236 JOIN projectprop USING(project_id)
237 $where_clause
238 $order_clause";
240 #print STDERR "QUERY: $q\n\n";
241 my $h = $self->dbh()->prepare($q);
242 $h->execute();
244 my $result = [];
245 while (my ($year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id) = $h->fetchrow_array()) {
246 push @$result, [ $year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id ];
249 #print STDERR Dumper $result;
250 print STDERR "QUERY returned ".scalar(@$result)." rows.\n";
251 return $result;
254 sub get_phenotype_info_matrix {
255 my $self = shift;
256 my $accession_sql = shift;
257 my $trial_sql = shift;
258 my $trait_sql = shift;
260 my $data = $self->get_phenotype_info($accession_sql, $trial_sql, $trait_sql);
261 #data contains [$year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id]
263 my %plot_data;
264 my %traits;
266 foreach my $d (@$data) {
267 print STDERR "PRINTING TRAIT DATA FOR TERM " . $d->[4] . "\n\n";
268 my $cvterm = $d->[4]."|".$d->[8];
269 my $trait_data = $d->[5];
270 my $plot = $d->[6];
271 $plot_data{$plot}->{$cvterm} = $trait_data;
272 $traits{$cvterm}++;
275 my @info = ();
276 my $line = "";
278 # generate header line
280 my @sorted_traits = sort keys(%traits);
281 foreach my $trait (@sorted_traits) {
282 $line .= "\t".$trait; # first header has to be empty (plot name column)
284 push @info, $line;
286 # dump phenotypic values
288 my $count2 = 0;
289 foreach my $plot (sort keys (%plot_data)) {
290 $line = $plot;
292 foreach my $trait (@sorted_traits) {
293 my $tab = $plot_data{$plot}->{$trait}; # ? "\t".$plot_data{$plot}->{$trait} : "\t";
294 $line .= defined($tab) ? "\t".$tab : "\t";
297 push @info, $line;
300 return @info;
303 sub get_extended_phenotype_info_matrix {
304 my $self = shift;
305 my $accession_sql = shift;
306 my $trial_sql = shift;
307 my $trait_sql = shift;
309 my $data = $self->get_phenotype_info($accession_sql, $trial_sql, $trait_sql);
310 #data contains [$year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id]
312 my %plot_data;
313 my %traits;
315 print STDERR "No of lines retrieved: ".scalar(@$data)."\n";
316 foreach my $d (@$data) {
318 my ($year, $project_name, $stock_name, $location, $trait, $trait_data, $plot, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id) = @$d;
320 my $cvterm = $d->[4]."|".$d->[8];
321 if (!defined($rep)) { $rep = ""; }
322 $plot_data{$plot}->{$cvterm} = $trait_data;
323 $plot_data{$plot}->{metadata} = {
324 rep => $rep,
325 studyName => $project_name,
326 germplasmName => $stock_name,
327 locationName => $location,
328 blockNumber => $block_number,
329 plotName => $plot,
330 cvterm => $cvterm,
331 trait_data => $trait_data,
332 year => $year,
333 cvterm_id => $trait_id,
334 studyDbId => $project_id,
335 locationDbId => $location_id,
336 germplasmDbId => $stock_id,
337 plotDbId => $plot_id
339 $traits{$cvterm}++;
342 my @info = ();
343 my $line = join "\t", qw | year studyDbId studyName locationDbId locationName germplasmDbId germplasmName plotDbId plotName rep blockNumber |;
345 # generate header line
347 my @sorted_traits = sort keys(%traits);
348 foreach my $trait (@sorted_traits) {
349 $line .= "\t".$trait;
351 push @info, $line;
353 # dump phenotypic values
355 my $count2 = 0;
357 my @unique_plot_list = ();
358 my $previous_plot = "";
359 foreach my $d (@$data) {
360 my $plot = $d->[6];
361 if ($plot ne $previous_plot) {
362 push @unique_plot_list, $plot;
364 $previous_plot = $plot;
367 foreach my $p (@unique_plot_list) {
368 #$line = join "\t", map { $plot_data{$p}->{metadata}->{$_} } ( "year", "trial_name", "location", "accession", "plot", "rep", "block_number" );
369 $line = join "\t", map { $plot_data{$p}->{metadata}->{$_} } ( "year", "studyDbId", "studyName", "locationDbId", "locationName", "germplasmDbId", "germplasmName", "plotDbId", "plotName", "rep", "blockNumber" );
371 #print STDERR "Adding line for plot $p\n";
372 foreach my $trait (@sorted_traits) {
373 my $tab = $plot_data{$p}->{$trait};
374 $line .= defined($tab) ? "\t".$tab : "\t";
376 push @info, $line;
379 return @info;
384 =head2 get_genotype_info
386 parameters: comma-separated lists of accession, trial, and trait IDs. May be empty.
388 returns: an array with genotype information
390 =cut
392 sub get_genotype_info {
394 my $self = shift;
395 my $accession_idref = shift;
396 my $protocol_id = shift;
397 my @accession_ids = @$accession_idref;
398 my ($q, @result);
400 print STDERR "accession sql= @accession_ids \n";
401 print STDERR "protocol id= $protocol_id \n";
403 if (@accession_ids) {
404 $q = "SELECT uniquename, value FROM (SELECT stock.uniquename, genotypeprop.value, row_number() over (partition by stock.uniquename order by genotypeprop.genotype_id) as rownum from genotypeprop join nd_experiment_genotype USING (genotype_id) JOIN nd_experiment_protocol USING(nd_experiment_id) JOIN nd_experiment_stock USING(nd_experiment_id) JOIN stock USING(stock_id) WHERE stock.stock_id in (@{[join',', ('?') x @accession_ids]}) AND nd_experiment_protocol.nd_protocol_id=?) tmp WHERE rownum <2";
406 print "QUERY: $q\n\n";
408 my $h = $self->dbh()->prepare($q);
409 $h->execute(@accession_ids,$protocol_id);
411 while (my ($uniquename,$genotype_string) = $h->fetchrow_array()) {
412 push @result, [ $uniquename, $genotype_string ];
414 return \@result;
418 sub get_type_id {
419 my $self = shift;
420 my $term = shift;
421 my $q = "SELECT projectprop.type_id FROM projectprop JOIN cvterm on (projectprop.type_id=cvterm.cvterm_id) WHERE cvterm.name='$term'";
422 my $h = $self->dbh->prepare($q);
423 $h->execute();
424 my ($type_id) = $h->fetchrow_array();
425 return $type_id;
429 sub get_stock_type_id {
430 my $self = shift;
431 my $term =shift;
432 my $q = "SELECT stock.type_id FROM stock JOIN cvterm on (stock.type_id=cvterm.cvterm_id) WHERE cvterm.name='$term'";
433 my $h = $self->dbh->prepare($q);
434 $h->execute();
435 my ($type_id) = $h->fetchrow_array();
436 return $type_id;
439 sub get_stockprop_type_id {
440 my $self = shift;
441 my $term = shift;
442 my $q = "SELECT stockprop.type_id FROM stockprop JOIN cvterm on (stockprop.type_id=cvterm.cvterm_id) WHERE cvterm.name=?";
443 my $h = $self->dbh->prepare($q);
444 $h->execute($term);
445 my ($type_id) = $h->fetchrow_array();
446 return $type_id;
449 sub get_projectprop_type_id {
450 my $self = shift;
451 my $term = shift;
452 my $q = "SELECT projectprop.type_id FROM projectprop JOIN cvterm ON (projectprop.type_id=cvterm.cvterm_id) WHERE cvterm.name=?";
453 my $h = $self->dbh->prepare($q);
454 $h->execute($term);
455 my ($type_id) = $h->fetchrow_array();
456 return $type_id;