3 CXGN::BreederSearch - class for retrieving breeder information for the breeder search wizard
7 Lukas Mueller <lam87@cornell.edu>
8 Aimin Yan <ay247@cornell.edu>
14 package CXGN
::BreederSearch
;
30 Usage: my %info = $bs->metadata_query($criteria_list, $dataref, $queryref);
32 Ret: returns a hash with a key called results that contains
33 a listref of listrefs specifying the matching list with ids
35 Args: criteria_list: a comma separated string called a criteria_list,
36 listing all the criteria that need to be applied. Possible
37 criteria are trials, years, traits, and locations. The last
38 criteria in the list is the return type.
39 dataref: The dataref is a hashref of hashrefs. The first key
40 is the target of the transformation, and the second is the
41 source type of the transformation, containing comma separated
42 values of the source type.
43 queryref: same structure as dataref, but instead of storing ids it stores a
44 1 if user requested intersect, or 0 for default union
45 Side Effects: will create a materialized view of the ontology corresponding to
53 my $criteria_list = shift;
57 print STDERR
"criteria_list=" . Dumper
($criteria_list);
58 print STDERR
"dataref=" . Dumper
($dataref);
59 print STDERR
"queryref=" . Dumper
($queryref);
61 my $target_table = $criteria_list->[-1];
62 print STDERR
"target_table=". $target_table . "\n";
63 my $target = $target_table;
66 my $select = "SELECT ".$target."_id, ".$target."_name ";
67 my $group = "GROUP BY ".$target."_id, ".$target."_name ";
70 if (!$dataref->{"$target_table"}) {
71 my $from = "FROM public.". $target_table;
72 my $where = " WHERE ".$target."_id IS NOT NULL";
73 $full_query = $select . $from . $where;
77 foreach my $category (@
$criteria_list) {
79 if ($dataref->{$criteria_list->[-1]}->{$category}) {
81 my @categories = ($target_table, $category);
82 @categories = sort @categories;
83 my $from = "FROM public.". $categories[0] ."x". $categories[1] . " JOIN public." . $target_table . " USING(" . $target."_id) ";
84 my $criterion = $category;
86 my $intersect = $queryref->{$criteria_list->[-1]}->{$category};
90 my @ids = split(/,/, $dataref->{$criteria_list->[-1]}->{$category});
91 foreach my $id (@ids) {
92 my $where = "WHERE ". $criterion. "_id IN (". $id .") ";
93 my $statement = $select . $from . $where . $group;
94 push @parts, $statement;
96 $query = join (" INTERSECT ", @parts);
97 push @queries, $query;
100 my $where = "WHERE ". $criterion. "_id IN (" . $dataref->{$criteria_list->[-1]}->{$category} . ") ";
101 $query = $select . $from . $where . $group;
102 push @queries, $query;
106 $full_query = join (" INTERSECT ", @queries);
108 $full_query .= " ORDER BY 2";
109 print STDERR
"QUERY: $full_query\n";
110 $h = $self->dbh->prepare($full_query);
114 while (my ($id, $name) = $h->fetchrow_array()) {
115 push @results, [ $id, $name ];
118 if (@results >= 10_000
) {
119 return { error
=> scalar(@results).' matches. Too many results to display' };
121 elsif (@results < 1) {
122 return { error
=> scalar(@results).' matches. No results to display' };
125 return { results
=> \
@results };
129 =head2 refresh_matviews
133 returns: success or error message
135 Side Effects: refreshes matertialized_fullview and all of the smaller materialized views that are based on it and used in the wizard.
139 sub refresh_matviews
{
142 my $q = "SELECT currently_refreshing FROM public.matviews WHERE mv_id=?";
143 my $h = $self->dbh->prepare($q);
146 my $refreshing = $h->fetchrow_array();
149 return { error
=> 'Wizard update already in progress . . . ' };
152 my $connect = "SELECT dblink_connect_u('dbname=".$self->dbname."')";
153 my $send_query = "SELECT dblink_send_query('SELECT refresh_materialized_views()')";
155 $self->dbh->do($connect);
156 $h = $self->dbh->do($send_query);
159 return { error
=> 'Error initiating wizard update.'};
162 $q = "UPDATE public.matviews SET currently_refreshing=?";
164 my $h = $self->dbh->prepare($q);
167 print STDERR
"materialized fullview status updated to refreshing\n";
168 return { message
=> 'Wizard update initiated' };
173 sub matviews_status
{
175 my $q = "SELECT currently_refreshing, last_refresh FROM public.matviews WHERE mv_id=?";
176 my $h = $self->dbh->prepare($q);
179 my ($refreshing, $timestamp) = $h->fetchrow_array();
182 print STDERR
"Wizard is already refreshing, current status: $refreshing \n";
183 return { refreshing
=> "<p id='wizard_status'>Wizard update in progress . . . </p>"};
186 print STDERR
"materialized fullview last updated $timestamp\n";
187 return { timestamp
=> "<p id='wizard_status'>Wizard last updated: $timestamp</p>" };
191 sub get_phenotype_info
{
193 my $accession_sql = shift;
194 my $trial_sql = shift;
195 my $trait_sql = shift;
197 print STDERR
"GET_PHENOTYPE_INFO: $accession_sql - $trial_sql - $trait_sql \n\n";
199 my $rep_type_id = $self->get_stockprop_type_id("replicate");
200 my $block_number_type_id = $self -> get_stockprop_type_id
("block");
201 my $year_type_id = $self->get_projectprop_type_id("project year");
204 my @where_clause = ();
205 if ($accession_sql) { push @where_clause, "stock.stock_id in ($accession_sql)"; }
206 if ($trial_sql) { push @where_clause, "project.project_id in ($trial_sql)"; }
207 if ($trait_sql) { push @where_clause, "cvterm.cvterm_id in ($trait_sql)"; }
209 my $where_clause = "";
211 if (@where_clause>0) {
212 $where_clause .= $rep_type_id ?
"WHERE (stockprop.type_id = $rep_type_id OR stockprop.type_id IS NULL) " : "WHERE stockprop.type_id IS NULL";
213 $where_clause .= $block_number_type_id ?
" AND (block_number.type_id = $block_number_type_id OR block_number.type_id IS NULL)" : " AND block_number.type_id IS NULL";
214 $where_clause .= $year_type_id ?
" AND projectprop.type_id = $year_type_id" :"" ;
215 $where_clause .= " AND " . (join (" AND " , @where_clause));
217 #$where_clause = "where (stockprop.type_id=$rep_type_id or stockprop.type_id IS NULL) AND (block_number.type_id=$block_number_type_id or block_number.type_id IS NULL) AND ".(join (" and ", @where_clause));
220 my $order_clause = " order by project.name, plot.uniquename";
221 my $q = "SELECT projectprop.value, project.name, stock.uniquename, nd_geolocation.description, cvterm.name, phenotype.value, plot.uniquename, db.name, db.name || ':' || dbxref.accession AS accession, stockprop.value, block_number.value AS rep, cvterm.cvterm_id, project.project_id, nd_geolocation.nd_geolocation_id, stock.stock_id, plot.stock_id
222 FROM stock as plot JOIN stock_relationship ON (plot.stock_id=subject_id)
223 JOIN stock ON (object_id=stock.stock_id)
224 LEFT JOIN stockprop ON (plot.stock_id=stockprop.stock_id)
225 LEFT JOIN stockprop AS block_number ON (plot.stock_id=block_number.stock_id)
226 JOIN nd_experiment_stock ON(nd_experiment_stock.stock_id=plot.stock_id)
227 JOIN nd_experiment ON (nd_experiment_stock.nd_experiment_id=nd_experiment.nd_experiment_id)
228 JOIN nd_geolocation USING(nd_geolocation_id)
229 JOIN nd_experiment_phenotype ON (nd_experiment_phenotype.nd_experiment_id=nd_experiment.nd_experiment_id)
230 JOIN phenotype USING(phenotype_id) JOIN cvterm ON (phenotype.cvalue_id=cvterm.cvterm_id)
232 JOIN dbxref ON (cvterm.dbxref_id = dbxref.dbxref_id)
234 JOIN nd_experiment_project ON (nd_experiment_project.nd_experiment_id=nd_experiment.nd_experiment_id)
235 JOIN project USING(project_id)
236 JOIN projectprop USING(project_id)
240 #print STDERR "QUERY: $q\n\n";
241 my $h = $self->dbh()->prepare($q);
245 while (my ($year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id) = $h->fetchrow_array()) {
246 push @
$result, [ $year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id ];
249 #print STDERR Dumper $result;
250 print STDERR
"QUERY returned ".scalar(@
$result)." rows.\n";
254 sub get_phenotype_info_matrix
{
256 my $accession_sql = shift;
257 my $trial_sql = shift;
258 my $trait_sql = shift;
260 my $data = $self->get_phenotype_info($accession_sql, $trial_sql, $trait_sql);
261 #data contains [$year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id]
266 foreach my $d (@
$data) {
267 print STDERR
"PRINTING TRAIT DATA FOR TERM " . $d->[4] . "\n\n";
268 my $cvterm = $d->[4]."|".$d->[8];
269 my $trait_data = $d->[5];
271 $plot_data{$plot}->{$cvterm} = $trait_data;
278 # generate header line
280 my @sorted_traits = sort keys(%traits);
281 foreach my $trait (@sorted_traits) {
282 $line .= "\t".$trait; # first header has to be empty (plot name column)
286 # dump phenotypic values
289 foreach my $plot (sort keys (%plot_data)) {
292 foreach my $trait (@sorted_traits) {
293 my $tab = $plot_data{$plot}->{$trait}; # ? "\t".$plot_data{$plot}->{$trait} : "\t";
294 $line .= defined($tab) ?
"\t".$tab : "\t";
303 sub get_extended_phenotype_info_matrix
{
305 my $accession_sql = shift;
306 my $trial_sql = shift;
307 my $trait_sql = shift;
309 my $data = $self->get_phenotype_info($accession_sql, $trial_sql, $trait_sql);
310 #data contains [$year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id]
315 print STDERR
"No of lines retrieved: ".scalar(@
$data)."\n";
316 foreach my $d (@
$data) {
318 my ($year, $project_name, $stock_name, $location, $trait, $trait_data, $plot, $cv_name, $cvterm_accession, $rep, $block_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id) = @
$d;
320 my $cvterm = $d->[4]."|".$d->[8];
321 if (!defined($rep)) { $rep = ""; }
322 $plot_data{$plot}->{$cvterm} = $trait_data;
323 $plot_data{$plot}->{metadata
} = {
325 studyName
=> $project_name,
326 germplasmName
=> $stock_name,
327 locationName
=> $location,
328 blockNumber
=> $block_number,
331 trait_data
=> $trait_data,
333 cvterm_id
=> $trait_id,
334 studyDbId
=> $project_id,
335 locationDbId
=> $location_id,
336 germplasmDbId
=> $stock_id,
343 my $line = join "\t", qw
| year studyDbId studyName locationDbId locationName germplasmDbId germplasmName plotDbId plotName rep blockNumber
|;
345 # generate header line
347 my @sorted_traits = sort keys(%traits);
348 foreach my $trait (@sorted_traits) {
349 $line .= "\t".$trait;
353 # dump phenotypic values
357 my @unique_plot_list = ();
358 my $previous_plot = "";
359 foreach my $d (@
$data) {
361 if ($plot ne $previous_plot) {
362 push @unique_plot_list, $plot;
364 $previous_plot = $plot;
367 foreach my $p (@unique_plot_list) {
368 #$line = join "\t", map { $plot_data{$p}->{metadata}->{$_} } ( "year", "trial_name", "location", "accession", "plot", "rep", "block_number" );
369 $line = join "\t", map { $plot_data{$p}->{metadata
}->{$_} } ( "year", "studyDbId", "studyName", "locationDbId", "locationName", "germplasmDbId", "germplasmName", "plotDbId", "plotName", "rep", "blockNumber" );
371 #print STDERR "Adding line for plot $p\n";
372 foreach my $trait (@sorted_traits) {
373 my $tab = $plot_data{$p}->{$trait};
374 $line .= defined($tab) ?
"\t".$tab : "\t";
384 =head2 get_genotype_info
386 parameters: comma-separated lists of accession, trial, and trait IDs. May be empty.
388 returns: an array with genotype information
392 sub get_genotype_info
{
395 my $accession_idref = shift;
396 my $protocol_id = shift;
397 my @accession_ids = @
$accession_idref;
400 print STDERR
"accession sql= @accession_ids \n";
401 print STDERR
"protocol id= $protocol_id \n";
403 if (@accession_ids) {
404 $q = "SELECT uniquename, value FROM (SELECT stock.uniquename, genotypeprop.value, row_number() over (partition by stock.uniquename order by genotypeprop.genotype_id) as rownum from genotypeprop join nd_experiment_genotype USING (genotype_id) JOIN nd_experiment_protocol USING(nd_experiment_id) JOIN nd_experiment_stock USING(nd_experiment_id) JOIN stock USING(stock_id) WHERE stock.stock_id in (@{[join',', ('?') x @accession_ids]}) AND nd_experiment_protocol.nd_protocol_id=?) tmp WHERE rownum <2";
406 print "QUERY: $q\n\n";
408 my $h = $self->dbh()->prepare($q);
409 $h->execute(@accession_ids,$protocol_id);
411 while (my ($uniquename,$genotype_string) = $h->fetchrow_array()) {
412 push @result, [ $uniquename, $genotype_string ];
421 my $q = "SELECT projectprop.type_id FROM projectprop JOIN cvterm on (projectprop.type_id=cvterm.cvterm_id) WHERE cvterm.name='$term'";
422 my $h = $self->dbh->prepare($q);
424 my ($type_id) = $h->fetchrow_array();
429 sub get_stock_type_id
{
432 my $q = "SELECT stock.type_id FROM stock JOIN cvterm on (stock.type_id=cvterm.cvterm_id) WHERE cvterm.name='$term'";
433 my $h = $self->dbh->prepare($q);
435 my ($type_id) = $h->fetchrow_array();
439 sub get_stockprop_type_id
{
442 my $q = "SELECT stockprop.type_id FROM stockprop JOIN cvterm on (stockprop.type_id=cvterm.cvterm_id) WHERE cvterm.name=?";
443 my $h = $self->dbh->prepare($q);
445 my ($type_id) = $h->fetchrow_array();
449 sub get_projectprop_type_id
{
452 my $q = "SELECT projectprop.type_id FROM projectprop JOIN cvterm ON (projectprop.type_id=cvterm.cvterm_id) WHERE cvterm.name=?";
453 my $h = $self->dbh->prepare($q);
455 my ($type_id) = $h->fetchrow_array();