3 CXGN::BreederSearch - class for retrieving breeder information for the breeder search wizard
7 Lukas Mueller <lam87@cornell.edu>
8 Aimin Yan <ay247@cornell.edu>
14 package CXGN
::BreederSearch
;
31 Usage: my $results_ref = $bs->metadata_query($criteria_list, $dataref, $queryref);
33 Ret: returns a hash with a list of ids and names that were matched.
35 Args: criteria_list: a comma separated string of criteria categories. Possible
36 criteria include accessions, breeding programs, genotyping protocols,
37 locations, plots, plants, trials, trial_designs, traits, and years. The last
38 criteria in the list is the return type.
40 dataref: The dataref is a hashref of hashrefs. The first key
41 is the target of the transformation, and the second is the
42 source type of the transformation, containing comma separated
43 values of the source type.
45 queryref: same structure as dataref, but instead of storing ids it stores a
46 1 if user to retrieve an intersection of matches, or 0 for the default union,
47 or a fractional number (between 0 and 1) to filter a miniumum match percentage
50 Example: retrieving all the trials from location 'test_location' (location_id = 23) and year '2014' in the fixture db:
52 my $bs = CXGN::BreederSearch->new( { dbh=>$dbh } );
60 'locations' => '\'23\'',
71 my $results_ref = $bs ->metadata_query($criteria_list, $dataref, $queryref);
73 print Dumper($results);
96 my $criteria_list = shift;
100 # print STDERR "criteria_list=" . Dumper($criteria_list);
101 # print STDERR "dataref=" . Dumper($dataref);
102 # print STDERR "queryref=" . Dumper($queryref);
104 my $target_table = $criteria_list->[-1];
105 # print STDERR "target_table=". $target_table . "\n";
106 my $target = $target_table;
108 # print STDERR "target=$target\n";
110 my $select = "SELECT ".$target."_id, ".$target."_name ";
111 my $group = "GROUP BY ".$target."_id, ".$target."_name ";
114 if (!$dataref->{"$target_table"}) {
115 my $from = "FROM public.". $target_table;
116 my $where = " WHERE ".$target."_id IS NOT NULL";
117 $full_query = $select . $from . $where;
121 foreach my $category (@
$criteria_list) {
123 # print STDERR "==> BUILDING QUERY FOR CATEGORY: $category\n";
125 if ($dataref->{$criteria_list->[-1]}->{$category}) {
127 my @categories = ($target_table, $category);
128 @categories = sort @categories;
129 my $from = "FROM public.". $categories[0] ."x". $categories[1] . " JOIN public." . $target_table . " USING(" . $target."_id) ";
130 my $criterion = $category;
131 $criterion =~ s/s$//;
132 my $match = $queryref->{$criteria_list->[-1]}->{$category};
137 # print STDERR "... Match: $match\n";
139 my $total = scalar(split(',', $dataref->{$criteria_list->[-1]}->{$category}));
140 my $inner_select = $select . ", COUNT(" . $criterion . "_id)/" . $total . "::decimal AS match ";
141 my $where = "WHERE ". $criterion. "_id IN (" . $dataref->{$criteria_list->[-1]}->{$category} . ") ";
142 $query = $inner_select . $from . $where . $group;
144 my $outer_query = "SELECT i." . $target . "_id, i." . $target . "_name, i.match ";
145 $outer_query .= "FROM ($query) AS i ";
146 $outer_query .= "WHERE i.match >= " . $match;
148 my $outest_query = "SELECT j." . $target . "_id, j." . $target . "_name ";
149 $outest_query .= "FROM ($outer_query) AS j ";
151 # print STDERR "... Query: $outest_query\n";
152 push @queries, $outest_query;
155 $full_query = join (" INTERSECT ", @queries);
157 $full_query .= " ORDER BY 2";
158 # print STDERR "FULL QUERY: $full_query\n";
159 $h = $self->dbh->prepare($full_query);
163 while (my ($id, $name) = $h->fetchrow_array()) {
164 push @results, [ $id, $name ];
167 return { results
=> \
@results };
171 =head2 avg_phenotypes_query
173 parameters: trait_id, trial_id, allow_missing
175 returns: values, the avg pheno value of each accession in a trial for the given trait, and column_names, an array of the trait names
181 sub avg_phenotypes_query
{
183 my $trial_id = shift;
184 my $trait_ids = shift;
186 my $controls = shift;
187 my @trait_ids = @
$trait_ids;
188 my @weights = @
$weights;
189 my @controls = @
$controls;
190 my $allow_missing = shift;
192 my $select = "SELECT table0.accession_id, table0.accession_name";
193 my $from = " FROM (SELECT accession_id, accession_name FROM materialized_phenoview JOIN accessions USING (accession_id) WHERE trial_id = $trial_id GROUP BY 1,2) AS table0";
194 for (my $i = 1; $i <= scalar @trait_ids; $i++) {
195 $select .= ", ROUND( CAST(table$i.trait$i AS NUMERIC), 2)";
196 $from .= " JOIN (SELECT accession_id, accession_name, AVG(value::REAL) AS trait$i FROM materialized_phenoview JOIN accessions USING (accession_id) JOIN phenotype USING (phenotype_id) WHERE trial_id = $trial_id AND trait_id = ? GROUP BY 1,2) AS table$i USING (accession_id)";
198 my $query = $select . $from . " ORDER BY 2";
199 if ($allow_missing eq 'true') { $query =~ s/JOIN/FULL OUTER JOIN/g; }
201 print STDERR
"QUERY: $query\n";
203 my $h = $self->dbh->prepare($query);
204 $h->execute(@
$trait_ids);
206 my (@raw_avg_values, @reference_values, @rows_to_scale, @weighted_values);
208 if (grep { defined($_) } @controls) {
209 while (my @row = $h->fetchrow_array()) {
210 push @rows_to_scale, @row;
211 my ($id, $name, @avg_values) = @row;
212 for my $i (0..$#controls) {
213 my $control = $controls[$i] || 0;
214 if ($id == $control) {
215 #print STDERR "Matched control accession $name with values @avg_values\n";
216 if (!defined($avg_values[$i])) {
217 return { error
=> "Can't scale values using control $name, it has a zero or undefined value for trait with id @$trait_ids[$i] in this trial. Please select a different control for this trait." };
219 $reference_values[$i] = $avg_values[$i];
223 for my $i (0..$#trait_ids) {
224 $reference_values[$i] = 1 unless defined $reference_values[$i];
227 #print STDERR "reference values = @reference_values\n";
228 $h->execute(@
$trait_ids);
229 while (my ($id, $name, @avg_values) = $h->fetchrow_array()) {
231 my @scaled_values = map {sprintf("%.2f", $avg_values[$_] / $reference_values[$_])} 0..$#avg_values;
232 my @scaled_and_weighted = map {sprintf("%.2f", $scaled_values[$_] * $weights[$_])} 0..$#scaled_values;
233 unshift @scaled_values, '<a href="/stock/'.$id.'/view">'.$name.'</a>';
234 push @raw_avg_values, [@scaled_values];
237 map { $sum += $_ } @scaled_and_weighted;
238 my $rounded_sum = sprintf("%.2f", $sum);
239 push @scaled_and_weighted, $rounded_sum;
240 unshift @scaled_and_weighted, '<a href="/stock/'.$id.'/view">'.$name.'</a>';
241 push @weighted_values, [@scaled_and_weighted];
246 while (my ($id, $name, @avg_values) = $h->fetchrow_array()) {
248 my @values_to_weight = @avg_values;
249 unshift @avg_values, '<a href="/stock/'.$id.'/view">'.$name.'</a>';
250 push @raw_avg_values, [@avg_values];
252 @values_to_weight = map {$values_to_weight[$_] * $weights[$_]} 0..$#values_to_weight;
254 map { $sum += $_ } @values_to_weight;
255 unshift @values_to_weight, '<a href="/stock/'.$id.'/view">'.$name.'</a>';
256 my $rounded_sum = sprintf("%.2f", $sum);
257 push @values_to_weight, $rounded_sum;
258 push @weighted_values, [@values_to_weight];
263 my @weighted_values2 = sort { $b->[-1] <=> $a->[-1] } @weighted_values;
264 my @weighted_values3;
265 for (my $i = 0; $i < scalar @weighted_values2; $i++ ) {
266 my $temp_array = $weighted_values2[$i];
267 my @temp_array = @
$temp_array;
268 push @temp_array, $i+1;
269 push @weighted_values3, [@temp_array];
272 #print STDERR "avg_phenotypes: ".Dumper(@raw_avg_values);
273 #print STDERR "avg_phenotypes: ".Dumper(@weighted_values3);
276 raw_avg_values
=> \
@raw_avg_values,
277 weighted_values
=> \
@weighted_values3
284 parameters: db parameters
286 returns: message detailing matview status
288 Side Effects: If they are unavailable, it will use the refresh_matviews method to populate the materialized views
300 my ($status, %response_hash);
303 my $populated_query = "select * from materialized_phenoview limit 1";
304 my $sth = $self->dbh->prepare($populated_query);
306 } catch
{ #if test query fails because views aren't populated
307 print STDERR
"Using basic refresh to populate views . . .\n";
308 $status = $self->refresh_matviews($dbhost, $dbname, $dbuser, $dbpass, 'basic');
309 %response_hash = %$status;
312 if (%response_hash && $response_hash{'message'} eq 'Wizard update completed!') {
313 print STDERR
"Populated views, now proceeding with query . . . .\n";
314 return { status
=> "Populated views, query can proceed." };
315 } elsif (%response_hash && $response_hash{'message'} eq 'Wizard update initiated.') {
316 return { error
=> "The search wizard is temporarily unavailable while database indexes are being repopulated. Please try again later." };
317 } elsif (%response_hash && $response_hash{'error'}) {
318 return { error
=> $response_hash{'error'} };
320 return { success
=> "Test successful, query can proceed." };
324 =head2 refresh_matviews
326 parameters: db parameters, and a string to specify desired refresh type, basic or concurrent. defaults to concurrent
328 returns: message detailing success or error
330 Side Effects: Refreshes materialized views
334 sub refresh_matviews
{
340 my $materialized_view = shift || 'fullview'; #Can be 'fullview' or 'stockprop'
341 my $refresh_type = shift || 'concurrent';
342 my $basepath = shift;
345 if (!defined($async)) {
349 my $refresh_finished = 0;
352 my $q = "SELECT currently_refreshing FROM public.matviews WHERE mv_id=?";
353 my $h = $self->dbh->prepare($q);
356 my $refreshing = $h->fetchrow_array();
359 return { error
=> $materialized_view.' update already in progress . . . ' };
363 my $refresh_command = "perl $basepath/bin/refresh_matviews.pl -H $dbhost -D $dbname -U $dbuser -P $dbpass -m $materialized_view";
364 $async_refresh = CXGN
::Tools
::Run
->new();
365 if ($refresh_type eq 'concurrent') {
366 print STDERR
"Using CXGN::Tools::Run to ".($async ?
"asynchronously" : "synchronously")." run $refresh_command -c\n";
368 $async_refresh->run_async($refresh_command." -c");
370 $async_refresh->run($refresh_command." -c");
373 print STDERR
"Using CXGN::Tools::Run to ".($async ?
"asynchronously" : "synchronously")." run $refresh_command\n";
375 $async_refresh->run_async($refresh_command);
377 $async_refresh->run($refresh_command);
381 for (my $i = 1; $i < 10; $i++) {
383 if ($async_refresh->alive) {
386 $refresh_finished = 1;
390 if ($refresh_finished) {
391 return { message
=> $materialized_view . ' update completed!', connection
=> $async_refresh };
393 return { message
=> $materialized_view.' update initiated.', connection
=> $async_refresh };
396 print STDERR
'Error initiating '.$materialized_view.' update.' . $@
. "\n";
397 return { error
=> 'Error initiating '.$materialized_view.' update.' . $@
};
402 =head2 matviews_status
404 Desc: checks tracking table to see if materialized views are updating, and if not, when they were last updated.
408 returns: refreshing message or timestamp
414 sub matviews_status
{
416 my $q = "SELECT currently_refreshing, last_refresh FROM public.matviews WHERE mv_id=?";
417 my $h = $self->dbh->prepare($q);
420 my ($refreshing, $timestamp) = $h->fetchrow_array();
423 print STDERR
"Wizard is already refreshing, current status: $refreshing \n";
424 return { refreshing
=> "<p id='wizard_status'>Wizard update in progress . . . </p>"};
427 print STDERR
"materialized views last updated $timestamp\n";
428 return { timestamp
=> "<p id='wizard_status'>Wizard last updated: $timestamp</p>" };