seedlot upload with accession synonyms. seedlot upload works to update existing seedlots
[sgn.git] / lib / SGN / Controller / QuickSearch.pm
blobf1d0c0bb5c5cf9b4b072b3586c5d85b3657a7e53
1 package SGN::Controller::QuickSearch;
2 use Moose;
3 use namespace::autoclean;
5 BEGIN { extends 'Catalyst::Controller' }
7 use Class::MOP;
8 use HTML::Entities;
9 use List::MoreUtils 'uniq';
10 use Time::HiRes 'time';
11 use URI::FromHash 'uri';
12 use Class::Load ':all';
13 use CXGN::Marker::Tools;
14 use CXGN::Tools::Identifiers qw/ identifier_url identifier_namespace /;
15 use CXGN::Tools::Text qw/to_tsquery_string trim/;
18 =head1 NAME
20 SGN::Controller::QuickSearch - implement the quick search
21 functionality of the site
23 =head1 DESCRIPTION
25 Performs a search for each entity in the database and reports the
26 number of hits for each entity. Links are provided on an overview page
27 to allow the user to complete the search. In addition to the database,
28 quick_search.pl also searches google for the solgenomics.net domain,
29 parses the page that Google returns to report the number of hits on
30 the web site (which includes static and dynamic pages). The link
31 provided with that search is going directly to Google. Similarly, the
32 search is Google search is repeated without the domain constraint to
33 show the number of hits on the world wide web in total. A link for
34 that search is also provided.
36 =head1 PUBLIC ACTIONS
38 =cut
40 my %searches = (
42 # function-based searches
43 clone => { function => \&quick_clone_search, exact => 1 },
44 est => { function => \&quick_est_search, exact => 1 },
45 microarray => { function => \&quick_array_search, exact => 1 },
46 marker => { function => \&quick_marker_search },
47 manual_annotations => { function => \&quick_manual_annotation_search },
48 automatic_annotations => { function => \&quick_automatic_annotation_search },
49 sgn_pages => { function => \&quick_page_search },
50 web => { function => \&quick_web_search },
51 phenotype => { function => \&quick_phenotype_search },
52 # search-framework searches
53 people => { sf_class => 'CXGN::Searches::People',
54 result_desc => 'people',
55 search_path => '/solpeople/people_search.pl'
57 library => { sf_class => 'CXGN::Searches::Library',
58 result_desc => 'cDNA libraries',
59 search_path => '/search/library_search.pl',
61 #bac => { sf_class => 'CXGN::Genomic::Search::Clone',
62 # result_desc => 'BAC identifiers',
63 # search_path => '/maps/physical/clone_search.pl',
64 # },
65 unigene => { sf_class => 'CXGN::Unigene::Search',
66 result_desc => 'unigene identifiers',
67 search_path => '/search/ug-ad2.pl',
68 exact => 1,
70 image => { sf_class => 'CXGN::Searches::Images',
71 result_desc => 'images',
72 search_path => '/search/image_search.pl',
74 locus_allele => { sf_class => 'CXGN::Phenome',
75 result_desc => 'locus or allele identifiers',
76 search_path => '/search/locus',
79 # note that there is also another method of searching using site feature xrefs
83 =head2 quick_search
85 Public path: /search/quick
87 Handles POST or GET quick searches. Parameter can be either C<term>
88 or C<q>. If optional param C<showtimes> is true, shows number of
89 seconds each of the search steps took.
91 =cut
93 sub quick_search: Path('/search/quick') {
94 my ( $self, $c ) = @_;
96 # use either 'term' or 'q' as the search term
97 my ($term) = grep defined, @{ $c->req->parameters }{'term','q'};
99 $term =~ s/^\s*|\s*$//g;
101 defined $term && length $term
102 or $c->throw_client_error( public_message => 'Must provide a search term' );
104 $c->stash(
105 quick_search_term => $term,
106 term => $term,
107 show_times => $c->req->parameters->{showtimes},
108 template => '/search/quick_search.mas',
111 return if $c->forward('redirect_by_ident');
113 $c->forward('execute_predefined_searches');
114 $c->forward('search_with_xrefs');
115 $c->forward('redirect_if_only_one_possible');
118 #run the term through CXGN::Tools::Identifiers, and if it's
119 #recognized as an exact SGN identifier match, just redirect them to
120 #that page
121 sub redirect_by_ident : Private {
122 my ( $self, $c ) = @_;
124 my $term = $c->stash->{term};
126 if ( my $direct_url = identifier_url($term) ) {
127 my $namespace = identifier_namespace($term);
128 #if the URL is just to this page, it's not useful
129 unless( $direct_url =~ m!quick_search\.pl|search/quick! #unless the url is to quick_search
130 || $namespace eq 'est' # don't auto-redirect for est names, some markers are called this
133 #if it's an external link, don't redirect, but put it in the external_link variable
134 if ( $direct_url =~ m@(f|ht)tp://@
135 && $direct_url !~ /sgn\.cornell\.edu|solgenomics\.net/
137 my ($domain) = $direct_url =~ m|://(?:www\.)?([^/]+)|;
138 $c->stash->{results}{external_link}{result} = [ $direct_url, '1 direct information page' ];
139 } else {
140 $c->res->redirect( $direct_url );
141 return 1;
146 return;
149 # another optimization: if the quick search found only one
150 # possible URL to go to, go there
151 sub redirect_if_only_one_possible : Private {
152 my ( $self, $c ) = @_;
154 my @possible_urls = uniq(
155 grep $_ !~ m!^https?://! && $_ !~ m!^/solpeople!,
156 grep defined,
157 ( map $_->{result}->[0],
158 values %{$c->stash->{results}}
160 ( map ''.$_->url,
161 @{ $c->stash->{xrefs} || [] }
165 if( @possible_urls == 1 ) {
166 $c->log->debug("redirecting to only possible url: $possible_urls[0]") if $c->debug;
167 $c->res->redirect( $possible_urls[0] );
168 return;
172 sub execute_predefined_searches: Private {
173 my ( $self, $c ) = @_;
175 # execute all the searches and stash the results
176 for my $search_name ( sort keys %searches ) {
177 my $search = $searches{$search_name};
178 my $b = time;
179 my $searchresults = $self->do_quick_search(
180 $c->dbc->dbh,
181 %$search,
182 term => $c->stash->{term},
184 $c->stash->{results}{$search_name} = {
185 result => $searchresults,
186 time => time - $b,
187 exact => $search->{exact}
193 sub search_with_xrefs: Private {
194 my ( $self, $c ) = @_;
196 my $b = time;
197 my @xrefs = $c->feature_xrefs( $c->stash->{term} );
198 $c->stash->{xrefs} = \@xrefs;
199 $c->stash->{xrefs_time} = time - $b;
202 #do a quick search with either a legacy quick search function or a
203 #WWWSearch-implementing search
204 sub do_quick_search {
205 my ( $self, $db, %args ) = @_;
207 if ($args{function}) { #just run legacy functions and return their results
208 return $args{function}->( $db,$args{term});
209 } else {
210 my $classname = $args{sf_class}
211 or die 'Must provide a class name';
213 Class::Load::load_class( $classname );
214 $classname->isa( 'CXGN::Search::SearchI' )
215 or die "'$classname' is not a CXGN::Search::SearchI-implementing object";
217 my $search = $classname->new;
218 my $query = $search->new_query;
220 #check that the query has a quick_search function
221 $query->can('quick_search')
222 or die "Search '$classname' does not appear to have a query object with a quick_search method";
224 if ( $query->quick_search($args{term}) ) {
225 my $results = $search->do_search($query);
226 my $count = $results->total_results;
227 die 'count should not be negative' if $count < 0;
229 if ($count > 0) {
230 my $qstr = encode_entities($query->to_query_string());
231 return [ "$args{search_path}?$qstr", "$count $args{result_desc}" ];
234 return [undef, "0 $args{result_desc}"];
237 die 'this point should not be reached';
240 ###################### LEGACY QUICK SEARCH FUNCTIONS ##########################
242 sub quick_est_search {
243 my $db = shift;
244 my $term = shift;
246 my $est_link = [ undef, "0 EST identifiers" ];
248 # the est quick search should support identifiers of the form SGN-E999999, SGN_E999999, SGNE999999
249 # and also E999999, as well as straight number (999999).
251 if ($term =~ /^\d+$/ || ( identifier_namespace($term) || '' )eq 'sgn_e' )
253 my ($id_term) = $term =~ /(\d+)/;
254 my $count = sql_query_count($db, "SELECT count(*) FROM est WHERE est.est_id = ?",$id_term);
255 if ($count != 0) {
256 $est_link = [
257 "/search/est.pl?request_id=$id_term&request_from=0&request_type=7&search=Search",
258 "$count EST identifiers",
262 return $est_link;
265 sub quick_clone_search {
266 my $db = shift;
267 my $term = shift;
269 # adjust if EST
270 unless ($term =~ m|^ccc|) { # coffee clone name.
271 $term =~ s|([a-z]{4})(\d{1,2})([a-z]\d{1,2})|$1-$2-$3|i;
274 # the quick clone search supports searching of clone name and
275 # clone ids. Clone ids can be entered as SGNC999999, SGN-C999999,
276 # SGN_C999999 or C999999. if the input does not correspond to any
277 # of these formats, the clone_name is searched. may have to add
278 # something for the dashes that are sometimes not present in the
279 # clone names.
281 my $where_clause = "";
282 if ($term =~ /^(?:(SGN[\-\_]?)?C)?(\d+)$/i) {
283 $where_clause = "WHERE clone_id = ?";
284 $term = $2;
285 } else {
286 $where_clause = "WHERE clone_name ilike ?";
289 my $query = "SELECT clone_id FROM sgn.clone $where_clause";
290 my ($clone_id) = $db->selectrow_array($query, undef, $term);
292 my $clone_link = [undef, "0 cDNA clone identifiers"];
293 if ($clone_id) {
294 $clone_link = [
295 "/search/est.pl?request_id=SGN-C$clone_id&request_from=0&request_type=automatic&search=Search",
296 "1 cDNA clone identifier",
299 return $clone_link;
302 # For quick_search queries without the Version#-Release#- prefix, the version and release are
303 # assumed to both be one. This is hardcoded below in two variables $version and $release.
304 sub quick_array_search {
305 my $db = shift;
306 my $term = shift;
308 my $version = 1; # default version is 1
309 my $release = 1; # default release is 1
310 my $spot = "";
312 my $array_link = [ undef, "0 array identifiers" ];
314 # the array quick search should support the following formats:
315 # 1-1-1.1.1.1 (proper), -1-1.1.1.1, 1-1.1.1.1, -1.1.1.1 and 1.1.1.1
317 my $id_term = "";
318 if ($term =~ /^-?\d*-?(\d+\.\d+\.\d+\.\d+)$/) { # incomplete or absent Version#-Release#- prefix
319 $id_term = $version . "-" . $release . "-" . $1; # use default prefix
320 $spot = $1;
323 if ($term =~ /^(\d+)-(\d+)-(\d+\.\d+\.\d+\.\d+)$/) { # complete Version#-Release#- prefix
324 $spot = $3;
325 $id_term = $term; # use new version and release values
328 if ($id_term) {
329 my $query = "SELECT count(*) FROM microarray AS m WHERE m.spot_id = ? AND m.version = ? AND m.release = ?";
330 my $count = sql_query_count($db , $query, $spot,$version,$release);
332 if ($count != 0) {
333 $array_link = [
334 "/search/est.pl?request_id=$id_term&request_from=0&request_type=14&search=Search",
335 "$count array identifiers",
339 return $array_link;
341 sub quick_phenotype_search {
342 my ($db, $term) = @_;
343 my $q = "select count (distinct stock_id ) from stock left join stockprop using (stock_id) left join cvterm on stockprop.type_id = cvterm.cvterm_id where stock.name ilike ? or stock.uniquename ilike ? or (stockprop.value ilike ? and cvterm.name ilike ? ) " ;
344 my $count = sql_query_count( $db , $q , "\%$term\%","\%$term\%","\%$term\%", "\%synonym\%" );
345 my $pheno_link = [ undef , "0 phenotype identifiers"];
346 if ($count>0) {
347 $pheno_link = ["/search/stocks?any_name=$term" ,
348 "$count phenotype identifiers" ];
350 return $pheno_link;
353 sub quick_marker_search {
354 my $db = shift;
355 my $term = shift;
357 # adjust if EST
358 $term =~ s/([a-z]{4})(\d{1,2})([a-z]\d{1,2})/$1-$2-$3/i;
360 my $marker_link = [undef, "0 marker identifiers"];
361 my $count = CXGN::Marker::Tools::marker_name_to_ids($db,$term);
362 if ($count != 0) {
363 $marker_link = [
364 "/search/markers/markersearch.pl?w822_nametype=starts+with&w822_marker_name=$term&w822_submit=Search&w822_mapped=off&w822_species=Any&w822_protos=Any&w822_chromos=Any&w822_pos_start=&w822_pos_end=&w822_confs=Any&w822_maps=Any",
365 "$count marker identifiers"
368 return $marker_link;
371 sub quick_manual_annotation_search {
372 my $db = shift;
373 my $term = shift;
375 # It's a syntax error for whitespace to occur in tsquery query strings. Replace with ampersands.
376 my $cleaned_term = to_tsquery_string($term);
377 my $count = sql_query_count($db, <<EOSQL, $cleaned_term);
378 SELECT COUNT(*)
379 FROM manual_annotations
380 WHERE annotation_text_fulltext @@ to_tsquery(?)
381 EOSQL
383 my $unigene_count = do {
384 if($count > 0) {
385 sql_query_count($db,<<EOSQL,$cleaned_term);
386 SELECT COUNT(DISTINCT(unigene_member.unigene_id))
387 FROM manual_annotations,
388 seqread,
389 est,
390 unigene_member
391 WHERE annotation_text_fulltext @@ to_tsquery(?)
392 AND manual_annotations.annotation_target_id=seqread.clone_id
393 AND seqread.read_id=est.read_id
394 AND est.est_id=unigene_member.est_id
395 EOSQL
396 } else {
401 return
402 $count > 0 ? ["/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=manual_search", "$count manual annotations on $unigene_count unigenes"]
403 : [undef, "0 manual annotations"];
406 sub quick_automatic_annotation_search {
407 my $db = shift;
408 my $term = shift;
409 my $cleaned_term = to_tsquery_string($term);
410 my $count = sql_query_count($db, "select count(*) from blast_defline where defline_fulltext @@ to_tsquery(?)",$cleaned_term);
412 my $unigene_count = "(not determined -- number of annotations too large)";
413 if ($count < 10000) {
414 $unigene_count = sql_query_count($db, <<EOSQL,$cleaned_term);
415 SELECT COUNT(DISTINCT(unigene.unigene_id))
416 FROM blast_defline,
417 blast_hits,
418 blast_annotations,
419 unigene
420 WHERE defline_fulltext @@ to_tsquery(?)
421 AND blast_defline.defline_id=blast_hits.defline_id
422 AND blast_hits.blast_annotation_id=blast_annotations.blast_annotation_id
423 AND blast_annotations.apply_id=unigene.unigene_id
424 AND blast_annotations.apply_type=15
425 EOSQL
427 my $automatic_annotation_link = [undef, "0 automatic annotations"];
428 if ($count !=0) {
429 $automatic_annotation_link = [ "/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=blast_search", "$count automatic annotations on $unigene_count unigenes" ];
431 return $automatic_annotation_link;
434 sub sql_query_count {
435 my $db = shift;
436 my $query = shift;
437 my $qh = $db -> prepare_cached($query);
438 $qh -> execute(@_);
439 my ($count) = $qh -> fetchrow_array();
440 return $count;
443 sub google_search {
444 my( $site_title, $term, $site_address ) = @_;
446 my $google_url = uri( scheme => 'http',
447 host => 'www.google.com',
448 path => '/custom',
449 query => {
450 q => $term,
451 ( $site_address
452 ? ( sitesearch => $site_address )
453 : ()
456 query_separator => '&',
459 my $lwp_ua = LWP::UserAgent->new;
460 $lwp_ua->agent( 'SGN Quick Search ( Mozilla compatible )' );
461 my $res = $lwp_ua->request( HTTP::Request->new( GET => $google_url ));
463 my $count = do {
464 if( $res ->is_success ) {
465 my $cont = $res->content;
466 $cont =~ s/\<.*?\>//g;
467 my ($c) = $cont =~ /Results\s*\d*?\s*\-\s*\d*\s*of\s*(?:about)?\s*?([\d\,]+)/;
472 if( $count ) {
473 return [ $google_url, "$count pages on $site_title" ];
474 } else {
475 return [ undef, "0 pages on $site_title" ];
480 sub quick_web_search {
481 my (undef,$term) = @_;
482 # works the same way as quick_page_search, except that the domain contraint is removed from the
483 # search.
484 return google_search('the entire web',$term);
486 sub quick_page_search {
487 my (undef,$term) = @_;
488 return google_search('SGN',$term,'solgenomics.net');
492 __PACKAGE__->meta->make_immutable;