redirecting /qtl/search to /qtl/search, to make use of the tab formatted SGN search...
[sgn.git] / lib / SGN / Controller / QuickSearch.pm
bloba7942e9ce0e5e980678ef60778f38d0dc3a38479
1 package SGN::Controller::QuickSearch;
2 use Moose;
3 use namespace::autoclean;
5 BEGIN { extends 'Catalyst::Controller' }
7 use Class::MOP;
8 use HTML::Entities;
9 use List::MoreUtils 'uniq';
10 use Time::HiRes 'time';
11 use URI::FromHash 'uri';
13 use CXGN::Marker::Tools;
14 use CXGN::Tools::Identifiers qw/ identifier_url identifier_namespace /;
15 use CXGN::Tools::Text qw/to_tsquery_string trim/;
18 =head1 NAME
20 SGN::Controller::QuickSearch - implement the quick search
21 functionality of the site
23 =head1 DESCRIPTION
25 Performs a search for each entity in the database and reports the
26 number of hits for each entity. Links are provided on an overview page
27 to allow the user to complete the search. In addition to the database,
28 quick_search.pl also searches google for the solgenomics.net domain,
29 parses the page that Google returns to report the number of hits on
30 the web site (which includes static and dynamic pages). The link
31 provided with that search is going directly to Google. Similarly, the
32 search is Google search is repeated without the domain constraint to
33 show the number of hits on the world wide web in total. A link for
34 that search is also provided.
36 =head1 PUBLIC ACTIONS
38 =cut
40 my %searches = (
42 # function-based searches
43 clone => { function => \&quick_clone_search, exact => 1 },
44 est => { function => \&quick_est_search, exact => 1 },
45 microarray => { function => \&quick_array_search, exact => 1 },
46 marker => { function => \&quick_marker_search },
47 manual_annotations => { function => \&quick_manual_annotation_search },
48 automatic_annotations => { function => \&quick_automatic_annotation_search },
49 sgn_pages => { function => \&quick_page_search },
50 web => { function => \&quick_web_search },
51 phenotype => { function => \&quick_phenotype_search },
52 # search-framework searches
53 people => { sf_class => 'CXGN::Searches::People',
54 result_desc => 'people',
55 search_path => '/solpeople/people_search.pl'
57 library => { sf_class => 'CXGN::Searches::Library',
58 result_desc => 'cDNA libraries',
59 search_path => '/search/library_search.pl',
61 bac => { sf_class => 'CXGN::Genomic::Search::Clone',
62 result_desc => 'BAC identifiers',
63 search_path => '/maps/physical/clone_search.pl',
65 unigene => { sf_class => 'CXGN::Unigene::Search',
66 result_desc => 'unigene identifiers',
67 search_path => '/search/ug-ad2.pl',
68 exact => 1,
70 image => { sf_class => 'CXGN::Searches::Images',
71 result_desc => 'images',
72 search_path => '/search/image_search.pl',
74 locus_allele => { sf_class => 'CXGN::Phenome',
75 result_desc => 'locus or allele identifiers',
76 search_path => '/search/locus_search.pl',
79 # note that there is also another method of searching using site feature xrefs
83 =head2 quick_search
85 Public path: /search/quick
87 Handles POST or GET quick searches. Parameter can be either C<term>
88 or C<q>. If optional param C<showtimes> is true, shows number of
89 seconds each of the search steps took.
91 =cut
93 sub quick_search: Path('/search/quick') {
94 my ( $self, $c ) = @_;
96 # use either 'term' or 'q' as the search term
97 my ($term) = grep defined, @{ $c->req->parameters }{'term','q'};
99 defined $term && length $term
100 or $c->throw_client_error( public_message => 'Must provide a search term' );
102 $c->stash(
103 quick_search_term => $term,
104 term => $term,
105 show_times => $c->req->parameters->{showtimes},
106 template => '/search/quick_search.mas',
109 return if $c->forward('redirect_by_ident');
111 $c->forward('execute_predefined_searches');
112 $c->forward('search_with_xrefs');
113 $c->forward('redirect_if_only_one_possible');
116 #run the term through CXGN::Tools::Identifiers, and if it's
117 #recognized as an exact SGN identifier match, just redirect them to
118 #that page
119 sub redirect_by_ident : Private {
120 my ( $self, $c ) = @_;
122 my $term = $c->stash->{term};
124 if ( my $direct_url = identifier_url($term) ) {
125 my $namespace = identifier_namespace($term);
126 #if the URL is just to this page, it's not useful
127 unless( $direct_url =~ m!quick_search\.pl|search/quick! #unless the url is to quick_search
128 || $namespace eq 'est' # don't auto-redirect for est names, some markers are called this
131 #if it's an external link, don't redirect, but put it in the external_link variable
132 if ( $direct_url =~ m@(f|ht)tp://@
133 && $direct_url !~ /sgn\.cornell\.edu|solgenomics\.net/
135 my ($domain) = $direct_url =~ m|://(?:www\.)?([^/]+)|;
136 $c->stash->{results}{external_link}{result} = [ $direct_url, '1 direct information page' ];
137 } else {
138 $c->res->redirect( $direct_url );
139 return 1;
144 return;
147 # another optimization: if the quick search found only one
148 # possible URL to go to, go there
149 sub redirect_if_only_one_possible : Private {
150 my ( $self, $c ) = @_;
152 my @possible_urls = uniq(
153 grep $_ !~ m!^https?://! && $_ !~ m!^/solpeople!,
154 grep defined,
155 ( map $_->{result}->[0],
156 values %{$c->stash->{results}}
158 ( map ''.$_->url,
159 @{ $c->stash->{xrefs} || [] }
163 if( @possible_urls == 1 ) {
164 $c->log->debug("redirecting to only possible url: $possible_urls[0]") if $c->debug;
165 $c->res->redirect( $possible_urls[0] );
166 return;
170 sub execute_predefined_searches: Private {
171 my ( $self, $c ) = @_;
173 # execute all the searches and stash the results
174 for my $search_name ( sort keys %searches ) {
175 my $search = $searches{$search_name};
176 my $b = time;
177 my $searchresults = $self->do_quick_search(
178 $c->dbc->dbh,
179 %$search,
180 term => $c->stash->{term},
182 $c->stash->{results}{$search_name} = {
183 result => $searchresults,
184 time => time - $b,
185 exact => $search->{exact}
191 sub search_with_xrefs: Private {
192 my ( $self, $c ) = @_;
194 my $b = time;
195 my @xrefs = $c->feature_xrefs( $c->stash->{term} );
196 $c->stash->{xrefs} = \@xrefs;
197 $c->stash->{xrefs_time} = time - $b;
200 #do a quick search with either a legacy quick search function or a
201 #WWWSearch-implementing search
202 sub do_quick_search {
203 my ( $self, $db, %args ) = @_;
205 if ($args{function}) { #just run legacy functions and return their results
206 return $args{function}->( $db,$args{term});
207 } else {
208 my $classname = $args{sf_class}
209 or die 'Must provide a class name';
211 Class::MOP::load_class( $classname );
212 $classname->isa( 'CXGN::Search::SearchI' )
213 or die "'$classname' is not a CXGN::Search::SearchI-implementing object";
215 my $search = $classname->new;
216 my $query = $search->new_query;
218 #check that the query has a quick_search function
219 $query->can('quick_search')
220 or die "Search '$classname' does not appear to have a query object with a quick_search method";
222 if ( $query->quick_search($args{term}) ) {
223 my $results = $search->do_search($query);
224 my $count = $results->total_results;
225 die 'count should not be negative' if $count < 0;
227 if ($count > 0) {
228 my $qstr = encode_entities($query->to_query_string());
229 return [ "$args{search_path}?$qstr", "$count $args{result_desc}" ];
232 return [undef, "0 $args{result_desc}"];
235 die 'this point should not be reached';
238 ###################### LEGACY QUICK SEARCH FUNCTIONS ##########################
240 sub quick_est_search {
241 my $db = shift;
242 my $term = shift;
244 my $est_link = [ undef, "0 EST identifiers" ];
246 # the est quick search should support identifiers of the form SGN-E999999, SGN_E999999, SGNE999999
247 # and also E999999, as well as straight number (999999).
249 if ($term =~ /^\d+$/ || ( identifier_namespace($term) || '' )eq 'sgn_e' )
251 my ($id_term) = $term =~ /(\d+)/;
252 my $count = sql_query_count($db, "SELECT count(*) FROM est WHERE est.est_id = ?",$id_term);
253 if ($count != 0) {
254 $est_link = [
255 "/search/est.pl?request_id=$id_term&request_from=0&request_type=7&search=Search",
256 "$count EST identifiers",
260 return $est_link;
263 sub quick_clone_search {
264 my $db = shift;
265 my $term = shift;
267 # adjust if EST
268 unless ($term =~ m|^ccc|) { # coffee clone name.
269 $term =~ s|([a-z]{4})(\d{1,2})([a-z]\d{1,2})|$1-$2-$3|i;
272 # the quick clone search supports searching of clone name and
273 # clone ids. Clone ids can be entered as SGNC999999, SGN-C999999,
274 # SGN_C999999 or C999999. if the input does not correspond to any
275 # of these formats, the clone_name is searched. may have to add
276 # something for the dashes that are sometimes not present in the
277 # clone names.
279 my $where_clause = "";
280 if ($term =~ /^(?:(SGN[\-\_]?)?C)?(\d+)$/i) {
281 $where_clause = "WHERE clone_id = ?";
282 $term = $2;
283 } else {
284 $where_clause = "WHERE clone_name ilike ?";
287 my $query = "SELECT clone_id FROM sgn.clone $where_clause";
288 my ($clone_id) = $db->selectrow_array($query, undef, $term);
290 my $clone_link = [undef, "0 cDNA clone identifiers"];
291 if ($clone_id) {
292 $clone_link = [
293 "/search/est.pl?request_id=SGN-C$clone_id&request_from=0&request_type=automatic&search=Search",
294 "1 cDNA clone identifier",
297 return $clone_link;
300 # For quick_search queries without the Version#-Release#- prefix, the version and release are
301 # assumed to both be one. This is hardcoded below in two variables $version and $release.
302 sub quick_array_search {
303 my $db = shift;
304 my $term = shift;
306 my $version = 1; # default version is 1
307 my $release = 1; # default release is 1
308 my $spot = "";
310 my $array_link = [ undef, "0 array identifiers" ];
312 # the array quick search should support the following formats:
313 # 1-1-1.1.1.1 (proper), -1-1.1.1.1, 1-1.1.1.1, -1.1.1.1 and 1.1.1.1
315 my $id_term = "";
316 if ($term =~ /^-?\d*-?(\d+\.\d+\.\d+\.\d+)$/) { # incomplete or absent Version#-Release#- prefix
317 $id_term = $version . "-" . $release . "-" . $1; # use default prefix
318 $spot = $1;
321 if ($term =~ /^(\d+)-(\d+)-(\d+\.\d+\.\d+\.\d+)$/) { # complete Version#-Release#- prefix
322 $spot = $3;
323 $id_term = $term; # use new version and release values
326 if ($id_term) {
327 my $query = "SELECT count(*) FROM microarray AS m WHERE m.spot_id = ? AND m.version = ? AND m.release = ?";
328 my $count = sql_query_count($db , $query, $spot,$version,$release);
330 if ($count != 0) {
331 $array_link = [
332 "/search/est.pl?request_id=$id_term&request_from=0&request_type=14&search=Search",
333 "$count array identifiers",
337 return $array_link;
339 sub quick_phenotype_search {
340 my ($db, $term) = @_;
341 my $q = "select count (distinct stock_id ) from stock left join stockprop using (stock_id) left join cvterm on stockprop.type_id = cvterm.cvterm_id where stock.name ilike ? or stock.uniquename ilike ? or (stockprop.value ilike ? and cvterm.name ilike ? ) " ;
342 my $count = sql_query_count( $db , $q , "\%$term\%","\%$term\%","\%$term\%", "\%synonym\%" );
343 my $pheno_link = [ undef , "0 phenotype identifiers"];
344 if ($count>0) {
345 $pheno_link = ["/stock/search?stock_name=$term&search_submitted=1" ,
346 "$count phenotype identifiers" ];
348 return $pheno_link;
351 sub quick_marker_search {
352 my $db = shift;
353 my $term = shift;
355 # adjust if EST
356 $term =~ s/([a-z]{4})(\d{1,2})([a-z]\d{1,2})/$1-$2-$3/i;
358 my $marker_link = [undef, "0 marker identifiers"];
359 my $count = CXGN::Marker::Tools::marker_name_to_ids($db,$term);
360 if ($count != 0) {
361 $marker_link = [
362 "/search/markers/markersearch.pl?w822_nametype=starts+with&w822_marker_name=$term&w822_submit=Search&w822_mapped=off&w822_species=Any&w822_protos=Any&w822_chromos=Any&w822_pos_start=&w822_pos_end=&w822_confs=Any&w822_maps=Any",
363 "$count marker identifiers"
366 return $marker_link;
369 sub quick_manual_annotation_search {
370 my $db = shift;
371 my $term = shift;
373 # It's a syntax error for whitespace to occur in tsquery query strings. Replace with ampersands.
374 my $cleaned_term = to_tsquery_string($term);
375 my $count = sql_query_count($db, <<EOSQL, $cleaned_term);
376 SELECT COUNT(*)
377 FROM manual_annotations
378 WHERE annotation_text_fulltext @@ to_tsquery(?)
379 EOSQL
381 my $unigene_count = do {
382 if($count > 0) {
383 sql_query_count($db,<<EOSQL,$cleaned_term);
384 SELECT COUNT(DISTINCT(unigene_member.unigene_id))
385 FROM manual_annotations,
386 seqread,
387 est,
388 unigene_member
389 WHERE annotation_text_fulltext @@ to_tsquery(?)
390 AND manual_annotations.annotation_target_id=seqread.clone_id
391 AND seqread.read_id=est.read_id
392 AND est.est_id=unigene_member.est_id
393 EOSQL
394 } else {
399 return
400 $count > 0 ? ["/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=manual_search", "$count manual annotations on $unigene_count unigenes"]
401 : [undef, "0 manual annotations"];
404 sub quick_automatic_annotation_search {
405 my $db = shift;
406 my $term = shift;
407 my $cleaned_term = to_tsquery_string($term);
408 my $count = sql_query_count($db, "select count(*) from blast_defline where defline_fulltext @@ to_tsquery(?)",$cleaned_term);
410 my $unigene_count = "(not determined -- number of annotations too large)";
411 if ($count < 10000) {
412 $unigene_count = sql_query_count($db, <<EOSQL,$cleaned_term);
413 SELECT COUNT(DISTINCT(unigene.unigene_id))
414 FROM blast_defline,
415 blast_hits,
416 blast_annotations,
417 unigene
418 WHERE defline_fulltext @@ to_tsquery(?)
419 AND blast_defline.defline_id=blast_hits.defline_id
420 AND blast_hits.blast_annotation_id=blast_annotations.blast_annotation_id
421 AND blast_annotations.apply_id=unigene.unigene_id
422 AND blast_annotations.apply_type=15
423 EOSQL
425 my $automatic_annotation_link = [undef, "0 automatic annotations"];
426 if ($count !=0) {
427 $automatic_annotation_link = [ "/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=blast_search", "$count automatic annotations on $unigene_count unigenes" ];
429 return $automatic_annotation_link;
432 sub sql_query_count {
433 my $db = shift;
434 my $query = shift;
435 my $qh = $db -> prepare_cached($query);
436 $qh -> execute(@_);
437 my ($count) = $qh -> fetchrow_array();
438 return $count;
441 sub google_search {
442 my( $site_title, $term, $site_address ) = @_;
444 my $google_url = uri( scheme => 'http',
445 host => 'www.google.com',
446 path => '/custom',
447 query => {
448 q => $term,
449 ( $site_address
450 ? ( sitesearch => $site_address )
451 : ()
454 query_separator => '&',
457 my $lwp_ua = LWP::UserAgent->new;
458 $lwp_ua->agent( 'SGN Quick Search ( Mozilla compatible )' );
459 my $res = $lwp_ua->request( HTTP::Request->new( GET => $google_url ));
461 my $count = do {
462 if( $res ->is_success ) {
463 my $cont = $res->content;
464 $cont =~ s/\<.*?\>//g;
465 my ($c) = $cont =~ /Results\s*\d*?\s*\-\s*\d*\s*of\s*(?:about)?\s*?([\d\,]+)/;
470 if( $count ) {
471 return [ $google_url, "$count pages on $site_title" ];
472 } else {
473 return [ undef, "0 pages on $site_title" ];
478 sub quick_web_search {
479 my (undef,$term) = @_;
480 # works the same way as quick_page_search, except that the domain contraint is removed from the
481 # search.
482 return google_search('the entire web',$term);
484 sub quick_page_search {
485 my (undef,$term) = @_;
486 return google_search('SGN',$term,'solgenomics.net');
490 __PACKAGE__->meta->make_immutable;