1 package SGN
::Controller
::QuickSearch
;
3 use namespace
::autoclean
;
5 BEGIN { extends
'Catalyst::Controller' }
9 use List
::MoreUtils
'uniq';
10 use Time
::HiRes
'time';
11 use URI
::FromHash
'uri';
13 use CXGN
::Marker
::Tools
;
14 use CXGN
::Tools
::Identifiers qw
/ identifier_url identifier_namespace /;
15 use CXGN
::Tools
::Text qw
/to_tsquery_string trim/;
20 SGN::Controller::QuickSearch - implement the quick search
21 functionality of the site
25 Performs a search for each entity in the database and reports the
26 number of hits for each entity. Links are provided on an overview page
27 to allow the user to complete the search. In addition to the database,
28 quick_search.pl also searches google for the solgenomics.net domain,
29 parses the page that Google returns to report the number of hits on
30 the web site (which includes static and dynamic pages). The link
31 provided with that search is going directly to Google. Similarly, the
32 search is Google search is repeated without the domain constraint to
33 show the number of hits on the world wide web in total. A link for
34 that search is also provided.
42 # function-based searches
43 clone
=> { function
=> \
&quick_clone_search
, exact
=> 1 },
44 est
=> { function
=> \
&quick_est_search
, exact
=> 1 },
45 microarray
=> { function
=> \
&quick_array_search
, exact
=> 1 },
46 marker
=> { function
=> \
&quick_marker_search
},
47 manual_annotations
=> { function
=> \
&quick_manual_annotation_search
},
48 automatic_annotations
=> { function
=> \
&quick_automatic_annotation_search
},
49 sgn_pages
=> { function
=> \
&quick_page_search
},
50 web
=> { function
=> \
&quick_web_search
},
51 phenotype
=> { function
=> \
&quick_phenotype_search
},
52 # search-framework searches
53 people
=> { sf_class
=> 'CXGN::Searches::People',
54 result_desc
=> 'people',
55 search_path
=> '/solpeople/people_search.pl'
57 library
=> { sf_class
=> 'CXGN::Searches::Library',
58 result_desc
=> 'cDNA libraries',
59 search_path
=> '/search/library_search.pl',
61 bac
=> { sf_class
=> 'CXGN::Genomic::Search::Clone',
62 result_desc
=> 'BAC identifiers',
63 search_path
=> '/maps/physical/clone_search.pl',
65 unigene
=> { sf_class
=> 'CXGN::Unigene::Search',
66 result_desc
=> 'unigene identifiers',
67 search_path
=> '/search/ug-ad2.pl',
70 image
=> { sf_class
=> 'CXGN::Searches::Images',
71 result_desc
=> 'images',
72 search_path
=> '/search/image_search.pl',
74 locus_allele
=> { sf_class
=> 'CXGN::Phenome',
75 result_desc
=> 'locus or allele identifiers',
76 search_path
=> '/search/locus_search.pl',
79 # note that there is also another method of searching using site feature xrefs
85 Public path: /search/quick
87 Handles POST or GET quick searches. Parameter can be either C<term>
88 or C<q>. If optional param C<showtimes> is true, shows number of
89 seconds each of the search steps took.
93 sub quick_search
: Path
('/search/quick') {
94 my ( $self, $c ) = @_;
96 # use either 'term' or 'q' as the search term
97 my ($term) = grep defined, @
{ $c->req->parameters }{'term','q'};
99 defined $term && length $term
100 or $c->throw_client_error( public_message
=> 'Must provide a search term' );
103 quick_search_term
=> $term,
105 show_times
=> $c->req->parameters->{showtimes
},
106 template
=> '/search/quick_search.mas',
109 return if $c->forward('redirect_by_ident');
111 $c->forward('execute_predefined_searches');
112 $c->forward('search_with_xrefs');
113 $c->forward('redirect_if_only_one_possible');
116 #run the term through CXGN::Tools::Identifiers, and if it's
117 #recognized as an exact SGN identifier match, just redirect them to
119 sub redirect_by_ident
: Private
{
120 my ( $self, $c ) = @_;
122 my $term = $c->stash->{term
};
124 if ( my $direct_url = identifier_url
($term) ) {
125 my $namespace = identifier_namespace
($term);
126 #if the URL is just to this page, it's not useful
127 unless( $direct_url =~ m!quick_search\.pl|search/quick! #unless the url is to quick_search
128 || $namespace eq 'est' # don't auto-redirect for est names, some markers are called this
131 #if it's an external link, don't redirect, but put it in the external_link variable
132 if ( $direct_url =~ m@
(f
|ht
)tp
://@
133 && $direct_url !~ /sgn\.cornell\.edu|solgenomics\.net/
135 my ($domain) = $direct_url =~ m
|://(?
:www\
.)?
([^/]+)|;
136 $c->stash->{results
}{external_link
}{result
} = [ $direct_url, '1 direct information page' ];
138 $c->res->redirect( $direct_url );
147 # another optimization: if the quick search found only one
148 # possible URL to go to, go there
149 sub redirect_if_only_one_possible
: Private
{
150 my ( $self, $c ) = @_;
152 my @possible_urls = uniq
(
153 grep $_ !~ m!^https?://! && $_ !~ m!^/solpeople!,
155 ( map $_->{result
}->[0],
156 values %{$c->stash->{results
}}
159 @
{ $c->stash->{xrefs
} || [] }
163 if( @possible_urls == 1 ) {
164 $c->log->debug("redirecting to only possible url: $possible_urls[0]") if $c->debug;
165 $c->res->redirect( $possible_urls[0] );
170 sub execute_predefined_searches
: Private
{
171 my ( $self, $c ) = @_;
173 # execute all the searches and stash the results
174 for my $search_name ( sort keys %searches ) {
175 my $search = $searches{$search_name};
177 my $searchresults = $self->do_quick_search(
180 term
=> $c->stash->{term
},
182 $c->stash->{results
}{$search_name} = {
183 result
=> $searchresults,
185 exact
=> $search->{exact
}
191 sub search_with_xrefs
: Private
{
192 my ( $self, $c ) = @_;
195 my @xrefs = $c->feature_xrefs( $c->stash->{term
} );
196 $c->stash->{xrefs
} = \
@xrefs;
197 $c->stash->{xrefs_time
} = time - $b;
200 #do a quick search with either a legacy quick search function or a
201 #WWWSearch-implementing search
202 sub do_quick_search
{
203 my ( $self, $db, %args ) = @_;
205 if ($args{function
}) { #just run legacy functions and return their results
206 return $args{function
}->( $db,$args{term
});
208 my $classname = $args{sf_class
}
209 or die 'Must provide a class name';
211 Class
::MOP
::load_class
( $classname );
212 $classname->isa( 'CXGN::Search::SearchI' )
213 or die "'$classname' is not a CXGN::Search::SearchI-implementing object";
215 my $search = $classname->new;
216 my $query = $search->new_query;
218 #check that the query has a quick_search function
219 $query->can('quick_search')
220 or die "Search '$classname' does not appear to have a query object with a quick_search method";
222 if ( $query->quick_search($args{term
}) ) {
223 my $results = $search->do_search($query);
224 my $count = $results->total_results;
225 die 'count should not be negative' if $count < 0;
228 my $qstr = encode_entities
($query->to_query_string());
229 return [ "$args{search_path}?$qstr", "$count $args{result_desc}" ];
232 return [undef, "0 $args{result_desc}"];
235 die 'this point should not be reached';
238 ###################### LEGACY QUICK SEARCH FUNCTIONS ##########################
240 sub quick_est_search
{
244 my $est_link = [ undef, "0 EST identifiers" ];
246 # the est quick search should support identifiers of the form SGN-E999999, SGN_E999999, SGNE999999
247 # and also E999999, as well as straight number (999999).
249 if ($term =~ /^\d+$/ || ( identifier_namespace
($term) || '' )eq 'sgn_e' )
251 my ($id_term) = $term =~ /(\d+)/;
252 my $count = sql_query_count
($db, "SELECT count(*) FROM est WHERE est.est_id = ?",$id_term);
255 "/search/est.pl?request_id=$id_term&request_from=0&request_type=7&search=Search",
256 "$count EST identifiers",
263 sub quick_clone_search
{
268 unless ($term =~ m
|^ccc
|) { # coffee clone name.
269 $term =~ s
|([a
-z
]{4})(\d
{1,2})([a
-z
]\d
{1,2})|$1-$2-$3|i
;
272 # the quick clone search supports searching of clone name and
273 # clone ids. Clone ids can be entered as SGNC999999, SGN-C999999,
274 # SGN_C999999 or C999999. if the input does not correspond to any
275 # of these formats, the clone_name is searched. may have to add
276 # something for the dashes that are sometimes not present in the
279 my $where_clause = "";
280 if ($term =~ /^(?:(SGN[\-\_]?)?C)?(\d+)$/i) {
281 $where_clause = "WHERE clone_id = ?";
284 $where_clause = "WHERE clone_name ilike ?";
287 my $query = "SELECT clone_id FROM sgn.clone $where_clause";
288 my ($clone_id) = $db->selectrow_array($query, undef, $term);
290 my $clone_link = [undef, "0 cDNA clone identifiers"];
293 "/search/est.pl?request_id=SGN-C$clone_id&request_from=0&request_type=automatic&search=Search",
294 "1 cDNA clone identifier",
300 # For quick_search queries without the Version#-Release#- prefix, the version and release are
301 # assumed to both be one. This is hardcoded below in two variables $version and $release.
302 sub quick_array_search
{
306 my $version = 1; # default version is 1
307 my $release = 1; # default release is 1
310 my $array_link = [ undef, "0 array identifiers" ];
312 # the array quick search should support the following formats:
313 # 1-1-1.1.1.1 (proper), -1-1.1.1.1, 1-1.1.1.1, -1.1.1.1 and 1.1.1.1
316 if ($term =~ /^-?\d*-?(\d+\.\d+\.\d+\.\d+)$/) { # incomplete or absent Version#-Release#- prefix
317 $id_term = $version . "-" . $release . "-" . $1; # use default prefix
321 if ($term =~ /^(\d+)-(\d+)-(\d+\.\d+\.\d+\.\d+)$/) { # complete Version#-Release#- prefix
323 $id_term = $term; # use new version and release values
327 my $query = "SELECT count(*) FROM microarray AS m WHERE m.spot_id = ? AND m.version = ? AND m.release = ?";
328 my $count = sql_query_count
($db , $query, $spot,$version,$release);
332 "/search/est.pl?request_id=$id_term&request_from=0&request_type=14&search=Search",
333 "$count array identifiers",
339 sub quick_phenotype_search
{
340 my ($db, $term) = @_;
341 my $q = "select count (distinct stock_id ) from stock left join stockprop using (stock_id) left join cvterm on stockprop.type_id = cvterm.cvterm_id where stock.name ilike ? or stock.uniquename ilike ? or (stockprop.value ilike ? and cvterm.name ilike ? ) " ;
342 my $count = sql_query_count
( $db , $q , "\%$term\%","\%$term\%","\%$term\%", "\%synonym\%" );
343 my $pheno_link = [ undef , "0 phenotype identifiers"];
345 $pheno_link = ["/stock/search?stock_name=$term&search_submitted=1" ,
346 "$count phenotype identifiers" ];
351 sub quick_marker_search
{
356 $term =~ s/([a-z]{4})(\d{1,2})([a-z]\d{1,2})/$1-$2-$3/i;
358 my $marker_link = [undef, "0 marker identifiers"];
359 my $count = CXGN
::Marker
::Tools
::marker_name_to_ids
($db,$term);
362 "/search/markers/markersearch.pl?w822_nametype=starts+with&w822_marker_name=$term&w822_submit=Search&w822_mapped=off&w822_species=Any&w822_protos=Any&w822_chromos=Any&w822_pos_start=&w822_pos_end=&w822_confs=Any&w822_maps=Any",
363 "$count marker identifiers"
369 sub quick_manual_annotation_search
{
373 # It's a syntax error for whitespace to occur in tsquery query strings. Replace with ampersands.
374 my $cleaned_term = to_tsquery_string
($term);
375 my $count = sql_query_count
($db, <<EOSQL, $cleaned_term);
377 FROM manual_annotations
378 WHERE annotation_text_fulltext @@ to_tsquery(?)
381 my $unigene_count = do {
383 sql_query_count
($db,<<EOSQL,$cleaned_term);
384 SELECT COUNT(DISTINCT(unigene_member.unigene_id))
385 FROM manual_annotations,
389 WHERE annotation_text_fulltext @@ to_tsquery(?)
390 AND manual_annotations.annotation_target_id=seqread.clone_id
391 AND seqread.read_id=est.read_id
392 AND est.est_id=unigene_member.est_id
400 $count > 0 ?
["/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=manual_search", "$count manual annotations on $unigene_count unigenes"]
401 : [undef, "0 manual annotations"];
404 sub quick_automatic_annotation_search
{
407 my $cleaned_term = to_tsquery_string
($term);
408 my $count = sql_query_count
($db, "select count(*) from blast_defline where defline_fulltext @@ to_tsquery(?)",$cleaned_term);
410 my $unigene_count = "(not determined -- number of annotations too large)";
411 if ($count < 10000) {
412 $unigene_count = sql_query_count
($db, <<EOSQL,$cleaned_term);
413 SELECT COUNT(DISTINCT(unigene.unigene_id))
418 WHERE defline_fulltext @@ to_tsquery(?)
419 AND blast_defline.defline_id=blast_hits.defline_id
420 AND blast_hits.blast_annotation_id=blast_annotations.blast_annotation_id
421 AND blast_annotations.apply_id=unigene.unigene_id
422 AND blast_annotations.apply_type=15
425 my $automatic_annotation_link = [undef, "0 automatic annotations"];
427 $automatic_annotation_link = [ "/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=blast_search", "$count automatic annotations on $unigene_count unigenes" ];
429 return $automatic_annotation_link;
432 sub sql_query_count
{
435 my $qh = $db -> prepare_cached
($query);
437 my ($count) = $qh -> fetchrow_array
();
442 my( $site_title, $term, $site_address ) = @_;
444 my $google_url = uri
( scheme
=> 'http',
445 host
=> 'www.google.com',
450 ?
( sitesearch
=> $site_address )
454 query_separator
=> '&',
457 my $lwp_ua = LWP
::UserAgent
->new;
458 $lwp_ua->agent( 'SGN Quick Search ( Mozilla compatible )' );
459 my $res = $lwp_ua->request( HTTP
::Request
->new( GET
=> $google_url ));
462 if( $res ->is_success ) {
463 my $cont = $res->content;
464 $cont =~ s/\<.*?\>//g;
465 my ($c) = $cont =~ /Results\s*\d*?\s*\-\s*\d*\s*of\s*(?:about)?\s*?([\d\,]+)/;
471 return [ $google_url, "$count pages on $site_title" ];
473 return [ undef, "0 pages on $site_title" ];
478 sub quick_web_search
{
479 my (undef,$term) = @_;
480 # works the same way as quick_page_search, except that the domain contraint is removed from the
482 return google_search
('the entire web',$term);
484 sub quick_page_search
{
485 my (undef,$term) = @_;
486 return google_search
('SGN',$term,'solgenomics.net');
490 __PACKAGE__
->meta->make_immutable;