1 package SGN
::Controller
::QuickSearch
;
3 use namespace
::autoclean
;
5 BEGIN { extends
'Catalyst::Controller' }
9 use List
::MoreUtils
'uniq';
10 use Time
::HiRes
'time';
11 use URI
::FromHash
'uri';
12 use Class
::Load
':all';
13 use CXGN
::Marker
::Tools
;
14 use CXGN
::Tools
::Identifiers qw
/ identifier_url identifier_namespace /;
15 use CXGN
::Tools
::Text qw
/to_tsquery_string trim/;
20 SGN::Controller::QuickSearch - implement the quick search
21 functionality of the site
25 Performs a search for each entity in the database and reports the
26 number of hits for each entity. Links are provided on an overview page
27 to allow the user to complete the search. In addition to the database,
28 quick_search.pl also searches google for the solgenomics.net domain,
29 parses the page that Google returns to report the number of hits on
30 the web site (which includes static and dynamic pages). The link
31 provided with that search is going directly to Google. Similarly, the
32 search is Google search is repeated without the domain constraint to
33 show the number of hits on the world wide web in total. A link for
34 that search is also provided.
42 # function-based searches
43 clone
=> { function
=> \
&quick_clone_search
, exact
=> 1 },
44 est
=> { function
=> \
&quick_est_search
, exact
=> 1 },
45 microarray
=> { function
=> \
&quick_array_search
, exact
=> 1 },
46 marker
=> { function
=> \
&quick_marker_search
},
47 manual_annotations
=> { function
=> \
&quick_manual_annotation_search
},
48 automatic_annotations
=> { function
=> \
&quick_automatic_annotation_search
},
49 sgn_pages
=> { function
=> \
&quick_page_search
},
50 web
=> { function
=> \
&quick_web_search
},
51 phenotype
=> { function
=> \
&quick_phenotype_search
},
52 # search-framework searches
53 people
=> { sf_class
=> 'CXGN::Searches::People',
54 result_desc
=> 'people',
55 search_path
=> '/solpeople/people_search.pl'
57 library
=> { sf_class
=> 'CXGN::Searches::Library',
58 result_desc
=> 'cDNA libraries',
59 search_path
=> '/search/library_search.pl',
61 #bac => { sf_class => 'CXGN::Genomic::Search::Clone',
62 # result_desc => 'BAC identifiers',
63 # search_path => '/maps/physical/clone_search.pl',
65 unigene
=> { sf_class
=> 'CXGN::Unigene::Search',
66 result_desc
=> 'unigene identifiers',
67 search_path
=> '/search/ug-ad2.pl',
70 image
=> { sf_class
=> 'CXGN::Searches::Images',
71 result_desc
=> 'images',
72 search_path
=> '/search/image_search.pl',
74 locus_allele
=> { sf_class
=> 'CXGN::Phenome',
75 result_desc
=> 'locus or allele identifiers',
76 search_path
=> '/search/locus',
79 # note that there is also another method of searching using site feature xrefs
85 Public path: /search/quick
87 Handles POST or GET quick searches. Parameter can be either C<term>
88 or C<q>. If optional param C<showtimes> is true, shows number of
89 seconds each of the search steps took.
93 sub quick_search
: Path
('/search/quick') {
94 my ( $self, $c ) = @_;
96 # use either 'term' or 'q' as the search term
97 my ($term) = grep defined, @
{ $c->req->parameters }{'term','q'};
99 $term =~ s/^\s*|\s*$//g;
101 defined $term && length $term
102 or $c->throw_client_error( public_message
=> 'Must provide a search term' );
105 quick_search_term
=> $term,
107 show_times
=> $c->req->parameters->{showtimes
},
108 template
=> '/search/quick_search.mas',
111 return if $c->forward('redirect_by_ident');
113 $c->forward('execute_predefined_searches');
114 $c->forward('search_with_xrefs');
115 $c->forward('redirect_if_only_one_possible');
118 #run the term through CXGN::Tools::Identifiers, and if it's
119 #recognized as an exact SGN identifier match, just redirect them to
121 sub redirect_by_ident
: Private
{
122 my ( $self, $c ) = @_;
124 my $term = $c->stash->{term
};
126 if ( my $direct_url = identifier_url
($term) ) {
127 my $namespace = identifier_namespace
($term);
128 #if the URL is just to this page, it's not useful
129 unless( $direct_url =~ m!quick_search\.pl|search/quick! #unless the url is to quick_search
130 || $namespace eq 'est' # don't auto-redirect for est names, some markers are called this
133 #if it's an external link, don't redirect, but put it in the external_link variable
134 if ( $direct_url =~ m@
(f
|ht
)tp
://@
135 && $direct_url !~ /sgn\.cornell\.edu|solgenomics\.net/
137 my ($domain) = $direct_url =~ m
|://(?
:www\
.)?
([^/]+)|;
138 $c->stash->{results
}{external_link
}{result
} = [ $direct_url, '1 direct information page' ];
140 $c->res->redirect( $direct_url );
149 # another optimization: if the quick search found only one
150 # possible URL to go to, go there
151 sub redirect_if_only_one_possible
: Private
{
152 my ( $self, $c ) = @_;
154 my @possible_urls = uniq
(
155 grep $_ !~ m!^https?://! && $_ !~ m!^/solpeople!,
157 ( map $_->{result
}->[0],
158 values %{$c->stash->{results
}}
161 @
{ $c->stash->{xrefs
} || [] }
165 if( @possible_urls == 1 ) {
166 $c->log->debug("redirecting to only possible url: $possible_urls[0]") if $c->debug;
167 $c->res->redirect( $possible_urls[0] );
172 sub execute_predefined_searches
: Private
{
173 my ( $self, $c ) = @_;
175 # execute all the searches and stash the results
176 for my $search_name ( sort keys %searches ) {
177 my $search = $searches{$search_name};
179 my $searchresults = $self->do_quick_search(
182 term
=> $c->stash->{term
},
184 $c->stash->{results
}{$search_name} = {
185 result
=> $searchresults,
187 exact
=> $search->{exact
}
193 sub search_with_xrefs
: Private
{
194 my ( $self, $c ) = @_;
197 my @xrefs = $c->feature_xrefs( $c->stash->{term
} );
198 $c->stash->{xrefs
} = \
@xrefs;
199 $c->stash->{xrefs_time
} = time - $b;
202 #do a quick search with either a legacy quick search function or a
203 #WWWSearch-implementing search
204 sub do_quick_search
{
205 my ( $self, $db, %args ) = @_;
207 if ($args{function
}) { #just run legacy functions and return their results
208 return $args{function
}->( $db,$args{term
});
210 my $classname = $args{sf_class
}
211 or die 'Must provide a class name';
213 Class
::Load
::load_class
( $classname );
214 $classname->isa( 'CXGN::Search::SearchI' )
215 or die "'$classname' is not a CXGN::Search::SearchI-implementing object";
217 my $search = $classname->new;
218 my $query = $search->new_query;
220 #check that the query has a quick_search function
221 $query->can('quick_search')
222 or die "Search '$classname' does not appear to have a query object with a quick_search method";
224 if ( $query->quick_search($args{term
}) ) {
225 my $results = $search->do_search($query);
226 my $count = $results->total_results;
227 die 'count should not be negative' if $count < 0;
230 my $qstr = encode_entities
($query->to_query_string());
231 return [ "$args{search_path}?$qstr", "$count $args{result_desc}" ];
234 return [undef, "0 $args{result_desc}"];
237 die 'this point should not be reached';
240 ###################### LEGACY QUICK SEARCH FUNCTIONS ##########################
242 sub quick_est_search
{
246 my $est_link = [ undef, "0 EST identifiers" ];
248 # the est quick search should support identifiers of the form SGN-E999999, SGN_E999999, SGNE999999
249 # and also E999999, as well as straight number (999999).
251 if ($term =~ /^\d+$/ || ( identifier_namespace
($term) || '' )eq 'sgn_e' )
253 my ($id_term) = $term =~ /(\d+)/;
254 my $count = sql_query_count
($db, "SELECT count(*) FROM est WHERE est.est_id = ?",$id_term);
257 "/search/est.pl?request_id=$id_term&request_from=0&request_type=7&search=Search",
258 "$count EST identifiers",
265 sub quick_clone_search
{
270 unless ($term =~ m
|^ccc
|) { # coffee clone name.
271 $term =~ s
|([a
-z
]{4})(\d
{1,2})([a
-z
]\d
{1,2})|$1-$2-$3|i
;
274 # the quick clone search supports searching of clone name and
275 # clone ids. Clone ids can be entered as SGNC999999, SGN-C999999,
276 # SGN_C999999 or C999999. if the input does not correspond to any
277 # of these formats, the clone_name is searched. may have to add
278 # something for the dashes that are sometimes not present in the
281 my $where_clause = "";
282 if ($term =~ /^(?:(SGN[\-\_]?)?C)?(\d+)$/i) {
283 $where_clause = "WHERE clone_id = ?";
286 $where_clause = "WHERE clone_name ilike ?";
289 my $query = "SELECT clone_id FROM sgn.clone $where_clause";
290 my ($clone_id) = $db->selectrow_array($query, undef, $term);
292 my $clone_link = [undef, "0 cDNA clone identifiers"];
295 "/search/est.pl?request_id=SGN-C$clone_id&request_from=0&request_type=automatic&search=Search",
296 "1 cDNA clone identifier",
302 # For quick_search queries without the Version#-Release#- prefix, the version and release are
303 # assumed to both be one. This is hardcoded below in two variables $version and $release.
304 sub quick_array_search
{
308 my $version = 1; # default version is 1
309 my $release = 1; # default release is 1
312 my $array_link = [ undef, "0 array identifiers" ];
314 # the array quick search should support the following formats:
315 # 1-1-1.1.1.1 (proper), -1-1.1.1.1, 1-1.1.1.1, -1.1.1.1 and 1.1.1.1
318 if ($term =~ /^-?\d*-?(\d+\.\d+\.\d+\.\d+)$/) { # incomplete or absent Version#-Release#- prefix
319 $id_term = $version . "-" . $release . "-" . $1; # use default prefix
323 if ($term =~ /^(\d+)-(\d+)-(\d+\.\d+\.\d+\.\d+)$/) { # complete Version#-Release#- prefix
325 $id_term = $term; # use new version and release values
329 my $query = "SELECT count(*) FROM microarray AS m WHERE m.spot_id = ? AND m.version = ? AND m.release = ?";
330 my $count = sql_query_count
($db , $query, $spot,$version,$release);
334 "/search/est.pl?request_id=$id_term&request_from=0&request_type=14&search=Search",
335 "$count array identifiers",
341 sub quick_phenotype_search
{
342 my ($db, $term) = @_;
343 my $q = "select count (distinct stock_id ) from stock left join stockprop using (stock_id) left join cvterm on stockprop.type_id = cvterm.cvterm_id where stock.name ilike ? or stock.uniquename ilike ? or (stockprop.value ilike ? and cvterm.name ilike ? ) " ;
344 my $count = sql_query_count
( $db , $q , "\%$term\%","\%$term\%","\%$term\%", "\%synonym\%" );
345 my $pheno_link = [ undef , "0 phenotype identifiers"];
347 $pheno_link = ["/search/stocks?any_name=$term" ,
348 "$count phenotype identifiers" ];
353 sub quick_marker_search
{
358 $term =~ s/([a-z]{4})(\d{1,2})([a-z]\d{1,2})/$1-$2-$3/i;
360 my $marker_link = [undef, "0 marker identifiers"];
361 my $count = CXGN
::Marker
::Tools
::marker_name_to_ids
($db,$term);
364 "/search/markers/markersearch.pl?w822_nametype=starts+with&w822_marker_name=$term&w822_submit=Search&w822_mapped=off&w822_species=Any&w822_protos=Any&w822_chromos=Any&w822_pos_start=&w822_pos_end=&w822_confs=Any&w822_maps=Any",
365 "$count marker identifiers"
371 sub quick_manual_annotation_search
{
375 # It's a syntax error for whitespace to occur in tsquery query strings. Replace with ampersands.
376 my $cleaned_term = to_tsquery_string
($term);
377 my $count = sql_query_count
($db, <<EOSQL, $cleaned_term);
379 FROM manual_annotations
380 WHERE annotation_text_fulltext @@ to_tsquery(?)
383 my $unigene_count = do {
385 sql_query_count
($db,<<EOSQL,$cleaned_term);
386 SELECT COUNT(DISTINCT(unigene_member.unigene_id))
387 FROM manual_annotations,
391 WHERE annotation_text_fulltext @@ to_tsquery(?)
392 AND manual_annotations.annotation_target_id=seqread.clone_id
393 AND seqread.read_id=est.read_id
394 AND est.est_id=unigene_member.est_id
402 $count > 0 ?
["/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=manual_search", "$count manual annotations on $unigene_count unigenes"]
403 : [undef, "0 manual annotations"];
406 sub quick_automatic_annotation_search
{
409 my $cleaned_term = to_tsquery_string
($term);
410 my $count = sql_query_count
($db, "select count(*) from blast_defline where defline_fulltext @@ to_tsquery(?)",$cleaned_term);
412 my $unigene_count = "(not determined -- number of annotations too large)";
413 if ($count < 10000) {
414 $unigene_count = sql_query_count
($db, <<EOSQL,$cleaned_term);
415 SELECT COUNT(DISTINCT(unigene.unigene_id))
420 WHERE defline_fulltext @@ to_tsquery(?)
421 AND blast_defline.defline_id=blast_hits.defline_id
422 AND blast_hits.blast_annotation_id=blast_annotations.blast_annotation_id
423 AND blast_annotations.apply_id=unigene.unigene_id
424 AND blast_annotations.apply_type=15
427 my $automatic_annotation_link = [undef, "0 automatic annotations"];
429 $automatic_annotation_link = [ "/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=blast_search", "$count automatic annotations on $unigene_count unigenes" ];
431 return $automatic_annotation_link;
434 sub sql_query_count
{
437 my $qh = $db -> prepare_cached
($query);
439 my ($count) = $qh -> fetchrow_array
();
444 my( $site_title, $term, $site_address ) = @_;
446 my $google_url = uri
( scheme
=> 'http',
447 host
=> 'www.google.com',
452 ?
( sitesearch
=> $site_address )
456 query_separator
=> '&',
459 my $lwp_ua = LWP
::UserAgent
->new;
460 $lwp_ua->agent( 'SGN Quick Search ( Mozilla compatible )' );
461 my $res = $lwp_ua->request( HTTP
::Request
->new( GET
=> $google_url ));
464 if( $res ->is_success ) {
465 my $cont = $res->content;
466 $cont =~ s/\<.*?\>//g;
467 my ($c) = $cont =~ /Results\s*\d*?\s*\-\s*\d*\s*of\s*(?:about)?\s*?([\d\,]+)/;
473 return [ $google_url, "$count pages on $site_title" ];
475 return [ undef, "0 pages on $site_title" ];
480 sub quick_web_search
{
481 my (undef,$term) = @_;
482 # works the same way as quick_page_search, except that the domain contraint is removed from the
484 return google_search
('the entire web',$term);
486 sub quick_page_search
{
487 my (undef,$term) = @_;
488 return google_search
('SGN',$term,'solgenomics.net');
492 __PACKAGE__
->meta->make_immutable;