1 package SGN
::Controller
::QuickSearch
;
3 use namespace
::autoclean
;
5 BEGIN { extends
'Catalyst::Controller' }
9 use List
::MoreUtils
'uniq';
10 use Time
::HiRes
'time';
11 use URI
::FromHash
'uri';
12 use Class
::Load
':all';
13 use CXGN
::Marker
::Tools
;
14 use CXGN
::Tools
::Identifiers qw
/ identifier_url identifier_namespace /;
15 use CXGN
::Tools
::Text qw
/to_tsquery_string trim/;
16 use SGN
::Model
::Cvterm
;
17 use Bio
::Chado
::Schema
;
18 use CXGN
::Marker
::SearchMatView
;
23 SGN::Controller::QuickSearch - implement the quick search
24 functionality of the site
28 Performs a search for each entity in the database and reports the
29 number of hits for each entity. Links are provided on an overview page
30 to allow the user to complete the search. In addition to the database,
31 quick_search.pl also searches Google for the solgenomics.net domain,
32 parses the page that Google returns to report the number of hits on
33 the web site (which includes static and dynamic pages). The link
34 provided with that search goes directly to Google. Similarly, the
35 Google search is repeated without the domain constraint to
36 show the number of hits on the world wide web in total. A link for
37 that search is also provided.
39 ## TO DO: Make this a little more modern and move this code to
40 ## an AJAX Controller...
48 # function-based searches
49 clone
=> { function
=> \
&quick_clone_search
, exact
=> 1 },
50 est
=> { function
=> \
&quick_est_search
, exact
=> 1 },
51 microarray
=> { function
=> \
&quick_array_search
, exact
=> 1 },
52 # marker => { function => \&quick_marker_search },
53 marker
=> { function
=> \
&quick_mapped_geno_marker_search
},
54 manual_annotations
=> { function
=> \
&quick_manual_annotation_search
},
55 automatic_annotations
=> { function
=> \
&quick_automatic_annotation_search
},
56 # sgn_pages => { function => \&quick_page_search },
57 # web => { function => \&quick_web_search },
58 phenotype
=> { function
=> \
&quick_phenotype_search
},
59 accessions
=> { function
=> \
&quick_accession_search
},
60 vectors
=> { function
=> \
&quick_vector_search
},
61 plots
=> { function
=> \
&quick_plot_search
},
62 populations
=> { function
=> \
&quick_populations_search
},
63 trials
=> { function
=> \
&quick_trials_search
},
64 locations
=> { function
=> \
&quick_locations_search
},
65 traits
=> { function
=> \
&quick_traits_search
},
66 breeding_programs
=> { function
=> \
&quick_bp_search
},
68 # search-framework searches
69 people
=> { sf_class
=> 'CXGN::Searches::People',
70 result_desc
=> 'people',
71 search_path
=> '/solpeople/people_search.pl'
73 library
=> { sf_class
=> 'CXGN::Searches::Library',
74 result_desc
=> 'cDNA libraries',
75 search_path
=> '/search/library_search.pl',
77 #bac => { sf_class => 'CXGN::Genomic::Search::Clone',
78 # result_desc => 'BAC identifiers',
79 # search_path => '/maps/physical/clone_search.pl',
81 unigene
=> { sf_class
=> 'CXGN::Unigene::Search',
82 result_desc
=> 'unigene identifiers',
83 search_path
=> '/search/ug-ad2.pl',
86 image
=> { sf_class
=> 'CXGN::Searches::Images',
87 result_desc
=> 'images',
88 search_path
=> '/search/image_search.pl',
90 locus_allele
=> { sf_class
=> 'CXGN::Phenome',
91 result_desc
=> 'locus or allele identifiers',
92 search_path
=> '/search/locus',
95 # note that there is also another method of searching using site feature xrefs
101 Public path: /search/quick
103 Handles POST or GET quick searches. Parameter can be either C<term>
104 or C<q>. If optional param C<showtimes> is true, shows number of
105 seconds each of the search steps took.
109 sub quick_search
: Path
('/search/quick') {
110 my ( $self, $c ) = @_;
112 # use either 'term' or 'q' as the search term
113 my ($term) = grep defined, @
{ $c->req->parameters }{'term','q'};
115 $term =~ s/^\s*|\s*$//g;
117 defined $term && length $term
118 or $c->throw_client_error( public_message
=> 'Must provide a search term' );
121 quick_search_term
=> $term,
123 show_times
=> $c->req->parameters->{showtimes
},
124 template
=> '/search/quick_search.mas',
127 return if $c->forward('redirect_by_ident');
129 $c->forward('execute_predefined_searches');
130 $c->forward('search_with_xrefs');
131 $c->forward('redirect_if_only_one_possible');
134 #run the term through CXGN::Tools::Identifiers, and if it's recognized
135 #as an exact SGN identifier match, just redirect to that identifier's URL
137 sub redirect_by_ident
: Private
{
138 my ( $self, $c ) = @_;
140 my $term = $c->stash->{term
};
142 if ( my $direct_url = identifier_url
($term) ) {
143 my $namespace = identifier_namespace
($term);
144 #if the URL is just to this page, it's not useful
145 unless( $direct_url =~ m!quick_search\.pl|search/quick! #unless the url is to quick_search
146 || $namespace eq 'est' # don't auto-redirect for est names, some markers are called this
149 #if it's an external link, don't redirect, but put it in the external_link variable
150 if ( $direct_url =~ m@
(f
|ht
)tp
://@
151 && $direct_url !~ /sgn\.cornell\.edu|solgenomics\.net/
153 my ($domain) = $direct_url =~ m
|://(?
:www\
.)?
([^/]+)|;
154 $c->stash->{results
}{external_link
}{result
} = [ $direct_url, '1 direct information page' ];
156 $c->res->redirect( $direct_url );
165 # another optimization: if the quick search found only one
166 # possible URL to go to, go there
167 sub redirect_if_only_one_possible
: Private
{
168 my ( $self, $c ) = @_;
170 my @possible_urls = uniq
(
171 grep $_ !~ m!^https?://! && $_ !~ m!^/solpeople!,
173 ( map $_->{result
}->[0],
174 values %{$c->stash->{results
}}
177 @
{ $c->stash->{xrefs
} || [] }
181 if( @possible_urls == 1 && $possible_urls[0] ne '' ) {
182 $c->log->debug("redirecting to only possible url: $possible_urls[0]") if $c->debug;
183 $c->res->redirect( $possible_urls[0] );
188 sub execute_predefined_searches
: Private
{
189 my ( $self, $c ) = @_;
191 my $sp_person_id = $c->user() ?
$c->user->get_object()->get_sp_person_id() : undef;
192 # execute all the searches and stash the results
193 for my $search_name ( sort keys %searches ) {
194 print STDERR
"performing quick search for $search_name (". Dumper
($searches{$search_name}).")...\n";
195 my $search = $searches{$search_name};
197 my $searchresults = $self->do_quick_search(
200 term
=> $c->stash->{term
},
201 schema
=> $c->dbic_schema("Bio::Chado::Schema", undef, $sp_person_id),
203 $c->stash->{results
}{$search_name} = {
204 result
=> $searchresults,
206 exact
=> $search->{exact
}
208 print STDERR
"SEARCH RESULTS: ".Dumper
($searchresults);
212 sub search_with_xrefs
: Private
{
213 my ( $self, $c ) = @_;
216 my @xrefs = $c->feature_xrefs( $c->stash->{term
} );
217 $c->stash->{xrefs
} = \
@xrefs;
218 $c->stash->{xrefs_time
} = time - $b;
221 #do a quick search with either a legacy quick search function or a
222 #WWWSearch-implementing search
223 sub do_quick_search
{
224 my ( $self, $db, %args ) = @_;
226 if ($args{function
}) { #just run legacy functions and return their results
227 print STDERR
"INVOKING $args{function} with $args{term}\n";
228 return $args{function
}->( $self, $db,$args{term
}, $args{schema
});
230 my $classname = $args{sf_class
}
231 or die 'Must provide a class name';
233 Class
::Load
::load_class
( $classname );
234 $classname->isa( 'CXGN::Search::SearchI' )
235 or die "'$classname' is not a CXGN::Search::SearchI-implementing object";
237 my $search = $classname->new;
238 my $query = $search->new_query;
240 #check that the query has a quick_search function
241 $query->can('quick_search')
242 or die "Search '$classname' does not appear to have a query object with a quick_search method";
244 if ( $query->quick_search($args{term
}) ) {
245 my $results = $search->do_search($query);
246 my $count = $results->total_results;
247 die 'count should not be negative' if $count < 0;
250 my $qstr = encode_entities
($query->to_query_string());
251 return [ "$args{search_path}?$qstr", "$count $args{result_desc}" ];
254 return [undef, "0 $args{result_desc}"];
257 die 'this point should not be reached';
260 ###################### LEGACY QUICK SEARCH FUNCTIONS ##########################
262 sub quick_est_search
{
267 my $est_link = [ undef, "0 EST identifiers" ];
269 # the est quick search should support identifiers of the form SGN-E999999, SGN_E999999, SGNE999999
270 # and also E999999, as well as straight number (999999).
272 if ($term =~ /^\d+$/ || ( identifier_namespace
($term) || '' )eq 'sgn_e' )
274 my ($id_term) = $term =~ /(\d+)/;
275 my $count = sql_query_count
($db, "SELECT count(*) FROM est WHERE est.est_id = ?",$id_term);
278 "/search/est.pl?request_id=$id_term&request_from=0&request_type=7&search=Search",
279 "$count EST identifiers",
286 sub quick_clone_search
{
292 unless ($term =~ m
|^ccc
|) { # coffee clone name.
293 $term =~ s
|([a
-z
]{4})(\d
{1,2})([a
-z
]\d
{1,2})|$1-$2-$3|i
;
296 # the quick clone search supports searching of clone name and
297 # clone ids. Clone ids can be entered as SGNC999999, SGN-C999999,
298 # SGN_C999999 or C999999. if the input does not correspond to any
299 # of these formats, the clone_name is searched. may have to add
300 # something for the dashes that are sometimes not present in the
303 my $where_clause = "";
304 if ($term =~ /^(?:(SGN[\-\_]?)?C)?(\d+)$/i) {
305 $where_clause = "WHERE clone_id = ?";
308 $where_clause = "WHERE clone_name ilike ?";
311 my $query = "SELECT clone_id FROM sgn.clone $where_clause";
312 my ($clone_id) = $db->selectrow_array($query, undef, $term);
314 my $clone_link = [undef, "0 cDNA clone identifiers"];
317 "/search/est.pl?request_id=SGN-C$clone_id&request_from=0&request_type=automatic&search=Search",
318 "1 cDNA clone identifier",
324 # For quick_search queries without the Version#-Release#- prefix, the version and release are
325 # assumed to both be one. This is hardcoded below in two variables $version and $release.
326 sub quick_array_search
{
331 my $version = 1; # default version is 1
332 my $release = 1; # default release is 1
335 my $array_link = [ undef, "0 array identifiers" ];
337 # the array quick search should support the following formats:
338 # 1-1-1.1.1.1 (proper), -1-1.1.1.1, 1-1.1.1.1, -1.1.1.1 and 1.1.1.1
341 if ($term =~ /^-?\d*-?(\d+\.\d+\.\d+\.\d+)$/) { # incomplete or absent Version#-Release#- prefix
342 $id_term = $version . "-" . $release . "-" . $1; # use default prefix
346 if ($term =~ /^(\d+)-(\d+)-(\d+\.\d+\.\d+\.\d+)$/) { # complete Version#-Release#- prefix
348 $id_term = $term; # use new version and release values
352 my $query = "SELECT count(*) FROM microarray AS m WHERE m.spot_id = ? AND m.version = ? AND m.release = ?";
353 my $count = sql_query_count
($db , $query, $spot,$version,$release);
357 "/search/est.pl?request_id=$id_term&request_from=0&request_type=14&search=Search",
358 "$count array identifiers",
365 sub quick_phenotype_search
{
366 my ($self, $db, $term) = @_;
367 my $q = "select count (distinct stock_id ) from stock left join stockprop using (stock_id) left join cvterm on stockprop.type_id = cvterm.cvterm_id where stock.name ilike ? or stock.uniquename ilike ? or (stockprop.value ilike ? and cvterm.name ilike ? ) " ;
368 my $count = sql_query_count
( $db , $q , "\%$term\%","\%$term\%","\%$term\%", "\%synonym\%" );
369 my $pheno_link = [ undef , "0 phenotype identifiers"];
371 $pheno_link = ["/search/stocks?any_name=$term" ,
372 "$count phenotype identifiers" ];
377 sub quick_marker_search
{
383 $term =~ s/([a-z]{4})(\d{1,2})([a-z]\d{1,2})/$1-$2-$3/i;
385 my $marker_link = [undef, "0 marker identifiers"];
386 my $count = CXGN
::Marker
::Tools
::marker_name_to_ids
($db,$term);
389 "/search/markers/markersearch.pl?w822_nametype=starts+with&w822_marker_name=$term&w822_submit=Search&w822_mapped=off&w822_species=Any&w822_protos=Any&w822_chromos=Any&w822_pos_start=&w822_pos_end=&w822_confs=Any&w822_maps=Any",
390 "$count marker identifiers"
396 sub quick_mapped_geno_marker_search
{
403 my $marker_link = [undef, "0 marker identifiers"];
405 # Get count of mapped markers
406 my $mapped_count = CXGN
::Marker
::Tools
::marker_name_to_ids
($db,$term);
407 $count = $count + $mapped_count;
409 # Get count of genotyped markers
410 my $msearch = CXGN
::Marker
::SearchMatView
->new(bcs_schema
=> $schema);
411 my $results = $msearch->query({ name
=> $term, name_match
=> 'exact' });
412 my $genotyped_count = $results->{counts
}->{markers
};
413 $count = $count + $genotyped_count;
417 "/search/variants/results?marker_name=$term&marker_name_match=exactly",
418 "$count marker identifiers"
425 sub quick_manual_annotation_search
{
430 # It's a syntax error for whitespace to occur in tsquery query strings. Replace with ampersands.
431 my $cleaned_term = to_tsquery_string
($term);
432 my $count = sql_query_count
($db, <<EOSQL, $cleaned_term);
434 FROM manual_annotations
435 WHERE annotation_text_fulltext @@ to_tsquery(?)
438 my $unigene_count = do {
440 sql_query_count
($db,<<EOSQL,$cleaned_term);
441 SELECT COUNT(DISTINCT(unigene_member.unigene_id))
442 FROM manual_annotations,
446 WHERE annotation_text_fulltext @@ to_tsquery(?)
447 AND manual_annotations.annotation_target_id=seqread.clone_id
448 AND seqread.read_id=est.read_id
449 AND est.est_id=unigene_member.est_id
457 $count > 0 ?
["/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=manual_search", "$count manual annotations on $unigene_count unigenes"]
458 : [undef, "0 manual annotations"];
461 sub quick_automatic_annotation_search
{
465 my $cleaned_term = to_tsquery_string
($term);
466 my $count = sql_query_count
($db, "select count(*) from blast_defline where defline_fulltext @@ to_tsquery(?)",$cleaned_term);
468 my $unigene_count = "(not determined -- number of annotations too large)";
469 if ($count < 10000) {
470 $unigene_count = sql_query_count
($db, <<EOSQL,$cleaned_term);
471 SELECT COUNT(DISTINCT(unigene.unigene_id))
476 WHERE defline_fulltext @@ to_tsquery(?)
477 AND blast_defline.defline_id=blast_hits.defline_id
478 AND blast_hits.blast_annotation_id=blast_annotations.blast_annotation_id
479 AND blast_annotations.apply_id=unigene.unigene_id
480 AND blast_annotations.apply_type=15
483 my $automatic_annotation_link = [undef, "0 automatic annotations"];
485 $automatic_annotation_link = [ "/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=blast_search", "$count automatic annotations on $unigene_count unigenes" ];
487 return $automatic_annotation_link;
490 sub sql_query_count
{
493 my $qh = $db -> prepare_cached
($query);
495 my ($count) = $qh -> fetchrow_array
();
500 my( $self, $site_title, $term, $site_address ) = @_;
502 print STDERR
"Googling...\n";
503 my $google_url = uri
( scheme
=> 'http',
504 host
=> 'www.google.com',
509 ?
( sitesearch
=> $site_address )
513 query_separator
=> '&',
516 my $lwp_ua = LWP
::UserAgent
->new;
517 $lwp_ua->agent( 'SGN Quick Search ( Mozilla compatible )' );
518 my $res = $lwp_ua->request( HTTP
::Request
->new( GET
=> $google_url ));
520 print STDERR
"Hello world!\n";
522 if( $res ->is_success ) {
523 my $cont = $res->content;
524 $cont =~ s/\<.*?\>//g;
525 my ($c) = $cont =~ /Results\s*\d*?\s*\-\s*\d*\s*of\s*(?:about)?\s*?([\d\,]+)/;
530 print STDERR
"Returning search results...\n";
532 return [ $google_url, "$count pages on $site_title" ];
534 return [ undef, "0 pages on $site_title" ];
544 my $accession_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, $type, 'stock_type')->cvterm_id();
545 my $rs = $schema->resultset("Stock::Stock")->search( { uniquename
=> { ilike
=> $term} , type_id
=> $accession_type_id });
548 if ($rs->count() > 0) {
549 my $row = $rs->next();
550 $id = $row->stock_id();
551 $name = $row->uniquename();
552 print STDERR
"FOUND: $id, $name\n";
557 sub quick_accession_search
{
563 my ($id, $name) = $self->stock_search($schema, 'accession', $term);
565 print STDERR
"Found accession $id, $name\n";
566 return [ '/stock/'.$id.'/view', "1 accession: ".$name ];
569 print STDERR
"Found no accession... ???\n";
570 return [ '', "0 accession identifiers" ];
574 sub quick_vector_search
{
580 my ($id, $name) = $self->stock_search($schema, 'vector_construct', $term);
582 print STDERR
"Found vector_construct $id, $name\n";
583 return [ '/stock/'.$id.'/view', "1 vector construct: ".$name ];
586 print STDERR
"Found no vector construct\n";
587 return [ '', "0 vectors" ];
592 sub quick_plot_search
{
598 my ($id, $name) = $self->stock_search($schema, 'plot', $term);
601 return [ '/stock/'.$id."/view", "1 plot: ".$name ];
604 return [ '', "plots: No exact match." ];
608 sub quick_populations_search
{
614 my ($id, $name) = $self->stock_search($schema, 'population', $term);
617 return [ '/stock/'.$id."/view", "1 population: ".$name ];
620 return [ '', "0 populations." ];
624 sub quick_trials_search
{
630 my $cv_rs = $schema->resultset("Cv::Cv")->search( { 'me.name' => 'project_type' }, { join => 'cvterms', '+select' => [ 'cvterms.name', 'cvterms.cvterm_id' ], '+as' => [ 'cvterm_name', 'cvterm_id' ] });
632 my @trial_type_ids = ();
633 while (my $row = $cv_rs->next()) {
634 my $cvterm_id = $row->get_column('cvterm_id');
635 print STDERR
"retrieving cvterm id: $cvterm_id\n";
636 push @trial_type_ids, $cvterm_id;
639 my $rs = $schema->resultset("Project::Project")->search( { name
=> { ilike
=> $term }, 'projectprops.type_id' => { -in => [ @trial_type_ids ] } }, { join => 'projectprops', '+select' => [ 'projectprops.type_id' ], '+as' => [ 'project_type_id' ] });
642 if ($rs->count() == 1) {
643 my $row = $rs->next();
644 $id = $row->project_id();
645 $name = $row->name();
648 return [ '/breeders/trial/'.$id, "1 trial: ".$name ];
651 return [ '', "0 trials" ];
655 sub quick_locations_search
{
661 print STDERR
"LOCATION SEARCH!\n";
662 my $rs = $schema->resultset("NaturalDiversity::NdGeolocation")->search( { description
=> { ilike
=> $term } });
665 if ($rs->count() == 1) {
666 my $row = $rs->next();
667 $id = $row->nd_geolocation_id();
668 $name = $row->description();
671 print STDERR
"LOCATION HAS ".$rs->count()." MATCHES!!!!\n";
673 print STDERR
"RETURNED: $id, $name\n";
675 return [ '/breeders/locations/', '1 location: '.$name ]; # just link to generic locations page
678 return [ '', '0 locations' ];
682 sub quick_traits_search
{
688 my $trait_search = CXGN
::Trait
::Search
->new({
691 trait_name_list
=>[$term]
693 my ($data, $records_total) = $trait_search->search();
695 if ($records_total == 0 ) {
696 return [ '', '0 traits' ];
698 elsif ($records_total == 1) {
700 my $id = $t->{trait_id
};
701 my $name = $t->{trait_name
};
702 return [ '/cvterm/'.$id.'/view', '1 trait: '.$name ];
705 return [ "/search/traits?name=$term", "$records_total traits" ];
709 sub quick_bp_search
{
715 print STDERR
"breeding program search... \n";
716 my $rs = $schema->resultset("Project::Project")->search( { 'me.name' => { ilike
=> $term } } , { cvterm_name
=> 'breeding_program', join => 'projectprops' => { 'cvterms', '+select' => [ 'cvterm.name'], '+as' => ['cvterm_name'] } } );
720 if ($rs->count() > 1) {
721 print STDERR
$rs->count()." results, which is unexpected...\n";
722 while (my $row = $rs->next()) {
723 print STDERR
join("\t", $row->name(), $row->project_id())."\n";
725 return [ '', 'too many hits' ];
728 elsif ($rs->count() == 1) {
729 my $row = $rs->next();
730 $id = $row->project_id();
731 $name = $row->name();
734 print STDERR
"Sorry, no match!\n";
737 print STDERR
"FOUND: $id\n";
739 return [ '/breeders/program/'.$id, "1 breeding program: ".$name ];
742 return [ '', '0 breeding programs' ];
747 sub quick_web_search
{
748 my ($self, undef,$term) = @_;
749 # works the same way as quick_page_search, except that the domain contraint is removed from the
751 print STDERR
"Performing web search... ";
752 return $self->google_search('the entire web',$term);
755 sub quick_page_search
{
756 my ($self, undef,$term) = @_;
757 return $self->google_search('SGN',$term,'solgenomics.net');
760 __PACKAGE__
->meta->make_immutable;