7 performs a search for each entity in the database and reports the
8 number of hits for each entity. Links are provided on an overview page
9 to allow the user to complete the search. In addition to the database,
10 quick_search.pl also searches Google, restricted to the sgn.cornell.edu domain,
11 and parses the page that Google returns to report the number of hits on
12 the web site (which includes static and dynamic pages). The link
13 provided with that search goes directly to Google. Similarly, the
14 Google search is repeated without the domain constraint to
15 show the number of hits on the world wide web in total. A link for
16 that search is also provided.
20 Lukas Mueller, Feb 15, 2004
27 use UNIVERSAL qw
/isa/;
28 use Time
::HiRes qw
/time/;
37 use CXGN
::DB
::Connection
;
38 use CXGN
::Page
::FormattingHelpers qw
/blue_section_html page_title_html info_table_html/;
39 use CXGN
::Tools
::Text qw
/to_tsquery_string trim/;
40 use CXGN
::Tools
::Identifiers qw
/identifier_url link_identifier clean_identifier identifier_namespace/;
41 use CXGN
::Marker
::Tools
;
43 #search-framework search classes
44 use CXGN
::Searches
::People
;
45 use CXGN
::Searches
::Library
;
46 use CXGN
::Genomic
::Search
::Clone
;
47 use CXGN
::Unigene
::Search
;
50 use CXGN
::Searches
::Images
;
# Table of quick searches, keyed by search name.  Two entry shapes appear:
#
#   function => \&sub [, exact => 1]
#       Legacy search.  do_quick_search calls the code ref with the database
#       handle and the search term.  When `exact` is true, the tied %search
#       lookup appends '*' to the result ("exact matches only").
#
#   class / result_desc / search_path
#       Search-framework entry.  do_quick_search instantiates `class` and
#       uses `search_path` and `result_desc` to build the result link.
#
# NOTE(review): the opening of this structure and the closing braces of the
# class-based entries are not visible in this excerpt.  No `use` line for
# CXGN::Phenotypes or CXGN::Phenome is visible either -- confirm that both
# are loaded somewhere before do_quick_search calls ->new on them.
53 clone
=> { function
=> \
&quick_clone_search
, exact
=> 1 },
54 est
=> { function
=> \
&quick_est_search
, exact
=> 1 },
55 microarray
=> { function
=> \
&quick_array_search
, exact
=> 1 },
56 marker
=> { function
=> \
&quick_marker_search
},
57 manual_annotations
=> { function
=> \
&quick_manual_annotation_search
},
58 automatic_annotations
=> { function
=> \
&quick_automatic_annotation_search
},
59 sgn_pages
=> { function
=> \
&quick_page_search
},
60 web
=> { function
=> \
&quick_web_search
},
61 people
=> { class => 'CXGN::Searches::People',
62 result_desc
=> 'people',
63 search_path
=> '/solpeople/people_search.pl'
65 library
=> { class => 'CXGN::Searches::Library',
66 result_desc
=> 'cDNA libraries',
67 search_path
=> '/search/library_search.pl',
69 bac
=> { class => 'CXGN::Genomic::Search::Clone',
70 result_desc
=> 'BAC identifiers',
71 search_path
=> '/maps/physical/clone_search.pl',
73 unigene
=> { class => 'CXGN::Unigene::Search',
74 result_desc
=> 'unigene identifiers',
75 search_path
=> '/search/ug-ad2.pl',
78 phenotype
=> { class => 'CXGN::Phenotypes',
79 result_desc
=> 'phenotype identifiers',
80 search_path
=> '/search/phenotype_search.pl',
82 image
=> { class => 'CXGN::Searches::Images',
83 result_desc
=> 'images',
84 search_path
=> '/search/image_search.pl',
86 locus_allele
=> { class => 'CXGN::Phenome',
87 result_desc
=> 'locus or allele identifiers',
88 search_path
=> '/search/locus_search.pl',
# Main script flow: set up the page object and database connection, read the
# request arguments, try a direct identifier redirect, then render the
# overview page with one result per search.
#
# $print_individual_times comes from the 'showtimes' argument and switches on
# the per-search timing suffix produced in the tied-hash callback below.
92 our $page = CXGN
::Page
->new("SGN Quick Search page",'Rob');
93 our $db = CXGN
::DB
::Connection
->new;
94 our ($print_individual_times) = $page->get_arguments('showtimes');
# Read the search term; a false (empty) term produces a message page.
98 my ($term) = $page->get_arguments('term');
101 $term or $page->message_page('You did not enter a search term.');
# Drop double quotes, single quotes and backslashes from the term, then keep
# an entity-encoded copy for display in the page title.
# NOTE(review): encode_entities is called here and in do_quick_search, but no
# import that provides it is visible in this excerpt -- confirm it is loaded.
102 $term =~ s/[\"\'\\]//g;
103 my $html_term = encode_entities
($term);
105 #now run the term through CXGN::Tools::Identifiers, and if it's
106 #recognized as an exact SGN identifier match, just redirect them to
# Text shown when the term does not resolve to an external information page.
108 my $external_link = '<span class="ghosted">0 direct information pages</span>';
109 if( my $direct_url = identifier_url
($term) ) {
110 #if the URL is just to this page, it's not useful
111 unless( $direct_url =~ /quick_search\.pl/) { #unless the url is to quick_search
113 #if it's an external link, don't redirect, but put it in the external_link variable
114 if( $direct_url =~ m@
(f
|ht
)tp
://@
115 && $direct_url !~ /sgn\.cornell\.edu/
117 my ($domain) = $direct_url =~ m
|://(?
:www\
.)?
([^/]+)|;
# NOTE(review): $clean is not used in any line visible in this excerpt.
118 my $clean = clean_identifier
($term);
119 $external_link = qq|<a href
="$direct_url" class="quicksearch_hit">1 direct information page
($domain)</a
>|;
# Send the client to the direct URL (per the comment above, external links
# are linked rather than redirected to).
121 $page->client_redirect($direct_url);
127 #make a %search tied hash that will print out the html and javascript to do each of the searches
# The callback receives the search name, looks it up in %searches, runs it
# through do_quick_search and returns the result wrapped in a <div>.
129 'Tie::Function' => sub {
130 my $searchname = shift;
131 die "No $searchname search defined" unless exists($searches{$searchname});
132 my $search = $searches{$searchname};
# Entries flagged `exact` get a trailing '*' after their result.
134 my $searchresults = do_quick_search
(%$search, term
=> $term).($search->{exact
} ?
'*' : '');
# NOTE(review): no declaration of $b is visible in this excerpt; $b is also
# Perl's sort variable -- confirm a start time is stored in it just before
# the search runs.
135 my $timestr = $print_individual_times ?
sprintf(' (%0.1f sec)',time-$b) : '';
136 "<div>$searchresults$timestr</div>"
# Results markup.  Each $search{...} interpolation below goes through the
# tied hash set up above.
# NOTE(review): $begintime, used for the total elapsed time further down, is
# not declared in any line visible in this excerpt.
140 my $results_html = <<EOHTML;
141 <div style="float: left; width: 50%">
152 $search{locus_allele}
154 <dt>People (searching by name)</dt>
155 <dd>$search{people}</dd>
158 <div style="float: right; width: 50%">
160 <dt>cDNA libraries</dt>
161 <dd>$search{library}</dd>
164 $search{manual_annotations}
165 $search{automatic_annotations}
179 * – exact matches only
183 print page_title_html
("Quick search: '$html_term'");
186 The quick search does not perform the same queries as full searches of the various types.<br />
187 Generally, quick-search results will be a subset of full results.
190 print blue_section_html
('Results',
191 sprintf('%0.2f seconds',time-$begintime),
197 ###############################################################################
198 ############################### SUBROUTINES ###################################
199 ###############################################################################
201 #do a quick search with either a legacy quick search function or a
202 #WWWSearch-implementing search
#
# Arguments are read from %args: `term`, plus either `function` (a code ref)
# or `class` together with `search_path` and `result_desc`.
#
# Returns an HTML fragment: whatever the legacy function returns, a link
# reading "<count> <result_desc>" that points at search_path with the query
# string appended, or the plain text "0 <result_desc>".
#
# Dies when no class name is given, when the class is not a
# CXGN::Search::SearchI, when the query object has no quick_search method,
# or when the search reports a negative result count.
#
# NOTE(review): the line that fills %args, and the exact branch structure
# around the two return statements, are not visible in this excerpt.
203 sub do_quick_search
{
206 if($args{function
}) { #just run legacy functions and return their results
207 return $args{function
}->($db,$args{term
});
210 my $classname = $args{class}
211 or die 'Must provide a class name';
# NOTE(review): this is the function form of isa, imported via
# `use UNIVERSAL qw/isa/`.  Importing from UNIVERSAL is deprecated and, as far
# as I recall, rejected by perl 5.22 and later -- consider
# $classname->isa('CXGN::Search::SearchI') instead; confirm target perl.
213 isa
($classname,'CXGN::Search::SearchI')
214 or die "'$classname' is not a CXGN::Search::SearchI-implementing object";
216 my $search = $classname->new;
217 my $query = $search->new_query;
219 #check that the query has a quick_search function
220 $query->can('quick_search')
221 or die "Search '$classname' does not appear to have a query object with a quick_search method";
# quick_search() is given the term; a true return means the query is runnable.
223 if ( $query->quick_search($args{term
}) ) {
224 my $results = $search->do_search($query);
225 my $count = $results->total_results;
226 die 'count should not be negative' if $count < 0;
# The query string is entity-encoded because it is embedded in an href.
229 my $qstr = encode_entities
($query->to_query_string());
230 return qq{<a
class="quicksearch_hit" href
="$args{search_path}?$qstr">$count $args{result_desc
}</a
>};
233 return "0 $args{result_desc}";
236 die 'this point should not be reached';
239 ###################### LEGACY QUICK SEARCH FUNCTIONS ##########################
# Legacy quick search for EST identifiers.
#
# When the term is all digits, or identifier_namespace() reports 'sgn_e' for
# it, the first run of digits in the term is counted against est.est_id and
# a link to /search/est.pl (request_type=7) is built from that id and count.
# The default result text is "0 EST identifiers".
#
# NOTE(review): the argument unpacking, any guard on $count, and the return
# statement are not visible in this excerpt.  do_quick_search calls legacy
# functions with the database handle and the term, in that order.
241 sub quick_est_search
{
245 my $est_link = "0 EST identifiers";
247 # the est quick search should support identifiers of the form SGN-E999999, SGN_E999999, SGNE999999
248 # and also E999999, as well as straight number (999999).
250 if ($term =~ /^\d+$/ || identifier_namespace
($term) eq 'sgn_e' )
252 my ($id_term) = $term =~ /(\d+)/;
253 my $count = sql_query_count
($db, "SELECT count(*) FROM est WHERE est.est_id = ?",$id_term);
255 $est_link = qq|<a
class="quicksearch_hit" href
="/search/est.pl?request_id=$id_term&request_from=0&request_type=7&search=Search">$count EST identifiers
</a
>|;
# Legacy quick search for cDNA clones, by clone id or by clone name.
#
# Unless the term starts with "ccc", the first occurrence of four letters,
# one or two digits, and a letter followed by one or two digits is rewritten
# with dashes between the three parts.  A term consisting of digits with an
# optional C / SGNC / SGN-C / SGN_C prefix (case-insensitive) selects on
# clone_id; anything else selects on clone_name with ILIKE.
#
# Only the first matching clone_id is fetched, and the link text is fixed at
# "1 cDNA clone identifier".  The default text is "0 cDNA clone identifiers".
#
# NOTE(review): $term is the value bound to the placeholder.  The lines
# between the id regex and the query are not visible in this excerpt --
# confirm the prefix is stripped from $term before it is bound to clone_id.
# The argument unpacking and the return statement are not visible either.
261 sub quick_clone_search
{
266 unless ($term =~ m
|^ccc
|) { # coffee clone name.
267 $term =~ s
|([a
-z
]{4})(\d
{1,2})([a
-z
]\d
{1,2})|$1-$2-$3|i
;
270 # the quick clone search supports searching of clone name and
271 # clone ids. Clone ids can be entered as SGNC999999, SGN-C999999,
272 # SGN_C999999 or C999999. if the input does not correspond to any
273 # of these formats, the clone_name is searched. may have to add
274 # something for the dashes that are sometimes not present in the
277 my $where_clause = "";
278 if ($term =~ /^(?:(SGN[\-\_]?)?C)?(\d+)$/i) {
279 $where_clause = "WHERE clone_id = ?";
282 $where_clause = "WHERE clone_name ilike ?";
# The clone table is addressed through the schema name returned by
# qualify_schema('sgn').
285 my $sgn = $db->qualify_schema('sgn');
286 my $query = "SELECT clone_id FROM $sgn.clone $where_clause";
287 my ($clone_id) = $db->selectrow_array($query, undef, $term);
289 my $clone_link = "0 cDNA clone identifiers";
292 <a class="quicksearch_hit"
293 href="/search/est.pl?request_id=SGN-C$clone_id&request_from=0&request_type=automatic&search=Search">1 cDNA clone identifier</a>
# Legacy quick search for microarray spot identifiers.
#
# Counts rows in microarray matching spot id, version and release, and links
# to /search/est.pl (request_type=14) with the full
# version-release-spot identifier.  The default result text is
# "0 array identifiers".
#
# NOTE(review): the argument unpacking, the declaration of $id_term, the
# assignment of $spot (and of $version/$release in the full-prefix case) and
# the return statement are not visible in this excerpt.
299 # For quick_search queries without the Version#-Release#- prefix, the version and release are
300 # assumed to both be one. This is hardcoded below in two variables $version and $release.
301 sub quick_array_search
{
305 my $version = 1; # default version is 1
306 my $release = 1; # default release is 1
309 my $array_link = "0 array identifiers";
311 # the array quick search should support the following formats:
312 # 1-1-1.1.1.1 (proper), -1-1.1.1.1, 1-1.1.1.1, -1.1.1.1 and 1.1.1.1
315 if ($term =~ /^-?\d*-?(\d+\.\d+\.\d+\.\d+)$/) { # incomplete or absent Version#-Release#- prefix
316 $id_term = $version . "-" . $release . "-" . $1; # use default prefix
320 if ($term =~ /^(\d+)-(\d+)-(\d+\.\d+\.\d+\.\d+)$/) { # complete Version#-Release#- prefix
322 $id_term = $term; # use new version and release values
326 my $query = "SELECT count(*) FROM microarray AS m WHERE m.spot_id = ? AND m.version = ? AND m.release = ?";
327 my $count = sql_query_count
($db , $query, $spot,$version,$release);
330 $array_link = qq|<a
class="quicksearch_hit" href
="/search/est.pl?request_id=$id_term&request_from=0&request_type=14&search=Search">$count array identifiers
</a
>|;
# Legacy quick search for markers by name.
#
# The first occurrence of four letters, one or two digits, and a letter
# followed by one or two digits in the term is rewritten with dashes between
# the three parts.  The term is then passed to
# CXGN::Marker::Tools::marker_name_to_ids and a "starts with" link to
# markersearch.pl is built.  The default text is "0 marker identifiers".
#
# NOTE(review): $count is whatever marker_name_to_ids returns in scalar
# context -- confirm that is a count of ids rather than an id.
# NOTE(review): $term is interpolated into the href without URL-encoding;
# terms containing '&', '+', '#' or spaces will produce a broken query string.
# NOTE(review): the argument unpacking, any guard on $count and the return
# statement are not visible in this excerpt.
336 sub quick_marker_search
{
341 $term =~ s/([a-z]{4})(\d{1,2})([a-z]\d{1,2})/$1-$2-$3/i;
343 my $marker_link = "0 marker identifiers";
344 my $count = CXGN
::Marker
::Tools
::marker_name_to_ids
($db,$term);
346 $marker_link = qq|<a
class="quicksearch_hit" href
="/search/markers/markersearch.pl?w822_nametype=starts+with&w822_marker_name=$term&w822_submit=Search&w822_mapped=off&w822_species=Any&w822_protos=Any&w822_chromos=Any&w822_pos_start=&w822_pos_end=&w822_confs=Any&w822_maps=Any">$count marker identifiers
</a
>|;
# Legacy quick search over manual annotations (full-text).
#
# The term is converted with to_tsquery_string and used in two queries: one
# against manual_annotations, and one counting the distinct unigene ids
# reached through seqread, est and unigene_member.  When the first count is
# positive the result is a link to annotation_search_result.pl reading
# "<count> manual annotations to <unigene_count> unigenes"; otherwise it is
# the plain text "0 manual annotations".
#
# NOTE(review): the link carries the raw $term (not URL-encoded) in
# search_text.  The argument unpacking, the SELECT line of the first query
# and both heredoc terminators are not visible in this excerpt.
351 sub quick_manual_annotation_search
{
355 # It's a syntax error for whitespace to occur in tsquery query strings. Replace with ampersands.
356 my $cleaned_term = to_tsquery_string
($term);
357 my $count = sql_query_count
($db, <<EOSQL, $cleaned_term);
359 FROM manual_annotations
360 WHERE annotation_text_fulltext @@ to_tsquery(?)
363 my $unigene_count = do {
365 sql_query_count
($db,<<EOSQL,$cleaned_term);
366 SELECT COUNT(DISTINCT(unigene_member.unigene_id))
367 FROM manual_annotations,
371 WHERE annotation_text_fulltext @@ to_tsquery(?)
372 AND manual_annotations.annotation_target_id=seqread.clone_id
373 AND seqread.read_id=est.read_id
374 AND est.est_id=unigene_member.est_id
382 $count > 0 ?
qq( <a
class="quicksearch_hit" href
="/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=manual_search">$count manual annotations to
$unigene_count unigenes
</a
> )
383 : "0 manual annotations";
# Legacy quick search over automatic (BLAST defline) annotations.
#
# Counts blast_defline rows whose full-text column matches the term (after
# to_tsquery_string), then counts the distinct unigenes those deflines are
# applied to.  Returns the link text held in $automatic_annotation_link,
# which defaults to "0 automatic annotations".
#
# NOTE(review): the link carries the raw $term (not URL-encoded) in
# search_text.  The argument unpacking, the FROM clause of the second query
# and the condition guarding the link assignment are not visible in this
# excerpt.
386 sub quick_automatic_annotation_search
{
389 my $cleaned_term = to_tsquery_string
($term);
390 my $count = sql_query_count
($db, "select count(*) from blast_defline where defline_fulltext @@ to_tsquery(?)",$cleaned_term);
# The unigene count is only computed for fewer than 10000 annotations;
# otherwise this placeholder text is shown in its place.
392 my $unigene_count = "(not determined -- number of annotations too large)";
393 if ($count < 10000) {
394 $unigene_count = sql_query_count
($db, <<EOSQL,$cleaned_term);
395 SELECT COUNT(DISTINCT(unigene.unigene_id))
400 WHERE defline_fulltext @@ to_tsquery(?)
401 AND blast_defline.defline_id=blast_hits.defline_id
402 AND blast_hits.blast_annotation_id=blast_annotations.blast_annotation_id
403 AND blast_annotations.apply_id=unigene.unigene_id
404 AND blast_annotations.apply_type=15
407 my $automatic_annotation_link = "0 automatic annotations";
409 $automatic_annotation_link = qq|<a
class="quicksearch_hit" href
="/search/annotation_search_result.pl?search_text=$term&Submit=search&request_from=0&search_type=blast_search">$count automatic annotations to
$unigene_count unigenes
</a
>|;
411 return $automatic_annotation_link;
# Helper for the legacy searches: prepares the SQL text on $db with
# prepare_cached and reads the first column of the first fetched row into
# $count.
#
# Call sites pass the database handle, the SQL text, and then the values for
# its placeholders.
#
# NOTE(review): the parameter unpacking, the execute call and the return
# statement are not visible in this excerpt -- presumably $count is returned;
# confirm against the full file.
414 sub sql_query_count
{
417 my $qh = $db -> prepare_cached
($query);
419 my ($count) = $qh -> fetchrow_array
();
# Body of the Google hit-count helper (called as google_search elsewhere in
# this file; the `sub` line itself is not visible in this excerpt).
#
# Parameters:
#   $site_title   - label used in the result text ("... web pages on <title>")
#   $term         - the search term
#   $site_address - optional; when true, the search is restricted to it
#
# Returns a link to the Google results reading "<count> web pages on
# <title>." when $count is true, otherwise the plain text
# "0 web pages on <title>".
424 my( $site_title, $term, $site_address ) = @_;
# NOTE(review): %urlencode is read as a hash keyed by the term; presumably a
# tied URL-encoding hash -- its declaration is not visible in this excerpt.
426 my $qstr = "q=$urlencode{$term}&btnG=Google+Search";
427 $qstr .= "&domains=$site_address&sitesearch=$site_address" if $site_address;
# NOTE(review): no `use` line for LWP::UserAgent or HTTP::Request is visible
# in this excerpt -- confirm both are loaded.
429 my $lwp_ua = LWP
::UserAgent
->new;
430 $lwp_ua->agent('SGN Quick Search ( Mozilla compatible )');
431 my $res = $lwp_ua->request( HTTP
::Request
->new(GET
=> "http://www.google.com/custom?$qstr") );
434 if( $res ->is_success ) {
435 my $cont = $res->content;
# Remove markup, then capture the total from text shaped like
# "Results 1 - 10 of about 1,234" (digits and commas).
436 $cont =~ s/\<.*?\>//g;
437 my ($c) = $cont =~ /Results\s*\d*?\s*\-\s*\d*\s*of\s*(?:about)?\s*?([\d\,]+)/;
# NOTE(review): as written this substitution replaces '&' with '&' and so
# changes nothing.  $qstr is embedded in an href right below, so the intent
# was presumably to escape '&' as '&amp;' -- confirm and fix.
# NOTE(review): the line that sets $count from $c is not visible in this
# excerpt.
442 $qstr =~ s/&/&/g;
445 $count ?
qq|<a
class="quicksearch_hit" href
="http://www.google.com/custom?$qstr">$count web pages on
$site_title.</a
>|
446 : "0 web pages on $site_title";
# Legacy quick search: Google hit count for the term across the whole web.
# Behaves like quick_page_search, except that no domain constraint is passed
# to google_search.  The first argument supplied by the caller is ignored.
sub quick_web_search {
    my $term = $_[1];
    return google_search( 'the entire web', $term );
}
# Legacy quick search: Google hit count for the term on SGN pages only.
# The first argument supplied by the caller is ignored; the term is passed to
# google_search with the label 'SGN' and the site 'sgn.cornell.edu'.
# NOTE(review): the closing brace of this sub lies beyond the visible excerpt.
455 sub quick_page_search
{
456 my (undef,$term) = @_;
457 return google_search
('SGN',$term,'sgn.cornell.edu');