minor fixes
[sgn.git] / lib / CXGN / Marker / Search.pm
blob86be20ea9bc219fe6e5f6279d53d7247b64cb234
1 use strict;
3 package CXGN::Marker::Search;
5 use CXGN::Marker;
6 use CXGN::Marker::LocMarker;
7 use CXGN::Marker::Tools qw(clean_marker_name);
10 =head1 NAME
12 CXGN::Marker::Search - object to return lists of markers based on your criteria
14 =head1 SYNOPSIS
16 use CXGN::Marker::Search;
17 my $search = CXGN::Marker::Search->new();
19 #add criteria here, for example..
20 $search->on_chromosome('5');
22 $search->perform_search(1,30);
23 my @marker_objects = $search->fetch_full_objects();
26 =head1 DESCRIPTION
28 Running a search consists of three steps:
30 0. Create marker search object
32 1. Specify search criteria, as many as you like
34 2. Call perform_search() to execute the SQL
36 3. Retrieve search results with one of the fetch_* functions. These can return a list of marker_id's, a list of location_id's, or a list of full-fledged marker objects.
38 As an alternative to (2) and (3), you may instead want to perform the query yourself (say if you want to use it as a subquery in a larger SQL statement your program is writing). In this case, the function return_subquery_with_placeholders() will do what you require; steps (2) and (3) are not necessary.
40 =cut
43 =head2 Constructor
45 =over 12
47 =item new($dbh)
49 my $msearch = CXGN::Marker::Search->new($dbh);
51 Returns a search object, ready for searching. Required before any of the other methods are called.
53 =back
55 =cut
57 sub new {
59 my ($class, $dbh) = @_;
61 die "must provide a dbh as first argument: CXGN::Marker->new($dbh)\n"
62 unless $dbh && ref($dbh) && $dbh->can('selectall_arrayref');
64 my $self = bless {dbh => $dbh},$class;
65 #my @nullm2mfields = qw(marker.marker_id location_id lg_name lg_order position confidence_id subscript map_version_id map_id);
66 $self->{m2mfields} = ' m2m.marker_id, m2m.location_id, m2m.lg_name, m2m.lg_order, m2m.position, m2m.confidence_id, m2m.subscript, m2m.map_version_id, m2m.map_id ';
68 $self->{midqfields} = 'marker.marker_id, m2m.location_id, m2m.lg_name, m2m.lg_order, m2m.position, m2m.confidence_id, m2m.subscript, m2m.map_version_id, m2m.map_id';
69 $self->{nom2m} = $self->{m2mfields};
70 $self->{nom2m} =~ s/m2m\.//g;
72 $self->{mlqfields} = $self->{m2mfields};
73 $self->{mlqfields} =~ s/m2m\./mlq./g;
75 # $self->{m2mfields_nom2m} = $self->{m2mfields};
76 # $self->{m2mfields_nom2m} =~ s/m2m.//g;
78 # yick
79 #$self->{m2mfieldsnull} = join ',', map {" NULL as $_ "} grep {$_ ne 'marker_id'} @nullm2mfields;
81 return $self;
88 ########################################
89 # here are the search criteria thingies!
91 =head2 Search Criteria Methods
93 These methods can be called in any order. The search object
94 "remembers" what criteria you have asked for, and when you perform
95 the search, all of them are applied. As you are building the query,
96 you can call query_text() if you would like to see the SQL string, say for
97 debugging.
99 None of these methods have interesting return values.
101 =over 12
103 =item random()
105 Causes the query to return only a single marker, randomly selected
106 from those that fit the other criteria you provide.
108 $msearch->random();
109 $msearch->perform_search();
110 my ($random_marker) = $msearch->fetch_id_list();
111 # returns a random random marker
113 # OR
115 $msearch->on_chromosome(5);
116 $msearch->random();
117 $msearch->in_collection('KFG');
118 $msearch->perform_search();
119 my ($random_marker) = $msearch->fetch_id_list();
120 # randomly selects one of the KFG markers on chr 5.
122 =cut
124 sub random {
126 my ($self) = @_;
128 # really, we just have to special-case this.
129 $self->{random} = 1;
134 =item name_like($string)
136 Limits the markers to those with a name like the one given.
137 There is no need to run clean_marker_name; this function will
138 search for the name both cleaned and as originally input.
140 The query is run with ILIKE. Asterisks are converted to percent
141 characters. If you know the marker name, please see name_exactly().
143 $msearch->name_like('TG*');
145 =cut
147 sub name_like {
149 my ($self, $name) = @_;
151 return unless $name;
153 # convert * to % (but leave underscores alone)
154 $name =~ s/\*/\%/g;
156 # strip un-normal characters?
157 $name =~ s|[^-_\w./%]||g;
159 # clean the name to see what WE would call it.
160 # We'll search for both the input name and the cleaned name.
161 my $clean_name = clean_marker_name($name);
163 # also allow to search on marker SGN-M type identifiers in the name
164 # field. exact/starts with etc won't be supported for SGN-M ids.
165 # (Lukas 4/2008).
167 my $id_name = "";
168 if ($name =~ /SGN-?M(\d+)/i) {
169 $id_name=$1;
173 my $subquery = "";
175 if ($id_name) {
176 $subquery = "SELECT marker_id FROM marker_alias WHERE alias ILIKE ? OR alias ILIKE ? OR marker_id=?";
177 $self->_add_marker_query($subquery, $name, $clean_name, $id_name);
179 else {
180 $subquery = "SELECT marker_id FROM marker_alias WHERE alias ILIKE ? OR alias ILIKE ?";
181 $self->_add_marker_query($subquery, $name, $clean_name);
187 =item name_exactly($name)
189 Limits the search to markers that have the EXACT name specified.
190 See also name_like().
192 $msearch->name_exactly('KG_E41/M49-F-285-P2');
194 =cut
196 sub name_exactly {
197 my ($self, $name) = @_;
199 return unless $name;
201 # we do no cleaning on the name. If you didn't get what you wanted,
202 # well, you should have called name_like().
204 my $subquery = "SELECT marker_id FROM marker_alias WHERE alias = ?";
206 $self->_add_marker_query($subquery, $name);
211 =item marker_id($marker_id)
213 Limits the search to one particular marker that you know the id of.
214 Can be useful when looking for many locations of one marker.
216 $msearch->marker_id(518);
218 =cut
220 sub marker_id {
221 my ($self, $marker_id) = @_;
223 return unless $marker_id;
225 # this looks really silly.
226 my $subquery = "SELECT marker_id FROM marker WHERE marker_id = ?";
228 $self->_add_marker_query($subquery, $marker_id);
233 =item in_collection(@colls)
235 Limit results to markers in a certain collection, or list of collections
237 $msearch->in_collection('KFG');
239 $msearch->in_collection('KFG', 'COS', 'COSII');
241 =cut
243 sub in_collection {
245 my ($self, @colls) = @_;
247 return unless @colls > 0;
249 # collection name
250 my $subq = "SELECT marker_id FROM marker_collectible INNER JOIN marker_collection USING(mc_id) WHERE " . join (' OR ', map {" mc_name ILIKE ? "} @colls);
253 $self->_add_marker_query($subq, @colls);
257 =item derived_from(@source_names)
259 Limits markers to those derived from the given source.
261 $msearch->derived_from('est-by-read', 'bac', 'unigene');
263 =cut
265 sub derived_from {
267 my ($self, @sources) = @_;
269 return unless @sources > 0;
271 my $q = 'SELECT marker_id FROM marker_derived_from INNER JOIN derived_from_source USING(derived_from_source_id) WHERE ' . join (' OR ', map {' source_name ILIKE ? '} @sources);
273 $self->_add_marker_query($q, @sources);
280 #################################################################
281 # more criteria thingies, but these have to do with map locations
283 =item must_be_mapped()
285 Limits the marker results to only markers that are mapped.
287 You do not need to call this method if you are ALSO asking for a
288 particular chromosome, map, species, etc. Those methods will automatically
289 exclude unmapped markers.
291 $msearch->must_be_mapped();
293 =cut
295 sub must_be_mapped {
297 my ($self) = @_;
299 push(@{$self->{query_parts}{marker_loc}}, ["SELECT $self->{m2mfields} FROM marker_to_map as m2m WHERE position IS NOT NULL AND current_version = 't'", []]);
303 =item in_species(@common_names);
305 Limits the search to markers mapped in the species whose common names
306 are given.
308 $msearch->in_species('tomato', 'nipplefruit');
311 =cut
314 sub in_species {
316 my ($self, @species) = @_;
318 return unless @species > 0;
320 my $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m INNER JOIN accession ON(parent_1 = accession.accession_id OR parent_2 = accession.accession_id) INNER JOIN organism using(organism_id) INNER JOIN common_name USING(common_name_id) WHERE "
321 . join(' OR ', map {' common_name.common_name ILIKE ? '} @species);
323 $self->_add_loc_query($q, @species);
328 =item on_map_version($mv_id)
330 Limits results to markers on the given map_version(s).
332 $msearch->on_map_version(1, 2, 3);
334 =cut
336 sub on_map_version {
337 my ($self, @mvs) = @_;
339 return unless @mvs > 0;
341 my $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m WHERE "
342 .join(' OR ', map {return unless $_ > 0; ' map_version_id = ? '} @mvs);
344 $self->_add_loc_query($q, @mvs);
349 =item on_map(@map_ids)
351 Limits results to makers on the given map(s).
353 $msearch->on_map(9, 5);
355 =cut
357 sub on_map {
358 my ($self, @maps) = @_;
360 return unless @maps > 0;
362 my $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m WHERE "
363 .join(' OR ', map {return unless $_ > 0; ' map_id = ? '} @maps);
365 $self->_add_loc_query($q, @maps);
370 =item on_chr(@chrs)
372 Limits the marker results to only markers on a given chromosome or
373 linkage group.
375 $msearch->on_chr(5, 6, 7);
377 Bonus feature: if you search for (say) chromosome 4, the search will
378 automagically include linkage groups 4a and 4b in addition to 4.
381 =cut
383 sub on_chr {
385 my ($self, @chrs) = @_;
387 return unless @chrs > 0;
389 # asking for "4" should also retrieve "4b".
390 foreach my $chr (@chrs){
391 return unless $chr > 0;
392 if ($chr =~ /^\d+$/){ $chr .='[ABCabc]?' }
393 $chr = '^'.$chr.'$';
396 my $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m WHERE "
397 .join(' OR ', map {' lg_name ~ ? '} @chrs);
399 $self->_add_loc_query($q, @chrs);
403 =item position_between($start, $end)
405 Limits the results to markers appearing between the given endpoints.
407 $msearch->position_between(90,100);
408 $msearch->on_chr(5);
409 # returns only markers between 90-100 on chr 5
411 $msearch->position_between(50, undef);
412 # returns markers after 50 cM on any chromosome
414 =cut
416 sub position_between {
418 my ($self, $start, $end) = @_;
420 # making sure our inputs are numbers
421 # (pg barfs if you feed it a string;
422 # we want to fail gracefully)
423 $start += 0;
424 $end += 0;
425 return unless ($start || $end);
427 my @conds;
428 push(@conds, 'position >= ?') if $start;
429 push(@conds, 'position <= ?') if $end;
431 my @places;
432 push(@places, $start) if $start;
433 push(@places, $end) if $end;
435 my $subq = "SELECT $self->{m2mfields} FROM marker_to_map as m2m WHERE ". (join ' AND ', @conds);
437 $self->_add_loc_query($subq, @places);
442 =item has_subscript()
444 Not very useful, except in testing.
445 Limits the results to marker locations that have subscripts.
446 Remember that most markers do NOT have subscripts.
448 $msearch->has_subscript();
450 =cut
452 sub has_subscript {
454 my ($self) = @_;
456 $self->_add_loc_query("SELECT $self->{m2mfields} FROM marker_to_map as m2m WHERE subscript IS NOT NULL");
460 =item confidence_at_least($conf)
462 Limits results to marker locations with a given confidence or greater.
463 Remember that many maps have entirely uncalculated confidences, so this
464 is not always appropriate.
466 The only argument this method takes is the minimum confidence. This
467 can be supplied as a confidence_id OR as a confidence name. Using the
468 ID will result in a slightly more efficient query.
470 $msearch->confidence_at_least(2);
471 $msearch->confidence_at_least('F(LOD3)');
473 =cut
475 sub confidence_at_least {
476 my ($self, $conf) = @_;
478 return unless $conf;
479 my $q;
481 if($conf =~ /^-?\d+$/){
483 # it's an ID
484 $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m WHERE confidence_id >= ?";
486 } else {
488 # it's a confidence name
489 $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m INNER JOIN marker_confidence USING(confidence_id) WHERE marker_confidence.confidence_id >= (select confidence_id from marker_confidence where confidence_name = ?)";
494 $self->_add_loc_query($q, $conf);
499 =item protocol(@protos)
501 Limit results to markers/locations mapped with a certain type of
502 experiment, for example CAPS or RFLP or AFLP.
504 $msearch->protocol('CAPS', 'AFLP');
506 =cut
508 sub protocol {
509 my ($self, @protos) = @_;
510 return unless @protos > 0;
512 my $subq = "SELECT $self->{m2mfields} FROM marker_to_map as m2m WHERE "
513 . join(' OR ', map {' protocol ILIKE ? '} @protos);
515 $self->_add_loc_query($subq, @protos);
519 =item with_bac_associations(@clone_ids)
521 Limit results to markers that have some sort of bac associations.
522 This is equivalent to running with_overgo_associations,
523 with_manual_associations, and with_computational_associations. As with
524 all three of those, the list of clone_id's is optional. If no
525 clone_id's are specified, the results will include all markers that
526 have any bacs at all.
528 $msearch->with_bac_associations();
529 $msearch->with_bac_associations(12345, 67890);
531 =cut
533 sub with_bac_associations {
534 my ($self, @bacs) = @_;
535 my $q = "select $self->{m2mfields} from marker_to_map as m2m left join physical.probe_markers as pm on(m2m.marker_id=pm.marker_id) left join physical.overgo_associations as oa using(overgo_probe_id) left join physical.oa_plausibility using(overgo_assoc_id) left join physical.manual_associations as ma on(ma.marker_id=m2m.marker_id) left join physical.computational_associations as ca on(ca.marker_id=m2m.marker_id) WHERE oa_plausibility.plausible = 1 OR oa.overgo_assoc_id IS NULL " . join(' OR ', map {' oa.bac_id = ? OR ca.clone_id=? OR ma.clone_id=?'} @bacs);
537 my @placebacs;
539 foreach my $bac(@bacs){
540 # we need to do this in triplicate - see the query above, where each clone_id has to be in three different tests in the where clause.
542 push(@placebacs, $bac) for 1..3;
546 $self->_add_loc_query($q, @placebacs);
550 =item with_overgo_associations(@clone_ids)
552 Limit results to markers with PLAUSIBLE overgo_associations to bacs.
553 Particular bacs may be specified by their clone_id\'s.
555 $msearch->with_overgo_associations();
556 $msearch->with_overgo_associations(12345, 67890);
558 =cut
560 sub with_overgo_associations {
561 my ($self, @bacs) = @_;
563 my $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m INNER JOIN physical.probe_markers using(marker_id) INNER JOIN physical.overgo_associations using(overgo_probe_id) INNER JOIN physical.oa_plausibility using(overgo_assoc_id)";
565 if(@bacs > 0){
567 $q .= " WHERE "
568 . join(' OR ', map {' bac_id = ? '} @bacs);
572 $self->_add_loc_query($q, @bacs);
576 =item with_manual_associations(@clone_ids)
578 Limits results to markers that have been manually associated with bacs.
580 $msearch->with_manual_associations();
581 $msearch->with_manual_associations(12345, 67890);
583 =cut
586 sub with_manual_associations {
587 my ($self, @bacs) = @_;
589 my $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m INNER JOIN physical.manual_associations using(marker_id)";
591 if(@bacs > 0){
593 $q .= " WHERE "
594 . join(' OR ', map {' bac_id = ? '} @bacs);
598 $self->_add_loc_query($q, @bacs)
603 =item with_computational_associations(@clone_ids)
605 Limits results to markers that have been computationally associated
606 with bacs (eg, with BLAST).
608 $msearch->with_computational_associations();
609 $msearch->with_computational_associations(12345, 67890);
611 =cut
613 sub with_computational_associations {
614 my ($self, @bacs) = @_;
616 my $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m INNER JOIN physical.computational_associations using(marker_id)";
618 if(@bacs > 0){
620 $q .= " WHERE "
621 . join(' OR ', map {' bac_id = ? '} @bacs);
625 $self->_add_loc_query($q, @bacs)
643 ##############################################
644 # what follows is all query processing bidness
646 sub _assemble_query {
648 my ($self, $start, $end) = @_;
650 # make a string out the query as we have it so far.
651 # You can call this function to see how the query is doing,
652 # but no $self->{query} will be stored until perform_search
653 # is called.
655 # This is where the heavy hitting happens. Assemble a query and
656 # figure out the placeholders. Remember to include the limit/offset.
658 # check and assemble the queries on marker_id
660 my $marker_id_query = "(SELECT $self->{midqfields} FROM marker left join marker_to_map as m2m using(marker_id) WHERE marker.marker_id is not null)";#WHERE m2m.marker_id is not null)";
661 my $marker_loc_query = "(SELECT $self->{m2mfields} FROM marker inner join marker_to_map as m2m using(marker_id))";
662 my @places;
664 foreach my $querytype ('marker_loc', 'marker_id'){
665 next unless $self->{query_parts}{$querytype} && @{$self->{query_parts}{$querytype}} > 0;
666 # warn " processing $querytype query\n";
667 foreach ((@{$self->{query_parts}{$querytype}})) {
668 next unless $_;
669 my ($subq, $subplaces) = @$_;
671 # check for the right number of placeholders
672 my $slotcount = ($subq =~ tr/\?//);
673 if ($slotcount != @$subplaces){
674 # if somebody messed up, there's not much we can do.
675 local $"=',';
676 die "number of placeholders is wrong for query $subq (Placeholders: @$subplaces)\n";
679 # add this to the query
680 if($querytype eq 'marker_id'){
681 #$marker_id_query .= " INTERSECT (select $self->{m2mfields} from marker_to_map as m2m inner join ($subq) as subq1 using(marker_id)) ";
682 $marker_id_query .= " INTERSECT (select $self->{midqfields} from marker inner join ($subq) as subq1 using(marker_id) left join marker_to_map as m2m using(marker_id)) ";
683 } elsif ($querytype eq 'marker_loc'){
684 $marker_loc_query .= "INTERSECT ($subq) ";
685 } else {
686 next;
687 #die "what the heck? no query type.";
690 # and the placeholders
691 push(@places, @$subplaces);
696 # final assembly
697 my $query;
698 if( $self->{query_parts}{marker_loc}){
699 $query = "SELECT DISTINCT $self->{mlqfields} FROM ($marker_loc_query) as mlq INNER JOIN ($marker_id_query) as midq using(marker_id)";
700 } else {
701 # if we only have a marker_id query, or if we have none
702 $query = $marker_id_query;
705 # how to order?
706 if($self->{random}){
707 $query = "SELECT * FROM ($query) AS rquery ORDER BY RANDOM() LIMIT 1";
708 } elsif ($self->{query_parts}{marker_loc}){
709 $query .= ' ORDER BY lg_order, position, subscript, confidence_id desc';
712 return $query, \@places;
716 =back
718 =head2 Queries
720 =over 12
722 =item query_text()
724 my $debug_string = $msearch->query_text();
725 warn $debug_string;
727 Returns a string for debugging purposes. This string contains the query and indicates the placeholder values. If perform_search() has already been called, this is the query that was run. If perform_search() has NOT yet been called, this is the query as it currently appears at this stage in the query-building process. It is preceded by the tag "[PROVISIONAL]".
729 =cut
731 sub query_text {
733 my ($self) = @_;
735 # assemble our query
736 my ($query, $places) = $self->_assemble_query();
737 my $retstring = $query . " [Placeholder values: ".(join ", ", @$places)."]\n";
738 if (!defined($self->{query})){
739 # if we don't have a query, make one up quick!
740 warn "search has not been performed; this query may be incomplete.\n";
741 $retstring = '[PROVISIONAL] '.$retstring;
744 return $retstring;
748 =item return_subquery_and_placeholders()
750 Instead of performing the search through this module, you may wish to
751 use the query we construct, perhaps as a subquery in some SQL you are
752 writing. This method returns the query as a string, and a reference to
753 an array of the required placeholders.
755 my ($subquery, $places) =
756 $msearch->return_subquery_and_placeholders();
758 my $sth = $dbh->prepare("SELECT * FROM BLAH
759 BLAH INNER JOIN ($subquery) WHERE BLAH = ?");
761 $sth->execute(@$places, "blah");
763 This expects to be done INSTEAD of perform_search(), not in addition to it.
765 =cut
767 sub return_subquery_and_placeholders {
769 my ($self) = @_;
771 # yeah, it's a thin wrapper around _assemble_query(). So sue me.
772 my ($query, $places) = $self->_assemble_query();
773 return ($query, $places);
777 =item perform_search()
779 Performs the search; this is like the execute() function in DBI. After
780 calling perform_search(), you will be ready to use the fetch_* methods (see below).
782 If you only want a subset of results, say the first 30, specify the start and end.
784 $msearch->perform_search();
785 # search includes all possible results
787 $msearch->perform_search(1, 30);
788 # will only include first 30 results
790 Proceed immediately to one of the fetch_* methods.
792 =back
794 =cut
796 sub perform_search {
798 my ($self, $start, $end) = @_;
799 # assemble the query and run it
800 my ($q, $places) = $self->_assemble_query($start, $end);
802 #warn $self->query_text();
804 $self->{sth} = $self->{dbh}->prepare($q);
805 $self->{sth}->execute(@$places);
807 # store the query for posterity
808 $self->{query} = $q;
813 ##########################
814 # result-getting functions
817 =head2 Fetch Methods
819 =over 12
821 =item fetch_id_list()
823 Returns a list of marker_id's that match your criteria.
825 $msearch->perform_search();
826 my @marker_ids = $msearch->fetch_id_list();
828 =cut
830 sub fetch_id_list {
832 my ($self) = @_;
834 return @{$self->{idlist}} if exists $self->{idlist};
836 # get results
837 $self->{allref} = $self->{sth}->fetchall_arrayref();
839 # collapse one level of arrayrefs
840 my @idlist = map {$_->[0]} @{$self->{allref}};
842 $self->{idlist} = \@idlist;
843 my $c = @{$self->{idlist}};
844 # warn ">>> idlist has $c items\n";
845 return @idlist;
849 =item fetch_location_id_list()
851 Returns a list of marker location_ids that match your criteria.
853 $msearch->perform_search();
854 my @loc_ids = $msearch->fetch_location_id_list();
856 =cut
858 sub fetch_location_id_list {
860 my ($self) = @_;
862 return $self->{locidlist} if exists $self->{locidlist};
864 my $results = $self->{sth}->fetchall_arrayref({});
866 @{$self->{locidlist}} = map {$_->{location_id}} @$results;
868 return @$results;
872 =item fetch_full_markers()
874 Returns a list of marker objects that match your criteria.
876 $msearch->perform_search()
877 my @marker_objs = $msearch->fetch_full_markers();
879 =cut
882 sub fetch_full_markers {
884 my ($self) = @_;
886 my @ids = $self->fetch_id_list();
888 my @objs;
889 for (@ids) {
891 my $m = CXGN::Marker->new($self->{dbh},$_);
892 push (@objs, $m);
896 return @objs;
900 =item fetch_location_markers()
902 Returns a list of CXGN::Marker::LocMarker objects that match your criteria.
904 $msearch->perform_search();
905 my @loc_objs = $msearch->fetch_location_markers();
907 WARNING: If your search could return more than a few dozen locations, the process of creating all these objects may take a VERY LONG TIME. This is not recommended in such cases; try one of the other fetch_* methods instead.
909 =cut
912 sub fetch_location_markers {
914 my ($self) = @_;
915 my $results = $self->{sth}->fetchall_arrayref({});
916 my @list;
918 foreach my $r (@$results){
920 push(@list, CXGN::Marker::LocMarker->new($self->{dbh}, $r->{marker_id}, $r));
924 return @list;
930 # takes a query and list of placeholders; adds them to the marker id
931 # portion of the query
933 sub _add_marker_query {
935 my ($self, $q, @rest) = @_;
936 push(@{$self->{query_parts}{marker_id}}, [$q, [@rest]]);
940 # takes a query and list of placeholders; adds them to the marker loc
941 # portion of the query
943 sub _add_loc_query {
945 my ($self, $q, @rest) = @_;
946 push(@{$self->{query_parts}{marker_loc}}, [$q, [@rest]]);
951 =back
953 =head1 DEVELOPING SEARCH PIECES
955 Fortunately, new search criteria methods are pretty easy to write and
956 test. This module works by intersecting subqueries with each other, so
957 each search criterion simply adds a subquery. When you call
958 perform_search(), the subqueries are glued together into one big query,
959 and the placeholders are all merged into one list. This creates a
960 mega-query that the database can easily optimize as it pleases. In
961 practice, I find the resulting queries to be very fast.
963 So, let's say you want to add a new method, something along the lines
964 of $msearch->name_like($some_input_name). Here is how to develop that
965 subquery and how to integrate it with the search module.
967 There are two different types of subqueries that this module expects:
969 (a) queries that relate to a marker by itself, and begin "SELECT
970 marker_id FROM..."
972 (b) queries that relate to a marker in a map location, and include the view called marker_to_map. These begin "SELECT $self->{m2mfields} FROM marker_to_map as m2m..." . So long as you include the marker_to_map view, you don't have to worry about what is in $self->{m2mfields}.
974 Let's walk through an easy type (a) example. Say you want to write the name_exactly function: it selects markers whose name is exactly what the user inputs. First, sit down at your psql prompt and write an SQL query that selects the markers you want out of the whole entire database:
976 SELECT marker_id FROM marker_alias WHERE alias = ?
978 That\'s your query, and there is one placeholder, for the marker name. Your subroutine will simply specify the subquery and the placeholders, and use the _add_marker_query() function to add itself to the growing query. This is the entire function:
980 sub name_exactly {
981 my ($self, $name) = @_;
983 my $subquery = "SELECT marker_id FROM marker_alias WHERE alias = ?";
985 $self->_add_marker_query($subquery, $name);
989 _add_marker_query() takes a subquery and a list of placeholders, so if
990 you have many placeholders, just supply them one after the other (or
991 with an array):
993 $self->_add_marker_query($subquery, $one, $two, $red, $blue);
994 $self->_add_marker_query($subquery, @crapload_of_placeholders);
997 Pretty easy, huh? Let\'s try a type (b) example, where we have to join
998 through the marker_to_map table. The only difference is that you call _add_loc_query instead of _add_marker_query.
1000 sub on_chr {
1001 my ($self, $chr) = @_;
1003 # asking for "4" should also retrieve "4b".
1004 if ($chr =~ /^\D+$/){ $chr .='[ABCabc]?' }
1006 my $q = "SELECT $self->{m2mfields} FROM marker_to_map as m2m WHERE lg_name ~ ? ";
1008 $self->_add_loc_query($q, $chr);
1012 That is all! Let\'s review:
1014 1. Write your query, at the psql prompt or otherwise
1016 2. Rephrase in terms of placeholders
1018 3. Stick it in a subroutine inside this module, using _add_marker_query() or _add_loc_query()
1020 4. Test and document your subroutine.
1022 5. The end! You don\'t have to understand or modify any of the other existing code.
1024 Where in the file should you put your new code? Well, all the type
1025 (a)\'s are together, and all the type (b)\'s are together. It\'s like the
1026 high school cafeteria: find your friends, and sit with them.
1028 As you test the subroutine, if you don\'t get results you expect,
1029 simply print query_text() and you will see the query as you have built
1030 it, and the list of placeholders that go along with it (in order). You
1031 can actually cut-and-paste this into your psql monitor if you like,
1032 for debugging purposes. After debugging, you may still find it useful
1033 to use query_text() in die statements, warnings, or even as a comment
1034 in the html of your search-related page.
1036 =head1 BUGS
1038 Still in heavy development as of March 2006.
1040 =head1 LICENSE
1042 This is under the same license as the rest of the CXGN codebase.
1043 Questions? Contact sgn-feedback@sgn.cornell.edu
1045 =head1 AUTHOR
1047 Beth started it.
1049 =head1 SEE ALSO
1051 CXGN::Marker, CXGN::Map, Cview, etc.
1053 =cut
1057 # all good modules return true