Bio::Align::Graphics: move into its own distribution and drop dependency on GD
[bioperl-live.git] / Bio / DB / HIV / HIVQueryHelper.pm
blob8e258fbd31c3bee6616c6d26579c66d008ebc595
1 # $Id: HIVQueryHelper.pm 231 2008-12-11 14:32:00Z maj $
3 # BioPerl module for Bio::DB::HIV::HIVQueryHelper
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Mark A. Jensen <maj@fortinbras.us>
9 # Copyright Mark A. Jensen
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::DB::HIV::HIVQueryHelper - Routines and packages used by Bio::DB::HIV and
18 Bio::DB::Query::HIVQuery
20 =head1 SYNOPSIS
22 Used in Bio::DB::Query::HIVQuery. No need to use directly.
24 =head1 DESCRIPTION
26 C<Bio::DB::HIV::HIVQueryHelper> contains a number of packages for use
27 by L<Bio::DB::Query::HIVQuery>. Package C<HIVSchema> parses the
28 C<lanl-schema.xml> file, and allows access to it in the context of the
29 relational database it represents (see APPENDIX for excruciating
30 detail). Packages C<QRY>, C<R>, and C<Q> together create the query
31 string parser that enables NCBI-like queries to be understood by
32 C<Bio::DB::Query::HIVQuery>. They provide objects and operators to
33 perform and simplify logical expressions involving C<AND>, C<OR>, and
34 C<()> and return hash structures that can be handled by
35 C<Bio::DB::Query::HIVQuery> routines.
37 =head1 FEEDBACK
39 =head2 Mailing Lists
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 =head2 Support
50 Please direct usage questions or support issues to the mailing list:
52 I<bioperl-l@bioperl.org>
54 rather than to the module maintainer directly. Many experienced and
55 reponsive experts will be able look at the problem and quickly
56 address it. Please include a thorough description of the problem
57 with code and data examples if at all possible.
59 =head2 Reporting Bugs
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 of the bugs and their resolution. Bug reports can be submitted via
63 the web:
65 https://github.com/bioperl/bioperl-live/issues
67 =head1 AUTHOR - Mark A. Jensen
69 Email maj@fortinbras.us
71 =head1 CONTRIBUTORS
73 Mark A. Jensen
75 =head1 APPENDIX
77 The rest of the documentation details each of the contained packages.
78 Internal methods are usually preceded with a _
80 =cut
82 # Let the code begin...
84 package Bio::DB::HIV::HIVQueryHelper;
85 use strict;
86 use Bio::Root::Root;
88 # globals
89 BEGIN {
90 #exceptions
91 @Bio::QueryStringSyntax::Exception::ISA = qw( Bio::Root::Exception);
96 =head2 HIVSchema - objects/methods to manipulate a version of the LANL HIV DB schema
98 =head3 HIVSchema SYNOPSIS
100 $schema = new HIVSchema( 'lanl-schema.xml' );
101 @tables = $schema->tables;
102 @validFields = $schema->fields;
103 @validAliases = $schema->aliases;
104 @query_aliases_for_coreceptor = $schema->aliases( 'SEQ_SAMple.SSAM_second_receptor' );
105 $pk_for_SequenceEntry = $schema->primarykey('SequenceEntry'); # returns 'SequenceEntry.SE_id'
106 $fk_for_SEQ_SAMple_to_SequenceEntry =
107 $schema->foreignkey('SEQ_SAMple', 'SequenceEntry'); # returns 'SEQ_SAMple.SSAM_SE_id'
109 $table = $schema->tablepart('SEQ_SAMple.SSAM_badseq'); # returns 'SEQ_SAMple'
110 $column = $schema->columnpart('SEQ_SAMple.SSAM_badseq'); # returns 'SSAM_badseq'
112 =head3 HIVSchema DESCRIPTION
114 HIVSchema methods are used in L<Bio::DB::Query::HIVQuery> for table,
115 column, primary/foreign key manipulations based on the observed Los
116 Alamos HIV Sequence Database (LANL DB) naming conventions for their
117 CGI parameters. The schema is contained in an XML file
118 (C<lanl-schema.xml>) which is read into an HIVSchema object, in turn a
119 property of the HIVQuery object. HIVSchema methods are used to build
120 correct cgi queries in a way that attempts to preserve the context of
121 the relational database the query parameters represent.
123 =cut
125 package # hide from PAUSE
126 HIVSchema;
127 # objects/methods to manipulate a version of the LANL HIV DB schema
128 # stored in XML
129 use XML::Simple;
130 use Bio::Root::Root;
131 use strict;
133 ### constructor
135 =head3 HIVSchema CONSTRUCTOR
137 =head4 HIVSchema::new
139 Title : new
140 Usage : $schema = new HIVSchema( "lanl-schema.xml ");
141 Function:
142 Example :
143 Returns : an HIVSchema object
144 Args : XML filename
146 =cut
148 sub new {
149 my $class = shift;
150 my @args = @_;
151 my $self = {};
152 if ($args[0]) {
153 $self->{schema_ref} = loadHIVSchema($args[0]);
155 bless($self, $class);
156 return $self;
159 ### object methods
161 =head3 HIVSchema INSTANCE METHODS
163 =head4 HIVSchema tables
165 Title : tables
166 Usage : $schema->tables()
167 Function: get all table names in schema
168 Example :
169 Returns : array of table names
170 Args : none
172 =cut
174 sub tables {
175 # return array of all tables in schema
176 local $_;
177 my $self = shift;
178 my $sref = $self->{schema_ref};
179 Bio::Root::Root->throw("schema not initialized") unless $sref;
180 my @k = grep(/\./, keys %$sref);
181 my %ret;
182 foreach (@k) {
183 s/\..*$//;
184 $ret{$_}++;
186 @k = sort keys %ret;
187 return @k;
190 =head4 HIVSchema columns
192 Title : columns
193 Usage : $schema->columns( [$tablename] );
194 Function: return array of columns for specified table, or all columns in
195 schema, if called w/o args
196 Example :
197 Returns :
198 Args : tablename or fieldname string
200 =cut
202 sub columns {
203 # return array of columns for specified table
204 # all columns in schema, if called w/o args
205 local $_;
206 my $self = shift;
207 my ($tbl) = @_;
208 my $sref = $self->{schema_ref};
209 Bio::Root::Root->throw("schema not initialized") unless $sref;
210 # trim column name
211 $tbl =~ s/\..*$//;
212 # check if table exists
213 return () unless grep(/^$tbl$/i, $self->tables);
214 my @k = sort keys %$sref;
215 @k = grep (/^$tbl\./i, @k);
216 foreach (@k) {
217 s/^$tbl\.//;
219 return @k;
222 =head4 HIVSchema fields
224 Title : fields
225 Usage : $schema->fields();
226 Function: return array of all fields in schema, in format "table.column"
227 Example :
228 Returns : array of all fields
229 Args : none
231 =cut
233 sub fields {
234 # return array of all fields (Table.Column format) in schema
235 my $self = shift;
236 my $sref = $self->{schema_ref};
237 Bio::Root::Root->throw("schema not initialized") unless $sref;
238 my @k = sort keys %{$sref};
239 return @k;
242 =head4 HIVSchema options
244 Title : options
245 Usage : $schema->options(@fieldnames)
246 Function: get array of options (i.e., valid match data strings) available
247 to specified field
248 Example :
249 Returns : array of match data strings
250 Args : [array of] fieldname string[s] in "table.column" format
252 =cut
254 sub options {
255 # return array of options available to specified field
256 my $self = shift;
257 my ($sfield) = @_;
258 my $sref = $self->{schema_ref};
259 Bio::Root::Root->throw("schema not initialized") unless $sref;
260 return $$sref{$sfield}{option} ? @{$$sref{$sfield}{option}} : ();
263 =head4 HIVSchema aliases
265 Title : aliases
266 Usage : $schema->aliases(@fieldnames)
267 Function: get array of aliases to specified field[s]
268 Example :
269 Returns : array of valid query aliases for fields as spec'd in XML file
270 Args : [an array of] fieldname[s] in "table.column" format
272 =cut
274 sub aliases {
275 # return array of aliases to specified field
276 my $self = shift;
277 my ($sfield) = @_;
278 my $sref = $self->{schema_ref};
279 my @ret;
280 Bio::Root::Root->throw("schema not initialized") unless $sref;
281 if ($sfield) {
282 return $$sref{$sfield}{alias} ? @{$$sref{$sfield}{alias}} : ();
284 else { # all valid aliases
285 map {push @ret, @{$$sref{$_}{alias}} if $$sref{$_}{alias}} $self->fields;
286 return @ret;
290 =head4 HIVSchema ankh
292 Title : ankh (annotation key hash)
293 Usage : $schema->ankh(@fieldnames)
294 Function: return a hash translating fields to annotation keys for the
295 spec'd fields.
296 (Annotation keys are used for parsing the tab-delimited response
297 to Bio::DB::Query::HIVQuery::_do_lanl_request.)
298 Example :
299 Returns : hash ref
300 Args : [an array of] fieldname[s] in "table.column" format
302 =cut
304 sub ankh {
305 # return hash translating sfields to annotation keys for specified sfield(s)
306 my $self = shift;
307 my %ret = ();
308 my @sfields = @_;
309 my $sref = $self->{schema_ref};
310 Bio::Root::Root->throw("schema not initialized") unless $sref;
311 foreach (@sfields) {
312 next unless $$sref{$_}{ankey};
313 $ret{$_} = {'ankey'=>$$sref{$_}{ankey},'antype'=>$$sref{$_}{antype}};
315 return %ret;
318 =head4 HIVSchema tablepart
320 Title : tablepart (alias: tbl)
321 Usage : $schema->tbl(@fieldnames)
322 Function: return the portion of the fieldname[s] that refer to the
323 db table
324 Example : $schema->tbl('SequenceEntry.SE_id'); # returns 'SequenceEntry'
325 Returns : table name as string
326 Args : [an array of] fieldname[s] in "table.column" format
328 =cut
330 sub tablepart {
331 # return the 'Table' part of the specified field(s)
332 my $self = shift;
333 my @sfields = @_;
334 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
335 my ($squish,@ret, %ret);
336 if ($sfields[0] eq '-s') {
337 # squish : remove duplicates from the returned array
338 $squish=1;
339 shift @sfields;
341 foreach (@sfields) {
342 push @ret, /^(.*)\./;
344 if ($squish) {
345 # arg order is clobbered
346 @ret{@ret} = undef;
347 @ret = keys %ret;
349 return (wantarray ? @ret : $ret[0]);
352 sub tbl {
353 # tablepart alias
354 shift->tablepart(@_);
357 =head4 HIVSchema columnpart
359 Title : columnpart (alias: col)
360 Usage : $schema->col(@fieldnames)
361 Function: return the portion of the fieldname[s] that refer to the
362 db column
363 Example : $schema->col('SequenceEntry.SE_id'); # returns 'SE_id'
364 Returns : column name as string
365 Args : [an array of] fieldname[s] in "table.column" format
367 =cut
369 sub columnpart {
370 # return the 'Column' part of the specified field(s)
371 my $self = shift;
372 my @sfields = @_;
373 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
374 my @ret;
375 foreach (@sfields) {
376 push @ret, /\.(.*)$/;
378 return (wantarray ? @ret : $ret[0]);
381 sub col {
382 # columnpart alias
383 shift->columnpart(@_);
386 =head4 HIVSchema primarykey
388 Title : primarykey [alias: pk]
389 Usage : $schema->pk(@tablenames);
390 Function: return the primary key of the specified table[s], as judged by
391 the syntax of the table's[s'] fieldnames
392 Example : $schema->pk('SequenceEntry') # returns 'SequenceEntry.SE_id'
393 Returns : primary key fieldname[s] in "table.column" format, or null if
394 no pk exists
395 Args : [an array of] table name[s] (fieldnames are ok, table part used)
397 =cut
399 sub primarykey {
400 # return the primary key (in Table.Column format) of specified table(s)
401 my $self = shift;
402 my @tbl = @_;
403 my @ret;
404 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
405 foreach my $tbl (@tbl) {
406 # trim column name
407 $tbl =~ s/\..*$//;
408 grep(/^$tbl$/i, $self->tables) ?
409 push(@ret, grep(/\.[0-9a-zA-Z]+_id/, grep(/$tbl/i,$self->fields))) :
410 push(@ret, "");
412 return (wantarray ? @ret : $ret[0]);
415 sub pk {
416 # primarykey alias
417 shift->primarykey(@_);
420 =head4 HIVSchema foreignkey
422 Title : foreignkey [alias: fk]
423 Usage : $schema->fk($intable [, $totable])
424 Function: return foreign key fieldname in table $intable referring to
425 table $totable, or all foreign keys in $intable if $totable
426 unspec'd
427 Example : $schema->fk('AUthor', 'SequenceEntry'); # returns 'AUthor_AU_SE_id'
428 Returns : foreign key fieldname[s] in "table.column" format
429 Args : tablename [, optional foreign table name] (fieldnames are ok,
430 table part used)
432 =cut
434 sub foreignkey {
435 # return foreign key in in-table ($intbl) to to-table ($totbl)
436 # or all foreign keys in in-table if to-table not specified
437 # keys returned in Table.Column format
438 my $self = shift;
439 my ($intbl, $totbl) = @_;
440 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
441 # trim col names
442 $intbl =~ s/\..*$//;
443 $totbl =~ s/\..*$// if $totbl;
444 # check if in-table exists
445 return () unless grep( /^$intbl/i, $self->tables);
446 my @ret = grep( /$intbl\.(?:[0-9a-zA-Z]+_){2,}id/i, $self->fields);
447 if ($totbl) {
448 my $tpk = $self->primarykey($totbl);
449 return (wantarray ? () : "") unless grep( /^$totbl/i, $self->tables) && $tpk;
450 ($tpk) = ($tpk =~ /\.(.*)$/);
451 @ret = grep( /$tpk$/, @ret);
452 return (wantarray ? @ret : $ret[0]);
454 else {
455 # return all foreign keys in in-table
456 return @ret;
460 sub fk {
461 # foreignkey alias
462 shift->foreignkey(@_);
465 =head4 HIVSchema foreigntable
467 Title : foreigntable [alias ftbl]
468 Usage : $schema->ftbl( @foreign_key_fieldnames );
469 Function: return tablename of table that foreign keys points to
470 Example : $schema->ftbl( 'AUthor.AU_SE_id' ); # returns 'SequenceEntry'
471 Returns : tablename
472 Args : [an array of] fieldname[s] in "table.column" format
474 =cut
476 sub foreigntable {
477 # return table name that foreign key(s) point(s) to
478 my $self = shift;
479 my @fk = @_;
480 my @ret;
481 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
482 foreach (@fk) {
483 my ($mnem, $fmnem) = /\.([0-9a-zA-Z]+)_([0-9a-zA-Z]+)_.*$/;
484 next unless $mnem && $fmnem;
485 # lookup based on Table.Column format of fields
486 my $sf = [grep( /^[0-9a-zA-Z]+\.$fmnem\_/, $self->fields )]->[0];
487 next unless $sf;
488 ($sf) = ($sf =~ /^([0-9a-zA-Z]+)\./);
489 push @ret, $sf;
491 return (wantarray ? @ret : $ret[0]);
494 sub ftbl {
495 # foreigntable alias
496 shift->foreigntable(@_);
499 =head4 HIVSchema find_join
501 Title : find_join
502 Usage : $sch->find_join('Table1', 'Table2')
503 Function: Retrieves a set of foreign and primary keys (in table.column
504 format) that represents a join path from Table1 to Table2
505 Example :
506 Returns : an array of keys (as table.column strings) -or- an empty
507 array if Table1 == Table2 -or- undef if no path exists
508 Args : two table names as strings
510 =cut
512 sub find_join {
513 my $self = shift;
514 my ($tgt, $tbl) = @_;
515 my ($stack, $revstack, $found, $revcut) = ([],[], 0, 4);
516 $self->_find_join_guts($tgt, $tbl, $stack, \$found);
517 if ($found) {
518 if (@$stack > $revcut) {
519 # reverse order of tables, see if a shorter path emerges
520 $found = 0;
521 $self->_find_join_guts($tgt, $tbl, $revstack, \$found, 1);
522 return (@$stack <= @$revstack ? @$stack : @$revstack);
524 return @$stack;
526 else {
527 return undef;
531 =head4 HIVSchema _find_join_guts
533 Title : _find_join_guts
534 Usage : $sch->_find_join_guts($table1, $table2, $stackref, \$found, $reverse)
535 (call with $stackref = [], $found=0)
536 Function: recursive guts of find_join
537 Example :
538 Returns : if a path is found, $found==1 and @$stackref contains the keys
539 in table.column format representing the path; if a path is not
540 found, $found == 0 and @$stackref contains garbage
541 Args : $table1, $table2 : table names as strings
542 $stackref : an arrayref to an empty array
543 \$found : a scalar ref to the value 0
544 $rev : if $rev==1, the arrays of table names will be reversed;
545 this can give a shorter path if cycles exist in the
546 schema graph
548 =cut
550 sub _find_join_guts {
551 my $self = shift;
552 my ($tbl, $tgt, $stack, $found, $rev) = @_;
553 return () if $tbl eq $tgt;
554 my $k = $self->pk($tbl);
555 if ($k) {
556 # all fks pointing to pk
557 my @fk2pk = map {
558 $self->fk($_, $k) || ()
559 } ($rev ? reverse $self->tables : $self->tables);
560 # skip keys already on stack
561 if (@$stack) {
562 (@$stack == 1) && do {
563 @fk2pk = grep (!/$$stack[0]/, @fk2pk);
565 (@$stack > 1 ) && do {
566 @fk2pk = map { my $f=$_; grep(/$f/, @$stack) ? () : $f } @fk2pk;
569 foreach my $f2p (@fk2pk) { # tables with fks pointing to pk
570 push @$stack, $f2p;
571 if ($self->tbl($f2p) eq $tgt) { # this fk's table is the target
572 # found it
573 $$found = 1;
574 return;
576 else {
577 #keep looking
578 $self->_find_join_guts($self->tbl($f2p), $tgt, $stack, $found, $rev);
579 return if $$found;
583 # all fks in $tbl
584 my @fks = ($rev ? reverse $self->fk($tbl) : $self->fk($tbl));
585 #skip keys already on stack
586 if (@$stack) {
587 (@$stack == 1) && do {
588 @fks = grep(!/$$stack[0]/, @fks);
590 (@$stack > 1) && do {
591 @fks = map { my $f=$_; grep(/$f/, @$stack) ? () : $f } @fks;
594 # all fks in table
595 if (@fks) {
596 for my $f (@fks) {
597 push @$stack, $f;
598 if ($self->ftbl($f) eq $tgt) { #found it
599 $$found = 1;
600 return;
602 else {
603 $self->_find_join_guts($self->ftbl($f), $tgt, $stack, $found, $rev);
604 $$found ? return : pop @$stack;
608 else {
609 pop @$stack;
610 return;
614 =head4 HIVSchema loadSchema
616 Title : loadHIVSchema [alias: loadSchema]
617 Usage : $schema->loadSchema( $XMLfilename )
618 Function: read (LANL DB) schema spec from XML
619 Example : $schema->loadSchema('lanl-schema.xml');
620 Returns : hashref to schema data
621 Keys are fieldnames in "table.column" format.
622 Each value is a hashref with the following properties:
623 {name} : HIVWEB 'table.column' format fieldname,
624 can be used directly in the cgi query
625 {aliases} : ref to array containing valid aliases/shortcuts for
626 {name}; can be used in routines creating the HTML query
627 {options} : ref to array containing valid matchdata for this field
628 can be used directly in the HTML query
629 {ankey} : contains the annotation key for this field used with
630 Bioperl annotation objects
631 {..attr..}: ..value_of_attr.. for this field (app-specific metadata)
632 Args :
634 =cut
636 sub loadHIVSchema {
637 my $fn = shift;
638 Bio::Root::Root->throw("loadHIVSchema: schema file not found") unless -e $fn;
639 my $q = XML::Simple->new(ContentKey=>'name',NormalizeSpace=>2,ForceArray=>1);
640 my %ret;
641 my $ref = $q->XMLin($fn);
642 my @sf = keys %{$$ref{sfield}};
643 foreach (@sf) {
644 my $h = $$ref{sfield}{$_};
645 $ret{$_} = $h;
646 foreach my $ptr ($$h{option}, $$h{alias}) {
647 if ($ptr) {
648 # kludge for XMLin: appears to convert to arrays, if there
649 # exists a tag without content, but to convert to hashes
650 # with content as key, if all tags possess content
651 if (ref($ptr) eq 'HASH') {
652 my @k = keys %{$ptr};
653 if (grep /desc/, keys %{$ptr->{$k[0]}}) {
654 # slurp the desc's
655 $$h{desc} = [ map { $$ptr{$_}->{desc} } @k ];
657 # now overwrite with keys (descs in same order...)
658 $ptr = [@k];
660 elsif (ref($ptr) eq 'ARRAY') {
661 $ptr = [map { ref eq 'HASH' ? $_->{name} : $_ } @{$ptr}]
663 else {
664 1; # stub : doh!
668 for my $ptr ($$h{ankey}) {
669 # flatten
670 my $ank = [keys %{$ptr}]->[0];
671 if (!defined $ank) {
672 delete $$h{ankey};
674 else {
675 $h->{antype} = $ptr->{$ank}{antype};
676 $ptr = $ank;
680 return \%ret;
683 sub loadSchema {
684 my $self = shift;
685 $self->{schema_ref} = loadHIVSchema(shift);
688 # below, dangerous
690 =head4 HIVSchema _sfieldh
692 Title : _sfieldh
693 Usage : $schema->_sfieldh($fieldname)
694 Function: get hashref to the specified field hash
695 Example :
696 Returns : hashref
697 Args : fieldname in "table.column" format
699 =cut
701 sub _sfieldh {
702 # return reference to the specified field hash
703 my $self = shift;
704 my ($sfield) = @_;
705 return ${$self->{schema_ref}}{$sfield};
710 =head2 Class QRY - a query algebra for HIVQuery
712 =head3 QRY SYNOPSIS
714 $Q = new QRY(
715 new R(
716 new Q('coreceptor', 'CXCR4'),
717 new Q('country', 'ZA')
720 QRY::Eq(QRY::And($Q, $Q), $Q); # returns 1
721 QRY::Eq(QRY::Or($Q, $Q), $Q); # returns 1
722 $Q2 = $Q1->clone;
723 $Q2 = new QRY(
724 new R(
725 new Q( 'coreceptor', 'CCR5' ),
726 new Q( 'country', 'ZA')
729 (QRY::And($Q, $Q2))->isnull; # returns 1
730 $Q3 = QRY::Or($Q, $Q2);
731 print $Q3->A; # prints '(CCR5 CXCR4)[coreceptor] (ZA)[country]'
733 =head3 QRY DESCRIPTION
735 The QRY package provides a query parser for
736 L<Bio::DB::Query::HIVQuery>. Currently, the parser supports AND, OR,
737 and () operations. The structure of the LANL cgi makes it tricky to
738 perform NOTs, though this could be implemented if the desire were
739 great.
741 Two class methods do the work. C<QRY::_parse_q> does a first-pass
742 parse of the query string. C<QRY::_make_q> interprets the parse tree
743 as returned by C<QRY::_parse_q> and produces an array of hash
744 structures that can be used directly by C<Bio::DB::Query::HIVQuery>
745 query execution methods. Validation of query fields and options is
746 performed at the C<Bio::DB::Query::HIVQuery> level, not here.
748 C<QRY> objects are collections of C<R> (or request) objects, which are
749 in turn collections of C<Q> (or atomic query) objects. C<Q> objects
750 represent a query on a single field, with match data options C<OR>ed
751 together, e.g. C<(A B)[subtype]>. C<R> objects collect C<Q> objects
752 that could be processed in a single HTTP request; i.e., a set of
753 atomic queries each having different fields C<AND>ed together, such as
755 (A B)[subtype] AND ('CCR5')[coreceptor] AND (US CA)[country]
757 The C<QRY> object collects C<R>s that cannot be reduced (through
758 logical operations) to a single HTTP request, e.g.
760 ((C)[subtype] AND (SI)[phenotype]) OR ( (D)[subtype] AND (NSI)[phenotype] ),
762 which cannot be got in one go through the current LANL cgi
763 implementation (as far as I can tell). The parser will simplify
764 something like
766 ((C)[subtype] AND (SI)[phenotype]) OR ((C)[subtype] AND (NSI)[phenotype])
768 to the single request
770 (C)[subtype] AND (NSI SI)[phenotype]
772 however.
774 The operators C<&> and C<|> are overloaded to C<QRY::And> and
775 C<QRY::Or>, to get Perl precedence and grouping for free. C<bool> is
776 overloaded to get symbolic tests such as C<if ($QRY) {stuff}>. C<==>
777 is overloaded with C<QRY::Eq> for convenience. No overloading is done
778 for C<R> or C<Q>.
780 =cut
782 # a query algebra for HIVQuery
784 # Each Q object is an 'atomic' query, written as (data)[field]
785 # (a b ...)[X] equals (a)[X] | (b)[X] | ...
786 # Each R object represents a single HTTP request to the db
787 # contains an array of Q (atomic) objects (q1, q2, ...)
788 # the R object is interpreted as q1 & q2 & ...
789 # Each QRY object represents a series of HTTP requests to the db
790 # contains an array of R (request) objects (R1, R2, ...)
791 # the QRY object is interpreted as R1 | R2 | ...
793 # & and | operations are specified for each type
795 package # hide from PAUSE
796 QRY;
797 use strict;
798 $QRY::NULL = new QRY();
801 use overload
802 "|" => \&Or,
803 "&" => \&And,
804 "bool" => \&Bool,
805 "==" => \&Eq;
808 # query language emulator
809 # supports only AND and OR, any groupings
811 # syntax rules:
812 # query atom: bareword [field] OR (bareword ...) [field]
813 # only single bareword allowed between []
814 # annotation fields in {} (only bareword lists allowed between {})
815 # () can group query atoms joined by operators (AND or OR)
816 # () containing only barewords MUST be followed by a field descriptor [field]
817 # empty [] not allowed
818 # query atoms joined with AND by default
819 # barewords are associated (ORed within) the next field descriptor in the line
821 # follow the parse tree, creating new QRY objects as needed in @q, and
822 # construct a logical expression using & and | symbols.
823 # These are overloaded for doing ands and ors on QRY objects;
824 # to get the final QRY object, eval the resulting expression $q_expr.
825 # QRY object will be translated into (possibly multiple) hashes
826 # conforming to HIVQuery parameter requirements.
828 =head4 QRY _make_q
830 Title : _make_q
831 Usage : QRY::_make_q($parsetree)
832 Function: creates hash structures suitable for HIVQuery from parse tree
833 returned by QRY::_parse_q
834 Example :
835 Returns : array of hashrefs of query specs
836 Args : a hashref
838 =cut
840 sub _make_q {
841 my $ptree = shift;
842 my ($q_expr, @q, @an, $query, @dbq);
843 _make_q_guts($ptree, \$q_expr, \@q, \@an);
844 $query = eval $q_expr;
845 throw Bio::Root::Root(-class=>'Bio::Root::Exception',
846 -text=>$@,
847 -value=>$q_expr) if $@;
848 return {} if $query->isnull;
849 foreach my $rq ($query->requests) {
850 my $h = {'query'=>{}};
851 foreach ($rq->atoms) {
852 my @d = split(/\s+/, $_->dta);
853 foreach my $d (@d) {
854 $d =~ s/[+]/ /g; ###! _ to [+]
855 $d =~ s/'//g;
857 $h->{'query'}{$_->fld} = (@d == 1) ? $d[0] : [@d];
859 $h->{'annot'} = [@an] if @an;
860 push @dbq, $h;
862 return @dbq;
865 =head4 QRY _make_q_guts
867 Title : _make_q_guts (Internal class method)
868 Usage : _make_q_guts($ptree, $q_expr, $qarry, $anarry)
869 Function: traverses the parse tree returned from QRY::_parse_q, checking
870 syntax and creating HIVQuery-compliant query structures
871 Example :
872 Returns :
873 Args : $parse_tree (hashref), $query_expression (scalar string ref),
874 $query_array (array ref : stack for returning query structures),
875 $annotation_array (array ref : stack for returning annotation
876 fields)
878 =cut
880 sub _make_q_guts {
881 my ($ptree, $q_expr, $qarry, $anarry) = @_;
882 my (@words, $o);
883 eval { # catch
884 foreach (@{$ptree->{cont}}) {
885 m{^AND$} && do {
886 $$q_expr .= "&";
887 next;
889 m{^OR$} && do {
890 $$q_expr .= "|";
891 next;
893 m{^HASH} && do {
894 for my $dl ($_->{delim}) {
895 ($dl =~ m{\(}) && do {
896 if (grep /^HASH/, @{$_->{cont}}) {
897 $$q_expr .= "&" unless !$$q_expr || !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
898 $$q_expr .= "(";
899 _make_q_guts($_,$q_expr,$qarry,$anarry);
900 $$q_expr .= ")";
902 else {
903 my @c;
904 my $c = join(' ',@{$_->{cont}});
905 $c =~ s/,/ /g;
906 Bio::Root::Root->throw("query syntax error: unmatched ['\"]") if (@c = ($c =~ /(['"])/g)) % 2;
907 @c = split(/\s*(['"])\s*/, $c);
908 do {
909 $c = shift @c;
910 if ($c =~ m{['"]}) {
911 $c = join('', ($c, shift @c, shift @c));
912 $c =~ s/\s+/+/g; ###! _ to +
913 push @words, $c;
915 else {
916 push @words, split(/\s+/,$c);
918 } while @c;
920 last;
922 ($dl =~ m{\[}) && do {
923 Bio::Root::Root->throw("syntax error: empty field descriptor") unless @{$_->{cont}};
924 Bio::Root::Root->throw("syntax error: more than one field descriptor in square brackets") unless @{$_->{cont}} == 1;
926 push @{$qarry}, new QRY( new R( new Q( $_->{cont}->[0], @words)));
927 # add default operation if nec
928 $$q_expr .= "&" unless !$$q_expr || !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
929 $$q_expr .= "\$q[".$#$qarry."]";
930 @words = ();
931 last;
933 ($dl =~ m{\{}) && do {
934 foreach my $an (@{$_->{cont}}) {
935 ($an =~ /^HASH/) && do {
936 if ($an->{delim} eq '[') {
937 push @$anarry, @{$an->{cont}};
939 else {
940 Bio::Root::Root->throw("query syntax error: only field descriptors (with or without square brackets) allowed in annotation spec");
942 next;
944 do { #else
945 push @$anarry, $an;
946 next;
949 last;
951 do {
952 1; #else stub
955 next;
957 do { # else, bareword
958 if ($o) {
959 $words[-1] .= "+$_"; ####! _ to +
961 else {
962 push @words, $_;
964 m/['"]/ && ($o = !$o);
966 } # @{ptree->{cont}}
967 Bio::Root::Root->throw("query syntax error: no search fields specified")
968 unless $$q_expr =~ /q\[[0-9]+\]/;
970 $@ ?
971 throw Bio::Root::Root(-class=>'Bio::QueryStringSyntax::Exception',
972 -text=>$@,
973 -value=>$$q_expr)
974 : return 1;
977 =head4 QRY _parse_q
979 Title : _parse_q
980 Usage : QRY::_parse_q($query_string)
981 Function: perform first pass parse of a query string with some syntax
982 checking, return a parse tree suitable for QRY::_make_q
983 Example : QRY::_parse_q(" to[be] OR (not to)[be] ");
984 Returns : hashref
985 Args : query string
987 =cut
989 # parse qry string into a branching tree structure
990 # each branch tagged by the opening delimiter ( key 'delim' )
991 # content (tokens and subbranch hashes) placed in l2r order in
992 # @{p->{cont}}
993 sub _parse_q {
994 local $_;
995 my $qstr = shift;
996 my $illegal = qr/[^a-zA-Z0-9-_<>=,\.\(\[\{\}\]\)\s'"]/;
997 my $pdlm = qr/[\{\[\(\)\]\}]/;
998 my %md = ('('=>')', '['=>']','{'=>'}');
999 my @tok = grep !/^\s*$/, split /($pdlm)/, $qstr;
1000 return {} unless @tok;
1001 my @pstack = ();
1002 my @dstack = ();
1003 my ($ptree, $p);
1005 eval { #catch
1006 Bio::Root::Root->throw("query syntax error: illegal character") if $qstr =~ /$illegal/;
1008 $ptree = $p = {'delim'=>'*'};
1009 foreach (@tok) {
1010 #trim whsp
1011 s/^\s+//;
1012 s/\s+$//;
1013 m{[\(\[\{]} && do {
1014 my $new = {'delim'=>$_};
1015 $p->{cont} = [] unless $p->{cont};
1016 push @{$p->{cont}}, $new;
1017 push @pstack, $p;
1018 push @dstack, $_;
1019 $p = $new;
1020 next;
1022 m{[\)\]\}]} && do {
1023 my $d = pop @dstack;
1024 if ($md{$d} eq $_) {
1025 $p = pop @pstack;
1026 Bio::Root::Root->throw("query syntax error: unmatched \"$_\"") unless $p;
1028 else {
1029 Bio::Root::Root->throw("query syntax error: saw \"$_\" before matching \"$md{$d}\"");
1031 next;
1033 do { # else
1034 $p->{cont} = [] unless $p->{cont};
1035 push @{$p->{cont}}, split(/\s+/);
1039 $@ ?
1040 throw Bio::Root::Root(-class=>'Bio::QueryStringSyntax::Exception',
1041 -text=>$@,
1042 -value=>"")
1043 : return $ptree;
1046 ## QRY constructor
1048 =head3 QRY CONSTRUCTOR
1050 =head4 QRY Constructor
1052 Title : QRY constructor
1053 Usage : $QRY = new QRY()
1054 Function:
1055 Example :
1056 Returns :
1057 Args : array of R objects, optional
1059 =cut
1061 sub new {
1062 my $class = shift;
1063 my @args = @_;
1064 my $self = {};
1065 $self->{requests} = [];
1066 bless($self, $class);
1067 $self->put_requests(@args) if @args;
1068 return $self;
1071 ## QRY instance methods
1073 =head3 QRY INSTANCE METHODS
1075 =head4 QRY requests
1077 Title : requests
1078 Usage : $QRY->requests
1079 Function: get/set array of requests comprising this QRY object
1080 Example :
1081 Returns :
1082 Args : array of class R objects
1084 =cut
1086 sub requests {
1087 my $self = shift;
1088 $self->put_requests(@_) if @_;
1089 return @{$self->{'requests'}};
1092 =head4 QRY put_requests
1094 Title : put_requests
1095 Usage : $QRY->put_request(@R)
1096 Function: add object of class R to $QRY
1097 Example :
1098 Returns :
1099 Args : [an array of] of class R object[s]
1101 =cut
1103 sub put_requests {
1104 my $self = shift;
1105 my @args = @_;
1106 foreach (@args) {
1107 Bio::Root::Root->throw('requires type R (request)') unless ref && $_->isa('R');
1108 push @{$self->{requests}}, $_;
1110 return @args;
1113 =head4 QRY isnull
1115 Title : isnull
1116 Usage : $QRY->isnull
1117 Function: test if QRY object is null
1118 Example :
1119 Returns : 1 if null, 0 otherwise
1120 Args :
1122 =cut
1124 sub isnull {
1125 my $self = shift;
1126 return ($self->requests) ? 0 : 1;
1129 =head4 QRY A
1131 Title : A
1132 Usage : print $QRY->A
1133 Function: get a string representation of QRY object
1134 Example :
1135 Returns : string scalar
1136 Args :
1138 =cut
1140 sub A {
1141 my $self = shift;
1142 return join( "\n", map {$_->A} $self->requests );
1145 =head4 QRY len
1147 Title : len
1148 Usage : $QRY->len
1149 Function: get number of class R objects contained by QRY object
1150 Example :
1151 Returns : scalar
1152 Args :
1154 =cut
1156 sub len {
1157 my $self = shift;
1158 return scalar @{$self->{'requests'}};
1161 =head4 QRY clone
1163 Title : clone
1164 Usage : $QRY2 = $QRY1->clone;
1165 Function: create and return a clone of the object
1166 Example :
1167 Returns : object of class QRY
1168 Args :
1170 =cut
1172 sub clone {
1173 local $_;
1174 my $self = shift;
1175 my $ret = QRY->new();
1176 foreach ($self->requests) {
1177 $ret->put_requests($_->clone);
1179 return $ret;
1182 ## QRY class methods
1184 =head3 QRY CLASS METHODS
1186 =head4 QRY Or
1188 Title : Or
1189 Usage : $QRY3 = QRY::Or($QRY1, $QRY2)
1190 Function: logical OR for QRY objects
1191 Example :
1192 Returns : a QRY object
1193 Args : two class QRY objects
1195 =cut
1197 sub Or {
1198 local $_;
1199 my ($q, $r, $rev_f) = @_;
1200 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1201 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1202 if ($q->isnull) {
1203 return $r->clone;
1205 elsif ($r->isnull) {
1206 return $q->clone;
1208 do {my $qq = $q; $q=$r; $r=$qq} if ($q->len > $r->len);
1209 my @rq_r = $r->requests;
1210 my @rq_q = $q->requests;
1211 my (@cand_rq, @ret_rq);
1212 # search for simplifications
1213 my @now = @rq_q;
1214 my @nxt =();
1215 foreach (@rq_r) {
1216 my $found = 0;
1217 while (my $rq = pop @now) {
1218 my @result = R::Or($rq, $_);
1219 if (@result==1) {
1220 push @cand_rq, $result[0]->clone;
1221 $found = 1;
1222 last;
1224 else {
1225 push @nxt, $rq;
1228 push @cand_rq, $_->clone unless ($found);
1229 # @now becomes unexamined @rq_q's plus failed @rq_q's
1230 @now = (@now, @nxt);
1232 push @cand_rq, map {$_->clone} @now; # add all failed @rq_q's
1233 # squeeze out redundant requests
1234 while (my $rq = pop @cand_rq) {
1235 push @ret_rq, $rq unless @cand_rq && grep {R::Eq($rq, $_)} @cand_rq;
1237 return new QRY( @ret_rq );
1240 =head4 QRY And
1242 Title : And
1243 Usage : $QRY3 = QRY::And($QRY1, $QRY2)
1244 Function: logical AND for QRY objects
1245 Example :
1246 Returns : a QRY object
1247 Args : two class QRY objects
1249 =cut
1251 sub And {
1252 my ($q, $r, $rev_f) = @_;
1253 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1254 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1255 return ($QRY::NULL) if ($q->isnull || $r->isnull);
1256 my (@cand_rq, @ret_rq);
1257 foreach my $rq_r ($r->requests) {
1258 foreach my $rq_q ($q->requests) {
1259 my ($rq) = R::And($rq_r, $rq_q);
1260 push @cand_rq, $rq unless $rq->isnull;
1263 return $QRY::NULL unless @cand_rq;
1264 # squeeze out redundant requests
1265 while (my $rq = pop @cand_rq) {
1266 push @ret_rq, $rq unless @cand_rq && grep {R::Eq($rq, $_)} @cand_rq;
1268 return new QRY( @ret_rq );
1271 =head4 QRY Bool
1273 Title : Bool
1274 Usage : QRY::Bool($QRY1)
1275 Function: allows symbolic testing of QRY object when bool overloaded
1276 Example : do {stuff} if $QRY1 *same as* do {stuff} if !$QRY1->isnull
1277 Returns :
1278 Args : a class QRY object
1280 =cut
1282 sub Bool {
1283 my $q = shift;
1284 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1285 return $q->isnull ? 0 : 1;
1288 =head4 QRY Eq
1290 Title : Eq
1291 Usage : QRY::Eq($QRY1, $QRY2)
1292 Function: test if R objects in two QRY objects are the same
1293 (irrespective of order)
1294 Example :
1295 Returns : 1 if equal, 0 otherwise
1296 Args : two class QRY objects
1298 =cut
1300 sub Eq {
1301 my ($q, $r, $rev_f) = @_;
1302 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1303 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1304 return 0 unless $q->len == $r->len;
1305 foreach my $rq_q ($q->requests) {
1306 my $found = 0;
1307 foreach my $rq_r ($r->requests) {
1308 if (R::Eq($rq_q,$rq_r)) {
1309 $found = 1;
1310 last;
1313 return 0 unless $found;
1315 return 1;
1320 =head2 Class R - request objects for QRY algebra
1322 =head3 R SYNOPSIS
1324 $R = new R( $q1, $q2 );
1325 $R->put_atoms($q3);
1326 $R->del_atoms('coreceptor', 'phenotype');
1327 return $R->clone;
1328 $R1 = new R( new Q('subtype', 'B') );
1329 $R2 = new R( new Q('subtype', 'B C'),
1330 new Q('country', 'US') );
1331 R::Eq( (R::And($R1, $R2))[0],
1332 new R( new Q('subtype', 'B' ),
1333 new Q('country', 'US') )); # returns 1
1334 QRY::Eq( new QRY(R::Or($R1, $R2)), new QRY($R1, $R2) ); # returns 1
1335 R::In( (R::And($R1, $R2))[0], $R1 ); # returns 1
1337 =head3 R DESCRIPTION
1339 Class R objects contain a list of atomic queries (class Q
1340 objects). Each class R object represents a single HTTP request to the
1341 LANL DB. When converted to a DB query, the class Q objects contained
1342 by an R object are effectively C<AND>ed.
1344 =cut
1346 package # hide from PAUSE
1348 use strict;
1349 $R::NULL = R->new();
1352 ## R constructor
1354 =head3 R CONSTRUCTOR
1356 =head4 R constructor
1358 Title : R constructor
1359 Usage : $R = new R()
1360 Function: create a new R (request) object
1361 Example :
1362 Returns : class R (request) object
1363 Args : optional, array of class Q objects
1365 =cut
1367 sub new {
1368 my $class = shift;
1369 my @args = @_;
1370 my $self = {};
1371 $self->{atoms} = {};
1372 bless($self, $class);
1373 $self->put_atoms(@args) if @args;
1374 return $self;
1377 ## R instance methods
1379 =head3 R INSTANCE METHODS
1381 =head4 R len
1383 Title : len
1384 Usage : $R->len
1385 Function: get number of class Q objects contained in R object
1386 Example :
1387 Returns : scalar
1388 Args :
1390 =cut
1392 sub len {
1393 my $self = shift;
1394 return scalar @{[keys %{$self->{'atoms'}}]};
1397 =head4 R atoms
1399 Title : atoms
1400 Usage : $R->atoms( [optional $field])
1401 Function: get array of class Q (atomic query) objects in class R object
1402 Example : $R->atoms(); $R->atoms('coreceptor')
1403 Returns : array of class Q objects (all Qs or those corresponding to $field
1404 if present)
1405 Args : optional, scalar string
1407 =cut
1409 sub atoms {
1410 local $_;
1411 # returns an array of atoms
1412 # no arg: all atoms;
1413 # args: atoms with specified fields
1414 my $self = shift;
1415 my @flds = (@_ ? @_ : keys %{$self->{'atoms'}});
1416 return wantarray ? map { $self->{'atoms'}->{$_} } @flds : $self->{'atoms'}->{$flds[0]};
1419 =head4 R fields
1421 Title : fields
1422 Usage : $R->fields
1423 Function: get array of fields of all Q objects contained in $R
1424 Example :
1425 Returns : array of scalars
1426 Args :
1428 =cut
1430 sub fields {
1431 my $self = shift;
1432 return keys %{$self->{'atoms'}};
1435 =head4 R put_atoms
1437 Title : put_atoms
1438 Usage : $R->put_atoms( @q )
1439 Function: AND an atomic query (class Q object) to the class R object's list
1440 Example :
1441 Returns : void
1442 Args : an [array of] class Q object[s]
1444 =cut
1446 sub put_atoms {
1447 # AND this atom to the request
1448 local $_;
1449 my $self = shift;
1450 my @args = @_;
1451 foreach (@args) {
1452 Bio::Root::Root->throw('requires type Q (atom)') unless ref && $_->isa('Q');
1453 if ($self->atoms($_->fld)) {
1454 my $a = Q::qand( $self->atoms($_->fld), $_ );
1455 if ($a->isnull) {
1456 delete $self->{'atoms'}->{$_->fld};
1458 else {
1459 $self->{atoms}->{$_->fld} = $a->clone;
1462 else {
1463 $self->{atoms}->{$_->fld} = $_->clone;
1466 return;
1469 =head4 R del_atoms
1471 Title : del_atoms
1472 Usage : $R->del_atoms( @qfields )
1473 Function: removes class Q objects from R object's list according to the
1474 field names given in arguments
1475 Example :
1476 Returns : the class Q objects deleted
1477 Args : scalar array of field names
1479 =cut
1481 sub del_atoms {
1482 # remove atoms by field from request
1483 local $_;
1484 my $self = shift;
1485 my @args = @_;
1486 return () unless @args;
1487 my @ret;
1488 foreach (@args) {
1489 push @ret, delete $self->{'atoms'}->{$_};
1491 return @ret;
1494 =head4 R isnull
1496 Title : isnull
1497 Usage : $R->isnull
1498 Function: test if class R object is null
1499 Example :
1500 Returns : 1 if null, 0 otherwise
1501 Args :
1503 =cut
1505 sub isnull {
1506 my $self = shift;
1507 return ($self->len) ? 0 : 1;
1510 =head4 R A
1512 Title : A
1513 Usage : print $R->A
1514 Function: get a string representation of class R object
1515 Example :
1516 Returns : string scalar
1517 Args :
1519 =cut
1521 sub A {
1522 my $self = shift;
1523 my @a = sort {$a->fld cmp $b->fld} $self->atoms;
1524 return join(" ", map {$_->A} @a);
1527 =head4 R clone
1529 Title : clone
1530 Usage : $R2 = $R1->clone;
1531 Function: create and return a clone of the object
1532 Example :
1533 Returns : object of class R
1534 Args :
1536 =cut
1538 sub clone {
1539 local $_;
1540 my $self = shift;
1541 my $ret = R->new();
1542 foreach ($self->atoms) {
1543 $ret->put_atoms($_->clone);
1545 return $ret;
1548 ## R class methods
1550 =head3 R CLASS METHODS
1552 =head4 R In
1554 Title : In
1555 Usage : R::In($R1, $R2)
1556 Function: tests whether the query represented by $R1 would return a subset
1557 of items returned by the query represented by $R2
1558 Example : print "R2 gets those and more" if R::In($R1, $R2);
1559 Returns : 1 if R1 is subset of R2, 0 otherwise
1560 Args : two class R objects
1562 =cut
1564 sub In {
1565 local $_;
1566 my ($s, $t) = @_;
1567 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1568 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1569 return 1 if ($s->isnull);
1570 # common fields
1571 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,$s->fields} $t->fields;
1572 return 0 unless @cf==$t->len;
1573 foreach (@cf) {
1574 my @sd = split(/\s+/, $s->atoms($_)->dta);
1575 my @td = split(/\s+/, $t->atoms($_)->dta);
1576 my @cd = grep {defined} map {my $d=$_; grep /^$d$/, @td} @sd;
1577 return 0 unless @cd==@sd;
1579 return 1;
1582 =head4 R And
1584 Title : And
1585 Usage : @Rresult = R::And($R1, $R2)
1586 Function: logical AND for R objects
1587 Example :
1588 Returns : an array containing class R objects
1589 Args : two class R objects
1591 =cut
1593 sub And {
1594 local $_;
1595 my ($s, $t) = @_;
1596 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1597 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1598 return ($R::NULL) if ($s->isnull || $t->isnull);
1600 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1601 # $t has at least as many fields defined than $s ($t is more restrictive)
1603 # common fields
1604 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1605 my $ret = R->new();
1606 my $v = $t->clone;
1607 $v->del_atoms(@cf);
1608 my $u = $s->clone;
1609 $u->del_atoms(@cf);
1611 # And the atoms with identical fields
1613 foreach (@cf) {
1614 my ($a) = Q::qand($s->atoms($_), $t->atoms($_));
1615 if ($a->isnull) {
1616 return $R::NULL;
1618 else {
1619 $ret->put_atoms($a);
1622 # put the private atoms
1623 $ret->put_atoms($u->atoms, $v->atoms);
1624 return ($ret);
1628 =head4 R Or
1630 Title : Or
1631 Usage : @Rresult = R::Or($R1, $R2)
1632 Function: logical OR for R objects
1633 Example :
1634 Returns : an array containing class R objects
1635 Args : two class R objects
1637 =cut
1639 sub Or {
1640 local $_;
1641 my ($s, $t) = @_;
1642 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1643 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1644 if ($s->isnull) {
1645 return $t->clone;
1647 elsif ($t->isnull) {
1648 return $s->clone;
1650 return $s->clone if (R::In($t, $s));
1651 return $t->clone if (R::In($s, $t));
1653 # try simplifying
1654 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1655 # common fields
1656 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1658 if ($t->len == @cf) {
1659 # all atoms equal within fields but one? If yes, simplify...
1660 my @df = grep {!Q::qeq($s->atoms($_), $t->atoms($_))} @cf;
1661 if (@df == 1) {
1662 my ($a) = Q::qor($s->atoms($df[0]), $t->atoms($df[0]));
1663 my $ret = $s->clone;
1664 $ret->del_atoms($df[0]);
1665 $ret->put_atoms($a);
1666 return ($ret);
1670 # neither request contains the other, and the requests cannot be
1671 # simplified; reflect back (clones of) the input...
1672 return ($s->clone, $t->clone);
1676 =head4 R Eq
1678 Title : Eq
1679 Usage : R::Eq($R1, $R2)
1680 Function: test if class Q objects in two R objects are the same
1681 (irrespective of order)
1682 Example :
1683 Returns : 1 if equal, 0 otherwise
1684 Args : two class R objects
1686 =cut
1688 sub Eq {
1689 local $_;
1690 my ($s, $t) = @_;
1691 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1692 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1693 my @sf = $s->fields;
1694 my @tf = $t->fields;
1695 return 0 unless @sf==@tf;
1696 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,@sf} @tf;
1697 return 0 unless @cf==@tf;
1698 foreach (@cf) {
1699 return 0 unless Q::qeq($s->atoms($_), $t->atoms($_));
1701 return 1;
1705 =head2 Class Q - atomic query objects for QRY algebra
1707 =head3 Q SYNOPSIS
1709 $q = new Q('coreceptor', 'CXCR4 CCR5');
1710 $u = new Q('coreceptor', 'CXCR4');
1711 $q->fld; # returns 'coreceptor'
1712 $q->dta; # returns 'CXCR4 CCR5'
1713 print $q->A; # prints '(CXCR4 CCR5)[coreceptor]
1714 Q::qeq($q, $u); # returns 0
1715 Q::qeq( Q::qor($q, $q), $q ); # returns 1
1716 Q::qin($u, $q) # returns 1
1717 Q::qeq(Q::qand($u, $q), $u ); # returns 1
1719 =head3 Q DESCRIPTION
1721 Class Q objects represent atomic queries, that can be described by a
1722 single LANL cgi parameter=value pair. Class R objects (requests) are
1723 built from class Qs. The logical operations at the higher levels
1724 (C<QRY, R>) ultimately depend on the lower level operations on Qs:
1725 C<qeq, qin, qand, qor>.
1727 =cut
1729 package # hide from PAUSE
1731 use strict;
1732 $Q::NULL = Q->new();
1734 ## Q constructor
1736 =head3 Q CONSTRUCTOR
1738 =head4 Q constructor
1740 Title : Q constructor
1741 Usage : $q = new Q($field, $data)
1742 Function: create a new Q (atomic query) object
1743 Example :
1744 Returns : class Q object
1745 Args : optional $field, $data strings
1747 =cut
1749 sub new {
1750 local $_;
1751 my ($class,@args) = @_;
1752 my $self={};
1753 foreach (@args) { s/^\s+//; s/\s+$//; }
1754 my ($fld, @dta) = @args;
1755 $self->{fld}=$fld;
1756 $self->{dta}=join(" ", @dta);
1757 bless($self, $class);
1758 return $self;
1761 ## Q instance methods
1763 =head3 Q INSTANCE METHODS
1765 =head4 Q isnull
1767 Title : isnull
1768 Usage : $q->isnull
1769 Function: test if class Q object is null
1770 Example :
1771 Returns : 1 if null, 0 otherwise
1772 Args :
1774 =cut
1776 sub isnull {
1777 my $self = shift;
1778 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1779 return 1 unless (($self->fld && length($self->fld)) || ($self->dta && length($self->dta)));
1780 return 0;
1783 =head4 Q fld
1785 Title : fld
1786 Usage : $q->fld($field)
1787 Function: get/set fld (field name) property
1788 Example :
1789 Returns : scalar
1790 Args : scalar
1792 =cut
1794 sub fld {
1795 my $self = shift;
1796 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1797 my $f = shift;
1798 if ($f) {
1799 $f =~ s/^\s+//;
1800 $f =~ s/\s+$//;
1801 return $self->{fld}=$f;
1803 return $self->{fld};
1807 =head4 Q dta
1809 Title : dta
1810 Usage : $q->dta($data)
1811 Function: get/set dta (whsp-separated data string) property
1812 Example :
1813 Returns : scalar
1814 Args : scalar
1816 =cut
1818 sub dta {
1819 my $self = shift;
1820 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1821 my $d = join(" ", @_);
1822 if ($d) {
1823 $d =~ s/^\s+//;
1824 $d =~ s/\s+$//;
1825 return $self->{dta} = $d;
1827 return $self->{dta};
1830 =head4 Q A
1832 Title : A
1833 Usage : print $q->A
1834 Function: get a string representation of class Q object
1835 Example :
1836 Returns : string scalar
1837 Args :
1839 =cut
1841 sub A {
1842 my $self = shift;
1843 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1844 my @a = split(/\s+/, $self->dta);
1846 return "(".join(' ', sort {$a cmp $b} @a).")[".$self->fld."]";
1849 =head4 Q clone
1851 Title : clone
1852 Usage : $q2 = $q1->clone;
1853 Function: create and return a clone of the object
1854 Example :
1855 Returns : object of class Q
1856 Args :
1858 =cut
1860 sub clone {
1861 my $self = shift;
1862 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1863 my $ret = Q->new($self->fld, $self->dta);
1864 return $ret;
1867 ### Q class methods
1869 =head3 Q CLASS METHODS
1871 =head4 Q qin
1873 Title : qin
1874 Usage : Q::qin($q1, $q2)
1875 Function: tests whether the query represented by $q1 would return a subset
1876 of items returned by the query represented by $q2
1877 Example : print "q2 gets those and more" if Q::qin($q1, $q2);
1878 Returns : 1 if q1 is subset of q2, 0 otherwise
1879 Args : two class Q objects
1881 =cut
1883 sub qin {
1884 my ($a, $b) = @_;
1885 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1886 return 0 unless $a->fld eq $b->fld;
1887 return Q::qeq( $b, Q::qor($a, $b) );
1890 =head4 Q qeq
1892 Title : qeq
1893 Usage : Q::qeq($q1, $q2)
1894 Function: test if fld and dta properties in two class Q objects are the same
1895 (irrespective of order)
1896 Example :
1897 Returns : 1 if equal, 0 otherwise
1898 Args : two class Q objects
1900 =cut
1902 sub qeq {
1903 local $_;
1904 my ($a, $b) = @_;
1905 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1906 return 0 unless $a->fld eq $b->fld;
1907 my @ad = unique(split(/\s+/,$a->dta));
1908 my @bd = unique(split(/\s+/,$b->dta));
1909 return 0 unless @ad==@bd;
1910 my @cd = grep {defined} map {my $f = $_; grep /^$f$/, @ad} @bd;
1911 return @cd == @bd;
1914 =head4 Q qor
1916 Title : qor
1917 Usage : @qresult = Q::qor($q1, $q2)
1918 Function: logical OR for Q objects
1919 Example :
1920 Returns : an array of class Q objects
1921 Args : two class Q objects
1923 =cut
1925 sub qor {
1926 local $_;
1927 my @a = @_;
1928 foreach (@a) {
1929 Bio::Root::Root->throw("requires type Q (atom)") unless ref && $_->isa('Q');
1931 my @ret;
1932 my (%f, @f);
1933 @a = grep {!$_->isnull} @a;
1934 return ($Q::NULL) unless @a > 0;
1935 # list of unique flds
1936 @f = unique(map {$_->fld} @a);
1937 foreach my $f (@f) {
1938 my @fobjs = grep {$_->fld eq $f} @a;
1939 my @d = unique(map {split(/\s/, $_->dta)} @fobjs );
1940 my $r = Q->new($f, @d);
1941 push @ret, $r;
1943 return @ret;
1946 =head4 Q qand
1948 Title : qand
1949 Usage : @qresult = Q::And($q1, $q2)
1950 Function: logical AND for R objects
1951 Example :
1952 Returns : an array of class Q objects
1953 Args : two class Q objects
1955 =cut
1957 sub qand {
1958 local $_;
1959 my ($a, $b) = @_;
1960 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1961 my @ret;
1962 if (ref $a eq 'ARRAY') {
1963 foreach my $ea (@$a) {
1964 push @ret, qand( $ea, $b );
1966 return qor(@ret); # simplify
1968 elsif (ref $b eq 'ARRAY') {
1969 foreach my $eb (@$b) {
1970 push @ret, qand( $a, $eb);
1973 return qor(@ret); # simplify
1975 else {
1976 return ($Q::NULL) if ($a->isnull || $b->isnull);
1977 if ($a->fld eq $b->fld) {
1978 # find intersection of data
1979 my (%ad, @ad, @bd);
1980 @ad = split(/\s+/, $a->dta);
1981 @ad{@ad} = (1) x @ad;
1982 @bd = split(/\s+/, $b->dta);
1983 foreach (@bd) {
1984 $ad{$_}++;
1986 my $r = Q->new($a->fld,
1987 grep {$_}
1988 map {$ad{$_} == 2 ? $_ : undef} keys %ad);
1989 return (length($r->dta) > 0) ? ($r) : ($Q::NULL);
1991 else {
1992 return ($a, $b);
1997 =head3 Q INTERNALS
1999 =head4 Q unique
2001 Title : unique
2002 Usage : @ua = unique(@a)
2003 Function: return contents of @a with duplicates removed
2004 Example :
2005 Returns :
2006 Args : an array
2008 =cut
2010 sub unique {
2011 my @a = @_;
2012 my %a;
2013 @a{@a} = undef;
2014 return keys %a;
2019 =head2 Additional tools for Bio::AnnotationCollectionI
2021 =head3 Bio::AnnotationCollectionI SYNOPSIS (additional methods)
2023 $seq->annotation->put_value('patient_id', 1401)
2024 $seq->annotation->get_value('patient_ids') # returns 1401
2025 $seq->annotation->put_value('patient_group', 'MassGenH')
2026 $seq->annotation->put_value(['clinical', 'cd4count'], 503);
2027 $seq->annotation->put_value(['clinical', 'virus_load'], 150805);
2028 foreach ( qw( cd4count virus_load ) ) {
2029 $blood_readings{$_} = $seq->annonation->get_value(['clinical', $_]);
2032 =head3 Bio::AnnotationCollectionI DESCRIPTION (additional methods)
2034 C<get_value()> and C<put_value> allow easy creation of and access to an
2035 annotation collection tree with nodes of L<Bio::Annotation::SimpleValue>. These
2036 methods obiviate direct accession of the SimpleValue objects.
2038 =cut
2040 package Bio::AnnotationCollectionI;
2041 use strict;
2042 use Bio::Annotation::SimpleValue;
2044 =head2 get_value
2046 Title : get_value
2047 Usage : $ac->get_value($tagname) -or-
2048 $ac->get_value( $tag_level1, $tag_level2,... )
2049 Function: access the annotation value associated with the given tags
2050 Example :
2051 Returns : a scalar
2052 Args : an array of tagnames that descend into the annotation tree
2054 =cut
2056 sub get_value {
2057 local $_;
2058 my $self = shift;
2059 my @args = @_;
2060 my @h;
2061 return "" unless @_;
2062 while ($_ = shift @args) {
2063 @h = $self->get_Annotations($_);
2064 if (ref($h[0]->{value})) {
2065 $self = $h[0]->{value}; # must be another Bio::AnnotationCollectionI
2067 else {
2068 last;
2071 return $h[0] && $h[0]->{value} ; # now the last value.
2074 =head2 put_value
2076 Title : put_value
2077 Usage : $ac->put_value($tagname, $value) -or-
2078 $ac->put_value([$tag_level1, $tag_level2, ...], $value) -or-
2079 $ac->put_value( [$tag_level1, $tag_level2, ...] )
2080 Function: create a node in an annotation tree, and assign a scalar value to it
2081 if a value is specified
2082 Example :
2083 Returns : scalar or a Bio::AnnotationCollection object
2084 Args : $tagname, $value scalars (can be specified as -KEYS=>$tagname,
2085 -VALUE=>$value) -or-
2086 \@tagnames, $value (or as -KEYS=>\@tagnames, -VALUE=>$value )
2087 Note : If intervening nodes do not exist, put_value creates them, replacing
2088 existing nodes. So if $ac->put_value('x', 10) was done, then later,
2089 $ac->put_value(['x', 'y'], 20), the original value of 'x' is trashed,
2090 and $ac->get_value('x') will now return the annotation collection
2091 with tagname 'y'.
2093 =cut
2095 sub put_value {
2096 local $_;
2097 my $self = shift;
2098 my @args = @_;
2099 my ($keys, $value) = $self->_rearrange([qw( KEYS VALUE )], @args);
2100 my (@keys, $lastkey);
2101 # $value ||= new Bio::Annotation::Collection;
2102 @keys = (ref($keys) eq 'ARRAY') ? @$keys : ($keys);
2103 $lastkey = pop @keys;
2104 foreach (@keys) {
2105 my $a = $self->get_value($_);
2106 if (ref($a) && $a->isa('Bio::Annotation::Collection')) {
2107 $self = $a;
2109 else {
2110 # replace an old value
2111 $self->remove_Annotations($_) if $a;
2112 my $ac = Bio::Annotation::Collection->new();
2113 $self->add_Annotation(Bio::Annotation::SimpleValue->new(
2114 -tagname => $_,
2115 -value => $ac
2118 $self = $ac;
2121 if ($self->get_value($lastkey)) {
2122 # replace existing value
2123 ($self->get_Annotations($lastkey))[0]->{value} = $value;
2125 else {
2126 $self->add_Annotation(Bio::Annotation::SimpleValue->new(
2127 -tagname=>$lastkey,
2128 -value=>$value
2131 return $value;
2134 =head2 get_keys
2136 Title : get_keys
2137 Usage : $ac->get_keys($tagname_level_1, $tagname_level_2,...)
2138 Function: Get an array of tagnames underneath the named tag nodes
2139 Example : # prints the values of the members of Category 1...
2140 print map { $ac->get_value($_) } $ac->get_keys('Category 1') ;
2141 Returns : array of tagnames or empty list if the arguments represent a leaf
2142 Args : [array of] tagname[s]
2144 =cut
2146 sub get_keys {
2147 my $self = shift;
2148 my @keys = @_;
2149 foreach (@keys) {
2150 my $a = $self->get_value($_);
2151 if (ref($a) && $a->isa('Bio::Annotation::Collection')) {
2152 $self = $a;
2154 else {
2155 return ();
2158 return $self->get_all_annotation_keys();