2 # command-line script for HIV sequence queries
3 # using HIV.pm and HIVQuery.pm
7 bp_hivq.PL - an interactive command-line interface to L<Bio::DB::HIV> and L<Bio::DB::Query::HIVQuery>
12 hivq> query C[subtype] SI[phenotype]
15 Query: C[subtype] SI[phenotype]
20 hivq> run D[subtype] SI[phenotype]
24 Query: D[subtype] SI[phenotype]
30 The BioPerl modules L<Bio::DB::HIV> and L<Bio::DB::Query::HIVQuery>
31 together allow batch queries against the Los Alamos National
32 Laboratories' HIV Sequence Database using a simple query
33 language. C<bp_hivq.PL> provides both an example of the use of these
34 modules, and a standalone interactive command-line interface to the
35 LANL HIV DB. Simple commands allow the user to retrieve HIV sequences
36 and annotations using the query language implemented in
37 L<Bio::DB::Query::HIVQuery>. Visit the man pages for those modules for
42 Run the script using C<perl bp_hivq.PL> or, in Unix, C<./bp_hivq.PL>. You will see
47 prompt. Type commands with queries to retrieve sequence and annotation
48 data. See the SYNOPSIS for a sample session. Available commands
53 The LANL database is pretty complex and extensive. Use the C<find> facility to
54 explore the available database tables and fields. To identify aliases for a particular field, use C<find alias [fieldname]>. For example, to find a short alias to the
55 weirdly named field C<seq_sample.ssam_second_receptor>, do
57 hivq> find alias seq_sample.ssam_second_receptor
61 coreceptor second_receptor
63 Now, instead of the following query
65 hivq> run C[subtype] CCR5[seq_sample.ssam_second_receptor]
69 hivq> run C[subtype] CCR5[coreceptor]
71 Use the C<outfile> command to set the file that receives the retrieved
72 sequences. You can change the current output file simply by issuing a
73 new C<outfile> command during the session. The output file defaults to
76 Use the C<query> command to validate a query without hitting the
77 database. Use the C<prerun> or C<count> commands to get a count of
78 sequence hits for a query without retrieving the data. Use C<run> or
79 C<do> to perform a complete query, retrieving sequence data into the
80 currently set output files.
82 To process C<bp_hivq.PL> commands in batch, create a text file
83 (C<bp_hivq.cmd>, for example) containing desired commands one per
84 line. Then execute the following from the shell:
86 $ cat bp_hivq.cmd | perl bp_hivq.PL
90 Here is a complete list of commands. Options in single brackets (C<[req_option]>) are
91 required; options in double brackets (C<[[opt_option]]>) are optional.
93 confirm : Toggle interactive confirmation before
96 find : Explore database schema
97 find tables Display all database tables
98 find fields Display all database fields (columns)
99 find fields [table] Display all fields in [table]
100 find alias [field] Display valid aliases for [field]
101 help [[command]] : Show command help
102 if [[command]] not specified, list all
104 id : Display current session id
105 outfile [filename] : Set file for collecting retrieved data
106 ping : Check if LANL DB is available
107 prerun [[query]] : Execute query but retrieve hit count only
108 if [[query]] not specified, use current query
109 query [query] : Validate and set current query
110 run [[query]] : Execute query and retrieve data
111 if [[query]] not specified, use current query
112 state : Display current state of the script
114 bye : Alias for 'exit'
115 config : Alias for 'state'
116 count : Alias for 'prerun'
118 out : Alias for 'outfile'
119 quit : Alias for 'exit'
123 -v : verbose; turns on the internal debug() function
129 User feedback is an integral part of the evolution of this and other
130 Bioperl modules. Send your comments and suggestions preferably to
131 the Bioperl mailing list. Your participation is much appreciated.
133 bioperl-l@bioperl.org - General discussion
134 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
136 =head2 Reporting Bugs
138 Report bugs to the Bioperl bug tracking system to help us keep track
139 of the bugs and their resolution. Bug reports can be submitted via the
142 https://github.com/bioperl/bioperl-live/issues
144 =head1 AUTHOR - Mark A. Jensen
146 Mark A. Jensen E<lt>maj@fortinbras.usE<gt>
155 use Bio
::DB
::Query
::HIVQuery
;
162 my $db = new Bio
::DB
::HIV
;
164 my $q = new Bio
::DB
::Query
::HIVQuery
();
165 my $schema = $q->_schema;
189 'outfile' => {'text'=>'[stdout]','handle'=>\
*STDOUT
},
197 'bye' => 'bye: End script',
198 'config' => 'config: Print current configuration state of hivq',
199 'confirm' => 'confirm: Toggle user confirmation of query execution',
200 'count' => 'count [[query string]]: Get number of sequences available for query',
201 'do' => 'do [[query string]]: Run query to completion',
202 'exit' => 'exit: End script',
203 'find' => join("\n","find tables: Print all LANL table names",
204 "find fields: Print all field names",
205 "find fields [tablename]: Print all fields in specified table",
206 "find aliases [fieldname]: Print all valid aliases for specified field",
207 "find options [fieldname/alias]: Print valid match options for specified field"),
208 'help' => 'help [[command]]: Get description of command',
209 'id' => 'id: Print current session id',
210 'out' => 'out [output filename]: Set filename to hold returned sequences',
211 'outfile' => 'outfile [output filename]: Set filename to hold returned sequences',
212 'ping' => 'ping: Ping LANL database',
213 'prerun' => 'prerun [[query string]]: Get number of sequences available',
214 'query' => 'query [[query string]]: Validate and set current query string',
215 'quit' => 'quit: End script',
216 'run' => 'run [[query string]]: Run query to completion',
217 'state' => 'state: Print current configuration state of hivq',
220 debug
("We have arrived.");
223 my $prompt = "hivq> ";
224 my $term = new Term
::ReadLine
'hivq';
228 for ( $cmd = $term->readline($prompt) ) {
231 $term->addhistory($_);
233 $tok[0] = lc $tok[0]; # normalize
235 (!$_ || /^\#/) && do {
238 !(grep(/^$tok[0]$/, @cmds)) && do {
239 error
("Unrecognized command \'$tok[0]\'");
242 (m/^quit$/ || m/^bye$/ || m/^exit$/) && do {
243 my $fh = $state{outfile
}->{handle
};
244 unless ($fh == \
*STDOUT
) {
251 $state{confirm
} = !$state{confirm
};
252 print "User confirmation before running query: ". ($state{confirm
} ?
"yes" : "no")."\n";
255 (m/^outfile$/ || m/^out$/) && do {
257 error
('Output filename not specified');
262 open $fh, ">$tok[1]" or die $!;
268 my $oldfh = $state{outfile
}->{handle
};
269 if ($state{outfile
}->{text
} ne '[stdout]') {
272 $state{outfile
}->{handle
} = $fh;
273 $state{outfile
}->{text
} = $tok[1];
278 my $query = join(' ', @tok[1..$#tok]);
285 print "Current query: $state{query}\n";
288 print "No query set\n";
300 $state{session_id
} = $q->_session_id;
301 $state{query
} = $query;
304 (m/^prerun$/ || m/^count$/) && do {
305 my $query = join(' ', @tok[1..$#tok]);
308 error
('No query currently set');
311 elsif ($q->{'_RUN_LEVEL'} >= 1 &&
312 ($q->query() eq $state{query
})) {
316 # else, fallthrough to prerun current query
331 $state{confirm
} && do { next CMD
unless getConfirm
(); };
332 $state{session_id
} = $q->_session_id;
333 $state{query
} = $query;
343 $state{curct
} = $q->count;
344 $state{query
} = $q->query;
349 (m/^run$/ || m/^do$/) && do {
350 my $query = join(' ', @tok[1..$#tok]);
352 unless (defined $q->{'_RUN_LEVEL'} && $q->{'_RUN_LEVEL'} < 2) {
353 error
('No query currently set');
368 $state{query
} = $query;
370 $state{session_id
} = $q->_session_id;
373 $state{confirm
} && do { next CMD
unless getConfirm
(); };
376 $seqio = $db->get_Stream_by_query($q);
384 $state{curct
} = $q->count;
390 error
('No sequences returned');
403 if (grep(/$tok[1]/, @cmds)) {
404 print $help{$tok[1]}, "\n";
407 error
("Command \'$tok[1]\' unrecognized\n");
411 (m/^state$/ || m/^config$/) && do {
412 foreach my $k (sort keys %state) {
413 if (ref($state{$k}) eq 'HASH') {
414 print "$k: ".$state{$k}->{text
}."\n";
418 if ($k eq 'confirm') {
419 print $state{$k} ?
"yes" : "no";
423 print "$state{$k}\n";
430 error
("Command currently unimplemented\n");
441 if ($msg =~ /MSG:/) {
442 ($msg) = grep (/^MSG:/, split(/\n|\r/,$msg));
444 $msg =~ s/\sat\s.*$//;
446 print STDERR
"hivq: $msg\n";
451 print (($state{curct
} ?
$state{curct
} : "No")
453 . ($state{curct
}>1 ?
"s" : "")
455 print "Query: ".$state{query
}."\n";
462 while (my $seq = $seqio->next_seq) {
464 $nameline .= $seq->annotation->get_value('Special', 'accession').
466 # loop through categories:
467 foreach my $cat ($seq->annotation->get_keys()) {
468 foreach my $an ($seq->annotation->get_keys($cat)) {
469 next if ($an eq 'accession');
470 my $value = $seq->annotation->get_value($cat, $an);
471 # next line: kludge to skip if there's an annotation
472 # object instead of a value (I believe this is a bug)
474 $nameline .= "\t".join('=', "'$an'", "'".$value."'");
477 push @ret, $nameline."\n";
478 push @ret, $seq->seq()."\n";
486 error
("No sequences to output");
488 my $fh = $state{outfile
}->{handle
};
490 print "Download complete\n";
495 print "Run query? [y/n]";
497 ($ans =~ /^[yY]/) && do {return 1;};
502 print "Available commands:\n";
503 outputInColumns
(\
@cmds);
509 for my $arg ($tok[1]) {
511 print $help{find
},"\n";
514 ($arg =~ m/^tables$/) && do {
515 outputInColumns
([$schema->tables]);
518 ($arg =~ m/^fields$/) && do {
521 outputInColumns
([$schema->fields]);
524 unless (grep /^$tbl$/, $schema->tables) {
525 error
("Table \'$tbl\' not valid");
528 outputInColumns
([grep /^$tbl/, $schema->fields()]);
532 ($arg =~ /^options$/) && do {
534 my @aliases = $schema->aliases($fld);
536 unless (grep /^$fld$/, $schema->aliases) {
537 error
("Field \'$tok[2]\' not valid");
540 foreach ($schema->fields) {
541 if ( grep /$fld/, $schema->aliases($_) ) {
546 # on success: $fld is set to valid field
548 my @options = sort {$a cmp $b} $schema->options($fld);
549 @options = (@options ?
@options : ('Free text'));
550 outputInColumns
(\
@options);
554 ($arg =~ /^alias/) && ($arg = $tok[2]);
555 if (grep /$arg/, $schema->fields) {
556 my @aliases = sort $schema->aliases($arg);
558 outputInColumns
(\
@aliases);
561 error
("No aliases to field \'$arg\'");
565 error
("Field \'$arg\' not valid");
575 sub outputInColumns
{
576 my ($items, $n, $w) = @_;
579 my $maxl = length([sort {length($b)<=>length($a)} @items]->[0]);
580 $n ||= int(60/($maxl+3)) || 1;
582 my $coll = int(@items/$n);
583 $coll == @items/$n ?
$coll : ++$coll;
585 for my $j (0..$n-1) {
587 $t[$j] = [@items[$j*$coll..$j*$coll+$coll-1]];
590 $t[$j] = [@items[$j*$coll..$#items]];
593 @items = map { my $j = $_; map { $t[$_]->[$j] || () } (0..$#t) } (0..scalar(@
{$t[0]})-1);
595 $_ .= (' ' x
($w-length($_)));
597 while ($i < @items) {
598 print join('', map { $_ || () } @items[$i..$i+$n-1] ),"\n";
605 print STDERR
shift()."\n" if $opt_v;