Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / Index / Hmmer.pm
blob8c48f940e10f8e2946d180b9820886cbaf172a51
2 # BioPerl module for Bio::Index::Hmmer
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Josh Lauricha <laurichj@bioinfo.ucr.edu>
8 # Copyright Josh Lauricha
9 # Unless otherwise noted, this was shamelessly ripped from
10 # Bio::Index::Blast
12 # You may distribute this module under the terms of perl itself
14 # POD documentation - main docs before the code
16 =head1 NAME
18 Bio::Index::Hmmer - indexes HMMER reports and supports retrieval based on query
20 =head1 SYNOPSIS
22 # Complete Code for indexing a set of report files
23 #!/usr/bin/perl -w
24 use strict;
25 use Bio::Index::Hmmer;
26 my $indexfile = shift;
27 my $index = Bio::Index::Hmmer->new(
28 -filename => $indexfile,
29 -write_flag => 1
31 $index->make_index(@ARGV);
34 # Complete code for fetching a report
35 use strict;
36 use Bio::Index::Hmmer;
37 my $indexfile = shift;
38 my $index = Bio::Index::Hmmer->new(
39 -filename => $indexfile,
40 -write_flag => 0
43 foreach my $id (@ARGV) {
44 my $report = $index->fetch_report($id);
45 print "Query: ", $report->query_name(), "\n";
46 while( my $hit = $report->next_hit() ) {
47 print "\tHit Name: ", $hit->name(), "\n";
48 while( my $hsp = $hit->next_domain() ) {
49 print "\t\tE-Value: ", $hsp->evalue(), "\n";
54 =head1 DESCRIPTION
56 This object allows one to build an index on a HMMER file (or files)
57 and provide quick access to the HMMER report for that accession.
58 For best results 'use strict'.
60 You can also set or customize the unique key used to retrieve by
61 writing your own function and calling the id_parser() method.
62 For example:
64 $inx->id_parser(\&get_id);
65 # make the index
66 $inx->make_index($file_name);
68 # here is where the retrieval key is specified
69 sub get_id {
70 my $line = shift;
71 $line =~ /^KW\s+([A-Z]+)/i;
72 $1;
76 =head1 FEEDBACK
78 =head2 Mailing Lists
80 User feedback is an integral part of the evolution of this and other
81 Bioperl modules. Send your comments and suggestions preferably to
82 the Bioperl mailing list. Your participation is much appreciated.
84 bioperl-l@bioperl.org - General discussion
85 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
87 =head2 Support
89 Please direct usage questions or support issues to the mailing list:
91 I<bioperl-l@bioperl.org>
93 rather than to the module maintainer directly. Many experienced and
94 responsive experts will be able to look at the problem and quickly
95 address it. Please include a thorough description of the problem
96 with code and data examples if at all possible.
98 =head2 Reporting Bugs
100 Report bugs to the Bioperl bug tracking system to help us keep track
101 of the bugs and their resolution. Bug reports can be submitted via the
102 web:
104 https://github.com/bioperl/bioperl-live/issues
106 =head1 AUTHOR - Josh Lauricha
108 Email laurichj@bioinfo.ucr.edu
110 =head1 APPENDIX
112 The rest of the documentation details each of the object methods.
113 Internal methods are usually preceded with a _
115 =cut
117 # Let the code begin...
119 package Bio::Index::Hmmer;
120 use strict;
122 use Bio::SearchIO;
123 use IO::String;
124 use Bio::Root::Version;
126 use base qw(Bio::Index::Abstract Bio::Root::Root);
# Version identifier for this index module.
# Takes no arguments and returns the current value of the package
# variable $Bio::Root::Version::VERSION.
sub _version {
    return $Bio::Root::Version::VERSION;
}
133 =head2 new
135 Usage : $index = Bio::Index::Hmmer->new(
136 -filename => $dbm_file,
137 -write_flag => 0,
138 -dbm_package => 'DB_File',
139 -verbose => 0
141 Function: Returns a new index object. If filename is
142 specified, then open_dbm() is immediately called.
143 Returns : A new index object
144 Args : -filename The name of the dbm index file.
145 -write_flag TRUE if write access to the dbm file is
146 needed.
147 -dbm_package The Perl dbm module to use for the
148 index.
149 -verbose Print debugging output to STDERR if
150 TRUE.
152 =cut
# Constructor: forwards every argument unchanged to the parent class
# constructor and returns the object it builds.
#
# Args    : the named arguments accepted by the parent constructor
# Returns : whatever $class->SUPER::new(@args) returns
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    # Return explicitly rather than relying on the value of the final
    # assignment doubling as the sub's implicit return value.
    return $self;
}
160 =head2 Bio::Index::Hmmer implemented methods
162 =cut
164 =head2 fetch_report
166 Title : fetch_report
167 Usage : my $report = $idx->fetch_report($id);
168 Function: Returns a Bio::Search::Result::HMMERResult report object
169 for a specific HMMER report
170 Returns : Bio::Search::Result::HMMERResult
171 Args : valid id
173 =cut
# Fetch the report stored under $id and return it as a parsed result.
#
# Args    : $id - key previously stored in the index
# Returns : the value of next_result() on a Bio::SearchIO 'hmmer' parser
#           that is fed the file header followed by the record's lines
#
# Lexical line variables are used for both read loops so that the
# caller's global $_ is left untouched.
sub fetch_report {
    my ($self, $id) = @_;

    my $fh  = $self->get_stream($id);
    my $pos = tell($fh);    # remember where get_stream left the handle

    # The HMMER SearchIO wants the header, so rewind and collect every
    # line up to and including the first "Query sequence:" line.
    my @header;
    seek($fh, 0, 0);
    while (defined(my $line = <$fh>)) {
        push @header, $line;
        last if $line =~ /Query sequence:/;
    }

    # Go back to the remembered position and collect the data lines up to
    # and including the first line containing "//".
    my @data;
    seek($fh, $pos, 0);
    while (defined(my $line = <$fh>)) {
        push @data, $line;
        last if $line =~ m{//};
    }

    # Then join them and hand the text to the parser via an in-memory handle.
    my $rfh    = IO::String->new(join('', @header, @data));
    my $report = Bio::SearchIO->new(
        -noclose => 1,
        -format  => 'hmmer',
        -fh      => $rfh,
    );
    return $report->next_result();
}
205 # shamelessly stolen from Bio::Index::Fasta
207 =head2 id_parser
209 Title : id_parser
210 Usage : $index->id_parser( CODE )
211 Function: Stores or returns the code used by _index_file to
212 parse the ID for a record from a string. Useful
213 for (for instance) specifying a different
214 parser for different flavours of HMMER reports.
215 Returns \&default_id_parser (see below) if not
216 set. If you supply your own id_parser
217 subroutine, then it should expect the query name
218 captured from a "Query sequence:" line. An entry will be added to
219 the index for each string in the list returned.
220 Example : $index->id_parser( \&my_id_parser )
221 Returns : ref to CODE if called without arguments
222 Args : CODE
224 =cut
# Combined getter/setter for the ID-parsing callback.
#
# Args    : $code - optional; when true it is stored in the object under
#           the '_id_parser' key
# Returns : the stored callback if it is true, otherwise a reference to
#           default_id_parser
sub id_parser {
    my ($self, $code) = @_;

    $self->{'_id_parser'} = $code if $code;

    my $parser = $self->{'_id_parser'};
    return $parser ? $parser : \&default_id_parser;
}
236 =head2 default_id_parser
238 Title : default_id_parser
239 Usage : $id = default_id_parser( $header )
240 Function: The default query ID parser for Bio::Index::Hmmer.pm
241 Returns $1 from applying the regexp /^\s*(\S+)/
242 to $header.
243 Returns : ID string
244 Args : a header line string
246 =cut
# Default ID parser: returns the first run of non-whitespace characters
# in the string passed as the first argument, skipping leading whitespace.
# Returns an empty list (undef in scalar context) when there is none.
sub default_id_parser {
    return unless $_[0] =~ /^\s*(\S+)/;
    return $1;
}
257 =head2 Require methods from Bio::Index::Abstract
259 =cut
261 =head2 _index_file
263 Title : _index_file
264 Usage : $index->_index_file( $file_name, $i )
265 Function: Specialist function to index HMMER report file(s).
266 Is provided with a filename and an integer
267 by make_index in its SUPER class.
268 Example :
269 Returns :
270 Args :
272 =cut
# Index one HMMER report file.
#
# Args    : $file - path of the report file to read
#           $i    - integer passed through to add_record as the second
#                   field of each record
# Returns : 1 once the whole file has been read
# Throws  : via $self->throw() when $file cannot be opened for reading
#
# For every line matching "Query sequence: <name>" the file offset
# immediately after that line is recorded under each ID the id_parser
# callback returns for <name>.
sub _index_file {
    my ($self, $file, $i) = @_;

    open my $HMMER, '<', $file
        or $self->throw("Could not read file '$file': $!");

    # A lexical line variable keeps the caller's global $_ intact.
    while (defined(my $line = <$HMMER>)) {
        next unless $line =~ /Query sequence: ([^\s]+)/;

        # Copy the capture before calling out: the callback may run its
        # own pattern matches.
        my $query      = $1;
        my $indexpoint = tell($HMMER);

        foreach my $id ($self->id_parser()->($query)) {
            print "id is $id, begin is $indexpoint\n" if $self->verbose() > 0;
            $self->add_record($id, $i, $indexpoint);
        }
    }
    close $HMMER;
    return 1;
}
297 =head2 Bio::Index::Abstract methods
299 =cut
301 =head2 filename
303 Title : filename
304 Usage : $value = $self->filename();
305 $self->filename($value);
306 Function: Gets or sets the name of the dbm index file.
307 Returns : The current value of filename
308 Args : Value of filename if setting, or none if
309 getting the value.
311 =head2 write_flag
313 Title : write_flag
314 Usage : $value = $self->write_flag();
315 $self->write_flag($value);
316 Function: Gets or sets the value of write_flag, which
317 is whether the dbm file should be opened with
318 write access.
319 Returns : The current value of write_flag (default 0)
320 Args : Value of write_flag if setting, or none if
321 getting the value.
323 =head2 dbm_package
325 Usage : $value = $self->dbm_package();
326 $self->dbm_package($value);
328 Function: Gets or sets the name of the Perl dbm module used.
329 If the value is unset, then it returns the value of
330 the package variable $USE_DBM_TYPE or if that is
331 unset, then it chooses the best available dbm type,
332 choosing 'DB_File' in preference to 'SDBM_File'.
333 Bio::Index::Abstract may work with other dbm file
334 types.
336 Returns : The current value of dbm_package
337 Args : Value of dbm_package if setting, or none if
338 getting the value.
341 =head2 get_stream
343 Title : get_stream
344 Usage : $stream = $index->get_stream( $id );
345 Function: Returns a file handle with the file pointer
346 at the appropriate place
348 This provides for a way to get the actual
349 file contents and not an object
351 WARNING: you must parse the record delimiter
352 *yourself*. Abstract won't do this for you
353 So this code
355 $fh = $index->get_stream($myid);
356 while( <$fh> ) {
357 # do something
359 will parse the entire file if you don't put in
360 a last statement in, like
362 while( <$fh> ) {
363 /^\/\// && last; # end of record
364 # do something
367 Returns : A filehandle object
368 Args : string represents the accession number
369 Notes : This method should not be used without forethought
372 =head2 open_dbm
374 Usage : $index->open_dbm()
375 Function: Opens the dbm file associated with the index
376 object. Write access is only given if explicitly
377 asked for by calling new(-write_flag => 1) or having set
378 the write_flag(1) on the index object. The type of
379 dbm file opened is that returned by dbm_package().
380 The name of the file to be opened is obtained by
381 calling the filename() method.
383 Example : $index->_open_dbm()
384 Returns : 1 on success
387 =head2 _version
389 Title : _version
390 Usage : $type = $index->_version()
391 Function: Returns a string which identifies the version of an
392 index module. Used to permanently identify an index
393 file as having been created by a particular version
394 of the index module. Must be provided by the sub class
395 Example :
396 Returns :
397 Args : none
399 =head2 _filename
401 Title : _filename
402 Usage : $index->_filename( FILE INT )
403 Function: Indexes the file
404 Example :
405 Returns :
406 Args :
408 =head2 _file_handle
410 Title : _file_handle
411 Usage : $fh = $index->_file_handle( INT )
412 Function: Returns an open filehandle for the file
413 index INT. On opening a new filehandle it
414 caches it in the @{$index->_filehandle} array.
415 If the requested filehandle is already open,
416 it simply returns it from the array.
417 Example : $first_file_indexed = $index->_file_handle( 0 );
418 Returns : ref to a filehandle
419 Args : INT
421 =head2 _file_count
423 Title : _file_count
424 Usage : $index->_file_count( INT )
425 Function: Used by the index building sub in a sub class to
426 track the number of files indexed. Sets or gets
427 the number of files indexed when called with or
428 without an argument.
429 Example :
430 Returns : INT
431 Args : INT
434 =head2 add_record
436 Title : add_record
437 Usage : $index->add_record( $id, @stuff );
438 Function: Calls pack_record on @stuff, and adds the result
439 of pack_record to the index database under key $id.
440 If $id is a reference to an array, then a new entry
441 is added under a key corresponding to each element
442 of the array.
443 Example : $index->add_record( $id, $fileNumber, $begin, $end )
444 Returns : TRUE on success or FALSE on failure
445 Args : ID LIST
447 =head2 pack_record
449 Title : pack_record
450 Usage : $packed_string = $index->pack_record( LIST )
451 Function: Packs an array of scalars into a single string
452 joined by ASCII 034 (which is unlikely to be used
453 in any of the strings), and returns it.
454 Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
455 Returns : STRING or undef
456 Args : LIST
458 =head2 unpack_record
460 Title : unpack_record
461 Usage : $index->unpack_record( STRING )
462 Function: Splits the string provided into an array,
463 splitting on ASCII 034.
464 Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
465 Returns : A 3 element ARRAY
466 Args : STRING containing ASCII 034
468 =head2 DESTROY
470 Title : DESTROY
471 Usage : Called automatically when index goes out of scope
472 Function: Closes connection to database and handles to
473 sequence files
474 Returns : NEVER
475 Args : NONE
478 =cut