Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / Search / HSP / HmmpfamHSP.pm
blobbab8cb957dbc1fdc8e149325cf153dc26189e841
2 # BioPerl module for Bio::Search::HSP::HmmpfamHSP
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Sendu Bala
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Search::HSP::HmmpfamHSP - A parser and HSP object for hmmpfam hsps
18 =head1 SYNOPSIS
20 # generally we use Bio::SearchIO to build these objects
21 use Bio::SearchIO;
22 my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
23 -file => 'result.hmmer');
25 while (my $result = $in->next_result) {
26 while (my $hit = $result->next_hit) {
27 print $hit->name, "\n";
28 print $hit->score, "\n";
29 print $hit->significance, "\n";
31 while (my $hsp = $hit->next_hsp) {
32 # process HSPI objects
37 =head1 DESCRIPTION
39 This object implements a parser for hmmpfam hsp output, a program in the HMMER
40 package.
42 =head1 FEEDBACK
44 =head2 Mailing Lists
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to
48 the Bioperl mailing list. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 =head2 Support
55 Please direct usage questions or support issues to the mailing list:
57 I<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
64 =head2 Reporting Bugs
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 of the bugs and their resolution. Bug reports can be submitted via the
68 web:
70 https://github.com/bioperl/bioperl-live/issues
72 =head1 AUTHOR - Sendu Bala
74 Email bix@sendu.me.uk
76 =head1 APPENDIX
78 The rest of the documentation details each of the object methods.
79 Internal methods are usually preceded with a _
81 =cut
83 # Let the code begin...
85 package Bio::Search::HSP::HmmpfamHSP;
87 use strict;
88 use base qw(Bio::Search::HSP::PullHSPI);
90 =head2 new
92 Title : new
93 Usage : my $obj = Bio::Search::HSP::HmmpfamHSP->new();
94 Function: Builds a new Bio::Search::HSP::HmmpfamHSP object.
95 Returns : Bio::Search::HSP::HmmpfamHSP
96 Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
97 -parent => Bio::PullParserI object (required if no -chunk)
98 -hsp_data => array ref with [rank query_start query_end hit_start
99 hit_end score evalue]
101 where the array ref provided to -chunk contains an IO object
102 for a filehandle to something representing the raw data of the
103 hsp, and $start and $end define the tell() position within the
104 filehandle that the hsp data starts and ends (optional; defaults
105 to start and end of the entire thing described by the filehandle)
107 =cut
109 sub new {
110 my ($class, @args) = @_;
111 my $self = $class->SUPER::new(@args);
113 $self->_setup(@args);
115 my $fields = $self->_fields;
116 foreach my $field (qw( alignment )) {
117 $fields->{$field} = undef;
120 my $hsp_data = $self->_raw_hsp_data;
121 if ($hsp_data && ref($hsp_data) eq 'ARRAY') {
122 my @hsp_data = @{$hsp_data}; # don't alter the reference
123 foreach my $field (qw(rank query_start query_end hit_start hit_end score evalue)) {
124 $fields->{$field} = shift(@hsp_data);
128 $self->_dependencies( { ( query_string => 'alignment',
129 hit_string => 'alignment',
130 homology_string => 'alignment',
131 hit_identical_inds => 'seq_inds',
132 hit_conserved_inds => 'seq_inds',
133 hit_nomatch_inds => 'seq_inds',
134 hit_gap_inds => 'seq_inds',
135 query_identical_inds => 'seq_inds',
136 query_conserved_inds => 'seq_inds',
137 query_nomatch_inds => 'seq_inds',
138 query_gap_inds => 'seq_inds' ) } );
140 return $self;
144 # PullParserI discovery methods so we can answer all HitI questions
147 sub _discover_alignment {
148 my $self = shift;
149 my $alignments_hash = $self->get_field('alignments');
151 my $identifier = $self->get_field('name').'~~~~'.$self->get_field('rank');
153 while (! defined $alignments_hash->{$identifier}) {
154 last unless $self->parent->parent->_next_alignment;
156 my $alignment = $alignments_hash->{$identifier};
158 if ($alignment) {
159 # work out query, hit and homology strings, and some stats
160 # (quicker to do this all at once instead of each method working on
161 # $alignment string itself)
163 my ($query_string, $hit_string, $homology_string);
164 while ($alignment =~ /\s+(\S+)\n\s+(\S.+)\n\s+\S+\s+\d+\s+(\S+)\s+\d/gm) {
165 my $hi = $1;
166 my $ho = $2;
167 $query_string .= $3;
169 $hi =~ s/\*\-\>//;
170 $ho = ' 'x(length($hi) - length($ho)).$ho;
171 $hi =~ s/\<\-\*//;
173 $hit_string .= $hi;
174 $homology_string .= $ho;
177 $self->_fields->{query_string} = $query_string;
178 $self->_fields->{hit_string} = $hit_string;
179 $homology_string =~ s/ $//;
180 $self->_fields->{homology_string} = $homology_string;
182 ($self->{_query_gaps}) = $query_string =~ tr/-//;
183 ($self->{_hit_gaps}) = $hit_string =~ tr/.//;
184 ($self->{_total_gaps}) = $self->{_query_gaps} + $self->{_hit_gaps};
187 $self->_fields->{alignment} = 1; # stop this method being called again
190 # seq_inds related methods, all just need seq_inds field to have been gotten
191 sub _discover_seq_inds {
192 my $self = shift;
193 my ($seqString, $qseq, $sseq) = ( $self->get_field('homology_string'),
194 $self->get_field('query_string'),
195 $self->get_field('hit_string') );
197 # (code largely lifted from GenericHSP)
199 # Using hashes to avoid saving duplicate residue numbers.
200 my %identicalList_query = ();
201 my %identicalList_sbjct = ();
202 my %conservedList_query = ();
203 my %conservedList_sbjct = ();
204 my @gapList_query = ();
205 my @gapList_sbjct = ();
206 my %nomatchList_query = ();
207 my %nomatchList_sbjct = ();
209 my $resCount_query = $self->get_field('query_end');
210 my $resCount_sbjct = $self->get_field('hit_end');
212 my ($mchar, $schar, $qchar);
213 while ($mchar = chop($seqString) ) {
214 ($qchar, $schar) = (chop($qseq), chop($sseq));
216 if ($mchar eq '+' || $mchar eq '.' || $mchar eq ':') {
217 $conservedList_query{ $resCount_query } = 1;
218 $conservedList_sbjct{ $resCount_sbjct } = 1;
220 elsif ($mchar eq ' ') {
221 $nomatchList_query{ $resCount_query } = 1;
222 $nomatchList_sbjct{ $resCount_sbjct } = 1;
224 else {
225 $identicalList_query{ $resCount_query } = 1;
226 $identicalList_sbjct{ $resCount_sbjct } = 1;
229 if ($qchar eq '-') {
230 push(@gapList_query, $resCount_query);
232 else {
233 $resCount_query -= 1;
235 if ($schar eq '.') {
236 push(@gapList_sbjct, $resCount_sbjct);
238 else {
239 $resCount_sbjct -= 1;
243 my $fields = $self->_fields;
244 $fields->{hit_identical_inds} = [ sort { $a <=> $b } keys %identicalList_sbjct ];
245 $fields->{hit_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_sbjct ];
246 $fields->{hit_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_sbjct ];
247 $fields->{hit_gap_inds} = [ reverse @gapList_sbjct ];
248 $fields->{query_identical_inds} = [ sort { $a <=> $b } keys %identicalList_query ];
249 $fields->{query_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_query ];
250 $fields->{query_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_query ];
251 $fields->{query_gap_inds} = [ reverse @gapList_query ];
253 $fields->{seq_inds} = 1;
256 =head2 query
258 Title : query
259 Usage : my $query = $hsp->query
260 Function: Returns a SeqFeature representing the query in the HSP
261 Returns : L<Bio::SeqFeature::Similarity>
262 Args : none
264 =cut
266 sub query {
267 my $self = shift;
268 unless ($self->{_created_query}) {
269 $self->SUPER::query( new Bio::SeqFeature::Similarity
270 ('-primary' => $self->primary_tag,
271 '-start' => $self->get_field('query_start'),
272 '-end' => $self->get_field('query_end'),
273 '-expect' => $self->get_field('evalue'),
274 '-score' => $self->get_field('score'),
275 '-strand' => 1,
276 '-seq_id' => $self->get_field('query_name'),
277 #'-seqlength'=> $self->get_field('query_length'), (not known)
278 '-source' => $self->get_field('algorithm'),
279 '-seqdesc' => $self->get_field('query_description')
280 ) );
281 $self->{_created_query} = 1;
283 return $self->SUPER::query(@_);
286 =head2 hit
288 Title : hit
289 Usage : my $hit = $hsp->hit
290 Function: Returns a SeqFeature representing the hit in the HSP
291 Returns : L<Bio::SeqFeature::Similarity>
292 Args : [optional] new value to set
294 =cut
296 sub hit {
297 my $self = shift;
298 unless ($self->{_created_hit}) {
299 # the full length isn't always known (given in the report), but don't
300 # warn about the missing info all the time
301 my $verbose = $self->parent->parent->parent->verbose;
302 $self->parent->parent->parent->verbose(-1);
303 my $seq_length = $self->get_field('length');
304 $self->parent->parent->parent->verbose($verbose);
306 $self->SUPER::hit( new Bio::SeqFeature::Similarity
307 ('-primary' => $self->primary_tag,
308 '-start' => $self->get_field('hit_start'),
309 '-end' => $self->get_field('hit_end'),
310 '-expect' => $self->get_field('evalue'),
311 '-score' => $self->get_field('score'),
312 '-strand' => 1,
313 '-seq_id' => $self->get_field('name'),
314 $seq_length ? ('-seqlength' => $seq_length) : (),
315 '-source' => $self->get_field('algorithm'),
316 '-seqdesc' => $self->get_field('description')
317 ) );
318 $self->{_created_hit} = 1;
320 return $self->SUPER::hit(@_);
323 =head2 gaps
325 Title : gaps
326 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
327 Function : Get the number of gaps in the query, hit, or total alignment.
328 Returns : Integer, number of gaps or 0 if none
329 Args : 'query' = num conserved / length of query seq (without gaps)
330 'hit' = num conserved / length of hit seq (without gaps)
331 'total' = num conserved / length of alignment (with gaps)
332 default = 'total'
334 =cut
336 sub gaps {
337 my ($self, $type) = @_;
339 $type = lc $type if defined $type;
340 $type = 'total' if (! defined $type || $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/);
341 $type = 'hit' if $type =~ /sbjct|subject/;
343 $self->get_field('alignment'); # make sure gaps have been calculated
345 return $self->{'_'.$type.'_gaps'};
348 =head2 pvalue
350 Title : pvalue
351 Usage : my $pvalue = $hsp->pvalue();
352 Function: Returns the P-value for this HSP
353 Returns : undef (Hmmpfam reports do not have p-values)
354 Args : none
356 =cut
358 # noop
359 sub pvalue { }