t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / Tools / Analysis / Protein / HNN.pm
blob936756a96e9cfff15dd9265d5a3d3e9a71dfb557
1 # $Id: HNN.pm,v 1.0 2003/07/ 11
3 # BioPerl module for Bio::Tools::Analysis::Protein::HNN
5 # Copyright Richard Adams
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
12 =head1 NAME
14 Bio::Tools::Analysis::Protein::HNN - a wrapper around the HNN protein
15 secondary structure prediction server
17 =head1 SYNOPSIS
19 use Bio::Tools::Analysis::Protein::HNN;
20 #get a Bio::Seq or Bio::PrimarySeq
21 use Bio::PrimarySeq;
22 my $seq = Bio::PrimarySeq->new
23 (-seq=>'IKLCVHHJHJHJHJHJHJHNLAILAKAHLIELALAL',
24 -primary_id=>'test'); # a Bio::PrimarySeqI object
26 my $hnn = Bio::Tools::Analysis::Protein::HNN->new (-seq=>$seq);
27 $hnn->run;
28 print $hnn->result;# #raw text to standard error
30 =head1 DESCRIPTION
32 A module to remotely retrieve predictions of protein secondary
33 structure. Each residue in the protein receives a score representing
34 the likelihood of existing in each of three different states (helix,
35 coil or sheet), e.g.:
37 my $analysis_object = Bio::Tools::SimpleAnalysis::Protein::HNN->new
38 (-seq => $seq);
40 creates a new object
42 $analysis_object->run;
44 submits the query to the server and obtains raw text output.
46 Given an amino acid sequence the results can be obtained in 4 formats,
47 determined by the argument to the result method:
49 =over 4
51 =item 1
53 The raw text of the program output.
55 my $rawdata = $analysis_object->result;
57 =item 2
59 A reference to an array of hashes of scores for each state and the
60 assigned state.
62 my $data_ref = $analysis_object->result('parsed');
63 print "score for helix at residue 2 is $data_ref->[1]{'helix'}\n";
64 print "predicted struc at residue 2 is $data_ref->[1]{'struc}\n";
66 =item 3
68 An array of Bio::SeqFeature::Generic objects where each feature is a
69 predicted unit of secondary structure. Only stretches of helix/sheet
70 predictions for longer than 4 residues are defined as helices.
72 my @fts = $analysis_object->result(Bio::SeqFeatureI);
73 for my $ft (@fts) {
74 print " From ", $ft->start, " to ",$ft->end, " struc: " ,
75 ($ft->each_tag_value('type'))[0] ,"\n";
78 =item 4
80 A Bio::Seq::Meta::Array implementing sequence.
82 This is a Bio::Seq object that can also hold data about each residue
83 in the sequence In this case, the sequence can be associated with a
84 single array of HNN prediction scores. e.g.,
86 my $meta_sequence = $analysis_object->result('meta');
88 print "helix scores from residues 10-20 are ",
89 $meta_sequence->named_submeta_text("HNN_helix",10,20), "\n";
91 Meta sequence default names are : HNN_helix, HNN_sheet, HNN_coil,
92 HNN_struc, representing the scores for each residue.
94 Many methods common to all analyses are inherited from
95 L<Bio::Tools::Analysis::SimpleAnalysisBase>.
97 =back
99 =head1 SEE ALSO
101 L<Bio::SimpleAnalysisI>,
102 L<Bio::Tools::Analysis::SimpleAnalysisBase>,
103 L<Bio::Seq::Meta::Array>,
104 L<Bio::WebAgent>
106 =head1 FEEDBACK
108 =head2 Mailing Lists
110 User feedback is an integral part of the evolution of this and other
111 Bioperl modules. Send your comments and suggestions preferably to one
112 of the Bioperl mailing lists. Your participation is much appreciated.
114 bioperl-l@bioperl.org - General discussion
115 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
117 =head2 Support
119 Please direct usage questions or support issues to the mailing list:
121 I<bioperl-l@bioperl.org>
123 rather than to the module maintainer directly. Many experienced and
124 reponsive experts will be able look at the problem and quickly
125 address it. Please include a thorough description of the problem
126 with code and data examples if at all possible.
128 =head2 Reporting Bugs
130 Report bugs to the Bioperl bug tracking system to help us keep track
131 the bugs and their resolution. Bug reports can be submitted via the
132 web:
134 https://github.com/bioperl/bioperl-live/issues
136 =head1 AUTHORS
138 Richard Adams, Richard.Adams@ed.ac.uk,
140 =head1 APPENDIX
142 The rest of the documentation details each of the object
143 methods. Internal methods are usually preceded with a _
145 =cut
148 use strict;
150 package Bio::Tools::Analysis::Protein::HNN;
152 use IO::String;
153 use Bio::SeqIO;
154 use HTTP::Request::Common qw (POST);
155 use Bio::SeqFeature::Generic;
156 use Bio::Seq::Meta::Array;
157 $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;
159 use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
161 #extends array for 2struc.
163 my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_hnn.pl';
164 my $ANALYSIS_NAME= 'HNN';
165 my $ANALYSIS_SPEC= {name => 'HNN', type => 'Protein'};
166 my $INPUT_SPEC = [
167 { mandatory => 'true',
168 type => 'Bio::PrimarySeqI',
169 'name' => 'seq',
172 my $RESULT_SPEC =
174 '' => 'bulk', # same as undef
175 'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
176 raw => '[ {helix=>, sheet=>, struc=>, coil=>}]',
177 meta => 'Bio::Seq::Meta::Array object',
179 use constant MIN_STRUC_LEN => 3;
181 sub _init {
182 my $self = shift;
183 $self->url($URL);
184 $self->{'_ANALYSIS_SPEC'} = $ANALYSIS_SPEC;
185 $self->{'_INPUT_SPEC'} = $INPUT_SPEC;
186 $self->{'_RESULT_SPEC'} = $RESULT_SPEC;
187 $self->{'_ANALYSIS_NAME'} = $ANALYSIS_NAME;
188 return $self;
192 sub _run {
193 my $self = shift;
194 $self->delay(1);
195 # delay repeated calls by default by 3 sec, set delay() to change
196 $self->sleep;
197 $self->status('TERMINATED_BY_ERROR');
198 my $request = POST 'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_hnn.pl',
199 Content_Type => 'form-data',
200 Content => [
201 title => "",
202 notice => $self->seq->seq,
203 ali_width => 70,
206 my $text = $self->request($request)->content;
207 return unless $text;
208 my ($next) = $text =~ /Prediction.*?=(.*?)>/;
209 return unless $next;
210 my $out = "http://npsa-pbil.ibcp.fr/".$next;
211 my $req2 = HTTP::Request->new(GET=>$out);
212 my $resp2 = $self->request ($req2);
213 $self->status('COMPLETED') if $resp2 ne '';
214 $self->{'_result'} = $resp2->content;
215 return $self;
219 =head2 result
221 NAme : result
222 Usage : $job->result (...)
223 Returns : a result created by running an analysis
224 Args : see keys of $INPUT_SPEC
226 The method returns a result of an executed job. If the job was
227 terminated by an error the result may contain an error message instead
228 of the real data.
230 This implementation returns differently processed data depending on
231 argument:
233 =over 3
235 =item undef
237 Returns the raw ASCII data stream but without HTML tags.
239 =item 'Bio::SeqFeatureI'
241 The argument string defines the type of bioperl objects returned in an
242 array. The objects are L<Bio::SeqFeature::Generic>. Feature primary
243 tag is "2ary". Feature tags are "type" (which can be helix, sheet or
244 coil) "method" (HNN).
246 =item 'parsed'
248 Array of hash references of scores/structure assignations { helix =E<gt>,
249 sheet =E<gt> , coil =E<gt> , struc=E<gt>}.
251 =item 'all'
253 A Bio::Seq::Meta::Array object. Scores can be accessed using methods
254 from this class. Meta sequence names are HNN_helix, HNN_sheet,
255 HNN_coil, HNN_struc.
257 =back
260 =cut
263 sub result {
264 my ($self,$value) = @_;
266 my @scores;
267 my @fts;
269 if ($value ) {
270 #parse into basic raw form, store this as well as '_result'
271 if (!exists($self->{'_parsed'}) ) {
272 my $result = IO::String->new($self->{'_result'});
273 while (my $line = <$result>) {
274 next unless $line =~ /^[HEC]\s/; # or for sopma/hnn /^[A-Z]\s/
275 $line =~/^([A-Z])\s+(\d+)\s+(\d+)\s+(\d+)/; # or for so
276 push @scores, { struc => $1,
277 helix => $2,
278 sheet => $3,
279 coil => $4,
282 $self->{'_parsed'} = \@scores;
284 if ($value eq 'Bio::SeqFeatureI') {
285 $self->_get_2ary_coords();
286 for my $type (keys %{$self->{'_parsed_coords'}} ) {
287 next if $type =~ /\w{2,}/; #if not H,C,E or T
288 for my $loc (@{$self->{'_parsed_coords'}{$type}} ) {
289 push @fts, Bio::SeqFeature::Generic->new
290 (-start => $loc->{'start'},
291 -end => $loc->{'end'},
292 -source => 'HNN',
293 -primary => 'Domain',
294 -tag => {
295 type => $type,
296 method => $self->analysis_name,
298 } #end of array of strucs of type
299 } # end of all 2nd struc elements
300 delete $self->{'_parsed_coords'}; #remove temp data
301 return @fts;
302 } #endif BioSeqFeature
304 elsif ($value eq 'meta') {
305 #1st of all make 3 or 4 arrays of scores for each type from column data
306 my %type_scores;
307 for my $aa (@{$self->{'_parsed'}}) {
308 push @{$type_scores{'struc'}}, $aa->{'struc'};
309 push @{$type_scores{'helix'}}, $aa->{'helix'};
310 push @{$type_scores{'sheet'}}, $aa->{'sheet'};
311 push @{$type_scores{'coil'}}, $aa->{'coil'};
314 ## bless as metasequence if necessary
315 if (!$self->seq->isa("Bio::Seq::MetaI")) {
316 bless ($self->seq, "Bio::Seq::Meta::Array");
318 $self->seq->isa("Bio::Seq::MetaI")
319 || $self->throw("$self is not a Bio::Seq::MetaI");
321 ## now make meta sequence
322 $Bio::Seq::Meta::Array::DEFAULT_NAME = 'HNN_struc';
323 for my $struc_type (keys %type_scores) {
324 my $meta_name = "HNN". "_" . "$struc_type";
325 my @meta = map{$_->{$struc_type}} @{$self->{'_parsed'}};
326 if (grep{$_ eq $meta_name}$self->seq->meta_names ) {
327 $self->warn ("$meta_name already exists , not overwriting!");
328 next;
330 $self->seq->named_meta($meta_name,\@meta );
332 # return seq array object implementing meta sequence #
333 return $self->seq;
336 ## else for aa true value get data structure back ##
337 else {
338 return $self->{'_parsed'};
340 } #endif ($value)
342 #return raw result if no return fomrt stated
343 return $self->{'_result'};
347 sub _get_2ary_coords {
348 #helper sub for result;
349 ##extracts runs of structure > MIN_STRUC_LENresidues or less if Turn:
350 #i.e., helical prediction for 1 residue isn't very meaningful...
351 ## and poulates array of hashes with start/end values.
352 #could be put into a secondary base class if need be
353 my ($self) = @_;
354 my @prot = @{$self->{'_parsed'}};
355 my %Result;
356 for (my $index = 0; $index <= $#prot; $index++) {
357 my $type = $prot[$index]{'struc'};
358 next unless $type =~ /[HTCE]/;
359 my $length = 1;
360 for (my $j = $index + 1; $j <= $#prot; $j++) {
361 my $test = $prot[$j];
362 if ($test->{'struc'} eq $type) {
363 $length++;
364 } elsif ( $length > MIN_STRUC_LEN ||
365 ($length <= MIN_STRUC_LEN && $type eq 'T') ) {
366 push @{$Result{$type}}, {start => $index + 1 , end => $j};
367 $index += $length -1;
368 last;
369 } else {
370 $index += $length - 1;
371 last;
375 $self->{'_parsed_coords'} = \%Result; #temp assignment