1 # $Id: HNN.pm,v 1.0 2003/07/ 11
3 # BioPerl module for Bio::Tools::Analysis::Protein::HNN
5 # Copyright Richard Adams
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
14 Bio::Tools::Analysis::Protein::HNN - a wrapper around the HNN protein
15 secondary structure prediction server
19 use Bio::Tools::Analysis::Protein::HNN;
20 #get a Bio::Seq or Bio::PrimarySeq
22 my $seq = Bio::PrimarySeq->new
23 (-seq=>'IKLCVHHJHJHJHJHJHJHNLAILAKAHLIELALAL',
24 -primary_id=>'test'); # a Bio::PrimarySeqI object
26 my $hnn = Bio::Tools::Analysis::Protein::HNN->new (-seq=>$seq);
28 print $hnn->result;# #raw text to standard error
32 A module to remotely retrieve predictions of protein secondary
33 structure. Each residue in the protein receives a score representing
34 the likelihood of existing in each of three different states (helix,
37 my $analysis_object = Bio::Tools::SimpleAnalysis::Protein::HNN->new
42 $analysis_object->run;
44 submits the query to the server and obtains raw text output.
46 Given an amino acid sequence the results can be obtained in 4 formats,
47 determined by the argument to the result method:
53 The raw text of the program output.
55 my $rawdata = $analysis_object->result;
59 A reference to an array of hashes of scores for each state and the
62 my $data_ref = $analysis_object->result('parsed');
63 print "score for helix at residue 2 is $data_ref->[1]{'helix'}\n";
64 print "predicted struc at residue 2 is $data_ref->[1]{'struc}\n";
68 An array of Bio::SeqFeature::Generic objects where each feature is a
69 predicted unit of secondary structure. Only stretches of helix/sheet
70 predictions for longer than 4 residues are defined as helices.
72 my @fts = $analysis_object->result(Bio::SeqFeatureI);
74 print " From ", $ft->start, " to ",$ft->end, " struc: " ,
75 ($ft->each_tag_value('type'))[0] ,"\n";
80 A Bio::Seq::Meta::Array implementing sequence.
82 This is a Bio::Seq object that can also hold data about each residue
83 in the sequence In this case, the sequence can be associated with a
84 single array of HNN prediction scores. e.g.,
86 my $meta_sequence = $analysis_object->result('meta');
88 print "helix scores from residues 10-20 are ",
89 $meta_sequence->named_submeta_text("HNN_helix",10,20), "\n";
91 Meta sequence default names are : HNN_helix, HNN_sheet, HNN_coil,
92 HNN_struc, representing the scores for each residue.
94 Many methods common to all analyses are inherited from
95 L<Bio::Tools::Analysis::SimpleAnalysisBase>.
101 L<Bio::SimpleAnalysisI>,
102 L<Bio::Tools::Analysis::SimpleAnalysisBase>,
103 L<Bio::Seq::Meta::Array>,
110 User feedback is an integral part of the evolution of this and other
111 Bioperl modules. Send your comments and suggestions preferably to one
112 of the Bioperl mailing lists. Your participation is much appreciated.
114 bioperl-l@bioperl.org - General discussion
115 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
119 Please direct usage questions or support issues to the mailing list:
121 I<bioperl-l@bioperl.org>
123 rather than to the module maintainer directly. Many experienced and
124 reponsive experts will be able look at the problem and quickly
125 address it. Please include a thorough description of the problem
126 with code and data examples if at all possible.
128 =head2 Reporting Bugs
130 Report bugs to the Bioperl bug tracking system to help us keep track
131 the bugs and their resolution. Bug reports can be submitted via the
134 https://github.com/bioperl/bioperl-live/issues
138 Richard Adams, Richard.Adams@ed.ac.uk,
142 The rest of the documentation details each of the object
143 methods. Internal methods are usually preceded with a _
150 package Bio
::Tools
::Analysis
::Protein
::HNN
;
154 use HTTP
::Request
::Common qw
(POST
);
155 use Bio
::SeqFeature
::Generic
;
156 use Bio
::Seq
::Meta
::Array
;
157 $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME
} = 0;
159 use base
qw(Bio::Tools::Analysis::SimpleAnalysisBase);
161 #extends array for 2struc.
163 my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_hnn.pl';
164 my $ANALYSIS_NAME= 'HNN';
165 my $ANALYSIS_SPEC= {name
=> 'HNN', type
=> 'Protein'};
167 { mandatory
=> 'true',
168 type
=> 'Bio::PrimarySeqI',
174 '' => 'bulk', # same as undef
175 'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
176 raw
=> '[ {helix=>, sheet=>, struc=>, coil=>}]',
177 meta
=> 'Bio::Seq::Meta::Array object',
179 use constant MIN_STRUC_LEN
=> 3;
184 $self->{'_ANALYSIS_SPEC'} = $ANALYSIS_SPEC;
185 $self->{'_INPUT_SPEC'} = $INPUT_SPEC;
186 $self->{'_RESULT_SPEC'} = $RESULT_SPEC;
187 $self->{'_ANALYSIS_NAME'} = $ANALYSIS_NAME;
195 # delay repeated calls by default by 3 sec, set delay() to change
197 $self->status('TERMINATED_BY_ERROR');
198 my $request = POST
'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_hnn.pl',
199 Content_Type
=> 'form-data',
202 notice
=> $self->seq->seq,
206 my $text = $self->request($request)->content;
208 my ($next) = $text =~ /Prediction.*?=(.*?)>/;
210 my $out = "http://npsa-pbil.ibcp.fr/".$next;
211 my $req2 = HTTP
::Request
->new(GET
=>$out);
212 my $resp2 = $self->request ($req2);
213 $self->status('COMPLETED') if $resp2 ne '';
214 $self->{'_result'} = $resp2->content;
222 Usage : $job->result (...)
223 Returns : a result created by running an analysis
224 Args : see keys of $INPUT_SPEC
226 The method returns a result of an executed job. If the job was
227 terminated by an error the result may contain an error message instead
230 This implementation returns differently processed data depending on
237 Returns the raw ASCII data stream but without HTML tags.
239 =item 'Bio::SeqFeatureI'
241 The argument string defines the type of bioperl objects returned in an
242 array. The objects are L<Bio::SeqFeature::Generic>. Feature primary
243 tag is "2ary". Feature tags are "type" (which can be helix, sheet or
244 coil) "method" (HNN).
248 Array of hash references of scores/structure assignations { helix =E<gt>,
249 sheet =E<gt> , coil =E<gt> , struc=E<gt>}.
253 A Bio::Seq::Meta::Array object. Scores can be accessed using methods
254 from this class. Meta sequence names are HNN_helix, HNN_sheet,
264 my ($self,$value) = @_;
270 #parse into basic raw form, store this as well as '_result'
271 if (!exists($self->{'_parsed'}) ) {
272 my $result = IO
::String
->new($self->{'_result'});
273 while (my $line = <$result>) {
274 next unless $line =~ /^[HEC]\s/; # or for sopma/hnn /^[A-Z]\s/
275 $line =~/^([A-Z])\s+(\d+)\s+(\d+)\s+(\d+)/; # or for so
276 push @scores, { struc
=> $1,
282 $self->{'_parsed'} = \
@scores;
284 if ($value eq 'Bio::SeqFeatureI') {
285 $self->_get_2ary_coords();
286 for my $type (keys %{$self->{'_parsed_coords'}} ) {
287 next if $type =~ /\w{2,}/; #if not H,C,E or T
288 for my $loc (@
{$self->{'_parsed_coords'}{$type}} ) {
289 push @fts, Bio
::SeqFeature
::Generic
->new
290 (-start
=> $loc->{'start'},
291 -end
=> $loc->{'end'},
293 -primary
=> 'Domain',
296 method
=> $self->analysis_name,
298 } #end of array of strucs of type
299 } # end of all 2nd struc elements
300 delete $self->{'_parsed_coords'}; #remove temp data
302 } #endif BioSeqFeature
304 elsif ($value eq 'meta') {
305 #1st of all make 3 or 4 arrays of scores for each type from column data
307 for my $aa (@
{$self->{'_parsed'}}) {
308 push @
{$type_scores{'struc'}}, $aa->{'struc'};
309 push @
{$type_scores{'helix'}}, $aa->{'helix'};
310 push @
{$type_scores{'sheet'}}, $aa->{'sheet'};
311 push @
{$type_scores{'coil'}}, $aa->{'coil'};
314 ## bless as metasequence if necessary
315 if (!$self->seq->isa("Bio::Seq::MetaI")) {
316 bless ($self->seq, "Bio::Seq::Meta::Array");
318 $self->seq->isa("Bio::Seq::MetaI")
319 || $self->throw("$self is not a Bio::Seq::MetaI");
321 ## now make meta sequence
322 $Bio::Seq
::Meta
::Array
::DEFAULT_NAME
= 'HNN_struc';
323 for my $struc_type (keys %type_scores) {
324 my $meta_name = "HNN". "_" . "$struc_type";
325 my @meta = map{$_->{$struc_type}} @
{$self->{'_parsed'}};
326 if (grep{$_ eq $meta_name}$self->seq->meta_names ) {
327 $self->warn ("$meta_name already exists , not overwriting!");
330 $self->seq->named_meta($meta_name,\
@meta );
332 # return seq array object implementing meta sequence #
336 ## else for aa true value get data structure back ##
338 return $self->{'_parsed'};
342 #return raw result if no return fomrt stated
343 return $self->{'_result'};
347 sub _get_2ary_coords
{
348 #helper sub for result;
349 ##extracts runs of structure > MIN_STRUC_LENresidues or less if Turn:
350 #i.e., helical prediction for 1 residue isn't very meaningful...
351 ## and poulates array of hashes with start/end values.
352 #could be put into a secondary base class if need be
354 my @prot = @
{$self->{'_parsed'}};
356 for (my $index = 0; $index <= $#prot; $index++) {
357 my $type = $prot[$index]{'struc'};
358 next unless $type =~ /[HTCE]/;
360 for (my $j = $index + 1; $j <= $#prot; $j++) {
361 my $test = $prot[$j];
362 if ($test->{'struc'} eq $type) {
364 } elsif ( $length > MIN_STRUC_LEN
||
365 ($length <= MIN_STRUC_LEN
&& $type eq 'T') ) {
366 push @
{$Result{$type}}, {start
=> $index + 1 , end
=> $j};
367 $index += $length -1;
370 $index += $length - 1;
375 $self->{'_parsed_coords'} = \
%Result; #temp assignment