2 # BioPerl module for Bio::Tools::Protparam
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Richard Dobson, r.j.dobson at qmul dot ac dot uk
8 # Copyright Richard Dobson
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
Bio::Tools::Protparam - submit to and parse output from protparam
21 use Bio::Tools::Protparam;
  my $gb = Bio::DB::GenBank->new(-retrievaltype => 'tempfile' ,
25 my @ids=qw(O14521 O43709 O43826);
26 my $seqio = $gb->get_Stream_by_acc(\@ids );
28 while( my $seq = $seqio->next_seq ) {
30 my $pp = Bio::Tools::Protparam->new(seq=>$seq->seq);
33 "ID : ", $seq->display_id,"\n",
34 "Amino acid number : ",$pp->amino_acid_number(),"\n",
35 "Number of negative amino acids : ",$pp->num_neg(),"\n",
36 "Number of positive amino acids : ",$pp->num_pos(),"\n",
37 "Molecular weight : ",$pp->molecular_weight(),"\n",
38 "Theoretical pI : ",$pp->theoretical_pI(),"\n",
39 "Total number of atoms : ", $pp->total_atoms(),"\n",
40 "Number of carbon atoms : ",$pp->num_carbon(),"\n",
41 "Number of hydrogen atoms : ",$pp->num_hydrogen(),"\n",
42 "Number of nitrogen atoms : ",$pp->num_nitro(),"\n",
43 "Number of oxygen atoms : ",$pp->num_oxygen(),"\n",
44 "Number of sulphur atoms : ",$pp->num_sulphur(),"\n",
45 "Half life : ", $pp->half_life(),"\n",
46 "Instability Index : ", $pp->instability_index(),"\n",
47 "Stability class : ", $pp->stability(),"\n",
48 "Aliphatic_index : ",$pp->aliphatic_index(),"\n",
49 "Gravy : ", $pp->gravy(),"\n",
50 "Composition of A : ", $pp->AA_comp('A'),"\n",
51 "Composition of R : ", $pp->AA_comp('R'),"\n",
52 "Composition of N : ", $pp->AA_comp('N'),"\n",
53 "Composition of D : ", $pp->AA_comp('D'),"\n",
54 "Composition of C : ", $pp->AA_comp('C'),"\n",
55 "Composition of Q : ", $pp->AA_comp('Q'),"\n",
56 "Composition of E : ", $pp->AA_comp('E'),"\n",
57 "Composition of G : ", $pp->AA_comp('G'),"\n",
58 "Composition of H : ", $pp->AA_comp('H'),"\n",
59 "Composition of I : ", $pp->AA_comp('I'),"\n",
60 "Composition of L : ", $pp->AA_comp('L'),"\n",
61 "Composition of K : ", $pp->AA_comp('K'),"\n",
62 "Composition of M : ", $pp->AA_comp('M'),"\n",
63 "Composition of F : ", $pp->AA_comp('F'),"\n",
64 "Composition of P : ", $pp->AA_comp('P'),"\n",
65 "Composition of S : ", $pp->AA_comp('S'),"\n",
66 "Composition of T : ", $pp->AA_comp('T'),"\n",
67 "Composition of W : ", $pp->AA_comp('W'),"\n",
68 "Composition of Y : ", $pp->AA_comp('Y'),"\n",
69 "Composition of V : ", $pp->AA_comp('V'),"\n",
70 "Composition of B : ", $pp->AA_comp('B'),"\n",
71 "Composition of Z : ", $pp->AA_comp('Z'),"\n",
72 "Composition of X : ", $pp->AA_comp('X'),"\n";
This module takes an amino acid sequence and submits it to the
ProtParam program at http://web.expasy.org/cgi-bin/protparam/protparam.
Many properties of the submitted sequence are returned.
83 Richard Dobson, r.j.dobson at qmul dot ac dot uk
87 # Let the code begin...
# Declare the module's namespace. The qualified name has to be written as a
# single token; split at the '::' separators it is not read as one package name.
package Bio::Tools::Protparam;
92 use base
qw(Bio::Root::Root);
 Usage : $pp = Bio::Tools::Protparam->new(seq=>$seq->seq);
99 Function : Creates a new Protparam object
100 Returns : A Protparam object
106 my ($class,@args) = @_;
107 @args=('-url'=>'http://web.expasy.org/cgi-bin/protparam/protparam','-form'=>'sequence',@args);
108 my $self=$class->SUPER::new
(@args);
110 my ($url,$seq,$form)=$self->_rearrange([qw(URL SEQ FORM)],@args);
# Create the HTTP client that issues the POST request to $url below.
# The class name must be one token for the method call to resolve.
my $browser = LWP::UserAgent->new;
115 #send request to PROTPARAM @ Expasy
116 $response = $browser->post($url,
120 'User-Agent' => 'Mozilla/4.76 [en] (Win2000; U)',
124 $self->throw("$url error: ".$response->status_line) unless $response->is_success;
125 $self->throw("Bad content type at $url ".$response->content_type) unless $response->content_type eq 'text/html';
127 my $protParamOutput=$response->decoded_content;
129 $self->{'output'}=$protParamOutput;
131 return bless $self,$class;
138 Usage : $pp->num_neg()
139 Function : Retrieves the number of negative amino acids in a sequence
140 Returns : Returns the number of negative amino acids in a sequence
# Capture the digits that follow the bold "Total number of negatively charged
# residues" label in the stored ProtParam output. The pattern has no /x
# modifier, so it must stay on one line: a line break inside the literal would
# be matched as a literal newline and the closing </B> would never match.
# List-context assignment stores the captured text (undef when nothing matches).
($self->{'negAA'}) = $self->{'output'} =~ /<B>Total number of negatively charged residues.*?<\/B>\s*(\d*)/;
153 return $self->{'negAA'};
161 Usage : $pp->num_pos()
162 Function : Retrieves the number of positive amino acids in a sequence
163 Returns : Returns the number of positive amino acids in a sequence
# Capture the digits that follow the bold "Total number of positively charged
# residues" label in the stored ProtParam output. The pattern has no /x
# modifier, so it must stay on one line: a line break inside the literal would
# be matched as a literal newline and the closing </B> would never match.
# List-context assignment stores the captured text (undef when nothing matches).
($self->{'posAA'}) = $self->{'output'} =~ /<B>Total number of positively charged residues.*?<\/B>\s*(\d*)/;
172 return $self->{'posAA'};
175 =head2 amino_acid_number
177 Title : amino_acid_number
178 Usage : $pp->amino_acid_number()
179 Function : Retrieves the number of amino acids within a sequence
180 Returns : Returns the number of amino acids within a sequence
185 sub amino_acid_number
{
# Capture the integer that follows the bold "Number of amino acids:" label in
# the stored ProtParam output. The pattern is written on a single line because,
# without /x, any line break inside it is part of the pattern and would prevent
# the "</B> " sequence from matching.
($self->{'numAA'}) = $self->{'output'} =~ /<B>Number of amino acids:<\/B> (\d+)/;
188 return $self->{'numAA'};
194 Usage : $pp->total_atoms()
195 Function : Retrieves the total number of atoms within a sequence
196 Returns : Returns the total number of atoms within a sequence
# Capture the digits that follow the bold "Total number of atoms:" label in the
# stored ProtParam output.
# The parentheses on the left-hand side are essential: they put the match in
# list context so the captured group is stored. Without them the match runs in
# scalar context and only its success flag (1 or the empty string) is stored,
# which is what every other accessor in this module avoids.
# The pattern stays on one line because it has no /x modifier.
($self->{'total_atoms'}) = $self->{'output'} =~ /<B>Total number of atoms:<\/B>\s*(\d*)/;
205 return $self->{'total_atoms'};
208 =head2 molecular_weight
210 Title : molecular_weight
211 Usage : $pp->molecular_weight()
212 Function : Retrieves the molecular weight of a sequence
213 Returns : Returns the molecular weight of a sequence
219 sub molecular_weight
{
# Capture the (optionally decimal) number that follows the bold
# "Molecular weight:" label in the stored ProtParam output.
# The pattern is kept on one line: it has no /x modifier, and a line break
# after the backslash would turn the escaped dot "\." into an escaped newline
# followed by an any-character wildcard.
($self->{'MolWt'}) = $self->{'output'} =~ /<B>Molecular weight:<\/B> (\d*\.{0,1}\d*)/;
222 return $self->{'MolWt'};
226 =head2 theoretical_pI
228 Title : theoretical_pI
229 Usage : $pp->theoretical_pI()
230 Function : Retrieve the theoretical pI for a sequence
231 Returns : Return the theoretical pI for a sequence
# Capture the optionally signed, optionally decimal number that follows the
# bold "Theoretical pI:" label in the stored ProtParam output.
# The pattern is kept on one line: it has no /x modifier, and a line break
# after the backslash would turn the escaped dot "\." into an escaped newline
# followed by an any-character wildcard.
($self->{'TpI'}) = $self->{'output'} =~ /<B>Theoretical pI:<\/B> (-{0,1}\d*\.{0,1}\d*)/;
240 return $self->{'TpI'};
246 Usage : $pp->num_carbon()
247 Function : Retrieves the number of carbon atoms in a sequence
248 Returns : Returns the number of carbon atoms in a sequence
256 ($self->{'car'}) = $self->{'output'}=~/Carbon\s+C\s+(\d+)/;
257 return $self->{'car'};
263 Usage : $pp->num_hydrogen
264 Function : Retrieves the number of hydrogen atoms in a sequence
265 Returns : Returns the number of hydrogen atoms in a sequence
273 ($self->{'hyd'}) = $self->{'output'}=~/Hydrogen\s+H\s+(\d+)/;
274 return $self->{'hyd'}
280 Usage : $pp->num_nitro
281 Function : Retrieves the number of nitrogen atoms in a sequence
282 Returns : Returns the number of nitrogen atoms in a sequence
290 ($self->{'nitro'}) = $self->{'output'}=~/Nitrogen\s+N\s+(\d+)/;
291 return $self->{'nitro'};
297 Usage : $pp->num_oxygen()
298 Function : Retrieves the number of oxygen atoms in a sequence
299 Returns : Returns the number of oxygen atoms in a sequence
307 ($self->{'oxy'}) = $self->{'output'}=~/Oxygen\s+O\s+(\d+)/;
308 return $self->{'oxy'};
314 Usage : $pp->num_sulphur()
315 Function : Retrieves the number of sulphur atoms in a sequence
316 Returns : Returns the number of sulphur atoms in a sequence
324 ($self->{'sul'}) = $self->{'output'}=~/Sulfur\s+S\s+(\d+)/;
325 return $self->{'sul'};
331 Usage : $pp->half_life()
332 Function : Retrieves the half life of a sequence
333 Returns : Returns the half life of a sequence
341 ($self->{'half_life'}) = $self->{'output'}=~/The estimated half-life is.*?(-{0,1}\d*\.{0,1}\d*)\s*hours \(mammalian reticulocytes, in vitro\)/;
342 return $self->{'half_life'};
345 =head2 instability_index
347 Title : instability_index
348 Usage : $pp->instability_index()
349 Function : Retrieves the instability index of a sequence
350 Returns : Returns the instability index of a sequence
356 sub instability_index
{
358 ($self->{'InstabilityIndex'})=$self->{'output'}=~/The instability index \(II\) is computed to be (-{0,1}\d*\.{0,1}\d*)/;
359 return $self->{'InstabilityIndex'};
365 Usage : $pp->stability()
 Function : Retrieves the stability class that ProtParam reports for the sequence
367 Returns : 'stable' or 'unstable'
375 ($self->{'Stability'})=$self->{'output'}=~/This classifies the protein as\s(\w+)\./;
376 return $self->{'Stability'};
379 =head2 aliphatic_index
381 Title : aliphatic_index
382 Usage : $pp->aliphatic_index()
383 Function : Retrieves the aliphatic index of the sequence
384 Returns : Returns the aliphatic index of the sequence
# Capture the optionally signed, optionally decimal number that follows the
# bold "Aliphatic index:" label in the stored ProtParam output.
# The pattern is kept on one line: it has no /x modifier, so line breaks inside
# it would be matched literally and would also change what "\s*", "\d*" and
# "\." mean.
($self->{'AliphaticIndex'}) = $self->{'output'} =~ /<B>Aliphatic index:<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/;
393 return $self->{'AliphaticIndex'};
401 Function : Retrieves the grand average of hydropathicity (GRAVY) of a sequence
402 Returns : Returns the grand average of hydropathicity (GRAVY) of a sequence
# Capture the optionally signed, optionally decimal number that follows the
# bold "Grand average of hydropathicity (GRAVY):" label in the stored
# ProtParam output.
# The pattern is kept on one line: it has no /x modifier, so line breaks inside
# it would be matched literally and would also change what "\s*", "\d*" and
# "\." mean.
($self->{'GRAVY'}) = $self->{'output'} =~ /<B>Grand average of hydropathicity \(GRAVY\):<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/;
411 return $self->{'GRAVY'};
417 Usage : $pp->AA_comp('P')
418 Function : Retrieves the percentage composition of a given amino acid for a sequence
419 Returns : Returns the percentage composition of a given amino acid for a sequence
420 Args : A single letter amino acid code eg A, R, G, P etc
429 my $AA={qw(A Ala R Arg N Asn D Asp C Cys Q Gln E Glu G Gly H His I Ile L Leu K Lys M Met F Phe P Pro S Ser T Thr W Trp Y Tyr V Val B Asx Z Glx X Xaa)};
430 ($self->{$aa})= $self->{'output'}=~/$AA->{$aa} \($aa\)\s+\d+\s+(\d+\.\d+)%/;