2 # BioPerl module for Bio::Tools::pICalculator
4 # Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved.
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
13 Bio::Tools::pICalculator - calculate the isoelectric point of a protein
17 Calculates the isoelectric point of a protein, the pH at which there
18 is no overall charge on the protein. Calculates the charge on a protein
19 at a given pH. Can use built-in sets of pK values or custom pK sets.
23 use Bio::Tools::pICalculator;
26 my $in = Bio::SeqIO->new( -fh => \*STDIN ,
29 my $calc = Bio::Tools::pICalculator->new(-places => 2,
32 while ( my $seq = $in->next_seq ) {
35 print sprintf( "%s\t%s\t%.2f\n",
38 $calc->charge_at_pH($iep) );
40 for( my $i = 0; $i <= 14; $i += 0.5 ){
41 print sprintf( "pH = %.2f\tCharge = %.2f\n",
43 $calc->charge_at_pH($i) );
49 http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
50 http://emboss.sourceforge.net/apps/cvs/emboss/apps/iep.html
51 http://us.expasy.org/tools/pi_tool.html
55 There are various sources for the pK values of the amino acids.
56 The set of pK values chosen will affect the pI reported.
58 The charge state of each residue is assumed to be independent of
59 the others. Protein modifications (such as a phosphate group) that
60 have a charge are ignored.
66 User feedback is an integral part of the evolution of this
67 and other Bioperl modules. Send your comments and suggestions
68 preferably to one of the Bioperl mailing lists.
69 Your participation is much appreciated.
71 bioperl-l@bioperl.org - General discussion
72 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 the bugs and their resolution. Bug reports can be submitted via the
80 https://github.com/bioperl/bioperl-live/issues
84 Mark Southern (mark_southern@merck.com). From an algorithm by David
85 Tabb found at http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf.
86 Modification for Bioperl, additional documentation by Brian Osborne.
90 Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved. This module is
91 free software. It may be used, redistributed and/or modified under the terms
92 of the Perl Artistic License (see http://www.perl.com/perl/misc/Artistic.html)
96 The rest of the documentation details each of the object methods.
97 Private methods are usually preceded by a _.
101 # Let the code begin...
103 package Bio
::Tools
::pICalculator
;
107 use base
qw(Bio::Root::Root);
109 # pK values from the DTASelect program from Scripps
110 # http://fields.scripps.edu/DTASelect
111 my $DTASelect_pK = { N_term
=> 8.0,
122 # pK values from the iep program from EMBOSS
123 # http://emboss.sourceforge.net/apps/cvs/emboss/apps/iep.html
124 my $Emboss_pK = { N_term
=> 8.6,
138 Usage : Bio::Tools::pICalculator->new
139 Function: Instantiates the Bio::Tools::pICalculator object
140 Example : $calc = Bio::Tools::pICalculator->new( -pKset => \%pKvalues,
146 $calc = Bio::Tools::pICalculator->new( -pKset => 'string',
151 Constructs a new pICalculator. Arguments are a flattened hash.
152 Valid, optional keys are:
154 pKset - A reference to a hash with key value pairs for the
155 pK values of the charged amino acids. Required keys
158 N_term C_term K R H D E C Y
160 pKset - A string ( 'DTASelect' or 'EMBOSS' ) that will
161 specify an internal set of pK values to be used. The
164 seq - A Bio::Seq sequence object to analyze
166 places - The number of decimal places to use in the
167 isoelectric point calculation. The default is 2.
169 Returns : The description
170 Args : The description or none
175 my( $class, %opts ) = @_;
176 my $self = $class->SUPER::new
(%opts);
177 $self = bless {}, ref $self || $self;
178 $self->seq( $opts{-seq
} ) if exists $opts{-seq
};
179 $self->pKset( $opts{-pKset
} || 'EMBOSS' );
180 exists $opts{-places
} ?
$self->places( $opts{-places
} ) :
188 Usage : $calc->seq($seqobj)
189 Function: Sets or returns the Bio::Seq used in the calculation
190 Example : $seqobj = Bio::Seq->new(-seq=>"gghhhmmm",-id=>"GHM");
191 $calc = Bio::Tools::pICalculator->new;
193 Returns : Bio::Seq object
194 Args : Bio::Seq object or none
199 my( $this, $seq ) = @_;
200 unless( defined $seq && UNIVERSAL
::isa
($seq,'Bio::Seq') ){
201 $this->throw("$seq is not a valid Bio::Seq object");
203 $this->{-seq
} = $seq;
204 $this->{-count
} = count_charged_residues
( $seq );
205 return $this->{-seq
};
211 Usage : $pkSet = $calc->pKSet(\%pKSet)
212 Function: Sets or returns the hash of pK values used in the calculation
213 Example : $calc->pKset('emboss')
214 Returns : reference to pKset hash
215 Args : The reference to a pKset hash, a string, or none. Examples:
217 pKset - A reference to a hash with key value pairs for the
218 pK values of the charged amino acids. Required keys
221 N_term C_term K R H D E C Y
223 pKset - A valid string ( 'DTASelect' or 'EMBOSS' ) that will
224 specify an internal set of pK values to be used. The
230 my ( $this, $pKset ) = @_;
231 if( ref $pKset eq 'HASH' ){ # user defined pK values
232 $this->{-pKset
} = $pKset;
233 }elsif( $pKset =~ /^emboss$/i ){ # from EMBOSS's iep program
234 $this->{-pKset
} = $Emboss_pK;
235 }elsif( $pKset =~ /^dtaselect$/i ){ # from DTASelect (scripps)
236 $this->{-pKset
} = $DTASelect_pK;
237 }else{ # default to EMBOSS
238 $this->{-pKset
} = $Emboss_pK;
240 return $this->{-pKset
};
245 $this->{-places
} = shift if @_;
246 return $this->{-places
};
253 Function: Returns the isoelectric point
254 Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
257 Returns : The isoelectric point of the sequence in the Bio::Seq object
264 return _calculate_iep
($this->{-pKset
},
274 Usage : $charge = $calc->charge_at_pH($pH)
275 Function: Sets or gets the description of the sequence
276 Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
278 $charge = $calc->charge_at_ph("7");
279 Returns : The predicted charge at the given pH
286 return _calculate_charge_at_pH
( shift, $this->{-pKset
},
290 sub count_charged_residues
{
292 my $sequence = $seq->seq;
294 for ( qw( K R H D E C Y ) ){ # charged AA's
295 $count->{$_}++ while $sequence =~ /$_/ig;
301 my( $pK, $places, $seq, $count ) = @_;
304 my $last_charge = 0.0;
305 my $format = "%.${places}f";
307 unless( defined $count ){
308 $count = count_charged_residues
($seq);
311 my $charge = _calculate_charge_at_pH
( $pH, $pK, $count );
312 last if sprintf($format,$charge) ==
313 sprintf($format,$last_charge);
314 $charge > 0 ?
( $pH += $step ) : ( $pH -= $step );
316 $last_charge = $charge;
318 return sprintf( $format, $pH );
321 # it's the sum of all the partial charges for the
322 # termini and all of the charged aa's!
323 sub _calculate_charge_at_pH
{
324 no warnings
; # don't complain if a given key doesn't exist
325 my( $pH, $pK, $count ) = @_;
326 my $charge = _partial_charge
( $pK->{N_term
}, $pH )
327 + $count->{K
} * _partial_charge
( $pK->{K
}, $pH )
328 + $count->{R
} * _partial_charge
( $pK->{R
}, $pH )
329 + $count->{H
} * _partial_charge
( $pK->{H
}, $pH )
330 - $count->{D
} * _partial_charge
( $pH, $pK->{D
} )
331 - $count->{E
} * _partial_charge
( $pH, $pK->{E
} )
332 - $count->{C
} * _partial_charge
( $pH, $pK->{C
} )
333 - $count->{Y
} * _partial_charge
( $pH, $pK->{Y
} )
334 - _partial_charge
( $pH, $pK->{C_term
} );
338 # Concentration Ratio is 10**(pK - pH) for positive groups
339 # and 10**(pH - pK) for negative groups
340 sub _partial_charge
{
341 my $cr = 10 ** ( $_[0] - $_[1] );
342 return $cr / ( $cr + 1 );