bp_process_wormbase: move program to new Bio-DB-Ace distribution
[bioperl-live.git] / Bio / CodonUsage / IO.pm
blobdc44823f764329150e18e09ba44ed9e3e17a9528
2 # BioPerl module for Bio::CodonUsage::IO
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Richard Adams (richard.adams@ed.ac.uk)
8 # Copyright Richard Adams
10 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::CodonUsage::IO - for reading and writing codon usage tables to file
17 =head1 SYNOPSIS
19 use Bio::CodonUsage::IO;
21 ## read in a codon usage file
22 my $io = Bio::CodonUsage::IO->new(-file => "in");
23 my $cut = $io->next_data();
25 ## write it out again
26 my $out = Bio::CodonUsage::IO->new(-file => ">out");
27 $out->write_data($cut);
29 =head1 DESCRIPTION
31 This class provides standard IO methods for reading and writing text files
32 of codon usage tables. These tables can initially be retrieved using
33 Bio::DB::CUTG. At present only this format is supported for read/write.
35 Reading a CUTG will return a Bio::CodonUsage::Table object.
37 =head1 SEE ALSO
39 L<Bio::Tools::CodonTable>,
40 L<Bio::WebAgent>,
41 L<Bio::CodonUsage::Table>,
42 L<Bio::CodonUsage::IO>
44 =head1 FEEDBACK
46 =head2 Mailing Lists
48 User feedback is an integral part of the evolution of this and other
49 Bioperl modules. Send your comments and suggestions preferably to one
50 of the Bioperl mailing lists. Your participation is much appreciated.
52 bioperl-l@bioperl.org - General discussion
53 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
55 =head2 Support
57 Please direct usage questions or support issues to the mailing list:
59 I<bioperl-l@bioperl.org>
61 rather than to the module maintainer directly. Many experienced and
62 reponsive experts will be able look at the problem and quickly
63 address it. Please include a thorough description of the problem
64 with code and data examples if at all possible.
66 =head2 Reporting Bugs
68 Report bugs to the Bioperl bug tracking system to help us keep track
69 the bugs and their resolution. Bug reports can be submitted via the
70 web:
72 https://github.com/bioperl/bioperl-live/issues
74 =head1 AUTHORS
76 Richard Adams, Richard.Adams@ed.ac.uk
78 =head1 APPENDIX
80 The rest of the documentation details each of the object
81 methods. Internal methods are usually preceded with a _
83 =cut
86 # Let the code begin
88 package Bio::CodonUsage::IO;
89 use Bio::CodonUsage::Table;
91 use base qw(Bio::Root::IO);
93 =head2 new
95 Title : new
96 Usage : my $io = Bio::CodonUsage::IO->new(-file => "CUTfile");
97 Purpose: To read/write a Bio:CodonUsage::Table object
98 Returns: A Bio:CodonUsage::IO object
99 Args : a file or file handle
101 =cut
103 sub new {
104 my ($class , @args) = @_;
105 my $self = $class->SUPER::new(@args);
109 =head2 next_data
111 Title : next_data
112 Usage : my $cut = $io->next_data();
113 Purpose: To obtain a Bio:CodonUsage::Table object
114 Returns: A Bio:CodonUsage::Table object
115 Args : none
117 =cut
119 sub next_data {
120 my $self = shift;
121 my $cut = $self->_parse;
122 return $cut;
125 =head2 write_data
127 Title : write_data
128 Usage : $io->write_data($cut);
129 Purpose: To write a CUT to file
130 Returns: void
131 Args : a Bio::CodonUsage::Table object reference
133 =cut
136 sub write_data {
137 my ($self, $cut) = @_;
138 if (!$cut || ! $cut->isa(Bio::CodonUsage::Table)) {
139 $self->throw("must supply a Bio::CodonUsage::Table object for writing\n");
141 my $outstring = "Codon usage table\n\n";
143 my $sp_string = $cut->species . "[" . $cut->_gb_db . "] " .
144 $cut->cds_count . " CDS's\n\n";
145 $outstring .= $sp_string;
146 my $colhead = sprintf("%-9s%-9s%15s%12s%12s\n\n", "AmAcid",
147 "Codon", "Number", "/1000", "Fraction");
148 $outstring .= $colhead;
150 ### now write bulk of codon data ##
151 my $ctable = Bio::Tools::CodonTable->new;
153 for my $f (qw(G A T C)) {
154 for my $s (qw(G A T C)) {
155 for my $t (qw(G A T C)) {
156 $cod = $f . $s . $t;
157 my $aa =$Bio::SeqUtils::THREECODE {$ctable->translate($cod)};
158 my $codstr = sprintf("%-9s%-9s%15.2f%12.2f%12.2f\n",
160 $aa, $cod, my $tt = $cut->codon_count($cod)|| 0.00,
161 my $ll =$cut->{'_table'}{$aa}{$cod}{'per1000'}|| 0.00,
162 my $ss = $cut->codon_rel_frequency($cod) || 0.00);
163 $outstring .= $codstr;
165 $outstring .= "\n";
168 $outstring .= "\n\n";
170 ## now append GC data
171 $outstring .= "Coding GC ". $cut->get_coding_gc('all'). "%\n";
172 $outstring .= "1st letter GC ". $cut->get_coding_gc(1). "%\n";
173 $outstring .= "2nd letter GC ". $cut->get_coding_gc(2). "%\n";
174 $outstring .= "3rd letter GC ". $cut->get_coding_gc(3). "%\n";
175 $outstring .= "Genetic code " . $cut->genetic_code() ."\n\n\n";
177 $self->_print ($outstring);
178 $self->flush();
182 sub _parse {
183 my $self = shift;
184 my $cdtableobj = Bio::CodonUsage::Table->new();
185 while (my $line = $self->_readline() ) {
186 next if $line =~ /^$/ ;
187 $line =~ s/End/Ter/;
188 ## now parse in species name, cds number
190 if ($line =~ /^(.+?)\s*\[(\w+)\].+?(\d+)/) {
191 $cdtableobj->species($1);
192 $cdtableobj->{'_gb_db'} = $2;
193 $cdtableobj->cds_count($3);
196 ## now parse in bulk of codon usage table
197 elsif ( $line =~ /^(\w\w\w)\s+(\w+)\s+(\d+\.\d+)
198 \s+(\d+\.\d+)\s+(\d+\.\d+)/x){
199 if (defined ($1)) {
200 $cdtableobj->{'_table'}{$1}{$2} = {
201 'abs_count'=>$3,
202 'per1000'=> $4,
203 'rel_freq'=> $5,
208 ## now parse in gc data ####
209 if($line =~ /^Cod.+?(\d\d\.\d\d)/ ){
210 $cdtableobj->{'_coding_gc'}{'all'} = $1;
212 elsif ($line =~ /^1st.+?(\d\d\.\d\d)/){
213 $cdtableobj->{'_coding_gc'}{'1'} = $1;
215 elsif($line =~ /^2nd.+?(\d\d\.\d\d)/){
216 $cdtableobj->{'_coding_gc'}{'2'} = $1;
218 elsif ($line =~ /^3rd.+?(\d\d\.\d\d)/){
219 $cdtableobj->{'_coding_gc'}{'3'} = $1;
222 elsif ($line =~ /^Gen.+?(\d+)/){
223 $cdtableobj->{'_genetic_code'} = $1;
226 ## check has been parsed ok ##
227 if (scalar keys %{$cdtableobj->{'_table'}} != 21) {
228 $cdtableobj->warn("probable parsing error - should be 21 entries for 20aa + stop codon");
230 return $cdtableobj;
236 __END__