t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / scripts / utilities / bp_mutate.pl
blob1468688c7be6c43183f9a2d1c42cf00c4b0b28e1
1 #!/usr/bin/perl
3 =head1 NAME
5 bp_mutate.pl - randomly mutagenize a single protein or DNA sequence
7 =head1 SYNOPSIS
9 ./bp_mutate.pl -p 25 -i test.fa -n 5 -f swiss -o muts.swiss
11 #or
13 ./bp_mutate.pl --percent=25 --input=test.fa --number=5 -output=x.fa
15 =head1 DESCRIPTION
17 Randomly mutagenize a single protein or DNA sequence one or more times.
18 Specify percentage mutated and number of resulting mutant sequences.
19 Print mutagenized sequences to STDOUT or write to an output file.
21 -h|--help Help
22 -p|--percent Percent mutagenized
23 -n|--number Number of mutant sequences created
24 -o|--output Output file (optional)
25 -f|--format Output format (default: fasta)
26 -i|--input Input file
28 =head1 FEEDBACK
30 User feedback is an integral part of the evolution of this and other
31 Bioperl scripts. Send your comments and suggestions to the Bioperl
32 mailing list. Your participation is much appreciated.
34 bioperl-l@bioperl.org - General discussion
35 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
37 =head2 Reporting Bugs
39 Report bugs to the Bioperl bug tracking system to help us keep track
40 of the bugs and their resolution. Bug reports can be submitted via the
41 web:
43 https://github.com/bioperl/bioperl-live/issues
45 =head1 AUTHOR
47 Brian Osborne, bosborne at alum.mit.edu
49 =cut
51 use strict;
52 use warnings;
53 use Getopt::Long;
54 use Bio::SeqIO;
56 my ( $help, $in_file, $percent, $out_file, $number );
57 my $format = "fasta";
58 my @dna = qw(a g c t);
59 my @amino = qw(a c d e f g h i k l m n p q r s t v w y);
61 GetOptions(
62 "h|help" => \$help,
63 "p|percent:i" => \$percent,
64 "n|number:i" => \$number,
65 "o|output:s" => \$out_file,
66 "f|format:s" => \$format,
67 "i|input:s" => \$in_file
70 usage() if ( $help || !$percent || !$in_file || !$number || $percent > 100 );
72 # Seed the random number generator. "time|$$" combines the
73 # current time with the current process id
74 srand( time | $$ );
76 my $seqio = Bio::SeqIO->new( -file => $in_file );
77 my $seqobj = $seqio->next_seq;
78 my $num_mut = percent_to_num($percent);
79 my @seq_arr = ();
81 # don't keep a mutant that's already been made
82 while ( $number > $#seq_arr + 1 ) {
83 my $mut_seq = mutate_all( $seqobj, $num_mut );
84 push @seq_arr, $mut_seq unless ( grep /$mut_seq/, @seq_arr );
87 foreach my $mut_seq (@seq_arr) {
88 my $name = $seqobj->display_id . "-${percent}_percent-$number";
89 my $outseq = Bio::Seq->new(
90 -seq => $mut_seq,
91 -display_id => $name,
92 -desc => $seqobj->desc
94 my %args = ( -format => $format );
95 $args{-file} = ">>$out_file" if $out_file;
96 my $seqio = Bio::SeqIO->new(%args);
97 $seqio->write_seq($outseq);
98 $number--;
101 # mutagenize the sequence, one-by-one
102 sub mutate_all {
103 my ( $seq_obj, $num ) = @_;
104 my $type = $seq_obj->alphabet;
105 my $str = $seq_obj->seq;
107 # store the mutagenized positions in $positions
108 my $positions = "";
109 for ( my $i = 0 ; $i < $num_mut ; ++$i ) {
110 ( $str, $positions ) = mutate_one( $str, $type, $positions );
112 $str;
115 # mutagenize one position
116 sub mutate_one {
117 my ( $str, $type, $positions ) = @_;
118 my ( $position, $new_char );
120 # pick a random position in the sequence, checking
121 # that the position isn't already mutagenized
122 do {
123 $position = random_position($str);
124 } until ( $positions !~ /\b$position\b/ );
125 $positions .= "$position ";
126 my $current_char = substr( $str, $position, 1 );
128 # pick a random char that's not the existing char
129 do {
130 $new_char = random_char($type);
131 } until ( $new_char ne $current_char );
132 substr( $str, $position, 1, $new_char );
133 ( $str, $positions );
136 # randomly select a position in the sequence
137 sub random_position {
138 my $string = shift;
139 int( rand( length($string) ) );
142 # randomly select one of the chars depending on alphabet
143 sub random_char {
144 my $type = shift;
145 $type eq "protein"
146 ? return $amino[ rand @amino ]
147 : return $dna[ rand @dna ];
150 sub percent_to_num {
151 my $percent = shift;
152 int( $percent / 100 * length( $seqobj->seq ) );
155 sub usage {
156 exec( 'perldoc', $0 );
157 exit(0);
160 __END__