scripts/utilities/bp_mutate.pl

   1 #!/usr/bin/perl
   2
   3 =head1 NAME
   4
   5 bp_mutate.pl - randomly mutagenize a single protein or DNA sequence
   6
   7 =head1 SYNOPSIS
   8
   9   ./bp_mutate.pl -p 25 -i test.fa -n 5 -f swiss -o muts.swiss
  10
  11   #or
  12
  13   ./bp_mutate.pl --percent=25 --input=test.fa --number=5 --output=x.fa
  14
  15 =head1 DESCRIPTION
  16
  17 Randomly mutagenize a single protein or DNA sequence one or more times.
  18 Specify percentage mutated and number of resulting mutant sequences.
  19 Print mutagenized sequences to STDOUT or write to an output file.
  20
  21   -h|--help    Help
  22   -p|--percent Percent mutagenized
  23   -n|--number  Number of mutant sequences created
  24   -o|--output  Output file (optional)
  25   -f|--format  Output format (default: fasta)
  26   -i|--input   Input file
  27
  28 =head1 FEEDBACK
  29
  30 User feedback is an integral part of the evolution of this and other
  31 Bioperl scripts. Send your comments and suggestions to the Bioperl
  32 mailing list.  Your participation is much appreciated.
  33
  34   bioperl-l@bioperl.org                  - General discussion
  35   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  36
  37 =head2 Reporting Bugs
  38
  39 Report bugs to the Bioperl bug tracking system to help us keep track
  40 of the bugs and their resolution. Bug reports can be submitted via the
  41 web:
  42
  43   https://github.com/bioperl/bioperl-live/issues
  44
  45 =head1 AUTHOR
  46
  47 Brian Osborne, bosborne at alum.mit.edu
  48
  49 =cut
  50
  51 use strict;
  52 use warnings;
  53 use Getopt::Long;
  54 use Bio::SeqIO;
  55
  56 my ( $help, $in_file, $percent, $out_file, $number );
  57 my $format = "fasta";
  58 my @dna    = qw(a g c t);
  59 my @amino  = qw(a c d e f g h i k l m n p q r s t v w y);
  60
  61 GetOptions(
  62     "h|help"      => \$help,
  63     "p|percent:i" => \$percent,
  64     "n|number:i"  => \$number,
  65     "o|output:s"  => \$out_file,
  66     "f|format:s"  => \$format,
  67     "i|input:s"   => \$in_file
  68 );
  69
  70 usage() if ( $help || !$percent || !$in_file || !$number || $percent > 100 );
  71
  72 # Seed the random number generator. "time|$$" combines the
  73 # current time with the current process id
  74 srand( time | $$ );
  75
  76 my $seqio   = Bio::SeqIO->new( -file => $in_file );
  77 my $seqobj  = $seqio->next_seq;
  78 my $num_mut = percent_to_num($percent);
  79 my @seq_arr = ();
  80
  81 # don't keep a mutant that's already been made
  82 while ( $number > $#seq_arr + 1 ) {
  83     my $mut_seq = mutate_all( $seqobj, $num_mut );
  84     push @seq_arr, $mut_seq unless ( grep /$mut_seq/, @seq_arr );
  85 }
  86
  87 foreach my $mut_seq (@seq_arr) {
  88     my $name   = $seqobj->display_id . "-${percent}_percent-$number";
  89     my $outseq = Bio::Seq->new(
  90         -seq        => $mut_seq,
  91         -display_id => $name,
  92         -desc       => $seqobj->desc
  93     );
  94     my %args = ( -format => $format );
  95     $args{-file} = ">>$out_file" if $out_file;
  96     my $seqio = Bio::SeqIO->new(%args);
  97     $seqio->write_seq($outseq);
  98     $number--;
  99 }
 100
 101 # mutagenize the sequence, one-by-one
 102 sub mutate_all {
 103     my ( $seq_obj, $num ) = @_;
 104     my $type = $seq_obj->alphabet;
 105     my $str  = $seq_obj->seq;
 106
 107     # store the mutagenized positions in $positions
 108     my $positions = "";
 109     for ( my $i = 0 ; $i < $num_mut ; ++$i ) {
 110         ( $str, $positions ) = mutate_one( $str, $type, $positions );
 111     }
 112     $str;
 113 }
 114
 115 # mutagenize one position
 116 sub mutate_one {
 117     my ( $str, $type, $positions ) = @_;
 118     my ( $position, $new_char );
 119
 120     # pick a random position in the sequence, checking
 121     # that the position isn't already mutagenized
 122     do {
 123         $position = random_position($str);
 124     } until ( $positions !~ /\b$position\b/ );
 125     $positions .= "$position ";
 126     my $current_char = substr( $str, $position, 1 );
 127
 128     # pick a random char that's not the existing char
 129     do {
 130         $new_char = random_char($type);
 131     } until ( $new_char ne $current_char );
 132     substr( $str, $position, 1, $new_char );
 133     ( $str, $positions );
 134 }
 135
 136 # randomly select a position in the sequence
 137 sub random_position {
 138     my $string = shift;
 139     int( rand( length($string) ) );
 140 }
 141
 142 # randomly select one of the chars depending on alphabet
 143 sub random_char {
 144     my $type = shift;
 145     $type eq "protein"
 146       ? return $amino[ rand @amino ]
 147       : return $dna[ rand @dna ];
 148 }
 149
 150 sub percent_to_num {
 151     my $percent = shift;
 152     int( $percent / 100 * length( $seqobj->seq ) );
 153 }
 154
 155 sub usage {
 156     exec( 'perldoc', $0 );
 157     exit(0);
 158 }
 159
 160 __END__