Bio::DB::CUTG: move into its own distribution
[bioperl-live.git] / bin / bp_chaos_plot
blob4a4fcefc6917a4cfaa209052cb2d54ee7f687205
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
6 use Bio::SeqIO;
7 use Getopt::Long;
8 use GD;
10 use vars qw( $USAGE %VALIDFORMATS);
12 %VALIDFORMATS = ( 'png' => 1,
13 'jpeg' => 1,
14 'gd2' => 1,
15 'gd' => 1,
16 'gif' => 1,
17 'wbmp' => 1 );
19 $USAGE = "usage:\tchaos_plot -i/--input=INPUTFILE -f/--format=SEQFORMAT \n".
20 "\t-o/--output=OUTPUTFILE -g/--graphics=GRAPHIC TYPE\n".
21 "\t-w/--width=600 -h/--height=400\n";
23 $USAGE .= "\tValid graphics formats: (" . join(",", ( keys %VALIDFORMATS )) .")\n";
24 $USAGE .= "\tImage size defaults to 600x400, SEQFORMAT to fasta\n";
25 $USAGE .= "\tINPUTFILE can also be read from STDIN\n";
27 my ($format,$graph,$width,$height,$seqfile,$output) = ('fasta', 'png', 600, 400);
28 GetOptions( "i|input:s" => \$seqfile,
29 "f|format:s" => \$format,
30 "o|output:s" => \$output,
31 "g|graph|graphics:s" => \$graph,
32 "width:i" => \$width,
33 "height:i" => \$height
36 if( ! $output || ! $VALIDFORMATS{$graph} ) {
37 die $USAGE ;
39 my $seqin;
40 $seqfile = shift unless $seqfile;
41 if( defined $seqfile ) {
42 print "Could not open file [$seqfile]\n$USAGE" and exit unless -e $seqfile;
43 $seqin = new Bio::SeqIO(-format => $format,
44 -file => $seqfile);
45 } else {
46 $seqin = new Bio::SeqIO(-format => $format,
47 -fh => \*STDIN);
50 my $img = new GD::Image($width,$height);
51 my $white = $img->colorAllocate(255,255,255);
52 my $black = $img->colorAllocate(0,0,0);
54 my $seq = $seqin->next_seq;
55 die("Sequence type must be DNA not " . $seq->alphabet())
56 unless $seq->alphabet ne 'dna' or $seq->alphabet ne 'rna';
57 my %nmerdata;
58 my $len = $seq->length();
59 my $max = 0;
61 my ($x,$y) = ( 0.5, 0.5);
62 $img->string(gdGiantFont, 1,1, 'A', $black);
63 $img->string(gdGiantFont, 0,$height - 15, 'C', $black);
64 $img->string(gdGiantFont, $width - 15,1, 'T', $black);
65 $img->string(gdGiantFont, $width - 15,$height -20, 'G', $black);
67 for( my $i = 1; $i <= $len; $i++ ) {
69 my $base = lc $seq->subseq($i,$i);
70 if( $base eq 'a' ) {
71 $x *= 0.5;
72 $y *= 0.5;
73 } elsif ( $base eq 'g' ) {
74 $x = ( $x + 1.0 ) * 0.5;
75 $y = ( $y + 1.0 ) * 0.5;
76 } elsif ( $base eq 'c' ) {
77 $x *= 0.5;
78 $y = ( $y + 1.0 ) * 0.5;
79 } elsif ( $base eq 't' or $base eq 'u' ) {
80 $x = ( $x + 1.0 ) * 0.5;
81 $y *= 0.5;
84 $img->setPixel($x * $width,$y * $height, $black);
86 open my $OUT, '>', $output or die "Could not write file '$output': $!\n";
87 binmode $OUT;
88 $graph =~ s/jpg/jpeg/;
90 print $OUT $img->$graph();
91 close $OUT;
94 __END__
96 =head1 NAME
98 bp_chaos_plot - a chaos plot from DNA and RNA sequences
100 =head1 SYNOPSIS
102 bp_chaos_plot.pl -i/--input=INPUTFILE -f/--format=SEQFORMAT
103 -o/--output=OUTPUTFILE -g/--graphics=GRAPHIC FORMAT
104 -w/--width=WIGHT -h/--height=HEIGHT
106 =head1 DESCRIPTION
108 This scripts generates image files using GD image library to visualize
109 nucleotide sequences using chaos plot.
111 =head1 OPTIONS
113 Valid graphics formats are currently gd, gd2, png, wbmp, jpeg and gif.
115 The default size of the image file is 600x400.
117 The sequence input can be provided using any of the three methods:
119 =over 3
121 =item unnamed argument
123 bp_chaos_plot filename
125 =item named argument
127 bp_chaos_plot -i filename
129 =item standard input
131 bp_chaos_plot < filename
133 =back
135 =head1 FEEDBACK
137 =head2 Mailing Lists
139 User feedback is an integral part of the evolution of this and other
140 Bioperl modules. Send your comments and suggestions preferably to
141 the Bioperl mailing list. Your participation is much appreciated.
143 bioperl-l@bioperl.org - General discussion
144 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
146 =head2 Reporting Bugs
148 Report bugs to the Bioperl bug tracking system to help us keep track
149 of the bugs and their resolution. Bug reports can be submitted via the
150 web:
152 https://github.com/bioperl/bioperl-live/issues
154 =head1 AUTHOR - Jason Stajich
156 Email jason@bioperl.org
158 =head1 HISTORY
160 This code is based on EMBOSS C code for chaos.c by Ian Longden.
161 Included are documentation from EMBOSS code:
163 Chaos produces a chaos plot. The original application is part of the
164 ACEDB genome database package, written by ** Richard Durbin (MRC LMB,
165 UK) rd@mrc-lmba.cam.ac.uk, and Jean Thierry-Mieg (CRBM du CNRS,
166 France) mieg@crbm1.cnusc.fr
168 =cut