new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / denovo / gethist.pl
blob142dbd6030e82d57273653f4ac65d8b51309c63f
1 #!/bin/env perl
2 use strict;
3 use warnings;
# ---- Command line ----------------------------------------------------------
# Arguments: <col> <input> <output prefix>
#   <col>            1-based index of the whitespace-separated field to tally
#   <input>          table to read (opened through openFH)
#   <output prefix>  the histogram is written to "<output prefix>.dat"
die "Usage: $0 <col> <input[gzipped]> <output prefix>\n" if @ARGV < 3;
my ($col,$inf,$outf)=@ARGV;

# Reject anything that is not a positive integer: after the decrement below,
# 0 or a non-numeric string would turn into -1 and silently select the LAST
# field of every line instead of reporting the mistake.
die "Column must be a positive integer, got [$col]\n"
    unless $col =~ /\A\+?\d+\z/ and $col >= 1;
warn "From Col=[$col] of [$inf] to [$outf]\n";

--$col;    # convert to a 0-based index into the split fields
my ($Sum,$Count,%Cnt)=(0,0);    # sum of values, rows tallied, value => occurrences
# openFH($path) -> read filehandle
#
# Opens $path for reading and returns a lexical filehandle. The file name
# suffix (matched case-insensitively) selects how the data is read:
#   .gz   -> piped through "gzip -dc"
#   .xz   -> piped through "xz -dc"
#   .bz2  -> piped through "bzip2 -dc"
#   else  -> opened directly as a plain file
# Dies with "Error opening <path>: <reason>" when the open fails.
# For piped input, a failure of the decompressor itself is only reported when
# the handle is closed, so callers should check the result of close().
# The ($) prototype is kept so existing call sites parse exactly as before.
sub openFH($) {
    my $inf = $_[0];
    my %decompressor_for = (gz => 'gzip', xz => 'xz', bz2 => 'bzip2');
    my $FH;
    if ($inf =~ /\.(gz|xz|bz2)$/i) {
        # List-form pipe open: the name is handed to the program as a single
        # argument and never passes through a shell, so spaces or shell
        # metacharacters in it are harmless. '--' stops option parsing so a
        # name starting with '-' is still treated as a file.
        open $FH,'-|',$decompressor_for{lc $1},'-dc','--',$inf
            or die "Error opening $inf: $!\n";
    } else {
        open $FH,'<',$inf or die "Error opening $inf: $!\n";
    }
    return $FH;
}
# ---- Tally the selected column ---------------------------------------------
# Reads $inf line by line, skipping lines that start with '#' or ';', and for
# every remaining line counts the value found in field $col (0-based here):
#   %Cnt    value => number of occurrences
#   $Count  number of lines tallied
#   $Sum    numeric sum of the tallied values
my $FH = openFH($inf);
while (<$FH>) {
    next if /^[#;]/;    # comment line; skip before doing any splitting
    my @Dat = split /\s+/;
    # Blank or short lines have no such field. Tallying them would create a
    # bogus '' bucket, inflate $Count and distort the average, so skip them.
    unless (defined $Dat[$col]) {
        warn "Line $. of [$inf] has no column ",$col+1,", skipped\n";
        next;
    }
    ++$Cnt{$Dat[$col]};
    ++$Count;
    $Sum += $Dat[$col];
}
# When the handle is a pipe from a decompressor, close() is the only place a
# failure of that program is reported: it returns false, with $! set to 0 if
# the sole problem was a non-zero exit status (then available in $?).
unless (close $FH) {
    die $! ? "Error closing $inf: $!\n"
           : "Error reading $inf: decompressor exited with status ".($? >> 8)."\n";
}
# ---- Write the histogram table to "$outf.dat" ------------------------------
# Layout:
#   line 1   "# TotalCount=..., ValueSum=..., Average=..."
#   line 2   "#Value<TAB>Count<TAB>CountRatio<TAB>CumCountRatio"
#   then one tab-separated row per distinct value, in ascending numeric order.
open my $OUT,'>',"$outf.dat" or die "Error opening $outf.dat: $!\n";

# With no tallied rows the average is undefined; print NA rather than dying
# with "Illegal division by zero".
my $Average = $Count ? $Sum/$Count : 'NA';
print {$OUT} "# TotalCount=$Count, ValueSum=$Sum, Average=",$Average,"\n#Value\tCount\tCountRatio\tCumCountRatio\n";

# Accumulate the integer count and divide once per row: summing the per-row
# ratios would let floating-point error build up, whereas this way the last
# row's cumulative ratio is exactly 1.
my $CumCount=0;
for my $k (sort { $a<=> $b } keys %Cnt) { # alternative order: $Cnt{$a} <=> $Cnt{$b} || $a<=> $b
    $CumCount += $Cnt{$k};
    print {$OUT} join("\t",$k,$Cnt{$k},$Cnt{$k}/$Count,$CumCount/$Count),"\n";
}
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close $OUT or die "Error writing $outf.dat: $!\n";
47 __END__
48 set terminal png notransparent nointerlace size 1200,960 font '/opt/arial.ttf' 24
49 set output "plot.png"
50 set xlabel 'Depth'
51 set ylabel 'Count'
52 set xrange[2:862]
53 set yrange[0:320000]
54 plot 'tw.dat' using 1:2 with lines