4 use IO
::Unread
qw(unread);
5 use Data
::Dump
qw(ddx);
7 die "Usage: $0 <input>\n" if @ARGV < 1;
13 if ($filename=~/.xz$/) {
14 open( $infile,"-|","xz -dc $filename") or die "Error opening $filename: $!\n";
15 } elsif ($filename=~/.gz$/) {
16 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
17 } elsif ($filename=~/.lz4$/) {
18 open( $infile,"-|","/opt/bin/lz4c -dy $filename /dev/stdout") or die "Error opening $filename: $!\n";
19 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
23 my $IN = openfile
($inf);
26 my $str = (split /\t/,$tmp)[0];
27 my $len = length $str;
28 my $allcnt = 4 ** $len;
30 my ($Sum,$kMax,$kMin,$cMax,$cMin,%Hist)=(0,0,999999999,0,999999999);
31 my ($KmerSum,$K90,$K75,$K50,$K25,$K10,$Kadding)=(0,0,0,0,0,0,0);
35 my ($str,$cnt) = split /\t/;
36 #print "$str\t[$cnt]\n";
40 $kMax = $cnt if $cnt > $kMax;
41 $kMin = $cnt if $cnt < $kMin;
46 $cMax = $_ if $cMax < $_;
47 $cMin = $_ if $cMin > $_;
50 open OUT
,'>',"$inf.hist" or die;
51 print OUT
"# K\t$len\t$allcnt
52 # KmerFreq_Min-Max\t$kMin\t$kMax
53 # Cnt_Min-Max\t$cMin\t$cMax
54 # CntOfFoundKmer\t$Sum\t",$Sum/$allcnt,"\n";
56 my @order = sort {$a <=> $b} keys %Hist;
58 $Kadding += $_ * $Hist{$_};
59 if ( 10 * $Kadding < $KmerSum ) {
61 } elsif ( 4 * $Kadding < $KmerSum ) {
63 } elsif ( 2 * $Kadding < $KmerSum ) {
65 } elsif ( 4 * $Kadding < $KmerSum * 3 ) {
67 } elsif ( 10 * $Kadding < $KmerSum * 9 ) {
71 print OUT
"# KmerSum: $KmerSum\n# K90: $K90, K75: $K75, K50: $K50, K25: $K25, K10: $K10\n";
73 push @t, $_/$KmerSum for ($K90,$K75,$K50,$K25,$K10);
74 print OUT
'# Kratio: ',join(', ',@t),"\n";
76 print OUT
"# KmerFreq\tCntOfThisFreq\tKmerSumRatio\tCumulativeKmerSumRatio\n";
79 $cumulative += $_*$Hist{$_};
80 print OUT
join("\t",$_,$Hist{$_},$_*$Hist{$_}/$KmerSum,$cumulative/$KmerSum),"\n";
85 cat kmerfreq
.lst
|while read a
;do perl getkmhist
.pl
$a; done
&
87 cat kmerfreq
.lst
|while read a
;do echo perl
"getkmhist.pl $a &"; done
> t
.sh