4 use IO
::Unread
qw(unread);
5 use Data
::Dump
qw(ddx);
7 my $GRID = 40; # 0 .. $GRID-1, but $GRID for bigger numbers.
9 die "Usage: $0 <max_freq> <input1> <input2> <output>\n" if @ARGV < 4;
10 my ($max,$inf1,$inf2,$outf)=@ARGV;
13 my $oneGrid = $max / $GRID; # no need to +1 as max is for main parts of data only, 不包括极端值。
17 my ($KmerSum,$infile)=(0);
18 if ($filename=~/.xz$/) {
19 open( $infile,"-|","xz -dc $filename") or die "Error opening $filename: $!\n";
20 } elsif ($filename=~/.gz$/) {
21 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
22 } elsif ($filename=~/.lz4$/) {
23 open( $infile,"-|","/opt/bin/lz4c -dy $filename /dev/stdout") or die "Error opening $filename: $!\n";
24 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
25 open T
,'<',"$filename.hist" or die $!;
28 #print join('|',@t),"\n" if $t[0] eq '#';
29 if ( $t[1] eq 'KmerSum:' ) {
30 chomp($KmerSum = $t[2]);
34 return [$infile,$KmerSum];
41 my $str = (split /\t/,$tmp)[0];
42 my $len = length $str;
43 #my $allcnt = 4 ** $len;
47 my ($IN1,$KmerSum1) = @
{openfile
($inf1)};
48 my ($IN2,$KmerSum2) = @
{openfile
($inf2)};
49 my ($klen1,$klen2) = ( getKsize
($IN1),getKsize
($IN2) );
50 if ($klen1 != $klen2) {
53 print "Kmer_Length: $klen1,$klen2 KmerSum: $KmerSum1,$KmerSum2\nGrid: $oneGrid x $GRID = ",$oneGrid * $GRID," of $max, ",$max/($GRID*$oneGrid),"\n";
57 my ($flag,$lastunread,@RawArray,$la,$lb,$kmer1,$kmer2,$count1,$count2)=(3,0);
58 for my $i (0 .. $GRID) {
59 for my $j (0 .. $GRID) {
60 $RawArray[$i][$j] = 0;
65 unless ( defined($la=<$IN1>) ) {
70 unless ( defined($lb=<$IN2>) ) {
75 ($kmer1,$count1) = split /\t/,$la;
76 ($kmer2,$count2) = split /\t/,$lb;
77 chomp($count1,$count2);
78 #warn "[$flag] $kmer1,$count1\t$kmer2,$count2\n";
80 # $count1 = int($count1 / $oneGrid);
81 # $count2 = int($count2 / $oneGrid);
82 $count1 = ($count1 / $KmerSum1) / $oneGrid;
83 $count2 = ($count2 / $KmerSum2) / $oneGrid;
84 $count1 = $GRID if $count1 > $GRID;
85 $count2 = $GRID if $count2 > $GRID;
87 if ( $kmer1 lt $kmer2 ) {
88 #print STDERR "$kmer1 < $kmer2 $count1,$count2\n";
89 if ( $kmer1 ne '@' ) {
90 ++$RawArray[$count1][0]; # $kmer2 is bigger thus cannot be '@'
91 #++$TMP{"$count1."}{"$kmer1.\@x"};
92 #print STDERR "$kmer1 < $kmer2 $count1,$count2\n";
96 if ($lastunread == 2) {
100 } elsif ( $kmer1 gt $kmer2 ) {
101 #print STDERR "$flag $kmer1 > $kmer2 $count1,$count2\n";
102 if ( $kmer2 ne '@' ) {
103 ++$RawArray[0][$count2];
104 #++$TMP{".$count2"}{"x\@.$kmer2"};
105 #print STDERR "$flag $kmer1 > $kmer2 $count1,$count2\n";
109 if ($lastunread == 1) {
113 } elsif ( $count1 + $count2 >= 0 ) {
114 ++$RawArray[$count1][$count2];
115 #++$TMP{"$count1.$count2"}{"$kmer1"};
116 #print STDERR "$flag $kmer1 = $kmer2 $count1,$count2\n";
120 #die "[$t][$la][$lb]" if defined $t;
121 ##############################
122 # The last lines will be missing !!!
123 ##############################
132 open OUT
,'>',$outf or die;
133 open OUTP
,'>',$outf.'.plot' or die;
134 print OUT
"# Input: [$inf1],[$inf2]\n";
135 print OUT
"# Kmer_Length: $klen1,$klen2\n# Grid: $oneGrid x $GRID = ",$oneGrid * $GRID," of $max, ",$max/($GRID*$oneGrid),"\n";
136 print OUT
"# Horizontal:[$inf2], Vertical: [$inf1]\n";
137 print OUT
join(',','.',( 0 .. $GRID )),"\n";
138 for my $i ( 0 .. $GRID ) {
139 print OUT
join(', ',$i,@
{$RawArray[$i]}),"\n";
140 print OUTP
join(', ',@
{$RawArray[$i]}),"\n";
147 find sss2
/ -name
*.?z
* > kmerfreq
.lst
149 perl plotkmercnt
.pl
40 a
.gz b
.gz t
>tt1
2>tt2
150 perl plotkmercnt
.pl
40 a100k
.gz b100k
.gz t100k
.40
152 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k17
.lz4 sss2
/mda/S23
.k17
.lz4 out
/s01v23k17e9
-5 &
153 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k17
.lz4 sss2
/mlbac/Donor
.k17
.gz out
/s01vDRk17e9
-5 &
154 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k17
.lz4 sss2
/mlbac/Donor
.k17
.gz out
/s23vDRk17e9
-5 &
155 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k19
.lz4 sss2
/mda/S23
.k19
.lz4 out
/s01v23k19e9
-5 &
156 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k19
.lz4 sss2
/mlbac/Donor
.k19
.gz out
/s01vDRk19e9
-5 &
157 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k19
.lz4 sss2
/mlbac/Donor
.k19
.gz out
/s23vDRk19e9
-5 &
158 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k23
.lz4 sss2
/mda/S23
.k23
.lz4 out
/s01v23k23e9
-5 &
159 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k23
.lz4 sss2
/mlbac/Donor
.k23
.lz4 out
/s01vDRk23e9
-5 &
160 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k23
.lz4 sss2
/mlbac/Donor
.k23
.lz4 out
/s23vDRk23e9
-5 &
161 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k25
.lz4 sss2
/mda/S23
.k25
.lz4 out
/s01v23k25e9
-5 &
162 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k25
.lz4 sss2
/mlbac/Donor
.k25
.lz4 out
/s01vDRk25e9
-5 &
163 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k25
.lz4 sss2
/mlbac/Donor
.k25
.lz4 out
/s23vDRk25e9
-5 &
165 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k17
.lz4 sss2
/mlbac/S02
.k17
.lz4 out
/s01vs02k17e9
-5 &
166 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k17
.lz4 sss2
/mlbac/S03
.k17
.lz4 out
/s01vs03k17e9
-5 &
167 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k17
.lz4 sss2
/mda/S24
.k17
.lz4 out
/23v24k17e9
-5 &
168 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k17
.lz4 sss2
/mda/S28
.k17
.lz4 out
/23v28k17e9
-5 &
170 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k17
.lz4 sss2
/mda/S23
.k17
.lz4 out
/s01v23k17e9
-5 &
171 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k17
.lz4 sss2
/mlbac/Donor
.k17
.gz out
/s01vDRk17e9
-5 &
172 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k17
.lz4 sss2
/mlbac/Donor
.k17
.gz out
/s23vDRk17e9
-5 &
173 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k19
.lz4 sss2
/mda/S23
.k19
.lz4 out
/s01v23k19e9
-5 &
174 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k19
.lz4 sss2
/mlbac/Donor
.k19
.gz out
/s01vDRk19e9
-5 &
175 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k19
.lz4 sss2
/mlbac/Donor
.k19
.gz out
/s23vDRk19e9
-5 &
176 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k23
.lz4 sss2
/mda/S23
.k23
.lz4 out
/s01v23k23e9
-5 &
177 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k23
.lz4 sss2
/mlbac/Donor
.k23
.lz4 out
/s01vDRk23e9
-5 &
178 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k23
.lz4 sss2
/mlbac/Donor
.k23
.lz4 out
/s23vDRk23e9
-5 &
179 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k25
.lz4 sss2
/mda/S23
.k25
.lz4 out
/s01v23k25e9
-5 &
180 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k25
.lz4 sss2
/mlbac/Donor
.k25
.lz4 out
/s01vDRk25e9
-5 &
181 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k25
.lz4 sss2
/mlbac/Donor
.k25
.lz4 out
/s23vDRk25e9
-5 &
183 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k19
.lz4 sss2
/mlbac/S02
.k19
.lz4 out
/s01vs02k19e9
-5 &
184 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k19
.lz4 sss2
/mlbac/S03
.k19
.lz4 out
/s01vs03k19e9
-5 &
185 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k19
.lz4 sss2
/mda/S24
.k19
.lz4 out
/23v24k19e9
-5 &
186 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k19
.lz4 sss2
/mda/S28
.k19
.lz4 out
/23v28k19e9
-5 &
188 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k23
.lz4 sss2
/mlbac/S02
.k23
.lz4 out
/s01vs02k23e9
-5 &
189 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k23
.lz4 sss2
/mlbac/S03
.k23
.lz4 out
/s01vs03k23e9
-5 &
190 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k23
.lz4 sss2
/mda/S24
.k23
.lz4 out
/23v24k23e9
-5 &
191 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k23
.lz4 sss2
/mda/S28
.k23
.lz4 out
/23v28k23e9
-5 &
193 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k25
.lz4 sss2
/mlbac/S02
.k25
.lz4 out
/s01vs02k25e9
-5 &
194 perl plotkmercnt
.pl
5e-9 sss2
/mlbac/S01
.k25
.lz4 sss2
/mlbac/S03
.k25
.lz4 out
/s01vs03k25e9
-5 &
195 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k25
.lz4 sss2
/mda/S24
.k25
.lz4 out
/23v24k25e9
-5 &
196 perl plotkmercnt
.pl
5e-9 sss2
/mda/S23
.k25
.lz4 sss2
/mda/S28
.k25
.lz4 out
/23v28k25e9
-5 &