modified: myjupyterlab.sh
[GalaxyCodeBases.git] / perl / etc / justonce / sperm / plotkmercnt.pl
blobd51ca47cfdd479894216fea540ac5e05a88f639d
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use IO::Unread qw(unread);
5 use Data::Dump qw(ddx);
7 my $GRID = 40; # 0 .. $GRID-1, but $GRID for bigger numbers.
9 die "Usage: $0 <max_freq> <input1> <input2> <output>\n" if @ARGV < 4;
10 my ($max,$inf1,$inf2,$outf)=@ARGV;
12 die if $max <= 0;
13 my $oneGrid = $max / $GRID; # no need to +1 as max is for main parts of data only, 不包括极端值。
15 sub openfile($) {
16 my ($filename)=@_;
17 my ($KmerSum,$infile)=(0);
18 if ($filename=~/.xz$/) {
19 open( $infile,"-|","xz -dc $filename") or die "Error opening $filename: $!\n";
20 } elsif ($filename=~/.gz$/) {
21 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
22 } elsif ($filename=~/.lz4$/) {
23 open( $infile,"-|","/opt/bin/lz4c -dy $filename /dev/stdout") or die "Error opening $filename: $!\n";
24 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
25 open T,'<',"$filename.hist" or die $!;
26 while (<T>) {
27 my @t = split /\s+/;
28 #print join('|',@t),"\n" if $t[0] eq '#';
29 if ( $t[1] eq 'KmerSum:' ) {
30 chomp($KmerSum = $t[2]);
33 close T;
34 return [$infile,$KmerSum];
37 sub getKsize($) {
38 my $fh = $_[0];
39 my $tmp = <$fh>;
40 unread $fh, $tmp;
41 my $str = (split /\t/,$tmp)[0];
42 my $len = length $str;
43 #my $allcnt = 4 ** $len;
44 return $len;
47 my ($IN1,$KmerSum1) = @{openfile($inf1)};
48 my ($IN2,$KmerSum2) = @{openfile($inf2)};
49 my ($klen1,$klen2) = ( getKsize($IN1),getKsize($IN2) );
50 if ($klen1 != $klen2) {
51 die "$klen1,$klen2";
53 print "Kmer_Length: $klen1,$klen2 KmerSum: $KmerSum1,$KmerSum2\nGrid: $oneGrid x $GRID = ",$oneGrid * $GRID," of $max, ",$max/($GRID*$oneGrid),"\n";
55 my %TMP;
57 my ($flag,$lastunread,@RawArray,$la,$lb,$kmer1,$kmer2,$count1,$count2)=(3,0);
58 for my $i (0 .. $GRID) {
59 for my $j (0 .. $GRID) {
60 $RawArray[$i][$j] = 0;
64 while ($flag) {
65 unless ( defined($la=<$IN1>) ) {
66 $flag &=2;
67 $la = "@\t-1";
69 last unless $flag;
70 unless ( defined($lb=<$IN2>) ) {
71 $flag &=1;
72 $lb = "@\t-1";
74 #chomp($la,$lb);
75 ($kmer1,$count1) = split /\t/,$la;
76 ($kmer2,$count2) = split /\t/,$lb;
77 chomp($count1,$count2);
78 #warn "[$flag] $kmer1,$count1\t$kmer2,$count2\n";
80 # $count1 = int($count1 / $oneGrid);
81 # $count2 = int($count2 / $oneGrid);
82 $count1 = ($count1 / $KmerSum1) / $oneGrid;
83 $count2 = ($count2 / $KmerSum2) / $oneGrid;
84 $count1 = $GRID if $count1 > $GRID;
85 $count2 = $GRID if $count2 > $GRID;
87 if ( $kmer1 lt $kmer2 ) {
88 #print STDERR "$kmer1 < $kmer2 $count1,$count2\n";
89 if ( $kmer1 ne '@' ) {
90 ++$RawArray[$count1][0]; # $kmer2 is bigger thus cannot be '@'
91 #++$TMP{"$count1."}{"$kmer1.\@x"};
92 #print STDERR "$kmer1 < $kmer2 $count1,$count2\n";
93 unread $IN2,$lb;
94 $lastunread = 2;
95 } else {
96 if ($lastunread == 2) {
97 last;
100 } elsif ( $kmer1 gt $kmer2 ) {
101 #print STDERR "$flag $kmer1 > $kmer2 $count1,$count2\n";
102 if ( $kmer2 ne '@' ) {
103 ++$RawArray[0][$count2];
104 #++$TMP{".$count2"}{"x\@.$kmer2"};
105 #print STDERR "$flag $kmer1 > $kmer2 $count1,$count2\n";
106 unread $IN1,$la;
107 $lastunread = 1;
108 } else {
109 if ($lastunread == 1) {
110 last;
113 } elsif ( $count1 + $count2 >= 0 ) {
114 ++$RawArray[$count1][$count2];
115 #++$TMP{"$count1.$count2"}{"$kmer1"};
116 #print STDERR "$flag $kmer1 = $kmer2 $count1,$count2\n";
119 #my $t=<$IN2>;
120 #die "[$t][$la][$lb]" if defined $t;
121 ##############################
122 # The last lines will be missing !!!
123 ##############################
124 print STDERR "\n";
126 close $IN1;
127 close $IN2;
129 #ddx \%TMP;
130 #ddx \@RawArray;
132 open OUT,'>',$outf or die;
133 open OUTP,'>',$outf.'.plot' or die;
134 print OUT "# Input: [$inf1],[$inf2]\n";
135 print OUT "# Kmer_Length: $klen1,$klen2\n# Grid: $oneGrid x $GRID = ",$oneGrid * $GRID," of $max, ",$max/($GRID*$oneGrid),"\n";
136 print OUT "# Horizontal:[$inf2], Vertical: [$inf1]\n";
137 print OUT join(',','.',( 0 .. $GRID )),"\n";
138 for my $i ( 0 .. $GRID ) {
139 print OUT join(', ',$i,@{$RawArray[$i]}),"\n";
140 print OUTP join(', ',@{$RawArray[$i]}),"\n";
142 close OUT;
143 close OUTP;
146 __END__
147 find sss2/ -name *.?z* > kmerfreq.lst
149 perl plotkmercnt.pl 40 a.gz b.gz t >tt1 2>tt2
150 perl plotkmercnt.pl 40 a100k.gz b100k.gz t100k.40
152 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k17.lz4 sss2/mda/S23.k17.lz4 out/s01v23k17e9-5 &
153 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k17.lz4 sss2/mlbac/Donor.k17.gz out/s01vDRk17e9-5 &
154 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k17.lz4 sss2/mlbac/Donor.k17.gz out/s23vDRk17e9-5 &
155 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k19.lz4 sss2/mda/S23.k19.lz4 out/s01v23k19e9-5 &
156 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k19.lz4 sss2/mlbac/Donor.k19.gz out/s01vDRk19e9-5 &
157 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k19.lz4 sss2/mlbac/Donor.k19.gz out/s23vDRk19e9-5 &
158 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k23.lz4 sss2/mda/S23.k23.lz4 out/s01v23k23e9-5 &
159 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k23.lz4 sss2/mlbac/Donor.k23.lz4 out/s01vDRk23e9-5 &
160 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k23.lz4 sss2/mlbac/Donor.k23.lz4 out/s23vDRk23e9-5 &
161 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k25.lz4 sss2/mda/S23.k25.lz4 out/s01v23k25e9-5 &
162 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k25.lz4 sss2/mlbac/Donor.k25.lz4 out/s01vDRk25e9-5 &
163 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k25.lz4 sss2/mlbac/Donor.k25.lz4 out/s23vDRk25e9-5 &
165 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k17.lz4 sss2/mlbac/S02.k17.lz4 out/s01vs02k17e9-5 &
166 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k17.lz4 sss2/mlbac/S03.k17.lz4 out/s01vs03k17e9-5 &
167 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k17.lz4 sss2/mda/S24.k17.lz4 out/23v24k17e9-5 &
168 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k17.lz4 sss2/mda/S28.k17.lz4 out/23v28k17e9-5 &
170 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k17.lz4 sss2/mda/S23.k17.lz4 out/s01v23k17e9-5 &
171 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k17.lz4 sss2/mlbac/Donor.k17.gz out/s01vDRk17e9-5 &
172 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k17.lz4 sss2/mlbac/Donor.k17.gz out/s23vDRk17e9-5 &
173 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k19.lz4 sss2/mda/S23.k19.lz4 out/s01v23k19e9-5 &
174 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k19.lz4 sss2/mlbac/Donor.k19.gz out/s01vDRk19e9-5 &
175 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k19.lz4 sss2/mlbac/Donor.k19.gz out/s23vDRk19e9-5 &
176 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k23.lz4 sss2/mda/S23.k23.lz4 out/s01v23k23e9-5 &
177 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k23.lz4 sss2/mlbac/Donor.k23.lz4 out/s01vDRk23e9-5 &
178 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k23.lz4 sss2/mlbac/Donor.k23.lz4 out/s23vDRk23e9-5 &
179 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k25.lz4 sss2/mda/S23.k25.lz4 out/s01v23k25e9-5 &
180 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k25.lz4 sss2/mlbac/Donor.k25.lz4 out/s01vDRk25e9-5 &
181 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k25.lz4 sss2/mlbac/Donor.k25.lz4 out/s23vDRk25e9-5 &
183 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k19.lz4 sss2/mlbac/S02.k19.lz4 out/s01vs02k19e9-5 &
184 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k19.lz4 sss2/mlbac/S03.k19.lz4 out/s01vs03k19e9-5 &
185 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k19.lz4 sss2/mda/S24.k19.lz4 out/23v24k19e9-5 &
186 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k19.lz4 sss2/mda/S28.k19.lz4 out/23v28k19e9-5 &
188 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k23.lz4 sss2/mlbac/S02.k23.lz4 out/s01vs02k23e9-5 &
189 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k23.lz4 sss2/mlbac/S03.k23.lz4 out/s01vs03k23e9-5 &
190 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k23.lz4 sss2/mda/S24.k23.lz4 out/23v24k23e9-5 &
191 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k23.lz4 sss2/mda/S28.k23.lz4 out/23v28k23e9-5 &
193 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k25.lz4 sss2/mlbac/S02.k25.lz4 out/s01vs02k25e9-5 &
194 perl plotkmercnt.pl 5e-9 sss2/mlbac/S01.k25.lz4 sss2/mlbac/S03.k25.lz4 out/s01vs03k25e9-5 &
195 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k25.lz4 sss2/mda/S24.k25.lz4 out/23v24k25e9-5 &
196 perl plotkmercnt.pl 5e-9 sss2/mda/S23.k25.lz4 sss2/mda/S28.k25.lz4 out/23v28k25e9-5 &