modified: myjupyterlab.sh
[GalaxyCodeBases.git] / perl / etc / justonce / sperm / sperm_siteDepStat.pl
blob9be21b74e005f388162710915b3572e62b8f797f
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 #use IO::Unread qw(unread);
5 use Data::Dump qw(ddx);
7 #my $ZONE_LENGTH = 200000;
8 #$ZONE_LENGTH = 20000;
10 die "Usage: $0 <ZONE_LENGTH> <output>\n" if @ARGV < 2;
11 my ($ZONE_LENGTH,$outf)=@ARGV;
12 warn "ZONE_LENGTH: $ZONE_LENGTH\n";
14 =pod
15 open CHR,'<','chr.lst' or die;
16 my %ChrGI2ID;
17 while (<CHR>) {
18 my ($gi,$id) = split /\s+/,$_;
19 $ChrGI2ID{$gi}=$id;
21 close CHR;
22 $ChrGI2ID{'='}='=';
23 #ddx \%ChrGI2ID;
24 =cut
26 open I,'<','xtubam/depth.sh' or die;
27 # $ cat /bak/seqdata/sperm/xtubam/depth.sh
28 # samtools depth mdaSperm23xtu.bam mdaSperm24xtu.bam mdaSperm28xtu.bam mlbacDonor.bam mlbacSpermS01.bam mlbacSpermS02.bam mlbacSpermS03.bam | xz -9c > depths.xz
29 our @IDs;
30 my @tmpids;
31 while (<I>) {
32 if (/^samtools depth /) {
33 (undef,undef,@tmpids) = split /\s+/;
34 last;
37 close I;
38 for (@tmpids) {
39 if (/\.bam$/) {
40 s/\.bam$//;
41 s/\.sort$//;
42 push @IDs,$_;
45 ddx \@IDs;
46 #our @IDsAllOne = split //,'1' x @IDs;
48 sub doaddup($$) {
49 my ($adder,$data) = @_;
50 for my $i (0 .. $#IDs) {
51 $$adder[$i] += $$data[$i];
54 sub doaddifone($$) {
55 my ($adder,$data) = @_;
56 for my $i (0 .. $#IDs) {
57 ++$$adder[$i] if $$data[$i] > 0;
60 sub doaddifzero($$) {
61 my ($adder,$data) = @_;
62 for my $i (0 .. $#IDs) {
63 ++$$adder[$i] if $$data[$i] == 0;
66 sub doinc($$) {
67 my ($adder,$value) = @_;
68 for my $i (0 .. $#IDs) {
69 $$adder[$i] += $value;
72 sub dosetvalue($$) {
73 my ($adder,$value) = @_;
74 for my $i (0 .. $#IDs) {
75 $$adder[$i] = $value;
79 my $FileName = 'xtubam/depths.gz';
80 #$FileName = 'tmpdep.xz';
81 open I,'-|',"gzip -dc $FileName" or die;
82 my $posPoint; # 1-based coordinate
83 my (%Dat,$t,%LastmPos); # [cov-avg,emptRatio,covsum,covbp,emptbp,lasts]
84 while (<I>) {
85 chomp;
86 my ($chrid,$pos,@depthDat) = split /\t/;
87 #my $chrid = $ChrGI2ID{$chrname};
88 my $mPos = int($pos / $ZONE_LENGTH);
89 unless (defined $Dat{$chrid}->[$mPos]) {
90 $Dat{$chrid}->[$mPos] = [[],[],[],[],[],[]];
91 $t = $Dat{$chrid}->[$mPos];
92 dosetvalue($t->[0],0);
93 dosetvalue($t->[1],0);
94 dosetvalue($t->[2],0);
95 dosetvalue($t->[3],0);
96 dosetvalue($t->[4],0);
97 dosetvalue($t->[5],0);
98 $posPoint = $mPos*$ZONE_LENGTH -1;
100 doinc($Dat{$chrid}->[$mPos]->[4], $pos - $posPoint - 1);
101 $posPoint = $pos;
102 doaddup($Dat{$chrid}->[$mPos]->[2], \@depthDat);
103 doaddifone($Dat{$chrid}->[$mPos]->[3], \@depthDat);
104 doaddifzero($Dat{$chrid}->[$mPos]->[4], \@depthDat);
105 dosetvalue($t->[5],(1+$mPos)*$ZONE_LENGTH -1 - $pos);
106 $LastmPos{$chrid} = $mPos;
108 close I;
110 for my $chrid (keys %Dat) {
111 for my $ZoneID (0 .. $#{$Dat{$chrid}}) {
112 $t = $Dat{$chrid}->[$ZoneID];
113 unless (defined $t) {
114 $Dat{$chrid}->[$ZoneID] = [[],[],[],[],[],[]];
115 $t = $Dat{$chrid}->[$ZoneID];
116 dosetvalue($t->[0],0);
117 dosetvalue($t->[1],1);
118 dosetvalue($t->[2],0);
119 dosetvalue($t->[3],0);
120 dosetvalue($t->[4],$ZONE_LENGTH);
121 dosetvalue($t->[5],-1); # Well, just mark it.
122 next;
124 for my $i (0 .. $#IDs) {
125 my $sum = $t->[3][$i] + $t->[4][$i];
126 unless ($ZoneID == $LastmPos{$chrid}) {
127 $sum += $t->[5][$i];
129 $t->[0][$i] = $t->[3][$i] ? ($t->[2][$i] / $t->[3][$i]) : 0;
130 $t->[1][$i] = $t->[4][$i] / $sum;
134 #ddx \%Dat;
136 open OUT,'>',$outf or die "Error opening $outf: $!\n";
137 print OUT "# ZONE_LENGTH: $ZONE_LENGTH\n# Order: ",join(' ',@IDs),"\n#",
138 join("\t",'ChrID','zPos','unCovBases','unCovRatio','CovSum','avgDepth','isNUL'),"\n";
139 for my $chrid (keys %Dat) {
140 for my $ZoneID (0 .. $#{$Dat{$chrid}}) {
141 $t = $Dat{$chrid}->[$ZoneID];
142 print OUT join( "\t",$chrid,$ZoneID,
143 join(' ',@{$t->[4]}),join(' ',map { int(.5+1000*$_)/1000 } @{$t->[1]}),
144 join(' ',@{$t->[2]}),join(' ',map { int(.5+1000*$_)/1000 } @{$t->[0]}),
145 join('',map {($_==-1)?1:0} @{$t->[5]}) ),"\n";
150 __END__
151 perl sperm_siteDepStat.pl 200000 rss.tsv
152 perl rsstat.pl 500000 rss5k.tsv
153 perl rsstat.pl 1000000 rss1m.tsv
155 $ll -L *tsv depths.gz
156 -rw-r--r-- 1 gaoshengjie bc_tumor 13G Sep 1 23:04 depths.gz
157 -rw-r--r-- 1 gaoshengjie bc_tumor 693K Sep 2 14:01 rss1m.tsv
158 -rw-r--r-- 1 gaoshengjie bc_tumor 1.4M Sep 2 13:58 rss500k.tsv
159 (the two tsv starts at the same time, immediately after `samtools depth`.)