4 #use IO::Unread qw(unread);
5 use Data
::Dump
qw(ddx);
7 #my $ZONE_LENGTH = 200000;
10 die "Usage: $0 <ZONE_LENGTH> <output>\n" if @ARGV < 2;
11 my ($ZONE_LENGTH,$outf)=@ARGV;
12 warn "ZONE_LENGTH: $ZONE_LENGTH\n";
15 open CHR,'<','chr.lst' or die;
18 my ($gi,$id) = split /\s+/,$_;
26 open I
,'<','xtubam/depth.sh' or die;
27 # $ cat /bak/seqdata/sperm/xtubam/depth.sh
28 # samtools depth mdaSperm23xtu.bam mdaSperm24xtu.bam mdaSperm28xtu.bam mlbacDonor.bam mlbacSpermS01.bam mlbacSpermS02.bam mlbacSpermS03.bam | xz -9c > depths.xz
32 if (/^samtools depth /) {
33 (undef,undef,@tmpids) = split /\s+/;
46 #our @IDsAllOne = split //,'1' x @IDs;
49 my ($adder,$data) = @_;
50 for my $i (0 .. $#IDs) {
51 $$adder[$i] += $$data[$i];
55 my ($adder,$data) = @_;
56 for my $i (0 .. $#IDs) {
57 ++$$adder[$i] if $$data[$i] > 0;
61 my ($adder,$data) = @_;
62 for my $i (0 .. $#IDs) {
63 ++$$adder[$i] if $$data[$i] == 0;
67 my ($adder,$value) = @_;
68 for my $i (0 .. $#IDs) {
69 $$adder[$i] += $value;
73 my ($adder,$value) = @_;
74 for my $i (0 .. $#IDs) {
79 my $FileName = 'xtubam/depths.gz';
80 #$FileName = 'tmpdep.xz';
81 open I
,'-|',"gzip -dc $FileName" or die;
82 my $posPoint; # 1-based coordinate
83 my (%Dat,$t,%LastmPos); # [cov-avg,emptRatio,covsum,covbp,emptbp,lasts]
86 my ($chrid,$pos,@depthDat) = split /\t/;
87 #my $chrid = $ChrGI2ID{$chrname};
88 my $mPos = int($pos / $ZONE_LENGTH);
89 unless (defined $Dat{$chrid}->[$mPos]) {
90 $Dat{$chrid}->[$mPos] = [[],[],[],[],[],[]];
91 $t = $Dat{$chrid}->[$mPos];
92 dosetvalue
($t->[0],0);
93 dosetvalue
($t->[1],0);
94 dosetvalue
($t->[2],0);
95 dosetvalue
($t->[3],0);
96 dosetvalue
($t->[4],0);
97 dosetvalue
($t->[5],0);
98 $posPoint = $mPos*$ZONE_LENGTH -1;
100 doinc
($Dat{$chrid}->[$mPos]->[4], $pos - $posPoint - 1);
102 doaddup
($Dat{$chrid}->[$mPos]->[2], \
@depthDat);
103 doaddifone
($Dat{$chrid}->[$mPos]->[3], \
@depthDat);
104 doaddifzero
($Dat{$chrid}->[$mPos]->[4], \
@depthDat);
105 dosetvalue
($t->[5],(1+$mPos)*$ZONE_LENGTH -1 - $pos);
106 $LastmPos{$chrid} = $mPos;
110 for my $chrid (keys %Dat) {
111 for my $ZoneID (0 .. $#{$Dat{$chrid}}) {
112 $t = $Dat{$chrid}->[$ZoneID];
113 unless (defined $t) {
114 $Dat{$chrid}->[$ZoneID] = [[],[],[],[],[],[]];
115 $t = $Dat{$chrid}->[$ZoneID];
116 dosetvalue
($t->[0],0);
117 dosetvalue
($t->[1],1);
118 dosetvalue
($t->[2],0);
119 dosetvalue
($t->[3],0);
120 dosetvalue
($t->[4],$ZONE_LENGTH);
121 dosetvalue
($t->[5],-1); # Well, just mark it.
124 for my $i (0 .. $#IDs) {
125 my $sum = $t->[3][$i] + $t->[4][$i];
126 unless ($ZoneID == $LastmPos{$chrid}) {
129 $t->[0][$i] = $t->[3][$i] ?
($t->[2][$i] / $t->[3][$i]) : 0;
130 $t->[1][$i] = $t->[4][$i] / $sum;
136 open OUT
,'>',$outf or die "Error opening $outf: $!\n";
137 print OUT
"# ZONE_LENGTH: $ZONE_LENGTH\n# Order: ",join(' ',@IDs),"\n#",
138 join("\t",'ChrID','zPos','unCovBases','unCovRatio','CovSum','avgDepth','isNUL'),"\n";
139 for my $chrid (keys %Dat) {
140 for my $ZoneID (0 .. $#{$Dat{$chrid}}) {
141 $t = $Dat{$chrid}->[$ZoneID];
142 print OUT
join( "\t",$chrid,$ZoneID,
143 join(' ',@
{$t->[4]}),join(' ',map { int(.5+1000*$_)/1000 } @
{$t->[1]}),
144 join(' ',@
{$t->[2]}),join(' ',map { int(.5+1000*$_)/1000 } @
{$t->[0]}),
145 join('',map {($_==-1)?
1:0} @
{$t->[5]}) ),"\n";
151 perl sperm_siteDepStat
.pl
200000 rss
.tsv
152 perl rsstat
.pl
500000 rss5k
.tsv
153 perl rsstat
.pl
1000000 rss1m
.tsv
155 $ll -L
*tsv depths
.gz
156 -rw
-r
--r
-- 1 gaoshengjie bc_tumor
13G Sep
1 23:04 depths
.gz
157 -rw
-r
--r
-- 1 gaoshengjie bc_tumor
693K Sep
2 14:01 rss1m
.tsv
158 -rw
-r
--r
-- 1 gaoshengjie bc_tumor
1.4M Sep
2 13:58 rss500k
.tsv
159 (the two tsv starts at the same
time, immediately after
`samtools depth`.)