new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / gcmtdiv.pl
blob6a306773680c771944105cea5233ac6cf8d387a5
1 #!/bin/env perl
2 use strict;
3 use warnings;
5 die "usage: $0 <file prefix>\n" unless @ARGV == 1;
7 my (@cnt,@ref,@div);
8 open CNT,'<',$ARGV[0].'.sam.gc.stat' or die $!;
9 open REF,'<',$ARGV[0].'.ref.gc.stat' or die $!;
11 while(<CNT>) { last if /^#COL=higher/; }
12 while(<CNT>) {
13 chomp;
14 next if /^#/;
15 #last if /^$/;
16 my @t=split /\t/,$_;
17 my $id=shift @t;
18 $cnt[$id]=[@t];
20 close CNT;
22 while(<REF>) { last if /^#COL=higher/; }
23 while(<REF>) {
24 chomp;
25 next if /^#/;
26 my @t=split /\t/,$_;
27 my $id=shift @t;
28 $ref[$id]=[@t];
30 close REF;
32 $cnt[$_][$_] *= 2 for (0..100);
33 $ref[$_][$_] *= 2 for (0..100);
35 sub dePepperC($$) {
36 my ($h,$range)=@_;
37 return 0 if $range > 2;
38 my ($points,$sum)=(0,0);
39 for my $i ($h-$range .. $h+$range) {
40 next unless $div[$i][$i];
41 next if $i==$h;
42 ++$points;
43 $sum+=$div[$i][$i];
45 if ($points >= 2) {
46 for my $i ($h-$range .. $h+$range) {
47 print $div[$i][$i],' ';
49 print "\n",$sum / $points,"\n",'*' x 75,"\n";
50 return $sum / $points;
51 } else {
52 return &dePepperC($h,++$range);
55 sub dePepperA($$) {
56 my ($h,$l)=@_;
57 my $range=1;
58 my ($points,$sum)=(0,0);
59 for my $i ($h-$range .. $h+$range) {
60 for my $j ($l-$range .. $l+$range) {
61 next unless $div[$i][$j];
62 next if $i==$h and $j==$l;
63 ++$points;
64 $sum+=$div[$i][$j];
67 if ($points >= 2) {
68 for my $i ($h-$range .. $h+$range) {
69 for my $j ($l-$range .. $l+$range) {
70 print $div[$i][$j],' ';
72 print "\n";
74 print $sum / $points,"\n",'=' x 75,"\n";
75 return $sum / $points;
76 } else {
77 return 0;
80 sub dePepper($$$){
81 my ($h,$l,$range)=@_;
82 my ($points,$sum)=(0,0);
83 for my $i ($h-$range .. $h+$range) {
84 for my $j ($l-$range .. $l+$range) {
85 next unless $ref[$i][$j];
86 next if $i==$h and $j==$l;
87 ++$points;
88 $sum+=$ref[$i][$j];
91 if ($points >= 2) {
92 for my $i ($h-$range .. $h+$range) {
93 for my $j ($l-$range .. $l+$range) {
94 print $ref[$i][$j],' ';
96 print "\n";
98 print $sum / $points,"\n";
99 return $sum / $points;
100 } else {
101 return &dePepper($h,$l,++$range);
105 my $refvalue=0;
106 for my $h (0..100) {
107 for my $l (0..100) {
108 #print "$h,$l\tCnt:$cnt[$h][$l]\tRef:$ref[$h][$l]\n";
109 #next unless $cnt[$h][$l];
110 $refvalue = $ref[$h][$l];
111 unless ($refvalue) {
112 if ($cnt[$h][$l]) {
113 #$ref[$h][$l]=-1;
114 $refvalue = &dePepper($h,$l,1);
115 print '-' x 75,"\n";
116 } else {$refvalue=1;}
118 $div[$h][$l] = $cnt[$h][$l] / $refvalue;
121 my @divOut;
122 for my $h (0..100) {
123 for my $l (0..100) {
124 if ($h<$l) {
125 $divOut[$h][$l] = 0;
126 } elsif ($div[$h][$l]) {
127 $divOut[$h][$l] = $div[$h][$l];
128 } else {
129 $divOut[$h][$l] = &dePepperA($h,$l);
130 if ($h == $l and $divOut[$h][$l] == 0) {
131 $divOut[$h][$l] = &dePepperC($h,1);
137 open OUT,'>',$ARGV[0].'.div.gc.stat' or die $!;
138 print OUT "#COL=higher_GC%_read, ROW=lower_GC%_read\n";
139 print OUT join("\t",'#',0..100),"\n";
140 print OUT "$_\t",join("\t",@{$divOut[$_]}),"\n" for (0..100);
141 close OUT;
142 #print "$_: [",scalar @{$cnt[$_]},']',join(' ',@{$cnt[$_]}),"\n" for (0 .. $#cnt);
143 #print "$_: [",scalar @{$ref[$_]},']',join(' ',@{$ref[$_]}),"\n" for (0 .. $#ref);