modified: Makefile
[GalaxyCodeBases.git] / perl / etc / radseq / plothap.pl
blobd3c64067889aa1480faa09e786f1d2be2427bf53
1 #!/usr/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
5 =cut
6 use strict;
7 use warnings;
8 use Data::Dump qw(ddx);
9 use Galaxy;
11 my ($borderL,$borderR) = (206000000,220000000);
13 die "Usage: $0 <marker dat> <tassel dump> [out midfix]\n" if @ARGV<2;
14 my $markf=shift;
15 my $inf=shift;
16 my $outf=shift;
18 my $GRIDsize = 5;
20 if ($outf) {
21 $outf = ".$outf";
22 } else {
23 $outf = '';
26 my $scaffnfo = '/bak/seqdata/2012/tiger/120512_TigerRefGenome/chr.nfo';
27 $scaffnfo = 'chr.nfo';
28 #my $scaf2locuslst = '/share/users/huxs/work/tiger/20120910/tighap.lst';
29 print "From [$markf][$inf] to [$inf$outf.(dat|svg)]\n";
31 my %Stat;
32 $Stat{GRIDsize} = $GRIDsize;
34 my @Scaffolds = ([qw(scaffold75 scaffold1458)], ['scaffold188']);
35 @Scaffolds = (['gi|362110686|gb|CM001378.1|']);
36 my $t=0;
37 my %scaffolds = map {map {$_ => ++$t} @$_} @Scaffolds;
38 my %scaff2chr = map { $scaffolds{$_} => $_ } keys %scaffolds;
39 ddx \@Scaffolds;
40 ddx \%scaffolds;
41 ddx \%scaff2chr;
42 # plothap.pl:32: [["scaffold75", "scaffold1458"], ["scaffold188"]]
43 # plothap.pl:33: { scaffold1458 => 2, scaffold188 => 3, scaffold75 => 1 }
44 # plothap.pl:34: { 1 => "scaffold75", 2 => "scaffold1458", 3 => "scaffold188" }
46 my ($maxLen,@LineLen,%ScaffoldLen,%ScaffoldsOffect)=(0);
47 open I,'<',$scaffnfo or die $!;
48 while (<I>) {
49 next if /^#/;
50 chomp;
51 my @items = split /\t/;
52 next unless exists $scaffolds{$items[0]};
53 $ScaffoldLen{$items[0]} = $items[1];
54 $ScaffoldLen{$items[0]} = $borderR - $borderL;
56 close I;
57 print "\n",scalar keys(%ScaffoldLen)," scaffold(s) Load.\nLength:\n";
58 for (@Scaffolds) {
59 my $len = 0;
60 for (@$_) {
61 $ScaffoldsOffect{$_} = $len;
62 $len += $ScaffoldLen{$_};
64 #push @t,$len; # not necessary
65 push @LineLen,$len;
66 print "[",join(', ',@$_),"]: $len\n";
67 $maxLen = $len if $maxLen < $len;
69 print "MaxLen: $maxLen\n";
70 #ddx \@LineLen;
71 ddx [\%ScaffoldsOffect,\%ScaffoldLen];
72 # plothap.pl:65: [[0, 5658748, 12958355], [0, 19124984]]
73 # plothap.pl:63: { scaffold1458 => 5658748, scaffold188 => 0, scaffold75 => 0 }
75 my %LD;
76 open I,'<',$inf or die $!;
77 while (<I>) {
78 next if /^Locus1\t/;
79 chomp;
80 my @items = split /\t/;
81 die scalar @items if @items != 17;
82 my ($chr1,$pos1,$id1,undef,undef,undef,$chr2,$pos2,$id2,undef,undef,undef,$dist,$rsq,$dp,$p,$N) = @items;
83 $pos1 -= $borderL; $pos2 -= $borderL;
84 my $x = [$chr1,$pos1];
85 my $y = [$chr2,$pos2];
86 #ddx [$x,$y];
87 ($x,$y) = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } ($x,$y);
88 #ddx [$x,$y];
89 $LD{$$x[0]}{$$x[1]}{$$y[0]}{$$y[1]} = [$dist,$rsq,$dp,$p,$N];
91 #ddx \%LD;
92 close I;
94 my $Xrange = 500;
95 my $Yrange = $Xrange/2 + 50;
96 my $YmaxVal = 2;
97 my $ArrowLen = 20; # 16+4
98 my $axisTick = 4;
99 my $OutBorder = 24;
100 my $InBorder = 20;
101 my $Xtotal = $Xrange + 2*$OutBorder;
102 my $Yitem = $Yrange + $InBorder;
103 my $FontSize = int($Xrange/40);
104 my $FontFamily = 'Arial';
106 my $perUnit = int($maxLen/10); # 279.330936 M /10 = 27.933093 M
107 my $numlevel = int(log($perUnit)/log(10)); # 7
108 my $numSuflevel = int($numlevel/3); # 2
109 my $numSuf=( '', qw( K M G T P E Z Y ) )[$numSuflevel]; # M <---
110 $numSuflevel = 10 ** (3*$numSuflevel); # 1,000,000 <---
111 my $roundTo = 5 * (10 ** ($numlevel-1)); # 5e6
112 my $unit = $perUnit + (-$perUnit % $roundTo); # 30 M
113 my $countMax = int($maxLen/$unit) + (($maxLen%$unit)?1:0);
114 my $BasepPx = 10*$unit/$Xrange;
115 #print join(",",$BasepPx,$maxLen/$numSuflevel,$perUnit/$numSuflevel,$numlevel,$numSuf,$numSuflevel,$roundTo/$numSuflevel,$unit/$numSuflevel,$countMax),"\n";
116 my $ChrCount = @Scaffolds;
117 my $Ytotal = $Yitem*$ChrCount - $InBorder + 2*$OutBorder;
119 $Stat{'Base/px'} = $BasepPx;
121 sub getVal($) {
122 my @dat = @{$_[0]};
123 my ($sum,$cnt,$max)=(0,0,0);
124 my %dat;
125 for my $k (@dat) {
126 $sum += $k;
127 ++$cnt;
128 ++$dat{$k};
130 #my @v = sort { $dat{$b} <=> $dat{$a} } keys %dat;
131 if ($cnt) {
132 #return $v[0];
133 return $sum/$cnt;
134 } else {
135 return -1;
139 my %PlotLD;
140 for (@Scaffolds) {
141 my %locushere = map { $scaffolds{$_} => 1 } @$_;
142 for my $scaff (@$_) {
143 my $locus1 = $scaffolds{$scaff};
144 for my $pos1 (sort {$a<=>$b} keys %{$LD{$locus1}}) {
145 for my $locus2 (keys %{$LD{$locus1}{$pos1}}) {
146 next unless exists $locushere{$locus2};
147 for my $pos2 (sort {$a<=>$b} keys %{$LD{$locus1}{$pos1}{$locus2}}) {
148 my @Offects = map {$ScaffoldsOffect{$scaff2chr{$_}} } ($locus1,$locus2);
149 #print join(',',$scaff,$locus1,$pos1,$locus2,$pos2,@Offects),"\n";
150 my $bp1 = $pos1 + $Offects[0];
151 my $bp2 = $pos2 + $Offects[1];
152 my ($p1,$p2) = map { int($_/($BasepPx*$GRIDsize))*$GRIDsize } ($bp1,$bp2);
153 #print join(',',$scaff,$bp1,$bp2,$p1,$p2),"\n";
154 my $val = $LD{$locus1}{$pos1}{$locus2}{$pos2}->[1]; # r^2
155 next if $val eq "NaN";
156 push @{$PlotLD{$locus1}{$p1}{$p2}},$val;
162 #ddx \%PlotLD;
163 my ($MaxSampleCnt,%PlotPlink)=(0);
164 open I,'<',$markf or die $!;
165 while (<I>) {
166 chomp;
167 my @items = split /\t/;
168 next unless exists $scaffolds{$items[1]};
169 next if $items[2] <= $borderL or $items[2] >= $borderR;
170 $items[2] -= $borderL;
171 my $locus = $scaffolds{$items[1]};
172 my $offects = $ScaffoldsOffect{$scaff2chr{$locus}};
173 my $pos = int(($items[2]+$offects)/($BasepPx*$GRIDsize))*$GRIDsize;
174 my @array = split /\//,$items[7].'/'.$items[8];
175 my $mid = $array[1]+$array[2];
176 my $sum = $array[0]+$mid+$array[3];
177 $MaxSampleCnt = $sum if $MaxSampleCnt < $sum;
178 my $value;
179 if ($mid==0) {
180 $value = -$sum;
181 } else {
182 $value = $mid/$sum;
184 push @{$PlotPlink{$locus}{$pos}},$value;
186 close I;
187 #ddx [\%PlotPlink,$MaxSampleCnt];
189 open O,'>',$inf.$outf.'.svg' or die $!;
190 print O <<HEAD;
191 <?xml version="1.0"?>
192 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.2" baseProfile="tiny"
193 width="${Xtotal}pt" height="${Ytotal}pt" viewBox="0 0 $Xtotal $Ytotal">
194 <title>Plot $inf</title>
195 <rect x="0" y="0" width="$Xtotal" height="$Ytotal" fill="none" stroke="none" stroke-width="2" />
196 <defs>
197 HEAD
199 $t=0;
200 for (@LineLen) {
201 my $len = $_ / $BasepPx;
202 print O <<CLIP;
203 <clipPath id="curveClip$t">
204 <path id="curve$t" d="M$len 0 H0 V$len z"/>
205 </clipPath>
206 CLIP
207 ++$t;
209 print O " </defs>\n";
210 $t=0;
211 my $theY=$OutBorder;
212 for (@LineLen) {
213 my $len = $_ / $BasepPx;
214 my $intlen = int(0.999 + $len);
215 my $halflen = $intlen/2;
216 my $Ylen = $halflen + 50;
217 print O <<DEF2;
218 <g transform="translate($OutBorder,$theY)" stroke-width="2" stroke="black" font-size="$FontSize" font-family="$FontFamily">
219 <rect x="0" y="0" width="$Xrange" height="$Ylen" fill="none" stroke="none" stroke-width="2" />
220 <g transform="matrix(0.5,0.5,-0.5,0.5,$halflen,0)" clip-path="url(#curveClip$t)" stroke-width="0">
221 <rect x="0" y="0" width="$len" height="$len" fill="grey" stroke="green" stroke-width="1" />
222 DEF2
223 # LD
224 for my $scaff (@{$Scaffolds[$t]}) {
225 my $locus1 = $scaffolds{$scaff};
226 for my $pos1 (sort {$a<=>$b} keys %{$PlotLD{$locus1}}) {
227 for my $pos2 (sort {$a<=>$b} keys %{$PlotLD{$locus1}{$pos1}}) {
228 my $val = getVal($PlotLD{$locus1}{$pos1}{$pos2});
229 next if $val == -1;
230 my $color = colormap($val,
231 [[0,1], [1,1]],
232 [[0,1],[0.75,1],[0.9,0.3],[1,0]],
233 [[0,1],[0.75,0], [1,0]]);
234 #$color = colormap($val);
235 #print join(',',$locus1,$pos1,$pos2,$val),"\n";
236 my $py = $intlen - $pos2 - $GRIDsize;
237 print O <<GRID;
238 <rect x="$pos1" y="$py" width="$GRIDsize" height="$GRIDsize" fill="$color"/>
239 GRID
243 print O ' </g>
244 <g transform="translate(0,',5 + $len/2,')" stroke-width="0">
245 <line x1="0" y1="0" x2="',$len,'" y2="0" stroke-width="3"/>
247 unless ($t) {
248 print O ' <line x1="0" y1="0" x2="',$ScaffoldLen{'scaffold75'}/$BasepPx,'" y2="0" stroke="blue" stroke-width="3"/>',"\n";
249 # scaffold75,1522816,SLC45A2 B:0,C b:1,T [3]
250 my $pSLC45A2 = 1522816/$BasepPx;
251 print O ' <circle cx="',$pSLC45A2,'" cy="0" fill="gold" r="2"/>',"\n";
252 print O ' <line x1="',871133/$BasepPx,'" y1="0" x2="',924457/$BasepPx,'" y2="0" stroke="red" stroke-width="3"/>',"\n";
253 print O ' <circle cx="',924457/$BasepPx,'" cy="-3" fill="gold" r="2"/>',"\n";
254 print O ' <line x1="',4078553/$BasepPx,'" y1="0" x2="',4192353/$BasepPx,'" y2="0" stroke="red" stroke-width="3"/>',"\n";
255 my $tmpShift = 0;
256 my $OneMega = $tmpShift + (1000000 / $BasepPx);
257 print O <<BART;
258 <polyline fill="none" stroke-width="1" points="$tmpShift,-10 $tmpShift,0 $OneMega,0 $OneMega,-10" />
259 BART
261 # plink
262 my $heigh = 36;
263 my $HalfGRIDsize = $GRIDsize/2;
264 print O ' <rect x="-',$GRIDsize,'" y="1" width="',$GRIDsize,'" height="',$heigh,'" fill="grey"/>',"\n",
265 ' <rect x="',$len,'" y="1" width="',$GRIDsize,'" height="',$heigh,'" fill="grey"/>',"\n";
266 for my $scaff (@{$Scaffolds[$t]}) {
267 my $locus = $scaffolds{$scaff};
268 my $pldat = $PlotPlink{$locus};
269 for my $pos (sort {$a<=>$b} keys %{$pldat}) {
270 my $px = $pos;
271 my ($P,$U) = getPLval($pldat->{$pos});
272 if ($P) {
273 my $h = $P*$heigh;
274 print O <<BARP;
275 <rect x="$px" y="1" width="$HalfGRIDsize" height="$h" fill="green" title="$scaff,$pos"/>
276 BARP
278 if ($U) {
279 my $h = $U*$heigh;
280 $px += $HalfGRIDsize;
281 print O <<BARU;
282 <rect x="$px" y="1" width="$HalfGRIDsize" height="$h" fill="red" title="$scaff,$pos"/>
283 BARU
287 print O " </g>\n </g>\n";
288 ++$t;
289 $theY += $Ylen + $InBorder;
291 #bar
292 print O <<THEBAR;
293 <g transform="translate(480,24)" stroke="black" font-size="12" font-family="Arial" stroke-width="0">
294 <defs>
295 <linearGradient id="ColorMap" x1="0" y1="100%" x2="0" y2="0">
296 <stop offset="0%" stop-color="#FFF"/>
297 <stop offset="75%" stop-color="#FF0"/>
298 <stop offset="90%" stop-color="#FF4d00"/>
299 <stop offset="100%" stop-color="#F00"/>
300 </linearGradient>
301 </defs>
302 <rect fill="url(#ColorMap)" stroke-width="1" x="0" y="0" width="10" height="200"/>
303 <text x="10" y="0" dx="3" dy="5">1</text>
304 <text x="10" y="50" dx="3" dy="5">0.75</text>
305 <text x="10" y="100" dx="3" dy="5">0.5</text>
306 <text x="10" y="150" dx="3" dy="5">0.25</text>
307 <text x="10" y="200" dx="3" dy="5">0</text>
308 </g>
309 THEBAR
311 print O "</svg>\n";
312 close O;
314 ddx \%Stat;
315 # plothap.pl:301: { "Base/px" => 40000, "GRIDsize" => 5 }
317 sub getPLval {
318 my $arr = $_[0];
319 my ($sumP,$cntP,$sumU,$cntU,$P,$U)=(0,0,0,0,0,0);
320 for my $v (@$arr) {
321 if ($v < 0) {
322 $sumP += -$v;
323 ++$cntP;
324 } else {
325 $sumU += $v;
326 ++$cntU;
329 if ($cntP) {
330 $P = $sumP/($cntP*$MaxSampleCnt);
332 if ($cntU) {
333 $U = $sumU/$cntU;
335 return ($P,$U);
338 __END__
340 ./plothap.pl ../rec.npa 17all75.txt
341 perl ./plothap.pl ../rec16q20.npa 16-75n1458j15.txt out16n2