new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / radseq / plotp2chr.pl
blob8ea352139d699f2f260e9fec0ff604e5388f8348
1 #!/usr/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
5 =cut
6 use strict;
7 use warnings;
8 use Data::Dump qw(ddx);
9 use Galaxy;
11 die "Usage: $0 <prefix> [out midfix]\n" if @ARGV<1;
12 my $inf=shift;
13 my $outf=shift;
15 if ($outf) {
16 $outf = ".$outf";
17 } else {
18 $outf = '';
21 my $scaffnfo = 'chr.nfo';
22 $scaffnfo = 'chr.inf';
23 my $markerdat = '/share/users/huxs/work/tiger/paper/rec.npa';
24 $markerdat = "/share/users/huxs/work/tiger/cat/rec.npa";
25 $markerdat = $inf . '.npa';
26 print "From [$inf] to [$inf$outf.(dat|svg)]\n";
28 my %MVScaffolds = map {$_ => 1} qw(scaffold75 scaffold1458 scaffold188);
30 my %Stat;
32 sub getVal($) { # deprecated
33 my %dat = %{$_[0]};
34 my ($sum,$cnt,$max,$major)=(0,0,0,0);
35 my $mjcnt=0;
36 for my $k (keys %dat) {
37 $sum += $k * $dat{$k};
38 $cnt += $dat{$k};
39 $max = $k if $max < $k;
40 if ($k and $mjcnt<$dat{$k}) {
41 $major = $k;
42 $mjcnt = $dat{$k};
45 if ($cnt) {
46 return ($major,$max);
47 return $sum/$cnt;
48 } else {
49 return -1;
52 sub getCircles($) {
53 my %dat = %{$_[0]};
54 my @ret;
55 my ($max,$maxR) = (0,0);
56 for my $k (keys %dat) {
57 next unless $k;
58 my $r = int( 100 * sqrt($dat{$k}) ) / 100;
59 $r = 1.35 if $r > 1.35;
60 push @ret,[$k,$r];
61 if ($max < $k) {
62 $max = $k;
63 $maxR = $dat{$k};
66 unshift @ret,[$max,$maxR];
67 return @ret;
70 my (%ScaffoldLen,@ChrOrder0,%ChrID,%ChrColor);
71 open I,'<',$scaffnfo or die $!;
72 while (<I>) {
73 next if /^#/;
74 chomp;
75 my @items = split /\t/;
76 $ScaffoldLen{$items[0]} = $items[1];
77 $ChrID{$items[0]} = $items[2];
78 $ChrColor{$items[0]} = $items[3];
79 push @ChrOrder0,$items[0];
81 close I;
82 print "\n",scalar keys(%ScaffoldLen)," scaffold(s) Load.\n";
84 my %MarkerDat;
85 open I,'<',$markerdat or die $!;
86 while (<I>) {
87 next if /^#/;
88 chomp;
89 my @items = split /\t/;
90 die unless exists $ScaffoldLen{$items[1]};
91 my @tmp = split /[\/|]/,$items[7].'/'.$items[8]; # Aff,UnAff
92 push @{$MarkerDat{$items[1]}},[ $items[2],$items[9],int(0.5-1000*log($items[9])/log(10))/1000,$tmp[1]+$tmp[2] ]; # pos,p,lg(p) round to 0.001,SumMid(0 is red)
94 close I;
95 #ddx \%MarkerDat;
96 my (%OrderbyChr,%OrderedOnce,@DirectOrder,@notOrdered);
97 open I,'<',$inf.'.order' or warn $! and goto NULLORDERFILE;
98 while (<I>) {
99 next if /^#/;
100 chomp;
101 my ($scaff,$chr) = split /\t/ or next; # skip blank lines
102 next if exists $OrderedOnce{$scaff};
103 push @{$OrderbyChr{$chr}},$scaff;
104 push @DirectOrder,$scaff;
105 ++$OrderedOnce{$scaff};
107 close I;
108 NULLORDERFILE:
109 my $TotalLen=0;
110 #my @order = map {$_='scaffold'.$_} sort {$a<=>$b} map {s/^scaffold//;$_;} keys %MarkerDat;
111 my @order = @ChrOrder0;
112 for my $scaff (@order) {
113 next unless exists $MarkerDat{$scaff};
114 if (exists $OrderedOnce{$scaff}) {
115 $Stat{Len_Ordered} += $ScaffoldLen{$scaff} or die;
116 ++$Stat{Scaffold_Ordered};
117 } else {
118 push @notOrdered,$scaff;
119 $Stat{Len_notOrdered} += $ScaffoldLen{$scaff} or die;
120 ++$Stat{Scaffold_notOrdered};
122 $TotalLen += $ScaffoldLen{$scaff};
124 if ($Stat{Scaffold_notOrdered}) {
125 $Stat{AvgLen_notOrdered} = $Stat{Len_notOrdered} / $Stat{Scaffold_notOrdered};
127 if ($Stat{Scaffold_Ordered}) {
128 $Stat{AvgLen_Ordered} = $Stat{Len_Ordered} / $Stat{Scaffold_Ordered};
131 # ------ BEGIN PLOT --------
132 # 1in = 2.54cm = 25.4mm = 72pt = 12pc, 1pc=2.1167mm, 1pt=0.35278mm
133 my @color = qw(Red Purple Brown Navy Green Maroon Blue Teal);
134 my $Xrange = 500;
135 $Xrange = 1050;
136 my $Yrange = 320;
137 my $YmaxVal = 5;
138 $YmaxVal = 4;
139 my $ArrowLen = 20; # 16+4
140 my $axisTick = 4;
141 my $OutBorder = 24;
142 my $InBorder = 40;
143 my $Yextra = 200;
144 my $Xtotal = $Xrange + $ArrowLen + 2*$OutBorder;
145 my $Yitem = $Yrange + $ArrowLen + $InBorder + $Yextra;
146 my $FontSize = int($Xrange/40);
147 $FontSize = int($Yrange/25);
148 my $FontFamily = 'Arial';
150 my $perUnit = int($TotalLen/10); # 279.330936 M /10 = 27.933093 M
151 my $numlevel = int(log($perUnit)/log(10)); # 7
152 my $numSuflevel = int($numlevel/3); # 2
153 my $numSuf=( '', qw( K M G T P E Z Y ) )[$numSuflevel]; # M <---
154 $numSuflevel = 10 ** (3*$numSuflevel); # 1,000,000 <---
155 my $roundTo = 5 * (10 ** ($numlevel-1)); # 5e6
156 my $unit = $perUnit + (-$perUnit % $roundTo); # 30 M
157 my $countMax = int($TotalLen/$unit) + (($TotalLen%$unit)?1:0);
158 #print join(",",$TotalLen/$numSuflevel,$perUnit/$numSuflevel,$numlevel,$numSuf,$numSuflevel,$roundTo/$numSuflevel,$unit/$numSuflevel,$countMax),"\n";
159 my $BasepPx = 10*$unit/$Xrange;
161 my @Yticks;
162 for my $i (1 .. $YmaxVal) { # 0 will be shared with X-axie
163 my $pY = $Yrange - $i * ($Yrange/$YmaxVal);
164 push @Yticks,$pY;
167 my (%PlotDat,%PlotScaffRange,%PlotCandiDat);
168 my $start = 0;
169 for my $scaff (@notOrdered,@DirectOrder) { # Well, Ordered last to be far away from X-axie.
170 unless (exists $MarkerDat{$scaff}) {
171 warn "[!marker]$scaff\n";
172 ++$Stat{'Marker_Not_found'};
173 next;
175 my $maxlgp = 0;
176 for my $mditem (@{$MarkerDat{$scaff}}) {
177 my ($pos,$p,$lgp,$isFalse) = @$mditem;
178 my $posOchr = $start + $pos;
179 if ($posOchr < 0) {
180 $posOchr = 0;
181 ++$Stat{'Marker_Pos_Minus'};
182 } elsif ($pos > $ScaffoldLen{$scaff}) {
183 $posOchr = $start + $ScaffoldLen{$scaff};
184 ++$Stat{'Marker_Pos_Overflow'};
186 my $value = int(0.5+10*$posOchr/$BasepPx)/10; # 10 => 720 dpi for pt unit system, enough.
187 ++$PlotDat{$scaff}{$value}{$lgp};
188 ++$PlotCandiDat{$scaff}{$value}{$lgp} unless $isFalse;
189 $maxlgp = $lgp if $maxlgp < $lgp;
191 $PlotScaffRange{$scaff} = [ ( map {int(0.5+10*$_/$BasepPx)/10} ($start+1,$start+$ScaffoldLen{$scaff}) ),$maxlgp ];
192 $start += $ScaffoldLen{$scaff};
195 my $ChrCount = keys %PlotDat;
196 $ChrCount = 1;
197 my $Ytotal = $Yitem*$ChrCount - $InBorder + $ArrowLen + 2*$OutBorder;
199 open O,'>',$inf.$outf.'.svg' or die $!;
200 print O <<HEAD;
201 <?xml version="1.0"?>
202 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.2" baseProfile="tiny"
203 width="${Xtotal}pt" height="${Ytotal}pt" viewBox="0 0 $Xtotal $Ytotal">
204 <title>Plot $inf</title>
205 <!--
206 <rect x="0" y="0" width="$Xtotal" height="$Ytotal" fill="none" stroke="red" stroke-width="2" />
208 HEAD
210 print O <<DEF1;
211 <defs>
212 <g id="axis" stroke="black" font-size="$FontSize" font-family="$FontFamily" stroke-width="0" text-anchor="middle">
213 <polyline fill="none" points="-2,-4 0,-20 2,-4 0,-20
214 DEF1
215 for (@Yticks) {
216 print O " 0,$_ -$axisTick,$_ 0,$_ \n"
218 print O " 0,$Yrange\n";
220 for my $i (1 .. 10) {
221 my $x = $i*$Xrange/10;
222 print O ' ',$x,',',$Yrange,' ',$x,',',$Yrange+$axisTick,' ',$x,',',$Yrange,"\n";
224 print O ' ',$Xrange+$ArrowLen,',',$Yrange,' ',
225 $Xrange+$axisTick,',',$Yrange-2,' ',$Xrange+$axisTick,',',$Yrange+2,' ',
226 $Xrange+$ArrowLen,',',$Yrange,'" stroke-width="2"/>',"\n";
227 for my $i (1 .. $YmaxVal) {
228 my $y = $Yticks[$i-1];
229 print O <<TXTAX;
230 <text x="0" y="$y" dx="-20" dy="5" text-anchor="middle">$i</text>
231 <line x1="0" y1="$y" x2="$Xrange" y2="$y" stroke-width="0.5" stroke-dasharray="5,9"/>
232 TXTAX
234 for my $i (0 .. 10) {
235 my $x = $i*$Xrange/10;
236 my $l = $unit*$i/$numSuflevel;
237 $l .= " $numSuf" if $l;
238 print O <<TXT1;
239 <text x="$x" y="$Yrange" dy="20" text-anchor="middle">$l</text>
240 TXT1
242 print O <<DEF2;
243 </g>
244 </defs>
245 <g transform="translate($OutBorder,$OutBorder)" stroke-width="2" stroke="black" font-size="$FontSize" font-family="$FontFamily">
246 DEF2
248 my %maxCircles;
249 my $thisChrNo=0;
250 for my $chr ('-lg(p)') {
251 my $topY = $ArrowLen + $Yitem*$thisChrNo;
252 print O <<TXT2;
253 <g transform="translate(0,$topY)" stroke-width="1">
254 <use x="0" y="0" xlink:href="#axis" stroke-width="2"/>
255 <text x="8" y="-10" stroke-width="0">$chr</text>
256 TXT2
257 my $scaffcnt=0;
258 for my $scaff (@DirectOrder,@notOrdered) {
259 next unless exists $MarkerDat{$scaff};
260 my @Poses = sort {$a<=>$b} keys %{$PlotDat{$scaff}};
261 my $thiscolor = $color[$scaffcnt%scalar(@color)];
262 my ($pa,$pb,$maxlgp) = @{$PlotScaffRange{$scaff}};
263 # if (exists $MVScaffolds{$scaff}) {
264 # $thiscolor = 'red';
265 # } else {
266 # $thiscolor = 'navy';
268 $thiscolor = '#'.$ChrColor{$scaff};
269 print O ' <g stroke="',$thiscolor,'" fill="',$thiscolor,'" focusable = "true">',"\n <title>$scaff, max=$maxlgp</title>\n";
270 my $topestY = $Ytotal;
271 for my $pos (@Poses) {
272 my @Circles = getCircles($PlotDat{$scaff}{$pos});
273 my @CandiCircles = getCircles($PlotCandiDat{$scaff}{$pos}) if exists $PlotCandiDat{$scaff}{$pos};
274 my $t = shift @Circles;
275 shift @CandiCircles;
276 my $p = int($pos);
277 unless (exists $maxCircles{$p}) {
278 $maxCircles{$p} = $t;
279 } else {
280 if ($maxCircles{$p}->[0] < $t->[0]) {
281 $maxCircles{$p} = $t;
282 } elsif ($maxCircles{$p}->[0] == $t->[0] and $maxCircles{$p}->[1] < $t->[1]) {
283 $t->[1] += $maxCircles{$p}->[1];
284 $maxCircles{$p} = $t;
287 for (@Circles) {
288 my ($y,$r) = @$_;
289 my $Py = int(10*$Yrange*(1-$y/$YmaxVal))/10;
290 $topestY = $Py if $topestY > $Py;
291 #print "$y,$Py,$Yrange,$YmaxVal\n";
292 print O " <circle cx=\"$pos\" cy=\"$Py\" r=\"$r\" />\n";
294 for (@CandiCircles) {
295 my ($y,$r) = @$_;
296 my $Py = int(10*$Yrange*(1-$y/$YmaxVal))/10;
297 $topestY = $Py if $topestY > $Py;
298 #print "$y,$Py,$Yrange,$YmaxVal\n";
299 # 这里用于把最大值标红
300 #print O " <circle cx=\"$pos\" cy=\"$Py\" r=\"$r\" stroke=\"red\" fill=\"red\" />\n";
302 =pod
303 my ($pYmajor,$pYmax) = map {int(10*$Yrange*(1-$_/$YmaxVal))/10;} ($major,$max);
304 print O <<TXTL;
305 <circle cx="$pos" cy="$pYmajor" r="1" />
306 <circle cx="$pos" cy="$pYmax" r="0.5" />
307 TXTL
308 =cut
310 #my ($pa,$pb) = @{$PlotScaffRange{$scaff}};
311 print O <<TXTLB;
312 <line x1="$pa" y1="$Yrange" x2="$pb" y2="$Yrange" stroke-width="2"/>
313 </g>
314 TXTLB
315 print O " <text x=\"$Poses[0]\" y=\"",$topestY-12,"\" dy=\"5\" text-anchor=\"middle\" fill=\"navy\" stroke-width=\"0\">$scaff</text>\n" if exists $MVScaffolds{$scaff};
316 ++$scaffcnt;
318 # print O ' <polyline fill="none" stroke="gold" stroke-width="0.5" points="';
319 # for my $x (sort {$a<=>$b} keys %maxCircles) {
320 # my ($y,$s) = @{$maxCircles{$x}};
321 # next unless $y;
322 # $y = int(10*$Yrange*(1-$y/$YmaxVal))/10;
323 # print O "$x,$y ";
325 # print O "\" />\n </g>\n";
326 my ($xx,$yy)=(0,$Yrange + 30);
327 for my $id (@ChrOrder0) {
328 my $name = $ChrID{$id};
329 my $color = '#'.$ChrColor{$id};
330 my $tx = $xx + 17;
331 print O <<CHRBOX;
332 <rect x="$xx" y="$yy" rx="5" ry="5" width="15" height="15" style="fill:$color;stroke:black;stroke-width:1"/>
333 <text x="$tx" y="$yy" dy="12" stroke-width="0">Chr$name</text>
334 CHRBOX
335 $xx += 56;
336 if ($xx > $Xtotal - 70) {
337 $yy += 24;
338 $xx = 0;
342 print O " </g>\n";
343 # ++$thisChrNo;
346 print O " </g>\n</svg>\n";
347 =pod
348 print O "
349 </g>
350 <rect x=\"$OutBorder\" y=\"$OutBorder\" width=\"",$Xrange+$ArrowLen,"\" height=\"",$Ytotal-2*$OutBorder,"\" fill=\"none\" stroke=\"blue\" stroke-width=\"1\" />
351 <line x1=\"",$Xrange+$OutBorder,"\" y1=\"$OutBorder\" x2=\"",$Xrange+$OutBorder,"\" y2=\"",$Ytotal-$OutBorder,"\" stroke=\"blue\" stroke-width=\"1\"/>
352 </svg>
354 =cut
355 close O;
357 ddx \%Stat;
358 print commify($TotalLen),"\n";
359 __END__
360 perl -lane '$F[-5]=~s/\,$//;print join("\t",@F[0,1,-5],'f00')' /bak/seqdata/genomes/Felis_catus-6.2/chr.nfo > chr.inf
362 perl -F',' -lane "map {s/'//g;s/ /_/} @F;print join(\"\t\",@F);" ./tig2cat.csv > ./tig2cat.tsv
363 sort -t $'\t' -k 1,1 -k 2,2n -k 3,3n ./tig2cat.tsv > ./tig2cat.tsv.s
364 awk '{print "scaffold"$4"\tchr"$1}' tig2cat.tsv.s|uniq > tig2cat.order
366 ./plotscaf.pl rec16q20.npa15 tig2cats16q20s15