3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
8 use Data
::Dump
qw(ddx);
11 die "Usage: $0 <order file> <marker.rec> <outfile> <TRUE for 97&1457 only,also dot size>\n" if @ARGV<3;
17 my $scaffnfo = '/bak/seqdata/genomes/TigerRefGenome/chr.nfo';
18 #$scaffnfo = '/bak/seqdata/genomes/Felis_catus-6.2/chr.nfo';
20 print "From [O:$orderf][$markerdat] to [$outf.(dat|svg)]\n";
22 my %MVScaffolds = map {$_ => 1} qw(scaffold97 scaffold1457 scaffold91);
26 my %ResultZone = ('scaffold97' => -1, 'scaffold1457' => 1); # scaffold97 on '-' strand.
31 sub getVal
($) { # deprecated
33 my ($sum,$cnt,$max,$major)=(0,0,0,0);
35 for my $k (keys %dat) {
36 $sum += $k * $dat{$k};
38 $max = $k if $max < $k;
39 if ($k and $mjcnt<$dat{$k}) {
53 my $MaxCirclesR = 1.5;
57 my ($max,$maxR) = (0,0);
58 for my $k (keys %dat) {
60 my $r = int( 100 * sqrt($dat{$k}) ) / 100;
62 $r = $MaxCirclesR if $r > $MaxCirclesR;
69 unshift @ret,[$max,$maxR];
73 my (%ScaffoldLen,@ChrOrder0);
74 open I
,'<',$scaffnfo or die $!;
78 my @items = split /\t/;
79 $ScaffoldLen{$items[0]} = $items[1];
80 push @ChrOrder0,$items[0];
83 print "\n",scalar keys(%ScaffoldLen)," scaffold(s) Load.\n";
86 open I
,'<',$markerdat or die $!;
90 my @items = split /\t/;
92 next unless $ResultZone{$items[1]};
94 die unless exists $ScaffoldLen{$items[1]};
95 my @tmp = split /[\/|]/,$items[7].'/'.$items[8]; # Aff,UnAff
96 push @{$MarkerDat{$items[1]}},[ $items[2],$items[9],int(0.5-1000*log($items[9])/log(10))/1000,$tmp[1]+$tmp[2] ]; # pos,p,lg(p) round to 0.001,SumMid(0 is red)
101 # [8766, 0.3698, 0.432, 7],
102 # [33512, 0.02941, 1.532, 4],
103 # [34634, 0.4706, 0.327, 7],
104 # [36213, 0.03251, 1.488, 5],
105 # [6453045, 1, 0, 9],
107 my (%OrderbyChr,@ChrOrder,%OrderedOnce,@DirectOrder,@notOrdered);
108 my (@order,%Scaff2Chr);
109 open I,'<',$orderf or warn $! and goto NULLORDERFILE;
113 my ($scaff,$chr) = split /\t/ or next; # skip blank lines
114 next if exists $OrderedOnce{$scaff};
115 next if $type and !$ResultZone{$scaff}; # only those in %ResultZone
116 push @ChrOrder,$chr unless exists $OrderbyChr{$chr};
117 push @{$OrderbyChr{$chr}},$scaff;
118 push @DirectOrder,$scaff;
119 ++$OrderedOnce{$scaff};
120 $Scaff2Chr{$scaff} = $chr;
125 ### my %UsedChr = map {$_ => 0} @ChrOrder;
129 @order = map {$_='scaffold
'.$_} sort {$a<=>$b} map {s/^scaffold//;$_;} keys %MarkerDat;
133 for my $scaff (@order) {
134 next unless exists $MarkerDat{$scaff};
135 if (exists $OrderedOnce{$scaff}) {
136 $Stat{Len_Ordered} += $ScaffoldLen{$scaff} or die;
137 ++$Stat{Scaffold_Ordered};
139 push @notOrdered,$scaff;
140 $Stat{Len_notOrdered} += $ScaffoldLen{$scaff} or die;
141 ++$Stat{Scaffold_notOrdered};
143 $TotalLen += $ScaffoldLen{$scaff};
144 ### $UsedChr{ $Scaff2Chr{$scaff} } = 1 if $Scaff2Chr{$scaff}; # Well, assume the %ResultZone is mapped.
146 if ($Stat{Scaffold_notOrdered}) {
147 $Stat{AvgLen_notOrdered} = $Stat{Len_notOrdered} / $Stat{Scaffold_notOrdered};
149 if ($Stat{Scaffold_Ordered}) {
150 $Stat{AvgLen_Ordered} = $Stat{Len_Ordered} / $Stat{Scaffold_Ordered};
156 push @chrs,$_ if $UsedChr{$_};
162 push @ChrOrder,$CHR_UN unless $type;
164 # ------ BEGIN PLOT --------
165 # 1in = 2.54cm = 25.4mm = 72pt = 12pc, 1pc=2.1167mm, 1pt=0.35278mm
166 my @color = qw(Black Red Green Navy Blue Purple Orange Gray Maroon Teal Brown);
167 @color = qw(Black Brown #0F8B43 #3954A5 #199BCD #B2499B #EE7821 #686868); # #ED1924
170 $Xrange = 500 if $type;
171 # Y=340pt, X=980pt or 500pt
173 my $ArrowLen = 20; # 16+4
177 my $Xtotal = $Xrange + $ArrowLen + 2*$OutBorder;
178 my $Yitem = $Yrange + $ArrowLen + $InBorder;
179 my $FontSize = int($Yrange/16);
180 my $SmallFontSize = $FontSize-2;
181 my $FontFamily = 'Arial';
183 my $perUnit = int($TotalLen/10); # 279.330936 M /10 = 27.933093 M
184 my $numlevel = int(log($perUnit)/log(10)); # 7
185 my $numSuflevel = int($numlevel/3); # 2
186 my $numSuf=( '', qw( K M G T P E Z Y ) )[$numSuflevel]; # M <---
187 $numSuflevel = 10 ** (3*$numSuflevel); # 1,000,000 <---
188 my $roundTo = 5 * (10 ** ($numlevel-1)); # 5e6
189 my $unit = $perUnit + (-$perUnit % $roundTo); # 30 M
190 my $countMax = int($TotalLen/$unit) + (($TotalLen%$unit)?
1:0);
191 print "[!]PlotSize: ",join(",",$TotalLen/$numSuflevel,$perUnit/$numSuflevel,$numlevel,$numSuf,$numSuflevel,$roundTo/$numSuflevel,$unit/$numSuflevel,$countMax),"\n";
192 my $BasepPx = 10*$unit/$Xrange;
193 $BasepPx = $countMax*$unit/$Xrange; # Well, use $countMax instead of fixed 10
195 print "[!]TotalLen:$TotalLen, per10Unit:$perUnit, numlevel:$numlevel, numSuf:$numSuf=$numSuflevel, roundTo:$roundTo, unit:$unit, countMax:$countMax, BasepPx:$BasepPx\n";
198 for my $i ($Ymin .. $YmaxVal) { # 0 will be shared with X-axie, set $Ymin=1 if needed this.
199 my $pY = $Yrange - $i * ($Yrange/$YmaxVal);
203 my (%PlotDat,%PlotScaffRange,%PlotCandiDat);
205 for my $scaff (@DirectOrder,@notOrdered) { # Well, I prefer Ordered last to be far away from Y-axie, but they does not.
206 unless (exists $MarkerDat{$scaff}) {
207 warn "[!marker]$scaff\n" unless $type;
208 ++$Stat{'Marker_Not_found'};
212 if ($type && $ResultZone{$scaff} < 0) {
213 my @tmpDat = @
{$MarkerDat{$scaff}};
215 for my $mditem (@tmpDat) {
216 my ($pos,$p,$lgp,$isFalse) = @
$mditem;
219 my ($maxpos,$minpos) = (sort {$b<=>$a} keys %poses)[0,-1];
220 my $ToDeltra = $maxpos+$minpos;
221 $MarkerDat{$scaff} = [];
222 for my $mditem (@tmpDat) {
223 my ($pos,$p,$lgp,$isFalse) = @
$mditem;
224 push @
{$MarkerDat{$scaff}},[$ToDeltra-$pos,$p,$lgp,$isFalse];
227 for my $mditem (@
{$MarkerDat{$scaff}}) {
228 my ($pos,$p,$lgp,$isFalse) = @
$mditem;
229 my $posOchr = $start + $pos;
232 ++$Stat{'Marker_Pos_Minus'};
233 } elsif ($pos > $ScaffoldLen{$scaff}) {
234 $posOchr = $start + $ScaffoldLen{$scaff};
235 ++$Stat{'Marker_Pos_Overflow'};
237 my $value = int(0.5+10*$posOchr/$BasepPx)/10; # 10 => 720 dpi for pt unit system, enough.
238 ++$PlotDat{$scaff}{$value}{$lgp};
239 #++$PlotCandiDat{$scaff}{$value}{$lgp} unless $isFalse;
240 $maxlgp = $lgp if $maxlgp < $lgp;
242 $PlotScaffRange{$scaff} = [ ( map {int(0.5+10*$_/$BasepPx)/10} ($start+1,$start+$ScaffoldLen{$scaff}) ),$maxlgp ];
243 $start += $ScaffoldLen{$scaff};
246 my $ChrCount = keys %PlotDat;
248 my $Ytotal = $Yitem*$ChrCount - $InBorder + $ArrowLen + 2*$OutBorder;
250 open O
,'>',$outf.'.svg' or die $!;
252 <?xml version="1.0"?>
253 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.2" baseProfile="tiny"
254 width="${Xtotal}pt" height="${Ytotal}pt" viewBox="0 0 $Xtotal $Ytotal">
255 <title>Plot $markerdat [O:$orderf]</title>
257 <rect x="0" y="0" width="$Xtotal" height="$Ytotal" fill="none" stroke="red" stroke-width="2" />
263 <g id="axis" stroke="black" font-size="$FontSize" font-family="$FontFamily" stroke-width="0" text-anchor="middle">
264 <polyline fill="none" points="-2,-4 0,-20 2,-4 0,-20
267 print O " 0,$_ -$axisTick,$_ 0,$_ \n"
269 print O " 0,$Yrange\n";
271 my $XaxisColor = 'white';
274 $XaxisColor = 'black';
277 $axisTick=1; # remove X ticks.
279 for my $i (1 .. $XaxisCount) {
280 my $x = $i*$Xrange/$XaxisCount;
281 print O ' ',$x,',',$Yrange,' ',$x,',',$Yrange+$axisTick,' ',$x,',',$Yrange,"\n";
283 print O ' ',$Xrange+$ArrowLen,',',$Yrange,' ',
284 $Xrange+$axisTick,',',$Yrange-2,' ',$Xrange+$axisTick,',',$Yrange+2,' ',
285 $Xrange+$ArrowLen,',',$Yrange,'" stroke-width="2"/>',"\n";
286 for my $i ($Ymin .. $YmaxVal) {
287 my $y = $Yticks[$i-$Ymin];
289 <text x="0" y="$y" dx="-20" dy="5" text-anchor="middle">$i</text>
290 <line x1="0" y1="$y" x2="$Xrange" y2="$y" stroke-width="0.5" stroke-dasharray="5,9" stroke="$XaxisColor"/>
294 for my $i (0 .. $XaxisCount) {
295 my $x = $i*$Xrange/$XaxisCount;
296 my $l = ($countMax/$XaxisCount) * $unit*$i/$numSuflevel;
297 $l .= " $numSuf" if $l;
299 <text x="$x" y="$Yrange" dy="20" text-anchor="middle" font-size="$SmallFontSize" fill="$XaxisColor">$l</text>
306 <g transform="translate($OutBorder,$OutBorder)" stroke-width="2" stroke="black" font-size="$FontSize" font-family="$FontFamily">
311 for my $chr ('-lg(p)') {
312 my $topY = $ArrowLen + $Yitem*$thisChrNo;
314 <g transform
="translate(0,$topY)" stroke
-width
="1">
315 <use x
="0" y
="0" xlink
:href
="#axis" stroke
-width
="2"/>
316 <text x
="8" y
="-10" stroke
-width
="0">$chr</text
>
319 for my $chr ( @ChrOrder ) {
320 my ($pChrA,$pChrB)=($Xrange,0);
321 my @scaffs; # @DirectOrder,@notOrdered
322 if ($chr ne $CHR_UN) { # 'UN'
323 @scaffs = @
{$OrderbyChr{$chr}};
325 @scaffs = @notOrdered;
327 my $thiscolor = $color[$chrcnt%scalar(@color)];
328 print O
' <g stroke="',$thiscolor,'" fill="',$thiscolor,'" focusable = "true">',"\n <title>$chr</title>\n";
329 for my $scaff ( @scaffs ) {
330 next unless exists $MarkerDat{$scaff};
331 my @Poses = sort {$a<=>$b} keys %{$PlotDat{$scaff}};
332 #my $thiscolor = $color[$scaffcnt%scalar(@color)];
333 my ($pa,$pb,$maxlgp) = @
{$PlotScaffRange{$scaff}};
334 $pChrA = $pa if $pChrA > $pa;
335 $pChrB = $pb if $pChrB < $pb;
337 if (exists $MVScaffolds{$scaff}) {
340 $thiscolor = $color[$chrcnt%scalar(@color)];
342 # $thiscolor = 'black';
344 print O
' <g stroke="',$thiscolor,'" fill="',$thiscolor,'" focusable = "true">',"\n <title>$scaff, max=$maxlgp</title>\n";
345 my $topestY = $Ytotal;
346 for my $pos (@Poses) {
347 my @Circles = getCircles
($PlotDat{$scaff}{$pos});
348 #my @CandiCircles = getCircles($PlotCandiDat{$scaff}{$pos}) if exists $PlotCandiDat{$scaff}{$pos};
349 my $t = shift @Circles;
350 #shift @CandiCircles;
352 unless (exists $maxCircles{$p}) {
353 $maxCircles{$p} = $t;
355 if ($maxCircles{$p}->[0] < $t->[0]) {
356 $maxCircles{$p} = $t;
357 } elsif ($maxCircles{$p}->[0] == $t->[0] and $maxCircles{$p}->[1] < $t->[1]) {
358 $t->[1] += $maxCircles{$p}->[1];
359 $maxCircles{$p} = $t;
367 $r *= 2; # make it bigger
369 my $Py = int(10*$Yrange*(1-$y/$YmaxVal))/10;
370 $topestY = $Py if $topestY > $Py;
371 #print "$y,$Py,$Yrange,$YmaxVal\n";
372 #print O " <circle cx=\"$pos\" cy=\"$Py\" r=\"$r\" />\n";
373 print O
" <rect x=\"$pos\" y=\"$Py\" width=\"$r\" height=\"$r\" />\n";
376 for (@CandiCircles) {
378 my $Py = int(10*$Yrange*(1-$y/$YmaxVal))/10;
379 $topestY = $Py if $topestY > $Py;
380 #print "$y,$Py,$Yrange,$YmaxVal\n";
381 print O " <circle cx=\"$pos\" cy=\"$Py\" r=\"$r\" stroke=\"red\" fill=\"red\" />\n";
385 #my ($pa,$pb) = @{$PlotScaffRange{$scaff}};
387 if (exists $MVScaffolds{$scaff} or $type) {
388 print O
" <text x=\"$Poses[0]\" y=\"",$topestY-$FontSize,"\" dy=\"5\" text-anchor=\"middle\" fill=\"navy\" stroke-width=\"0\">$scaff</text>\n";
394 print O
" <text x=\"",($pChrA+$pChrB)/2,"\" y=\"",$Yrange+$SmallFontSize+(1+$SmallFontSize)*($chrcnt%2),
395 "\" dy=\"5\" text-anchor=\"middle\" stroke-width=\"0\" font-size=\"$SmallFontSize\">$chr</text>\n"; # one long line will break SyntaxHighlight ...
397 <line x1="$pChrA" y1="$Yrange" x2="$pChrB" y2="$Yrange" stroke-width="2"/>
400 ++$chrcnt unless $type;
402 # print O ' <polyline fill="none" stroke="gold" stroke-width="0.5" points="';
403 # for my $x (sort {$a<=>$b} keys %maxCircles) {
404 # my ($y,$s) = @{$maxCircles{$x}};
406 # $y = int(10*$Yrange*(1-$y/$YmaxVal))/10;
409 # print O "\" />\n </g>\n";
414 print O
" </g>\n</svg>\n";
418 <rect x=\"$OutBorder\" y=\"$OutBorder\" width=\"",$Xrange+$ArrowLen,"\" height=\"",$Ytotal-2*$OutBorder,"\" fill=\"none\" stroke=\"blue\" stroke-width=\"1\" />
419 <line x1=\"",$Xrange+$OutBorder,"\" y1=\"$OutBorder\" x2=\"",$Xrange+$OutBorder,"\" y2=\"",$Ytotal-$OutBorder,"\" stroke=\"blue\" stroke-width=\"1\"/>
426 print commify
($TotalLen),"\n";
430 perl
-F
',' -lane
"map {s/'//g;s/ /_/} @F;print join(\"\t\",@F);" ./tig2cat.csv | awk '{if(NR>1)print}' > ./tig2cat
.tsv
431 sort -t
$'\t' -k
1,1 -k
2,2n
-k
3,3n
./tig2cat.tsv > ./tig2cat
.tsv
.s
432 awk
'{print "scaffold"$4"\tchr"$1}' tig2cat
.tsv
.s
|uniq
> tig2cat
.order
434 ./plotscaf
.pl rec16q20
.npa15 tig2cats16q20s15
438 zcat tig2cat
.csv
.gz
| perl
-F
',' -lane
"map {s/'//g;s/ /_/} @F;print join(\"\t\",@F);" | awk
'{if(NR>1)print}' > tig2cat
.tsv
439 sort -t
$'\t' -k
1,1 -k
2,2n
-k
3,3n
./tig2cat.tsv > ./tig2cat
.tsv
.s
440 awk
'{print "scaffold"$4"\tchr"$1}' tig2cat
.tsv
.s
|uniq
> tig2cat
.order
442 perl plotscaf
.pl tig2cat
.order sw000
-17R
.REC
.out
.rec
.npa sw000
443 ./plotscaf
.pl tig2cat
.order sw210
-17R
.REC
.out
.rec
.npa sw210ts
1
444 ./plotscaf
.pl tig2cat
.order sw210
-17R
.REC
.out
.rec
.npa sw210tsn
2.5