new file: cell2loc.py
[GalaxyCodeBases.git] / perl / bsvf / plotblock.pl
blob82355a53a70ece7920cca2d40ddbca56fa78d15d
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump qw(ddx);
6 die "Usage: $0 <block file>\n" if @ARGV<1;
8 my ($in) = @ARGV;
9 mkdir '/tmp/plots';
10 mkdir './plots';
12 $in = './example18/simVir4_grep/blocks.txt.gz';
13 my $chrOnly = 'chr18';
14 my $viruschr = 'gi|59585|emb|X04615.1|';
16 my $IMG_Width = 1200;
17 my $IMG_Height = 900;
18 my $IMG_Boder = 50;
20 my $Paint_Width = $IMG_Width - 2*$IMG_Boder;
21 my $Paint_Height = $IMG_Height - 2*$IMG_Boder;
22 my $Stroke_Width = $Paint_Width/240;
24 sub openFH($) {
25 my $inf = $_[0];
26 my $FH;
27 if ($inf =~ /\.gz$/i) {
28 open $FH,'-|',"gzip -dc $inf" or die "Error opening $inf: $!\n";
29 } elsif ($inf =~ /\.xz$/i) {
30 open $FH,'-|',"xz -dc $inf" or die "Error opening $inf: $!\n";
31 } elsif ($inf =~ /\.bz2$/i) {
32 open $FH,'-|',"bzip2 -dc $inf" or die "Error opening $inf: $!\n";
33 } else {
34 open $FH,'<',$inf or die "Error opening $inf: $!\n";
36 return $FH;
39 my $fhin = openFH($in);
40 my @cache=();
41 my ($id,$chr,$range,$cnt,$vchr,$vrange,$vcnt);
43 sub getmaxkey($) {
44 my $inh = $_[0];
45 my @keys = sort { $inh->{$b} <=> $inh->{$a} } keys %{$inh};
46 return $keys[0];
49 sub CIAGRdepth($$$$) {
50 my ($CIAGR,$leftPos,$DatHash,$SimInfo) = @_;
51 my $curPos = $leftPos;
52 my (%LR,$ret);
53 while ($CIAGR =~ /(\d+)(\D)/g) {
54 if ($2 eq 'M') {
55 my $end = $curPos + $1 -1;
56 for ($curPos .. $end) {
57 ++$DatHash->{$_};
58 if (defined $SimInfo) {
59 if ($_ >= $$SimInfo[0] and $_ < $$SimInfo[1]) {
60 ++$LR{'L'};
61 } elsif ($_ >= $$SimInfo[1] and $_ <= $$SimInfo[2]) {
62 ++$LR{'R'};
66 $curPos = $end +1;
67 } elsif ($2 eq 'S') {
68 $curPos += $1;
69 } elsif ($2 eq 'D') {
70 $curPos += $1;
71 } else {
72 die "[x]CIAGR[$1 $2] not allowed.\n" if $2 ne 'I';
75 if (%LR) {
76 $ret = getmaxkey(\%LR);
78 return $ret;
81 sub getdepth($) {
82 my ($readsRef)=@_;
83 my (%HumDepth,%VirusDepth,%sEdges,%pEdges,%cpItemCache);
84 %pEdges = ('LL'=>0, 'RR'=>0);
85 my (%HumDep,%VirDep,%HumCut,%HumLeft,%HumRight,%VirStrand,%VirLeft,%VirRight);
86 my $doflag = 0;
87 for (@$readsRef) {
88 my ($qname,$flag,$rname,$pos,$mapq,$CIAGR,undef,undef,undef,undef,undef,@opts) = split /\t/;
89 $qname =~ /_Ref_(\d+)_(\d+)_(\d+)_Vir_([+-])_(\d+)_(\d+)_/ or die;
90 ++$HumLeft{$1};
91 ++$HumCut{$2};
92 ++$HumRight{$3};
93 ++$VirStrand{$4};
94 ++$VirLeft{$5};
95 ++$VirRight{$6};
96 my @XAs = grep(/^[SX]A:Z:/,@opts);
97 #print "+++ @XAs\n";
98 my @t = grep(/\b[fr](\Q$chrOnly\E|\Q$viruschr\E),/,@XAs);
99 if (scalar @t>=0) {
100 $doflag = 1;
101 #print "--- $doflag, @t\n";
102 $cpItemCache{"$qname\t$rname\t$pos"} = $CIAGR;
105 return unless $doflag; # in fact, all items have $doflag==1;
106 my ($humLeft,$humCut,$humRight,$virStrand,$virLeft,$virRight);
107 $humLeft = getmaxkey(\%HumLeft);
108 $humCut = getmaxkey(\%HumCut);
109 $humRight = getmaxkey(\%HumRight);
110 $virStrand = getmaxkey(\%VirStrand);
111 $virLeft = getmaxkey(\%VirLeft);
112 $virRight = getmaxkey(\%VirRight);
113 my $SimInfo = [$humLeft,$humCut,$humRight,$virStrand,$virLeft,$virRight];
114 print ">>> $humLeft,$humCut,$humRight,$virStrand,$virLeft,$virRight $doflag:\n";
115 for (@$readsRef) {
116 my ($qname,$flag,$rname,$pos,$mapq,$CIAGR,$MRNM,$MPOS,undef,undef,undef,@opts) = split /\t/;
117 my ($LR1,$LR2,@itemsEdges,@itempEdges);
118 $MRNM = $rname if $MRNM eq '=';
119 if ($rname eq $chrOnly) {
120 $LR1 = CIAGRdepth($CIAGR,$pos,\%HumDepth,$SimInfo);
121 next unless defined $LR1;
122 } else {
123 $LR1 = 'V';
124 CIAGRdepth($CIAGR,$pos,\%VirusDepth,undef);
126 if (exists $cpItemCache{"$qname\t$MRNM\t$MPOS"}) {
127 my $MCIAGR = $cpItemCache{"$qname\t$MRNM\t$MPOS"};
128 if ($MRNM eq $chrOnly) {
129 $LR2 = CIAGRdepth($MCIAGR,$MPOS,\%HumDepth,$SimInfo);
130 } else {
131 $LR2 = 'V';
132 CIAGRdepth($MCIAGR,$MPOS,\%VirusDepth,undef);
134 if (defined $LR2) {
135 my @t = sort ($LR1,$LR2);
136 ++$pEdges{join('',@t)};
139 my @XAHit;
140 for (@opts) {
141 if (/^[SX]A:Z:([\w\,\;\|\.\+\-]+)$/) {
142 push @XAHit,split(/;/,$1);
145 if (@XAHit) {
146 #print "$qname\n";
147 for (@XAHit) {
148 my ($XAchr,$XApos,$XAcigar,$XAnm) = split /,/;
149 if ($XAchr =~ /^[fr](\Q$chrOnly\E|\Q$viruschr\E)$/) {
150 #print "$1\t$XAchr,$XApos,$XAcigar,$XAnm\n";
151 my $LR3;
152 if ($1 eq $chrOnly) {
153 $LR3 = CIAGRdepth($XAcigar,abs($XApos),\%HumDepth,$SimInfo) if $LR1 eq 'V';
154 } elsif ($1 eq $viruschr and $LR1 ne 'V') {
155 $LR3 = 'V';
156 CIAGRdepth($XAcigar,abs($XApos),\%VirusDepth,undef);
158 if (defined $LR3) {
159 my @t = sort ($LR1,$LR3);
160 push @itemsEdges,join('',@t);
164 if (@itemsEdges) {
165 my $weight = 1 / @itemsEdges;
166 $sEdges{$_} += $weight for @itemsEdges;
170 if (exists $sEdges{'LV'} and exists $sEdges{'RV'}) {
171 ddx [\%sEdges,\%pEdges];
172 return (\%HumDepth,\%VirusDepth,\%sEdges,\%pEdges,$SimInfo);
173 } else {
174 warn ".\n";
175 return;
179 sub doplot($$$$$$) {
180 my ($fh,$HumDepth,$VirusDepth,$sEdges,$pEdges,$SimInfo) = @_;
181 my ($MaxDepth,$HumMin,$HumMax,$VirMin,$VirMax)=(0,@$SimInfo[0,2,4,5]);
182 my @HumCovered = sort {$a <=> $b} keys %{$HumDepth};
183 my @VirCovered = sort {$a <=> $b} keys %{$VirusDepth};
184 for (@HumCovered) {
185 $MaxDepth = $HumDepth->{$_} if $MaxDepth < $HumDepth->{$_};
187 for (@VirCovered) {
188 $MaxDepth = $VirusDepth->{$_} if $MaxDepth < $VirusDepth->{$_};
190 $HumMin = $HumCovered[0] if $HumMin > $HumCovered[0];
191 $HumMax = $HumCovered[-1] if $HumMax < $HumCovered[-1];
192 $VirMin = $VirCovered[0] if $VirMin > $VirCovered[0];
193 $VirMax = $VirCovered[-1] if $VirMax < $VirCovered[-1];
194 my $HumRange = $HumMax - $HumMin +1;
195 my $VirRange = $VirMax - $VirMin +1;
196 my $RatioH = $Paint_Width / $HumRange;
197 my $RatioV = $Paint_Width / $VirRange;
198 my $XRatio = ($RatioH < $RatioV)?$RatioH:$RatioV;
199 my @Cuts;
200 for (@$SimInfo[0,1,2]) {
201 push @Cuts,($_ - $HumMin);
203 for (@$SimInfo[4,5]) {
204 push @Cuts,($_ - $VirMin);
206 print "Hum:$HumCovered[0] - $HumCovered[-1]\nVir:$VirCovered[0] - $VirCovered[-1]\n@$SimInfo\t@Cuts\n";
207 $_ *= $XRatio for @Cuts;
208 my @BarHeight = ($Paint_Height*3/8, $Paint_Height*5/8, $Paint_Height*7/16);
209 my $YRatio = ($Paint_Height*3/8)/$MaxDepth;
210 my $HumLeft = ($Paint_Width - $HumRange*$XRatio)/2;
211 my $VirLeft = ($Paint_Width - $VirRange*$XRatio)/2;
212 print $fh '<line stroke="gray" stroke-width="',$Stroke_Width,'" x1="',$VirLeft+$IMG_Boder,'" y1="',$IMG_Boder+$BarHeight[0]+$Stroke_Width/2,'" x2="',$VirLeft+$IMG_Boder+$VirRange*$XRatio,'" y2="',$IMG_Boder+$BarHeight[0]+$Stroke_Width/2,'"/>',"\n";
213 print $fh '<line stroke="gray" stroke-width="',$Stroke_Width,'" x1="',$HumLeft+$IMG_Boder,'" y1="',$IMG_Boder+$BarHeight[1]-$Stroke_Width/2,'" x2="',$HumLeft+$IMG_Boder+$HumRange*$XRatio,'" y2="',$IMG_Boder+$BarHeight[1]-$Stroke_Width/2,'"/>',"\n";
215 print $fh '<line stroke="firebrick" stroke-width="',$Stroke_Width,'" x1="',$Cuts[3]+$VirLeft+$IMG_Boder,'" y1="',$IMG_Boder+$BarHeight[0]+$Stroke_Width,'" x2="',$Cuts[4]+$VirLeft+$IMG_Boder,'" y2="',$IMG_Boder+$BarHeight[0]+$Stroke_Width,'"/>',"\n";
216 print $fh '<line stroke="forestgreen" stroke-width="',$Stroke_Width,'" x1="',$Cuts[0]+$HumLeft+$IMG_Boder,'" y1="',$IMG_Boder+$BarHeight[1]-$Stroke_Width,'" x2="',$Cuts[1]+$HumLeft+$IMG_Boder,'" y2="',$IMG_Boder+$BarHeight[1]-$Stroke_Width,'"/>',"\n";
217 print $fh '<line stroke="mediumblue" stroke-width="',$Stroke_Width,'" x1="',$Cuts[1]+$HumLeft+$IMG_Boder,'" y1="',$IMG_Boder+$BarHeight[1]-$Stroke_Width,'" x2="',$Cuts[2]+$HumLeft+$IMG_Boder,'" y2="',$IMG_Boder+$BarHeight[1]-$Stroke_Width,'"/>',"\n";
219 print $fh "<defs>\n";
220 print $fh '<path id="pLV" d="M',$Cuts[0]+$HumLeft+$IMG_Boder+$Stroke_Width/2,',',$IMG_Boder+$BarHeight[1]-2*$Stroke_Width,' Q',$HumLeft+$IMG_Boder,',',$IMG_Boder+$BarHeight[2],' ',$Cuts[3]+$VirLeft+$IMG_Boder,',',$IMG_Boder+$BarHeight[0]+2*$Stroke_Width,'"
221 fill="none" stroke="limegreen" stroke-width="',$Stroke_Width,'" />',"\n";
222 print $fh '<path id="sLV" d="M',$VirLeft+$IMG_Boder+($Cuts[4]+15*$Cuts[3])/16,',',$IMG_Boder+$BarHeight[0]+2*$Stroke_Width,' Q',$HumLeft+$IMG_Boder+$Cuts[1],',',$IMG_Boder+$BarHeight[2],' ',$Cuts[1]+$HumLeft+$IMG_Boder-$Stroke_Width/2,',',$IMG_Boder+$BarHeight[1]-2*$Stroke_Width,'"
223 fill="none" stroke="limegreen" stroke-width="',$Stroke_Width,'" />',"\n";
225 print $fh '<path id="sRV" d="M',$Cuts[1]+$HumLeft+$IMG_Boder+$Stroke_Width/2,',',$IMG_Boder+$BarHeight[1]-2*$Stroke_Width,' Q',$HumLeft+$IMG_Boder+$Cuts[1],',',$IMG_Boder+$BarHeight[2],' ',$VirLeft+$IMG_Boder+$Cuts[3]+($Cuts[4]-$Cuts[3])*15/16,',',$IMG_Boder+$BarHeight[0]+2*$Stroke_Width,'"
226 fill="none" stroke="royalblue" stroke-width="',$Stroke_Width,'" />',"\n";
227 print $fh '<path id="pRV" d="M',$Cuts[4]+$VirLeft+$IMG_Boder,',',$IMG_Boder+$BarHeight[0]+2*$Stroke_Width,' Q',$HumLeft+$IMG_Boder+$Cuts[2],',',$IMG_Boder+$BarHeight[2],' ',$Cuts[2]+$HumLeft+$IMG_Boder-$Stroke_Width/2,',',$IMG_Boder+$BarHeight[1]-2*$Stroke_Width,'"
228 fill="none" stroke="royalblue" stroke-width="',$Stroke_Width,'" />',"\n";
229 print $fh "</defs>\n<use xlink:href=\"#pLV\"/>\n<use xlink:href=\"#sLV\"/>\n<use xlink:href=\"#pRV\"/>\n<use xlink:href=\"#sRV\"/>\n";
230 print $fh '<text text-anchor="middle" dominant-baseline="text-before-edge" font-family="Verdana" font-size="20">',"\n";
231 print $fh "<textPath startOffset=\"50%\" xlink:href=\"#pLV\">pLV: ${$pEdges}{'LV'}</textPath>\n";
232 print $fh "<textPath startOffset=\"50%\" xlink:href=\"#sLV\">sLV: ${$sEdges}{'LV'}</textPath>\n";
233 print $fh "<textPath startOffset=\"50%\" xlink:href=\"#pRV\">pRV: ${$pEdges}{'RV'}</textPath>\n";
234 print $fh "<textPath startOffset=\"50%\" xlink:href=\"#sRV\">sRV: ${$sEdges}{'RV'}</textPath>\n";
235 print $fh '</text>',"\n";
236 print $fh '<text x="',0.5*($Cuts[0]+$Cuts[1])+$HumLeft+$IMG_Boder,'" y="',$IMG_Boder+$BarHeight[1]-2*$Stroke_Width,'" font-family="Verdana" font-size="20" text-anchor="middle" alignment-baseline="text-after-edge">pLL: ',${$pEdges}{'LL'},'</text>',"\n";
237 print $fh '<text x="',0.5*($Cuts[1]+$Cuts[2])+$HumLeft+$IMG_Boder,'" y="',$IMG_Boder+$BarHeight[1]-2*$Stroke_Width,'" font-family="Verdana" font-size="20" text-anchor="middle" alignment-baseline="text-after-edge">pRR: ',${$pEdges}{'RR'},'</text>',"\n";
238 print $fh '<text x="',0.5*($Cuts[3]+$Cuts[4])+$VirLeft+$IMG_Boder,'" y="',$IMG_Boder+$BarHeight[0]+$Stroke_Width,'" font-family="Verdana" font-size="20" text-anchor="middle" alignment-baseline="text-before-edge">pVV: ',${$pEdges}{'VV'},'</text>',"\n";
240 for my $bp (@VirCovered) {
241 my $depth = $VirusDepth->{$bp};
242 my $x = ($bp - $VirMin +0.4) * $XRatio +$VirLeft+$IMG_Boder;
243 #print "$bp:$depth -> $x\n";
244 print $fh '<line stroke="red" stroke-width="',$XRatio*0.8,'" x1="',$x,'" y1="',$IMG_Boder+$BarHeight[0],'" x2="',$x,'" y2="',$IMG_Boder+$BarHeight[0]-$depth*$YRatio,'"/>'," $bp\n";
246 #print "@VirCovered\n";
247 for my $bp (@HumCovered) {
248 my $depth = $HumDepth->{$bp};
249 my $x = ($bp - $HumMin +0.4) * $XRatio +$HumLeft+$IMG_Boder;
250 #print "$bp:$depth -> $x\n";
251 print $fh '<line stroke="blue" stroke-width="',$XRatio*0.8,'" x1="',$x,'" y1="',$IMG_Boder+$BarHeight[1],'" x2="',$x,'" y2="',$IMG_Boder+$BarHeight[1]+$depth*$YRatio,'"/>'," $bp\n";
255 while (<$fhin>) {
256 chomp;
257 if (/^\[(B\d+)\]/) {
258 my ($HumDepth,$VirusDepth,$sEdges,$pEdges,$SimInfo);
259 if (defined $id and defined $vchr) {
260 #print "[$id]\t$chr,$range,$cnt\t$vchr,$vrange,$vcnt\n",join("\n",@cache),"\n\n\n";
261 warn "> $id - $1\n";
262 ($HumDepth,$VirusDepth,$sEdges,$pEdges,$SimInfo) = getdepth(\@cache);
263 if (defined $HumDepth) {
264 my $fh;
265 open $fh,'>',"./plots/$id.svg";
266 print $fh '<?xml version="1.0"?>
267 <svg width="',$IMG_Width,'" height="',$IMG_Height,'" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
269 doplot($fh,$HumDepth,$VirusDepth,$sEdges,$pEdges,$SimInfo);
270 print $fh "</svg>\n";
271 close $fh;
273 if ($id eq 'B1553') {
274 #ddx $VirusDepth;
275 #die "$id\n";
278 $id = $1;
279 @cache = ();
280 $chr='';
281 $vchr=undef;
282 } elsif (/^HostRange=([\w\:\-\,\|\.]+)$/) {
283 my $t = $1;
284 $id = undef if $t =~ /,/;
285 ($chr,$range,$cnt) = split /:/,$t;
286 $id = undef if $chr ne $chrOnly;
287 } elsif (/^VirusRange=([\w\:\-\,\|\.]+)$/) {
288 my $t = $1;
289 $id = undef if $t =~ /,/;
290 ($vchr,$vrange,$vcnt) = split /:/,$t;
291 $id = undef if $vchr ne $viruschr;
292 } elsif (/^SamFS=/) {
293 next;
294 } else {
295 push @cache,$_ unless /^$/;