new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / olcgv.pl
blob00dec01259069ef689ed409d0e3dfca7a3d51b0f
1 #!/bin/env perl
2 use strict;
3 use warnings;
5 die "Usage: $0 <section_len> <f/r patten> <nodes_overlap> [collapse_repeats] [out_prefix]\n" if @ARGV < 3;
6 my ($SecLen,$FRpatten,$OverlapNodes,$Collapse,$out)=@ARGV;
7 $out='olc' unless $out;
9 #my @Seq=qw/1 2 3 4 A B C D 5 6 7 8 A B C D 9 10 11 12 D C B A 13 14 15 16/;
10 #my @Seq=qw/1 2 3 A B C 4 5 6 A B C 7 8 9 C B A 10 11 12/;
11 my @Seq;
12 die "[x] section_len is [2,26].\n" if $SecLen<2 or $SecLen>26;
13 $FRpatten=lc $FRpatten;
14 my $u=0;
15 push @Seq,++$u for (1..$SecLen);
16 for (split //,$FRpatten) {
17 if ($_ eq 'f') {
18 push @Seq,chr(64+$_) for (1..$SecLen);
19 } elsif ($_ eq 'r') {
20 push @Seq,chr(65+$SecLen-$_) for (1..$SecLen);
22 push @Seq,++$u for (1..$SecLen);
24 my $Seq=join('-',@Seq);
25 my $filename="$out.$FRpatten${SecLen}o$OverlapNodes";
26 $filename .= 'c' if $Collapse;
27 print "Seq: $Seq\nOut: $filename.{gv,png}\n";
29 my @U=grep(/^\d+$/,@Seq);
30 my @R=grep(/[^\d]/,@Seq);
31 my %t;
32 ++$t{$_} for @R;
33 @R=sort keys %t;
34 my $RepeatCount=(values %t)[0];
35 for (values %t) {
36 $RepeatCount=$_ if $RepeatCount>$_;
38 %t=();
39 #print "[$_] " for @seq;
41 sub getName($) {
42 my $tag=$_[0];
43 return [$tag] if $Collapse;
44 if ($tag =~ /^\d+$/) {
45 return [$tag];
46 #return ["u$tag"];
47 } else {
48 my @t;
49 #push @t,"r${tag}$_" for (1..$RepeatCount);
50 push @t,"${tag}$_" for (1..$RepeatCount);
51 return \@t;
55 open O,'>',$filename.'.gv' or die "$!";
56 print O <<HEAD;
57 graph "OLC" {
58 \trankdir=LR; splines=true; overlap=false; fontname = "Arial"; fontsize=20; dpi=180;
59 \tnode [fontname = "Arial", fontsize=22];
60 \tgraph [ label = "OLC plot of $RepeatCount Repeats with Overlap=$OverlapNodes\\nSeq: $Seq" ];
61 HEAD
62 #print O "\tnode [shape = ellipse]; ";
63 #print O " @{&getName($_)}" for (@R);
64 #print O ";\n\tnode [shape = box];\n";
65 unless ($Collapse) {
66 #print O "\t{rank=same; ";
67 #print O "\t{rank=same; @{&getName($_)} ;}\n" for (@R);
68 #print O ";}\n";
69 for my $i (1..$RepeatCount) {
70 print O "\tsubgraph clusterR$i { label=\"\"; style=filled; color=gray95; ";
71 #my @tr;
72 for my $r (@R) {
73 print O " $r$i";
74 #push @tr,"$r$i";
76 #print O ";}; ",join(' -- ',@tr),";};\n";
77 print O "; };\n";
79 } else {
80 print O "\tsubgraph clusterR { label=\"\"; style=filled; color=gray95; ",join(' ',@R),";};\n";
82 print O "\tnode [shape = box];\n";
83 my $i=0;
84 while (@U) {
85 my @t;
86 ++$i;
87 push @t,shift(@U) for (1..$SecLen);
88 print O "\tsubgraph clusterU$i { label=\"\"; style=filled; color=aliceblue; ",join(' ',@t),"; };\n";
91 my (%Edges,%NodeLabel,%NodeLabelexists);
92 for (my $i=0;$i<$#Seq;$i++) {
93 #print "[$Seq[$i]],",join(",",@{&getName($Seq[$i])}),"\n";
94 for my $a (@{&getName($Seq[$i])}) {
95 for my $j (1..$OverlapNodes) {
96 if ($i+$j<=$#Seq) {
97 push @{$Edges{$a}},@{&getName($Seq[$i+$j])};
98 $NodeLabel{$a} .= '-'.$Seq[$i+$j] unless exists $NodeLabelexists{$a}{$Seq[$i+$j]};
99 ++$NodeLabelexists{$a}->{$Seq[$i+$j]};
104 print O "\t$_ [label=\"${_}$NodeLabel{$_}\"];\n" for sort keys %NodeLabel;
106 sub getStr($$) {
107 my ($a,$b)=sort @_;
108 my $v=0.1;
109 if ("$a$b" =~ /^\d+$/) {
110 $v=10;
111 } elsif ($a !~ /^\d+$/ and $b !~ /^\d+$/) {
112 $v=1;
114 return ["$a -- $b",$v];
116 my %str;
117 for my $a (keys %Edges) {
118 %t=();
119 ++$t{$_} for @{$Edges{$a}};
120 for my $b (keys %t) {
121 #for my $b (@{$Edges{$a}}) {
122 my ($tstr,$v)=@{&getStr($a,$b)};
123 $str{$tstr} += $v;
126 %t=();
127 print O "\t$_ [weight=$str{$_}",($str{$_}>=10)?",style=bold":"","];\n" for sort keys %str;
128 print O "}\n";
129 close O;
130 system('dot','-Tpng',"-o$filename.png","$filename.gv");