modified: Makefile
[GalaxyCodeBases.git] / perl / etc / crep / gen_cluster_input.pl
blob226760008f19267e8e641cfe02e3b07aed4d2237
1 #!/usr/bin/perl -w
2 use strict;
3 use warnings;
4 use Data::Dump;
6 open IA,'<','crep_all_tsv_new.txt.up2.txt' or die $!;
7 open IB,'<','crep_all_tsv_new.txt.up2.rev.txt' or die $!;
8 open IR,'<','resLOC.rpratio' or die $!;
10 open OA,'>','ricexpro.up2.clu.txt' or die $!;
11 open OB,'>','ricexpro.up2.rev.clu.txt' or die $!;
12 open OS,'>','ricexpro.up2.stat.txt' or die $!;
14 my (%dA,%dB,%dFlag,%cntdflag);
15 while (<IA>) {
16 my ($id,$cnt,$value) = split /\t/;
17 $id =~ s/^LOC_Os/t/;
18 $dA{$id} = log($value) / log(2);
19 ++$dFlag{$id};
21 close IA;
22 while (<IB>) {
23 my ($id,$cnt,$value) = split /\t/;
24 $id =~ s/^LOC_Os/t/;
25 $dB{$id} = log($value) / log(2);
26 $dFlag{$id} += 100;
28 close IB;
30 for my $k (keys %dFlag) {
31 push @{ $cntdflag{$dFlag{$k}} },$k;
33 for my $k (keys %cntdflag) {
34 print "$k -> ",scalar @{ $cntdflag{$k} },': ';
35 print "\n"; # join(',',@{ $cntdflag{$k} }),
37 # 1 -> 5332:
38 # 101 -> 531:
39 # 100 -> 8173:
40 my ($max,$min,$sum,$n,$ss)=(-999,999,0,0,0);
41 for my $id (@{ $cntdflag{'101'} }) {
42 #print OS "# $id: $dA{$id}, $dB{$id}\n";
43 $sum += $dA{$id}+$dB{$id};
44 $ss += $dA{$id}*$dA{$id} + $dB{$id}*$dB{$id};
45 $n += 2;
46 $max = $dA{$id} if $max < $dA{$id};
47 $min = $dA{$id} if $min > $dA{$id};
48 $max = $dB{$id} if $max < $dB{$id};
49 $min = $dB{$id} if $min > $dB{$id};
51 my $mean = $sum/$n;
52 print "Range:[$min,$max] Mean: $mean SD:",sqrt($ss/$n - $mean*$mean)," Cnt:$n\n";
53 =pod
54 1 -> 5332:
55 101 -> 531:
56 100 -> 8173:
57 Range:[-5.28337053983209,5.82964609302319] Mean: -0.575308843011672 SD:1.62278420169048 Cnt:1062
58 =cut
59 my $outHead = <IR>;
60 print OS "$outHead";
61 print OA "$outHead";
62 print OB "$outHead";
63 while (<IR>) {
64 my ($id,$acc,@dat) = split /\t/;
65 my $LOCid = $id;
66 $id =~ s/^LOC_Os/t/;
67 my $tmp = join('', map {my $a; if (defined $_) {$a = int($_); $a="+$a" if $_>=0; $a="-$a" if $_<0 and $a==0; $a; } } ($dA{$id},$dB{$id}));
68 my $newLOCid = $LOCid . $tmp;
69 my $newid = "${id}_${acc}$tmp";
70 if ($dFlag{$id} == '101') {
71 print OS join("\t",$newid, $dA{$id} .';'. $dB{$id} ,@dat);
72 } elsif ($dFlag{$id} == '1') {
73 print OA join("\t",$newid,$newLOCid,@dat);
74 } elsif ($dFlag{$id} == '100') {
75 print OB join("\t",$newid,$newLOCid,@dat);
76 } else { die; }
78 close IR;
79 close OS;
80 close OA;
81 close OB;
83 __END__
84 $ wc -l crep_all_tsv_new.txt.up2.*txt ricexpro.up2.* resLOC.rpratio
85 8704 crep_all_tsv_new.txt.up2.rev.txt
86 5863 crep_all_tsv_new.txt.up2.txt
87 4756 ricexpro.up2.clu.txt
88 7344 ricexpro.up2.rev.clu.txt
89 401 ricexpro.up2.stat.txt
90 12499 resLOC.rpratio
91 39567 total
93 ./cluster -f ricexpro.up2.clu.txt -l -cg a -ng -g 2 -e 2
94 ./cluster -f ricexpro.up2.rev.clu.txt -l -cg a -ng -g 2 -e 2