limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / perl / etc / crep / anocrep.pl
blob961193819fdb7a54b32f4e486eecbd2088906c4e
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use IO::Unread qw(unread);
5 use Data::Dump qw(ddx);
7 die "Usage: $0 <anno_file> <input>\n" if @ARGV < 2;
8 my ($anno,$inf)=@ARGV;
10 my (%ANNO,%ANNOcnt,%LOCdesc,%LOC2AFF,%AFF2LOC);
12 open I,'<',$anno or die;
13 while (<I>) {
14 chomp;
15 my @dat = split /\t/;
16 next unless exists $dat[1];
17 next if $dat[1] eq 'NONE|NONE|NONE';
18 my $id = shift @dat;
19 my $sum=0;
20 my @tids;
21 for (@dat) {
22 my ($tid,$ratio,$desc) = split /\|/;
23 $ratio =~ s#/11$## or die "[$ratio]";
24 $sum += $ratio;
25 if (exists $LOCdesc{$tid} and $LOCdesc{$tid} ne $desc) {
26 die "[x]$_\n$LOCdesc{$tid}\n";
28 $LOCdesc{$tid} = $desc;
29 push @tids,[$tid,$ratio];
31 $ANNO{$id} = \@tids;
32 $ANNOcnt{$id} = $sum;
34 close I;
35 #ddx \%ANNO;
36 # "Os.10031.1.S1_at" => [["LOC_Os05g35140", 11], ["LOC_Os07g12230", 1]],
38 for my $aid (keys %ANNO) {
39 my @items = @{$ANNO{$aid}};
40 for (@items) {
41 $LOC2AFF{$_->[0]} = [[],0] unless exists $LOC2AFF{$_->[0]};
42 =pod
43 my @dat = @{ $LOC2AFF{$_->[0]} };
44 push @{ $dat[0] },$_->[0];
45 $dat[1] += $_->[1];
46 $LOC2AFF{$_->[0]} = \@dat;
47 =cut
48 push @{ $LOC2AFF{$_->[0]}->[0] },[$aid,$_->[1]];
49 $LOC2AFF{$_->[0]}->[1] += $_->[1];
52 #ddx \%LOC2AFF;
53 # LOC_Os05g35140 => [[["Os.10031.1.S1_at", 11]], 11],
54 # LOC_Os07g12230 => [
55 # [
56 # ["OsAffx.22438.1.S1_x_at", 11],
57 # ["Os.10031.1.S1_at", 1],
58 # ["OsAffx.22438.1.S1_at", 11],
59 # ["OsAffx.8676.1.S1_at", 4],
60 # ],
61 # 27,
62 # ],
64 for my $tid (keys %LOC2AFF) {
65 my @dat = @{$LOC2AFF{$tid}->[0]};
66 my $sum = $LOC2AFF{$tid}->[1];
67 my %hdat;
68 for (@dat) {
69 $_->[2] = $_->[1] / $sum;
70 $hdat{ $_->[0] } = [$_->[1],$_->[2]];
71 $AFF2LOC{ $_->[0] } = [] unless exists $AFF2LOC{ $_->[0] };
72 push @{ $AFF2LOC{$_->[0]} },[$tid,$_->[1],$_->[2]];
74 $LOC2AFF{$tid} = \%hdat;
76 #ddx \%LOC2AFF;
77 # LOC_Os05g35140 => { "Os.10031.1.S1_at" => [11, 1] },
78 # LOC_Os07g12230 => {
79 # "Os.10031.1.S1_at" => [1, 0.037037037037037],
80 # "OsAffx.22438.1.S1_at" => [11, 0.407407407407407],
81 # "OsAffx.22438.1.S1_x_at" => [11, 0.407407407407407],
82 # "OsAffx.8676.1.S1_at" => [4, 0.148148148148148],
83 # },
85 #ddx \%AFF2LOC;
86 # "Os.10031.1.S1_at" => [
87 # ["LOC_Os05g35140", 11, 1],
88 # ["LOC_Os07g12230", 1, 0.037037037037037],
89 # ],
91 my %outLOC;
92 open I,'<',$inf or die;
93 while (<I>) {
94 chomp;
95 my @dat = split /\t/;
96 my $aid = $dat[0];
97 if (exists $AFF2LOC{$aid}) {
98 for my $i ( @{ $AFF2LOC{$aid} } ) {
99 $outLOC{ $i->[0] } = [0,[]] unless exists $outLOC{ $i->[0] };
100 push @{ $outLOC{$i->[0]}->[1] },[@dat];
101 #push @{ $outLOC{$i->[0]}->[1] },\@dat;
102 $outLOC{$i->[0]}->[0] += $dat[1] * $i->[2];
106 close I;
107 #ddx \%outLOC;
108 # LOC_Os01g02300 => [
109 # 2.99555110455193,
112 # "Os.27046.1.S1_at",
113 # 2.48051948051948,
114 # 101.87,
115 # 41.07,
116 # "66|189.6|50",
117 # "49.5|59.1|14.6",
118 # ],
120 # "Os.27046.2.S1_x_at",
121 # 3.46376167185416,
122 # 259.67,
123 # 74.97,
124 # "88.3|399.4|291.3",
125 # "13.9|167|44",
126 # ],
127 # ],
128 # ],
129 # LOC_Os01g02300 => {
130 # "Os.27046.1.S1_at" => [10, 0.476190476190476],
131 # "Os.27046.2.S1_x_at" => [11, 0.523809523809524],
132 # },
134 open O,'>',$inf.'.out' or die;
135 for my $tid (sort { $outLOC{$b}->[0] <=> $outLOC{$a}->[0] } keys %outLOC) {
136 my @items = @{ $outLOC{$tid} };
137 my @affprobes;
138 for ( @{ $items[1] } ) {
139 $$_[1] = int(0.5+1000*$$_[1])/1000;
140 push @affprobes,join('~',@{$LOC2AFF{$tid}{$$_[0]}},@$_);
142 print O join("\t",$tid,(scalar @affprobes),$items[0],$LOCdesc{$tid},join("\t",@affprobes) ),"\n";
144 close O;
146 __END__
147 perl anocrep.pl crep_anno_merged.tsv crep_all_tsv_new.txt.up2
148 perl anocrep.pl crep_anno_merged.tsv crep_all_tsv_new.txt.down2