new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / crep / merge_ricexpro_anno.pl
blob483d757d15f7a83e53be822a651b059ae865fe7e
1 #!/usr/bin/perl -w
2 use strict;
3 use warnings;
4 use Data::Dump;
6 open I,'<','resLOC.anno' or die $!;
7 open A,'<','resLOC.cydat' or die $!;
8 open O,'>','resLOC.ricexpro' or die $!;
9 open ON,'>','resLOC.rpratio' or die $!;
11 my (%Acc2Loc,%Loc2Acc,%Acc2Accession);
12 while (<I>) {
13 my ($tigLOC,undef,$FeatureNums,$Accessions) = split /\t/;
14 #next unless defined $FeatureNums;
15 my @FeatureNums = split /\|/,$FeatureNums;
16 next unless @FeatureNums > 0;
17 # my @Accessions = split /\|/,$Accessions;
18 #print "$tigLOC,@FeatureNums,@Accessions\n";
19 ++$Acc2Loc{$_}{$tigLOC} for @FeatureNums;
20 ++$Loc2Acc{$tigLOC}{$_} for @FeatureNums;
21 # for my $i (0 .. $#FeatureNums) {
22 # $Acc2Accession{$FeatureNums[$i]} = $Accessions[$i];
23 # }
25 close I;
27 my @AccNums = sort {$a <=> $b} keys %Acc2Loc;
28 for my $acc (@AccNums) {
29 my @LOCs = sort keys %{$Acc2Loc{$acc}};
30 for (@LOCs) { die if $Acc2Loc{$acc}{$_}>1; }
31 $Acc2Loc{$acc} = \@LOCs;
33 my @LocNums = sort keys %Loc2Acc;
34 for my $loc (@LocNums) {
35 my @ACCs = sort keys %{$Loc2Acc{$loc}};
36 for (@ACCs) {
37 die if $Loc2Acc{$loc}{$_}>1;
38 $Loc2Acc{$loc}{$_} = 1 / scalar(@{$Acc2Loc{$_}});
40 #$Loc2Acc{$loc} = \@ACCs;
43 #ddx \%Acc2Loc;
44 # 44020 => ["LOC_Os11g24450", "LOC_Os11g25030"],
45 # 44023 => ["LOC_Os05g37830"],
47 #ddx \%Loc2Acc;
48 # LOC_Os12g37720 => { 32729 => 1, 34753 => 1, 41627 => 1 },
49 # LOC_Os12g37770 => { 19813 => 1 },
51 #ddx \%Acc2Accession; # undef exists from file !
53 my (%cyDat,%cyRDat);
55 my %RXP2Name = (
56 RXP_1006 => 'ABA', #Shoot gene expression profile in response to abscisic acid
57 RXP_1009 => 'BRs', #Shoot gene expression profile in response to brassinosteroid
58 RXP_1010 => 'CK', #Shoot gene expression profile in response to cytokinin
59 RXP_1008 => 'Aux', #Shoot gene expression profile in response to auxin
60 RXP_1007 => 'GA', #Shoot gene expression profile in response to gibberellin
61 RXP_1012 => 'JA', #Shoot gene expression profile in response to jasmonic acid
62 ); # Cy3 (mock treatment) and Cy5 (hormone treatment) => Cy5/Cy3, http://ricexpro.dna.affrc.go.jp/RXP_1006/index.php
64 my (@FeatureOrder, @ExpOrder, @FeatNameOrder, @reps, %rep, @FinalOrder, @RatioExpOrder, @RatioFinalOrder);
65 # FeatureNum RXP_1006 RXP_1009 RXP_1010
66 ## 0 hr (Cy3)|0 hr (Cy5)|1 hr (Cy3)|1 hr (Cy5)|3 hr (Cy3)|3 hr (Cy5)|6 hr (Cy3)|6 hr (Cy5)|12 hr (Cy3)|12 hr (Cy5)
67 while (<A>) {
68 if ( /^#/ ) {
69 #print O $_;
70 if ( /^# FeatureNum\b/ ) {
71 chomp;
72 @FeatureOrder = split /\t/;
73 shift @FeatureOrder;
74 for (@FeatureOrder) {
75 my $tmp = $RXP2Name{$_};
76 push @FeatNameOrder,$tmp;
77 print "$_, $tmp\n";
79 } elsif ( /^## (.+)$/ ) {
80 $_ = $1;
81 chomp;
82 my @Exps = split /\|/;
83 for ( @Exps ) {
84 /^(\d+) hr \(Cy([35])\)$/ or die;
85 push @ExpOrder,"${1}h_Cy$2";
86 push @RatioExpOrder,"${1}h" if $2 eq '3';
87 #print "$_ => ${1}h_Cy$2\n";
89 print "@ExpOrder -> @RatioExpOrder\n";
91 next;
93 my ( $acc,@dat ) = split /\t/;
94 my (%DAT,%RDAT);
95 for my $FNO ( @FeatNameOrder ) {
96 my $tmp = shift @dat;
97 my @rep = split /\,/,$tmp;
98 for my $repline ( @rep ) {
99 my @repdat = split /\|/,$repline;
100 my $repid = shift @repdat;
101 if ($repid eq 'mean') {
102 $RDAT{$FNO} = \@repdat;
103 } else {
104 ++$rep{$repid};
105 $DAT{$FNO}{$repid} = \@repdat;
109 $cyDat{$acc} = \%DAT;
110 $cyRDat{$acc} = \%RDAT;
112 close A;
113 @reps = sort keys %rep;
114 for my $FNO ( @FeatNameOrder ) {
115 for my $RO ( @reps ) {
116 for my $EO ( @ExpOrder ) {
117 my $t = $RO;
118 $t =~ s/^rep/r/;
119 push @FinalOrder,"$FNO$EO$t";
122 for my $REO ( @RatioExpOrder ) {
123 push @RatioFinalOrder,"$FNO$REO";
126 for my $acc ( keys %cyDat ) {
127 my %cyhash = %{$cyDat{$acc}};
128 my %cyrhash = %{$cyRDat{$acc}};
129 my (@item,@ritem);
130 for my $FNO ( @FeatNameOrder ) {
131 for my $RO ( @reps ) {
132 for my $EO ( 0 .. $#ExpOrder ) {
133 push @item,$cyhash{$FNO}{$RO}->[$EO];
136 for my $EO ( 0 .. $#RatioExpOrder ) {
137 push @ritem,$cyrhash{$FNO}->[1+2*$EO] / $cyrhash{$FNO}->[2*$EO];
138 #push @ritem,log($cyrhash{$FNO}->[1+2*$EO] / $cyrhash{$FNO}->[2*$EO])/log(2);
141 $cyDat{$acc} = \@item;
142 $cyRDat{$acc} = \@ritem;
144 #ddx \%cyDat;
145 print "\n@FinalOrder\n";
146 print join(',',@{$cyDat{'45209'}}),"\n";
147 print "\n@RatioFinalOrder\n";
148 print join(',',@{$cyRDat{'45209'}}),"\n";
149 # 45209 => [
150 # "rep1|8.0073|3.6185|8.9315|5.0357|4.6752|4.7962|83.208|12.445|44.395|6.3024,rep2|3.1927|2.6697|3.3677|4.4205|3.3884|3.1185|7.425|6.9132|5.2888|5.7744,mean|5.6|3.1441|6.1496|4.7281|4.0318|3.9574|45.317|9.6791|24.842|6.0384",
151 # "rep1|8.0073|3.6185|4.2184|3.7634|3.4171|6.2807|3.9199|4.3256|5.1421|3.9737,rep2|3.1927|2.6697|4.048|3.8231|4.0791|2.9637|3.569|4.0095|2.597|2.6025,mean|5.6|3.1441|4.1332|3.7933|3.7481|4.6222|3.7445|4.1676|3.8695|3.2881",
152 # "rep1|8.0073|3.6185|3.2538|4.5685|11.197|3.8527|4.4721|3.4333|4.8344|4.8462,rep2|3.1927|2.6697|2.7079|3.9205|7.4918|3.104|3.1729|2.6352|6.9551|4.1439,mean|5.6|3.1441|2.9809|4.2445|9.3444|3.4784|3.8225|3.0343|5.8947|4.495\n",
153 # ],
154 # 45209 => {
155 # ABA => {
156 # rep1 => [
157 # 8.0073,
158 # 3.6185,
159 # 8.9315,
160 # 5.0357,
161 # 4.6752,
162 # 4.7962,
163 # 83.208,
164 # 12.445,
165 # 44.395,
166 # 6.3024,
167 # ],
168 # rep2 => [
169 # 3.1927,
170 # 2.6697,
171 # 3.3677,
172 # 4.4205,
173 # 3.3884,
174 # 3.1185,
175 # 7.425,
176 # 6.9132,
177 # 5.2888,
178 # 5.7744,
179 # ],
180 # },
181 # BRs => {
182 # rep1 => [
183 # 8.0073,
184 # 3.6185,
185 # 4.2184,
186 # 3.7634,
187 # 3.4171,
188 # 6.2807,
189 # 3.9199,
190 # 4.3256,
191 # 5.1421,
192 # 3.9737,
193 # ],
194 # rep2 => [
195 # 3.1927,
196 # 2.6697,
197 # 4.048,
198 # 3.8231,
199 # 4.0791,
200 # 2.9637,
201 # 3.569,
202 # 4.0095,
203 # 2.597,
204 # 2.6025,
205 # ],
206 # },
207 # CK => {
208 # rep1 => [
209 # 8.0073,
210 # 3.6185,
211 # 3.2538,
212 # 4.5685,
213 # 11.197,
214 # 3.8527,
215 # 4.4721,
216 # 3.4333,
217 # 4.8344,
218 # 4.8462,
219 # ],
220 # rep2 => [
221 # 3.1927,
222 # 2.6697,
223 # 2.7079,
224 # 3.9205,
225 # 7.4918,
226 # 3.104,
227 # 3.1729,
228 # 2.6352,
229 # 6.9551,
230 # 4.1439,
231 # ],
232 # },
233 # },
234 # 8.0073,3.6185,8.9315,5.0357,4.6752,4.7962,83.208,12.445,44.395,6.3024,3.1927,2.6697,3.3677,4.4205,3.3884,3.1185,7.425,6.9132,5.2888,5.7744,8.0073,3.6185,4.2184,3.7634,3.4171,6.2807,3.9199,4.3256,5.1421,3.9737,3.1927,2.6697,4.048,3.8231,4.0791,2.9637,3.569,4.0095,2.597,2.6025,8.0073,3.6185,3.2538,4.5685,11.197,3.8527,4.4721,3.4333,4.8344,4.8462,3.1927,2.6697,2.7079,3.9205,7.4918,3.104,3.1729,2.6352,6.9551,4.1439
236 print O join("\t",'YORF','NAME',@FinalOrder),"\n";
237 for my $loc (sort keys %Loc2Acc) {
238 for my $acc ( sort keys %{$Loc2Acc{$loc}} ) {
239 print O join("\t",$loc,$acc,@{$cyDat{$acc}}),"\n";
242 close O;
243 print ON join("\t",'YORF','NAME',@RatioFinalOrder),"\n";
244 for my $loc (sort keys %Loc2Acc) {
245 for my $acc ( sort keys %{$Loc2Acc{$loc}} ) {
246 print ON join("\t",$loc,$acc,@{$cyRDat{$acc}}),"\n";
249 close ON;
250 __END__