modified: n.fq
[GalaxyCodeBases.git] / tools / gc_dep_plot / old / depth.pl
blobef1e27fc2e37188075dcaa7bae245413028f3128
1 #!/usr/bin/perl -w
2 use strict;
3 unless(@ARGV)
5 die "<fasta file><depth file><window><output file>\n";
7 open AGCT,"$ARGV[0]"
8 or die "can't open the fasta file:$!\n";
9 open DEPTH,"$ARGV[1]"
10 or die "can't open depth file:$!\n";
11 open OUT,">$ARGV[3]"
12 or die "the result can't write:$!\n";
14 my $win=$ARGV[2];
15 my $total_num=0;
16 my $sum=$win;
17 my ($GC_num,$N_num,$gc_rate,$avg_depth,$seat,$chrom);
18 my $sum_depth=0;
19 my (@gcs,@depth,@Ns,@line);
20 my (%hash_seq,%hash_num,%hash);
23 while(<AGCT>)
25 chomp;
26 if(/(^>.*$)/&&$total_num<$sum)
28 if($total_num==0)
30 $chrom=$1;
31 $seat=$chrom."+"."$sum";
32 next;
35 $seat=$chrom."+"."$sum";
36 $gc_rate=int(100*$GC_num/($win-$N_num));
37 @{$hash_seq{$seat}}=($N_num,$gc_rate);
38 $total_num=0;
39 $sum=$win;
40 $GC_num=0;
41 $N_num=0;
42 $chrom=$1;
43 next;
45 my $len=length($_);
46 $total_num+=$len;
47 if($total_num<$sum)
49 $GC_num+=$_=~s/(G|C)/$1/gi;
50 $N_num+=$_=~s/(N)/$1/gi;
52 else
54 $GC_num+=substr($_, 0, $sum-$total_num+$len)=~s/(G|C)/$1/gi;
55 $N_num+=substr($_, 0, $sum-$total_num+$len)=~s/(N)/$1/gi;
56 if($win-$N_num)
58 $gc_rate=int(100*$GC_num/($win-$N_num));
60 $seat=$chrom."+"."$sum";
61 @{$hash_seq{$seat}}=($N_num,$gc_rate);
62 $GC_num=substr($_,$sum-$total_num+$len,$total_num-$sum+1)=~s/(G|C)/$1/gi;
63 $N_num=substr($_,$sum-$total_num+$len,$total_num-$sum+1)=~s/(N)/$1/gi;
64 $sum+=$win;
67 if($total_num<$sum)
69 $seat=$chrom."+"."$sum";
70 $gc_rate=int(100*$GC_num/($win-$N_num));
71 @{$hash_seq{$seat}}=($N_num,$gc_rate);
73 #foreach my $key(keys %hash_seq)
75 # print "@{$hash_seq{$key}}\n";
78 while(<DEPTH>)
80 chomp;
81 if(/(^>.*$)/&&$total_num<$sum)
83 if($total_num==0)
85 $chrom=$1;
86 $sum_depth=0;
87 $seat=$chrom."+"."$sum";
88 next;
91 $seat=$chrom."+"."$sum";
92 if((exists $hash_seq{$seat})&&($total_num-$sum+$win-${$hash_seq{$seat}}[0])!=0)
94 push @{$hash{${$hash_seq{$seat}}[1]}},$sum_depth/($total_num-$sum+$win-${$hash_seq{$seat}}[0]);
96 $total_num=0;
97 $sum_depth=0;
98 $sum=$win;
99 $chrom=$1;
100 next;
102 my ($len,$temp_len); #$len表示$_这一行的数字的总和,$temp_len表示$_这一行数字的个数
103 while(/(\d+)/g)
105 $len+=$1;
106 $temp_len+=1;
108 if($total_num<$sum)
110 $sum_depth+=$len;
111 $total_num+=$temp_len;
113 else
115 @line=split;
116 for(my $i=0;$i<$sum-$total_num+$temp_len;$i++)
118 $sum_depth+=$line[$i];
120 $seat=$chrom."+"."$sum";
121 if((exists $hash_seq{$seat})&&($win-${$hash_seq{$seat}}[0])!=0)
123 push @{$hash{${$hash_seq{$seat}}[1]}},$sum_depth/($win-${$hash_seq{$seat}}[0]);
125 $sum_depth=0;
126 for(my $i=$sum-$total_num+$len;$i<$temp_len;$i++)
128 $sum_depth+=$line[$i];
130 $sum+=$win;
133 if($total_num<$sum)
135 $seat=$chrom."+"."$sum";
136 if((exists $hash_seq{$seat})&&($total_num-$sum+$win-${$hash_seq{$seat}}[0]+1)!=0)
138 push @{$hash{${$hash_seq{$seat}}[1]}},$sum_depth/($total_num-$sum+$win-${$hash_seq{$seat}}[0]+1);
141 #foreach my $key(keys %hash)
143 # print "@{$hash{$key}}\n";
146 #输出结果
147 my @gc_key; #@gc_key存放gc值
148 foreach my $key(sort keys %hash)
150 push @gc_key,$key;
152 print OUT "@gc_key\n";
153 my $mark=0;
154 while(1)
156 for(my $i=0;$i<=$#gc_key;$i++)
158 if(@{$hash{$gc_key[$i]}}) #此处改动了原来为$hash{$gc_key[$i]}||$hash{$gc_key[$i]}==0
160 my $tem=shift @{$hash{$gc_key[$i]}} ;
161 $mark+=@{$hash{$gc_key[$i]}};
162 print OUT $tem,"\t";
164 else
166 print OUT "NA","\t";
169 if($mark>0)
171 print OUT "\n";
172 $mark=0;
174 else
176 last;
180 close DEPTH;
181 close OUT;
182 close AGCT;
184 __END__
185 /panfs/GAG/zhangxm/stat/depth.pl
186 张雪梅
188 考虑了N区