modified: myjupyterlab.sh
[GalaxyCodeBases.git] / perl / soap / draw / drawCnD.pl
blob2632239a090997c15035155d01089b58010dc387
1 #!/bin/env perl
3 use strict;
4 use warnings;
5 use Getopt::Long;
6 use File::Basename;
7 use FindBin qw($Bin $Script);
9 my $coverage_bin = "$Bin/subBin/soap.coverage";
10 my $plot_bin = "perl $Bin/subBin/plotDepthDistribution.pl";
11 my $plot_chr_bin = "perl $Bin/subBin/plotChrDepthDistribution.pl";
12 my $call_ngap_bin = "perl $Bin/subBin/callNgap.pl";
14 my ($pro_dir, $ref_fa, $pro_name, $out_dir, $input_soap_list);
15 my ($is_effective, $ngap_file, $gc_file, $chrorder_file);
16 my ($step, $mem);
17 my ($help);
18 GetOptions (
19 "proDir:s" => \$pro_dir,
20 "il:s" => \$input_soap_list,
21 "proName:s" => \$pro_name,
22 "ref:s" => \$ref_fa,
23 "outDir:s" => \$out_dir,
24 "effective:i" => \$is_effective,
25 "ngapFile:s" => \$ngap_file,
26 "gcFile:s" => \$gc_file,
27 "chrid:s" => \$chrorder_file,
28 "step:i" => \$step,
29 "mem:s" => \$mem,
30 "help" => \$help
32 if (!$pro_dir || !$pro_name || !$ref_fa || $help) {
33 print "\n";
34 print "Mandatory:\n";
35 print "\t-il\t*.soap/*.single file list\n";
36 print "\t-proDir\tproject directory\n";
37 print "\t-proName\tproject name\n";
38 print "\t-ref\treference fa file\n";
39 print "\t-outDir\t[proDir]\n";
40 print "\t-chrid\tthe file contains the chromosome id[/share/raid010/resequencing/resequencing/tmp/pub/Genome/Rice/Genome_IRGSP/IRGSP.chrorder]\n";
41 print "Option:\n";
42 print "\t-gcFile\tthe file contains the gc percentage of each chromosome(file format: chr total_genome_size effective_genome_size gc_size gc_percentage)if not provide this file, one gc file will be generated automatically by this program.\n";
43 print "\t-effective\t0:not; 1:yes[0]\n";
44 print "\t-ngapFile\tngap file\n";
45 print "\t-mem\tmemory to run soap.coverage[6g for human]\n";
46 print "\t-step\t3 steps: coverage(1)->read depth(2)->draw depth distribution(3)\t[1]\n";
47 print "Example:\n\tperl $0 -proDir /share/tmp/resequencing/human/Arab/ -proName Arab -ref /share/tmp/pub/Genome/Human/human.fa -outDir /share/tmp/resequencing/human/Arab/ -effective 1 -step 2\n";
48 print "\tperl $0 -proDir /share/raid11/zhanghao/software/postProcess/test/Rice/IRGSP -il soap.l -ref /share/raid1/database/BGI/rice/IRGSP_chromosomes_build04.fa -outDir /share/raid11/zhanghao/software/postProcess/test/Rice/IRGSP -ngapFile /share/raid11/zhanghao/software/postProcess/test/Rice/IRGSP/IRGSP.ngap -mem 4g -effective 1 -step 3 -proName IRGSP\n";
49 print "\tperl $0 -proDir /share/raid11/zhanghao/software/postProcess/test/Rice/9311 -il soap.l -ref /share/raid1/database/BGI/rice/9311_main_chromosomes.fa -outDir /share/raid11/zhanghao/software/postProcess/test/Rice/9311 -ngapFile /share/raid11/zhanghao/software/postProcess/test/Rice/9311/9311.ngap -mem 4g -effective 1 -step 3 -proName 9311\n";
50 print "\tAuther: Hao Zhang\tTime: 23:22 25/05/2009\n";
51 print " modify by HuXuesong @ 2010\n";
53 exit 0;
55 $out_dir ||= $pro_dir;
56 mkdir $out_dir unless (-d $out_dir);
57 $input_soap_list ||= '';
58 $ngap_file ||= ''; #"$Bin/subBin/pub_data/human.ngap";
59 $gc_file ||= '';
60 $chrorder_file ||= "$Bin/subBin/pub_data/human.chrnum";
61 $is_effective ||= 0;
62 $mem ||= "6g";
63 $step ||= 1;
64 my $pe_dir = $pro_dir;
65 #my $pe_dir = "$pro_dir/soap/PE";
66 my $depth_dir = "$out_dir/depth";
67 mkdir $depth_dir unless (-d $depth_dir);
68 my $coverage_dir = "$out_dir/coverage";
69 mkdir $coverage_dir unless (-d $coverage_dir);
70 my $distribution_dir = "$depth_dir/distribution";
71 mkdir $distribution_dir unless (-d $distribution_dir);
73 my $wait_second = 30;
75 my %hDepth = (); #{depth} => num
76 my %hChrDepth = (); #{chr}{depth} => num
77 my %hChrGeSize = (); #{chr} => size
78 my %hChrMapBase = (); #{chr} => basenum
79 my %hChrEffLen = (); #{chr} => effective length
81 my $total_list = $input_soap_list;
82 my $total_base_count = 0;
85 if (1 == $step) {
86 soapCoverage($total_list, \$is_effective, \$out_dir, \$coverage_dir, \$depth_dir);
88 elsif (2 == $step) {
89 readDepth(\$total_base_count);
90 calculateMeanDepthAndPoisson(\$total_base_count);
91 calculateGCpercentage(\$ref_fa, \$gc_file);
92 calculateGCpercentageForSoap($total_list);
93 drawDepthDistribution();
95 my $gc_file2="$out_dir/$pro_name.soap_gc.info";
97 drawChrDepthDistribution2($gc_file2);
99 elsif (3 == $step) {
100 #calculateGCpercentage(\$ref_fa, \$gc_file);
101 #calculateGCpercentageForSoap($total_list);
102 my $gc_file2="$out_dir/$pro_name.soap_gc.info";
103 print "$gc_file2\n";
105 drawDepthDistribution();
106 drawChrDepthDistribution2($gc_file2);
109 elsif(4== $step)
112 readDepth2(\$total_base_count);
113 calculateMeanDepthAndPoisson(\$total_base_count);
114 drawDepthDistribution();
118 sub soapCoverage {
119 my ($total_list, $is_effective, $out_dir, $coverage_dir, $depth_dir) = @_;
120 ######generate list######
121 my $soap_list = '';
122 unless (-f $total_list) { #if there is not soap.list available, create one list
123 $soap_list = "$$depth_dir/soap.list";
124 my $single_list = "$$depth_dir/single.list";
125 $total_list = "$$depth_dir/file.list";
126 system ("cp $soap_list $soap_list.old") if (-f $soap_list);
127 system ("cp $single_list $single_list.old") if (-f $single_list);
128 #system ("cp $$total_list $$total_list.old") if (-f $$total_list);
129 #`ls $pe_dir/*.soap >$soap_list`;
130 #`ls $pe_dir/*.single >$single_list`;
131 #`cat $soap_list $single_list >$total_list`;
132 system "find $pe_dir/ -name '*.soap' >$soap_list";
133 system "find $pe_dir/ -name '*.single' >$single_list";
134 system "cat $soap_list $single_list >$total_list";
138 if($$is_effective) {
139 ######generate ngap list######
140 unless (-f $ngap_file) {
141 $ngap_file = "$$depth_dir/$pro_name.ngap";
142 unless (-f $ngap_file) {
143 my $call_ngap_command = "$call_ngap_bin $ref_fa 1 $ngap_file";
144 print "$call_ngap_command\n";
145 #`$call_ngap_command`;
146 system "$call_ngap_command";
150 open SH, ">$$out_dir/run_total_coverage.sh" or die "$!";
151 print SH "#! /bin/sh\n#\$ -S /bin/sh\nexport LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/lib;\n$coverage_bin -cvg -refsingle $ref_fa -il $total_list -o $$coverage_dir/total_coverage.info -depthsingle $$depth_dir/total_depthsingle -addn $ngap_file";
152 close SH;
154 else {
155 open SH, ">$$out_dir/run_total_coverage.sh" or die "$!";
156 print SH "#! /bin/sh\n#\$ -S /bin/sh\nexport LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/lib;\n$coverage_bin -cvg -refsingle $ref_fa -il $total_list -o $$coverage_dir/total_coverage.info -depthsingle $$depth_dir/total_depthsingle";
157 close SH;
160 my %jobIDs = ();
161 my @coverage_shes = `find $$out_dir -name "run_total_coverage.sh"`;
162 foreach my $sh (@coverage_shes) {
163 my $qsub = `qsub -cwd -l vf=$mem $sh`;
164 my $jobID = (split /\s+/, $qsub)[2];
165 my $local_time = localtime();
166 print "$local_time\nsubmit $jobID\n";
167 $jobIDs{$jobID} = 1;
170 waitJobsDone2(%jobIDs);
173 sub waitJobsDone2 {
174 my (%jobIDs) = @_;
175 while(keys %jobIDs){
176 foreach my $jobid (keys %jobIDs) {
177 my $qstat = `qstat -j $jobid`;
179 my %runningJobs = ();
180 chomp $qstat;
182 my @qstat_lines = split /\n/, $qstat;
183 chomp @qstat_lines;
185 if(defined($qstat_lines[1]))
188 # print "test : $qstat_lines[1]\n";
189 my $job = (split /\s+/,$qstat_lines[1])[1];
191 # print "job: $job\n";
193 if($job eq $jobid){
194 $runningJobs{$job} = 1;
197 foreach my $jobID(keys %jobIDs){
198 if(! exists $runningJobs{$jobID}){
199 my $local_time = localtime();
200 print "\n$local_time\nFinish jobid $jobID\n";
201 delete $jobIDs{$jobID};
204 sleep $wait_second;
207 } #end while
208 =b # old
209 readDepth(\$total_base_count);
210 calculateMeanDepthAndPoisson(\$total_base_count);
211 calculateGCpercentage(\$ref_fa, \$gc_file);
212 calculateGCpercentageForSoap($total_list);
213 drawDepthDistribution();
214 drawChrDepthDistribution();
215 =cut
217 readDepth(\$total_base_count);
218 calculateMeanDepthAndPoisson(\$total_base_count);
219 calculateGCpercentage(\$ref_fa, \$gc_file);
220 calculateGCpercentageForSoap($total_list);
221 drawDepthDistribution();
223 my $gc_file2="$out_dir/$pro_name.soap_gc.info";
225 drawChrDepthDistribution2($gc_file2);
233 sub readDepth2 {
236 my ($total_base_count) = @_;
237 my $current_chr = '';
238 my $effect_base_count = 0;
240 open DEPTH, "$depth_dir/total_depthsingle" or die "$!";
241 while (my $line = <DEPTH>) {
242 chomp $line;
243 if ($line =~ /^>/) {
244 last if ($line =~ /scaffold/i);
245 $line =~ s/^>//;
246 $current_chr = $line;
247 my $local_time = localtime();
248 print "$local_time\tprocess $current_chr\n";
250 else {
251 my @depths = split /\s+/, $line;
252 foreach my $depth (@depths) {
253 $hDepth{$depth} += 1;
254 $hChrDepth{$current_chr}{$depth} += 1;
255 $hChrGeSize{$current_chr} += 1;
256 if ($depth != 65535) {
257 $hChrMapBase{$current_chr} += $depth;
258 $hChrEffLen{$current_chr} += 1;
259 ++$effect_base_count;
261 ++$$total_base_count;
264 } #end while
265 close DEPTH;
268 # calculateMeanDepthAndPoisson(\$total_base_count);
272 print "total base: $$total_base_count\neffective base: $effect_base_count\n";
274 open TCHR, ">$distribution_dir/all.info" or die "$!";
275 print TCHR "chr\tgenome_size\teffective_size\tmappable_base\n";
276 foreach my $chr (keys %hChrDepth) {
277 print TCHR "$chr\t$hChrGeSize{$chr}\t$hChrEffLen{$chr}\t$hChrMapBase{$chr}\n";
279 open CHR, ">$distribution_dir/$chr.depth" or die "$!";
280 foreach my $depth (sort {$a<=>$b} keys %{$hChrDepth{$chr}}) {
281 print CHR "$depth\t$hChrDepth{$chr}{$depth}\n";
283 close CHR;
285 close TCHR;
286 =cut
292 sub readDepth {
294 my ($total_base_count) = @_;
295 my $current_chr = '';
296 my $effect_base_count = 0;
298 open DEPTH, "$depth_dir/total_depthsingle" or die "$!";
299 while (my $line = <DEPTH>) {
300 chomp $line;
301 if ($line =~ /^>/) {
302 last if ($line =~ /scaffold/i);
303 $line =~ s/^>//;
304 $current_chr = $line;
305 my $local_time = localtime();
306 print "$local_time\tprocess $current_chr\n";
308 else {
309 my @depths = split /\s+/, $line;
310 foreach my $depth (@depths) {
311 $hDepth{$depth} += 1;
312 $hChrDepth{$current_chr}{$depth} += 1;
313 $hChrGeSize{$current_chr} += 1;
314 if ($depth != 65535) {
315 $hChrMapBase{$current_chr} += $depth;
316 $hChrEffLen{$current_chr} += 1;
317 ++$effect_base_count;
319 ++$$total_base_count;
322 } #end while
323 close DEPTH;
325 print "total base: $$total_base_count\neffective base: $effect_base_count\n";
327 open TCHR, ">$distribution_dir/all.info" or die "$!";
328 print TCHR "chr\tgenome_size\teffective_size\tmappable_base\n";
329 foreach my $chr (keys %hChrDepth) {
330 print TCHR "$chr\t$hChrGeSize{$chr}\t$hChrEffLen{$chr}\t$hChrMapBase{$chr}\n";
332 open CHR, ">$distribution_dir/$chr.depth" or die "$!";
333 foreach my $depth (sort {$a<=>$b} keys %{$hChrDepth{$chr}}) {
334 print CHR "$depth\t$hChrDepth{$chr}{$depth}\n";
336 close CHR;
338 close TCHR;
341 sub calculateMeanDepthAndPoisson {
343 my ($count) = @_;
345 #read effective length of reference
346 open COVERAGE, "$coverage_dir/total_coverage.info" or die "$!";
347 my $ref_base = 0;
348 while (my $line = <COVERAGE>) {
349 chomp $line;
350 if ($line =~ /^Total/) {
351 #print "$line\n";
352 $ref_base = (split /\:/, $line)[-1];
353 print "$ref_base\n";
354 last;
356 } #end while
357 close COVERAGE;
359 #count mean depth
360 my $sum_mappable_base = 0;
361 my $stat_file = "$pro_dir/Statistics.txt";
362 if (-f $stat_file) {
363 open STAT, "$pro_dir/Statistics.txt" or die "$!";
364 my $read_count = 0;
365 while (my $line = <STAT>) {
366 ++$read_count;
367 next if ($read_count < 3);
368 chomp $line;
369 my @stat_info = split /\s+/, $line;
370 #next if ($stat_info[0] !~ /\.fq$/);
371 $sum_mappable_base += $stat_info[8];
372 } #end while
373 close STAT;
375 else {
376 if (-f $total_list) {
377 open LIST, "$total_list" or die "$!";
379 else {
380 open LIST, "$depth_dir/file.list" or die "$!";
382 my @soap_files = <LIST>;
383 chomp @soap_files;
384 close LIST;
386 #print "list file $total_list\n";
387 #map {print "$_\n"} @soap_files;
389 foreach my $soap_file (@soap_files) {
390 my $local_time = localtime();
391 print "$local_time\tprocessing $soap_file\n";
392 $sum_mappable_base += `awk '{sum+=\$6}END{print sum}' $soap_file`;
395 #print "$sum_mappable_base\n";
396 my $mean_depth = $sum_mappable_base / $ref_base;
397 print "sum mappable bases: $sum_mappable_base\nreference length: $ref_base\nmean depth: $mean_depth\n";
399 open DISTRIBUTION, ">$distribution_dir/$pro_name.distribution.data" or die "$!";
400 print DISTRIBUTION "Depth\tnum\tpercentage\tpoisson\n";
401 #$hDepth{0} -= (243568484-226441261);
402 #$$count -= (243568484-226441261);
403 if ($is_effective) {
404 $$count -= $hDepth{65535};
407 print "effective: $$count\n";
408 foreach my $depth (sort {$a<=>$b} keys %hDepth) {
409 my $per = $hDepth{$depth} / $$count * 100;
411 my $factorial = 1;
412 my $power = 1;
413 for my $index (1..$depth) {
414 $factorial *= $index;
415 $power *= $mean_depth;
417 my $poisson = $power * exp(0-$mean_depth) /$factorial * 100;
418 print DISTRIBUTION "$depth\t$hDepth{$depth}\t$per\t$poisson\n";
420 close DISTRIBUTION;
423 sub calculateGCpercentage {
424 my ($fa_file, $gc_file) = @_;
425 unless (-f $$gc_file) {
426 my $chrName = '';
428 my %hChrSeq = (); #{chr} => sequence
429 my @chrNames = (); #(chr1, chr2,...)
431 open FA, $$fa_file or die "$!";
432 while (my $line = <FA>) {
433 chomp $line;
434 if ($line =~ /^>(.*)/) {
435 $chrName = $1;
436 push @chrNames, $chrName;
437 } #end if
438 else {
439 $hChrSeq{$chrName} .= $line;
440 } #end else
441 } #end while
442 close FA;
444 open GCOUT, ">$out_dir/$pro_name.ref_gc.info" or die "$!";
445 foreach my $chr (@chrNames) {
446 my $chr_len = length $hChrSeq{$chr};
447 my $countN = ($hChrSeq{$chr} =~ s/N/q/ig);
448 my $countGC = ($hChrSeq{$chr} =~ s/[GC]/p/ig);
449 my $effective = $chr_len - $countN;
450 my $gc_percentage = $countGC / $effective * 100;
451 print GCOUT "$chr\t$chr_len\t$effective\t$countGC\t$gc_percentage\n";
452 } #end foreach
453 close GCOUT;
455 $$gc_file = "$out_dir/$pro_name.ref_gc.info";
457 } #end else
459 sub calculateGCpercentageForSoap{
460 my ($soaplist)=@_;
461 my @arry_soap=();
462 open FI,"$soaplist" or die "can't open $soaplist, in calculateGCpercentageForSoap()!";
464 print "\n ---- begin statics gc for soap result! \n";
466 while(my $line = <FI>)
468 chomp $line;
470 if($line =~ /\.*.soap/)
472 print " soap file: $line\n";
473 push @arry_soap, $line;
478 close FI;
481 my %hchr = (); #{chr} => sequence
483 foreach my $file (@arry_soap)
485 print " stat soapfile $file\n";
486 open FA, $file or die "$!";
487 while (my $line = <FA>) {
488 chomp $line;
489 my @l = split /\s+/, $line;
490 if(exists($hchr{$l[7]}))
492 ${$hchr{$l[7]}}[0] +=$l[1] =~ s/[GC]/p/ig;
493 ${$hchr{$l[7]}}[1] +=$l[5];
495 else
497 my $gc_num = $l[1] =~ s/[GC]/p/ig;
499 push @{$hchr{$l[7]}},$gc_num;
500 push @{$hchr{$l[7]}},$l[5];
508 } #end while
509 close FA;
512 open GCOUT, ">$out_dir/$pro_name.soap_gc.info" or die "$!";
513 foreach my $chr (keys %hchr) {
514 my $chr_len = ${$hchr{$chr}}[1];
516 my $countGC = ${$hchr{$chr}}[0];
518 my $gc_percentage = $countGC / $chr_len * 100;
519 print GCOUT "$chr\t$chr_len\t$countGC\t$gc_percentage\n";
520 } #end foreach
521 close GCOUT;
525 sub drawChrDepthDistribution2 {
527 my ($gc_file2) = @_;
529 print "draw chrdepthion !\n";
531 my @chr_order = (); #(1..22, "X", "Y");
533 my %hChrCoverage = (); #{chr} => coverage
534 ## read coverage of each chromosome
535 open COVERAGE, " $coverage_dir/total_coverage.info" or die "$!";
536 while (my $line = <COVERAGE>) {
537 chomp $line;
538 last if ($line =~ /Overall/i);
539 if ($line =~ /Percentage/i) {
540 my @info = split "\:", $line;
541 #$info[-1] =~ s/\%//;
542 #$hChrCoverage{$info[0]} = $info[-1];
543 my $covered_length = (split /\//, $info[1])[0];
544 $hChrCoverage{$info[0]} = $covered_length;
547 close COVERAGE;
550 ## count mean depth of each chromosome
551 open ALLINFO, "$distribution_dir/all.info" or die "$!";
552 my %hMeanDepthChr = (); #{chr} => mean depth
553 my %hMappableChr = (); #{chr} => mappable bases
554 my %hEffChr = (); #{chr} => effective length
555 while (my $line = <ALLINFO>) {
556 chomp $line;
557 my @info = split /\s+/, $line;
558 next if ($info[2] !~ /\d{1,}/);
559 $hMeanDepthChr{$info[0]} = $info[3] / $info[2];
560 $hMappableChr{$info[0]} = $info[3];
561 $hEffChr{$info[0]} = $info[2];
563 close ALLINFO;
566 ## count mode depth of each chromosome
567 my %hModeDepthChr = (); #{chr}{depth} => num;
568 my @depth_files = `find $distribution_dir -name "*.depth"`;
569 chomp @depth_files;
570 foreach my $depth_file (@depth_files) {
571 my $basename = basename $depth_file;
572 $basename =~ s/\.depth//;
573 #$basename =~ s/chromosome//;
574 open DEPTH, $depth_file or die "$!";
575 while (my $line = <DEPTH>) {
576 chomp $line;
577 my @info = split /\s+/, $line;
578 $hModeDepthChr{$basename}{$info[0]} = $info[1];
580 close DEPTH;
583 ## read GC percentage file
584 my %hGCperChr = (); #{chr} => gc percentage
585 open GCLIST, $gc_file2 or die "$!";
586 while (my $line = <GCLIST>) {
587 chomp $line;
588 my @info = split /\s+/, $line;
589 $hGCperChr{$info[0]} = $info[3];
591 close GCLIST;
593 open CHRORDER, $chrorder_file or die "$!";
594 while (my $line = <CHRORDER>) {
595 chomp $line;
596 push @chr_order, $line;
598 close CHRORDER;
600 open CHRDIS, ">$distribution_dir/$pro_name.chrdistribution.data" or die "$!";
601 print CHRDIS "chrnum\tchr\tmean_depth\tmode_depth\tgc_percentage\n";
602 open STATISTICS, ">$distribution_dir/$pro_name.statistics_by_chr" or die "$!";
603 print STATISTICS "chr\teffective_length\tmappable_base\tmean_depth\tmode_depth\tcoverage(%)\n";
604 foreach my $chrnum (@chr_order) {
605 #my $chr = "canFam2-chr$chrnum";
606 #my $chr = "chr$chrnum";
607 my $chr = $chrnum;
608 #foreach my $chr (sort keys %hModeDepthChr) {
609 delete $hModeDepthChr{$chr}{0};
610 delete $hModeDepthChr{$chr}{65535};
611 foreach my $depth (sort {$hModeDepthChr{$chr}{$b}<=>$hModeDepthChr{$chr}{$a}} keys %{$hModeDepthChr{$chr}}) {
612 print CHRDIS "$chrnum\t$chr\t$hMeanDepthChr{$chr}\t$depth\t$hGCperChr{$chr}\n";
613 #print STATISTICS "$chr\t$hEffChr{$chr}\t$hMappableChr{$chr}\t$hMeanDepthChr{$chr}\t$depth\t$hChrCoverage{$chr}\n";
614 my $coverage = $hChrCoverage{$chr} / $hEffChr{$chr} * 100;
615 print STATISTICS "$chr\t$hEffChr{$chr}\t$hMappableChr{$chr}\t$hMeanDepthChr{$chr}\t$depth\t$coverage\n";
616 last;
619 close STATISTICS;
620 close CHRDIS;
622 my $local_time = localtime();
623 print "$local_time\tdrawing depth distribution by chromosome\n";
624 open SH, ">$distribution_dir/run_drawchrdis.sh" or die "$!";
625 print SH "$plot_chr_bin $distribution_dir/$pro_name.chrdistribution.data > $distribution_dir/a2 && gnuplot $distribution_dir/a2";
626 close SH;
628 `sh $distribution_dir/run_drawchrdis.sh`;
633 sub drawChrDepthDistribution {
635 print "draw chrdepthion !\n";
637 my @chr_order = (); #(1..22, "X", "Y");
639 my %hChrCoverage = (); #{chr} => coverage
640 ## read coverage of each chromosome
641 open COVERAGE, " $coverage_dir/total_coverage.info" or die "$!";
642 while (my $line = <COVERAGE>) {
643 chomp $line;
644 last if ($line =~ /Overall/i);
645 if ($line =~ /Percentage/i) {
646 my @info = split "\:", $line;
647 #$info[-1] =~ s/\%//;
648 #$hChrCoverage{$info[0]} = $info[-1];
649 my $covered_length = (split /\//, $info[1])[0];
650 $hChrCoverage{$info[0]} = $covered_length;
653 close COVERAGE;
656 ## count mean depth of each chromosome
657 open ALLINFO, "$distribution_dir/all.info" or die "$!";
658 my %hMeanDepthChr = (); #{chr} => mean depth
659 my %hMappableChr = (); #{chr} => mappable bases
660 my %hEffChr = (); #{chr} => effective length
661 while (my $line = <ALLINFO>) {
662 chomp $line;
663 my @info = split /\s+/, $line;
664 next if ($info[2] !~ /\d{1,}/);
665 $hMeanDepthChr{$info[0]} = $info[3] / $info[2];
666 $hMappableChr{$info[0]} = $info[3];
667 $hEffChr{$info[0]} = $info[2];
669 close ALLINFO;
672 ## count mode depth of each chromosome
673 my %hModeDepthChr = (); #{chr}{depth} => num;
674 my @depth_files = `find $distribution_dir -name "*.depth"`;
675 chomp @depth_files;
676 foreach my $depth_file (@depth_files) {
677 my $basename = basename $depth_file;
678 $basename =~ s/\.depth//;
679 #$basename =~ s/chromosome//;
680 open DEPTH, $depth_file or die "$!";
681 while (my $line = <DEPTH>) {
682 chomp $line;
683 my @info = split /\s+/, $line;
684 $hModeDepthChr{$basename}{$info[0]} = $info[1];
686 close DEPTH;
689 ## read GC percentage file
690 my %hGCperChr = (); #{chr} => gc percentage
691 open GCLIST, $gc_file or die "$!";
692 while (my $line = <GCLIST>) {
693 chomp $line;
694 my @info = split /\s+/, $line;
695 $hGCperChr{$info[0]} = $info[4];
697 close GCLIST;
699 open CHRORDER, $chrorder_file or die "$!";
700 while (my $line = <CHRORDER>) {
701 chomp $line;
702 push @chr_order, $line;
704 close CHRORDER;
706 open CHRDIS, ">$distribution_dir/$pro_name.chrdistribution.data" or die "$!";
707 print CHRDIS "chrnum\tchr\tmean_depth\tmode_depth\tgc_percentage\n";
708 open STATISTICS, ">$distribution_dir/$pro_name.statistics_by_chr" or die "$!";
709 print STATISTICS "chr\teffective_length\tmappable_base\tmean_depth\tmode_depth\tcoverage(%)\n";
710 foreach my $chrnum (@chr_order) {
711 #my $chr = "canFam2-chr$chrnum";
712 #my $chr = "chr$chrnum";
713 my $chr = $chrnum;
714 #foreach my $chr (sort keys %hModeDepthChr) {
715 delete $hModeDepthChr{$chr}{0};
716 delete $hModeDepthChr{$chr}{65535};
717 foreach my $depth (sort {$hModeDepthChr{$chr}{$b}<=>$hModeDepthChr{$chr}{$a}} keys %{$hModeDepthChr{$chr}}) {
718 print CHRDIS "$chrnum\t$chr\t$hMeanDepthChr{$chr}\t$depth\t$hGCperChr{$chr}\n";
719 #print STATISTICS "$chr\t$hEffChr{$chr}\t$hMappableChr{$chr}\t$hMeanDepthChr{$chr}\t$depth\t$hChrCoverage{$chr}\n";
720 my $coverage = $hChrCoverage{$chr} / $hEffChr{$chr} * 100;
721 print STATISTICS "$chr\t$hEffChr{$chr}\t$hMappableChr{$chr}\t$hMeanDepthChr{$chr}\t$depth\t$coverage\n";
722 last;
725 close STATISTICS;
726 close CHRDIS;
728 my $local_time = localtime();
729 print "$local_time\tdrawing depth distribution by chromosome\n";
730 open SH, ">$distribution_dir/run_drawchrdis.sh" or die "$!";
731 print SH "$plot_chr_bin $distribution_dir/$pro_name.chrdistribution.data > $distribution_dir/a2 && gnuplot $distribution_dir/a2";
732 close SH;
734 `sh $distribution_dir/run_drawchrdis.sh`;
737 sub drawDepthDistribution {
738 my $local_time = localtime();
739 print "\n$local_time\ndrawing depth distribution\n";
740 open SH, ">$distribution_dir/run_drawdis.sh" or die "$!";
741 print SH "$plot_bin $distribution_dir/$pro_name.distribution.data $distribution_dir 100 10 10 2 $pro_name > $distribution_dir/a1 && gnuplot $distribution_dir/a1";
742 close SH;
744 `sh $distribution_dir/run_drawdis.sh`;
747 __END__
748 nohup perl /panfs/GAG/junli/raid010/pipeline/soap/draw/coverageAndDepthDistribution.V5.SGC2.pl -il soap_result.list -proDir /panfs/GAG/junli/resequencing/project/plant/Soybean_qdj -proName Soybean -ref /share/raid010/resequencing/resequencing/tmp/pub/Genome/Soybean/soybean.fa -outDir /panfs/GAG/junli/resequencing/project/plant/Soybean_qdj -effective 1 -step 2 -chrSuffix chrnum -chrpattern Gm -mem 3g &