3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 1.0.0 @ 20120309
8 use Time
::HiRes qw
( gettimeofday tv_interval
);
10 my $SAMTOOLSBIN="samtools";
11 #$SAMTOOLSBIN="/ifs1/ST_ASMB/USER/yuanjy/huxuesong/tmp/group/rev/test/samtools";
12 my $SAMFILTER="-f 3 -F 1536";
17 die "Usage: $0 <soap.coverage depthsingle> <output_prefix> <min_depth>\n" if @ARGV<2;
21 my (%LENGTH,%Stat,%N5090);
23 open O
,'>',"$out.$mindepth.depthcontig" or die "Error: $!\n";
24 print O
"[Contig]\n#min_depth=$mindepth\n#Min_Contig_Length=$MINCONTIFLEN\n#ChrID\tStart\tEnd\tLen\n";
29 if ($filename=~/.xz$/) {
30 open( $infile,"-|","xz -dc $filename") or die "Error opening $filename: $!\n";
31 } elsif ($filename=~/.gz$/) {
32 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
33 } elsif ($filename=~/.bz2$/) {
34 open( $infile,"-|","bzip2 -dc $filename") or die "Error opening $filename: $!\n";
35 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
40 my ($chr,$Len,$href)=@_;
45 my @Length = sort {$b <=> $a} keys %{$href};
49 if($n50 == -1 and $s >= $Len * 0.5){ $n50 = $l; }
50 if($n90 == -1 and $s >= $Len * 0.9){ $n90 = $l; last; }
52 $N5090{$chr}=[$n50,$n90,$count,$Length[0],$Length[-1]];
53 return " [$count,$Length[0],$Length[-1]] $n50,$n90 ";
57 my ($lastB,$lastE)=(0,0);
58 my ($GapFlag,@Contig)=(0);
60 for my $p (0 .. $#$aref) {
61 #print " $p:$aref->[$p]";
62 if (($GapFlag==0) and ($aref->[$p] >= $mindepth)) {
64 #print "\nB[$p:$aref->[$p]]";
67 if ($GapFlag and $aref->[$p] < $mindepth) {
78 my $count=(scalar @Contig)/2;
82 my $contiglen = $b-$a+1;
83 next if $contiglen < $MINCONTIFLEN;
84 ++$Stat{'_All_'}{$contiglen};
85 ++$Stat{$chr}{$contiglen};
86 $LENGTH{'_All_'} += $contiglen;
87 $LENGTH{$chr} += $contiglen;
88 print O
join("\t",$chr,$a+1,$b+1,$contiglen),"\n";
90 my $n59ret=&don5090
($chr,$LENGTH{$chr},$Stat{$chr});
91 return $count.$n59ret.$LENGTH{$chr};
94 my $fh=openfile
($cvg);
95 my ($genome,@DepDatChr);
101 print STDERR
"$SeqName: ";
106 while($genome=~/(\d+)/gc) {
107 $DepDatChr[$i]+=$1 if ($1!=65536);
110 my $cnt=&dostat
($SeqName,\
@DepDatChr);
112 print STDERR
"$cnt\n";
116 print O
"\n\n[Stat]\n#ChrID\tLen\tCount\n";
117 for my $chr (sort keys %Stat) {
118 for my $len (sort {$b<=>$a} keys %{$Stat{$chr}}) {
119 print O
join("\t",$chr,$len,$Stat{$chr}{$len}),"\n";
124 don5090
($chr,$LENGTH{$chr},$Stat{$chr});
125 print O
"\n\n[N50_N90]\n#ChrID\tN50\tN90\tCount\tMaxLen\tMinLen\tTotal_Length\n";
126 for my $chr (sort keys %N5090) {
127 print O
join("\t",$chr,@
{$N5090{$chr}},$LENGTH{$chr}),"\n";