modified: Makefile
[GalaxyCodeBases.git] / perl / etc / justonce / simcontigstat_deponly.pl
blobae1c68b5dda91923ab6e89a2557bd5fc5fe5d6d3
1 #!/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 1.0.0 @ 20120309
5 =cut
6 use strict;
7 use warnings;
8 use Time::HiRes qw ( gettimeofday tv_interval );
10 my $SAMTOOLSBIN="samtools";
11 #$SAMTOOLSBIN="/ifs1/ST_ASMB/USER/yuanjy/huxuesong/tmp/group/rev/test/samtools";
12 my $SAMFILTER="-f 3 -F 1536";
13 $SAMFILTER="-F 1536";
15 my $MINCONTIFLEN=100;
17 die "Usage: $0 <soap.coverage depthsingle> <output_prefix> <min_depth>\n" if @ARGV<2;
18 my $cvg=shift;
19 my $out=shift;
20 my $mindepth=shift;
21 my (%LENGTH,%Stat,%N5090);
23 open O,'>',"$out.$mindepth.depthcontig" or die "Error: $!\n";
24 print O "[Contig]\n#min_depth=$mindepth\n#Min_Contig_Length=$MINCONTIFLEN\n#ChrID\tStart\tEnd\tLen\n";
26 sub openfile($) {
27 my ($filename)=@_;
28 my $infile;
29 if ($filename=~/.xz$/) {
30 open( $infile,"-|","xz -dc $filename") or die "Error opening $filename: $!\n";
31 } elsif ($filename=~/.gz$/) {
32 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
33 } elsif ($filename=~/.bz2$/) {
34 open( $infile,"-|","bzip2 -dc $filename") or die "Error opening $filename: $!\n";
35 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
36 return $infile;
39 sub don5090($$$) {
40 my ($chr,$Len,$href)=@_;
41 my $n50 = -1;
42 my $n90 = -1;
43 my $s = 0;
44 my $count=0;
45 my @Length = sort {$b <=> $a} keys %{$href};
46 for my $l (@Length){
47 $s += $l*$$href{$l};
48 $count += $$href{$l};
49 if($n50 == -1 and $s >= $Len * 0.5){ $n50 = $l; }
50 if($n90 == -1 and $s >= $Len * 0.9){ $n90 = $l; last; }
52 $N5090{$chr}=[$n50,$n90,$count,$Length[0],$Length[-1]];
53 return " [$count,$Length[0],$Length[-1]] $n50,$n90 ";
55 sub dostat($$) {
56 my ($chr,$aref)=@_;
57 my ($lastB,$lastE)=(0,0);
58 my ($GapFlag,@Contig)=(0);
59 #print "\n>$chr\n";
60 for my $p (0 .. $#$aref) {
61 #print " $p:$aref->[$p]";
62 if (($GapFlag==0) and ($aref->[$p] >= $mindepth)) {
63 push @Contig,$p;
64 #print "\nB[$p:$aref->[$p]]";
65 $GapFlag=1;
67 if ($GapFlag and $aref->[$p] < $mindepth) {
68 push @Contig,$p-1;
69 #print "E",$p-1,"\n";
70 $GapFlag=0;
73 if ($GapFlag) {
74 push @Contig,$#$aref;
75 print STDERR '<';
77 #print "[@Contig]\n";
78 my $count=(scalar @Contig)/2;
79 while (@Contig) {
80 my $a=shift @Contig;
81 my $b=shift @Contig;
82 my $contiglen = $b-$a+1;
83 next if $contiglen < $MINCONTIFLEN;
84 ++$Stat{'_All_'}{$contiglen};
85 ++$Stat{$chr}{$contiglen};
86 $LENGTH{'_All_'} += $contiglen;
87 $LENGTH{$chr} += $contiglen;
88 print O join("\t",$chr,$a+1,$b+1,$contiglen),"\n";
90 my $n59ret=&don5090($chr,$LENGTH{$chr},$Stat{$chr});
91 return $count.$n59ret.$LENGTH{$chr};
94 my $fh=openfile($cvg);
95 my ($genome,@DepDatChr);
96 while(<$fh>) {
97 s/^>//;
98 /^(\S+)/ or next;
99 my $SeqName = $1;
100 @DepDatChr=();
101 print STDERR "$SeqName: ";
102 $/=">";
103 $genome=<$fh>;
104 $/="\n";
105 my $i=0;
106 while($genome=~/(\d+)/gc) {
107 $DepDatChr[$i]+=$1 if ($1!=65536);
108 ++$i;
110 my $cnt=&dostat($SeqName,\@DepDatChr);
111 $genome='';
112 print STDERR "$cnt\n";
114 close $fh;
116 print O "\n\n[Stat]\n#ChrID\tLen\tCount\n";
117 for my $chr (sort keys %Stat) {
118 for my $len (sort {$b<=>$a} keys %{$Stat{$chr}}) {
119 print O join("\t",$chr,$len,$Stat{$chr}{$len}),"\n";
123 my $chr = '_All_';
124 don5090($chr,$LENGTH{$chr},$Stat{$chr});
125 print O "\n\n[N50_N90]\n#ChrID\tN50\tN90\tCount\tMaxLen\tMinLen\tTotal_Length\n";
126 for my $chr (sort keys %N5090) {
127 print O join("\t",$chr,@{$N5090{$chr}},$LENGTH{$chr}),"\n";
129 close O;