3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 1.0.0 @ 20120309
8 use Time
::HiRes qw
( gettimeofday tv_interval
);
10 my $SAMTOOLSBIN="samtools";
11 #$SAMTOOLSBIN="/ifs1/ST_ASMB/USER/yuanjy/huxuesong/tmp/group/rev/test/samtools";
12 my $SAMFILTER="-f 3 -F 1536";
17 die "Usage: $0 <single_sam_bam_file> <soap.coverage depthsingle> <output_prefix> <min_overlap> <min_depth>\n" if @ARGV<2;
23 my (%LENGTH,%Stat,%N5090);
24 if ($name =~ /\.bam$/) {
25 open IN
,'-|',"$SAMTOOLSBIN view $SAMFILTER $name" or die "Error opening $name : $!\n";
26 } elsif ($name =~ /\.sam\.gz$/) {
27 open IN
,'-|',"$SAMTOOLSBIN view $SAMFILTER -S $name" or die "Error opening $name : $!\n";
28 } elsif ($name =~ /\.sam$/) {
29 open IN
,'-|',"$SAMTOOLSBIN view $SAMFILTER -S $name" or die "Error opening $name : $!\n";
31 die "[x]Unsupport file type.";
37 if ($filename=~/.xz$/) {
38 open( $infile,"-|","xz -dc $filename") or die "Error opening $filename: $!\n";
39 } elsif ($filename=~/.gz$/) {
40 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
41 } elsif ($filename=~/.bz2$/) {
42 open( $infile,"-|","bzip2 -dc $filename") or die "Error opening $filename: $!\n";
43 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
46 my $fh=openfile
($cvg);
47 my ($genome,%DepDatChr);
56 while($genome=~/(\d+)/gc) {
57 $DepDatChr{$SeqName}->[$i]+=$1 if ($1!=65536);
66 my ($chr,$Len,$href)=@_;
71 my @Length = sort {$b <=> $a} keys %{$href};
75 if($n50 == -1 and $s >= $Len * 0.5){ $n50 = $l; }
76 if($n90 == -1 and $s >= $Len * 0.9){ $n90 = $l; last; }
78 $N5090{$chr}=[$n50,$n90,$count,$Length[0],$Length[-1]];
79 return " [$count,$Length[0],$Length[-1]] $n50,$n90 ";
83 my ($lastB,$lastE)=(0,0);
85 next if /^@\w\w\t\w\w:/;
89 next unless $cigar =~ /M/;
90 #next unless ($read1[1] & 3) == 3; # paired + mapped in a proper pair
91 next if $read1[1] >= 256; # not primary || QC failure || optical or PCR duplicate
92 #next unless $read1[5] =~ /^(\d+)M$/;
93 # http://davetang.org/muse/2011/01/28/perl-and-sam/
95 while ($cigar !~ /^$/){
96 if ($cigar =~ /^([0-9]+[MIDSH])/){
98 if ($cigar_part =~ /(\d+)M/){
100 } elsif ($cigar_part =~ /(\d+)[IH]/){
102 } elsif ($cigar_part =~ /(\d+)D/){
104 } elsif ($cigar_part =~ /(\d+)S/){
105 die "[!]Not ready for this!\n";
106 #my $insertion = 'x' x $1;
107 #substr($new_ref,$position,0,$insertion);
110 $cigar =~ s/$cigar_part//;
112 die "Unexpected cigar: $cigar\n";
115 my $end=$read1[3]+$position;
117 my ($GapFlag,@Gaps)=(0);
118 for my $p ($read1[3] .. $end) {
119 if ($DepDatChr{$read1[2]}->[$p] < $mindepth) {
120 push @Gaps,[$p,-1] unless $GapFlag;
123 if ($GapFlag and $DepDatChr{$read1[2]}->[$p] >= $mindepth) {
129 $Gaps[-1]->[1]=$end+1;
131 if ($lastB==0) { # just the 1st line.
132 ($lastB,$lastE)=($read1[3],$end);
134 $lastB=$Gaps[0]->[1];
138 if (($lastE-$read1[3] +1 > $overlap) and ($end > $lastE)) {