4 use DBI
qw(:sql_types);
6 use Data
::Dump
qw(ddx);
8 die "Usage: $0 <hum_sorted_bam> <vir_sorted_bam> <out_prefix>\n" if @ARGV < 3;
9 my ($inhum,$invir,$out)=@ARGV;
11 unlink "./${out}.sqlite";
12 my $dbh = DBI
->connect("dbi:SQLite:dbname=./${out}.sqlite","","",{
16 }) or die $DBI::errstr
;
17 $dbh->do("PRAGMA cache_size = -2000000"); # 2G http://www.sqlite.org/pragma.html#pragma_cache_size
18 print '[!]SQLite version: ',$dbh->{sqlite_version
}," (3.7.10 or above is better).\n";
21 CREATE TABLE MergedSam
38 CREATE INDEX nFQID12 ON MergedSam
(FQID12
);
39 CREATE TABLE SamInfo
(
49 for (split /;\s*/,$sql) {
51 $dbh->do($_) or die $dbh->errstr,"[$_]";
55 my $inStr0 = "INSERT INTO MergedSam (FQID12,FQSeq,FQQual,{-}Flag,{-}Chr,{-}Pos,{-}CIGAR,{-}MapQ,YDYC{-}) VALUES (?,?,?,?,?,?,?,?,?)";
56 my ($inStr1,$inStr2) = ($inStr0,$inStr0); $inStr1 =~ s/{-}/Hum/g; $inStr2 =~ s/{-}/Vir/g;
57 my $sthins1 = $dbh->prepare($inStr1); my $sthins2 = $dbh->prepare($inStr2);
59 my $upStr0 = "UPDATE MergedSam SET {-}Flag=?,{-}Chr=?,{-}Pos=?,{-}CIGAR=?,{-}MapQ=?,YDYC{-}=? WHERE FQID12=?";
60 my ($upStr1,$upStr2) = ($upStr0,$upStr0); $upStr1 =~ s/{-}/Hum/g; $upStr2 =~ s/{-}/Vir/g;
61 my $sthupd1 = $dbh->prepare($upStr1); my $sthupd2 = $dbh->prepare($upStr2);
63 my $sthid = $dbh->prepare( "SELECT * FROM MergedSam WHERE FQID12=?" );
68 if ($filename=~/.xz$/) {
69 open( $infile,"-|","xz -dc $filename") or die "Error opening $filename: $!\n";
70 } elsif ($filename=~/.gz$/) {
71 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
72 } elsif ($filename=~/.bz2$/) {
73 open( $infile,"-|","bzip2 -dc $filename") or die "Error opening $filename: $!\n";
74 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
77 sub getsamChrLen
($$) {
79 my $inpath = dirname
($in);
80 my ($Ref,$fq1,$fq2,$readlen1,$readlen2);
81 open( IN
,"-|","samtools view -H $in") or die "Error(m1) opening $in: $!\n";
84 my ($t,$id,$ln)=split /\t/;
86 $id = (split /\:/,$id)[1];
87 $ln = (split /\:/,$ln)[1];
89 } elsif ($t eq '@PG') {
90 # @PG ID:BSMAP VN:2.87 CL:"bsmap -u -d HomoGRCh38.fa -a F12HPCCCSZ0010_Upload/s00_C.bs_1.fq.gz -b F12HPCCCSZ0010_Upload/s00_C.bs_2.fq.gz -z 64 -p 12 -v 10 -q 2 -o s00_C.bs.bam"
91 # @PG ID:bwa-meth PN:bwa-meth VN:0.10 CL:"./bwameth.py -t 24 -p s00_C.bshum --read-group s00_C --reference HomoGRCh38.fa s00_C.bs_1.fq.gz s00_C.bs_2.fq.gz"
92 if ($id eq 'ID:BSMAP') {
93 $Ref = $1 if /\-d\s+([.\w]+)\b/;
94 $fq1 = $1 if /\-a\s+([^\s"]+)(\s|"?$)/;
95 $fq2 = $1 if /\-b\s+([^\s"]+)(\s|"?$)/;
96 } elsif ($id eq 'ID:bwa-meth') {
97 $Ref = $1 if /\--reference\s+([.\w]+)\b/;
98 if (/CL:.+\s([^-\s"]+)\s+([^-\s"]+)(\s|"?$)/) {
106 if ($inpath ne '.') {
107 ($Ref,$fq1,$fq2) = map { "$inpath/$_" } ($Ref,$fq1,$fq2);
109 #warn "$Ref,$fq1,$fq2,$inpath";
110 my $FQ1 = openfile
($fq1);
111 <$FQ1>;chomp($_=<$FQ1>);
112 $readlen1 = length $_;
114 my $FQ2 = openfile
($fq2);
115 <$FQ2>;chomp($_=<$FQ2>);
116 $readlen2 = length $_;
118 my $sthinsnfo = $dbh->prepare("INSERT INTO SamInfo (Type,Ref,FQ1,FQ2,ReadLen1,ReadLen2,Sam) VALUES (?,?,?,?,?,?,?)");
119 $sthinsnfo->execute($Type,$Ref,$fq1,$fq2,$readlen1,$readlen2,$in);
120 warn "[!]Reading [$in]: ($Type,$Ref,$fq1,$fq2,$readlen1,$readlen2)\n";
126 if ($$rd[1] & 0x40) {
128 } elsif ($$rd[1] & 0x80) {
131 die "CIGAR:$$rd[5]" if $$rd[5] =~ /H/;
134 if ($$rd[1] & 0x10) {
135 $seq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
137 $qual = reverse $qual;
139 my $id = join('',$$rd[0],'/',$rd12);
140 return [$rd12,$id,$seq,$qual];
144 getsamChrLen
($Type,$in);
145 open( IN
,"-|","samtools view -F768 $in") or die "Error opening $in: $!\n";
146 while (my $line = <IN
>) {
147 #my ($id, $flag, $ref, $pos, $mapq, $CIGAR, $mref, $mpos, $isize, $seq, $qual, @OPT) = split /\t/,$line;
148 #print "$id, $flag, $ref, $pos, $mapq, $CIGAR, $mref, $mpos, $isize\n";
149 my @Dat1 = split /\t/,$line;
150 my ($id, $flag, $ref, $pos, $mapq, $CIGAR, $mref, $mpos, $isize, $seq, $qual, @OPT) = @Dat1;
151 my $ret1 = Sam2FQ
(\
@Dat1);
152 (undef,$id,$seq,$qual) = @
$ret1;
155 my ($YD,$YC)=('','');
160 $YC=$1 if /YC:Z:(\w+)/
164 $sthid->execute($id);
165 my $qres = $sthid->fetchall_arrayref;
167 $sthins1->execute($id,$seq,$qual,$flag,$ref,$pos,$CIGAR,$mapq,$YDYC) if $Type eq 'Hum';
168 $sthins2->execute($id,$seq,$qual,$flag,$ref,$pos,$CIGAR,$mapq,$YDYC) if $Type eq 'Vir';
169 } elsif ($#$qres == 0) {
170 $sthupd1->execute($flag,$ref,$pos,$CIGAR,$mapq,$YDYC,$id) if $Type eq 'Hum';
171 $sthupd2->execute($flag,$ref,$pos,$CIGAR,$mapq,$YDYC,$id) if $Type eq 'Vir';
176 InsertSam
('Hum',$inhum);
178 InsertSam
('Vir',$invir);
184 CREATE INDEX nSort ON MergedSam
(HumChr
,HumPos
,VirChr
,VirPos
);
186 for (split /;\s*/,$sql) {
188 $dbh->do($_) or die $dbh->errstr,"[$_]";
194 ../mergebam
.pl n3_grep
.vircandi
.sam
.gz n3_grep
.vircandi
.bshbv
.bam n3_merged
195 sqlite3 n3_merged
.sqlite
.dump >n3_merged
.sqlite
.dump
198 sqlite
> EXPLAIN QUERY PLAN SELECT
* FROM MergedSam WHERE HumChr IS NOT NULL AND VirChr IS NOT NULL AND HumCIGAR
<> '*' AND VirCIGAR
<> '*' ORDER BY HumChr
,HumPos
,VirChr
,VirPos ASC
;
199 0|0|0|SCAN TABLE MergedSam USING INDEX nSort
(~62500 rows
)