modified: Makefile
[GalaxyCodeBases.git] / perl / etc / bs / veen.pl
blobbf35a29f3ae3415694876f31b8fd5811d3ecab18
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump qw(ddx);
6 die "Usage: $0 <reference.faidx> <snp files> >out.txt\n" if @ARGV < 3;
7 #@ARGV;
8 my $faiN = shift;
10 my ($cid,%ChrOrder)=(0);
11 open I,'<',$faiN or die "Error opening $faiN: $!\n";
12 while(<I>) {
13 ++$cid;
14 my $chr = (split /\t/)[0];
15 $ChrOrder{$chr} = $cid;
17 $ChrOrder{'_EOF_'} = 1 + $cid;
18 close I;
19 warn "Index:[$faiN], $cid chrosomes found.\nCompare ",scalar @ARGV," Files:[",join('],[',@ARGV),"].\n";
21 my @thePOS = qw(chrom position);
22 my @SELECTED = qw(normal_reads1 normal_reads2 normal_var_freq normal_gt tumor_reads1 tumor_reads2 tumor_var_freq tumor_gt somatic_status);
24 sub readnext($) {
25 my $in = $_[0];
26 if ( ! eof($in->[0]) ) {
27 my $record = readline($in->[0]);
28 die "readline failed: $!" unless defined $record;
29 chomp($record);
30 my %hash;
31 @hash{@{$in->[1]}} = split /\t/, $record;
32 @{$in}[2,3] = @hash{@thePOS};
33 $in->[4] = [@hash{@SELECTED}];
34 #ddx \%hash;
35 return 1;
36 } else {
37 @{$in}[2,3] = qw(_EOF_ 0);
38 $in->[4] = [qw(0 0 NA NA 0 0 NA NA NA)];
39 $in->[5] = 0;
40 return 0;
44 sub initfile($) {
45 my ($filename)=@_;
46 my $infile;
47 if ($filename=~/.zst$/) {
48 open( $infile,"-|","zstd -qdc $filename") or die "Error opening $filename: $!\n";
49 } elsif ($filename=~/.gz$/) {
50 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
51 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
52 chomp(my $t = <$infile>);
53 my @tt = split("\t",$t);
54 map { s/_read(\d)$/_reads$1/ } @tt;
55 my $ret = [$infile,\@tt,undef,-1,[],1,0];
56 readnext($ret) or die "[x]File [$filename] is empty. $!\n";
57 return $ret;
60 my @FH;
61 my ($id,$flag,@tmpstr) = (0,0);
62 for (@ARGV) {
63 my $t = initfile($_);
64 $t->[6] = 1 << $id;
65 ++$id;
66 push @FH,$t;
67 $flag += $t->[5];
68 push @tmpstr,"$_($t->[6])";
70 print "# InFile(VeenID)s: ",join(',',@tmpstr),"\n# INFO: ",join(',',@SELECTED),"\n";
71 print join("\t",qw(Chr Pos VeenType),@ARGV),"\n";
72 while($flag) {
73 my @SortedFH = sort { $ChrOrder{$a->[2]} <=> $ChrOrder{$b->[2]} || $a->[3] <=> $b->[3] } @FH;
74 #ddx \@SortedFH;
75 $flag = 0;
76 $flag += $_->[5] for @FH;
77 #ddx $flag;
78 my $maxSame = 0;
79 my $VeenType = $SortedFH[0]->[6];
80 for my $i (1 .. $#FH) {
81 if ( $SortedFH[0]->[2] eq $SortedFH[$i]->[2] and $SortedFH[0]->[3] == $SortedFH[$i]->[3] ) {
82 ++$maxSame;
83 $VeenType += $SortedFH[$i]->[6];
84 } else {
85 last;
88 my @ChrPos = @{$SortedFH[0]}[2,3];
89 my @pDat;
90 for my $i (0 .. $maxSame) {
91 push @pDat,[$SortedFH[$i]->[4],$SortedFH[$i]->[6]];
92 readnext($SortedFH[$i]);
94 for my $i ((1+$maxSame) .. $#FH) {
95 push @pDat,[[qw(. . . . . . . . .)],$SortedFH[$i]->[6]];
97 @pDat = sort { $a->[1] <=> $b->[1] } @pDat;
98 my @res = map { join(',',@{$_->[0]}) } @pDat;
99 print join("\t",@ChrPos,$VeenType,@res),"\n" if $flag;
103 #ddx \@FH;
104 close $_->[0] for @FH;
106 __END__
107 ./veen.pl GRCh38_p12_control.fa.fai oVarScan9291.snp obsmutect.snp > pVB.txt
108 ./veen.pl GRCh38_p12_control.fa.fai *.zst|head