modified: Makefile
[GalaxyCodeBases.git] / perl / etc / justonce / oyk / rs2pos.pl
blob54e0ad4e6a10aecf2ea4f74a3025456820c67225
1 #!/usr/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 1.0.0 @ 20180516
5 =cut
6 use strict;
7 use warnings;
9 use Data::Dump qw(ddx);
11 use lib '.';
13 sub getPE(@) {
14 my @AFs = @_;
15 my ($p1,$p2)=(0,0);
16 for my $i (0 .. $#AFs) {
17 my $p1i = $AFs[$i]*(1-$AFs[$i])*(1-$AFs[$i]);
18 $p1 += $p1i;
19 for my $j (($i+1) .. $#AFs) {
20 my $p2i = $AFs[$i]*$AFs[$i]*$AFs[$j]*$AFs[$j]*(4-3*$AFs[$i]-3*$AFs[$j]);
21 $p2 += $p2i;
24 return $p1-$p2;
26 sub getDPE(@) {
27 my @AFs = @_;
28 my ($p1,$p2)=(0,0);
29 for my $i (0 .. $#AFs) {
30 my $p1i = $AFs[$i]*$AFs[$i]*(1-$AFs[$i])*(1-$AFs[$i]);
31 $p1 += $p1i;
32 for my $j (($i+1) .. $#AFs) {
33 my $tb = 1-$AFs[$i]-$AFs[$j];
34 my $p2i = 2*$AFs[$i]*$AFs[$j]*$tb*$tb;
35 $p2 += $p2i;
38 return $p1+$p2;
40 sub get0PE(@) {
41 my @AFs = @_;
42 my ($p1,$p2)=(0,0);
43 for my $i (0 .. $#AFs) {
44 my $p1i = $AFs[$i]*(1-$AFs[$i])*(1-$AFs[$i]);
45 $p1 += $p1i;
46 for my $j (0 .. $#AFs) {
47 next if $i == $j;
48 my $p2i = $AFs[$i]*$AFs[$i]*$AFs[$j]*$AFs[$j]*(4-3*$AFs[$i]-3*$AFs[$j]);
49 $p2 += $p2i;
52 return $p1-$p2/2;
55 die "Usage: $0 <in.tsv> ..\n" if @ARGV<1;
56 my (%Markers,%MarkerAF,$sum);
57 while (<>) {
58 chomp;
59 my ($rs,$chr,$pos,@d) = split /\t/,$_;
60 $MarkerAF{$rs} = {@d};
61 my @AFs = values %{$MarkerAF{$rs}};
62 #$sum = eval join '+',@AFs;
63 #print "$rs: $sum\n" if $sum != 1;
64 my $PE = getPE(@AFs);
65 $Markers{$rs} = [$chr,$pos,$PE,'unlocalized',0,'unlocalized',0];
67 #ddx \%Markers;
69 # zgrep rs112459276 /hwfssz1/pub/database/ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/BED/bed_chr_X.bed.gz
70 # zgrep rs112459276 /hwfssz1/pub/database/ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/BED/bed_chr_X.bed.gz
71 my @hg19fs = glob '/hwfssz1/pub/database/ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/BED/bed_chr_*.bed.gz';
72 my @hg38fs = glob '/hwfssz1/pub/database/ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/BED/bed_chr_*.bed.gz';
73 #ddx \@hg19fs,\@hg38fs;
74 sub readPos(@) {
75 my $offect = shift @_;
76 for (@_) {
77 open( GZIN,"-|","gzip -dc $_") or die "Error opening $_: $!\n";
78 while(<GZIN>) {
79 next if /^track /;
80 my ($chrom,$chromStart,$chromEnd,$name,$score,$strand)=split /\t/;
81 next unless exists $Markers{$name};
82 my $len = $chromEnd - $chromStart;
83 warn "[!]Len:$len for [$_]\n" if $len > 1;
84 $Markers{$name}->[$offect] = $chrom;
85 $Markers{$name}->[$offect+1] = $chromEnd;
86 #ddx \$Markers{$name};
87 if ($offect == 3) {
88 if ($Markers{$name}->[0] ne $chrom or $Markers{$name}->[1] ne $chromEnd) {
89 warn "[!]Position Not match to hg19.\n";
90 ddx \$Markers{$name};
96 readPos(3,@hg19fs);
97 readPos(5,@hg38fs);
98 my @ChrIDs = map {"chr$_"} (1..22,'X','Y','MT','M');
99 my $i = 0;
100 my %L = map { $_ => $i++ } @ChrIDs;
101 $i = 5;
102 #$i = 0;
103 my @rsids = sort {
104 no warnings 'uninitialized';
105 exists $L{$Markers{$a}->[$i]} <=> exists $L{$Markers{$b}->[$i]} ||
106 $L{$Markers{$a}->[$i]} <=> $L{$Markers{$b}->[$i]} ||
107 $Markers{$a}->[$i+1] <=> $Markers{$b}->[$i+1] ||
108 $Markers{$a}->[1] <=> $Markers{$b}->[1] ||
109 $a cmp $b
110 } keys %Markers;
111 print join("\t",'#SNPid','Chr.hg19','Pos.hg19','Chr.GRCh38','Pos.GRCh38','Alleles with Frequency'),"\n";
112 for my $id (@rsids) {
113 my @d = @{$Markers{$id}};
114 my @allels = sort { $MarkerAF{$id}{$a} <=> $MarkerAF{$id}{$b} || $a cmp $b } keys %{$MarkerAF{$id}};
115 my @af = map {"$_\t$MarkerAF{$id}{$_}"} @allels;
116 print join("\t",$id,@d[3,4,5,6],@af),"\n";
119 =pod
120 rs149540625 was merged into rs112459276 on August 21, 2014 (Build 142)
121 rs75611253 was merged into rs3852322 on July 1, 2015 (Build 144)
122 rs117314748 was merged into rs289279 on July 1, 2015 (Build 144)
123 rs200370602 was merged into rs10154714 on July 1, 2015 (Build 144)
124 rs201431024 was merged into rs4617205 on July 1, 2015 (Build 144)
125 rs113184075 was merged into rs77634512 on July 19, 2016 (Build 147)
127 rs2484385,rs10453900,rs61800290,rs144913592
128 unlocalized scaffold in GRCh38
130 ./rs2pos.pl >nippt.tsv 2>nippt.tsv.err &
131 =cut