3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 1.0.0 @ 20180516
9 use Data
::Dump
qw(ddx);
11 die "Usage: $0 <in.tsv> ...\n[!]ChrMT are not allowed now.\n" if @ARGV<1;
17 my ($rs,$chr19,$pos19,$chr38,$pos38,@d) = split /\t/,$_;
18 $chr19 =~ s/^chr//; # We have no marker on ChrM now.
19 $Pos19d{$chr19}{$pos19} = $rs;
20 $Pos38d{$chr38}{$pos38} = $rs if $chr38 ne 'unlocalized';
24 my $hg19fa = '/share/FGI2017B/pub/Ref/Human/hs37d5.fna.bgz';
25 my $hg38fa = '/share/FGI2017B/pub/Ref/Human/GRCh38_no_alt_analysis_set.fna.bgz';
26 my @ChrIDs19 = (1..22,'X','Y','MT');
27 my @ChrIDs38 = map {"chr$_"} (1..22,'X','Y','M');
30 my ($PosH,$ChrIDsA,$faf,$outf) = @_;
31 #ddx ($outf,$faf); ddx $ChrIDsA;
33 open O
,'>',$outf or die $?
;
34 for my $chr (@
$ChrIDsA) {
35 my @Poses = sort { $a <=> $b } keys %{$PosH->{$chr}};
40 open( SEQ
,"-|","samtools faidx $faf $chr:$left-$right") or die $!;
42 if ($head ne ">$chr:$left-$right\n") {
51 my $top = substr $theSeq,0,500;
52 my $base = substr $theSeq,500,1;
53 my $bottom = substr $theSeq,501,500;
54 print O
">$PosH->{$chr}{$p} $chr:$p $left-$right\n${top}${base}\n$bottom\n";
55 # `samtools faidx` says "[E::fai_build_core] Different line length in sequence 'rs10453900'" if not in standard line length.
60 dealfa
(\
%Pos19d,\
@ChrIDs19,$hg19fa,'nippt.out.hg19.fa');
61 dealfa
(\
%Pos38d,\
@ChrIDs38,$hg38fa,'nippt.out.GRCh38.fa');
64 $ grep -v GL hs37d5
.fna
.bgz
.fai
66 2 243199373 253404903 60 61
67 3 198022430 500657651 60 61
68 4 191154276 701980507 60 61
69 5 180915260 896320740 60 61
70 6 171115067 1080251307 60 61
71 7 159138663 1254218344 60 61
72 8 146364022 1416009371 60 61
73 9 141213431 1564812846 60 61
74 10 135534747 1708379889 60 61
75 11 135006516 1846173603 60 61
76 12 133851895 1983430282 60 61
77 13 115169878 2119513096 60 61
78 14 107349540 2236602526 60 61
79 15 102531392 2345741279 60 61
80 16 90354753 2449981581 60 61
81 17 81195210 2541842300 60 61
82 18 78077248 2624390817 60 61
83 19 59128983 2703769406 60 61
84 20 63025520 2763883926 60 61
85 21 48129895 2827959925 60 61
86 22 51304566 2876892038 60 61
87 X
155270560 2929051733 60 61
88 Y
59373566 3086910193 60 61
89 MT
16569 3147273397 70 71
90 NC_007605
171823 3153506529 60 61
91 hs37d5
35477943 3153681224 60 61
93 $ egrep
-v
'chrUn|_random' GRCh38_no_alt_analysis_set
.fna
.bgz
.fai
94 chr1
248956422 112 70 71
95 chr2
242193529 252513167 70 71
96 chr3
198295559 498166716 70 71
97 chr4
190214555 699295181 70 71
98 chr5
181538259 892227221 70 71
99 chr6
170805979 1076358996 70 71
100 chr7
159345973 1249605173 70 71
101 chr8
145138636 1411227630 70 71
102 chr9
138394717 1558439788 70 71
103 chr10
133797422 1698811686 70 71
104 chr11
135086622 1834520613 70 71
105 chr12
133275309 1971537157 70 71
106 chr13
114364328 2106716512 70 71
107 chr14
107043718 2222714743 70 71
108 chr15
101991189 2331287770 70 71
109 chr16
90338345 2434736088 70 71
110 chr17
83257441 2526365093 70 71
111 chr18
80373285 2610812039 70 71
112 chr19
58617616 2692333639 70 71
113 chr20
64444167 2751788762 70 71
114 chr21
46709983 2817153685 70 71
115 chr22
50818468 2864531079 70 71
116 chrX
156040895 2916075638 70 71
117 chrY
57227415 3074345836 70 71
118 chrM
16569 3132390908 70 71
119 chrEBV
171823 3144056708 70 71