new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / tmp / annosnp.pl
blobc61097d9d544380997b6339a7c7f59ac6cc660d6
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 #use Text::CSV;
5 #use Data::Dump qw(ddx);
# Path of the dbSNP listing that is scanned for the query rsids.
my $dbSNPa = 'snp151.lst.h';

# Query table loaded from bgi2.tsv: rsid => [chr, Ref, Alt, pGlobal, pAsia].
# Fields are whitespace-separated and the first line of the file is
# discarded (presumably a header row -- confirm against the data file).
# A repeated rsid overwrites the earlier row.  Rows with fewer than six
# fields leave the missing trailing values undef.
my %rsIN;
open my $in_fh, '<', 'bgi2.tsv' or die "Error opening file: $!\n";
scalar <$in_fh>;    # read and drop the first line
while (my $line = <$in_fh>) {
    chomp $line;

    # A blank line would split to nothing and be stored under an undef
    # (i.e. empty-string) key, so skip it instead.
    next unless $line =~ /\S/;

    my ($rsid, $chr, $Ref, $Alt, $pGlobal, $pAsia) = split /\s+/, $line;
    $rsIN{$rsid} = [$chr, $Ref, $Alt, $pGlobal, $pAsia];
}
close $in_fh;
#ddx \%rsIN;
# Scan the dbSNP listing ($dbSNPa) and keep every record whose rsid is a
# key of %rsIN.  Each line is split on whitespace into
# chr, pos, rsid, ref, tgt.
#
# %rsDB maps rsid => [ [chr, pos, ref, tgt, x, y], ... ], one inner array
# per matching line, in file order.  x and y are the last two elements of
# the query row stored in %rsIN (see the note on the pop below).
my %rsDB;
open my $db_fh, '<', $dbSNPa or die "Error opening [$dbSNPa]: $!\n";
while (my $line = <$db_fh>) {
    chomp $line;
    my ($chr, $pos, $rsid, $ref, $tgt) = split /\s+/, $line;

    # Blank or truncated lines have no third field; looking up an undef
    # key would warn and could match a stray empty-string key.
    next unless defined $rsid;
    next unless exists $rsIN{$rsid};

    # Work on a copy so the stored query row is never modified.
    my @dat = @{ $rsIN{$rsid} };

    # If the query row's final element is undef it is dropped, so the
    # slice below then picks the two elements before it instead.
    # NOTE(review): confirm this shift is the intended handling of short
    # query rows.
    pop @dat unless defined $dat[-1];

    # push autovivifies the per-rsid list, so no exists/else is needed.
    push @{ $rsDB{$rsid} }, [$chr, $pos, $ref, $tgt, @dat[-2, -1]];
}
close $db_fh;
#ddx \%rsDB;
# Report: one tab-separated line per stored hit, written to STDOUT as
#   rsid, 0-based index of the hit within that rsid, then the hit's fields.
# rsids are visited in default (string) sort order.
foreach my $id (sort keys %rsDB) {
    my $hits = $rsDB{$id};
    my $idx  = 0;
    foreach my $hit (@$hits) {
        my @fields = ($id, $idx++, @$hit);
        print join("\t", @fields), "\n";
    }
}
45 __END__
46 ./annosnp.pl > bgi2.anno &
48 grep -v _alt bgi2.anno > bgi2.anno.m
50 perl -lane '
51 print "samtools faidx GRCh38_no_alt_analysis_set.fna.bgz $F[2]:",$F[3]-500,"-",$F[3]+500," \| sed \"s/\>/\>$F[0]_$F[3] /\" "
52 ' bgi2.anno.m