new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / tmp / annosplitVCF.pl
blobf3b8fc4886868b590abc08ce23a0100911f44f75
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump qw (ddx);
6 my $gtff = '/bak/seqdata/genomes/TigerRefGenome/P_tigris.gene.gtf';
7 my $wcl = 374233;
8 #$gtff = 't.gtf';
10 my ($lastStrand,%GTF,%tcds,$lastchr,%OUTF);
12 open I,'-|',"gzip -dc $ARGV[0]" or die;
13 my @Types = ('I',1,2,3,'N');
14 for (@Types) {
15 open $OUTF{$_},'|-',"gzip -9c >vcfout.$_.vcf.gz";
18 while (<I>) {
19 if (/^#/) {
20 for my $i (@Types) {
21 my $fh = $OUTF{$i};
22 print $fh $_;
24 if (/^##contig=<ID=(\w+),length=(\d+)>$/) {
25 $GTF{$1}=' ' x $2;
26 print ">$1: $2\n";
28 last if /^#CHROM/;
29 next;
31 die "Err $_";
34 open G,'<',$gtff;
35 my $tl = 0;
36 while (<G>) {
37 my ($chr,undef,$type,$start,$end,undef,$strand) = split /\t/;
38 ++$tl;
39 next unless exists $GTF{$chr};
40 if ($type eq 'transcript') {
41 my $str = $GTF{$chr};
42 my $len = $end-$start;
43 substr $GTF{$chr},$start-1,$len,'I'x$len;
45 if (defined $lastStrand) {
46 my @Poses = keys %{$tcds{$chr}};
47 if ($lastStrand eq '+') {
48 @Poses = sort { $a <=> $b } @Poses;
49 } else {
50 @Poses = sort { $b <=> $a } @Poses;
52 my $step = 1;
53 for (@Poses) {
54 substr $GTF{$chr},$_-1,1,"$step";
55 ++$step;
56 $step = 1 if $step > 3;
58 %tcds = ();
60 $lastStrand = undef;
61 $lastchr = undef;
62 } elsif ($type eq 'exon') {
63 for ($start .. $end) {
64 $tcds{$chr}{$_} = 'E';
66 $lastStrand = $strand;
67 $lastchr = $chr;
69 print STDERR int(100*$tl/$wcl)/100,"% $tl \r" unless $tl % 1000;
71 if (defined $lastStrand) {
72 my @Poses = keys %{$tcds{$lastchr}};
73 if ($lastStrand eq '+') {
74 @Poses = sort { $a <=> $b } @Poses;
75 } else {
76 @Poses = sort { $b <=> $a } @Poses;
78 my $step = 1;
79 for (@Poses) {
80 substr $GTF{$lastchr},$_-1,1,"$step";
81 ++$step;
82 $step = 1 if $step > 3;
84 %tcds = ();
86 warn "Done ! \n";
87 close G;
89 #print unpack('c',$GTF{'scaffold53'}{44227}),"|\n";
90 #print $GTF{'scaffold53'}{44227},"|\n";
91 #ddx \%GTF;die;
92 open X,'>','gtf.db';
93 for (sort keys %GTF) {
94 print X join("\t",$_,$GTF{$_}),"\n";
96 close X;
97 warn "OK ! \n";
99 while (<I>) {
100 my ($chr,$pos) = split /\t/;
101 my $str = $GTF{$chr};
102 my $type = substr $str,$pos-1,1;
103 if ($type ne ' ') {
104 my $fh = $OUTF{$type};
105 print $fh $_;
106 } else {
107 my $fh = $OUTF{'N'};
108 print $fh $_;
112 ddx \%GTF;
113 for (@Types) {
114 close $OUTF{$_};