modified: makesrc3.mk
[GalaxyCodeBases.git] / tools / annot / indelmixP.pl
blobc5ee38922e523d1938c6e2592f11ecebcd86da09
1 #!/bin/env perl
2 use lib '/share/raid010/resequencing/resequencing/tmp/bin/annotation/glfsqlite';
3 use lib 'E:/BGI/pl/PERL5LIB/';
4 use strict;
5 use warnings;
6 use Time::HiRes qw ( gettimeofday tv_interval );
7 use Galaxy::ShowHelp;
9 $main::VERSION=0.0.3;
11 our $opts='i:o:bv';
12 our ($opt_i, $opt_o, $opt_v, $opt_b, $opt_d);
14 our $help=<<EOH;
15 \t-i Indel list (./indel.lst) for [sample\\t/path/to/indel.filtered]
16 \t-o Output txt file (./indels.pop)
17 \t-v show verbose info to STDOUT
18 \t-b No pause for batch runs
19 EOH
21 ShowHelp();
23 $opt_o='./indels.pop' if ! defined $opt_o;
24 $opt_i='./indel.lst' if ! $opt_i;
26 $opt_i =~ s/\/$//;
27 print STDERR "From [$opt_i]/ to [$opt_o]\n";
28 if (! $opt_b) {print STDERR 'press [Enter] to continue...'; <>;}
30 my $start_time = [gettimeofday];
32 my (%Samples,%Indels);
34 open( SAMP,'<',$opt_i) or die "Error: $!\n";
35 while (<SAMP>) {
36 chomp;
37 my ($sample,$file)=split /\s+/;
38 $Samples{$sample}=$file;
39 print "[!]$sample -> $file\n" if $opt_v;
41 close SAMP;
43 for my $Samp (keys %Samples) {
44 my $name=$Samples{$Samp};
45 open IN,'<',$name or die "Error: [$name] $!\n";
46 warn "Indel:[$Samp]\t...\n";
47 while (<IN>) {
48 my ($chr,$pos,$nid,$seq,undef,$homhet,undef,$pairs)=split /\t/;
49 if ($nid =~ /^I(\d+)$/) {$nid=$1;}
50 elsif ($nid =~ /^D(\d+)$/) {$nid=-$1;}
51 else {warn "File error @ uid !"; next;}
52 if ($homhet eq 'homo') {$seq = uc $seq;}
53 elsif ($homhet eq 'hete') {$seq = lc $seq;}
54 else {warn "File error @ homhet !"; next;}
55 print "$Samp\t$chr,$pos,$nid,$seq,$homhet\n" if $opt_v;
56 $Indels{$chr}{$pos}{$Samp}=[$nid,$seq,$pairs]; # $pairs just for test
58 close IN;
60 warn "Indels all loaded.\n\nOutputting ...\n";
62 open( OUT,'>',$opt_o) or die "Error: $!\n";
63 print OUT "ChrID\tPos\t",join('^',sort keys %Samples),"\n";
64 for my $chr (sort keys %Indels) {
65 for my $pos (sort {$a <=> $b} keys %{$Indels{$chr}}) {
66 print OUT "$chr\t$pos\t";
67 for my $Samp (sort keys %Samples) {
68 my $item=$Indels{$chr}{$pos};
69 if (defined $$item{$Samp}) {
70 if (@{$$item{$Samp}}==3) { # if Indel, use depth of indel_file
71 $item=join '|',@{$$item{$Samp}}
72 } else {
73 $item=join '|',(@{$$item{$Samp}},'.');
75 } else {$item='.|.|.'}
76 print OUT $item,'^';
78 print OUT "\n";
81 close OUT;
83 my $stop_time = [gettimeofday];
85 print STDERR "\nTime Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s).\n";