2 use lib
'/share/raid010/resequencing/resequencing/tmp/bin/annotation/glfsqlite';
5 use Time
::HiRes qw
( gettimeofday tv_interval
);
10 our $opts='i:o:s:bvl:f:c:';
11 our ($opt_i, $opt_o, $opt_s, $opt_v, $opt_b, $opt_d, $opt_f, $opt_l, $opt_c);
14 \t-i Indel path (./Indel/) for [sample].indel.txt.filter
15 \t-f fabyChr path (./fabychr/) for [chr].fa
16 \t-c consensus path (./consensus/) for [sample].[chr].txt
17 \t-s Samples list (./samples.list) [sample\\s+]
18 \t-l Length of nearby sequence (200)bp
19 \t-o Output txt file (./indels.pop)
20 \t-v show verbose info to STDOUT
21 \t-b No pause for batch runs
26 $opt_o='./indels.pop' if ! defined $opt_o;
27 $opt_i='./Indel/' if ! $opt_i;
28 $opt_s='./samples.list' if ! $opt_s;
29 $opt_f='./fabychr/' if ! $opt_f;
30 $opt_c='./consensus/' if ! $opt_c;
31 $opt_l=200 if ! $opt_l;
35 print STDERR
"From [$opt_i]/ to [$opt_o], with [$opt_s][$opt_f][$opt_l]/\n";
36 if (! $opt_b) {print STDERR
'press [Enter] to continue...'; <>;}
38 my $start_time = [gettimeofday
];
40 my (@Samples,%Indels);
42 open( SAMP
,'<',$opt_s) or die "Error: $!\n";
45 my ($sample)=split /\s+/;
46 push @Samples,$sample;
49 if ($opt_v) {print "Samples List:\n";print "[$_]\t" for @Samples;print "\n";}
51 for my $Samp (@Samples) {
52 my $name=$opt_i.'/'.$Samp.'.indel.txt.filter';
53 open IN
,'<',$name or die "Error: [$name] $!\n";
54 warn "Indel:[$Samp]\t...\n";
56 my ($chr,$pos,$nid,$seq,undef,$homhet,undef,$pairs)=split /\t/;
57 if ($nid =~ /^I(\d+)$/) {$nid=$1;}
58 elsif ($nid =~ /^D(\d+)$/) {$nid=-$1;}
59 else {warn "File error @ uid !"; next;}
60 if ($homhet eq 'homo') {$seq = uc $seq;}
61 elsif ($homhet eq 'hete') {$seq = lc $seq;}
62 else {warn "File error @ homhet !"; next;}
63 print "$Samp\t$chr,$pos,$nid,$seq,$homhet\n" if $opt_v;
64 $Indels{$chr}{$pos}{$Samp}=[$nid,$seq,$pairs]; # $pairs just for test
68 warn "Indels all loaded.\n\nLoading Depth:\n";
71 for my $Samp (@Samples) {
72 for my $chr (keys %Indels) {
73 print STDERR
" ${Samp}_$chr\t";
74 my $name=$opt_c.'/'.$Samp.'.'.$chr.'.txt';
76 open IN
,'<',$name or (warn "Error: [$name] $!\n" and next);
79 my ($chr,$pos,undef,undef,undef,undef,$depth)=split /\t/;
80 next unless exists $Indels{$chr}{$pos};
82 $Depth{$chr}{$pos}{$Samp}=$depth;
88 warn "Depth all loaded.\n\n";
90 open( OUT
,'>',$opt_o) or die "Error: $!\n";
91 for my $chr (sort keys %Indels) {
92 open FA
,'<',$opt_f.'/'.$chr.'.fa' or die "Error: $!\n";
96 die "[x]FASTA file wrong for $chr & $1" if $1 ne $chr;
101 $seq = uc($seq); ## all upper case
103 my $ref_len = length($seq);
104 warn "$chr: $ref_len bp\n";
106 for my $pos (sort {$a <=> $b} keys %{$Indels{$chr}}) {
107 my ($posU,$lenU,$up);
108 if ($pos>=$opt_l+1) {$posU=$pos-$opt_l-1;$lenU=$opt_l;}
109 else {$posU=0;$lenU=$pos-1;}
110 if ($lenU>0) {$up=substr($seq,$posU,$lenU);}
112 my $down=substr($seq,$pos,$opt_l); # First character is at offset 0
113 print OUT
"$chr\t$pos\t$up\t$down\t";
114 warn "$chr,$pos\t$posU,$lenU\t$pos,$opt_l\n" if $opt_v;
115 for my $Samp (@Samples) {
116 my $item=$Indels{$chr}{$pos};
117 if (defined $$item{$Samp}) {
118 if (@
{$$item{$Samp}}==3) { # if Indel, use depth of indel_file
119 $item=join '|',@
{$$item{$Samp}}
121 $item=join '|',(@
{$$item{$Samp}},$Depth{$chr}{$pos}{$Samp});
123 } else {$item='.|.|'.$Depth{$chr}{$pos}{$Samp};}
131 my $stop_time = [gettimeofday
];
133 print STDERR
"\nTime Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s).\n";