limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / perl / popuSNP / snpmix.pl
blobf09903a49669a03ceffd1e40078d5799a1b949ea
1 #!/bin/env perl
2 use lib '/share/raid010/resequencing/resequencing/tmp/bin/annotation/glfsqlite';
3 use strict;
4 use warnings;
5 use Time::HiRes qw ( gettimeofday tv_interval );
6 use Galaxy::ShowHelp;
8 $main::VERSION=0.0.2;
10 our $opts='i:o:s:f:bv';
11 our ($opt_i, $opt_o, $opt_s, $opt_v, $opt_b, $opt_f);
13 our $help=<<EOH;
14 \t-i PSNP list (./psnp.lst) for chrid.individual.finalSNPs
15 \t-f fabyChr path (./faByChr) for [chrid].fa
16 \t-s GLF list (./glf.list), will use \$1 of (([^/]+)/[^/]+\$) for sample names
17 \t-o Output Prefix (./indGenomes/ig_)
18 \t-v show verbose info to STDOUT
19 \t-b No pause for batch runs
20 EOH
22 ShowHelp();
24 $opt_o='./indGenomes/ig_' if ! defined $opt_o;
25 $opt_i='./psnp.lst' if ! $opt_i;
26 $opt_s='./glf.list' if ! $opt_s;
27 $opt_f='./faByChr' if ! $opt_f;
29 $opt_i =~ s/\/$//;
30 $opt_f =~ s/\/$//;
31 print STDERR "From [$opt_i] to [$opt_o], with [$opt_s][$opt_f]/\n";
32 if (! $opt_b) {print STDERR 'press [Enter] to continue...'; <>;}
34 my $start_time = [gettimeofday];
36 system('mkdir','-p',$opt_o);
37 system('rmdir',$opt_o) if $opt_o =~ m#/[\s.]*[^/\s.]+[^/]*$#;
39 my @Samples;
40 open L,'<',$opt_s or die "[x]Error opening $opt_s: $!\n";
41 print STDERR "[!]Sample Order: ";
42 while (<L>) {
43 m#([^/]+)/[^/]+$#;
44 push @Samples,$1;
45 print STDERR (scalar @Samples),':[',$1,"] ";
47 print STDERR "\n";
49 print STDERR "[!]Parsing SNP ";
50 open P,'<',$opt_i or die "[x]Error opening $opt_i: $!\n";
51 while (my $file=<P>) {
52 chomp $file;
53 open SNP,'<',$file or (warn "\n[!]Error opening $file: $!\n" and next);
54 print STDERR ".\b";
55 my (%SNP,$chr,$pos,$ref,$tail,$i);
56 while (<SNP>) {
57 ($chr,$pos,$ref,$tail)=split /\t/;
58 my @indSNP;
59 for (split / /,$tail) {
60 next unless /[ACGTRYMKSWHBVDNX-]/;
61 s/-/n/;
62 push @indSNP,$_;
64 $SNP{$pos}=[$ref,\@indSNP];
66 print STDERR "+\b";
68 my @FH;
69 for (@Samples) {
70 $file=$opt_o.$_.'.'.$chr.'.fa';
71 my $fh;
72 open $fh,'>',$file or die "[x]Error opening $file: $!\n";
73 print $fh ">${_}---$chr\n";
74 push @FH,$fh;
76 warn '[!]PSNP:[',1+$#{${$SNP{$pos}}[1]},'] != File:[',(scalar @FH),"].\n" if $#FH != $#{${$SNP{$pos}}[1]};
78 $file=$opt_f.'/'.$chr.'.fa';
79 open FA,'<',$file or (warn "\n[!]Error opening $file: $!\n" and next);
80 $ref=<FA>;
81 $ref =~ />(\S+)/;
82 warn "[!]Different ChrID, [>$1] in [$file] !\n" if $1 ne $chr;
83 $i=1;
84 while ($ref=getc(FA)) {
85 unless ($ref =~ /[ACGTRYMKSWHBVDNX]/i) {
86 last if $ref eq '>';
87 next;
89 unless ($i%80) {
90 print $_ "\n" for @FH;
92 unless ($SNP{$i}) {
93 print $_ $ref for @FH;
94 } else {
95 my ($refbase,$indSNPr)=@{$SNP{$i}};
96 warn "[!]RefBase differ, SNP:[$refbase] ne FASTA:[$ref].\n" if $refbase ne uc($ref);
97 my $t=0;
98 for (@$indSNPr) {
99 $tail=$FH[$t];
100 print $tail $_;
101 ++$t;
104 ++$i;
106 print STDERR '-';
109 my $stop_time = [gettimeofday];
111 print STDERR "\nTime Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s).\n";