new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / WoodyMiaoLin / PbePopGen / make_dadi_snp_file.pl
blob003cb2b1f3112b5b14c0607cfded8839c1ff479d
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
5 die "This program makes DaDi SNP data format file using gzipped fasta files.\nAuther: Woody
6 Usage: $0 <list (column1: sample, column2: file, column3: \"Column1\"|\"Column2\"|population)> <output>\n" if @ARGV < 2;
8 open LIST, "<", $ARGV[0];
9 open O, ">", $ARGV[1];
11 my $C1;
12 my $C2;
13 my @id;
14 my @po;
15 my %pop;
16 my %id_pop;
17 my %file;
18 while (<LIST>) {
19 chomp;
20 my @a = split / /;
21 open $file{$a[0]}, "-|", "zcat $a[1]";
22 if ($a[2] eq "Column1") {
23 $C1 = $a[0];
24 } elsif ($a[2] eq "Column2") {
25 $C2 = $a[0];
26 } else {
27 push @id, $a[0];
28 push @po, $a[2];
29 $pop{$a[2]} = 1;
32 close LIST;
34 my %base_allele = qw / A AA C CC G GG T TT R AG Y CT M AC K GT S GC W AT N NN /;
35 my %chr_gt;
36 print O "$C1\t$C2\tAllele1\t", join("\t", sort keys %pop), "\tAllele2\t", join("\t", sort keys %pop), "\tChromosome\tOneBasedCoordinate\n";
37 while (readline $file{$C1}) {
38 chomp;
39 s/^>//;
40 last unless /^chr\w\d$/;
41 my $chr0 = $_;
42 my $seq0 = readline $file{$C1};
43 chomp $seq0;
44 my $len = length $seq0;
45 my @seq;
46 push @seq, $seq0;
47 readline $file{$C2};
48 $seq0 = readline $file{$C2};
49 chomp $seq0;
50 push @seq, $seq0;
51 for (@id) {
52 my $chr1 = readline $file{$_};
53 chomp $chr1;
54 warn if $chr1 ne ">$chr0";
55 my $seq1 = readline $file{$_};
56 push @seq, $seq1;
58 for my $coo (0 .. $len-1) {
59 my @base;
60 my %base;
61 for (2 .. @seq-1) {
62 my @b = split //, $base_allele{substr($seq[$_], $coo, 1)};
63 push @base, \@b;
64 ++$base{$b[0]};
65 ++$base{$b[1]};
67 if ($base{N}) {
68 next;
69 } else {
70 next if keys %base != 2;
72 my ($allele0, $allele1) = keys %base;
73 my %pop_allele;
74 for (0 .. @base-1) {
75 ++$pop_allele{$po[$_]}{$base[$_][0]};
76 ++$pop_allele{$po[$_]}{$base[$_][1]};
78 my @num_allele0;
79 my @num_allele1;
80 for (sort keys %pop) {
81 $pop_allele{$_}{$allele0} = 0 unless $pop_allele{$_}{$allele0};
82 $pop_allele{$_}{$allele1} = 0 unless $pop_allele{$_}{$allele1};
83 push @num_allele0, $pop_allele{$_}{$allele0};
84 push @num_allele1, $pop_allele{$_}{$allele1};
86 print O substr($seq[0], $coo-1, 3), "\t", substr($seq[1], $coo-1, 3), "\t$allele0\t";
87 print O join("\t", @num_allele0);
88 print O "\t$allele1\t";
89 print O join("\t", @num_allele1);
90 print O "\t$chr0\t", $coo+1, "\n";
93 close O;