5 die "This program makes DaDi SNP data format file using gzipped fasta files.\nAuther: Woody
6 Usage: $0 <list (column1: sample, column2: file, column3: \"Column1\"|\"Column2\"|population)> <output>\n" if @ARGV < 2;
8 open LIST
, "<", $ARGV[0];
21 open $file{$a[0]}, "-|", "zcat $a[1]";
22 if ($a[2] eq "Column1") {
24 } elsif ($a[2] eq "Column2") {
34 my %base_allele = qw
/ A AA C CC G GG T TT R AG Y CT M AC K GT S GC W AT N NN /;
36 print O
"$C1\t$C2\tAllele1\t", join("\t", sort keys %pop), "\tAllele2\t", join("\t", sort keys %pop), "\tChromosome\tOneBasedCoordinate\n";
37 while (readline $file{$C1}) {
40 last unless /^chr\w\d$/;
42 my $seq0 = readline $file{$C1};
44 my $len = length $seq0;
48 $seq0 = readline $file{$C2};
52 my $chr1 = readline $file{$_};
54 warn if $chr1 ne ">$chr0";
55 my $seq1 = readline $file{$_};
58 for my $coo (0 .. $len-1) {
62 my @b = split //, $base_allele{substr($seq[$_], $coo, 1)};
70 next if keys %base != 2;
72 my ($allele0, $allele1) = keys %base;
75 ++$pop_allele{$po[$_]}{$base[$_][0]};
76 ++$pop_allele{$po[$_]}{$base[$_][1]};
80 for (sort keys %pop) {
81 $pop_allele{$_}{$allele0} = 0 unless $pop_allele{$_}{$allele0};
82 $pop_allele{$_}{$allele1} = 0 unless $pop_allele{$_}{$allele1};
83 push @num_allele0, $pop_allele{$_}{$allele0};
84 push @num_allele1, $pop_allele{$_}{$allele1};
86 print O
substr($seq[0], $coo-1, 3), "\t", substr($seq[1], $coo-1, 3), "\t$allele0\t";
87 print O
join("\t", @num_allele0);
88 print O
"\t$allele1\t";
89 print O
join("\t", @num_allele1);
90 print O
"\t$chr0\t", $coo+1, "\n";