8 Release: Mar. 6, 2013\n
11 Usage: $0 <input> <output>\n
12 The input file should look like:\n
13 #ID Locus1 Locus2 Locus3 ...
14 Sample1 160 162 166 166 0 0 ...
15 Sample2 0 0 174 176 178 178 ...
16 Sample3 180 182 186 186 188 188 ...
17 ... ... ... ... ... ... ... ...
18 \nMissing data must be \"0\".
26 # Load input file into @gt and %gt.
39 # Compare every two samples, and load compatible pairs into @line.
40 my $ns = @gt; # number of sample
41 my $nl = (@
{$gt[0]}-1)/2; # number of loci
42 my @line; # In the array @line of arrays, every array contains two compatible samples.
43 foreach my $a (0 .. $ns-2) {
44 foreach my $b ($a+1 .. $ns-1) {
46 foreach my $c (1 .. $nl) {
47 my %na; # number of allele
48 $na{$gt[$a][2*$c-1]} = 1;
49 $na{$gt[$a][2*$c]} = 1;
50 $na{$gt[$b][2*$c-1]} = 1;
51 $na{$gt[$b][2*$c]} = 1;
52 if (!defined $na{0} and (keys %na) > 2) { # If there are no "0" alleles and number of allele > 2, it is not a compatible locus.
57 push @line, [$gt[$a][0], $gt[$b][0]] unless defined $d;
61 # Push certain individual group arrays into @ss, uncertain group arrays into @sn.
62 my @ss; # samples sure
63 my @sn; # samples not sure
65 if (!defined ${$_}[2]) {
66 ${$_}[2] = 1; # Mark considered sample pair.
67 push my @a, (${$_}[0], ${$_}[1]);
69 while (@a) { # The while loop will recursively find $_'s all intercompatible samples, and push them into @s.
73 foreach (@line) { # The foreach loop will find $_'s all compatible samples, and push them into @w.
74 if (!defined ${$_}[2]) {
75 if (${$_}[0] eq $v or ${$_}[1] eq $v) {
77 push @w, (${$_}[0], ${$_}[1]);
86 my %c; # Array @s contain intercompatible samples. The occurrence number of one sample equals the number of its compatible samples.
91 ++$d{$c{$_}} foreach keys %c;
92 push @ss, \
@s if keys %d == 1; # If the occurrence numbers of all samples are equal, the samples should be one individual.
93 push @sn, \
@s if keys %d >= 2; # If the numbers are not equal, the samples are uncertain.
97 # Merge samples in @ss into indivuals, and push both indivuals and samples into array @ig.
98 my @ig; # individual genotype
105 my @u; # individual genotype
106 foreach my $c (0 .. $nl-1) {
108 foreach my $b (keys %s) {
109 $t{$s{$b}[2*$c]} = 1;
110 $t{$s{$b}[2*$c+1]} = 1;
112 my @d;# locus genotype
113 if (keys %t == 3) { # If there are 3 alleles, push two nonzero alleles into @d.
114 foreach (sort {$a <=> $b} keys %t) {
115 push @d, $_ if $_ != 0;
117 } elsif (keys %t == 2) { # If there are two alleles, ...
118 if (defined $t{0}) { # If one of the two is zero, push the nonzero one twice into @d.
119 foreach (sort {$a <=> $b} keys %t) {
120 push @d, ($_, $_) if $_ != 0;
122 } else { # If there is no zero allele, push the two alleles into @d.
123 push @d, $_ foreach sort {$a <=> $b} keys %t;
125 } elsif (keys %t == 1) { # If there is only one allele, push it twice into @d.
126 push @d, (keys %t, keys %t);
132 unshift @u, sprintf("Indiv%03d", $e);
136 foreach (sort keys %s) {
145 # Print individual groups.
149 my $a = join "\t", @
{$_};
155 # Print uncertain groups.
161 print O
"Uncertain $e\n";
163 foreach (sort keys %s) {
165 print O
join "\t", @
{$s{$_}};
171 # Print individuals with only one sample.
176 print O
"Individuals With Only One Sample\n";
177 foreach (sort keys %gt) {
180 print O
join "\t", @
{$gt{$_}};