modified: Makefile
[GalaxyCodeBases.git] / perl / etc / WoodyMiaoLin / PbeBefore2015 / p_distance.pl
blobe978cf0807fc5781816d3603aee0014b89344fda
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use threads;
7 open IN, "<", "sequence.input";
8 open OUT, ">", "p_distance.log";
10 my %seqs;
11 while (<IN>) {
12 next if $_ eq "\n";
13 chomp;
14 my @a = split /\t/;
15 if ($a[0] eq "FCA6.2") {
16 $seqs{FCA62} .= $a[1];
17 } elsif ($a[0] eq "PBE084") {
18 $seqs{PBE084} .=$a[1];
19 } elsif ($a[0] eq "PBE144") {
20 $seqs{PBE144} .=$a[1];
21 } elsif ($a[0] eq "PVI033") {
22 $seqs{PVI033} .=$a[1];
23 } else {
24 next;
27 close IN;
28 warn "Read sequences complete!\n";
30 my %thread_compare;
31 $thread_compare{FCA62_PBE084} = threads -> create(\&p_distance, $seqs{FCA62}, $seqs{PBE084});
32 $thread_compare{FCA62_PBE144} = threads -> create(\&p_distance, $seqs{FCA62}, $seqs{PBE144});
33 $thread_compare{FCA62_PVI033} = threads -> create(\&p_distance, $seqs{FCA62}, $seqs{PVI033});
34 $thread_compare{PBE084_PBE144} = threads -> create(\&p_distance, $seqs{PBE084}, $seqs{PBE144});
35 $thread_compare{PBE084_PVI033} = threads -> create(\&p_distance, $seqs{PBE084}, $seqs{PVI033});
36 $thread_compare{PBE144_PVI033} = threads -> create(\&p_distance, $seqs{PBE144}, $seqs{PVI033});
37 warn "Comparing sequences...\n";
39 my %distance;
40 foreach (keys %thread_compare) {
41 $distance{$_} = $thread_compare{$_} -> join();
43 warn "Compare sequences complete!\n";
45 my %thread_hetero;
46 foreach (keys %seqs) {
47 $thread_hetero{$_} = threads -> create (\&heterozygous, $seqs{$_});
50 my %titv;
51 foreach (keys %seqs) {
52 $titv{$_} = $thread_hetero{$_} -> join();
54 warn "Calculate heterozygous sites complete!\n";
56 print OUT "#SamplePair\t#ZeroSame\t#OneSame\t#TwoSame\t#NotN\n";
57 foreach (sort keys %distance) {
58 print OUT $_;
59 foreach (@{$distance{$_}}) {
60 print OUT "\t$_";
62 print OUT "\n";
64 print OUT "#Sample\t#Ti\t#Tv\t#NotN\n";
65 foreach (sort keys %titv) {
66 print OUT "$_\t$titv{$_}[0]\t$titv{$_}[1]\t$titv{$_}[2]\n";
68 close OUT;
69 warn "Done!\n";
71 sub p_distance {
72 my ($seq1, $seq2) = @_;
73 my $len1 = length $seq1;
74 my $len2 = length $seq2;
75 die "Different sequence length!" if $len1 != $len2;
76 my $two; # both alleles are same.
77 my $one; # only one allele is same.
78 my $zero; # both alleles are different.
79 my $total; # total informative bp;
80 foreach my $i (0 .. $len1 - 1) {
81 my $s1 = substr $seq1, $i, 1;
82 my $s2 = substr $seq2, $i, 1;
83 if (($s1 eq "N") or ($s2 eq "N")) {
84 next;
85 } else {
86 ++$total;
87 if ($s1 eq $s2) {
88 ++$two;
89 } elsif ((($s1 eq "A") and ($s2 =~ /[WMR]/))
90 or (($s1 eq "C") and ($s2 =~ /[SMY]/))
91 or (($s1 eq "G") and ($s2 =~ /[SKR]/))
92 or (($s1 eq "T") and ($s2 =~ /[WKY]/))
93 or (($s1 eq "W") and ($s2 =~ /[ATMKRY]/))
94 or (($s1 eq "S") and ($s2 =~ /[CGMKRY]/))
95 or (($s1 eq "M") and ($s2 =~ /[ACWSRY]/))
96 or (($s1 eq "K") and ($s2 =~ /[GTWSRY]/))
97 or (($s1 eq "R") and ($s2 =~ /[AGWSMK]/))
98 or (($s1 eq "Y") and ($s2 =~ /[CTWSMK]/))) {
99 ++$one;
100 } else {
101 ++$zero;
105 return [$zero, $one, $two, $total];
108 sub heterozygous {
109 my $seq = $_[0];
110 my $ti = ($seq =~ tr/[RY]//);
111 my $tv = ($seq =~ tr/[WSMK]//);
112 my $homo = ($seq =~ tr/AGCT//);
113 my $nn = $ti + $tv + $homo;
114 return [$ti, $tv, $nn];