new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / 20140324hw7.pl
blobbcaa7623f857c17a78bc6355167ee1d25258180d
1 #!/usr/bin/perl -w
2 use strict;
3 use warnings;
4 use Data::Dump;
6 #die "Usage: $0 <s> <t>\nW(AA), W(Aa) and W(aa) is 1-s, 1, 1-t respectively.\n" if @ARGV<2;
8 my $totalLen = 1000;
10 my $mfa=<<Emfa;
11 1 TAGACAATGACATACGTTATGTTGAA
12 2 ..T...TA.G.C.C...C..T.....
13 3 ..T...T.C..C.C.AA..CT.....
14 4 .TTCACT...GCG.A...T.T.G.T.
15 5 C.T...T..G.C.C...C..TC.C.G
16 Emfa
17 my @t=split /\n/,$mfa;
18 my %dat;
19 map {@_=split /\s+/;$dat{$_[0]}=[split //,$_[1]]} @t;
20 #ddx \%dat;
21 my @seqid = sort keys %dat;
22 my $seqcnt = @seqid;
23 my $seqlen = @{$dat{'1'}};
25 print "$_: ",join('',@{$dat{$_}}),"\n" for @seqid;
26 print "=== @seqid ===\n$mfa";
28 my @PolyT;
29 for my $i (0 .. $#{$dat{'1'}}) {
30 my $ref = $dat{'1'}->[$i];
31 for my $k (keys %dat) {
32 next if $k eq '1';
33 if ($dat{$k}->[$i] eq '.') {
34 $dat{$k}->[$i] = $ref;
35 #$PolyT[$i] = 0;
36 } else {
37 $PolyT[$i] = 1;
41 print "P: ",join('',@PolyT),"\n\n";
42 my $PolyC = 0;
43 for (@PolyT) {
44 ++$PolyC if $_ and $_ eq '1';
47 print "$_: ",join('',@{$dat{$_}}),"\n" for @seqid;
48 print "=== $seqcnt,$seqlen,$PolyC ===\n";
50 sub getTpi(@) {
51 print "--I: @_ ";
52 my %t;
53 ++$t{$_} for @_;
54 my @t = sort {$a <=> $b} values %t;
55 die if @t != 2;
56 my ($p,$q) = @t;
57 my $sum = $p + $q;
58 $p /= $sum; $q /= $sum;
59 my $Tpi = 2*$p*$q*$seqcnt / ($seqcnt-1);
60 print "$p,$q,$Tpi\n";
61 return $Tpi;
64 my $Tpi = 0;
65 for my $i (0 .. $#{$dat{'1'}}) {
66 my @arr;
67 for my $k (@seqid) {
68 push @arr,$dat{$k}->[$i];
70 $Tpi += getTpi(@arr);
72 print $Tpi;
73 $Tpi /= $totalLen;
74 print ", $Tpi\n";
76 my $wUnder = 0;
77 for my $i ( 1 .. ($seqcnt-1) ) {
78 $wUnder += 1/$i;
79 print "$i: $wUnder\n";
81 my $Tw = $PolyC / $wUnder / $totalLen;
82 print "-> $Tw, $wUnder\n";
84 print "Tpi: $Tpi Tw: $Tw\n";