modified: Makefile
[GalaxyCodeBases.git] / perl / etc / radseq / getsyn.pl
blobf9bf26261a2670ac70b80ffca8e2ca5be39eb74e
1 #!/usr/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
5 =cut
6 use strict;
7 use warnings;
8 use Data::Dump qw(ddx);
9 use Statistics::LineFit;
10 use Galaxy::IO;
11 use Galaxy::SeqTools;
13 die "Usage: $0 <in> <out.prefix>\n" if @ARGV<2;
14 my $inf=shift;
15 my $outf=shift;
17 my $lineFit = Statistics::LineFit->new();
18 my (%dat,$t,%out);
19 open I,'<',$inf or die $!;
20 while (<I>) {
21 chomp;
22 my @items = split /\s+/;
23 # 0NMid 1chr 2+- 3start 4end 5gene 6incmpl5 7incmpl3 8scaffold 9start 10end 11+- 12gene(same)
24 if ( $items[2] eq $items[11] ) {
25 $t = 0;
26 } else {
27 $t = 1;
29 push @{$dat{$items[8]}{$items[1]}}, [$t,$items[5],$items[3],$items[4],$items[9],$items[10]];
31 close I;
32 #ddx \%dat;
34 open OL,'>',$outf.'.plog' or die $!;
35 my %OutDat;
36 for my $scafd (sort keys %dat) {
37 my (%scoreSame,%scoreAbs,%thePos);
38 for my $chr (keys %{$dat{$scafd}}) {
39 my $alignmtsA = $dat{$scafd}{$chr};
40 my (%cntStrand);
41 @$alignmtsA = sort {$a->[2] <=> $b->[2]} @$alignmtsA;
42 for my $item (@$alignmtsA) {
43 print OL join("\t",$scafd,$chr,@$item);
44 ++$cntStrand{$$item[0]};
45 my $lenChr = $$item[3] - $$item[2];
46 my $lenScafd = $$item[5] - $$item[4];
47 my ($a,$b) = sort {$a<=>$b} ($lenChr,$lenScafd);
48 if ($a) {
49 $t = $a / $b;
50 } else {
51 #print "\t***\n";
52 #next;
53 $t = 0.1;
55 push @{$thePos{$chr}},[$$item[0],$$item[2]+$lenChr/2,$$item[4]+$lenScafd/2];
56 print OL join("\t",'',$lenScafd,$lenChr,$t),"\n";
57 if ($$item[0]) {
58 $scoreSame{$chr} -= $t;
59 } else {
60 $scoreSame{$chr} += $t;
62 $scoreAbs{$chr} += $t;
64 print OL join(",",$scoreAbs{$chr},$scoreSame{$chr},%cntStrand),"\t-----\n";
66 my ($theChr) = sort { $scoreAbs{$b} <=> $scoreAbs{$a} } keys %scoreAbs;
67 my $diffStrand;
68 if ($scoreSame{$theChr} > 0) {
69 $diffStrand = 0;
70 } else {
71 $diffStrand = 1;
73 my (@x,@y);
74 for my $item (@{$thePos{$theChr}}) {
75 next unless $item->[0] == $diffStrand;
76 push @y,$item->[1]; # ChrPos
77 push @x,$item->[2]; # ScaffPos
79 my $scaffOnChr;
80 my ($intercept, $slope, $r2,$sigma,@residuals) = (0,0,-1,-1);
81 if (@x > 1) {
82 $lineFit->setData(\@x, \@y) or die "Invalid regression data\n";
83 ($intercept, $slope) = $lineFit->coefficients();
84 defined $intercept or die "Can't fit line if x values are all equal";
85 $r2 = $lineFit->rSquared();
86 $scaffOnChr = $intercept + $slope;
87 $sigma = $lineFit->sigma();
88 @residuals = $lineFit->residuals();
89 } elsif (@x == 1) {
90 $scaffOnChr = 1 + $y[0] + ($diffStrand?1:-1)*$x[0];
91 } else {
92 die;
94 $scaffOnChr = int($scaffOnChr);
95 print OL '-' x 10, join(',',
96 $theChr,$scaffOnChr,'r2',$r2, 's',$sigma,'k',$slope,'b',$intercept, 'dy',(map {int($_)} @residuals),
97 "\n",'-' x 10,
98 'x',(map {int($_)} @x),'y',(map {int($_)} @y) ),"\n";
99 push @{$OutDat{$theChr}},[$scaffOnChr,$diffStrand,$scafd,$r2,$slope,$sigma];
100 #print O join("\t",$theChr,$scaffOnChr,$diffStrand,$scafd,$r2,$slope,$sigma),"\n";
102 close OL;
104 open O,'>',$outf.'.plst' or die $!;
105 print O join("\t",'#ChrID','Pos','isDiffStrand','scaffold','r^2','slope(k)','sigma'),"\n";
106 for my $chr (sort keys %OutDat) {
107 for my $item (sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @{$OutDat{$chr}}) {
108 print O join("\t",$chr,@$item),"\n";
111 close O;
113 __END__
115 perl ./getsyn.pl tigris2cat.out tigris2cat
116 perl ./getsyn.pl tigris2dog.out tigris2dog
117 perl ./getsyn.pl tigris2hum.out tigris2hum
119 join -1 4 -2 2 <(sort -k4 tigris2cat.plst) <(sort -k2 rec.pas) -o "1.1 1.2 1.3 1.4 2.3 2.7 2.8 2.9 2.10" -t $'\t' >plot2cat.lst
120 join -1 4 -2 2 <(sort -k4 tigris2dog.plst) <(sort -k2 rec.pas) -o "1.1 1.2 1.3 1.4 2.3 2.7 2.8 2.9 2.10" -t $'\t' >plot2dog.lst
121 join -1 4 -2 2 <(sort -k4 tigris2hum.plst) <(sort -k2 rec.pas) -o "1.1 1.2 1.3 1.4 2.3 2.7 2.8 2.9 2.10" -t $'\t' >plot2hum.lst
123 ./dojoin.pl tigris2cat.plst 4 rec.npa 2 plot2cat
124 ./dojoin.pl tigris2dog.plst 4 rec.npa 2 plot2dog
125 ./dojoin.pl tigris2hum.plst 4 rec.npa 2 plot2hum
127 awk -F "\t" 'BEGIN {OFS="\t"} {print $1,$2,$3,$6,$4,$10,$14,$15,$16,$17}' plot2cat.out |sort -k1.1 -k2.2n >plot2cat.nlst
128 awk -F "\t" 'BEGIN {OFS="\t"} {print $1,$2,$3,$6,$4,$10,$14,$15,$16,$17}' plot2dog.out |sort -k1.1 -k2.2n >plot2dog.nlst
129 awk -F "\t" 'BEGIN {OFS="\t"} {print $1,$2,$3,$6,$4,$10,$14,$15,$16,$17}' plot2hum.out |sort -k1.1 -k2.2n >plot2hum.nlst
131 perl -lane 'BEGIN {my %a} @x=split /\|/; $a{$x[-1]}=$_; END {print $a{$_} for sort keys %a}' plot2dog.chrorder > plot2dog.chrorder1