3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
8 use Data
::Dump
qw(ddx);
9 use Statistics
::LineFit
;
13 die "Usage: $0 <in> <out.prefix>\n" if @ARGV<2;
17 my $lineFit = Statistics
::LineFit
->new();
19 open I
,'<',$inf or die $!;
22 my @items = split /\s+/;
23 # 0NMid 1chr 2+- 3start 4end 5gene 6incmpl5 7incmpl3 8scaffold 9start 10end 11+- 12gene(same)
24 if ( $items[2] eq $items[11] ) {
29 push @
{$dat{$items[8]}{$items[1]}}, [$t,$items[5],$items[3],$items[4],$items[9],$items[10]];
34 open OL
,'>',$outf.'.plog' or die $!;
36 for my $scafd (sort keys %dat) {
37 my (%scoreSame,%scoreAbs,%thePos);
38 for my $chr (keys %{$dat{$scafd}}) {
39 my $alignmtsA = $dat{$scafd}{$chr};
41 @
$alignmtsA = sort {$a->[2] <=> $b->[2]} @
$alignmtsA;
42 for my $item (@
$alignmtsA) {
43 print OL
join("\t",$scafd,$chr,@
$item);
44 ++$cntStrand{$$item[0]};
45 my $lenChr = $$item[3] - $$item[2];
46 my $lenScafd = $$item[5] - $$item[4];
47 my ($a,$b) = sort {$a<=>$b} ($lenChr,$lenScafd);
55 push @
{$thePos{$chr}},[$$item[0],$$item[2]+$lenChr/2,$$item[4]+$lenScafd/2];
56 print OL
join("\t",'',$lenScafd,$lenChr,$t),"\n";
58 $scoreSame{$chr} -= $t;
60 $scoreSame{$chr} += $t;
62 $scoreAbs{$chr} += $t;
64 print OL
join(",",$scoreAbs{$chr},$scoreSame{$chr},%cntStrand),"\t-----\n";
66 my ($theChr) = sort { $scoreAbs{$b} <=> $scoreAbs{$a} } keys %scoreAbs;
68 if ($scoreSame{$theChr} > 0) {
74 for my $item (@
{$thePos{$theChr}}) {
75 next unless $item->[0] == $diffStrand;
76 push @y,$item->[1]; # ChrPos
77 push @x,$item->[2]; # ScaffPos
80 my ($intercept, $slope, $r2,$sigma,@residuals) = (0,0,-1,-1);
82 $lineFit->setData(\
@x, \
@y) or die "Invalid regression data\n";
83 ($intercept, $slope) = $lineFit->coefficients();
84 defined $intercept or die "Can't fit line if x values are all equal";
85 $r2 = $lineFit->rSquared();
86 $scaffOnChr = $intercept + $slope;
87 $sigma = $lineFit->sigma();
88 @residuals = $lineFit->residuals();
90 $scaffOnChr = 1 + $y[0] + ($diffStrand?
1:-1)*$x[0];
94 $scaffOnChr = int($scaffOnChr);
95 print OL
'-' x
10, join(',',
96 $theChr,$scaffOnChr,'r2',$r2, 's',$sigma,'k',$slope,'b',$intercept, 'dy',(map {int($_)} @residuals),
98 'x',(map {int($_)} @x),'y',(map {int($_)} @y) ),"\n";
99 push @
{$OutDat{$theChr}},[$scaffOnChr,$diffStrand,$scafd,$r2,$slope,$sigma];
100 #print O join("\t",$theChr,$scaffOnChr,$diffStrand,$scafd,$r2,$slope,$sigma),"\n";
104 open O
,'>',$outf.'.plst' or die $!;
105 print O
join("\t",'#ChrID','Pos','isDiffStrand','scaffold','r^2','slope(k)','sigma'),"\n";
106 for my $chr (sort keys %OutDat) {
107 for my $item (sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @
{$OutDat{$chr}}) {
108 print O
join("\t",$chr,@
$item),"\n";
115 perl
./getsyn
.pl tigris2cat
.out tigris2cat
116 perl
./getsyn
.pl tigris2dog
.out tigris2dog
117 perl
./getsyn
.pl tigris2hum
.out tigris2hum
119 join -1 4 -2 2 <(sort -k4 tigris2cat
.plst
) <(sort -k2 rec
.pas
) -o
"1.1 1.2 1.3 1.4 2.3 2.7 2.8 2.9 2.10" -t
$'\t' >plot2cat
.lst
120 join -1 4 -2 2 <(sort -k4 tigris2dog
.plst
) <(sort -k2 rec
.pas
) -o
"1.1 1.2 1.3 1.4 2.3 2.7 2.8 2.9 2.10" -t
$'\t' >plot2dog
.lst
121 join -1 4 -2 2 <(sort -k4 tigris2hum
.plst
) <(sort -k2 rec
.pas
) -o
"1.1 1.2 1.3 1.4 2.3 2.7 2.8 2.9 2.10" -t
$'\t' >plot2hum
.lst
123 ./dojoin
.pl tigris2cat
.plst
4 rec
.npa
2 plot2cat
124 ./dojoin
.pl tigris2dog
.plst
4 rec
.npa
2 plot2dog
125 ./dojoin
.pl tigris2hum
.plst
4 rec
.npa
2 plot2hum
127 awk
-F
"\t" 'BEGIN {OFS="\t"} {print $1,$2,$3,$6,$4,$10,$14,$15,$16,$17}' plot2cat
.out
|sort -k1
.1
-k2
.2n
>plot2cat
.nlst
128 awk
-F
"\t" 'BEGIN {OFS="\t"} {print $1,$2,$3,$6,$4,$10,$14,$15,$16,$17}' plot2dog
.out
|sort -k1
.1
-k2
.2n
>plot2dog
.nlst
129 awk
-F
"\t" 'BEGIN {OFS="\t"} {print $1,$2,$3,$6,$4,$10,$14,$15,$16,$17}' plot2hum
.out
|sort -k1
.1
-k2
.2n
>plot2hum
.nlst
131 perl
-lane
'BEGIN {my %a} @x=split /\|/; $a{$x[-1]}=$_; END {print $a{$_} for sort keys %a}' plot2dog
.chrorder
> plot2dog
.chrorder1