4 use lib
'/nas/RD_09C/resequencing/soft/lib';
5 use GalaxyXS
::ChromByte
;
6 use Data
::Dump
qw(ddx);
14 print "perl $0 <markerpos scaffold> <markerpos chr> <chr nfo> <chr Nzone> <scaff nfo> <scaff Nzone> <out>\n";
18 my ($scaff,$chrf,$ChrNFO,$NZchrf,$ScaffNFO,$NZScafff,$outf)=@ARGV;
20 my (%MarkerDat,%ScaffAlign,%NZone,%ScaffLen);
22 open S
,'<',$ScaffNFO or die "Error:[$ScaffNFO] $!\n";
26 my ($chrid,$len,$EffLen)=split /\t/;
27 $ScaffLen{$chrid}=[$len,$EffLen];
30 open S
,'<',$ChrNFO or die "Error:[$ChrNFO] $!\n";
34 my ($chrid,$len)=split /\t/;
35 $NZone{$chrid}=initchr
($len);
38 open N
,'<',$NZchrf or die "Error:[$NZchrf] $!\n";
42 my ($chrid,$start,$end)=split /\t/;
43 next unless exists $NZone{$chrid};
44 #die "[x]Wrong ChrID as [$chrid] !\n" unless exists $NZone{$chrid};
45 setbases
($NZone{$chrid},$start,$end,1);
48 open S
,'<',$scaff or die "Error:[$scaff] $!\n";
49 #Markerid MarkercM Sid pos strand Pidentity E BTOP
53 my ($Qid,$mcM,$Sid,$pos,$strand,$Pidentity,$E,$BTOP)=split /\t/;
54 next if $mcM eq '-'; # '-' is due to different version of marker id set. If this may lead to false negative, the BLAST must be re-run towards the new filtered marker sets.
55 $MarkerDat{$Qid}=[[$Sid,$pos,$strand]];
58 open C
,'<',$chrf or die "Error:[$chrf] $!\n";
62 my ($Qid,$mcM,$Sid,$pos,$strand,$Pidentity,$E,$BTOP)=split /\t/;
63 next unless exists $MarkerDat{$Qid};
64 push @
{$MarkerDat{$Qid}},[$Sid,$pos,$strand];
68 for my $markid (keys %MarkerDat) {
69 my ($Scaff,$Spos,$Ss)=@
{$MarkerDat{$markid}->[0]};
70 next if @
{$MarkerDat{$markid}} != 2;
71 my ($Chr,$Cpos,$Cs)=@
{$MarkerDat{$markid}->[1]};
72 # There can be markers with the same sequence >_<
73 #push @{$ScaffAlign{$Scaff}{$Chr}{$Ss*$Cs}},[$Spos,$Cpos];
74 unless (exists $ScaffAlign{$Scaff}{$Chr}{$Ss*$Cs}{$Spos}) {
75 $ScaffAlign{$Scaff}{$Chr}{$Ss*$Cs}{$Spos}=$Cpos;
77 $ScaffAlign{$Scaff}{$Chr}{$Ss*$Cs}{$Spos}=undef;
103 $base=($base*$valuecount+$add)/($valuecount+1);
107 my $inScaff = keys %ScaffAlign;
108 my $outScaff=$inScaff;
109 for my $Scaff (keys %ScaffAlign) {
110 my ($posSum,$posMax,$datAref,@datMPath)=(0,0);
111 for my $Chr (keys %{$ScaffAlign{$Scaff}}) {
112 for my $Strand (keys %{$ScaffAlign{$Scaff}{$Chr}}) {
114 for my $Spos (keys %{$ScaffAlign{$Scaff}{$Chr}{$Strand}}) {
115 my $Cpos=$ScaffAlign{$Scaff}{$Chr}{$Strand}{$Spos};
116 next unless defined $Cpos;
117 push @
$datAref,[$Spos,$Cpos];
119 #$datAref=$ScaffAlign{$Scaff}{$Chr}{$Strand};
120 $ScaffAlign{$Scaff}{$Chr}{$Strand}=$datAref;
121 $posSum += @
$datAref;
122 if ($posMax <= @
$datAref) {
124 @datMPath=($Chr,$Strand);
128 $datAref=$ScaffAlign{$Scaff}{$datMPath[0]}{$datMPath[1]};
129 #next unless defined $$datAref[0][0]; # alignbympos.pl:183: [[]]
130 if ($opt_v && $posMax != $posSum) {
131 print '-'x75
,"\n[!][DiffStrand]$Scaff:\t$posSum -> $posMax\n";
132 ddx
$ScaffAlign{$Scaff};
134 if ($posMax < $posSum * $scaffmainR or (! defined $$datAref[0][0]) ) {
135 delete $ScaffAlign{$Scaff};
136 #delete $ScaffLen{$Scaff};
138 print "Deleted !\n" if $opt_v;
140 } elsif ($posMax>1) {
141 #$datAref = [ sort {$a->[0] <=> $b->[0] } @{$datAref} ];
142 #my @dat=@{$datAref};
143 my @dat=sort {$a->[0] <=> $b->[0] } @
{$datAref};
144 my (%IndelsDat,@IndelsGroup);
146 my ($Spos,$Cpos)=@
$_;
147 push @
{$IndelsDat{$Cpos-$Spos}},$_;
150 my $majorIndelGroup2R=0.6;
151 my $majorIndelGroup3R=0.85;
152 @dat=sort {$#{$IndelsDat{$b}} <=> $#{$IndelsDat{$a}} } keys %IndelsDat;
153 my ($Sum,@CountArr,$t)=(0);
155 $t=scalar @
{$IndelsDat{$_}};
160 my ($flag,$ss)=(0,0);
165 $flag=1 if $ss/$Sum >= $majorIndelGroup2R;
167 $flag=1 if $ss/$Sum >= $majorIndelGroup3R;
172 delete $ScaffAlign{$Scaff};
173 #delete $ScaffLen{$Scaff};
175 print "Deleted,too !\n" if $opt_v;
178 #ddx $ScaffAlign{$Scaff};
179 @dat=sort { ${$IndelsDat{$a}}[0][0] <=> ${$IndelsDat{$b}}[0][0] } keys %IndelsDat;
182 push @
$datAref,$_,$IndelsDat{$_};
185 #ddx $datAref unless defined $$datAref[0][1];
186 $datAref=[0,$$datAref[0][1]-$$datAref[0][0],$datAref];
188 $ScaffAlign{$Scaff}=[@datMPath,$datAref];
190 print STDERR
"[!]Scaffold Count: $inScaff -> $outScaff\t";
192 my ($ScafflenTotal,$ScafflenEff,%ScaffNZone)=(0,0);
193 open N
,'<',$NZScafff or die "Error:[$NZScafff] $!\n";
197 my ($chrid,$start,$end)=split /\t/;
198 next unless exists $ScaffAlign{$chrid};
199 $ScafflenTotal += $ScaffLen{$chrid}->[0];
200 $ScafflenEff += $ScaffLen{$chrid}->[1];
201 $ScaffNZone{$chrid}=initchr
($ScaffLen{$chrid}->[0]) unless exists $ScaffNZone{$chrid}; # malloc only once.
202 setbases
($ScaffNZone{$chrid},$start,$end,1);
206 warn "Len: $ScafflenTotal, $ScafflenEff\n";
215 # [[1476, 12419758], [1690, 12419972]],
235 # [1, 8711880, [[445, 8712325]], 8710850, [[997, 8711847]]]
237 # scaffold92619 => ["Chr10", 1, [0, 17878303, [[2999, 17881302]] ] ],
238 # ChrID, 正负链, [记录项数减一, 第一个indel组的偏移值,第一个indel组的[起点,终点], 第二个,第二个, ... ]
239 for my $Scaff (keys %ScaffAlign) {
240 my ($chr,$sscs,$datAref)=@
{$ScaffAlign{$Scaff}};
241 my ($MaxIndex,@DAT)=@
$datAref;
242 # for (0..$MaxIndex) {
243 # my $CptoSp = shift @DAT;
244 # my $SpCpArr = shift @DAT;
246 # Within Indel Group, should be match, choose the base that not N, we may favor the ref. base type;
247 # Between I.G., may with indels, choose the strand with less N or do seq. aligenment that favor the marker anchor first.
248 # Since all N zone are artific, within IG, we can favor the scafford base to do base fixing;
249 # between, seq. ali. might be a must. (./out20110113/scafSeq.sTo6)
250 my ($lastScaffPos,$lastChrPos)=(1,0);
257 ./alignbympos.pl markerpos/m2sChr
10.pos.f markerpos
/m2cChr10
.pos.f pa64chronly
.nfo
../Pa64
.Nzone
../denovo20110113
/Rice_PA64_63Km.scafSeq.chr.nfo ../denovo
20110113/Rice_PA64_63Km
.scafSeq
.Nzone t
.out
262 cat pa64chro
| while read a
;do echo
$a; ./alignbympos.pl markerpos/m2s
$a.pos.f markerpos
/m2c
$a.pos.f pa64chronly
.nfo
../Pa64
.Nzone
../denovo20110113
/Rice_PA64_63Km.scafSeq.chr.nfo ../denovo
20110113/Rice_PA64_63Km
.scafSeq
.Nzone t
$a.out
; done