limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / tools / linkage / blast2markerpos.pl
blob7b33c85925d97f3f4557c398b5de64279a9c5b46
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use lib '/nas/RD_09C/resequencing/soft/lib';
5 #use GalaxyXS::ChromByte;
6 use Data::Dump qw(ddx);
8 unless (@ARGV > 0) {
9 print "perl $0 <marker linkage> <marker blast_m6c rd> <Out_prefix>\n";
10 exit 0;
13 my ($markerf,$blastf,$out)=@ARGV;
14 my $opt_v=0;
15 my (%MarkercM,%Scaffords);
16 #open C,'<',$chrnfof or die "Error:[$chrnfof] $!\n";
17 #while (<C>) {
18 # next if /^#/;
19 # my ($chrid,$len,$efflen)=split /\t/;
20 # $ChrLen{$chrid}=$len;
22 #close C;
23 open C,'<',$markerf or die "Error:[$markerf] $!\n";
24 while (<C>) {
25 next if /^#/;
26 my ($chrid,$pos,$cm)=split /\t/;
27 $MarkercM{$chrid}{$pos}=$cm;
29 close C;
31 sub getcM($$) {
32 my ($chrid,$pos)=@_;
33 if (exists $MarkercM{$chrid}{$pos}) {
34 return $MarkercM{$chrid}{$pos};
35 } else {
36 return "-"; # too slow ...
37 my @Pos=sort {$a <=> $b} keys %{$MarkercM{$chrid}};
38 my $i;
39 for ($i=0;$i<=$#Pos;$i++) {
40 last if $Pos[$i] > $pos;
42 my ($left,$right)=($Pos[$i-1],$Pos[$i]);
43 if ($pos-$left > $right-$pos) {
44 return $MarkercM{$chrid}{$right};
45 } else {
46 return $MarkercM{$chrid}{$left};
50 sub splitMarkerid($) {
51 my $MarkerID=$_[0];
52 my ($mChr,$mPos,$mSiderLen)=split /[_m]/,$MarkerID,3;
53 my $mcM=&getcM($mChr,$mPos);
54 return [$mChr,$mPos,$mSiderLen,$mcM];
56 sub GetRelPos(@) {
57 my ($Qid,$Sid,$Pidentity,$AlnLen,$identical,$mismatches,$Gap,$Qs,$Qe,$Ss,$Se,$E,$bitScore,$BTOP,$Hit)=@_;
58 my ($mChr,$mPos,$mSiderLen,$mcM)=@{&splitMarkerid($Qid)};
59 my $LeftBPalongQ=$mSiderLen-$Qs+1;
60 my $WalkingOn=$LeftBPalongQ;
61 my @btop0=split /(\D+)/,$BTOP;
62 # $ perl -le '$a="45YT9-Ac-c-c-11TC4";@b=split /(\D+)/,$a;print "[",join("|",@b),"]"'
63 # [45|YT|9|-Ac-c-c-|11|TC|4]
64 my @btop=();
65 for (@btop0) {
66 if (/\d+/) {
67 push @btop,$_;
68 } else {
69 my @bin=split /([\w-]{2})/;
70 # $ perl -le '$a="-Ac-c-c-";@b=split /([\w-]{2})/,$a,0;print "[",join("|",@b),"]"'
71 # [|-A||c-||c-||c-]
72 for (@bin) {
73 next unless $_;
74 push @btop,$_;
78 my $LeftBPalongS=0;
79 for (@btop) {
80 if ($WalkingOn <= 0) {
81 print STDERR "$Qid [$_] " if $opt_v and $WalkingOn == 0;
82 last;
84 print STDERR "-$Qid [$_]-" if $opt_v>1;
85 if (/\d/) {
86 $LeftBPalongS += $_;
87 $WalkingOn -= $_;
88 } else {
89 my @op=split //;
90 if (/-/) {
91 --$WalkingOn if $op[1] eq '-' and $op[0] ne '-';
92 ++$LeftBPalongS if $op[0] eq '-' and $op[1] ne '-';
93 } else {
94 --$WalkingOn;
95 ++$LeftBPalongS;
98 print STDERR "-$WalkingOn $LeftBPalongS\n" if $opt_v>1;
99 my $NOP;
101 my $strand;
102 if ($Ss < $Se) {
103 $strand=1;
104 } else {
105 $strand=-1;
107 my $Spos=$Ss + $strand*$LeftBPalongS;
108 warn "$mChr,$Spos,$strand\n" if $opt_v;
109 return [$Spos,$strand,$mcM];
112 my ($CountI,$CountO)=(0,0);
113 open B,'<',$blastf or die "Error:[$blastf] $!\n";
114 open O,'>',$out or die "Error:[$out] $!\n";
115 print O "#Markerid\tMarkercM\tSid\tpos\tstrand\tPidentity\tE\tBTOP\n";
116 while (<B>) {
117 next if /^#/;
118 # Fields: query id, subject id, % identity, alignment length, identical, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score, BTOP
119 #-outfmt '6 qseqid sseqid pident length nident mismatch gapopen qstart qend sstart send evalue bitscore btop'
120 #Chr01_457584m45 Scaffold011460 95.45 88 87 4 0 1 88 1020 933 1e-34 147 16CA2WA25MA6WA35 1
121 chomp;
122 my @Dat=split /\t/;
123 my ($Qid,$Sid,$Pidentity,$AlnLen,$identical,$mismatches,$Gap,$Qs,$Qe,$Ss,$Se,$E,$bitScore,$BTOP,$Hit)=@Dat;
124 #next if $Sid =~ /^chr/i; # Just skip ...
125 ++$CountI;
126 next if $Dat[14] > 1;
127 my ($pos,$strand,$mcM)=@{&GetRelPos(@Dat)};
128 #print "$pos,$strand\t@Dat\n";
129 print O join("\t",$Qid,$mcM,$Sid,$pos,$strand,$Pidentity,$E,$BTOP),"\n";
130 ++$CountO;
132 close B;
133 print O "# $CountI,$CountO\n";
134 print "[!]$blastf $CountI->$CountO\t$out\n";
135 close O;
137 __END__
138 /ifs1/POPULATION/Rice/gapfilling/f3545ChrScaff.pos.n3
140 /ifs1/POPULATION/Rice/gapfilling/denovo20110113/Rice_PA64_63Km.scafSeq.chr.nfo
141 /ifs1/POPULATION/Rice/gapfilling/denovo20110113/out20110113/ex45Chr01.mTns6.rd
142 /ifs1/POPULATION/Rice/allfq/linkage/markerl/cmChr01.18
144 cat ../chrorder|while read a;do ./readblast.pl 15 ./out20110113/ex45$a.mTns6.rd ./out20110113/ex45$a.mTns6;done
145 cat ../chrorder|while read a;do ./readblast.pl 15 ./out20110113/ex45$a.mToc6.rd ./out20110113/ex45$a.mToc6;done
147 ./blast2markerpos.pl ./markerl/cmChr01.18 ./out20110113/ex45Chr01.mTns6.rd Rice_PA64_63Km.scafSeq.chr.nfo t.out
149 cat ../chrorder|while read a;do ./blast2markerpos.pl ./markerl/cm$a.18 ./out20110113/ex45$a.mTns6.rd ./markerpos/m2s$a.pos;done
150 cat ../chrorder|while read a;do ./blast2markerpos.pl ./markerl/cm$a.18 ./out20110113/ex45$a.mToc6.rd ./markerpos/m2c$a.pos;done