4 use lib
'/nas/RD_09C/resequencing/soft/lib';
6 use Time
::HiRes qw
( gettimeofday tv_interval
);
7 use Data
::Dump
qw(ddx);
11 our $opts='i:o:n:g:v:r:c:l:p:bqd';
12 our($opt_i, $opt_n, $opt_g, $opt_r, $opt_c, $opt_l, $opt_o, $opt_v, $opt_b, $opt_q, $opt_d, $opt_h, $opt_p);
15 \t-i Genome FASTA (./Pa64_genome.fa)
16 \t-p Anchor Pos file (./f3545ChrScaff.pos.n3)
17 \t-n N zone file (Pa64.Nzone)
19 \t-v Verbose level (undef=0)
20 \t-b No pause for batch runs
21 \t-d Debug Mode, for test only
26 $opt_i='./Pa64_genome.fa' if ! $opt_i;
27 $opt_p='./f3545ChrScaff.pos.n3' if ! $opt_p;
28 $opt_n='Pa64.Nzone' if ! $opt_n;
31 print STDERR
"From [$opt_i] to [$opt_o].{fa,stat} refer to [$opt_n][$opt_p]\n";
32 print STDERR
"DEBUG Mode on !\n" if $opt_d;
33 print STDERR
"Verbose Mode [$opt_v] !\n" if $opt_v;
34 unless ($opt_b) {print STDERR
'press [Enter] to continue...'; <>;}
36 my $start_time = [gettimeofday
];
38 my (%Nzone,%SacffPos,%Genome,%GenomPos);
39 open I
,'<',$opt_n or die $!;
42 my ($chr,$s,$e,$len)=split /\t/;
43 push @
{$Nzone{$chr}},[$s,$e];
46 open I
,'<',$opt_p or die $!;
50 my ($ScaffID,$sPosRel,$chr,$strand,$scM,$pos,$err)=split /\t/;
51 push @
{$SacffPos{$chr}},[$ScaffID,$strand,$sPosRel,$pos,$err];
54 open I
,'<',$opt_i or die $!;
58 die "[x]Not a FASTA file ! [$opt_i]\n" unless /^\s*>/;
62 my ($id,$desc)=split / /,$_,2;
63 $desc='' unless $desc;
69 print STDERR
">$id,\t[$desc]: ",length $seq,"\n";
75 sub getChrSeq
($$) { # Well, no more N triming.
77 my $last=$GenomPos{$id};
81 $str=substr $Genome{$id},$last,$end-$last;
83 $str=substr $Genome{$id},$last;
88 warn "[!]$id\t$last\t",$end-$last,"\n";
92 sub searchtoPlug
($$$$) {
93 my ($chr,$pos,$left,$right)=@_;
95 for (@
{$Nzone{$chr}}) {
103 for (@
{$Nzone{$chr}}) {
110 my ($thePos,$cutL,$cutR);
111 if ($pos>$theZone->[0] and $pos<$theZone->[1]) { # in zone
113 ($cutL,$cutR)=($thePos-$theZone->[0]-1,$theZone->[1]-$thePos-1);
115 $thePos=$theZone->[0];
117 $cutR=$theZone->[1]-$theZone->[0]-1;
119 return [$thePos,$cutL,$cutR];
123 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
124 my $rev = reverse $str;
125 $rev =~ tr/[](){}<>/][)(}{></;
129 open S
,'>',$opt_o.'.stat' or die $!;
130 open O
,'>',$opt_o.'.fa' or die $!;
131 for my $chr (sort keys %SacffPos) {
136 for my $ref (sort {$a->[3] <=> $b->[3]} @
{$SacffPos{$chr}}) {
137 my ($ScaffID,$strand,$sPosRel,$pos,$err)=@
$ref;
138 my $ScaffLen=length $Genome{$ScaffID};
139 my ($ScaffLeft,$ScaffRight);
141 if ($strand eq '+') {
142 $ScaffLeft=1-$sPosRel;
143 $ScaffRight=$ScaffLen-$sPosRel;
144 $str=$Genome{$ScaffID};
146 $ScaffLeft=$sPosRel-$ScaffLen;
147 $ScaffRight=$sPosRel-1;
148 $str=revcom
($Genome{$ScaffID});
150 my ($left,$right)=($pos+$ScaffLeft-$err,$pos+$ScaffRight+$err); # this should be wrong but should be right.
151 my ($thePos,$cutL,$cutR)=@
{&searchtoPlug
($chr,$pos,$left,$right)};
152 print S
"$ScaffID\t",length($Genome{$ScaffID}),"\t$strand\t$thePos\t$cutL,$cutR\t$thePos\n";
153 print O
${&getChrSeq
($chr,$thePos-1)},"x",$str,"x\n";
154 #$lastPos=$thePos+$cutR;
155 delete $Genome{$ScaffID};
157 print O
${&getChrSeq
($chr,-1)},"\n";
161 for my $id (keys %Genome) {
162 next if $id =~ /^chr/i;
164 print O
'>',$id,"\n",$Genome{$id},"\n";
168 warn "[!]$SCount Scaffold(s) remains.\n";
171 ./pluginScaff
.pl
-o PA64new
.fa