2 use lib
'/nas/RD_09C/resequencing/soft/lib';
3 use lib
'E:/BGI/toGit/perlib/etc';
7 use Time
::HiRes qw
( gettimeofday tv_interval
);
14 our($opt_i, $opt_f, $opt_b);
18 \t-f Also Split to [fabychr/]
19 \t-b No pause for batch runs
24 die "[x]Must specify FASTA file !\n" if ! $opt_i;
25 die "[x]-i $opt_i not exists !\n" unless -f
$opt_i;
29 my ($filename, $outpath) = fileparse
($opt_i);
30 unless (-w
$outpath) {
31 warn "[!]Cannot write to [$outpath]. Change to [./].\n";
37 $cutfa=", Stat. Only.";
40 $cutfa=" to [$outpath$opt_f/]";
43 print STDERR
"From [$opt_i]$cutfa\n";
44 unless ($opt_b) {print STDERR
'press [Enter] to continue...'; <STDIN
>;}
46 my $start_time = [gettimeofday
];
48 mkdir $outpath.$opt_f,0755; # its $! is so unpredictable ...
49 die "[x]Error creating [$outpath$opt_f]. $!\n" unless -w
$outpath.$opt_f;
54 if ($opt_i =~ /.bz2$/) {
55 open( $infile,"-|","bzip2 -dc $opt_i") or die "Error: $!\n";
56 } elsif ($opt_i =~ /.gz$/) {
57 open( $infile,"-|","gzip -dc $opt_i") or die "Error: $!\n";
58 } else {open( $infile,"<",$opt_i) or die "Error: $!\n";}
63 die "[x]Not a FASTA file !\n" unless /^\s*>/;
66 open CHRO
,'>',$outpath.'chrorder' or die "Error: $!\n";
67 open STAT
,'>',$outpath.'chr.nfo' or die "Error: $!\n";
68 print STAT
'#',join("\t",qw
/ChrID Len EffLen GCratio N n Xx lc GC ChrDesc/),"\n";
69 my ($tlen,$tN,$tn,$tX,$tlc,$tGC)=(0,0,0,0,0,0);
73 my ($id,$desc)=split / /,$_,2;
74 if ($desc && $desc !~ /^\s*$/) {
77 } else { $desc='.';$Head=$id; }
83 print STDERR
">$id,\t[$desc]:\n";
87 open O
,'>',"$outpath$opt_f/$id.fa" or die "Error: $!\n";
89 for (my $i=0; $i<$len; $i+=$FASTAwidth) {
90 print O
substr($seq,$i,$FASTAwidth)."\n";
94 my $N = $seq=~tr/N//; # stand for gap or low quality
96 my $lc = $seq=~tr/agct/AGCT/; # may stand for masked region
97 my $GC = $seq=~tr/GCgc//;
98 my $X = $seq=~tr/Xx//; # stand for masked region
99 my $Efflen=$len-$N-$n-$X;
100 my $GCratio=sprintf('%.3f',int(0.5+1000*$GC/$len)/1000);
101 print STAT
join("\t",$id,$len,$Efflen,$GCratio,$N,$n,$X,$lc,$GC,$desc),"\n";
102 print STDERR
" Len:$len, Effictive_Len:$Efflen, GC_Ratio:$GCratio\n";
110 my $Efflen=$tlen-$tN-$tn-$tX;
111 my $GCratio=sprintf('%.3f',int(0.5+1000*$tGC/$tlen)/1000);
112 print STAT
join("\t",'#Total',$tlen,$Efflen,$GCratio,$tN,$tn,$tX,$tlc,$tGC,'Summary'),"\n";
113 print STDERR
"\nTotal:\nLen:$tlen, Effictive_Len:$Efflen, GC_Ratio:$GCratio\n";
119 my $stop_time = [gettimeofday
];
121 print STDERR
"\nTime Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s).\n";