2 #use lib "/ifs1/ST_ASMB/USER/huxuesong/public/lib";
3 use lib
'/export/data0/gentoo/develop/toGit/perl/perlib/etc';
6 use Time
::HiRes qw
( gettimeofday tv_interval
);
11 our($opt_o, $opt_c, $opt_b);
16 \t-c keep only ChrID ~ /^chr\\d+\$/
17 \t-b No pause for batch runs
19 our $ARG_DESC='fa_files{,.gz,.bz2}';
22 die "[x]No input files found !\n" unless @ARGV;
23 die "[!]Max 252 files supported.\n" if @ARGV>252;
25 die "[x]Need output file !\n" unless $opt_o;
27 print STDERR
"From [@ARGV] to [$opt_o]\n";
28 print STDERR
"Keep only ChrID =~ /^chr\\d+\$/\n" if $opt_c;
29 unless ($opt_b) {print STDERR
'press [Enter] to continue...'; <STDIN
>;}
31 my $start_time = [gettimeofday
];
34 while($_=shift @ARGV) {
37 open( $infile,"-|","bzip2 -dc $_") or die "Error opening $_: $!\n";
39 open( $infile,"-|","gzip -dc $_") or die "Error opening $_: $!\n";
40 } else {open( $infile,"<",$_) or die "Error opening $_: $!\n";}
43 warn '[!]Files opened: ',scalar @FH,"\n[!]Reading:\n";
46 open $OutFile,'>',$opt_o or die "Error opening $opt_o: $!\n";
47 print $OutFile ">Seq\n";
56 next if $seqname !~ /^chr\d+$/;
58 print STDERR
">$seqname\n";
62 $genome=~s/[^ATCGatcg]//g;
64 print $OutFile $genome;
70 &rwfa
($_,$OutFile) for @FH;
77 my $stop_time = [gettimeofday
];
79 print STDERR
"\nTime Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s).\n";
81 my (%Genome,%EffChrLen);
88 #print STDERR " >$seqname ...";
93 $genome=~s/[^ATCG]//g;
95 $Genome{$seqname}=$genome;
96 #my $n=($genome=~s/[^ATCG]/A/ig);
97 #$EffChrLen{$seqname}=length($Genome{$seqname})-$n;
98 #print STDERR "\b\b\b \t",length $Genome{$seqname},".\n";