new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / mfa4bwa.pl
blobc4ff0017ff5471d5a8f682283fc477e24b168cde
1 #!/bin/env perl
2 use lib 'E:/BGI/toGit/perlib/etc';
3 use strict;
4 use warnings;
5 use File::Basename;
6 use Time::HiRes qw ( gettimeofday tv_interval );
7 use Galaxy::ShowHelp;
9 $main::VERSION=0.0.1;
12 our $opts='p:b';
13 our($opt_p, $opt_b);
15 our $help=<<EOH;
16 \t-p output prefix
17 \t-b No pause for batch runs
18 EOH
20 ShowHelp();
22 die "[x]Must specify output prefix !\n" if ! $opt_p;
24 my $FASTAwidth=75;
26 open O,'>',$opt_p.'.fa' or die "Error: $!\n";
28 sub openfile($) {
29 my $opti =$_[0];
30 my $infile;
31 if ($opti =~ /.bz2$/) {
32 open( $infile,"-|","bzip2 -dc $opti") or die "Error: $!\n";
33 } elsif ($opti =~ /.gz$/) {
34 open( $infile,"-|","gzip -dc $opti") or die "Error: $!\n";
35 } else {open( $infile,"<",$opti) or die "Error: $!\n";}
36 return $infile;
39 warn "Files: [@ARGV]\n";
41 my $TotalLen=0;
42 while ($_=shift @ARGV) {
43 warn "[$_]\n";
44 my $infile=openfile($_);
46 local $/=">";
47 $_=<$infile>;
48 die "[x]Not a FASTA file !\n" unless /^\s*>/;
50 while (<$infile>) {
51 chomp;
52 my $Head;
53 my ($id,$desc)=split / /,$_,2;
54 if ($desc && $desc !~ /^\s*$/) {
55 $desc=~s/\t/_/g;
56 $Head="$id $desc";
57 } else { $desc='.';$Head=$id; }
58 $/=">";
59 my $seq=<$infile>;
60 chomp $seq;
61 $seq =~ s/\s//g;
62 $/="\n";
63 my $len=length($seq);
64 my $lc = $seq=~tr/agct/nnnn/;
65 $TotalLen += $len;
66 print STDERR ">$id\t$len, $lc\t[$desc]:\n";
67 print O ">$Head $desc\n";
68 for (my $i=0; $i<$len; $i+=$FASTAwidth) {
69 print O substr($seq,$i,$FASTAwidth)."\n";
72 close $infile;
74 close O;
76 my $cmd='bwa index ';
77 if ($TotalLen > 2000000000) {
78 $cmd .= '-a bwtsw ';
80 system($cmd."-p $opt_p ${opt_p}.fa");
81 system("samtools faidx ${opt_p}.fa");