new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / mergefq.pl
blob56bd82db6b71d2e8f0cb24a5bc50e705a846a9e2
1 #!/bin/env perl
2 use strict;
3 use warnings;
5 die "Usage: $0 <mergedfa> <inputfa>\n" if @ARGV != 2;
6 my ($out,$in)=@ARGV;
7 warn "From [$in] to [$out]\n";
9 sub openfile($) {
10 my ($filename)=@_;
11 my $infile;
12 if ($filename=~/.bz2$/) {
13 open( $infile,"-|","bzip2 -dc $filename") or die "Error opening $filename: $!\n";
14 } elsif ($filename=~/.gz$/) {
15 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
16 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
17 return $infile;
20 my $fh=&openfile($in);
21 my $SEQ='';
22 my $Ncnt=0;
23 while (<$fh>) {
24 #chomp(my $a=<$fh>) or last;
25 chomp(my $genome=<$fh>) or last;
26 chomp(my $c=<$fh>) or last;
27 chomp(my $d=<$fh>) or last;
28 $SEQ .= $genome;
29 my $n=($genome=~s/[^ATCG]/A/ig);
30 $Ncnt += $n;
31 #print STDERR "\b\b\b \t",length $genome,".\n";
32 $genome='';
35 open OUT,'>',$out or die "Error opening $out: $!\n";
36 print OUT ">merged $Ncnt\n$SEQ\n";
37 close OUT;
39 __END__
40 find . -name '1.fq.gz.out'|perl -lane '$n=$_;s/1\.fq\.gz\.out$//;$p=$_;print "./bwa aln -n $_ -o 0 -N -I ${p}1merge.fa $n >${p}1.$_.sai 2>${p}1.$_.sai.log" for (3,6,10,2,5,1,20)' > rsai.cmd
41 find . -name '1.fq.gz.out'|perl -lane '$n=$_;s/1\.fq\.gz\.out$//;$p=$_;print "./bwa samse -n 500 -f ${p}1.$_.sam ${p}1merge.fa ${p}1.$_.sai $n 2>${p}1.$_.sam.log" for (3,6,10,2,5,1,20)' > rsam.cmd
43 $ cat rdo1.sh
44 #!/bin/sh
45 #$ -N "snp_1"
46 #$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
47 #$ -cwd -r y -l vf=500m,p=1
48 #$ -o /dev/null -e /dev/null
49 #$ -S /bin/bash -t 1-21
50 SEEDFILE=./rsai.cmd
51 SEED=$(sed -n -e "$SGE_TASK_ID p" $SEEDFILE)
52 WCNS=./rsam.cmd
53 STR=$(sed -n -e "$SGE_TASK_ID p" $WCNS)
54 SAMFILE=$(echo $STR|awk '{print $6}')
56 eval $SEED
57 eval $STR
58 grep -P '\tXT:A:R\t' $SAMFILE >$SAMFILE.r