new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / bamst2fq.pl
blob0e4fed355b0c0071ad9c5fe8241ce71d4741f1ba
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump qw(ddx);
6 our $MINLENGTH = 15;
7 $MINLENGTH = 17; # lg(3G)/lg4 = 15.74
9 die "Usage: $0 <input sam.gz/bam> <output(gzipped)>\n" if @ARGV < 2;
10 my ($inf,$outf)=@ARGV;
12 my $FH;
13 if ($inf =~ /\.sam\.gz$/i) {
14 open $FH,'-|',"gzip -dc $inf" or die "Error opening $inf: $!\n";
15 } elsif ($inf =~ /\.bam$/i) {
16 open $FH,'-|',"samtools view -h $inf" or die "Error opening $inf: $!\n";
17 } elsif ($inf =~ /\.sam$/i) {
18 open $FH,'<',$inf or die "Error opening $inf: $!\n";
19 } else { die; }
21 my ($line,$t);
23 open OUT,'|-', "gzip -9c >$outf" or die "Error opening $outf: $!\n";
25 while ($line = <$FH>) {
26 next if $line =~ /^\@/;
27 my ($id, $flag, $ref, $pos, $mapq, $CIAGR, $mref, $mpos, $isize, $seq, $qual, @OPT) = split /\t/,$line;
28 #print "$id, $flag, $ref, $pos, $mapq, $CIAGR, $mref, $mpos, $isize\n";
29 $t = 0;
30 my @Ss=();
31 while ($CIAGR =~ /(\d+)(\D)/g) {
32 if ($2 eq 'S') {
33 push @Ss,[$t,$1] if $1 >= $MINLENGTH;
35 $t += $1 if $2 ne 'D';
37 $t = 0;
38 for (@Ss) {
39 ++$t;
40 #if ($_->[0] + $_->[1] -1 > length $seq) {
41 # ddx \@Ss;
42 # die "$line";
44 my $outseq = substr $seq,$_->[0],$_->[1];
45 my $outqual = substr $qual,$_->[0],$_->[1];
46 print OUT join("\n",'@'."$id.$flag.$t $CIAGR",$outseq,'+',$outqual),"\n";
50 close $FH;
51 close OUT;
52 __END__
54 perl -e 'use Data::Dump qw(ddx); $a="3S22M7S1D9S";$t=0;@Ss=();
55 while ($a=~/(\d+)(\D)/g) {print pos $a,"\t$1 $2\n";
56 {push @Ss,[$t,$1,$t+$1-1,$2];} $t += $1 if $2 ne 'D';} ddx \@Ss;'
58 find xtubam/*bam | while read a;do echo perl bamst2fq.pl $a $a.S.fq.gz \&;done
59 perl bamst2fq.pl xtubam/mdaSperm23xtu.bam xtubam/mdaSperm23xtu.bam.S.fq.gz &