new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / WoodyMiaoLin / Tiger / Count_reads_n_bp.pl
blob18e84bc005fb2a06604ff0806053afc5505724ee
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use threads;
6 my %file;
7 while (<>) {
8 chomp;
9 $file{$_} = openfile($_);
11 print STDERR "Read file list complete!\n";
13 my %thread;
14 foreach (sort keys %file) {
15 if (/sam.gz$/ or /bam$/) {
16 $thread{$_} = threads->new(\&countsam, $file{$_});
17 } elsif (/fq.gz$/ or /cut.gz$/) {
18 $thread{$_} = threads->new(\&countfq, $file{$_});
22 my %count;
23 foreach (sort keys %thread) {
24 $count{$_} = $thread{$_}->join;
25 print STDERR "Count $_ complete!\n";
28 foreach (sort keys %count) {
29 print $_, "\t", (join "\t", @{$count{$_}}), "\n";
32 sub openfile {
33 my $filename = shift;
34 my $infile;
35 if ($filename =~ /.bam$/) {
36 open $infile, "-|", "samtools view $filename" or die "Error opening $filename: $!\n";
37 } elsif ($filename =~ /.sam.gz$/) {
38 open $infile, "-|", "samtools view -S $filename" or die "Error opening $filename: $!\n";
39 } else {
40 open $infile, "-|", "gzip -dc $filename" or die "Error opening $filename: $!\n";
42 return $infile;
45 sub countfq {
46 my $in = shift;
47 my ($reads, $empty, $bp) = (0, 0, 0);
48 while (<$in>) {
49 chomp (my $seq = <$in>);
50 <$in>;
51 <$in>;
52 my $length = length $seq;
53 ++$reads;
54 ++$empty unless $length;
55 $bp += $length;
57 close $in;
58 my $a = [$reads, $empty, $bp];
59 return $a;
62 sub countsam {
63 my $in = shift;
64 my ($aligned_reads, $aligned_bp) = (0, 0);
65 while (<$in>) {
66 my @a = split /\t/;
67 ++$aligned_reads if $a[3];
68 my @b = $a[5] =~ /(\d+)[MI]/g;
69 $aligned_bp += $_ foreach @b;
71 close $in;
72 my $a = [$aligned_reads, $aligned_bp];
73 return $a;