new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / sperm / sperm_1_startplot_bychr.pl
blobe22a501155a2d5cd804cc3dd19682351a2ec08a5
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 #use IO::Unread qw(unread);
5 use Data::Dump qw(ddx);
7 die "Usage: $0 <input sam.gz/bam> <output>\n" if @ARGV < 2;
8 my ($inf,$outf)=@ARGV;
10 =pod
11 open CHR,'<','chr.lst' or die "[x]Cannot read chr.lst, use `samtools faidx` and append ChrID manually to get one!\n";
12 my %ChrGI2ID;
13 while (<CHR>) {
14 my ($gi,$id) = split /\s+/,$_;
15 $ChrGI2ID{$gi}=$id;
17 close CHR;
18 $ChrGI2ID{'='}='=';
19 ddx \%ChrGI2ID;
20 =cut
22 sub openifile($) {
23 my $inf = $_[0];
24 my $FH;
25 if ($inf =~ /\.sam\.gz$/i) {
26 open $FH,'-|',"gzip -dc $inf" or die "Error opening $inf: $!\n";
27 } elsif ($inf =~ /\.bam$/i) {
28 open $FH,'-|',"samtools view -h $inf" or die "Error opening $inf: $!\n";
29 } elsif ($inf =~ /\.sam$/i) {
30 open $FH,'<',$inf or die "Error opening $inf: $!\n";
31 } else { die; }
32 return $FH;
34 my $FH = openifile($inf);
36 my $line;
37 my %ChrLen;
38 while ($line = <$FH>) {
39 unless ($line =~ /^\@/) {
40 #unread $FH, $line;
41 last;
43 if ($line =~ /^\@SQ/) {
44 my (undef,$gi,$len) = split /\s+/,$line;
45 $gi =~ s/^SN://;
46 $len =~ s/^LN://;
47 $ChrLen{$gi} = $len;
50 close $FH;
51 ddx \%ChrLen;
53 my %DatbyChrM;
54 #$DatbyChrM{$_}=[] for values %ChrGI2ID;
56 open OUT,'>',$outf or die "Error opening $outf: $!\n";
57 print OUT "# $inf\n";
59 for my $chr ( keys %ChrLen ) {
60 $FH = openifile($inf);
61 while ($line = <$FH>) {
62 my ($id, $flag, $ref, $pos, $mapq, $CIAGR, $mref, $mpos, $isize, $seq, $qual, @OPT) = split /\t/,$line;
63 #print "$id, $flag, Chr$ChrGI2ID{$ref}, $pos, $mapq, $CIAGR, Chr$ChrGI2ID{$mref}, $mpos, $isize\n";
64 next if $ref ne $chr;
65 #my $commonChrID = $ChrGI2ID{$ref};
66 my $commonChrID = $ref;
67 my $posto1m = int($pos/1000000);
68 ++$DatbyChrM{$commonChrID}->[$posto1m];
70 #ddx \%DatbyChrM;
71 close $FH;
73 for my $ccid (sort { "$a$b"=~/^\d+$/ ? $a<=>$b : $a cmp $b } keys %DatbyChrM) {
74 print OUT "[$ccid]\n";
75 my $ArrayRef = $DatbyChrM{$ccid};
76 for my $i ( 0 .. $#$ArrayRef ) {
77 my $t = $$ArrayRef[$i];
78 print OUT (defined $t)?$t:0,"\t",1+$i,"\n";
81 %DatbyChrM = ();
84 close OUT;
86 __END__
87 perl startplot.pl t.sam t.out
88 grep \\[ t.out