new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / mm10.overlap.pl
blob4560dd4d34e6c9318a8be4b13a5cbc12ff67ac2c
#!/usr/bin/env perl
# Report transcripts that overlap the preceding transcript of a different gene.
#
# Usage: mm10.overlap.pl annotation [...]      (or pipe the annotation on STDIN)
#
# Input records are split on whitespace; field 1 is the chromosome, field 3 the
# feature type, fields 4/5 the start/end, field 7 the strand and field 10 the
# gene identifier (quotes, semicolons and commas are stripped from it).
# NOTE(review): this layout matches a GTF file whose first attribute is
# gene_id -- presumably the intended input; confirm against the data used.
#
# Output: one tab-separated line per reported overlap:
#   chr strand start end gene '<' prevStart prevEnd prevGene '=' prevEnd-start prevEnd-end
use strict;
use warnings;
#use Data::Dump qw(ddx);

# read_transcripts($fh)
#   Reads records from the filehandle $fh and keeps only those whose type
#   field is 'transcript'.
#   Returns a hash ref: {"chr\tstrand"}{start}{end} = gene id.
#   Records sharing chromosome, strand, start and end overwrite each other.
sub read_transcripts {
    my ($fh) = @_;
    my %Dat;
    while (my $line = <$fh>) {
        chomp $line;
        # Header/comment lines and blank lines carry no record; skipping them
        # here avoids "uninitialized value" warnings further down.
        next if $line =~ /^\s*(?:#|$)/;
        my ($chr,undef,$type,$start,$end,undef,$strand,undef,undef,$geneid) = split ' ', $line;
        next if !defined $type or $type ne 'transcript';
        # A record without a 10th field is kept, with an empty gene id.
        $geneid = '' unless defined $geneid;
        $geneid =~ s/[\";\,]//g;
        #$geneid =~ s/(\.\d+$)//g;
        $Dat{"$chr\t$strand"}{$start}{$end} = $geneid;
    }
    return \%Dat;
}

# find_overlaps($Dat)
#   $Dat is the structure returned by read_transcripts().
#   Walks every chromosome/strand in sorted key order and, within it, every
#   start position in ascending numeric order.  Each start is represented by
#   its largest end.  A line is produced when the previous start's
#   representative reaches this start (prevEnd >= start) and the two gene ids
#   differ.
#   Returns the list of report lines (without trailing newline).
#   NOTE(review): only the immediately preceding start is compared, so a
#   transcript nested inside a longer, earlier gene is not reported once a
#   shorter transcript has come in between -- confirm this is intended.
sub find_overlaps {
    my ($Dat) = @_;
    my @report;
    for my $chrStrand (sort keys %{$Dat}) {
        my $startDat = $Dat->{$chrStrand};
        # Undefined until the first transcript of this chromosome/strand has
        # been seen, so nothing is ever compared across chromosomes/strands.
        my ($lastStart,$lastEnd,$lastGene);
        for my $start (sort {$a <=> $b} keys %{$startDat}) {
            my $endGeneDat = $startDat->{$start};
            # Largest end first: the longest transcript represents this start.
            my @ends = sort {$b <=> $a} keys %{$endGeneDat};
            my $thisEnd = $ends[0];
            my $thisGene = $endGeneDat->{$thisEnd};
            if (defined $lastEnd and $lastEnd >= $start and $thisGene ne $lastGene) {
                push @report, join("\t",$chrStrand,$start,$thisEnd,$thisGene,'<',$lastStart,$lastEnd,$lastGene,'=',$lastEnd-$start,$lastEnd-$thisEnd);
                #ddx $Dat->{$chrStrand}{$start};
            }
            ($lastStart,$lastEnd,$lastGene) = ($start,$thisEnd,$thisGene);
        }
    }
    return @report;
}

# main()
#   Reads the files named on the command line (or STDIN when there are none)
#   and prints one line per reported overlap to STDOUT.
sub main {
    my $Dat = read_transcripts(\*ARGV);
    print "$_\n" for find_overlaps($Dat);
    return;
}

# Run only when executed as a script, not when loaded by another file.
main() unless caller;

1;