4 use Data
::Dump
qw(ddx);
6 my ($in,$out)=("refGene.filter.gff", "ymy20120321.lst");
8 my (%GFF,%TransID,%Dat);
13 $v =~ s/%([0-9a-fA-F]{2})/chr hex($1)/ge;
16 open IN
,'<',$in or die "Error: $!\n";
18 next if /^\s+$/ or /^[;#]+/;
21 my ($seqname, $source, $primary, $start, $end,
22 $score, $strand, $frame, $groups) = split /\t/;
23 my @groups = split(/\s*;\s*/, $groups);
25 for my $group (@groups) {
26 my ($tag,$value) = split /=/,$group;
27 $tag = unescape
($tag);
28 my @values = map {unescape
($_)} split /,/,$value;
29 #$groups{$tag}=\@values; # patch for those alter-splices
30 $groups{$tag}=$values[0];
33 if ($primary eq 'mRNA') {
34 push @
{$GFF{$groups{'name'}}},$groups{'ID'};
38 for my $k (keys %GFF) {
39 if (@
{$GFF{$k}} != 2) {
50 open IN
,'<',$in or die "Error: $!\n"; # It is a small file after all, ...
52 next if /^\s+$/ or /^[;#]+/;
55 my ($seqname, $source, $primary, $start, $end,
56 $score, $strand, $frame, $groups) = split /\t/;
57 my @groups = split(/\s*;\s*/, $groups);
59 for my $group (@groups) {
60 my ($tag,$value) = split /=/,$group;
61 $tag = unescape
($tag);
62 my @values = map {unescape
($_)} split /,/,$value;
63 #$groups{$tag}=\@values; # patch for those alter-splices
64 $groups{$tag}=$values[0];
66 if ($primary =~ /CDS|3-UTR/) {
67 next unless exists $TransID{$groups{'Parent'}};
68 push @
{$Dat{$groups{'Parent'}}{$primary}},[$start, $end];
69 } elsif ($primary eq 'mRNA') {
70 next unless exists $TransID{$groups{'ID'}};
71 $Dat{$groups{'ID'}}{'Strand'}=$strand;
78 my ($StrA,$StrB)=('','');
80 $StrA .= "$$_[0],$$_[1] " for (@
$RefA);
81 return 'Same' if @
$RefA != 1; # filter those with more 3-UTR
84 $StrB .= "$$_[0],$$_[1] " for (@
$RefB);
85 return 'Same' if @
$RefB != 1;
90 return "[$StrA],[$StrB]";
93 open OUT
,'>',$out or die "Error: $!\n";
94 for my $k (keys %GFF) {
95 my ($IDA,$IDB)=@
{$GFF{$k}};
98 my $Strand = $$RefA{"Strand"};
99 die if $Strand ne $$RefB{"Strand"};
100 my $retStr=cmpUTR
($$RefA{"3-UTR"},$$RefB{"3-UTR"});
101 next if $retStr eq 'Same';
103 my ($ua,$ub,$uc,$ud);
104 unless (exists $$RefA{"3-UTR"} and exists $$RefB{"3-UTR"}) {
105 #ddx $RefA; ddx $RefB;
106 warn "$k: [$IDA,$IDB]\n";
107 if (exists $$RefA{"3-UTR"}) {
108 ($ua,$ub)=@{$$RefA{"3-UTR"}->[0]};
110 } elsif (exists $$RefB{"3-UTR"}) {
112 ($uc,$ud)=@{$$RefB{"3-UTR"}->[0]};
114 warn "$k: [$IDA,$IDB]\n";
118 ($ua,$ub)=@{$$RefA{"3-UTR"}->[0]};
119 ($uc,$ud)=@{$$RefB{"3-UTR"}->[0]};
121 next if $ua==$uc and $ub==$ud;
124 if ($Strand eq '+') {
125 ($a,$b)=@
{$$RefA{"CDS"}->[-1]}; # sorted
126 ($c,$d)=@
{$$RefB{"CDS"}->[-1]};
127 } elsif ($Strand eq '-') {
128 ($a,$b)=@
{$$RefA{"CDS"}->[0]};
129 ($c,$d)=@
{$$RefB{"CDS"}->[0]};
130 #ddx $RefA; ddx $RefB;
131 #print "$a,$b $c,$d\n"; die;
133 next unless $a==$c and $b==$d;
134 print OUT
"$k\t[$IDA, $IDB]\t$Strand\t3-UTR:$retStr\tCDS:<$a,$b>\n";