universal isa is deprecated and throws compile error
[ITAG.git] / hacks / fix_tomato_transcripts.pl
blob0e0e535f5dd18a7f40704016df7ff24b6afdfd8e
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
5 use Data::Dump 'dump';
6 use Storable 'dclone';
8 use List::Util qw/ min max /;
10 # hash the lines by Target
11 my %seen;
12 my %lines;
13 while( <> ) {
14 unless( /^(\S+)\tITAG_transcripts_\S+.+Target=(\S+)/ ) {
15 next;
17 #warn "$1:$2 $_";
18 next if $seen{$_}++;
20 my @f = split /\t/, $_, 9;
21 push @{ $lines{"$1:$2"} }, \@f;
24 #warn dump( [values %lines] );
26 my @groups = map {
27 my ($pts,$pte);
28 my $pfe;
29 my $curr_group = [];
30 my @groups = ($curr_group);
31 for my $l ( @$_ ) {
32 my ($fs,$fe) = @{$l}[3,4];
33 my $strand = $l->[6];
34 my ($ts,$te) = $l->[8] =~ /Target=\S+ (\d+) (\d+)/ or die;
36 if( $pte
37 && $pfe
38 && (
39 ( $strand eq '+'
40 ? abs( $pte - $ts ) > 3
41 : abs( $pts - $te ) > 3
43 || $fs - $pfe > 30_000
45 ) {
46 $curr_group = [];
47 push @groups, $curr_group;
49 push @$curr_group, $l;
51 $pts = $ts;
52 $pte = $te;
53 $pfe = $fe;
55 @groups;
56 } values %lines;
58 #warn dump( \@groups );
60 # make feature groups out of them
61 for my $g ( @groups ) {
62 if( @$g > 1 ) {
63 my $superfeature = dclone($g->[0]);
64 $superfeature->[5] = '.';
65 my ( $sf_id ) = $superfeature->[8] =~ /ID=([^;]+)/
66 or die "cannot parse line $superfeature";
67 my @tcoords = map { $_->[8] =~ /Target=\S+ (\d+) (\d+)/ } @$g;
68 @{$superfeature}[3,4] = ( min( map $_->[3], @$g ), max( map $_->[4], @$g ) );
69 $superfeature->[8] =~ s/Target=(\S+) \d+ \d+/"Target=$1 ".min(@tcoords).' '.max(@tcoords)/e
70 or die;
72 for ( @$g ) {
73 $_->[8] =~ s/ID=[^;]+;/Parent=$sf_id;/;
74 $_->[2] = 'match_part';
76 print(join "\t", @$_) for $superfeature, @$g;
78 else {
79 print join "\t", @$_ for @$g;