1 package CXGN
::Tools
::InterProGFF3
;
3 use Moose
::Util
::TypeConstraints
;
4 use Bio
::OntologyIO
::InterProParser
;
9 with
'MooseX::Runnable';
10 with
'MooseX::Getopt';
14 CXGN::Tools::InterProGFF3 - Convert InterPro XML to GFF3
18 This tool converts InterPro XML to GFF3 so that InterPro domains
19 can be loaded as features into Chado.
25 Jonathan "Duke" Leto <jonathan@leto.net>
29 Jonathan "Duke" Leto <jonathan@leto.net>
31 =head1 COPYRIGHT & LICENSE
33 Copyright 2010 Boyce Thompson Institute for Plant Research
35 This program is free software; you can redistribute it and/or modify
36 it under the same terms as Perl itself.
# Attribute declarations. Only fragments are visible here (several lines of
# each declaration are missing), so the notes below are hedged.
40 has gff3_preamble
=> (
# Default preamble text: a "##gff-version 3" line followed by a
# "##feature-ontology" line that points at a SOFA OBO revision.
43 default => "##gff-version 3
44 ##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.220\n",
# Type constraint of an attribute holding the InterPro parser object
# (presumably `parser` -- the attribute name is not visible here; confirm).
64 isa
=> 'Bio::OntologyIO::InterProParser',
# Presumably the default of the `source` attribute, which is written as the
# second column of each GFF3 line; 'X' looks like a placeholder -- confirm.
74 default => 'InterPro Version X',
# Presumably the default of the `term_type` attribute, which is written as
# the third column of each GFF3 line -- confirm.
80 default => 'polypeptide_domain',
# Hand the %args hash on to the parent class's BUILDARGS and return its
# result (how %args was prepared is not visible in this view).
92 return $class->SUPER::BUILDARGS
( %args );
# Set-up sequence: create the parser, load the ontology, seed the GFF3
# buffer and index parent terms. The enclosing sub header and the closing
# parentheses of the constructor call are not visible in this view.
96 my ($self,%args) = @_;
# Build a Bio::OntologyIO reader in 'interpro' format for the input file
# named by $self->filename and keep it in the parser attribute.
97 $self->parser( Bio
::OntologyIO
->new(
98 -format
=> 'interpro',
99 -file
=> $self->filename,
# Keep the first ontology returned by the parser.
101 $self->ontology( $self->parser->next_ontology );
# Start the GFF3 text with the preamble.
102 $self->gff3( $self->gff3_preamble );
# Fill $self->parent_list (done by generate_parent_list).
103 $self->generate_parent_list;
# Write the accumulated GFF3 text to the file named by $self->output.
# Die with the system error if the file cannot be opened, instead of
# carrying on and printing to an unopened handle.
open( my $fh, '>', $self->output )
    or die "Could not open '" . $self->output . "' for writing: $!";
print {$fh} $self->gff3;
# Build the lookup of parent InterPro terms and store it through
# $self->parent_list.
#
# The stored value is a hashref that maps every key of the ontology
# engine's inverted relationship store to a comma-separated string of the
# identifiers starting with "IPR" that are linked to it by a relationship
# named 'IS_A' (an empty string when there are none).
#
# Returns whatever the parent_list accessor returns.
#
# NOTE(review): this reads the engine's internal
# {_inverted_relationship_store} hash directly; it is not a public API, so
# confirm it still exists when upgrading BioPerl.
sub generate_parent_list {
    my ($self) = @_;

    my $relations =
        $self->ontology->{engine}->{_inverted_relationship_store} || {};
    my %parents_of;

    # Iterate with keys() rather than each(): each() resumes from wherever
    # the hash's shared iterator was left, which could silently skip
    # entries of this internal structure.
    for my $term_id ( keys %{$relations} ) {
        my $related = $relations->{$term_id};

        my @parent_ids =
            grep { m/^IPR/ && $related->{$_}->name eq 'IS_A' }
            keys %{$related};

        # Sort so the Parent list is stable across runs despite hash order
        # randomisation.
        @parent_ids = sort @parent_ids;

        $parents_of{$term_id} = join ',', @parent_ids;
    }

    $self->parent_list( \%parents_of );
}
# Append one GFF3 line per InterPro domain to the gff3 buffer.
# The enclosing sub header, the end of the grep and the loop's closing
# brace are not visible in this view.
129 my @domains = $self->get_domains;
130 for my $domain (@domains) {
132 my (@relations) = $self->ontology->get_relationships($domain);
134 # Find all IS_A relations of this domain, excluding itself
135 # This should include parent terms, but does not. See
136 # generate_parent_list for how parents are found
137 my @isa_relations = grep {
138 $_->predicate_term->name eq 'IS_A' &&
139 $_->object_term->identifier ne $domain->identifier
# The type passed on is the name of the first remaining IS_A object term,
# or the empty string when there is none.
141 my $type = @isa_relations ?
$isa_relations[0]->object_term->name : '';
# NOTE(review): the whole buffer is read, extended and stored again on
# every iteration -- confirm this is acceptable for large inputs.
143 $self->gff3( $self->gff3 . $self->make_gff3_line($domain, $type) );
# Format one GFF3 record for $domain: nine tab-separated fields ending in a
# newline. (The enclosing sub header is not visible in this view.)
148 my ($self,$domain, $type) = @_;
# "x" binds tighter than ".", so this is eight "%s\t" followed by "%s\n".
149 my $fmt = "%s\t" x
8 . "%s\n";
# Fields, in order: domain identifier, source, term type, 0, 0, three "."
# placeholders, and the attribute string.
# NOTE(review): the two 0 values land in the start/end columns; GFF3
# coordinates are 1-based, so confirm downstream loaders accept 0.
150 return sprintf $fmt, $domain->identifier,
151 $self->source, $self->term_type,
152 0, 0, qw
/. . ./, $self->make_attribute_string($domain, $type);
# Return $data with the characters ; = % & , escaped by uri_escape.
# (The enclosing sub header is not visible in this view.)
# NOTE(review): no import of uri_escape is visible in this part of the file
# (it is normally exported by URI::Escape) -- confirm the `use` line exists.
156 my ($self, $data) = @_;
157 return uri_escape
($data, ';=%&,');
# Build the attribute column (the last field of a GFF3 line) for one
# InterPro domain.
#
# Parameters:
#   $domain - term object providing identifier, name, short_name,
#             definition, get_members and protein_count
#   $type   - value for the interpro_type attribute (may be undef or '')
#
# Returns a string of the form
#   ID=..;Name=..;Alias=..;Parent=..;Note=..;Dbxref=..;interpro_type=..;protein_count=..
# Undefined values are rendered as empty attribute values.
sub make_attribute_string {
    my ($self, $domain, $type) = @_;
    my $fmt = 'ID=%s;Name=%s;Alias=%s;Parent=%s;Note=%s;Dbxref=%s;interpro_type=%s;protein_count=%s';

    # Missing fields are expected; render them as empty strings silently.
    no warnings 'uninitialized';

    my $id = $domain->identifier;

    # Escape each member cross-reference on its own: the commas that
    # separate Dbxref entries stay literal, while a reserved character
    # inside a database name or accession cannot break the attribute syntax.
    my @dbxrefs = (
        "INTERPRO:" . $id,
        map { scalar $self->escape_gff( $_->database . ':' . $_->primary_id ) }
            $domain->get_members,
    );

    # Escaped like Name/Alias/Note so every free-text value is treated the
    # same way; scalar assignment keeps the sprintf argument count fixed.
    my $escaped_type = $self->escape_gff($type);

    return sprintf $fmt, (
        $id,
        $self->escape_gff( $domain->name ),
        $self->escape_gff( $domain->short_name ),
        $self->parent_list()->{$id},
        $self->escape_gff( $domain->definition ),
        join( ',', @dbxrefs ),
        $escaped_type,
        $domain->protein_count,
    );
}
# Return every ontology term whose identifier starts with "IPR", ordered by
# identifier, highest first.
# The terms are objects, so they are compared through their identifier
# strings; a numeric <=> on the objects themselves would order them by
# memory address rather than by accession.
# NOTE(review): this assumes the term class does not overload <=> -- confirm.
return sort { $b->identifier cmp $a->identifier }
       grep { $_->identifier =~ m/^IPR/ }
       $self->ontology->get_all_terms;
# Call make_immutable on this package's metaclass once the class
# definition is complete.
# NOTE(review): no trailing true value (1;) is visible in this view --
# confirm the file ends with one.
179 __PACKAGE__
->meta->make_immutable;