2 # BioPerl module for InterProHandler
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Peter Dimitrov <dimitrov@gnf.org>
8 # Copyright Peter Dimitrov
9 # (c) Peter Dimitrov, dimitrov@gnf.org, 2003.
10 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
12 # You may distribute this module under the same terms as perl itself.
13 # Refer to the Perl Artistic License (see the license accompanying this
14 # software package, or see http://www.perl.com/language/misc/Artistic.html)
15 # for the terms under which you may use, modify, and redistribute this module.
17 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
18 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
21 # POD documentation - main docs before the code
25 Bio::OntologyIO::Handlers::InterProHandler - XML handler class for InterProParser
29 # do not use directly - used and instantiated by InterProParser
33 Handles xml events generated by InterProParser when parsing InterPro
40 User feedback is an integral part of the evolution of this and other
41 Bioperl modules. Send your comments and suggestions preferably to
42 the Bioperl mailing list. Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
49 Please direct usage questions or support issues to the mailing list:
51 I<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
60 Report bugs to the Bioperl bug tracking system to help us keep track
61 of the bugs and their resolution. Bug reports can be submitted via the
64 https://github.com/bioperl/bioperl-live/issues
66 =head1 AUTHOR - Peter Dimitrov
68 Email dimitrov@gnf.org
72 Juguang Xiao, juguang@tll.org.sg
76 The rest of the documentation details each of the object methods.
77 Internal methods are usually preceded with a _
81 # Let the code begin...
83 package Bio
::OntologyIO
::Handlers
::InterProHandler
;
86 use Bio
::Ontology
::Ontology
;
87 use Bio
::Ontology
::RelationshipType
;
88 use Bio
::Ontology
::SimpleOntologyEngine
;
89 use Bio
::Annotation
::Reference
;
92 use base
qw(Bio::Root::Root);
94 my ( $record_count, $processed_count, $is_a_rel, $contains_rel, $found_in_rel );
99 Usage : $h = Bio::OntologyIO::Handlers::InterProHandler->new;
100 Function: Initializes global variables
102 Returns : an InterProHandler object
109 my ( $class, @args ) = @_;
110 my $self = $class->SUPER::new
(@args);
112 my ( $eng, $ont, $name, $fact ) = $self->_rearrange(
122 if ( defined($ont) ) {
123 $self->ontology($ont);
125 $name = "InterPro" unless $name;
126 $self->ontology( Bio
::Ontology
::Ontology
->new( -name
=> $name ) );
128 $self->ontology_engine($eng) if $eng;
130 $self->term_factory($fact) if $fact;
132 $is_a_rel = Bio
::Ontology
::RelationshipType
->get_instance("IS_A");
133 $contains_rel = Bio
::Ontology
::RelationshipType
->get_instance("CONTAINS");
134 $found_in_rel = Bio
::Ontology
::RelationshipType
->get_instance("FOUND_IN");
135 $is_a_rel->ontology( $self->ontology() );
136 $contains_rel->ontology( $self->ontology() );
137 $found_in_rel->ontology( $self->ontology() );
138 $self->_cite_skip(0);
139 $self->secondary_accessions_map( {} );
144 =head2 ontology_engine
146 Title : ontology_engine
147 Usage : $obj->ontology_engine($newval)
148 Function: Get/set ontology engine. Can be initialized only once.
150 Returns : value of ontology_engine (a scalar)
151 Args : new value (a scalar, optional)
156 sub ontology_engine
{
157 my ( $self, $value ) = @_;
159 if ( defined $value ) {
160 if ( defined $self->{'ontology_engine'} ) {
161 $self->throw("ontology_engine already defined");
164 ref($value) . " does not implement " . "Bio::Ontology::OntologyEngineI. Bummer." )
165 unless $value->isa("Bio::Ontology::OntologyEngineI");
166 $self->{'ontology_engine'} = $value;
168 # don't forget to set this as the engine of the ontology, otherwise
169 # those two might not point to the same object
170 my $ont = $self->ontology();
171 if ( $ont && $ont->can("engine") && ( !$ont->engine() ) ) {
172 $ont->engine($value);
177 . "::ontology_engine: registering ontology engine ("
178 . ref($value) . "):\n"
184 return $self->{'ontology_engine'};
191 Function: Get the ontology to add the InterPro terms to.
193 The value is determined automatically once ontology_engine
194 has been set and if it hasn't been set before.
197 Returns : A L<Bio::Ontology::OntologyI> implementing object.
198 Args : On set, a L<Bio::Ontology::OntologyI> implementing object.
203 my ( $self, $ont ) = @_;
205 if ( defined($ont) ) {
206 $self->throw( ref($ont) . " does not implement Bio::Ontology::OntologyI" . ". Bummer." )
207 unless $ont->isa("Bio::Ontology::OntologyI");
208 $self->{'_ontology'} = $ont;
210 return $self->{'_ontology'};
216 Usage : $obj->term_factory($newval)
217 Function: Get/set the ontology term object factory
219 Returns : value of term_factory (a Bio::Factory::ObjectFactory instance)
220 Args : on set, new value (a Bio::Factory::ObjectFactory instance
229 return $self->{'term_factory'} = shift if @_;
230 return $self->{'term_factory'};
236 Usage : $obj->_cite_skip($newval)
239 Returns : value of _cite_skip (a scalar)
240 Args : new value (a scalar, optional)
246 my ( $self, $value ) = @_;
248 if ( defined $value ) {
249 $self->{'_cite_skip'} = $value;
252 return $self->{'_cite_skip'};
258 Usage : $obj->_hash($newval)
261 Returns : value of _hash (a scalar)
262 Args : new value (a scalar, optional)
268 my ( $self, $value ) = @_;
270 if ( defined $value ) {
271 $self->{'_hash'} = $value;
274 return $self->{'_hash'};
280 Usage : $obj->_stack($newval)
283 Returns : value of _stack (a scalar)
284 Args : new value (a scalar, optional)
290 my ( $self, $value ) = @_;
292 if ( defined $value ) {
293 $self->{'_stack'} = $value;
295 return $self->{'_stack'};
311 my ( $self, $_stack ) = @_;
312 my @stack = @
{$_stack};
314 return ( @stack >= 1 ) ?
$stack[ @stack - 1 ] : undef;
320 Usage : $obj->_term($newval)
321 Function: Get/set method for the term currently processed.
323 Returns : value of term (a scalar)
324 Args : new value (a scalar, optional)
330 my ( $self, $value ) = @_;
332 if ( defined $value ) {
333 $self->{'_term'} = $value;
336 return $self->{'_term'};
343 Function: Removes the current term from the handler
354 delete $self->{'_term'};
360 Usage : $obj->_names($newval)
363 Returns : value of _names (a scalar)
364 Args : new value (a scalar, optional)
370 my ( $self, $value ) = @_;
372 if ( defined $value ) {
373 $self->{'_names'} = $value;
376 return $self->{'_names'};
379 =head2 _create_relationship
381 Title : _create_relationship
383 Function: Helper function. Adds relationships to one of the relationship stores.
392 my %relationship_cache;
394 sub _clear_cache
{ %relationship_cache = () }
396 sub _create_relationship
{
397 my ( $self, $ref_id, $rel_type_term ) = @_;
399 my $ont = $self->ontology();
400 my $fact = $self->term_factory();
401 my $term_temp = ( $ont->engine->get_term_by_identifier($ref_id) )[0];
403 if ( !defined $term_temp ) {
405 $ont->engine->add_term(
406 $fact->create_object( -InterPro_id
=> $ref_id, -name
=> $ref_id, -ontology
=> $ont ) );
407 $ont->engine->mark_uninstantiated($term_temp);
409 my $marshalled = join(':', (sort $self->_term->identifier, $ref_id));
411 # check cache to see if the two have been seen before, using marshalled IDs
412 if ($relationship_cache{$marshalled}++) {
413 # TODO: should check that the relationship type for these terms is the
414 # inverse of the stored relationship type
418 my $rel_type_name = $self->_top( $self->_names );
420 my $rel = Bio
::Ontology
::Relationship
->new( -predicate_term
=> $rel_type_term );
422 if ( $rel_type_name eq 'parent_list' || $rel_type_name eq 'found_in' ) {
423 $rel->object_term($term_temp);
424 $rel->subject_term( $self->_term );
426 $rel->object_term( $self->_term );
427 $rel->subject_term($term_temp);
429 $rel->ontology($ont);
430 $ont->add_relationship($rel);
437 Title : start_element
439 Function: This is a method that is derived from XML::SAX::Base and
440 has to be overridden for processing start of xml element
441 events. Used internally only.
451 my ( $self, $element ) = @_;
452 my $ont = $self->ontology();
453 my $fact = $self->term_factory();
455 if ( $element->{Name
} eq 'interprodb' ) {
457 $fact->create_object(
458 -identifier
=> "Active_site",
459 -name
=> "Active Site"
463 $fact->create_object(
464 -identifier
=> "Conserved_site",
465 -name
=> "Conserved Site"
469 $fact->create_object(
470 -identifier
=> "Binding_site",
471 -name
=> "Binding Site"
475 $fact->create_object(
476 -identifier
=> "Family",
481 $fact->create_object(
482 -identifier
=> "Domain",
487 $fact->create_object(
488 -identifier
=> "Repeat",
493 $fact->create_object(
494 -identifier
=> "PTM",
495 -name
=> "post-translational modification"
499 $fact->create_object(
500 -identifier
=> "Region",
504 } elsif ( $element->{Name
} eq 'interpro' ) {
505 my %record_args = %{ $element->{Attributes
} };
506 my $id = $record_args{"id"};
508 # this sets the current term
509 my $term = ( $ont->engine->get_term_by_identifier($id) )[0] ||
510 $fact->create_object( -InterPro_id
=> $id, -name
=> $id );
513 $term->ontology($ont);
514 $term->short_name( $record_args{"short_name"} );
515 $term->protein_count( $record_args{"protein_count"} );
516 $self->_increment_record_count();
517 $self->_stack( [ { interpro
=> undef } ] );
518 $self->_names( ["interpro"] );
520 ## Adding a relationship between the newly created InterPro term
521 ## and the term describing its type
523 my $rel = Bio
::Ontology
::Relationship
->new( -predicate_term
=> $is_a_rel );
524 my ($object_term) = $ont->find_terms( -identifier
=> $record_args{"type"} )
526 "when processing interpro ID '$id', no term found for interpro type '$record_args{type}'"
528 $rel->object_term($object_term);
529 $rel->subject_term( $self->_term );
530 $rel->ontology($ont);
531 $ont->add_relationship($rel);
532 $ont->add_term($term);
533 } elsif ( defined $self->_stack ) {
536 if ( keys %{ $element->{Attributes
} } > 0 ) {
537 foreach my $key ( keys %{ $element->{Attributes
} } ) {
538 $hash{$key} = $element->{Attributes
}->{$key};
541 push @
{ $self->_stack }, \
%hash;
542 if ( $element->{Name
} eq 'rel_ref' ) {
543 my $ref_id = $element->{Attributes
}->{"ipr_ref"};
544 my $parent = $self->_top( $self->_names );
546 if ( $parent eq 'parent_list' || $parent eq 'child_list' ) {
547 $self->_create_relationship( $ref_id, $is_a_rel );
549 if ( $parent eq 'contains' ) {
550 $self->_create_relationship( $ref_id, $contains_rel );
552 if ( $parent eq 'found_in' ) {
553 $self->_create_relationship( $ref_id, $found_in_rel );
555 } elsif ( $element->{Name
} eq 'abstract' ) {
556 $self->_cite_skip(1);
558 push @
{ $self->_names }, $element->{Name
};
565 Title : _char_storage
566 Usage : $obj->_char_storage($newval)
569 Returns : value of _char_storage (a scalar)
570 Args : new value (a scalar, optional)
576 my ( $self, $value ) = @_;
578 if ( defined $value ) {
579 $self->{'_char_storage'} = $value;
582 return $self->{'_char_storage'};
589 Function: This is a method that is derived from XML::SAX::Base and has to be overridden for processing xml characters events. Used internally only.
598 my ( $self, $characters ) = @_;
599 my $text = $characters->{Data
};
603 $self->{_char_storage
} .= $text;
611 Function: This is a method that is derived from XML::SAX::Base and has to be overridden for processing end of xml element events. Used internally only.
620 my ( $self, $element ) = @_;
622 if ( $element->{Name
} eq 'interprodb' ) {
624 "Interpro DB Parser Finished: $record_count read, $processed_count processed\n");
625 $self->_clear_cache();
626 } elsif ( $element->{Name
} eq 'interpro' ) {
628 $self->_increment_processed_count();
629 } elsif ( $element->{Name
} ne 'cite' ) {
630 $self->{_char_storage
} =~ s/<\/?p>//g
;
631 if ( ( defined $self->_stack ) ) {
632 my $current_hash = pop @
{ $self->_stack };
633 my $parent_hash = $self->_top( $self->_stack );
634 my $current_hash_key = pop @
{ $self->_names };
636 if ( keys %{$current_hash} > 0 && $self->_char_storage ne "" ) {
637 $current_hash->{comment
} = $self->_char_storage;
638 push @
{ $parent_hash->{$current_hash_key} }, $current_hash;
639 } elsif ( $self->_char_storage ne "" ) {
640 push @
{ $parent_hash->{$current_hash_key} },
641 { 'accumulated_text_12345' => $self->_char_storage };
642 } elsif ( keys %{$current_hash} > 0 ) {
643 push @
{ $parent_hash->{$current_hash_key} }, $current_hash;
645 if ( $element->{Name
} eq 'pub_list' ) {
648 foreach my $pub_record ( @
{ $current_hash->{publication
} } ) {
649 my $ref = Bio
::Annotation
::Reference
->new;
650 my $loc = $pub_record->{location
}->[0];
651 # TODO: Getting unset stuff here; should this be an error?
653 sprintf("%s, %s-%s, %s, %s",
654 $pub_record->{journal
}->[0]->{accumulated_text_12345
} || '',
655 $loc->{firstpage
} || '',
656 $loc->{lastpage
} || '',
657 $loc->{volume
} || '',
658 $pub_record->{year
}->[0]->{accumulated_text_12345
} || '')
660 $ref->title( $pub_record->{title
}->[0]->{accumulated_text_12345
} );
661 my $ttt = $pub_record->{author_list
}->[0];
663 $ref->authors( $ttt->{accumulated_text_12345
} );
664 $ref->medline( scalar( $ttt->{dbkey
} ) )
665 if exists( $ttt->{db
} ) && $ttt->{db
} eq "MEDLINE";
668 $self->_term->add_reference(@refs);
669 } elsif ( $element->{Name
} eq 'name' ) {
670 $self->_term->name( $self->_char_storage );
671 } elsif ( $element->{Name
} eq 'abstract' ) {
672 $self->_term->definition( $self->_char_storage );
673 $self->_cite_skip(0);
674 } elsif ( $element->{Name
} eq 'member_list' ) {
677 foreach my $db_xref ( @
{ $current_hash->{db_xref
} } ) {
679 Bio
::Annotation
::DBLink
->new(
680 -database
=> $db_xref->{db
},
681 -primary_id
=> $db_xref->{dbkey
}
684 $self->_term->add_dbxref(-dbxrefs
=> \
@refs,
685 -context
=> 'member_list');
686 } elsif ( $element->{Name
} eq 'sec_list' ) {
689 foreach my $sec_ac ( @
{ $current_hash->{sec_ac
} } ) {
690 push @refs, $sec_ac->{sec_ac
};
692 $self->_term->add_secondary_id(@refs);
693 $self->secondary_accessions_map->{ $self->_term->identifier } = \
@refs;
694 } elsif ( $element->{Name
} eq 'example_list' ) {
697 foreach my $example ( @
{ $current_hash->{examples
} } ) {
699 Bio
::Annotation
::DBLink
->new(
700 -database
=> $example->{db_xref
}->[0]->{db
},
701 -primary_id
=> $example->{db_xref
}->[0]->{dbkey
},
702 -comment
=> $example->{comment
}
705 $self->_term->add_dbxref(-dbxrefs
=> \
@refs,
706 -context
=> 'example_list');
707 } elsif ( $element->{Name
} eq 'external_doc_list' ) {
710 foreach my $db_xref ( @
{ $current_hash->{db_xref
} } ) {
712 Bio
::Annotation
::DBLink
->new(
713 -database
=> $db_xref->{db
},
714 -primary_id
=> $db_xref->{dbkey
}
717 $self->_term->add_dbxref(-dbxrefs
=> \
@refs,
718 -context
=> 'external_doc_list');
719 } elsif ( $element->{Name
} eq 'class_list' ) {
722 foreach my $classification ( @
{ $current_hash->{classification
} } ) {
724 Bio
::Annotation
::DBLink
->new(
725 -database
=> $classification->{class_type
},
726 -primary_id
=> $classification->{id
}
729 $self->_term->add_dbxref(-dbxrefs
=> \
@refs,
730 -context
=> 'class_list');
731 } elsif ( $element->{Name
} eq 'deleted_entries' ) {
734 foreach my $del_ref ( @
{ $current_hash->{del_ref
} } ) {
736 ( $self->ontology_engine->get_term_by_identifier( $del_ref->{id
} ) )[0];
738 $term->is_obsolete(1) if defined $term;
742 $self->_char_storage('') if !$self->_cite_skip;
746 =head2 secondary_accessions_map
748 Title : secondary_accessions_map
749 Usage : $obj->secondary_accessions_map($newval)
751 Example : $map = $interpro_handler->secondary_accessions_map();
752 Returns : Reference to a hash that maps InterPro identifier to an
753 array reference of secondary accessions following the InterPro
755 Args : Empty hash reference
760 sub secondary_accessions_map
{
761 my ( $self, $value ) = @_;
763 if ( defined $value ) {
764 $self->{'secondary_accessions_map'} = $value;
767 return $self->{'secondary_accessions_map'};
770 =head2 _increment_record_count
772 Title : _increment_record_count
782 sub _increment_record_count
{
786 =head2 _increment_processed_count
788 Title : _increment_processed_count
798 sub _increment_processed_count
{
801 $self->debug("$processed_count\n") if $processed_count % 100 == 0;