2 # BioPerl module for Bio::OntologyIO::simplehierarchy
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Allen Day, allenday@ucla.edu
8 # (c) Allen Day, allenday@ucla.edu, 2003.
9 # (c) Department of Human Genetics, UCLA Medical School, 2003.
11 # You may distribute this module under the same terms as perl itself.
12 # Refer to the Perl Artistic License (see the license accompanying this
13 # software package, or see http://www.perl.com/language/misc/Artistic.html)
14 # for the terms under which you may use, modify, and redistribute this module.
16 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
17 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
18 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 # You may distribute this module under the same terms as perl itself
22 # POD documentation - main docs before the code
26 Bio::OntologyIO::simplehierarchy - a base class parser for simple hierarchy-by-indentation
33 # do not use directly -- use via Bio::OntologyIO
34 my $parser = Bio::OntologyIO->new
35 ( -format => "simplehierarchy",
36 -file => "pathology_terms.csv",
37 -indent_string => ",",
38 -ontology_name => "eVOC",
39 -term_factory => $fact,
42 my $ontology = $parser->next_ontology();
46 Needs Graph.pm from CPAN. This class is nearly identical to
47 OntologyIO::dagflat, see L<Bio::OntologyIO::dagflat> for details.
53 User feedback is an integral part of the evolution of this and other
54 Bioperl modules. Send your comments and suggestions preferably to the
55 Bioperl mailing lists Your participation is much appreciated.
57 bioperl-l@bioperl.org - General discussion
58 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62 Please direct usage questions or support issues to the mailing list:
64 I<bioperl-l@bioperl.org>
66 rather than to the module maintainer directly. Many experienced and
67 reponsive experts will be able look at the problem and quickly
68 address it. Please include a thorough description of the problem
69 with code and data examples if at all possible.
73 Report bugs to the Bioperl bug tracking system to help us keep track
74 the bugs and their resolution. Bug reports can be submitted via the
77 https://github.com/bioperl/bioperl-live/issues
83 Email: allenday@ucla.edu
91 The rest of the documentation details each of the object
92 methods. Internal methods are usually preceded with a _
97 # Let the code begin...
100 package Bio
::OntologyIO
::simplehierarchy
;
107 use Bio
::Ontology
::OBOEngine
;
108 use Bio
::Ontology
::Ontology
;
109 use Bio
::Ontology
::TermFactory
;
111 use constant TRUE
=> 1;
112 use constant FALSE
=> 0;
115 use base
qw(Bio::OntologyIO);
122 Function: Creates a new simplehierarchy parser.
123 Returns : A new simplehierarchy parser object, implementing Bio::OntologyIO.
124 Args : -files => a single ontology flat file holding the
125 term relationships, or an array ref holding
127 -file => if there is only a single flat file, it may
128 also be specified via the -file parameter
129 -ontology_name => the name of the ontology, defaults to
131 -file_is_root => Boolean indicating whether a virtual root
132 term is to be added, the name of which will
133 be derived from the file name. Default is false.
134 Enabling this allows one to parse multiple input
135 files into the same ontology and still have
137 -engine => the L<Bio::Ontology::OntologyEngineI> object
138 to be reused (will be created otherwise); note
139 that every L<Bio::Ontology::OntologyI> will
140 qualify as well since that one inherits from the
142 -indent_string => the string used to indent hierarchical
145 For a file like this:
153 indent_string would be " ". Defaults to
155 -comment_char => Allows specification of a regular
156 expression string to indicate a comment line.
157 Currently defaults to "[\|\-]".
158 Note: this is not yet implemented.
160 See L<Bio::OntologyIO>.
164 # in reality, we let OntologyIO::new do the instantiation, and override
165 # _initialize for all initialization work
167 my ($self, @args) = @_;
168 $self->SUPER::_initialize
( @args );
170 my ( $indent,$files,$fileisroot,$name,$eng ) =
171 $self->_rearrange([qw(INDENT_STRING
178 $self->_done( FALSE
);
179 $self->_not_first_record( FALSE
);
181 $self->file_is_root($fileisroot) if defined($fileisroot);
182 $indent = ' ' unless defined($indent); #reasonable default?
183 # the indentation string may have escaped chars
184 if (($indent =~ /\\/) && ($indent !~ /[\$\`]/)) {
185 $indent = "\$indent = \"$indent\"";
188 $self->indent_string($indent);
189 delete $self->{'_ontologies'};
191 # ontology engine (and possibly name if it's an OntologyI)
192 $eng = Bio
::Ontology
::OBOEngine
->new() unless $eng;
193 if($eng->isa("Bio::Ontology::OntologyI")) {
194 $self->ontology_name($eng->name());
195 $eng = $eng->engine() if $eng->can('engine');
197 $self->_ont_engine($eng);
199 # flat files to parse
200 $self->{_flat_files
} = $files ?
ref($files) ?
$files : [$files] : [];
202 # ontology name (overrides implicit one through OntologyI engine)
203 $self->ontology_name($name) if $name;
209 Title : ontology_name
210 Usage : $obj->ontology_name($newval)
211 Function: Get/set the name of the ontology parsed by this module.
213 Returns : value of ontology_name (a scalar)
214 Args : on set, new value (a scalar or undef, optional)
221 return $self->{'ontology_name'} = shift if @_;
222 return $self->{'ontology_name'};
229 Usage : $parser->parse();
230 Function: Parses the files set with "new" or with methods
231 defs_file and _flat_files.
233 Normally you should not need to call this method as it will
234 be called automatically upon the first call to
237 Returns : [Bio::Ontology::OntologyEngineI]
245 # setup the default term factory if not done by anyone yet
246 $self->term_factory(Bio
::Ontology
::TermFactory
->new(
247 -type
=> "Bio::Ontology::Term"))
248 unless $self->term_factory();
250 # create the ontology object itself
251 my $ont = Bio
::Ontology
::Ontology
->new(-name
=> $self->ontology_name(),
252 -engine
=> $self->_ont_engine());
254 # set up the ontology of the relationship types
255 foreach ($self->_part_of_relationship(),
256 $self->_is_a_relationship(),
257 $self->_related_to_relationship()) {
261 # pre-seed the IO system with the first flat file if -file wasn't provided
263 $self->_initialize_io(-file
=> shift(@
{$self->_flat_files()}));
267 $self->_parse_flat_file($ont);
268 # advance to next flat file if more are available
269 if(@
{$self->_flat_files()}) {
271 # reset the virtual root so that the next one is generated from
273 $self->_virtual_root(undef);
274 # now re-initialize the IO object
275 $self->_initialize_io(-file
=> shift(@
{$self->_flat_files()}));
277 last; # nothing else to parse so terminate the loop
280 $self->_add_ontology($ont);
281 # not needed anywhere, only because of backward compatibility
282 return $self->_ont_engine();
287 Title : next_ontology
289 Function: Get the next available ontology from the parser. This is the
290 method prescribed by Bio::OntologyIO.
292 Returns : An object implementing Bio::Ontology::OntologyI, and undef if
293 there is no more ontology in the input.
301 # parse if not done already
302 $self->parse() unless exists($self->{'_ontologies'});
303 # return next available ontology
304 return shift(@
{$self->{'_ontologies'}}) if exists($self->{'_ontologies'});
311 Usage : $files_to_parse = $parser->_flat_files();
312 Function: Get the array of ontology flat files that need to be parsed.
314 Note that this array will decrease in elements over the
315 parsing process. Therefore, it\'s value outside of this
316 module will be limited. Also, be careful not to alter the
317 array unless you know what you are doing.
319 Returns : a reference to an array of zero or more strings
327 $self->{_flat_files
} = [] unless exists($self->{_flat_files
});
328 return $self->{_flat_files
};
338 Usage : $obj->_defs_io($newval)
339 Function: Get/set the Bio::Root::IO instance representing the
340 definition file, if provided (see defs_file()).
342 Returns : value of _defs_io (a Bio::Root::IO object)
343 Args : on set, new value (a Bio::Root::IO object or undef, optional)
350 return $self->{'_defs_io'} = shift if @_;
351 return $self->{'_defs_io'};
356 $self->{'_ontologies'} = [] unless exists($self->{'_ontologies'});
357 foreach my $ont (@_) {
358 $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI")
359 unless ref($ont) && $ont->isa("Bio::Ontology::OntologyI");
360 push(@
{$self->{'_ontologies'}}, $ont);
364 # This simply delegates. See SimpleGOEngine.
366 my ( $self, $term, $ont ) = @_;
368 $term->ontology($ont) if $ont && (! $term->ontology);
369 $self->_ont_engine()->add_term( $term );
374 # This simply delegates. See SimpleGOEngine
375 sub _part_of_relationship
{
376 my ( $self, $term ) = @_;
378 return $self->_ont_engine()->part_of_relationship();
380 } # _part_of_relationship
384 # This simply delegates. See SimpleGOEngine
385 sub _is_a_relationship
{
386 my ( $self, $term ) = @_;
388 return $self->_ont_engine()->is_a_relationship();
390 } # _is_a_relationship
393 # This simply delegates. See SimpleGOEngine
394 sub _related_to_relationship
{
395 my ( $self, $term ) = @_;
397 return $self->_ont_engine()->related_to_relationship();
399 } # _is_a_relationship
402 # This simply delegates. See SimpleGOEngine
403 sub _add_relationship
{
404 my ( $self, $parent, $child, $type, $ont ) = @_;
406 # note the triple terminology (subject,predicate,object) corresponds to
407 # (child,type,parent)
408 $self->_ont_engine()->add_relationship( $child, $type, $parent, $ont );
411 } # _add_relationship
414 # This simply delegates. See SimpleGOEngine
416 my ( $self, $term ) = @_;
418 $term = $self->ontology_name() .'|'. $term
419 unless ref($term) || !$self->ontology_name();
420 return $self->_ont_engine()->has_term( $term );
424 # This simply delegates after prefixing the namespace name if it is just a
425 # base identifier. See SimpleGOEngine
431 unshift(@args, pop(@_)); # this actually does preserve the order
432 $args[0] = $self->ontology_name() .'|'. $args[0]
433 unless ref($args[0]) || !$self->ontology_name();
435 return $self->_ont_engine->get_terms(@args);
439 # This parses the relationships files
440 sub _parse_flat_file
{
445 my $prev_indent = -1;
449 my $indent_string = $self->indent_string;
452 while ( my $line = $self->_readline() ) {
453 if ( $line =~ /^[$indent_string]*[\|\-]/ ) { #this is not yet generalized
457 my ($current_term) = $line =~ /^[$indent_string]*(.*)/;
458 my $current_indent = $self->_count_indents( $line );
460 # remove extraneous delimiter characters at the end of the name if any
461 $current_term =~ s/[$indent_string]+$//;
462 # remove double quotes surrounding the entry, if any
463 $current_term =~ s/^\"(.*)\"$/$1/;
464 # also, the name might contain a synonym
465 my $syn = $current_term =~ s/\s+{([^}]+)}// ?
$1 : undef;
467 if ( ! $self->_has_term( $current_term ) ) {
468 my $term = $self->_create_ont_entry($current_term);
469 # add synonym(s) if any
470 $term->add_synonym(split(/[;,]\s*/,$syn)) if $syn;
472 $self->_add_term( $term, $ont );
474 #go on to the next term if a root node.
475 if($current_indent == 0) {
476 # add the virtual root as parent if there is one
477 if($self->_virtual_root()) {
478 $self->_add_relationship($self->_virtual_root(),
480 $self->_is_a_relationship(),
483 $prev_indent = $current_indent;
484 $prev_term = $current_term;
485 push @stack, $current_term;
490 # note: we are ensured to see the parent first in this type of file,
491 # so we never need to possibly insert the parent here
493 if ( $current_indent != $prev_indent ) {
494 if ( $current_indent == $prev_indent + 1 ) {
495 push( @stack, $prev_term );
496 } elsif ( $current_indent < $prev_indent ) {
497 my $n = $prev_indent - $current_indent;
498 for ( my $i = 0; $i < $n; ++$i ) {
502 $self->throw("format error: indentation level $current_indent "
503 ."is more than one higher than the previous "
504 ."level $prev_indent ('$current_term', "
505 ."file ".$self->file.")" );
509 $parent = $stack[-1];
511 if($parent ne $current_term) { #this prevents infinite recursion from a parent linking to itself
512 $self->_add_relationship($self->_get_terms($parent),
513 $self->_get_terms($current_term),
514 $self->_is_a_relationship(),
518 $prev_indent = $current_indent;
519 $prev_term = $current_term;
522 } # _parse_relationships_file
526 # Parses the 1st term id number out of line.
527 sub _get_first_termid
{
528 my ( $self, $line ) = @_;
530 if ( $line =~ /;\s*([A-Z]{1,8}:\d{7})/ ) {
534 $self->throw( "format error: no term id in line \"$line\"" );
537 } # _get_first_termid
539 # Counts the indents at the beginning of a line in the relationships files
541 my ( $self, $line ) = @_;
543 my $indent = $self->indent_string;
545 if ( $line =~ /^($indent+)/ ) {
546 return (length($1)/length($indent));
554 # Holds the GO engine to be parsed into
556 my ( $self, $value ) = @_;
558 if ( defined $value ) {
559 $self->{ "_ont_engine" } = $value;
562 return $self->{ "_ont_engine" };
566 # Used to create ontology terms.
567 # Arguments: name, id
568 sub _create_ont_entry
{
569 my ( $self, $name, $termid ) = @_;
571 my $term = $self->term_factory->create_object(-name
=> $name,
572 -identifier
=> $termid);
576 } # _create_ont_entry
579 # Holds whether first record or not
580 sub _not_first_record
{
581 my ( $self, $value ) = @_;
583 if ( defined $value ) {
584 unless ( $value == FALSE
|| $value == TRUE
) {
585 $self->throw( "Argument to method \"_not_first_record\" must be either ".TRUE
." or ".FALSE
);
587 $self->{ "_not_first_record" } = $value;
590 return $self->{ "_not_first_record" };
591 } # _not_first_record
595 # Holds whether done or not
597 my ( $self, $value ) = @_;
599 if ( defined $value ) {
600 unless ( $value == FALSE
|| $value == TRUE
) {
601 $self->throw( "Found [$value] where [" . TRUE
602 ." or " . FALSE
. "] expected" );
604 $self->{ "_done" } = $value;
607 return $self->{ "_done" };
613 my ( $self, $value ) = @_;
615 if ( defined $value ) {
616 $self->{ "_term" } = $value;
619 return $self->{ "_term" };
624 Title : indent_string
625 Usage : $obj->indent_string($newval)
628 Returns : value of indent_string (a scalar)
629 Args : on set, new value (a scalar or undef, optional)
636 return $self->{'indent_string'} = shift if @_;
637 return $self->{'indent_string'};
643 Usage : $obj->file_is_root($newval)
644 Function: Boolean indicating whether a virtual root term is to be
645 added, the name of which will be derived from the file
648 Enabling this allows one to parse multiple input files into the
649 same ontology and still have separately rooted.
652 Returns : value of file_is_root (a scalar)
653 Args : on set, new value (a scalar or undef, optional)
660 return $self->{'file_is_root'} = shift if @_;
661 return $self->{'file_is_root'};
666 Title : _virtual_root
667 Usage : $obj->_virtual_root($newval)
670 Returns : value of _virtual_root (a scalar)
671 Args : on set, new value (a scalar or undef, optional)
678 return $self->{'_virtual_root'} = shift if @_;
680 # don't return anything if not in file_is_root mode, or if we don't
681 # have a file to derive the root node from
682 return unless $self->file_is_root() && $self->file();
684 # construct it if we haven't done this before
685 if(! $self->{'_virtual_root'}) {
686 my ($rt,undef,undef) = fileparse
($self->file(), '\..*');
688 $rt = $self->_create_ont_entry($rt);
689 $self->_add_term($rt, $self->ontology_name());
690 $self->{'_virtual_root'} = $rt;
693 return $self->{'_virtual_root'};