Bio::Tools::CodonTable::is_start_codon: check in case of ambiguous codons (#266)
[bioperl-live.git] / lib / Bio / OntologyIO.pm
blobaeb2a62d0455ac8cf38135368eaef3efd5c90252
2 # BioPerl module for Bio::OntologyIO
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Hilmar Lapp <hlapp at gmx.net>
8 # Copyright Hilmar Lapp
10 # You may distribute this module under the same terms as perl itself
13 # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
14 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
16 # You may distribute this module under the same terms as perl itself.
17 # Refer to the Perl Artistic License (see the license accompanying this
18 # software package, or see http://www.perl.com/language/misc/Artistic.html)
19 # for the terms under which you may use, modify, and redistribute this module.
21 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
22 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
23 # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26 # POD documentation - main docs before the code
28 =head1 NAME
30 Bio::OntologyIO - Parser factory for Ontology formats
32 =head1 SYNOPSIS
34 use Bio::OntologyIO;
36 my $parser = Bio::OntologyIO->new(-format => "go",
37 -file=> $file);
39 while(my $ont = $parser->next_ontology()) {
40 print "read ontology ",$ont->name()," with ",
41 scalar($ont->get_root_terms)," root terms, and ",
42 scalar($ont->get_leaf_terms)," leaf terms\n";
45 =head1 DESCRIPTION
47 This is the parser factory for different ontology sources and
48 formats. Conceptually, it is very similar to L<Bio::SeqIO>, but the
49 difference is that the chunk of data returned as an object is an
50 entire ontology.
52 =head1 FEEDBACK
54 =head2 Mailing Lists
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to
58 the Bioperl mailing list. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
63 =head2 Support
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 responsive experts will be able to look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
74 =head2 Reporting Bugs
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 of the bugs and their resolution. Bug reports can be submitted via
78 the web:
80 https://github.com/bioperl/bioperl-live/issues
82 =head1 AUTHOR - Hilmar Lapp
84 Email hlapp at gmx.net
86 =head1 APPENDIX
88 The rest of the documentation details each of the object methods.
89 Internal methods are usually preceded with a _
91 =cut
94 # Let the code begin...
97 package Bio::OntologyIO;
99 use strict;
101 # Object preamble - inherits from Bio::Root::Root
104 use base qw(Bio::Root::Root Bio::Root::IO);
# Lookup table translating a user-facing format name (lower case) into the
# name of the driver module, relative to the Bio::OntologyIO:: namespace.
# Format names that are absent here are used as the driver name directly
# by _map_format().
my %format_driver_map = (
    go          => 'goflat',
    so          => 'soflat',
    interpro    => 'InterProParser',
    interprosax => 'Handlers::InterPro_BioSQL_Handler',
    evoc        => 'simplehierarchy',
    obo         => 'obo',
);
118 =head2 new
120 Title : new
121 Usage : my $parser = Bio::OntologyIO->new(-format => 'go', @args);
122 Function: Returns a stream of ontologies opened on the specified input
123 for the specified format.
124 Returns : An ontology parser (an instance of Bio::OntologyIO) initialized
125 for the specified format.
126 Args : Named parameters. Common parameters are
128 -format - the format of the input; the following are
129 presently supported:
130 goflat: DAG-Edit Gene Ontology flat files
131 go : synonymous to goflat
132 soflat: DAG-Edit Sequence Ontology flat files
133 so : synonymous to soflat
134 simplehierarchy: text format with one term per line
135 and indentation giving the hierarchy
136 evoc : synonymous to simplehierarchy
137 interpro: InterPro XML
138 interprosax: InterPro XML - this is actually not a
139 Bio::OntologyIO compliant parser; instead it
140 persists terms as they are encountered.
141 L<Bio::OntologyIO::Handlers::InterPro_BioSQL_Handler>
142 obo : OBO format style from Gene Ontology Consortium
143 -file - the file holding the data
144 -fh - the stream providing the data (-file and -fh are
145 mutually exclusive)
146 -ontology_name - the name of the ontology
147 -engine - the L<Bio::Ontology::OntologyEngineI> object
148 to be reused (will be created otherwise); note
149 that every L<Bio::Ontology::OntologyI> will
150 qualify as well since that one inherits from the
151 former.
152 -term_factory - the ontology term factory to use. Provide a
153 value only if you know what you are doing.
155 DAG-Edit flat file parsers will usually also accept the
156 following parameters.
158 -defs_file - the name of the file holding the term
159 definitions
160 -files - an array ref holding the file names (for GO,
161 there will usually be 3 files: component.ontology,
162 function.ontology, process.ontology)
164 Other parameters are specific to the parsers.
166 =cut
# Constructor and factory entry point.
#
# When invoked on (or as) a class below the Bio::OntologyIO:: namespace the
# object is built through the parent constructor and then handed to
# _initialize().  For any other invocant the -format argument (matched
# case-insensitively) is mapped to a driver name, the driver module is
# loaded, and construction is delegated to that driver with the original
# argument list.  Returns nothing if the driver module cannot be loaded.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Concrete driver class: construct and initialise the object here.
    if ($class =~ /Bio::OntologyIO::(\S+)/) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Factory path: collect the arguments under lower-cased names so the
    # -format key is found regardless of how the caller capitalised it.
    my %param   = @args;
    my %lowered = map { ( lc($_), $param{$_} ) } keys %param;
    my $format  = $class->_map_format($lowered{'-format'});

    return unless $class->_load_format_module($format);

    my $driver_class = "Bio::OntologyIO::$format";
    return $driver_class->new(@args);
}
190 =head2 format
192 Title : format
193 Usage : $format = $parser->format()
194 Function: Get the ontology format
195 Returns : ontology format
196 Args : none
198 =cut
200 # format() method inherited from Bio::Root::IO
# Common initialisation shared by the ontology parser drivers.
#
# Args    : the named parameters passed to new(); -term_factory is the only
#           one consumed here, the complete list is forwarded unchanged to
#           _initialize_io().
# Returns : whatever _initialize_io() returns.
sub _initialize {
    my ($self, @args) = @_;

    # Only one key is requested from _rearrange(), so exactly one value is
    # captured.  The earlier three-variable unpack put that value into the
    # first variable and left the factory variable undefined, so a
    # caller-supplied -term_factory was never installed.
    my ($fact) = $self->_rearrange([qw(TERM_FACTORY)], @args);

    # term object factory
    $self->term_factory($fact) if $fact;

    # initialize the Bio::Root::IO part
    $self->_initialize_io(@args);
}
217 =head2 next_ontology
219 Title : next_ontology
220 Usage : $ont = $stream->next_ontology()
221 Function: Reads the next ontology object from the stream and returns it.
222 Returns : a L<Bio::Ontology::OntologyI> compliant object, or undef at the
223 end of the stream
224 Args : none
227 =cut
# Placeholder in the factory class: it only calls throw_not_implemented()
# on the invocant, so a concrete parser has to provide its own
# next_ontology().
sub next_ontology {
    my ($self) = @_;
    return $self->throw_not_implemented();
}
233 =head2 term_factory
235 Title : term_factory
236 Usage : $obj->term_factory($newval)
237 Function: Get/set the ontology term factory to use.
239 As a user of this module it is not necessary to call this
240 method as there will be a default. In order to change the
241 default, the easiest way is to instantiate
242 L<Bio::Ontology::TermFactory> with the proper -type
243 argument. Most if not all parsers will actually use this
244 very implementation, so even easier than the aforementioned
245 way is to simply call
246 $ontio->term_factory->type("Bio::Ontology::MyTerm").
248 Example :
249 Returns : value of term_factory (a Bio::Factory::ObjectFactoryI object)
250 Args : on set, new value (a Bio::Factory::ObjectFactoryI object, optional)
253 =cut
# Combined getter/setter for the term factory stored under the
# 'term_factory' key of the object hash.  Any argument (including undef)
# replaces the stored value; the current value is always returned.
sub term_factory {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'term_factory'} = $new_value[0];
    }
    return $self->{'term_factory'};
}
262 =head1 Private Methods
264 Some of these are actually 'protected' in OO speak, which means you
265 may or will want to utilize them in a derived ontology parser, but
266 you should not call them from outside.
268 =cut
270 =head2 _load_format_module
272 Title : _load_format_module
273 Usage : *INTERNAL OntologyIO stuff*
274 Function: Loads up (like use) a module at run time on demand
275 Example :
276 Returns :
277 Args :
279 =cut
# Loads the driver module Bio::OntologyIO::<format> at run time through
# _load_module().  Returns the result of _load_module(); if loading raises
# an exception a diagnostic is printed to STDERR and undef is returned.
sub _load_format_module {
    my ($self, $format) = @_;
    my $driver_class = "Bio::OntologyIO::" . $format;

    # A failed load leaves $loaded undefined and the reason in $@.
    my $loaded = eval { $self->_load_module($driver_class) };

    if ($@) {
        print STDERR "$self: $format cannot be found\n"
            . "Exception $@\n"
            . "For more information about the OntologyIO system please see the docs.\n"
            . "This includes ways of checking for formats at compile time, not run time\n";
    }
    return $loaded;
}
# Destructor: calls close() on the object being destroyed.
sub DESTROY {
    my ($self) = @_;
    return $self->close();
}
# Translates a user-supplied format name into a driver module name.
# The name is lower-cased and looked up in %format_driver_map; names that
# have no entry are used (lower-cased) as the driver name themselves.
# A missing or false format name is reported through throw().
sub _map_format {
    my ($self, $format) = @_;
    my $driver;

    if (!$format) {
        $self->throw("unable to guess ontology format, specify -format");
    }
    else {
        my $key = lc($format);
        $driver = $format_driver_map{$key} || $key;
    }
    return $driver;
}
# Returns a copy of the given string with escape sequences expanded:
# "&lt\;", "&gt\;" and "&pct\;" become "<", ">" and "%", and the
# two-character sequences backslash-n and backslash-t become a newline
# and a tab.  The invocant is not used.
sub unescape {
    my ($self, $text) = @_;

    # Apply the substitutions in a fixed order to the local copy.
    for ($text) {
        s/&lt\\;/\</g;
        s/&gt\\;/\>/g;
        s/&pct\\;/\%/g;
        s/\\n/\n/g;
        s/\\t/\t/g;
    }
    return $text;
}