comments, some memoization, some other minor code changes
[nltk_ontology_framework.git] / src / mjacob / ontologybuilder / framework.py
blobcf9c11279f291a99f2e2c5c59a98f9482b518707
1 # This Python file uses the following encoding: utf-8
2 '''
3 Created on Apr 29, 2011
5 @author: mjacob
6 '''
7 import os
8 from util.cached import Cacheable
class OntologyBuilderFramework(Cacheable):
    """A framework for building domain ontologies, assuming that the process for doing such is:

        C{TERMS}: identify terms (from a corpus or such)
        C{SYNONYMS}: identify synonyms amongst the terms
        C{CONCEPTS}: identify concepts
        C{CONCEPT_HIERARCHIES}: identify hierarchical relationships amongst concepts
        C{RELATIONS}: identify other relationships amongst concepts

    For every step C{x} that is performed, L{process} calls the method
    C{_get_x(**state)} on the instance, so a concrete builder has to provide
    C{_get_terms}, C{_get_synonyms}, C{_get_concepts},
    C{_get_concept_hierarchies} and C{_get_relations} for the steps it runs.
    """

    # Step names. Each doubles as the key under which the step's result is
    # stored in the state dict and as the suffix of the C{_get_<step>} method.
    TERMS = "terms"
    SYNONYMS = "synonyms"
    CONCEPTS = "concepts"
    CONCEPT_HIERARCHIES = "concept_hierarchies"
    RELATIONS = "relations"

    def __init__(self, name, cachedir, initial_state=None, only_do=None, ignore_cache=None):
        """
        @type name: C{str}
        @param name: the name of this framework instance (used for debugging)
        @type cachedir: C{str}
        @param cachedir: the base directory in which to cache the ontology generation process;
            C{os.path.join(cachedir, name)} is what gets handed to L{Cacheable}
        @type initial_state: C{dict}
        @param initial_state: additional state available when processing with a framework instance;
            C{None} (the default) means an empty, per-instance dict
        @param only_do: if specified (and non-empty), only perform steps in this collection
        @param ignore_cache: passed through unchanged to L{Cacheable}. if specified, ignore any
            cached results from steps specified in this collection. note that any new results
            will still be saved to cache, possibly overwriting existing results.
        """
        Cacheable.__init__(self, os.path.join(cachedir, name), ignore_cache)
        # Build the default dict here rather than in the signature: a ``{}``
        # default is created once and would be shared (and mutable through
        # state()) by every instance constructed without initial_state.
        self.__state = {} if initial_state is None else initial_state
        self.__name = name
        self.__only_do = only_do

    def state(self):
        """
        get the current state.

        This is the instance's own dict, not a copy.

        @rtype: C{dict}
        """
        return self.__state

    def name(self):
        """
        get the name of the framework instance.

        @rtype: C{str}
        """
        return self.__name

    def _do_step(self, step):
        """
        returns C{True} if the framework instance is designed to perform step C{step}

        Every step is performed when C{only_do} was not given (or is empty);
        otherwise only the steps contained in C{only_do} are.

        @rtype: C{bool}
        """
        if self.__only_do and step not in self.__only_do:
            return False
        return True

    def __get_initial_state(self, additional_state):
        """
        construct a state collection given the defaults for object,
        and anything additional supplied for the specific run.

        The instance state is copied (shallowly) first, so the update does not
        touch the dict returned by L{state}; keys in C{additional_state} win.

        @rtype: C{dict}
        """
        state = self.state().copy()
        state.update(additional_state)
        return state

    def process(self, **additional_state):
        """
        iterate through the predefined steps, in order, to construct an ontology.

        Each performed step is called with the state accumulated so far as
        keyword arguments, and its result is stored in the state under the
        step's name before the next step runs.

        @param additional_state: extra state for this run only, layered over the
            instance's initial state

        @rtype: C{dict}
        @return: the resulting state
        @raise Exception: if a performed step returns a falsy result
        """
        state = self.__get_initial_state(additional_state)

        for step in (OntologyBuilderFramework.TERMS,
                     OntologyBuilderFramework.SYNONYMS,
                     OntologyBuilderFramework.CONCEPTS,
                     OntologyBuilderFramework.CONCEPT_HIERARCHIES,
                     OntologyBuilderFramework.RELATIONS):
            if not self._do_step(step):
                continue

            # Dispatch by naming convention: step "terms" -> self._get_terms
            result = getattr(self, '_get_%s' % step)(**state)

            if not result:
                raise Exception("no result (%s) at step %s" % (result, step))

            state[step] = result
            # Single parenthesised argument: same output under the Python 2
            # print statement, and valid as a Python 3 function call.
            print("found %s %s" % (len(result), step))

        return state
class OntologyBuilderTerms(object):
    """Interface for the term-building step of an ontology.

    L{OntologyBuilderFramework.process} calls C{_get_terms(**state)} and
    raises if the result is falsy, so builders are expected to override
    this stub.
    """

    def _get_terms(self, **state):
        """Placeholder implementation: ignores C{state} and returns C{None}."""
        return None
class OntologyBuilderSynonyms(object):
    """Interface for the synonym-building step (usually synonyms of terms).

    L{OntologyBuilderFramework.process} calls C{_get_synonyms(**state)} and
    raises if the result is falsy, so builders are expected to override
    this stub.
    """

    def _get_synonyms(self, **state):
        """Placeholder implementation: ignores C{state} and returns C{None}."""
        return None
class OntologyBuilderConcepts(object):
    """Interface for the concept-construction step of an ontology.

    L{OntologyBuilderFramework.process} calls C{_get_concepts(**state)} and
    raises if the result is falsy, so builders are expected to override
    this stub.
    """

    def _get_concepts(self, **state):
        """Placeholder implementation: ignores C{state} and returns C{None}."""
        return None
class OntologyBuilderConceptHierarchies(object):
    """Interface for constructing hierarchies of concepts for an ontology.

    L{OntologyBuilderFramework.process} calls
    C{_get_concept_hierarchies(**state)} and raises if the result is falsy,
    so builders are expected to override this stub.
    """

    def _get_concept_hierarchies(self, **state):
        """Placeholder implementation: ignores C{state} and returns C{None}."""
        return None
class OntologyBuilderRelations(object):
    """Interface for building relations between concepts in an ontology.

    L{OntologyBuilderFramework.process} calls C{_get_relations(**state)} and
    raises if the result is falsy, so builders are expected to override
    this stub.
    """

    def _get_relations(self, **state):
        """Placeholder implementation: ignores C{state} and returns C{None}."""
        return None