# This Python file uses the following encoding: utf-8
import re

import nltk
import nltk.corpus.reader.wordnet

from itertools import chain, combinations

from framework import OntologyBuilderConcepts, OntologyBuilderFramework

wn = nltk.corpus.wordnet

HYPERNYMS = nltk.corpus.reader.wordnet.Synset.hypernyms
INSTANCE_HYPERNYMS = nltk.corpus.reader.wordnet.Synset.instance_hypernyms
HYPONYMS = nltk.corpus.reader.wordnet.Synset.hyponyms
INSTANCE_HYPONYMS = nltk.corpus.reader.wordnet.Synset.instance_hyponyms
MEMBER_HOLONYMS = nltk.corpus.reader.wordnet.Synset.member_holonyms
SUBSTANCE_HOLONYMS = nltk.corpus.reader.wordnet.Synset.substance_holonyms
PART_HOLONYMS = nltk.corpus.reader.wordnet.Synset.part_holonyms
MEMBER_MERONYMS = nltk.corpus.reader.wordnet.Synset.member_meronyms
SUBSTANCE_MERONYMS = nltk.corpus.reader.wordnet.Synset.substance_meronyms
PART_MERONYMS = nltk.corpus.reader.wordnet.Synset.part_meronyms
ATTRIBUTES = nltk.corpus.reader.wordnet.Synset.attributes
ENTAILMENTS = nltk.corpus.reader.wordnet.Synset.entailments
CAUSES = nltk.corpus.reader.wordnet.Synset.causes
ALSO_SEES = nltk.corpus.reader.wordnet.Synset.also_sees
VERB_GROUPS = nltk.corpus.reader.wordnet.Synset.verb_groups
SIMILAR_TOS = nltk.corpus.reader.wordnet.Synset.similar_tos
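
# Note (illustrative, not in the original): each constant above is an unbound
# Synset method, so it can be applied directly to any synset, e.g.:
#
#   dog = wn.synset('dog.n.01')
#   HYPERNYMS(dog)        # equivalent to dog.hypernyms()
#   PART_MERONYMS(dog)    # equivalent to dog.part_meronyms()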

class SimpleConcepts(OntologyBuilderConcepts):

    def _get_concepts(self, **state):
        synonyms = state[OntologyBuilderFramework.SYNONYMS]
        for synonym in synonyms:
            #nets = self.__get_subterm_nets(synonym)
            # One synset is chosen per subterm; the first is picked by __get_initial_synset.
            concept = [self.__get_initial_synset(synonym)]
            for k in xrange(1, len(synonym)):
                best_synset = self.__get_best_synset(synonym, k)
                concept.append(best_synset)
            if not no_synset and concept in concepts:
                concepts[concept].extend(synonyms[synonym])

    def __get_initial_synset(self, synonym):
        # FIXME supposedly chosen manually.

    def __get_best_synset(self, synonym, k):
        subterm_synsets = synonym[k]
        scores = []
        for j in xrange(len(subterm_synsets)):
            subterm_synset = subterm_synsets[j]
            score = self.__get_score(subterm_synset, synonym, k)
            scores.append(score)
        index = self.__get_best_score_index(scores)
        return None # we can't do anything w/ this

    def __get_score(self, synset, synonym, k):
        for i in xrange(k-1, -1, -1):
            score, score_total = self.__get_specific_score(synset, synonym, k, i)
        return score # it's all 0s, we'll deal w/ that later

    def __get_specific_score(self, synset, synonym, k, i):
        score = []
        total_score = 0
        other_synsets = synonym[i]
        for other_synset in other_synsets:
            intersection_score = self.__get_intersection_score(other_synset, synset)
            score.append(intersection_score)
            total_score += intersection_score
        return score, total_score

    def __get_intersection_score(self, other_synset, synset):
        S1 = wn.synset(other_synset)
        S2 = wn.synset(synset)
        # Sum the individual relatedness heuristics between the two synsets.
        score = 0
        score += self.__get_colour_score(S1, S2)
        score += self.__get_domain_score(S1, S2)
        score += self.__get_synonymy_score(S1, S2)
        score += self.__get_hypernymy_meronymy_path_score(S1, S2)
        score += self.__get_hyponymy_holonymy_path_score(S1, S2)
        score += self.__get_parallelism_score(S1, S2)
        score += self.__get_gloss_score(S1, S2)
        score += self.__get_topic_score(S1, S2)
        score += self.__get_gloss_hyperonymy_meronymy_path_score(S1, S2)
        score += self.__get_gloss_parallelism_score(S1, S2)
        score += self.__get_gloss_gloss_score(S1, S2)
        score += self.__get_hyperonymy_meronymy_gloss_path_score(S1, S2)
        score += self.__get_parallelism_gloss_score(S1, S2)
        return score

    def __get_colour_score(self, S1, S2):
        CHROMATIC = wn.synset('chromatic.a.3')
        PHYSICAL_ENTITY = wn.synset('physical_entity.n.01')
        COLORS = frozenset(CHROMATIC.similar_tos())
        if S1 in COLORS and PHYSICAL_ENTITY in chain(*S2.hypernym_paths()):

    def __get_domain_score(self, S1, S2):
        # Definitions of the form "(of X) ..." carry a domain label; S2 is scored against X.
        DOMAIN_LABEL = re.compile(r'\(of ([^\)]*)\)')
        m = DOMAIN_LABEL.match(S1.definition)
        domain_label_synsets = wn.synsets(m.group(1).replace(' ', '_'), pos=wn.NOUN)
        for domain_label_synset in domain_label_synsets:
            if S2 == domain_label_synset or S2 in domain_label_synset.hyponyms():

    def __get_synonymy_score(self, S1, S2):
        if S1 == S2 or S2 in chain(*[(b.synset for b in a.pertainyms()) for a in S1.lemmas]):

    def __path(self, synset, max_depth=3, *relations):
        connections = chain(*[nym(synset) for nym in relations])
        return chain(connections,
                     chain(*[nyms(connection, max_depth-1) for connection in connections]))
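
    # Illustrative sketch (assumption, not part of the original code): __path takes
    # relation accessors such as the module-level constants above, e.g. collecting
    # hypernym and part-meronym neighbours up to depth 2:
    #
    #   neighbours = self.__path(wn.synset('dog.n.01'), 2, HYPERNYMS, PART_MERONYMS)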

    def __get_hypernymy_meronymy_path_score(self, S1, S2):

    def __get_hyponymy_holonymy_path_score(self, S1, S2):

    def __get_parallelism_score(self, S1, S2):

    def __get_gloss_score(self, S1, S2):

    def __get_topic_score(self, S1, S2):

    def __get_gloss_hyperonymy_meronymy_path_score(self, S1, S2):

    def __get_gloss_parallelism_score(self, S1, S2):

    def __get_gloss_gloss_score(self, S1, S2):

    def __get_hyperonymy_meronymy_gloss_path_score(self, S1, S2):

    def __get_parallelism_gloss_score(self, S1, S2):

    def __get_best_score_index(self, scores):
        best_score_index = None
        for i in xrange(len(scores)):
            if best_score_index is None:
                best_score_index = i
            elif self.__is_better_score(scores[i], scores[best_score_index]):
                best_score_index = i
        return best_score_index

    def __is_better_score(self, score_a, score_b):
        return sorted(score_a) > sorted(score_b)
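
    # For example (illustrative, not in the original): __is_better_score([1, 2], [3, 0])
    # is True, because sorted([1, 2]) = [1, 2] compares lexicographically greater than
    # sorted([3, 0]) = [0, 3]; the ordering favours score vectors whose smallest
    # entries are larger.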

    def random_code_hole(self, nets): # FIXME delete
        for i in xrange(1, len(nets)):
            subterm_nets_a = nets[i]
            for net_a in subterm_nets_a:
                for j in xrange(i-1, -1, -1):
                    subterm_nets_b = nets[j]
                    for net_b in subterm_nets_b:
                        net_i = net_a.intersection(net_b)
                        score = self.__score(net_i)

    def __get_subterm_nets(self, synonym):
        return [[frozenset(self.__nyms(wn.synset(synset), 3))
                 for synset in subterm_synonym]
                for subterm_synonym in synonym]

    def __nyms(self, synset, n=1):
        connections = chain(synset.hypernyms(),
                            synset.instance_hypernyms(),
                            synset.instance_hyponyms(),
                            synset.member_holonyms(),
                            synset.substance_holonyms(),
                            synset.part_holonyms(),
                            synset.member_meronyms(),
                            synset.substance_meronyms(),
                            synset.part_meronyms(),
                            synset.entailments(),
                            synset.verb_groups(),
                            synset.similar_tos())
        return chain(connections,
                     chain(*[self.__nyms(connection, n-1) for connection in connections]))

from itertools import chain

def nyms(synset, n=1):
    connections = chain(synset.hypernyms(),
                        synset.instance_hypernyms(),
                        synset.instance_hyponyms(),
                        synset.member_holonyms(),
                        synset.substance_holonyms(),
                        synset.part_holonyms(),
                        synset.member_meronyms(),
                        synset.substance_meronyms(),
                        synset.part_meronyms(),
                        synset.entailments(),
                        synset.verb_groups(),
                        synset.similar_tos())
    return chain(connections,
                 chain(*[nyms(connection, n-1) for connection in connections]))
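
# Illustrative usage (assumption, not part of the original code), assuming the
# elided lines supply a depth check that terminates the recursion:
#
#   from nltk.corpus import wordnet as wn
#   related = set(nyms(wn.synset('dog.n.01'), 2))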

for collection in (synset.hypernyms(),
                   synset.instance_hypernyms(),
                   synset.instance_hyponyms(),
                   synset.member_holonyms(),
                   synset.substance_holonyms(),
                   synset.part_holonyms(),
                   synset.member_meronyms(),
                   synset.substance_meronyms(),
                   synset.part_meronyms(),
                   synset.entailments(),
                   synset.verb_groups(),
                   synset.similar_tos()):