loc_h_dmv IO and reest done, need to do main.py:def evaluate
[dmvccm.git] / src / main.py
blob7092a07f70a915cb04af58d2da926c2fd187fbf8
1 from common_dmv import ROOTNUM
2 from wsjdep import WSJDepCorpusReader
3 from loc_h_dmv import DMV_Grammar, reestimate, DEBUG, mpp
4 from loc_h_harmonic import initialize
6 # todo:
def evaluate(g, tagged_and_parsed_sents):
    '''Score the most probable parses under grammar g against given parses.

    tagged_and_parsed_sents is a list of pairs:
    (tagonly_sent, parsed_sent)

    For every such pair, mpp(g, tagonly_sent) is compared with parsed_sent:

    R_num += 1 if pair from parsed is in mpp
    R_den += 1 per pair from parsed

    P_num += 1 if pair from mpp is in parsed
    P_den += 1 per pair from mpp
    (pairs from mpp whose pair[0][0] equals ROOTNUM are not counted)

    F1 = (2 * P * R)/(P + R), the harmonic mean of P and R

    Returns a tab-separated string with recall, precision and F1, each
    formatted to four decimals.  A ratio whose denominator is zero (e.g.
    for an empty input list) is reported as 0.0 instead of raising
    ZeroDivisionError.
    '''
    recall_num = 0
    recall_den = 0
    precision_num = 0
    precision_den = 0

    for sent, parse in tagged_and_parsed_sents:
        mpp_sent = mpp(g, sent)
        for pair in parse:
            recall_den += 1
            if pair in mpp_sent:
                recall_num += 1
        for pair in mpp_sent:
            if pair[0][0] == ROOTNUM:
                continue  # todo: add ROOT to parses?
            precision_den += 1
            if pair in parse:
                precision_num += 1

    # The counters are ints; convert to float before dividing so that
    # Python 2's floor division does not truncate the ratios to 0 or 1.
    recall = 0.0
    if recall_den:
        recall = float(recall_num) / recall_den

    precision = 0.0
    if precision_den:
        precision = float(precision_num) / precision_den

    # The harmonic mean is undefined when both P and R are zero; report 0.
    F1 = 0.0
    if precision + recall > 0:
        F1 = (2 * recall * precision) / (precision + recall)

    return "Recall: %.4f\tPrecision: %.4f\tF1: %.4f"%(recall, precision, F1)
if __name__ == "__main__":
    # Script entry point: build an initial grammar from the first
    # corpus_size tag-only sentences and print its evaluation.
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement, so the output is unchanged.
    print("main.py:")
    reader = WSJDepCorpusReader(None)
    corpus_size = 500  # number of sentences used for both init and eval

    tagonlys = reader.tagonly_sents()
    g = initialize(tagonlys[0:corpus_size])
    print(g)

    # DEBUG.add('REEST')
    # f = reestimate(g, reader.tagonly_sents())
    # print g

    # tags_and_parses was used below without ever being bound (NameError).
    # NOTE(review): assumes WSJDepCorpusReader provides
    # tagged_and_parsed_sents() returning (tagonly_sent, parsed_sent) pairs
    # as evaluate() expects -- confirm against wsjdep.py.
    tags_and_parses = reader.tagged_and_parsed_sents()
    print(evaluate(g, tags_and_parses[0:corpus_size]))

    print("main.py: done")