from common_dmv import ROOTNUM
from wsjdep import WSJDepCorpusReader
from loc_h_dmv import DMV_Grammar, reestimate, DEBUG, mpp
from loc_h_harmonic import initialize
def evaluate(g, tagged_and_parsed_sents):
    """Compare the parses mpp() produces under g with the given parses.

    tagged_and_parsed_sents is a list of pairs:
    (tagonly_sent, parsed_sent)

    For every sentence, mpp(g, tagonly_sent) is checked against
    parsed_sent and four counters are accumulated:

    R_num += 1 if pair from parsed is in mpp
    R_den += 1 per pair from parsed

    P_num += 1 if pair from mpp is in parsed
    P_den += 1 per pair from mpp (pairs with pair[0][0] == ROOTNUM
                                  are skipped and not counted)

    F1 = (2 * P * R)/(P + R), the harmonic mean of P and R

    A ratio whose denominator is zero is reported as 0.0 instead of
    raising ZeroDivisionError, so empty input yields all zeros.

    Returns a one-line, tab-separated string holding recall, precision
    and F1, each formatted with four decimals.
    """
    recall_num = 0
    recall_den = 0
    precision_num = 0
    precision_den = 0

    for sent, parse in tagged_and_parsed_sents:
        mpp_sent = mpp(g, sent)

        for pair in parse:
            recall_den += 1
            if pair in mpp_sent:
                recall_num += 1

        for pair in mpp_sent:
            # NOTE(review): presumably pair[0][0] is the head's number, so
            # this drops attachments to ROOT -- confirm against mpp().
            if pair[0][0] == ROOTNUM:
                continue  # todo: add ROOT to parses?
            precision_den += 1
            if pair in parse:
                precision_num += 1

    # float() forces true division: the counters are ints, and int / int
    # floors to 0 or 1 under Python 2.
    recall = float(recall_num) / recall_den if recall_den else 0.0
    precision = float(precision_num) / precision_den if precision_den else 0.0

    # The harmonic mean is undefined when both terms are zero; report 0.0.
    if precision + recall:
        F1 = (2 * recall * precision) / (precision + recall)
    else:
        F1 = 0.0

    return "Recall: %.4f\tPrecision: %.4f\tF1: %.4f" % (recall, precision, F1)
if __name__ == "__main__":
    # Script entry point: build a grammar with initialize() from the first
    # 500 tag-only sentences, then print evaluate()'s score line.
    reader = WSJDepCorpusReader(None)

    tagonlys = reader.tagonly_sents()
    g = initialize(tagonlys[0:500])

    # f = reestimate(g, reader.tagonly_sents())

    # NOTE(review): tags_and_parses is not assigned anywhere in the visible
    # code; presumably it is a list of (tagonly_sent, parsed_sent) pairs
    # obtained from the reader -- confirm and define it before this line.
    # The single-argument parenthesised form prints the same text under the
    # Python 2 print statement and also parses on Python 3.
    print(evaluate(g, tags_and_parses[0:500]))