src/junk.py

   1 import timeit
   2 def adj_f(middle, loc_h):
   3     return middle+.5 == loc_h or middle-.5 == loc_h
   4 def adj_f2(middle, loc_h):
   5     return middle-1 < loc_h < middle+1
   6 def adj_p(middle, loc_h):
   7     return middle == loc_h[0] or middle == loc_h[1]
   8
   9 print "float:"
  10 print timeit.Timer("junk.adj_f(1,0.5)","import junk").timeit(1000000)
  11 print "pair:"
  12 print timeit.Timer("junk.adj_p(1,[0,1])","import junk").timeit(1000000)
  13
  14 print "float2:"
  15 print timeit.Timer("junk.adj_f2(1,0.5)","import junk").timeit(1000000)
  16
  17 # todo: some more testing on the Brown corpus:
  18 #     # first five sentences of the Brown corpus:
  19 #     g_brown = harmonic.initialize([['AT', 'NP-TL', 'NN-TL', 'JJ-TL', 'NN-TL', 'VBD', 'NR', 'AT', 'NN', 'IN', 'NP$', 'JJ', 'NN', 'NN', 'VBD', '``', 'AT', 'NN', "''", 'CS', 'DTI', 'NNS', 'VBD', 'NN', '.'], ['AT', 'NN', 'RBR', 'VBD', 'IN', 'NN', 'NNS', 'CS', 'AT', 'NN-TL', 'JJ-TL', 'NN-TL', ',', 'WDT', 'HVD', 'JJ', 'NN', 'IN', 'AT', 'NN', ',', '``', 'VBZ', 'AT', 'NN', 'CC', 'NNS', 'IN', 'AT', 'NN-TL', 'IN-TL', 'NP-TL', "''", 'IN', 'AT', 'NN', 'IN', 'WDT', 'AT', 'NN', 'BEDZ', 'VBN', '.'], ['AT', 'NP', 'NN', 'NN', 'HVD', 'BEN', 'VBN', 'IN', 'NP-TL', 'JJ-TL', 'NN-TL', 'NN-TL', 'NP', 'NP', 'TO', 'VB', 'NNS', 'IN', 'JJ', '``', 'NNS', "''", 'IN', 'AT', 'JJ', 'NN', 'WDT', 'BEDZ', 'VBN', 'IN', 'NN-TL', 'NP', 'NP', 'NP', '.'], ['``', 'RB', 'AT', 'JJ', 'NN', 'IN', 'JJ', 'NNS', 'BEDZ', 'VBN', "''", ',', 'AT', 'NN', 'VBD', ',', '``', 'IN', 'AT', 'JJ', 'NN', 'IN', 'AT', 'NN', ',', 'AT', 'NN', 'IN', 'NNS', 'CC', 'AT', 'NN', 'IN', 'DT', 'NN', "''", '.'], ['AT', 'NN', 'VBD', 'PPS', 'DOD', 'VB', 'CS', 'AP', 'IN', 'NP$', 'NN', 'CC', 'NN', 'NNS', '``', 'BER', 'JJ', 'CC', 'JJ', 'CC', 'RB', 'JJ', "''", '.'], ['PPS', 'VBD', 'CS', 'NP', 'NNS', 'VB', '``', 'TO', 'HV', 'DTS', 'NNS', 'VBN', 'CC', 'VBN', 'IN', 'AT', 'NN', 'IN', 'VBG', 'CC', 'VBG', 'PPO', "''", '.'], ['AT', 'JJ', 'NN', 'VBD', 'IN', 'AT', 'NN', 'IN', 'AP', 'NNS', ',', 'IN', 'PPO', 'AT', 'NP', 'CC', 'NP-TL', 'NN-TL', 'VBG', 'NNS', 'WDT', 'PPS', 'VBD', '``', 'BER', 'QL', 'VBN', 'CC', 'VB', 'RB', 'VBN', 'NNS', 'WDT', 'VB', 'IN', 'AT', 'JJT', 'NN', 'IN', 'ABX', 'NNS', "''", '.'], ['NN-HL', 'VBN-HL'], ['WRB', ',', 'AT', 'NN', 'VBD', 'PPS', 'VBZ', '``', 'DTS', 'CD', 'NNS', 'MD', 'BE', 'VBN', 'TO', 'VB', 'JJR', 'NN', 'CC', 'VB', 'AT', 'NN', 'IN', 'NN', "''", '.'], ['AT', 'NN-TL', 'VBG-TL', 'NN-TL', ',', 'AT', 'NN', 'VBD', ',', '``', 'BEZ', 'VBG', 'IN', 'VBN', 'JJ', 'NNS', 'CS', 'AT', 'NN', 'IN', 'NN', 'NNS', 'NNS', "''", '.']])
  20 #     # 36:'AT' in g_brown.numtag, 40:'NP-TL'
  21
  22 #     io.DEBUG = []
  23 #     test_brown = inner(0,2, (LRBAR,36), g_brown, ['AT', 'NP-TL' ,'NN-TL','JJ-TL'], {})
  24 #     if 1 in io.DEBUG:
  25 #         for r in  g_brown.rules((2,36)) + g_brown.rules((1,36)) + g_brown.rules((0,36)):
  26 #             L = r.L()
  27 #             R = r.R()
  28 #             if head(L) in [36,40,-2] and head(R) in [36,40,-2]:
  29 #                 print r
  30 #     print "Brown-test gives: %.8f" % test_brown
  31
  32
  33
  34     # this will give the tag sequences of all the 6218 Brown corpus
  35     # sentences of length < 7:
  36     # [[tag for (w, tag) in sent]
  37     #  for sent in nltk.corpus.brown.tagged_sents() if len(sent) < 7]
  38
  39
  40
  41 # ##############################
  42 # #          from dmv          #
  43 # ##############################
  44 # def prune2(s,t,LHS,loc_h, ichart,tree):
  45 #     '''TODO '''
  46 #     newichart = {}
  47 #     def prune2_helper(s,t,LHS,loc_h):
  48 #         newichart[(s,t,LHS,loc_h)] = ichart[(s,t,LHS,loc_h)]
  49 #         for d in tree[s,t,LHS,loc_h]:
  50 #             prune2_helper(d[0],d[1],d[2],d[3])
  51
  52 #     prune2_helper(s,t,LHS,loc_h)
  53 #     return newichart
  54
  55 # def prune(s,t,LHS, g, sent_nums, ichart):
  56 #     '''Removes unused subtrees with positive probability from the
  57 #     ichart.
  58
  59 #     Unused := any and all mothers (or grandmothers etc.) have
  60 #     probability 0.0'''
  61 #     def prune_helper(keep,s,t,LHS,loc_h):
  62 #         keep = keep and ichart[(s,t,LHS,loc_h)] > 0.0
  63 #         for rule in g.sent_rules(LHS, sent_nums):
  64 #             L = rule.L()
  65 #             R = rule.R()
  66 #             if R==STOP:
  67 #                 if (s,t,L,loc_h) in ichart:
  68 #                     prune_helper(keep, s,t, L,loc_h)
  69 #             elif L==STOP:
  70 #                 if (s,t,R,loc_h) in ichart:
  71 #                     prune_helper(keep, s,t, R,loc_h)
  72 #             else:
  73 #                 for r in xrange(s,t):
  74 #                     for loc_L in locs(head(L), sent_nums, s, r):
  75 #                         if (s,r,rule.L(),loc_L) in ichart:
  76 #                             prune_helper(keep, s ,r,rule.L(),loc_L)
  77 #                     for loc_R in locs(head(R), sent_nums, r+1, t):
  78 #                         if (r+1,t,rule.R(),loc_R) in ichart:
  79 #                             prune_helper(keep,r+1,t,rule.R(),loc_R)
  80
  81 #         if not (s,t,LHS,loc_h) in keepichart:
  82 #             keepichart[(s,t,LHS,loc_h)] = keep
  83 #         else: # eg. if previously some parent rule had 0.0, but then a
  84 #               # later rule said "No, I've got a use for this subtree"
  85 #             keepichart[(s,t,LHS,loc_h)] += keep
  86 #         return None
  87
  88 #     keepichart = {}
  89 #     for loc_h,h in enumerate(sent_nums):
  90 #         keep = ichart[(s,t,LHS,loc_h)] > 0.0
  91 #         keepichart[(s,t,LHS,loc_h)] = keep
  92 #         prune_helper(keep,s,t,LHS,loc_h)
  93
  94 #     for (s,t,LHS,loc_h),v in keepichart.iteritems():
  95 #         if not v:
  96 #             if  'PRUNE' in io.DEBUG:
  97 #                 print "popping s:%d t:%d LHS:%s loc_h:%d" % (s,t,LHS,loc_h)
  98 #             ichart.pop((s,t,LHS,loc_h))
  99 # # end prune(s,t,LHS,loc_h, g, sent_nums, ichart)
 100
 101 # def prune_sent(g, sent_nums, ichart):
 102 #     return prune(0, len(sent_nums)-1, ROOT, g, sent_nums, ichart)
 103
 104
 105
 106 # def P_STOP(STOP, h, dir, adj, g, corpus):
 107 #     P_STOP_num = 0
 108 #     P_STOP_den = 0
 109 #     h_tag = g.numtag(h)
 110 #     for sent in corpus:
 111 #         chart = {}
 112 #         locs_h = locs(h_tag, sent)
 113 #         io.debug( "locs_h:%s, sent:%s"%(locs_h,sent) , 'PSTOP')
 114 #         for loc_h in locs_h:
 115 #             inner(0, len(sent)-1, ROOT, loc_h, g, sent, chart)
 116 #             for s in range(loc_h): # s<loc(h), range gives strictly less
 117 #                 for t in range(loc_h, len(sent)):
 118 #                     io.debug( "s:%s t:%s loc:%d"%(s,t,loc_h)  , 'PSTOP')
 119 #                     if (s, t, (LRBAR,h), loc_h) in chart:
 120 #                         io.debug( "num+=%s"%chart[(s, t, (LRBAR,h), loc_h)]  , 'PSTOP')
 121 #                         P_STOP_num += chart[(s, t, (LRBAR,h), loc_h)]
 122 #                     if (s, t, (RBAR,h), loc_h) in chart:
 123 #                         io.debug( "den+=%s"%chart[(s, t, (RBAR,h), loc_h)]  , 'PSTOP')
 124 #                         P_STOP_den += chart[(s, t, (RBAR,h), loc_h)]
 125
 126 #     io.debug( "num/den: %s / %s"%(P_STOP_num, P_STOP_den) , 'PSTOP')
 127 #     if P_STOP_den > 0.0:
 128 #         io.debug( "num/den: %s / %s = %s"%(P_STOP_num, P_STOP_den,P_STOP_num / P_STOP_den) , 'PSTOP')
 129 #         return P_STOP_num / P_STOP_den # upside down in article
 130 #     else:
 131 #         return 0.0
 132
 133
 134 # def DMV(sent, g):
 135 #     '''Here it seems like they store rule information on a per-head (per
 136 #     direction) basis, in deps_D(h, dir) which gives us a list. '''
 137 #     def P_h(h):
 138 #         P_h = 1 # ?
 139 #         for dir in ['l', 'r']:
 140 #             for a in deps(h, dir):
 141 #                 # D(a)??
 142 #                 P_h *= \
 143 #                     P_STOP (0, h, dir, adj) * \
 144 #                     P_CHOOSE (a, h, dir) * \
 145 #                     P_h(D(a)) * \
 146 #                 P_STOP (STOP | h, dir, adj)
 147 #         return P_h
 148 #     return P_h(root(sent))
 149
 150
 151 # if __name__ == "__main__": # from dmv.py
 152 #     # these are not Real rules, just testing the classes. todo: make
 153 #     # a rule-set to test inner() on.
 154 #     b = {}
 155 #     s   = DMV_Rule((LRBAR,0), (NOBAR,1),(NOBAR,2), 1.0, 0.0) # s->np vp
 156 #     np  = DMV_Rule((NOBAR,1), (NOBAR,3),(NOBAR,4), 0.3, 0.0) # np->n p
 157 #     b[(NOBAR,1), 'n'] = 0.7 # np->'n'
 158 #     b[(NOBAR,3), 'n'] = 1.0 # n->'n'
 159 #     b[(NOBAR,4), 'p'] = 1.0 # p->'p'
 160 #     vp  = DMV_Rule((NOBAR,2), (NOBAR,5),(NOBAR,1), 0.1, 0.0) # vp->v np (two parses use this rule)
 161 #     vp2 = DMV_Rule((NOBAR,2), (NOBAR,2),(NOBAR,4), 0.9, 0.0) # vp->vp p
 162 #     b[(NOBAR,5), 'v'] = 1.0 # v->'v'
 163
 164 #     g = DMV_Grammar([s,np,vp,vp2], b, "todo","todo", "todo")
 165
 166 #     io.DEBUG = 0
 167 #     test1 = io.inner(0,0, (NOBAR,1), g, ['n'], {})
 168 #     if test1[0] != 0.7:
 169 #         print "should be 0.70 : %.2f" % test1[0]
 170 #         print ""
 171
 172 #     test2 = io.inner(0,2, (NOBAR,2), g, ['v','n','p'], {})
 173 #     if "%.2f" % test2[0] != "0.09": # 0.092999 etc, don't care about that
 174 #         print "should be 0.09 if the io.py-test is right : %.2f" % test2[0]
 175 #     # the following should manage to look stuff up in the chart:
 176 #     test2 = io.inner(0,2, (NOBAR,2), g, ['v','n','p'], test2[1])
 177 #     if "%.2f" % test2[0] != "0.09":
 178 #         print "should be 0.09 if the io.py-test is right : %.2f" % test2[0]