% Created 2008-06-13 Fri 17:05
% todo: fix stop and attachment formulas so they divide before summing
\documentclass[11pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\avmoptions{sorted,active}
\avmsortfont{\scriptsize\it}
\usepackage{array} % for a tabular with math in it
\author{Kevin Brubeck Unhammer}

\begin{document}
\maketitle

This is an attempt at fleshing out the details of the inside-outside
algorithm \citep{lari-csl90} applied to the DMV model of
\citet{klein-thesis}.
\newcommand{\LOC}[1]{\textbf{#1}}
\newcommand{\GOR}[1]{\overrightarrow{#1}}
\newcommand{\RGOL}[1]{\overleftarrow{\overrightarrow{#1}}}
\newcommand{\SEAL}[1]{\overline{#1}}
\newcommand{\LGOR}[1]{\overrightarrow{\overleftarrow{#1}}}
\newcommand{\GOL}[1]{\overleftarrow{#1}}
\newcommand{\LN}[1]{\underleftarrow{#1}}
\newcommand{\RN}[1]{\underrightarrow{#1}}
\newcommand{\XI}{\lessdot}
\newcommand{\XJ}{\gtrdot}
\newcommand{\SMTR}[1]{\dddot{#1}}
\newcommand{\SDTR}[1]{\ddot{#1}}
\section{Note on notation}
$i, j, k$ are sentence positions (between words), where $i$ and $j$
are always the start and end, respectively, for what we're calculating
($k$ is between $i$ and $j$ for $P_{INSIDE}$, and to their right or left
for $P_{OUTSIDE}$). $s \in S$ are sentences in the corpus. $\LOC{w}$
is a word token (actually a POS-token) of type $w$ at a certain sentence
location. If $\LOC{w}$ is between $i$ and $i+1$, then $loc(\LOC{w})=i$,
following \citet{klein-thesis}, meaning $i$ is adjacent to $\LOC{w}$
on the left, while $j=loc(\LOC{w})+1$ means that $j$ is adjacent to
$\LOC{w}$ on the right. To simplify, $loc_l(\LOC{w}):=loc(\LOC{w})$ and
$loc_r(\LOC{w}):=loc(\LOC{w})+1$. We write $\LOC{h}$ if this is a head
in the rule being used, and $\LOC{a}$ if it is an attached argument.
Notational differences between the thesis \citep{klein-thesis} and the
notation used here:

\begin{tabular}{cc}
$w\urcorner$ & $\RGOL{w}$ \\
$\ulcorner{w}\urcorner$ & $\SEAL{w}$ \\
\end{tabular}
I use $\SMTR{w}$ (or $\SDTR{w}$) to signify one of either $w, \GOR{w},
\RGOL{w}, \LGOR{w}, \GOL{w}$ or $\SEAL{w}$\footnote{This means that
$\SMTR{\LOC{w}}$ is the triplet of the actual POS-tag, its sentence
location as a token, and the ``level of seals''.}.
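
As a rough illustration of this convention (not the actual DMV
implementation; all names below are made up), a Python sketch:

\begin{verbatim}
# A minimal sketch of the indexing convention, assuming a sentence is a
# list of POS-tags; all names are illustrative, not the DMV implementation.
SEAL, RGOL, GOR, LGOR, GOL = "seal", "rgol", "gor", "lgor", "gol"  # seal levels

def loc_l(loc_w):   # i is adjacent to the token on its left:  i == loc_l(w)
    return loc_w

def loc_r(loc_w):   # j is adjacent to the token on its right: j == loc_r(w)
    return loc_w + 1

s = ["DT", "NN", "VBD"]
# the "triplet" of the footnote: POS-tag, token location, level of seals
node = (s[1], 1, SEAL)
\end{verbatim}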
\section{Inside probabilities}
$P_{INSIDE}$ is defined in \citet[pp.~106--108]{klein-thesis}; the only
thing we need to add is that for right attachments,
$i \leq loc_l(w) < k \leq loc_l(\LOC{a}) < j$, while for left attachments,
$i \leq loc_l(\LOC{a}) < k \leq loc_l(w) < j$.
(We assume
\[ \forall{w}[P_{ORDER}(right\text{-}first|w)=1.0] \]
since the DMV implementation is not yet generalized to both directions.)
\subsection{Sentence probability}
$P_s$ is the sentence probability, based on
\citet[p.~38]{lari-csl90}. Since the ROOT rules are different from the
rest, we sum them explicitly in this definition:
\[
P_s = \sum_{\LOC{w} \in s} P_{ROOT}(\LOC{w}) P_{INSIDE}(\SEAL{\LOC{w}}, 0, len(s))
\]
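
A minimal Python sketch of this sum, assuming an already-filled inside
chart keyed on (seal level, token location, $i$, $j$); the names are
illustrative only:

\begin{verbatim}
# Sketch: sentence probability from a filled inside chart. The POS-tag of a
# token is recoverable from its location, so chart keys omit it.
def sentence_prob(s, inside, p_root):
    """P_s = sum over tokens w in s of P_ROOT(w) * P_INSIDE(sealed w, 0, len(s))."""
    n = len(s)
    return sum(p_root[s[loc_w]] * inside.get(("seal", loc_w, 0, n), 0.0)
               for loc_w in range(n))
\end{verbatim}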
\section{Outside probabilities}
\[
P_{OUTSIDE_s}(ROOT, i, j) = \begin{cases}
1.0 & \text{ if $i = 0$ and $j = len(s)$,}\\
0.0 & \text{ otherwise}
\end{cases}
\]
For $P_{OUTSIDE}(\SEAL{w}, i, j)$, $w$ has been attached under something
else ($\SEAL{w}$ is what we elsewhere call $\SEAL{a}$). Adjacency is
thus calculated on the basis of $h$, the head of the rule. If we are
attached by a left-attachment rule (the head is to our right), we have
$i \leq loc_l(\LOC{w}) < j \leq loc_l(\LOC{h}) < k$, while if by a
right-attachment rule (the head is to our left), we have
$k \leq loc_l(\LOC{h}) < i \leq loc_l(\LOC{w}) < j$:
\begin{align*}
  P_{OUTSIDE}&(\SEAL{\LOC{w}}, i, j) = \\
  & P_{ROOT}(w) P_{OUTSIDE}(ROOT, i, j) + \\
  & [ \sum_{k > j} ~ \sum_{\LOC{h}:j \leq loc_l(\LOC{h}) < k} \sum_{\SMTR{\LOC{h}} \in \{\RGOL{\LOC{h}}, \GOL{\LOC{h}}\}} P_{STOP}(\neg stop|h, left, adj(j, \LOC{h})) P_{ATTACH}(w|h, left) \\
  & \qquad \qquad \qquad \qquad \qquad P_{OUTSIDE}(\SMTR{\LOC{h}}, i, k) P_{INSIDE}(\SMTR{\LOC{h}}, j, k) ] ~ + \\
  & [ \sum_{k < i} ~ \sum_{\LOC{h}:k \leq loc_l(\LOC{h}) < i} \sum_{\SMTR{\LOC{h}} \in \{\LGOR{\LOC{h}}, \GOR{\LOC{h}}\}} P_{STOP}(\neg stop|h, right, adj(i, \LOC{h})) P_{ATTACH}(w|h, right) \\
  & \qquad \qquad \qquad \qquad \qquad P_{INSIDE}(\SMTR{\LOC{h}}, k, i) P_{OUTSIDE}(\SMTR{\LOC{h}}, k, j) ]
\end{align*}
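
A direct (hypothetical) transcription of this case into code, under the
same assumed chart layout and parameter dicts as the earlier sketches:

\begin{verbatim}
# Sketch: P_OUTSIDE for a sealed argument w over (i, j), assuming filled
# inside/outside dicts keyed (seal level, loc, i, j). Names are illustrative.
def outside_sealed(w, loc_w, i, j, s, inside, outside, p_root, p_nonstop, p_attach):
    n = len(s)
    total = p_root[w] * (1.0 if (i, j) == (0, n) else 0.0)  # P_OUTSIDE(ROOT, i, j)
    # attached as a left argument: the head h lies to the right, j <= loc_l(h) < k
    for k in range(j + 1, n + 1):
        for loc_h in range(j, k):
            h = s[loc_h]
            adj = (j == loc_h)                      # adj(j, h)
            for lvl in ("rgol", "gol"):
                total += (p_nonstop[(h, "left", adj)] * p_attach[(w, h, "left")]
                          * outside.get((lvl, loc_h, i, k), 0.0)
                          * inside.get((lvl, loc_h, j, k), 0.0))
    # attached as a right argument: the head h lies to the left, k <= loc_l(h) < i
    for k in range(0, i):
        for loc_h in range(k, i):
            h = s[loc_h]
            adj = (i == loc_h + 1)                  # adj(i, h)
            for lvl in ("lgor", "gor"):
                total += (p_nonstop[(h, "right", adj)] * p_attach[(w, h, "right")]
                          * inside.get((lvl, loc_h, k, i), 0.0)
                          * outside.get((lvl, loc_h, k, j), 0.0))
    return total
\end{verbatim}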
For $\RGOL{w}$ we know it is either under a left stop rule or it is
the right daughter of a left attachment rule ($k \leq loc_l(\LOC{a}) <
i \leq loc_l(\LOC{w}) < j$), and these are adjacent if the start point
($i$) equals $loc_l(\LOC{w})$:
\begin{align*}
  P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i, \LOC{w})) P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
  & [ \sum_{k < i} ~ \sum_{\LOC{a}:k \leq loc_l(\LOC{a}) < i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
  & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\RGOL{\LOC{w}}, k, j) ]
\end{align*}
For $\GOR{w}$ we are either under a right stop or the left daughter of
a right attachment rule ($i \leq loc_l(\LOC{w}) < j \leq
loc_l(\LOC{a}) < k$), adjacent iff the end point ($j$) equals
$loc_r(\LOC{w})$:
\begin{align*}
  P_{OUTSIDE}(\GOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j, \LOC{w})) P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) ~ + \\
  & [ \sum_{k > j} ~ \sum_{\LOC{a}:j \leq loc_l(\LOC{a}) < k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
  & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{OUTSIDE}(\GOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
\end{align*}
$\GOL{w}$ is just like $\RGOL{w}$, except for the outside probability
of having a stop above, where we use $\LGOR{w}$:
\begin{align*}
  P_{OUTSIDE}(\GOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i, \LOC{w})) P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) ~ + \\
  & [ \sum_{k < i} ~ \sum_{\LOC{a}:k \leq loc_l(\LOC{a}) < i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
  & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\GOL{\LOC{w}}, k, j) ]
\end{align*}
$\LGOR{w}$ is just like $\GOR{w}$, except for the outside probability
of having a stop above, where we use $\SEAL{w}$:
\begin{align*}
  P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j, \LOC{w})) P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
  & [ \sum_{k > j} ~ \sum_{\LOC{a}:j \leq loc_l(\LOC{a}) < k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
  & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{OUTSIDE}(\LGOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
\end{align*}
\section{Reestimating the rules} % todo

\subsection{$c$ and $w$ (helper formulas used below)}
% todo: left out rule-probability! wops (also, make P_{fooside}
$c_s(\SMTR{\LOC{w}} : i, j)$ is ``the expected fraction of parses of
$s$ with a node labeled $\SMTR{w}$ extending from position $i$ to
position $j$'' \citep[p.~88]{klein-thesis}, here defined to equal
$v_{q}$ of \citet[p.~41]{lari-csl90}\footnote{In terms of regular EM,
this is the count of trees ($f_{T_q}(x)$ in
\citet[p.~46]{prescher-em}) in which the node extended from $i$ to $j$.}:
\[
c_s(\SMTR{\LOC{w}} : i, j) = P_{INSIDE_s}(\SMTR{\LOC{w}}, i, j) P_{OUTSIDE_s}(\SMTR{\LOC{w}}, i, j) / P_s
\]
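
In code this is just a pointwise product of the two charts divided by the
sentence probability (illustrative sketch, same assumed chart layout as above):

\begin{verbatim}
# Sketch: c_s(node : i, j); `lvl` is the seal level, `loc_w` the token location.
def c(lvl, loc_w, i, j, inside, outside, p_s):
    """Expected fraction of parses of s with this node spanning (i, j)."""
    return (inside.get((lvl, loc_w, i, j), 0.0)
            * outside.get((lvl, loc_w, i, j), 0.0) / p_s)
\end{verbatim}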
$w_s$ is $w_{q}$ from \citet[p.~41]{lari-csl90}, generalized to $\SMTR{h}$ and $dir$:
\begin{align*}
  w_s(\SEAL{a} & : \SMTR{\LOC{h}}, left, i, j) = \\
  & 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:i \leq loc_l(\LOC{a}) < k}
  & P_{STOP}(\neg stop|h, left, adj(k, \LOC{h})) P_{CHOOSE}(a|h, left) \\
  & & P_{INSIDE_s}(\SEAL{\LOC{a}}, i, k) P_{INSIDE_s}(\SMTR{\LOC{h}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
\end{align*}
\begin{align*}
  w_s(\SEAL{a} & : \SMTR{\LOC{h}}, right, i, j) = \\
  & 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:k \leq loc_l(\LOC{a}) < j}
  & P_{STOP}(\neg stop|h, right, adj(k, \LOC{h})) P_{CHOOSE}(a|h, right) \\
  & & P_{INSIDE_s}(\SMTR{\LOC{h}}, i, k) P_{INSIDE_s}(\SEAL{\LOC{a}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
\end{align*}
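
A sketch of the right-direction case, mirroring the formula term by term
(the left case is symmetric); the parameter dicts and chart layout are
assumptions carried over from the earlier sketches:

\begin{verbatim}
# Sketch: w_s(sealed a : h-node, right, i, j).
def w_right(a, lvl_h, loc_h, i, j, s, inside, outside, p_nonstop, p_choose, p_s):
    h = s[loc_h]
    out_h = outside.get((lvl_h, loc_h, i, j), 0.0)
    total = 0.0
    for k in range(i + 1, j):                     # i < k < j
        adj = (k == loc_h + 1)                    # adj(k, h)
        for loc_a in range(k, j):                 # k <= loc_l(a) < j
            if s[loc_a] != a:
                continue
            total += (p_nonstop[(h, "right", adj)] * p_choose[(a, h, "right")]
                      * inside.get((lvl_h, loc_h, i, k), 0.0)
                      * inside.get(("seal", loc_a, k, j), 0.0)
                      * out_h)
    return total / p_s
\end{verbatim}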
Let $\hat{P}$ be the new STOP/CHOOSE probabilities (since the old $P$
are used in $P_{INSIDE}$ and $P_{OUTSIDE}$).
\subsection{Attachment reestimation}
$\hat{a}$ is given in \citet[p.~41]{lari-csl90}. Here $i<loc_l(\LOC{h})$
since we want trees with at least one attachment:
\[
\hat{a} (a | \SMTR{h}, left) = \frac
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j \geq loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, left, i, j) }
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j \geq loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
\]
Here $j>loc_r(\LOC{h})$ since we want at least one attachment:
\[
\hat{a} (a | \SMTR{h}, right) = \frac
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i \leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, right, i, j) }
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i \leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
\]
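
Accumulating numerator and denominator over the corpus might then look
like the sketch below, reusing the helper sketches above; the function
returning filled charts and $P_s$ for a sentence is an assumption:

\begin{verbatim}
# Sketch: reestimated right-attachment probability a^(a | h-node, right).
# charts(s) is assumed to return (inside, outside, P_s) for sentence s.
def a_hat_right(a, h_tag, lvl_h, corpus, charts, p_nonstop, p_choose):
    num = den = 0.0
    for s in corpus:
        inside, outside, p_s = charts(s)
        n = len(s)
        for loc_h, tag in enumerate(s):
            if tag != h_tag:
                continue
            for i in range(0, loc_h + 1):           # i <= loc_l(h)
                for j in range(loc_h + 2, n + 1):   # j >  loc_r(h)
                    num += w_right(a, lvl_h, loc_h, i, j, s, inside, outside,
                                   p_nonstop, p_choose, p_s)
                    den += c(lvl_h, loc_h, i, j, inside, outside, p_s)
    return num / den if den > 0.0 else 0.0
\end{verbatim}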
For the first/lowest attachments, $w_s$ and $c_s$ have zero probability
where $i<loc_l(\LOC{h})$ (for $\GOR{h}$) or $j>loc_r(\LOC{h})$ (for $\GOL{h}$);
this is implicit in $P_{INSIDE}$.
\[
\hat{P}_{CHOOSE} (a | h, left) =
\hat{a} (a | \GOL{h}, left)
+ \hat{a} (a | \RGOL{h}, left)
\]
\[
\hat{P}_{CHOOSE} (a | h, right) =
\hat{a} (a | \GOR{h}, right)
+ \hat{a} (a | \LGOR{h}, right)
\]
\subsection{Stop reestimation}
The following is based on \citet[p.~88]{klein-thesis}. For the
non-adjacent rules, $i<loc_l(\LOC{h})$ on the left and $j>loc_r(\LOC{h})$ on the
right, while for the adjacent rules these are equal (respectively).

To avoid some redundancy below, define a helper function $\hat{d}$ as follows:
\[
\hat{d}(\SMTR{h},\SDTR{h},\XI,\XJ) = \frac
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
{ \sum_{s \in S} \sum_{\SDTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SDTR{\LOC{h}} : i, j) }
\]
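
A sketch of $\hat{d}$ with the comparison operators passed in as
functions, again reusing the $c_s$ sketch above (all names illustrative):

\begin{verbatim}
# Sketch: the helper d^(smtr, sdtr, XI, XJ); xi/xj are comparison functions
# such as operator.lt, operator.eq, operator.ge.
import operator

def d_hat(lvl_num, lvl_den, xi, xj, h_tag, corpus, charts):
    num = den = 0.0
    for s in corpus:
        inside, outside, p_s = charts(s)
        n = len(s)
        for loc_h, tag in enumerate(s):
            if tag != h_tag:
                continue
            for i in range(0, loc_h + 1):
                if not xi(i, loc_h):              # i XI loc_l(h)
                    continue
                for j in range(loc_h + 1, n + 1):
                    if not xj(j, loc_h + 1):      # j XJ loc_r(h)
                        continue
                    num += c(lvl_num, loc_h, i, j, inside, outside, p_s)
                    den += c(lvl_den, loc_h, i, j, inside, outside, p_s)
    return num / den if den > 0.0 else 0.0

# e.g. the first term of the adjacent left stop below:
# d_hat("seal", "rgol", operator.eq, operator.ge, h_tag, corpus, charts)
\end{verbatim}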
Then these are our reestimated stop probabilities:
\[
\hat{P}_{STOP} (STOP|h, left, non\text{-}adj) =
\hat{d}(\SEAL{h},\RGOL{h},<,\geq) +
\hat{d}(\LGOR{h},\GOL{h},<,=)
\]
\[
\hat{P}_{STOP} (STOP|h, left, adj) =
\hat{d}(\SEAL{h},\RGOL{h},=,\geq) +
\hat{d}(\LGOR{h},\GOL{h},=,=)
\]
\[
\hat{P}_{STOP} (STOP|h, right, non\text{-}adj) =
\hat{d}(\RGOL{h},\GOR{h},=,>) +
\hat{d}(\SEAL{h},\LGOR{h},\leq,>)
\]
\[
\hat{P}_{STOP} (STOP|h, right, adj) =
\hat{d}(\RGOL{h},\GOR{h},=,=) +
\hat{d}(\SEAL{h},\LGOR{h},\leq,=)
\]
\subsection{Root reestimation}
Following \citet[p.~46]{prescher-em}, to find the reestimated
probability of a PCFG rule, we first find the new treebank frequencies
$f_{T_P}(tree)=P(tree)/P_s$, then for a rule $X' \rightarrow X$ we
divide the new frequencies of the trees which use this rule by
those of the trees containing the node $X'$. $ROOT$ appears once per
tree, meaning we divide by $1$ per sentence\footnote{Assuming each
tree has frequency $1$.}, so $\hat{P}_{ROOT}(h)=\sum_{tree:ROOT
\rightarrow \SEAL{h} \text{ used in } tree} f_{T_P}(tree)=\sum_{tree:ROOT
\rightarrow \SEAL{h} \text{ used in } tree} P(tree)/P_s$, which turns into:
\[
\hat{P}_{ROOT} (h) = \frac
{\sum_{s \in S} 1 / P_s \cdot \sum_{\LOC{h} \in s} P_{ROOT}(\LOC{h}) P_{INSIDE_s}(\SEAL{h}, 0, len(s))}
{\sum_{s \in S} 1}
\]
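
A sketch of this computation, under the assumption (as in the derivation
above) that the denominator is one count per sentence; chart layout as in
the earlier sketches:

\begin{verbatim}
# Sketch: root reestimation, one ROOT per sentence.
def p_root_hat(h_tag, corpus, charts, p_root):
    num = 0.0
    for s in corpus:
        inside, outside, p_s = charts(s)
        n = len(s)
        num += sum(p_root[h_tag] * inside.get(("seal", loc_h, 0, n), 0.0)
                   for loc_h, tag in enumerate(s) if tag == h_tag) / p_s
    return num / len(corpus)
\end{verbatim}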
% todo: the following just didn't turn out right, but it ought to be
% possible to use this to check the CNF-like unary attachments...

% The root attachment is just a unary PCFG rule $ROOT \rightarrow
% \SEAL{h}$. Modifying binary rule reestimation from
% \citet[p.~41]{lari-csl90} to unary rules gives:

% \hat{P}_{RULE}(ROOT \rightarrow \SEAL{h}) = \frac
% { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} 1/P_s \cdot P_{RULE}(ROOT \rightarrow \SEAL{h})P_{INSIDE_s}(\SEAL{h}, i, j)P_{OUTSIDE_s}(ROOT, i, j) }
% { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} c_s(\SEAL{h}, i, j) }

% Since $P_{OUTSIDE}(ROOT, i, j)$ is $1$ if $i=0$ and $j=len(s)$, and $0$ otherwise, this reduces into:

% \hat{P}_{RULE}(ROOT \rightarrow \SEAL{h}) = \frac
% { \sum_{s\in S} 1/P_s \cdot P_{RULE}(ROOT \rightarrow \SEAL{h})P_{INSIDE_s}(\SEAL{h}, 0, len(s)) }
% { \sum_{s\in S} 1/P_s \cdot P_{INSIDE_s}(\SEAL{h}, 0, len(s)) }
\section{Alternate CNF-like rules}
Since the IO algorithm as described in \citet{lari-csl90} is made for
rules in CNF, we have an alternate grammar (figure \ref{cnf-like}) for
running parallel tests where we don't have to sum over the different
$loc(h)$ in IO. This is not yet generalized to include left-first
attachment. It is also not quite CNF, since it includes some unary
rewrite rules.
\begin{figure}
\centering
\begin{tabular} % four left-aligned math tabs, one vertical line
{ >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
\multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($a[i,j,k]$ in \citet{lari-csl90})}\\
\RN{\GOR{h}} \rightarrow& \GOR{h} & \SEAL{a} &P_{STOP}(\neg stop|h, right, adj) \cdot P_{ATTACH}(a|h, right) \\
\RN{\GOR{h}} \rightarrow& \RN{\GOR{h}} & \SEAL{a} &P_{STOP}(\neg stop|h, right, non\text{-}adj) \cdot P_{ATTACH}(a|h, right) \\
\RGOL{h} \rightarrow& \GOR{h} &STOP &P_{STOP}(stop|h, right, adj) \\
\RGOL{h} \rightarrow& \RN{\GOR{h}} &STOP &P_{STOP}(stop|h, right, non\text{-}adj) \\
\LN{\RGOL{h}} \rightarrow& \SEAL{a} & \RGOL{h} &P_{STOP}(\neg stop|h, left, adj) \cdot P_{ATTACH}(a|h, left) \\
\LN{\RGOL{h}} \rightarrow& \SEAL{a} & \LN{\RGOL{h}} &P_{STOP}(\neg stop|h, left, non\text{-}adj) \cdot P_{ATTACH}(a|h, left) \\
\SEAL{h} \rightarrow& STOP & \RGOL{h} &P_{STOP}(stop|h, left, adj) \\
\SEAL{h} \rightarrow& STOP & \LN{\RGOL{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
\end{tabular}
\caption{Alternate CFG rules (where a child node has an arrow below,
we use non-adjacent probabilities), defined for all words/POS-tags
$h$.}\label{cnf-like}
\end{figure}
The inside probabilities are the same as those given in
\citet{lari-csl90}, with the following exceptions:

When calculating $P_{INSIDE}(\SMTR{h}, i, j)$ and summing through possible
rules which rewrite $\SMTR{h}$, if a rule is of the form $\SMTR{h} \rightarrow
STOP ~ \SDTR{h}$ or $\SMTR{h} \rightarrow \SDTR{h} ~ STOP$, we add $P_{RULE}\cdot
P_{INSIDE}(\SDTR{h}, i, j)$ (that is, rewrite for the same sentence range);
and, as a consequence of these unary rules: for ``terminal rules''
($P_{ORDER}$) to be applicable, not only must $i = j-1$, but also the
left-hand side symbol of the rule must be of the form $\GOR{h}$.
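
A sketch of the extra unary step in the inside pass, assuming a dict of
unary rules (parent $\rightarrow$ STOP child or parent $\rightarrow$
child STOP); the names are illustrative, not the actual implementation:

\begin{verbatim}
# Sketch: unary additions to a CKY-style inside pass for the CNF-like grammar.
# `unary_rules` maps a parent symbol to (child, P_RULE) pairs.
def add_unary_inside(inside, unary_rules, i, j):
    for parent, rewrites in unary_rules.items():
        for child, p_rule in rewrites:
            # the child is rewritten over the same sentence range (i, j)
            inside[(parent, i, j)] = (inside.get((parent, i, j), 0.0)
                                      + p_rule * inside.get((child, i, j), 0.0))
\end{verbatim}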
Similarly, the outside probabilities are the same as those for pure
CNF rules, with the exception that we add the unary rewrite
\begin{align*}
\sum_{\SMTR{h}} [&P_{OUTSIDE}(\SMTR{h},i,j) \cdot P_{RULE}(\SMTR{h} \rightarrow \SDTR{h} ~ STOP) \\
+ &P_{OUTSIDE}(\SMTR{h},i,j) \cdot P_{RULE}(\SMTR{h} \rightarrow STOP ~ \SDTR{h}) ]
\end{align*}
to $P_{OUTSIDE}(\SDTR{h},i,j)$ (e.g.\ $f(s,t,i)$).
The reestimation just needs to be expanded to allow unary stop rules;
this is similar to the formula for binary rules in
\citet[p.~41]{lari-csl90}:
\[
\hat{P}_{RULE}(\SMTR{h} \rightarrow \SDTR{h}) = \frac
{ \sum_{s \in S} \sum_{i<len(s)} \sum_{j>i} 1/P_s \cdot P_{RULE}(\SMTR{h}\rightarrow \SDTR{h}) P_{INSIDE}(\SDTR{h}, i, j) P_{OUTSIDE}(\SMTR{h}, i, j) }
{ \sum_{s \in S} \sum_{i<len(s)} \sum_{j>i} c_s(\SMTR{h}, i, j) }
\]
(The denominator is unchanged, but the numerator sums $w_s$ defined
for unary rules; $\SMTR{h}\rightarrow \SDTR{h}$ is meant to include both left and
right stop rules.)

% todo: add ROOT rule to CNF grammar?
}
390 Alternatively, one may use the ``pure CNF'' grammar of figure
391 \ref{cnf-T
} and figure
\ref{cnf-NT
}, below (not yet
392 implemented). Using the harmonic distribution to initialize this will
393 still yield a mass-deficient grammar, but at least this allows the
394 regular IO algorithm to be run.
398 \begin{tabular
} % four left-aligned math tabs, one vertical line
399 { >
{$
}l<
{$
} >
{$
}l<
{$
} >
{$
}l<
{$
} | >
{$
}l<
{$
} }
400 \multicolumn{3}{c
}{Rule
} &
\multicolumn{1}{c
}{$P_
{RULE
}$ ($b
[i,m
]$ in
\citet{lari-csl90
})
}\\
403 h
\rightarrow&
\text{``h''
}& &
1 \\
405 \SEAL{h
} \rightarrow&
\text{``h''
}& &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, adj) \\
407 ROOT
\rightarrow&
\text{``h''
}& &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, adj)
\cdot P_
{ROOT
}(h) \\
409 \caption{Terminals of pure CNF rules, defined for all POS-tags
415 \begin{tabular
} % four left-aligned math tabs, one vertical line
416 { >
{$
}l<
{$
} >
{$
}l<
{$
} >
{$
}l<
{$
} | >
{$
}l<
{$
} }
417 \multicolumn{3}{c
}{Rule
} &
\multicolumn{1}{c
}{$P_
{RULE
}$ ($a
[i,j,k
]$ in
\citet{lari-csl90
})
}\\
420 \SEAL{h
} \rightarrow& h &
\SEAL{a
} &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
421 &&&
\cdot P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj) \\
[3.5pt
]
422 \SEAL{h
} \rightarrow& h &
\GOR{.h
} &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
423 \GOR{.h
} \rightarrow&
\SEAL{a
} &
\SEAL{b
} &P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj) \\
424 &&&
\cdot P_
{ATTACH
}(b|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
425 \GOR{.h
} \rightarrow&
\SEAL{a
} &
\GOR{h
} &P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj) \\
[3.5pt
]
426 \GOR{h
} \rightarrow&
\SEAL{a
} &
\SEAL{b
} &P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, non
\text{-
}adj) \\
427 &&&
\cdot P_
{ATTACH
}(b|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
428 \GOR{h
} \rightarrow&
\SEAL{a
} &
\GOR{h
} &P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
429 \SEAL{h
} \rightarrow&
\SEAL{a
} &h &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{STOP
}(stop|h, right, adj) \\
430 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj) \\
[3.5pt
]
431 \SEAL{h
} \rightarrow&
\GOL{h.
} &h &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{STOP
}(stop|h, right, adj) \\
[3.5pt
]
432 \GOL{h.
} \rightarrow&
\SEAL{b
} &
\SEAL{a
} &P_
{ATTACH
}(b|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, non
\text{-
}adj) \\
433 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj) \\
[3.5pt
]
434 \GOL{h.
} \rightarrow&
\GOL{h
} &
\SEAL{a
} &P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj) \\
[3.5pt
]
435 \GOL{h
} \rightarrow&
\SEAL{a
} &
\SEAL{b
} &P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, non
\text{-
}adj) \\
436 &&&
\cdot P_
{ATTACH
}(b|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, non
\text{-
}adj) \\
[3.5pt
]
437 \GOL{h
} \rightarrow&
\GOL{h
} &
\SEAL{a
} &P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, non
\text{-
}adj) \\
[3.5pt
]
438 \SEAL{h
} \rightarrow&
\GOL{h.
} &
\SEAL{\GOR{h
}} &P_
{STOP
}(stop|h, left, non
\text{-
}adj) \\
[3.5pt
]
439 \SEAL{h
} \rightarrow&
\SEAL{a
} &
\SEAL{\GOR{h
}} &P_
{STOP
}(stop|h, left, non
\text{-
}adj) \\
440 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj) \\
[3.5pt
]
441 \SEAL{\GOR{h
}} \rightarrow& h &
\GOR{.h
} &P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
442 \SEAL{\GOR{h
}} \rightarrow& h &
\SEAL{a
} &P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
443 &&&
\cdot P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj) \\
[3.5pt
]
444 ROOT
\rightarrow& h &
\SEAL{a
} &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
445 &&&
\cdot P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
446 ROOT
\rightarrow& h &
\GOR{.h
} &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, non
\text{-
}adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
447 ROOT
\rightarrow&
\SEAL{a
} &h &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{STOP
}(stop|h, right, adj) \\
448 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
449 ROOT
\rightarrow&
\GOL{h.
} &h &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{STOP
}(stop|h, right, adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
450 ROOT
\rightarrow&
\GOL{h.
} &
\SEAL{\GOR{h
}} &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
451 ROOT
\rightarrow&
\SEAL{a
} &
\SEAL{\GOR{h
}} &P_
{STOP
}(stop|h, left, non
\text{-
}adj) \\
452 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
454 \caption{Non-terminals of pure CNF rules, defined for all POS-tags
455 $h$, $a$ and $b$.
}\label{cnf-NT
}
\bibliography{./statistical.bib}
\bibliographystyle{plainnat}

\end{document}