1 % Created 2008-06-13 Fri 17:05
2 \documentclass[11pt,
a4paper]{article
}
3 \usepackage[utf8
]{inputenc}
4 \usepackage[T1]{fontenc}
18 \avmoptions{sorted,active
}
20 \avmsortfont{\scriptsize\it}
22 \usepackage{array
} % for a tabular with math in it
25 \author{Kevin Brubeck Unhammer
}
32 This is an attempt at fleshing out the details of the inside-outside
33 algorithm
\citep{lari-csl90
} applied to the DMV model of
38 \newcommand{\LOC}[1]{\textbf{#1}}
39 \newcommand{\GOR}[1]{\overrightarrow{#1}}
40 \newcommand{\RGOL}[1]{\overleftarrow{\overrightarrow{#1}}}
41 \newcommand{\SEAL}[1]{\overline{#1}}
42 \newcommand{\LGOR}[1]{\overrightarrow{\overleftarrow{#1}}}
43 \newcommand{\GOL}[1]{\overleftarrow{#1}}
44 \newcommand{\LN}[1]{\underleftarrow{#1}}
45 \newcommand{\RN}[1]{\underrightarrow{#1}}
46 \newcommand{\XI}{\lessdot}
47 \newcommand{\XJ}{\gtrdot}
48 \newcommand{\SMTR}[1]{\dddot{#1}}
49 \newcommand{\SDTR}[1]{\ddot{#1}}
51 \section{Note on notation
}
52 $i, j, k$ are sentence positions (between words), where $i$ and $j$
53 are always the start and end, respectively, for what we're calculating
54 ($k$ is between $i$ and $j$ for $P_
{INSIDE
}$, to their right or left
55 for $P_
{OUTSIDE
}$). $s
\in S$ are sentences in the corpus. $
\LOC{w
}$
56 is a word token (actually POS-token) of type $w$ at a certain sentence
57 location. If $
\LOC{w
}$ is between $i$ and $i+
1$, $loc(
\LOC{w
})=i$
58 following
\citet{klein-thesis
}, meaning $i$ is adjacent to $
\LOC{w
}$
59 on the left, while $j=loc(
\LOC{w
})+
1$ means that $j$ is adjacent to
60 $
\LOC{w
}$ on the right. To simplify, $loc_l(
\LOC{w
}):=loc(
\LOC{w
})$ and
61 $loc_r(
\LOC{w
}):=loc(
\LOC{w
})+
1$. We write $
\LOC{h
}$ if this is a head
62 in the rule being used, $
\LOC{a
}$ if it is an attached argument.
64 Notational differences between the thesis
\citep{klein-thesis
} and the
70 $w
\urcorner$ & $
\RGOL{w
}$ \\
71 $
\ulcorner{w
}\urcorner$ & $
\SEAL{w
}$ \\
74 I use $
\SMTR{w
}$ (or $
\SDTR{w
}$) to signify one of either $w,
\GOR{w
},
75 \RGOL{w
},
\LGOR{w
},
\GOL{w
}$ or $
\SEAL{w
}$
\footnote{This means that
76 $
\SMTR{\LOC{w
}}$ is the triplet of the actual POS-tag, its sentence
77 location as a token, and the ``level of seals''.
}.
80 \section{Inside probabilities
}
81 $P_
{INSIDE
}$ is defined in
\citet[pp.~
106-
108]{klein-thesis
}, the only
82 thing we need to add is that for right attachments,
83 $i
\leq loc_l(w)<k
\leq loc_l(
\LOC{a
})<j$ while for left attachments,
84 $i
\leq loc_l(
\LOC{a
})<k
\leq loc_l(w)<j$.
87 \
[ \forall{w
}[P_
{ORDER
}(right
\text{-
}first|w)=
1.0] \
] since the DMV implementation
88 is not yet generalized to both directions.)
94 \subsection{Sentence probability
}
96 $P_s$ is the sentence probability, based on
97 \citet[p.~
38]{lari-csl90
}. Since the ROOT rules are different from the
98 rest, we sum them explicitly in this definition:
100 P_s =
\sum_{\LOC{w
} \in s
} P_
{ROOT
}(
\LOC{w
}) P_
{INSIDE
}(
\SEAL{\LOC{w
}},
0, len(s))
103 \section{Outside probabilities
}
106 P_
{OUTSIDE_s
}(ROOT, i, j) =
\begin{cases
}
107 1.0 &
\text{ if $i =
0$ and $j = len(s)$,
}\\
108 0.0 &
\text{ otherwise
}
112 For $P_
{OUTSIDE
}(
\SEAL{w
}, i, j)$, $w$ is attached to under something
113 else ($
\SEAL{w
}$ is what we elsewhere call $
\SEAL{a
}$). Adjacency is
114 thus calculated on the basis of $h$, the head of the rule. If we are
115 attached to from the left we have $i
\leq loc_l(
\LOC{w
}) < j
\leq loc_l(
\LOC{h
}) < k$, while
116 from the right we have $k
\leq loc_l(
\LOC{h
}) < i
\leq loc_l(
\LOC{w
}) < j$:
118 P_
{OUTSIDE
}&(
\SEAL{\LOC{w
}}, i, j) = \\
119 & P_
{ROOT
}(w) P_
{OUTSIDE
}(ROOT, i, j) + \\
120 &
[ \sum_{k > j
} ~
\sum_{\LOC{h
}:j
\leq loc_l(
\LOC{h
})<k
} \sum_{\SMTR{\LOC{h
}} \in \
{\RGOL{\LOC{h
}},
\GOL{\LOC{h
}}\
}} P_
{STOP
}(
\neg stop|h, left, adj(j,
\LOC{h
})) P_
{ATTACH
}(w|h, left) \\
121 &
\qquad \qquad \qquad \qquad \qquad P_
{OUTSIDE
}(
\SMTR{\LOC{h
}}, i, k) P_
{INSIDE
}(
\SMTR{\LOC{h
}}, j, k)
] ~ + \\
122 &
[ \sum_{k < i
} ~
\sum_{\LOC{h
}:k
\leq loc_l(
\LOC{h
})<i
} \sum_{\SMTR{\LOC{h
}} \in \
{\LGOR{\LOC{h
}},
\GOR{\LOC{h
}}\
}} P_
{STOP
}(
\neg stop|h, right, adj(i,
\LOC{h
})) P_
{ATTACH
}(w|h, right) \\
123 &
\qquad \qquad \qquad \qquad \qquad P_
{INSIDE
}(
\SMTR{\LOC{h
}}, k, i) P_
{OUTSIDE
}(
\SMTR{\LOC{h
}}, k, j)
]
126 For $
\RGOL{w
}$ we know it is either under a left stop rule or it is
127 the right daughter of a left attachment rule ($k
\leq loc_l(
\LOC{a
}) <
128 i
\leq loc_l(
\LOC{w
}) < j$), and these are adjacent if the start point
129 ($i$) equals $loc_l(
\LOC{w
})$:
131 P_
{OUTSIDE
}(
\RGOL{\LOC{w
}}, i, j) = & P_
{STOP
}(stop|w, left, adj(i,
132 \LOC{w
}))P_
{OUTSIDE
}(
\SEAL{\LOC{w
}}, i, j) ~ + \\
133 &
[ \sum_{k < i
} ~
\sum_{\LOC{a
}:k
\leq loc_l(
\LOC{a
})<i
} P_
{STOP
}(
\neg stop|w, left, adj(i,
\LOC{w
})) P_
{ATTACH
}(a|w, left) \\
134 & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_
{INSIDE
}(
\SEAL{\LOC{a
}}, k, i) P_
{OUTSIDE
}(
\RGOL{\LOC{w
}}, k, j)
]
137 For $
\GOR{w
}$ we are either under a right stop or the left daughter of
138 a right attachment rule ($i
\leq loc_l(
\LOC{w
}) < j
\leq
139 loc_l(
\LOC{a
}) < k$), adjacent iff the the end point ($j$) equals
142 P_
{OUTSIDE
}(
\GOR{\LOC{w
}}, i, j) = & P_
{STOP
}(stop|w, right, adj(j,
143 \LOC{w
}))P_
{OUTSIDE
}(
\RGOL{\LOC{w
}}, i, j) ~ + \\
144 &
[ \sum_{k > j
} ~
\sum_{\LOC{a
}:j
\leq loc_l(
\LOC{a
})<k
} P_
{STOP
}(
\neg stop|w, right, adj(j,
\LOC{w
})) P_
{ATTACH
}(a|w, right) \\
145 & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_
{OUTSIDE
}(
\GOR{\LOC{w
}}, i, k) P_
{INSIDE
}(
\SEAL{\LOC{a
}}, j, k)
]
148 $
\GOL{w
}$ is just like $
\RGOL{w
}$, except for the outside probability
149 of having a stop above, where we use $
\LGOR{w
}$:
151 P_
{OUTSIDE
}(
\GOL{\LOC{w
}}, i, j) = & P_
{STOP
}(stop|w, left, adj(i,
152 \LOC{w
}))P_
{OUTSIDE
}(
\LGOR{\LOC{w
}}, i, j) ~ + \\
153 &
[ \sum_{k < i
} ~
\sum_{\LOC{a
}:k
\leq loc_l(
\LOC{a
})<i
} P_
{STOP
}(
\neg stop|w, left, adj(i,
\LOC{w
})) P_
{ATTACH
}(a|w, left) \\
154 & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_
{INSIDE
}(
\SEAL{\LOC{a
}}, k, i) P_
{OUTSIDE
}(
\GOL{\LOC{w
}}, k, j)
]
157 $
\LGOR{w
}$ is just like $
\GOR{w
}$, except for the outside probability
158 of having a stop above, where we use $
\SEAL{w
}$:
160 P_
{OUTSIDE
}(
\LGOR{\LOC{w
}}, i, j) = & P_
{STOP
}(stop|w, right, adj(j,
161 \LOC{w
}))P_
{OUTSIDE
}(
\SEAL{\LOC{w
}}, i, j) ~ + \\
162 &
[ \sum_{k > j
} ~
\sum_{\LOC{a
}:j
\leq loc_l(
\LOC{a
})<k
} P_
{STOP
}(
\neg stop|w, right, adj(j,
\LOC{w
})) P_
{ATTACH
}(a|w, right) \\
163 & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_
{OUTSIDE
}(
\LGOR{\LOC{w
}}, i, k) P_
{INSIDE
}(
\SEAL{\LOC{a
}}, j, k)
]
167 \section{Reestimating the rules
} % todo
169 \subsection{$c$ and $w$ (helper formulas used below)
}
170 % todo: left out rule-probability! wops (also, make P_{fooside}
173 $c_s(
\SMTR{\LOC{w
}} : i, j)$ is ``the expected fraction of parses of $s$ with a
174 node labeled $
\SMTR{w
}$ extending from position $i$ to position $j$''
175 \citep[p.~
88]{klein-thesis
}, here defined to equal $v_
{q
}$ of
176 \citet[p.~
41]{lari-csl90
}:
178 c_s(
\SMTR{\LOC{w
}} : i, j) = P_
{INSIDE_s
}(
\SMTR{\LOC{w
}}, i, j) P_
{OUTSIDE_s
}(
\SMTR{\LOC{w
}}, i, j) / P_s
181 $w_s$ is $w_
{q
}$ from
\citet[p.~
41]{lari-csl90
}, generalized to $
\SMTR{h
}$ and $dir$:
183 w_s(
\SEAL{a
} & :
\SMTR{\LOC{h
}}, left, i, j) = \\
184 &
1/P_s
\sum_{k:i<k<j
} ~
\sum_{\LOC{a
}:i
\leq loc_l(
\LOC{a
})<k
}
185 & P_
{STOP
}(
\neg stop|h, left, adj(k,
\SMTR{\LOC{h
}})) P_
{CHOOSE
}(a|h, left) \\
186 & & P_
{INSIDE_s
}(
\SEAL{\LOC{a
}}, i, k) P_
{INSIDE_s
}(
\SMTR{\LOC{h
}}, k, j) P_
{OUTSIDE_s
}(
\SMTR{\LOC{h
}}, i, j)
189 w_s(
\SEAL{a
} & :
\SMTR{\LOC{h
}}, right, i, j) = \\
190 &
1/P_s
\sum_{k:i<k<j
} ~
\sum_{\LOC{a
}:k
\leq loc_l(
\LOC{a
})<j
}
191 & P_
{STOP
}(
\neg stop|h, right, adj(k,
\SMTR{\LOC{h
}})) P_
{CHOOSE
}(a|h, right) \\
192 & & P_
{INSIDE_s
}(
\SMTR{\LOC{h
}}, i, k) P_
{INSIDE_s
}(
\SEAL{\LOC{a
}}, k, j) P_
{OUTSIDE_s
}(
\SMTR{\LOC{h
}}, i, j)
195 Let $
\hat{P
}$ be the new STOP/CHOOSE-probabilities (since the old $P$
196 are used in $P_
{INSIDE
}$ and $P_
{OUTSIDE
}$).
198 \subsection{Attachment reestimation
}
200 $
\hat{a
}$ is given in
\citet[p.~
41]{lari-csl90
}. Here $i<loc_l(
\LOC{h
})$
201 since we want trees with at least one attachment:
203 \hat{a
} (a |
\SMTR{h
}, left) =
\frac
204 { \sum_{s
\in S
} \sum_{\SMTR{\LOC{h
}}:
\LOC{h
} \in s
} \sum_{i<loc_l(
\LOC{h
})
} \sum_{j
\geq loc_r(
\LOC{h
})
} w_s(
\SEAL{a
} :
\SMTR{\LOC{h
}}, left, i, j)
}
205 { \sum_{s
\in S
} \sum_{\SMTR{\LOC{h
}}:
\LOC{h
} \in s
} \sum_{i<loc_l(
\LOC{h
})
} \sum_{j
\geq loc_r(
\LOC{h
})
} c_s(
\SMTR{\LOC{h
}} : i, j)
}
208 Here $j>loc_r(
\SMTR{\LOC{h
}})$ since we want at least one attachment:
210 \hat{a
} (a |
\SMTR{h
}, right) =
\frac
211 { \sum_{s
\in S
} \sum_{\SMTR{\LOC{h
}}:
\LOC{h
} \in s
} \sum_{i
\leq loc_l(
\LOC{h
})
} \sum_{j>loc_r(
\LOC{h
})
} w_s(
\SEAL{a
} :
\SMTR{\LOC{h
}}, right, i, j)
}
212 { \sum_{s
\in S
} \sum_{\SMTR{\LOC{h
}}:
\LOC{h
} \in s
} \sum_{i
\leq loc_l(
\LOC{h
})
} \sum_{j>loc_r(
\LOC{h
})
} c_s(
\SMTR{\LOC{h
}} : i, j)
}
215 For the first/lowest attachments, $w_s$ and $c_s$ have zero probability
216 where $i<loc_l(
\LOC{h
})$ (for $
\GOR{h
}$) or $j>loc_r(
\LOC{h
})$ (for $
\GOL{h
}$),
217 this is implicit in $P_
{INSIDE
}$.
222 \hat{P
}_
{CHOOSE
} (a | h, left) =
\frac
223 { \hat{a
} (a |
\GOL{h
}, left)
}
224 { \sum_{w
} \hat{a
} (w |
\GOL{h
}, left)
}
227 { \hat{a
} (a |
\RGOL{h
}, left)
}
228 { \sum_{w
} \hat{a
} (w |
\RGOL{h
}, left)
}
231 \hat{P
}_
{CHOOSE
} (a | h, right) =
\frac
232 { \hat{a
} (a |
\GOR{h
},right)
}
233 { \sum_{w
} \hat{a
} (w |
\GOR{h
},right)
}
236 { \hat{a
} (a |
\LGOR{h
},right)
}
237 { \sum_{w
} \hat{a
} (w |
\LGOR{h
},right)
}
240 \subsection{Stop reestimation
}
241 The following is based on
\citet[p.~
88]{klein-thesis
}. For the
242 non-adjacent rules, $i<loc_l(
\LOC{h
})$ on the left and $j>loc_r(
\LOC{h
})$ on the
243 right, while for the adjacent rules these are equal (respectively).
245 To avoid some redundancy below, define a helper function $
\hat{d
}$ as follows:
247 \hat{d
}(
\SMTR{h
},
\SDTR{h
},
\XI,
\XJ) =
\frac
248 { \sum_{s
\in S
} \sum_{\SMTR{\LOC{h
}}:
\LOC{h
} \in s
} \sum_{i:i
\XI loc_l(
\LOC{h
})
} \sum_{j:j
\XJ loc_r(
\LOC{h
})
} c_s(
\SMTR{\LOC{h
}} : i, j)
}
249 { \sum_{s
\in S
} \sum_{\SDTR{\LOC{h
}}:
\LOC{h
} \in s
} \sum_{i:i
\XI loc_l(
\LOC{h
})
} \sum_{j:j
\XJ loc_r(
\LOC{h
})
} c_s(
\SDTR{\LOC{h
}} : i, j)
}
252 Then these are our reestimated stop probabilities:
254 \hat{P
}_
{STOP
} (STOP|h, left, non
\text{-
}adj) =
255 \hat{d
}(
\SEAL{h
},
\RGOL{h
},<,
\geq) +
256 \hat{d
}(
\LGOR{h
},
\GOL{h
},<,=)
260 \hat{P
}_
{STOP
} (STOP|h, left, adj) =
261 \hat{d
}(
\SEAL{h
},
\RGOL{h
},=,
\geq) +
262 \hat{d
}(
\LGOR{h
},
\GOL{h
},=,=)
266 \hat{P
}_
{STOP
} (STOP|h, right, non
\text{-
}adj) =
267 \hat{d
}(
\RGOL{h
},
\GOR{h
},=,>) +
268 \hat{d
}(
\SEAL{h
},
\LGOR{h
},
\leq,>)
272 \hat{P
}_
{STOP
} (STOP|h, right, adj) =
273 \hat{d
}(
\RGOL{h
},
\GOR{h
},=,=) +
274 \hat{d
}(
\SEAL{h
},
\LGOR{h
},
\leq,=)
278 \subsection{Root reestimation
} % todo grok: no \sum_{i} \sum_{j} ?
280 \hat{P
}_
{ROOT
} (h) =
\frac
281 {\sum_{s
\in S
} P_
{ROOT
}(h) P_
{INSIDE_s
}(
\SEAL{h
},
0, len(s))
}
288 \section{Alternate CNF-like rules
}
289 Since the IO algorithm as described in
\citet{lari-csl90
} is made for
290 rules in CNF, we have an alternate grammar (figure
\ref{cnf-like
}) for
291 running parallell tests where we don't have to sum over the different
292 $loc(h)$ in IO. This is not yet generalized to include left-first
293 attachment. It is also not quite CNF, since it includes some unary
298 \begin{tabular
} % four left-aligned math tabs, one vertical line
299 { >
{$
}l<
{$
} >
{$
}l<
{$
} >
{$
}l<
{$
} | >
{$
}l<
{$
} }
300 \multicolumn{3}{c
}{Rule
} &
\multicolumn{1}{c
}{$P_
{RULE
}$ ($a
[i,j,k
]$ in
\citet{lari-csl90
})
}\\
303 \RN{\GOR{h
}} \rightarrow&
\GOR{h
} &
\SEAL{a
} &P_
{STOP
}(
\neg stop|h, right, adj)
\cdot P_
{ATTACH
}(a|h, right) \\
305 \RN{\GOR{h
}} \rightarrow&
\RN{\GOR{h
}} &
\SEAL{a
} &P_
{STOP
}(
\neg stop|h, right, non
\text{-
}adj)
\cdot P_
{ATTACH
}(a|h, right) \\
307 \RGOL{h
} \rightarrow&
\GOR{h
} &STOP &P_
{STOP
}(stop|h, right, adj) \\
309 \RGOL{h
} \rightarrow&
\RN{\GOR{h
}} &STOP &P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
311 \LN{\RGOL{h
}} \rightarrow&
\SEAL{a
} &
\RGOL{h
} &P_
{STOP
}(
\neg stop|h, left, adj)
\cdot P_
{ATTACH
}(a|h, left) \\
313 \LN{\RGOL{h
}} \rightarrow&
\SEAL{a
} &
\LN{\RGOL{h
}} &P_
{STOP
}(
\neg stop|h, left, non
\text{-
}adj)
\cdot P_
{ATTACH
}(a|h, left) \\
315 \SEAL{h
} \rightarrow& STOP &
\RGOL{h
} &P_
{STOP
}(stop|h, left, adj) \\
317 \SEAL{h
} \rightarrow& STOP &
\LN{\RGOL{h
}} &P_
{STOP
}(stop|h, left, non
\text{-
}adj) \\
319 \caption{Alternate CFG rules (where a child node has an arrow below,
320 we use non-adjacent probabilities), defined for all words/POS-tags
321 $h$.
}\label{cnf-like
}
324 The inside probabilities are the same as those given in
325 \citet{lari-csl90
}, with the following exceptions:
327 When calculating $P_
{INSIDE
}(
\SMTR{h
}, i, j)$ and summing through possible
328 rules which rewrite $
\SMTR{h
}$, if a rule is of the form $
\SMTR{h
} \rightarrow
329 STOP ~
\SDTR{h
}$ or $
\SMTR{h
} \rightarrow \SDTR{h
} ~ STOP$, we add $P_
{RULE
}\cdot
330 P_
{INSIDE
}(
\SDTR{h
}, i, j)$ (that is, rewrite for the same sentence range);
331 and, as a consequence of these unary rules: for ``terminal rules''
332 ($P_
{ORDER
}$) to be applicable, not only must $i = j-
1$, but also the
333 left-hand side symbol of the rule must be of the form $
\GOR{h
}$.
335 Similarly, the outside probabilities are the same as those for pure
336 CNF rules, with the exception that we add the unary rewrite
339 \sum_{\SMTR{h
}} [&P_
{OUTSIDE
}(
\SMTR{h
},i,j)
\cdot P_
{RULE
}(
\SMTR{h
} \rightarrow \SDTR{h
} ~ STOP) \\
340 + &P_
{OUTSIDE
}(
\SMTR{h
},i,j)
\cdot P_
{RULE
}(
\SMTR{h
} \rightarrow STOP ~
\SDTR{h
})
]
342 to $P_
{OUTSIDE
}(
\SDTR{h
},i,j)$ (eg. $f(s,t,i)$).
344 The reestimation just needs to be expanded to allow unary stop rules,
345 this is similar to the formula for binary rules in
346 \citet[p.~
41]{lari-csl90
}:
348 \hat{P
}_
{RULE
}(
\SMTR{h
} \rightarrow \SDTR{h
}) =
\frac
349 { \sum_{s
\in S
} \sum_{i<len(s)
} \sum_{j>i
} 1/P_s
\cdot P_
{RULE
}(
\SMTR{h
}\rightarrow \SDTR{h
})P_
{INSIDE
}(
\SDTR{h
}, i, j)P_
{OUTSIDE
}(
\SMTR{h
}, i, j)
}
350 { \sum_{s
\in S
} \sum_{i<len(s)
} \sum_{j>i
} c_s(
\SMTR{h
}, i, j)
}
352 (the denominator is unchanged, but the numerator sums $w_s$ defined
353 for unary rules; $
\SMTR{h
}\rightarrow \SDTR{h
}$ is meant to include both left and
356 % todo: add ROOT rule to CNF grammar?
359 \section{Pure CNF rules
}
360 Alternatively, one may use the ``pure CNF'' grammar of figure
361 \ref{cnf-T
} and figure
\ref{cnf-NT
}, below (not yet
362 implemented). Using the harmonic distribution to initialize this will
363 still yield a mass-deficient grammar, but at least this allows the
364 regular IO algorithm to be run.
368 \begin{tabular
} % four left-aligned math tabs, one vertical line
369 { >
{$
}l<
{$
} >
{$
}l<
{$
} >
{$
}l<
{$
} | >
{$
}l<
{$
} }
370 \multicolumn{3}{c
}{Rule
} &
\multicolumn{1}{c
}{$P_
{RULE
}$ ($b
[i,m
]$ in
\citet{lari-csl90
})
}\\
373 h
\rightarrow&
\text{``h''
}& &
1 \\
375 \SEAL{h
} \rightarrow&
\text{``h''
}& &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, adj) \\
377 ROOT
\rightarrow&
\text{``h''
}& &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, adj)
\cdot P_
{ROOT
}(h) \\
379 \caption{Terminals of pure CNF rules, defined for all POS-tags
385 \begin{tabular
} % four left-aligned math tabs, one vertical line
386 { >
{$
}l<
{$
} >
{$
}l<
{$
} >
{$
}l<
{$
} | >
{$
}l<
{$
} }
387 \multicolumn{3}{c
}{Rule
} &
\multicolumn{1}{c
}{$P_
{RULE
}$ ($a
[i,j,k
]$ in
\citet{lari-csl90
})
}\\
390 \SEAL{h
} \rightarrow& h &
\SEAL{a
} &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
391 &&&
\cdot P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj) \\
[3.5pt
]
392 \SEAL{h
} \rightarrow& h &
\GOR{.h
} &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
393 \GOR{.h
} \rightarrow&
\SEAL{a
} &
\SEAL{b
} &P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj) \\
394 &&&
\cdot P_
{ATTACH
}(b|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
395 \GOR{.h
} \rightarrow&
\SEAL{a
} &
\GOR{h
} &P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj) \\
[3.5pt
]
396 \GOR{h
} \rightarrow&
\SEAL{a
} &
\SEAL{b
} &P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, non
\text{-
}adj) \\
397 &&&
\cdot P_
{ATTACH
}(b|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
398 \GOR{h
} \rightarrow&
\SEAL{a
} &
\GOR{h
} &P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
399 \SEAL{h
} \rightarrow&
\SEAL{a
} &h &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{STOP
}(stop|h, right, adj) \\
400 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj) \\
[3.5pt
]
401 \SEAL{h
} \rightarrow&
\GOL{h.
} &h &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{STOP
}(stop|h, right, adj) \\
[3.5pt
]
402 \GOL{h.
} \rightarrow&
\SEAL{b
} &
\SEAL{a
} &P_
{ATTACH
}(b|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, non
\text{-
}adj) \\
403 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj) \\
[3.5pt
]
404 \GOL{h.
} \rightarrow&
\GOL{h
} &
\SEAL{a
} &P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj) \\
[3.5pt
]
405 \GOL{h
} \rightarrow&
\SEAL{a
} &
\SEAL{b
} &P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, non
\text{-
}adj) \\
406 &&&
\cdot P_
{ATTACH
}(b|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, non
\text{-
}adj) \\
[3.5pt
]
407 \GOL{h
} \rightarrow&
\GOL{h
} &
\SEAL{a
} &P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, non
\text{-
}adj) \\
[3.5pt
]
408 \SEAL{h
} \rightarrow&
\GOL{h.
} &
\SEAL{\GOR{h
}} &P_
{STOP
}(stop|h, left, non
\text{-
}adj) \\
[3.5pt
]
409 \SEAL{h
} \rightarrow&
\SEAL{a
} &
\SEAL{\GOR{h
}} &P_
{STOP
}(stop|h, left, non
\text{-
}adj) \\
410 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj) \\
[3.5pt
]
411 \SEAL{\GOR{h
}} \rightarrow& h &
\GOR{.h
} &P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
[3.5pt
]
412 \SEAL{\GOR{h
}} \rightarrow& h &
\SEAL{a
} &P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
413 &&&
\cdot P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj) \\
[3.5pt
]
414 ROOT
\rightarrow& h &
\SEAL{a
} &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, non
\text{-
}adj) \\
415 &&&
\cdot P_
{ATTACH
}(a|h, right)
\cdot P_
{STOP
}(
\neg stop|h, right, adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
416 ROOT
\rightarrow& h &
\GOR{.h
} &P_
{STOP
}(stop|h, left, adj)
\cdot P_
{STOP
}(stop|h, right, non
\text{-
}adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
417 ROOT
\rightarrow&
\SEAL{a
} &h &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{STOP
}(stop|h, right, adj) \\
418 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
419 ROOT
\rightarrow&
\GOL{h.
} &h &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{STOP
}(stop|h, right, adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
420 ROOT
\rightarrow&
\GOL{h.
} &
\SEAL{\GOR{h
}} &P_
{STOP
}(stop|h, left, non
\text{-
}adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
421 ROOT
\rightarrow&
\SEAL{a
} &
\SEAL{\GOR{h
}} &P_
{STOP
}(stop|h, left, non
\text{-
}adj) \\
422 &&&
\cdot P_
{ATTACH
}(a|h, left)
\cdot P_
{STOP
}(
\neg stop|h, left, adj)
\cdot P_
{ROOT
}(h) \\
[3.5pt
]
424 \caption{Non-terminals of pure CNF rules, defined for all POS-tags
425 $h$, $a$ and $b$.
}\label{cnf-NT
}
431 \bibliography{./statistical.bib
}
432 \bibliographystyle{plainnat
}