Version 6.1.0.2, tag libreoffice-6.1.0.2
[LibreOffice.git] / writerfilter / documentation / ooxml / model.rng
bloba29e1499205ab933c627378ca330f221e5a183b0
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 -->
10 <!--
11 This file is both a relax-ng schema for writerfilter/source/ooxml/model.xml and
12 documentation for that file. The schema has two parts:
14 - first part: a subset of the relax-ng grammar to define *what* we expect as
15 the input in a DOCX file
16 - second part: additional annotation on top of that to define *how* to handle
17 that expected input
18 -->
19 <grammar xmlns="http://relaxng.org/ns/structure/1.0">
20 <!--
21 First part: a subset of the relax-ng XML markup.
23 The order of elements in this part follows a bottom-up approach.
24 -->
26 <!-- Basic building blocks: element, attribute and their contents. -->
28 <!--
29 Describes an XML element. Its name attribute is optional and its content is one or more attribute, data or ref children, in any order.
31 Example:
33 <element name="charset">
34 <ref name="CT_Charset"/>
35 </element>
36 -->
37 <define name="element-element">
38 <element name="element" ns="http://relaxng.org/ns/structure/1.0">
39 <optional>
40 <attribute name="name"/>
41 </optional>
42 <oneOrMore>
43 <choice>
44 <ref name="attribute-element"/>
45 <ref name="data-element"/>
46 <ref name="ref-element"/>
47 </choice>
48 </oneOrMore>
49 </element>
50 </define>
52 <!--
53 Describes an attribute. Its name attribute is optional and it may contain any number of data or ref children, including none.
55 Example:
57 <attribute name="name">
58 <data type="string"/>
59 </attribute>
60 -->
61 <define name="attribute-element">
62 <element name="attribute" ns="http://relaxng.org/ns/structure/1.0">
63 <optional>
64 <attribute name="name"/>
65 </optional>
66 <zeroOrMore>
67 <choice>
68 <ref name="data-element"/>
69 <ref name="ref-element"/>
70 </choice>
71 </zeroOrMore>
72 </element>
73 </define>
75 <!--
76 Describes the type of the data contained in an attribute. Possible values:
77 boolean, integer or string. See also <text>. The type attribute is required; this schema does not itself restrict its value to that list.
78 -->
79 <define name="data-element">
80 <element name="data" ns="http://relaxng.org/ns/structure/1.0">
81 <attribute name="type"/>
82 </element>
83 </define>
85 <!--
86 Describes an enumeration element: a possible value for an attribute. The value itself is the text content of the element.
87 -->
88 <define name="value-element">
89 <element name="value" ns="http://relaxng.org/ns/structure/1.0">
90 <text/>
91 </element>
92 </define>
94 <!--
95 This element is ignored during parsing, it just helps readability. It groups one or more data, element, ref or value children.
97 Example:
99 <choice>
100 <value>true</value>
101 <value>false</value>
102 </choice>
103 -->
104 <define name="choice-element">
105 <element name="choice" ns="http://relaxng.org/ns/structure/1.0">
106 <oneOrMore>
107 <choice>
108 <ref name="data-element"/>
109 <ref name="element-element"/>
110 <ref name="ref-element"/>
111 <ref name="value-element"/>
112 </choice>
113 </oneOrMore>
114 </element>
115 </define>
117 <!-- Grouping elements: define and grammar. -->
119 <!--
120 A define is a named definition of its contents, so that multiple <ref> elements
121 can refer to it, to avoid copy&paste. OOXML named (complex and simple) types
122 are described using defines. The name attribute is required and the body is one or more choice, attribute, element, data, ref or empty patterns.
123 -->
124 <define name="define-element">
125 <element name="define" ns="http://relaxng.org/ns/structure/1.0">
126 <attribute name="name"/>
127 <oneOrMore>
128 <choice>
129 <ref name="choice-element"/>
130 <ref name="attribute-element"/>
131 <ref name="element-element"/>
132 <ref name="data-element"/>
133 <ref name="ref-element"/>
134 <empty/>
135 </choice>
136 </oneOrMore>
137 </element>
138 </define>
140 <!--
141 A reference to a define, identified by the required name attribute.
142 -->
143 <define name="ref-element">
144 <element name="ref" ns="http://relaxng.org/ns/structure/1.0">
145 <attribute name="name"/>
146 </element>
147 </define>
149 <!--
150 A grammar is a set of defines, one grammar is equivalent to one .xsd file
151 from the OOXML spec. The ns attribute is required, attributeFormDefault is optional; any include elements come first, followed by one or more defines.
152 -->
153 <define name="grammar-element">
154 <element name="grammar" ns="http://relaxng.org/ns/structure/1.0">
155 <attribute name="ns"/>
156 <optional>
157 <attribute name="attributeFormDefault"/>
158 </optional>
159 <zeroOrMore>
160 <ref name="include-element"/>
161 </zeroOrMore>
162 <oneOrMore>
163 <ref name="define-element"/>
164 </oneOrMore>
165 </element>
166 </define>
168 <!--
169 Controls the resolution of <ref> elements. The order is:
171 - the current grammar
172 - included grammars, if there are any
173 - the first define in the whole model
174 -->
175 <define name="include-element">
176 <element name="include" ns="http://relaxng.org/ns/structure/1.0">
177 <attribute name="href"/>
178 </element>
179 </define>
181 <!--
182 Second part: custom markup, building on top of the first one.
184 The order of elements in this part follows a top-down approach.
186 The output of the code generated from these elements is a token stream. There
187 are two types of tokens: SPRM tokens and attribute ones. SPRM refers to
188 Single PRoperty Modifier, in this context it means a token that contains other
189 tokens. It's used to represent an XML element. That means that SPRM tokens
190 can contain other SPRM tokens, and also attribute tokens, while attribute
191 tokens only contain simple types (boolean, integer, string).
193 More terminology: the types in the OOXML schema have two typical prefixes:
195 - CT_something: complex type, used to describe an XML element
196 - ST_something: simple type, used to describe the contents of an attribute
198 For tokens the following abbreviations are used:
200 - NS_something: namespace
201 - LN_something: local name
202 -->
204 <!--
205 The model element is the toplevel container for the XML element /
206 attribute mapping definition. It contains one or more direct token
207 definitions, followed by one or more namespace elements holding the mapping definitions for each namespace.
208 -->
209 <define name="model-element">
210 <element name="model">
211 <oneOrMore>
212 <ref name="token-element"/>
213 </oneOrMore>
214 <oneOrMore>
215 <ref name="namespace-element"/>
216 </oneOrMore>
217 </element>
218 </define>
220 <!--
221 A token element can explicitly define a token. This allows generating
222 such a token in the tokenizers and handling it in the domain mapper. Ideally
223 tokens are *not* defined this way, they are mapped to an XML element or
224 attribute from the OOXML specification.
225 -->
226 <define name="token-element">
227 <element name="token">
228 <!--
229 The token name must be ooxml:something, then in C++ it'll be the
230 NS_ooxml::LN_something ("OOXML namespace, something local name")
231 constant.
232 -->
233 <attribute name="tokenid"/>
234 </element>
235 </define>
237 <!--
238 A namespace element is a container for a subset of the relax-ng grammar
239 of a part of the OOXML specification. It also contains the resource
240 definitions, which specify how XML elements and attributes are mapped to
241 tokens. Children appear in this order: any start elements, exactly one grammar, then any resource elements.
242 -->
243 <define name="namespace-element">
244 <element name="namespace">
245 <attribute name="name"/>
246 <zeroOrMore>
247 <ref name="start-element"/>
248 </zeroOrMore>
249 <ref name="grammar-element"/>
250 <zeroOrMore>
251 <ref name="resource-element"/>
252 </zeroOrMore>
253 </element>
254 </define>
256 <!--
257 A start element is similar to the relax-ng start element, but this one has a
258 name attribute to refer to a define, while the relax-ng one has a ref child
259 element to do the same.
260 -->
261 <define name="start-element">
262 <element name="start">
263 <attribute name="name"/>
264 </element>
265 </define>
267 <!--
268 A resource element always matches (by its name attribute) a define from the
269 grammar of the namespace. It describes how that (simple or complex) type is
270 parsed during import.
272 Example:
274 <resource name="CT_Font" resource="Properties">
276 </resource>
280 <resource name="CT_OMathPara" resource="Stream"/>
281 -->
282 <define name="resource-element">
283 <element name="resource">
284 <!-- There should be a define element with the same name attribute. -->
285 <attribute name="name"/>
286 <!--
287 This means the resource element will be handled by the
288 OOXMLFastContextHandler<resource> class.
290 The two most important resources:
292 - Properties: this maps elements/attributes to SPRM/attribute tokens
293 - Stream: If the element itself does not require any special handling,
294 but the subelements are interesting, use this resource. If no
295 explicit resource element is available, then a null context will be
296 created and the element and all its subelements will be ignored.
297 -->
298 <attribute name="resource"/>
299 <optional>
300 <attribute name="tokenid"/>
301 </optional>
302 <zeroOrMore>
303 <choice>
304 <ref name="resource-element-element"/>
305 <ref name="resource-attribute-element"/>
306 <ref name="resource-value-element"/>
307 <ref name="resource-action-element"/>
308 </choice>
309 </zeroOrMore>
310 </element>
311 </define>
313 <!--
314 The <element> child of a <resource> defines what element name will be handled
315 via what token.
317 Example:
319 <element name="charset" tokenid="ooxml:CT_Font_charset"/>
321 Means the <charset> element will be handled in the sprm() function of the handler
322 class as a NS_ooxml::LN_CT_Font_charset case. (sprm() is a logging wrapper
323 around lcl_sprm(), which is the real implementation.)
324 -->
325 <define name="resource-element-element">
326 <element name="element">
327 <attribute name="name"/>
328 <attribute name="tokenid"/>
329 </element>
330 </define>
332 <!--
333 The <attribute> child of a <resource> defines what attribute name will be
334 handled via what token.
336 Example:
338 <attribute name="name" tokenid="ooxml:CT_Font_name"/>
340 Means the name attribute will be handled in the attribute() (real
341 implementation in lcl_attribute()) function of the handler class as a
342 NS_ooxml::LN_CT_Font_name case. Only the name attribute is required here; tokenid and action are both optional.
343 -->
344 <define name="resource-attribute-element">
345 <element name="attribute">
346 <attribute name="name"/>
347 <optional>
348 <attribute name="tokenid"/>
349 </optional>
350 <optional>
351 <attribute name="action"/>
352 </optional>
353 </element>
354 </define>
356 <!--
357 A <value> inside a <resource> defines how to map the string data of a value
358 to a token. The tokenid attribute defines the token name, the text of the
359 element defines the string. This is useful in case the value of an attribute
360 is a choice from a predefined list.
361 -->
362 <define name="resource-value-element">
363 <element name="value">
364 <attribute name="tokenid"/>
365 <text/>
366 </element>
367 </define>
369 <!--
370 An <action> inside a <resource> can perform additional actions in the
371 following situations:
373 - start of the element
374 - end of the element
375 - character data of the element
377 The tokenid attribute restricts the action to a particular element.
379 Example:
381 <resource name="CT_TxbxContent" resource="Stream">
382 <action name="start" action="startTxbxContent"/>
383 <action name="end" action="endTxbxContent"/>
384 </resource>
386 That means that when:
388 - <txbxContent> starts, OOXMLFastContextHandler::startTxbxContent() will be called
389 - <txbxContent> ends, OOXMLFastContextHandler::endTxbxContent() will be called
390 -->
391 <define name="resource-action-element">
392 <element name="action">
393 <attribute name="name"/>
394 <attribute name="action"/>
395 <optional>
396 <attribute name="tokenid"/>
397 </optional>
398 <optional>
399 <attribute name="sendtokenid"/>
400 </optional>
401 <optional>
402 <ref name="resource-action-cond-element"/>
403 </optional>
404 </element>
405 </define>
407 <!--
408 Some actions take parameters, which can be defined by the <cond> element.
410 Example:
412 <resource name="CT_FldChar" resource="Stream">
413 <action name="start" action="fieldstart">
414 <cond tokenid="ooxml:CT_FldChar_fldCharType" value="ooxml:Value_ST_FldCharType_begin"/>
415 </action>
416 </resource>
418 That means:
420 - if the <fldChar> starts with an fldCharType attribute being "begin"
421 - then perform the "fieldstart" action.
422 -->
423 <define name="resource-action-cond-element">
424 <element name="cond">
425 <attribute name="tokenid"/>
426 <attribute name="value"/>
427 </element>
428 </define>
430 <!-- The entry point of the schema: the document root must match model-element. -->
431 <start>
432 <ref name="model-element"/>
433 </start>
434 </grammar>
435 <!-- vim: ft=xml shiftwidth=2 softtabstop=2 expandtab: -->