1 /* DOM Configuration */
9 #include "dom/configuration.h"
11 #include "dom/stack.h"
12 #include "dom/string.h"
/* Rebuild the string of a text node by streaming its bytes through a
 * fixed 256-byte staging buffer into a freshly built dom_string, then
 * install the new string on the node.
 *
 * @node must be a DOM_NODE_TEXT node (asserted below).
 *
 * Returns DOM_CODE_ALLOC_ERR when add_to_dom_string() fails; the partially
 * built string is handed to done_dom_string() before returning.
 *
 * NOTE(review): the per-character handling between reading text[i] and the
 * flush of buf is not visible in this excerpt. Judging by the function name
 * it presumably collapses whitespace -- confirm against the full file. The
 * success return value is likewise not visible here. */
16 normalize_text_node_whitespace(struct dom_node
*node
)
18 unsigned char buf
[256];
19 struct dom_string string
= INIT_DOM_STRING(NULL
, 0);
21 unsigned char *text
= node
->string
.string
;
23 assert(node
->type
== DOM_NODE_TEXT
);
/* Outer loop: keep going until every byte of the node's string has been
 * consumed. The inner loop stops early once buf is full. */
25 while (i
< node
->string
.length
) {
28 for (j
= 0; j
< sizeof(buf
) && i
< node
->string
.length
; i
++) {
29 unsigned char data
= text
[i
];
/* Flush the j staged bytes into the new string; on failure give up the
 * partial result and report the allocation error. */
45 if (!add_to_dom_string(&string
, buf
, j
)) {
46 done_dom_string(&string
);
47 return DOM_CODE_ALLOC_ERR
;
/* Pass the node's old string to done_dom_string() and point the node at
 * the rebuilt one. */
52 done_dom_string(&node
->string
);
54 set_dom_string(&node
->string
, string
.string
, string
.length
);
/* Fold the text carried by @node into a text node.
 *
 * The previous sibling is looked up with get_dom_node_prev(). When it is a
 * DOM_NODE_TEXT node, @node's string is appended to it: verbatim for
 * CDATA sections, wrapped as "&" ... ";" for entity references (other node
 * types hit the INTERNAL() report). The previous node is then optionally
 * whitespace-normalized and DOM_CODE_FREE_NODE is returned. When there is
 * no previous text node, @node itself is retyped to DOM_NODE_TEXT, its
 * node data is zeroed and the built string is installed on it.
 *
 * @config supplies the DOM_CONFIG_NORMALIZE_WHITESPACE flag.
 *
 * Returns DOM_CODE_ALLOC_ERR when growing the destination string fails.
 *
 * NOTE(review): the else/switch/brace lines and several return statements
 * are not visible in this excerpt, so the exact branch structure described
 * above is partly inferred -- confirm against the full file. */
62 append_node_text(struct dom_config
*config
, struct dom_node
*node
)
64 struct dom_node
*prev
= get_dom_node_prev(node
);
66 struct dom_string dest
;
67 struct dom_string src
;
/* Snapshot the node's string descriptor; it is the text to be appended. */
70 copy_struct(&src
, &node
->string
);
72 if (!prev
|| prev
->type
!= DOM_NODE_TEXT
) {
73 /* Preserve text nodes with no one to append to. */
74 if (node
->type
== DOM_NODE_TEXT
)
78 set_dom_string(&dest
, NULL
, 0);
/* Previous node is a text node: continue building in its string when the
 * node's allocated flag is set, otherwise copy its text into a fresh
 * string first. */
81 if (prev
->allocated
) {
82 copy_struct(&dest
, &prev
->string
);
84 set_dom_string(&dest
, NULL
, 0);
85 if (!add_to_dom_string(&dest
, prev
->string
.string
, prev
->string
.length
))
86 return DOM_CODE_ALLOC_ERR
;
87 set_dom_string(&prev
->string
, dest
.string
, dest
.length
);
95 case DOM_NODE_CDATA_SECTION
:
97 if (!add_to_dom_string(&dest
, src
.string
, src
.length
))
101 case DOM_NODE_ENTITY_REFERENCE
:
102 /* FIXME: Until we will have uniform encoding at this point
103 * (UTF-8) we just add the entity reference unexpanded assuming
104 * that convert_string() will eventually do the work of
106 if (!add_to_dom_string(&dest
, "&", 1)
107 || !add_to_dom_string(&dest
, src
.string
, src
.length
)
108 || !add_to_dom_string(&dest
, ";", 1)) {
114 INTERNAL("Cannot append from node %d", node
->type
);
119 prev
->string
.length
= length
;
121 done_dom_string(&dest
);
122 return DOM_CODE_ALLOC_ERR
;
126 copy_struct(&prev
->string
, &dest
);
128 if ((config
->flags
& DOM_CONFIG_NORMALIZE_WHITESPACE
)
129 && node
->type
!= DOM_NODE_ENTITY_REFERENCE
) {
130 /* XXX: Ignore errors since we want to always
131 * free the appended node at this point. */
132 normalize_text_node_whitespace(prev
);
135 return DOM_CODE_FREE_NODE
;
/* Nothing to merge into: remember whether this was a CDATA section, then
 * turn the node into a plain text node carrying the built string. */
138 int was_cdata_section
= node
->type
== DOM_NODE_CDATA_SECTION
;
140 node
->type
= DOM_NODE_TEXT
;
141 memset(&node
->data
, 0, sizeof(node
->data
));
143 copy_struct(&node
->string
, &dest
);
145 if ((config
->flags
& DOM_CONFIG_NORMALIZE_WHITESPACE
)
146 && was_cdata_section
) {
147 /* XXX: Ignore errors since we want to always ok the
149 normalize_text_node_whitespace(node
);
/* Stack callback that applies the normalizer configuration to @node.
 *
 * The configuration is read from stack->current->data and the result code
 * starts out as DOM_CODE_OK. Depending on the node type and config flags
 * the code is changed as follows:
 *
 *  - element, attribute, processing instruction: DOM_CODE_FREE_NODE when
 *    DOM_CONFIG_UNKNOWN is set and the node's type field is zero;
 *  - whitespace-only text while DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE is
 *    clear: DOM_CODE_FREE_NODE; the other visible text path takes the
 *    result of append_node_text();
 *  - comment while DOM_CONFIG_COMMENTS is clear: DOM_CODE_FREE_NODE;
 *  - CDATA section while DOM_CONFIG_CDATA_SECTIONS is clear, and entity
 *    reference while DOM_CONFIG_ENTITIES is clear: the result of
 *    append_node_text().
 *
 * @data is not used in the lines visible here.
 *
 * NOTE(review): the case label for text nodes, the else/break lines, the
 * remaining cases and the final return are not visible in this excerpt --
 * confirm against the full file. */
157 dom_normalize_node_end(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
159 struct dom_config
*config
= stack
->current
->data
;
160 enum dom_code code
= DOM_CODE_OK
;
162 switch (node
->type
) {
163 case DOM_NODE_ELEMENT
:
164 if ((config
->flags
& DOM_CONFIG_UNKNOWN
)
165 && !node
->data
.element
.type
) {
166 /* Drop elements that are not known from the built-in
168 code
= DOM_CODE_FREE_NODE
;
172 case DOM_NODE_ATTRIBUTE
:
173 if ((config
->flags
& DOM_CONFIG_UNKNOWN
)
174 && !node
->data
.attribute
.type
) {
175 /* Drop attributes that are not known from the built-in
177 code
= DOM_CODE_FREE_NODE
;
181 case DOM_NODE_PROCESSING_INSTRUCTION
:
182 if ((config
->flags
& DOM_CONFIG_UNKNOWN
)
183 && !node
->data
.proc_instruction
.type
) {
184 /* Drop processing instructions that are not known from the built-in
186 code
= DOM_CODE_FREE_NODE
;
191 if (!(config
->flags
& DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE
)
192 && node
->data
.text
.only_space
) {
193 /* Discard all Text nodes that contain
194 * whitespaces in element content. */
195 code
= DOM_CODE_FREE_NODE
;
197 code
= append_node_text(config
, node
);
201 case DOM_NODE_COMMENT
:
202 if (!(config
->flags
& DOM_CONFIG_COMMENTS
)) {
203 /* Discard all comments. */
204 code
= DOM_CODE_FREE_NODE
;
208 case DOM_NODE_CDATA_SECTION
:
209 if (!(config
->flags
& DOM_CONFIG_CDATA_SECTIONS
)) {
210 /* Transform CDATASection nodes into Text nodes. The new Text
211 * node is then combined with any adjacent Text node. */
212 code
= append_node_text(config
, node
);
216 case DOM_NODE_ENTITY_REFERENCE
:
217 if (!(config
->flags
& DOM_CONFIG_ENTITIES
)) {
218 /* Remove all EntityReference nodes from the document,
219 * putting the entity expansions directly in their place. Text
220 * nodes are normalized. Only unexpanded entity references are
221 * kept in the document. */
222 code
= append_node_text(config
, node
);
226 case DOM_NODE_DOCUMENT
:
/* Stack callback for text nodes: when the configuration found in
 * stack->current->data has DOM_CONFIG_NORMALIZE_WHITESPACE set, run
 * normalize_text_node_whitespace() on @node and return its result.
 *
 * @data is not used in the lines visible here.
 *
 * NOTE(review): the return value for the case where the flag is clear is
 * not visible in this excerpt. */
237 dom_normalize_text(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
239 struct dom_config
*config
= stack
->current
->data
;
241 if (config
->flags
& DOM_CONFIG_NORMALIZE_WHITESPACE
) {
242 /* Normalize whitespace in the text. */
243 return normalize_text_node_whitespace(node
);
/* Stack context description for the configuration normalizer.
 *
 * The object size is 0. Two per-node-type callback tables follow: in the
 * first only DOM_NODE_TEXT has a handler (dom_normalize_text), in the
 * second every node type is routed to dom_normalize_node_end.
 *
 * NOTE(review): the labels naming the two tables are not visible in this
 * excerpt; presumably the first holds the push callbacks and the second
 * the pop callbacks -- confirm against struct dom_stack_context_info. */
250 static struct dom_stack_context_info dom_config_normalizer_context
= {
251 /* Object size: */ 0,
255 /* DOM_NODE_ELEMENT */ NULL
,
256 /* DOM_NODE_ATTRIBUTE */ NULL
,
257 /* DOM_NODE_TEXT */ dom_normalize_text
,
258 /* DOM_NODE_CDATA_SECTION */ NULL
,
259 /* DOM_NODE_ENTITY_REFERENCE */ NULL
,
260 /* DOM_NODE_ENTITY */ NULL
,
261 /* DOM_NODE_PROC_INSTRUCTION */ NULL
,
262 /* DOM_NODE_COMMENT */ NULL
,
263 /* DOM_NODE_DOCUMENT */ NULL
,
264 /* DOM_NODE_DOCUMENT_TYPE */ NULL
,
265 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL
,
266 /* DOM_NODE_NOTATION */ NULL
,
271 /* DOM_NODE_ELEMENT */ dom_normalize_node_end
,
272 /* DOM_NODE_ATTRIBUTE */ dom_normalize_node_end
,
273 /* DOM_NODE_TEXT */ dom_normalize_node_end
,
274 /* DOM_NODE_CDATA_SECTION */ dom_normalize_node_end
,
275 /* DOM_NODE_ENTITY_REFERENCE */ dom_normalize_node_end
,
276 /* DOM_NODE_ENTITY */ dom_normalize_node_end
,
277 /* DOM_NODE_PROC_INSTRUCTION */ dom_normalize_node_end
,
278 /* DOM_NODE_COMMENT */ dom_normalize_node_end
,
279 /* DOM_NODE_DOCUMENT */ dom_normalize_node_end
,
280 /* DOM_NODE_DOCUMENT_TYPE */ dom_normalize_node_end
,
281 /* DOM_NODE_DOCUMENT_FRAGMENT */ dom_normalize_node_end
,
282 /* DOM_NODE_NOTATION */ dom_normalize_node_end
,
/* Set up @config for normalizing with @flags and register the normalizer
 * context on @stack.
 *
 * *config is zeroed first, so any previous contents are discarded, then
 * its flags field is set to @flags. @config is passed to
 * add_dom_stack_context() together with dom_config_normalizer_context;
 * presumably this is what the normalizer callbacks later read back through
 * stack->current->data.
 *
 * NOTE(review): what is returned on success and on failure of
 * add_dom_stack_context() is not visible in this excerpt. */
287 add_dom_config_normalizer(struct dom_stack
*stack
, struct dom_config
*config
,
288 enum dom_config_flag flags
)
290 memset(config
, 0, sizeof(*config
));
291 config
->flags
= flags
;
293 if (add_dom_stack_context(stack
, config
, &dom_config_normalizer_context
))
/* Associates the name of a configuration parameter with the flag bit that
 * represents it. */
299 struct dom_config_info
{
/* Parameter name. */
300 struct dom_string name
;
/* Flag selected by that name. */
301 enum dom_config_flag flag
;
/* Initializer for one struct dom_config_info entry: wraps the string
 * literal @name with STATIC_DOM_STRING() and pairs it with @flag. */
304 #define DOM_CONFIG(name, flag) \
305 { STATIC_DOM_STRING(name), (flag) }
/* Table of the configuration parameter names understood by this module
 * and the dom_config_flag each one selects. */
307 static struct dom_config_info dom_config_info
[] = {
308 DOM_CONFIG("cdata-sections", DOM_CONFIG_CDATA_SECTIONS
),
309 DOM_CONFIG("comments", DOM_CONFIG_COMMENTS
),
310 DOM_CONFIG("element-content-whitespace",DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE
),
311 DOM_CONFIG("entities", DOM_CONFIG_ENTITIES
),
312 DOM_CONFIG("normalize-characters", DOM_CONFIG_NORMALIZE_CHARACTERS
),
313 DOM_CONFIG("unknown", DOM_CONFIG_UNKNOWN
),
314 DOM_CONFIG("normalize-whitespace", DOM_CONFIG_NORMALIZE_WHITESPACE
),
/* Look up the configuration flag for @name.
 *
 * Scans dom_config_info[] in order and returns the flag of the first entry
 * for which dom_string_casecmp() reports equality (returns zero); going by
 * the helper's name the comparison is presumably case-insensitive.
 *
 * NOTE(review): the value returned when no entry matches is not visible
 * in this excerpt. */
317 static enum dom_config_flag
318 get_dom_config_flag(struct dom_string
*name
)
322 for (i
= 0; i
< sizeof_array(dom_config_info
); i
++)
323 if (!dom_string_casecmp(&dom_config_info
[i
].name
, name
))
324 return dom_config_info
[i
].flag
;
330 parse_dom_config(unsigned char *flaglist
, unsigned char separator
)
332 enum dom_config_flag flags
= 0;
335 unsigned char *end
= separator
? strchr(flaglist
, separator
) : NULL
;
336 int length
= end
? end
- flaglist
: strlen(flaglist
);
337 struct dom_string name
= INIT_DOM_STRING(flaglist
, length
);
339 flags
|= get_dom_config_flag(&name
);