rewrite: update default dumb and smart prefixes
[elinks/elinks-j605.git] / src / dom / configuration.c
blobe722ebf76698cddca42a38edc6a459391a1a35fe
1 /* DOM Configuration */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include "elinks.h"
9 #include "dom/configuration.h"
10 #include "dom/node.h"
11 #include "dom/stack.h"
12 #include "dom/string.h"
15 static enum dom_code
16 normalize_text_node_whitespace(struct dom_node *node)
18 unsigned char buf[256];
19 struct dom_string string = INIT_DOM_STRING(NULL, 0);
20 int count = 0, i = 0;
21 unsigned char *text = node->string.string;
23 assert(node->type == DOM_NODE_TEXT);
25 while (i < node->string.length) {
26 int j;
28 for (j = 0; j < sizeof(buf) && i < node->string.length; i++) {
29 unsigned char data = text[i];
31 if (isspace(data)) {
32 if (count == 1)
33 continue;
35 data = ' ';
36 count = 1;
38 } else {
39 count = 0;
42 buf[j++] = data;
45 if (!add_to_dom_string(&string, buf, j)) {
46 done_dom_string(&string);
47 return DOM_CODE_ALLOC_ERR;
51 if (node->allocated)
52 done_dom_string(&node->string);
54 set_dom_string(&node->string, string.string, string.length);
55 node->allocated = 1;
57 return DOM_CODE_OK;
61 static enum dom_code
62 append_node_text(struct dom_config *config, struct dom_node *node)
64 struct dom_node *prev = get_dom_node_prev(node);
65 size_t length;
66 struct dom_string dest;
67 struct dom_string src;
68 int error = 0;
70 copy_struct(&src, &node->string);
72 if (!prev || prev->type != DOM_NODE_TEXT) {
73 /* Preserve text nodes with no one to append to. */
74 if (node->type == DOM_NODE_TEXT)
75 return DOM_CODE_OK;
77 prev = NULL;
78 set_dom_string(&dest, NULL, 0);
80 } else {
81 if (prev->allocated) {
82 copy_struct(&dest, &prev->string);
83 } else {
84 set_dom_string(&dest, NULL, 0);
85 if (!add_to_dom_string(&dest, prev->string.string, prev->string.length))
86 return DOM_CODE_ALLOC_ERR;
87 set_dom_string(&prev->string, dest.string, dest.length);
88 prev->allocated = 1;
92 length = dest.length;
94 switch (node->type) {
95 case DOM_NODE_CDATA_SECTION:
96 case DOM_NODE_TEXT:
97 if (!add_to_dom_string(&dest, src.string, src.length))
98 error = 1;
99 break;
101 case DOM_NODE_ENTITY_REFERENCE:
102 /* FIXME: Until we will have uniform encoding at this point
103 * (UTF-8) we just add the entity reference unexpanded assuming
104 * that convert_string() will eventually do the work of
105 * expanding it. */
106 if (!add_to_dom_string(&dest, "&", 1)
107 || !add_to_dom_string(&dest, src.string, src.length)
108 || !add_to_dom_string(&dest, ";", 1)) {
109 error = 1;
111 break;
113 default:
114 INTERNAL("Cannot append from node %d", node->type);
117 if (error) {
118 if (prev)
119 prev->string.length = length;
120 else
121 done_dom_string(&dest);
122 return DOM_CODE_ALLOC_ERR;
125 if (prev) {
126 copy_struct(&prev->string, &dest);
128 if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE)
129 && node->type != DOM_NODE_ENTITY_REFERENCE) {
130 /* XXX: Ignore errors since we want to always
131 * free the appended node at this point. */
132 normalize_text_node_whitespace(prev);
135 return DOM_CODE_FREE_NODE;
137 } else {
138 int was_cdata_section = node->type == DOM_NODE_CDATA_SECTION;
140 node->type = DOM_NODE_TEXT;
141 memset(&node->data, 0, sizeof(node->data));
142 node->allocated = 1;
143 copy_struct(&node->string, &dest);
145 if ((config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE)
146 && was_cdata_section) {
147 /* XXX: Ignore errors since we want to always ok the
148 * append. */
149 normalize_text_node_whitespace(node);
152 return DOM_CODE_OK;
156 static enum dom_code
157 dom_normalize_node_end(struct dom_stack *stack, struct dom_node *node, void *data)
159 struct dom_config *config = stack->current->data;
160 enum dom_code code = DOM_CODE_OK;
162 switch (node->type) {
163 case DOM_NODE_ELEMENT:
164 if ((config->flags & DOM_CONFIG_UNKNOWN)
165 && !node->data.element.type) {
166 /* Drop elements that are not known from the built-in
167 * node info. */
168 code = DOM_CODE_FREE_NODE;
170 break;
172 case DOM_NODE_ATTRIBUTE:
173 if ((config->flags & DOM_CONFIG_UNKNOWN)
174 && !node->data.attribute.type) {
175 /* Drop elements that are not known from the built-in
176 * node info. */
177 code = DOM_CODE_FREE_NODE;
179 break;
181 case DOM_NODE_PROCESSING_INSTRUCTION:
182 if ((config->flags & DOM_CONFIG_UNKNOWN)
183 && !node->data.proc_instruction.type) {
184 /* Drop elements that are not known from the built-in
185 * node info. */
186 code = DOM_CODE_FREE_NODE;
188 break;
190 case DOM_NODE_TEXT:
191 if (!(config->flags & DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE)
192 && node->data.text.only_space) {
193 /* Discard all Text nodes that contain
194 * whitespaces in element content]. */
195 code = DOM_CODE_FREE_NODE;
196 } else {
197 code = append_node_text(config, node);
199 break;
201 case DOM_NODE_COMMENT:
202 if (!(config->flags & DOM_CONFIG_COMMENTS)) {
203 /* Discard all comments. */
204 code = DOM_CODE_FREE_NODE;
206 break;
208 case DOM_NODE_CDATA_SECTION:
209 if (!(config->flags & DOM_CONFIG_CDATA_SECTIONS)) {
210 /* Transform CDATASection nodes into Text nodes. The new Text
211 * node is then combined with any adjacent Text node. */
212 code = append_node_text(config, node);
214 break;
216 case DOM_NODE_ENTITY_REFERENCE:
217 if (!(config->flags & DOM_CONFIG_ENTITIES)) {
218 /* Remove all EntityReference nodes from the document,
219 * putting the entity expansions directly in their place. Text
220 * nodes are normalized. Only unexpanded entity references are
221 * kept in the document. */
222 code = append_node_text(config, node);
224 break;
226 case DOM_NODE_DOCUMENT:
227 break;
229 default:
230 break;
233 return code;
236 enum dom_code
237 dom_normalize_text(struct dom_stack *stack, struct dom_node *node, void *data)
239 struct dom_config *config = stack->current->data;
241 if (config->flags & DOM_CONFIG_NORMALIZE_WHITESPACE) {
242 /* Normalize whitespace in the text. */
243 return normalize_text_node_whitespace(node);
246 return DOM_CODE_OK;
250 static struct dom_stack_context_info dom_config_normalizer_context = {
251 /* Object size: */ 0,
252 /* Push: */
254 /* */ NULL,
255 /* DOM_NODE_ELEMENT */ NULL,
256 /* DOM_NODE_ATTRIBUTE */ NULL,
257 /* DOM_NODE_TEXT */ dom_normalize_text,
258 /* DOM_NODE_CDATA_SECTION */ NULL,
259 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
260 /* DOM_NODE_ENTITY */ NULL,
261 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
262 /* DOM_NODE_COMMENT */ NULL,
263 /* DOM_NODE_DOCUMENT */ NULL,
264 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
265 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
266 /* DOM_NODE_NOTATION */ NULL,
268 /* Pop: */
270 /* */ NULL,
271 /* DOM_NODE_ELEMENT */ dom_normalize_node_end,
272 /* DOM_NODE_ATTRIBUTE */ dom_normalize_node_end,
273 /* DOM_NODE_TEXT */ dom_normalize_node_end,
274 /* DOM_NODE_CDATA_SECTION */ dom_normalize_node_end,
275 /* DOM_NODE_ENTITY_REFERENCE */ dom_normalize_node_end,
276 /* DOM_NODE_ENTITY */ dom_normalize_node_end,
277 /* DOM_NODE_PROC_INSTRUCTION */ dom_normalize_node_end,
278 /* DOM_NODE_COMMENT */ dom_normalize_node_end,
279 /* DOM_NODE_DOCUMENT */ dom_normalize_node_end,
280 /* DOM_NODE_DOCUMENT_TYPE */ dom_normalize_node_end,
281 /* DOM_NODE_DOCUMENT_FRAGMENT */ dom_normalize_node_end,
282 /* DOM_NODE_NOTATION */ dom_normalize_node_end,
286 struct dom_config *
287 add_dom_config_normalizer(struct dom_stack *stack, struct dom_config *config,
288 enum dom_config_flag flags)
290 memset(config, 0, sizeof(*config));
291 config->flags = flags;
293 if (add_dom_stack_context(stack, config, &dom_config_normalizer_context))
294 return config;
296 return NULL;
299 struct dom_config_info {
300 struct dom_string name;
301 enum dom_config_flag flag;
/* Initializer for one struct dom_config_info entry: the string literal
 * @str becomes the name and @value the associated flag. */
#define DOM_CONFIG(str, value) \
	{ STATIC_DOM_STRING(str), (value) }
307 static struct dom_config_info dom_config_info[] = {
308 DOM_CONFIG("cdata-sections", DOM_CONFIG_CDATA_SECTIONS),
309 DOM_CONFIG("comments", DOM_CONFIG_COMMENTS),
310 DOM_CONFIG("element-content-whitespace",DOM_CONFIG_ELEMENT_CONTENT_WHITESPACE),
311 DOM_CONFIG("entities", DOM_CONFIG_ENTITIES),
312 DOM_CONFIG("normalize-characters", DOM_CONFIG_NORMALIZE_CHARACTERS),
313 DOM_CONFIG("unknown", DOM_CONFIG_UNKNOWN),
314 DOM_CONFIG("normalize-whitespace", DOM_CONFIG_NORMALIZE_WHITESPACE),
317 static enum dom_config_flag
318 get_dom_config_flag(struct dom_string *name)
320 int i;
322 for (i = 0; i < sizeof_array(dom_config_info); i++)
323 if (!dom_string_casecmp(&dom_config_info[i].name, name))
324 return dom_config_info[i].flag;
326 return 0;
329 enum dom_config_flag
330 parse_dom_config(unsigned char *flaglist, unsigned char separator)
332 enum dom_config_flag flags = 0;
334 while (flaglist) {
335 unsigned char *end = separator ? strchr(flaglist, separator) : NULL;
336 int length = end ? end - flaglist : strlen(flaglist);
337 struct dom_string name = INIT_DOM_STRING(flaglist, length);
339 flags |= get_dom_config_flag(&name);
340 if (end) end++;
341 flaglist = end;
344 return flags;