grafthistory: support curl
[elinks/elinks-j605.git] / src / document / css / parser.c
blob23da6dd516498e41caf044630931c2a6cbe8b5eb
1 /* CSS main parser */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <stdlib.h>
8 #include <string.h>
10 #include "elinks.h"
12 #include "document/css/parser.h"
13 #include "document/css/property.h"
14 #include "document/css/scanner.h"
15 #include "document/css/stylesheet.h"
16 #include "document/css/value.h"
17 #include "document/html/parser.h"
18 #include "util/color.h"
19 #include "util/lists.h"
20 #include "util/error.h"
21 #include "util/memory.h"
22 #include "util/string.h"
24 /* #define DEBUG_CSS */
27 void
28 css_parse_properties(struct list_head *props, struct scanner *scanner)
30 assert(props && scanner);
32 while (scanner_has_tokens(scanner)) {
33 struct css_property_info *property_info = NULL;
34 struct css_property *prop;
35 struct scanner_token *token = get_scanner_token(scanner);
36 int i;
38 if (!token || token->type == '}') break;
40 /* Extract property name. */
42 if (token->type != CSS_TOKEN_IDENT
43 || !check_next_scanner_token(scanner, ':')) {
44 /* Some use style="{ properties }" so we have to be
45 * check what to skip to. */
46 if (token->type == '{') {
47 skip_scanner_token(scanner);
48 } else {
49 skip_css_tokens(scanner, ';');
51 continue;
54 for (i = 0; css_property_info[i].name; i++) {
55 struct css_property_info *info = &css_property_info[i];
57 if (scanner_token_strlcasecmp(token, info->name, -1)) {
58 property_info = info;
59 break;
63 /* Skip property name and separator and check for expression */
64 if (!skip_css_tokens(scanner, ':')) {
65 assert(!scanner_has_tokens(scanner));
66 break;
69 if (!property_info) {
70 /* Unknown property, check the next one. */
71 goto ride_on;
74 /* We might be on track of something, cook up the struct. */
76 prop = mem_calloc(1, sizeof(*prop));
77 if (!prop) {
78 goto ride_on;
80 prop->type = property_info->type;
81 prop->value_type = property_info->value_type;
82 if (!css_parse_value(property_info, &prop->value, scanner)) {
83 mem_free(prop);
84 goto ride_on;
86 add_to_list(*props, prop);
88 /* Maybe we have something else to go yet? */
90 ride_on:
91 skip_css_tokens(scanner, ';');
/* Skip a '{ ... }' block: skip tokens for '{' and, only if that succeeded,
 * skip tokens for '}'.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely as the unbraced body of an
 * if/else; call it with a trailing semicolon.
 *
 * TODO: We should handle support for skipping blocks better, so that nested
 * blocks like "{ { } }" will be handled correctly. --jonas */
#define skip_css_block(scanner) \
	do { \
		if (skip_css_tokens(scanner, '{')) \
			skip_css_tokens(scanner, '}'); \
	} while (0)
102 /* Atrules grammer:
104 * media_types:
105 * <empty>
106 * | <ident>
107 * | media_types ',' <ident>
109 * atrule:
110 * '@charset' <string> ';'
111 * | '@import' <string> media_types ';'
112 * | '@import' <uri> media_types ';'
113 * | '@media' media_types '{' ruleset* '}'
114 * | '@page' <ident>? [':' <ident>]? '{' properties '}'
115 * | '@font-face' '{' properties '}'
117 static void
118 css_parse_atrule(struct css_stylesheet *css, struct scanner *scanner,
119 struct uri *base_uri)
121 struct scanner_token *token = get_scanner_token(scanner);
123 /* Skip skip skip that code */
124 switch (token->type) {
125 case CSS_TOKEN_AT_IMPORT:
126 token = get_next_scanner_token(scanner);
127 if (!token) break;
129 if (token->type == CSS_TOKEN_STRING
130 || token->type == CSS_TOKEN_URL) {
131 assert(css->import);
132 css->import(css, base_uri, token->string, token->length);
134 skip_css_tokens(scanner, ';');
135 break;
137 case CSS_TOKEN_AT_CHARSET:
138 skip_css_tokens(scanner, ';');
139 break;
141 case CSS_TOKEN_AT_FONT_FACE:
142 case CSS_TOKEN_AT_MEDIA:
143 case CSS_TOKEN_AT_PAGE:
144 skip_css_block(scanner);
145 break;
147 case CSS_TOKEN_AT_KEYWORD:
148 /* TODO: Unkown @-rule so either skip til ';' or next block. */
149 while (scanner_has_tokens(scanner)) {
150 token = get_next_scanner_token(scanner);
152 if (!token) break;
154 if (token->type == ';') {
155 skip_scanner_token(scanner);
156 break;
158 } else if (token->type == '{') {
159 skip_css_block(scanner);
160 break;
163 break;
164 default:
165 INTERNAL("@-rule parser called without atrule.");
170 struct selector_pkg {
171 LIST_HEAD(struct selector_pkg);
172 struct css_selector *selector;
175 struct css_selector *
176 reparent_selector(struct list_head *sels, struct css_selector *selector,
177 struct css_selector **watch)
179 struct css_selector *twin = find_css_selector(sels, selector->type,
180 selector->relation,
181 selector->name, -1);
183 if (twin) {
184 merge_css_selectors(twin, selector);
185 /* Reparent leaves. */
186 while (selector->leaves.next != &selector->leaves) {
187 struct css_selector *leaf = selector->leaves.next;
189 reparent_selector(&twin->leaves, leaf, watch);
191 if (*watch == selector)
192 *watch = twin;
193 done_css_selector(selector);
194 } else {
195 if (selector->next) del_from_list(selector);
196 add_to_list(*sels, selector);
199 return twin ? twin : selector;
202 /* Our selector grammar:
204 * selector:
205 * element_name? ('#' id)? ('.' class)? (':' pseudo_class)? \
206 * ((' ' | '>') selector)?
209 static void
210 css_parse_selector(struct css_stylesheet *css, struct scanner *scanner,
211 struct list_head *selectors)
213 /* Shell for the last selector (the whole selector chain, that is). */
214 struct selector_pkg *pkg = NULL;
215 /* In 'p#x.y i.z', it's NULL for 'p', 'p' for '#x', '.y' and 'i', and
216 * 'i' for '.z'. */
217 struct css_selector *prev_element_selector = NULL;
218 /* In 'p#x.y:q i', it's NULL for 'p' and '#x', '#x' for '.y', and '.y'
219 * for ':q', and again NULL for 'i'. */
220 struct css_selector *prev_specific_selector = NULL;
221 /* In 'p#x.y div.z:a' it is NULL for 'p#x.y' and 'div', and 'p' for
222 * '.z' and ':a'. So the difference from @prev_element_selector is that
223 * it is changed after the current selector fragment is finished, not
224 * right after the base selector is loaded. So it is set differently
225 * for the '#x.y' and '.z:a' parts of selector. */
226 struct css_selector *last_chained_selector = NULL;
227 /* In 'p#x.y div.z:a, i.b {}', it's set for ':a' and '.b'. */
228 int last_fragment = 0;
229 /* In 'p#x .y', it's set for 'p' and '.y'. Note that it is always set in
230 * the previous iteration so it's valid for the current token only
231 * before "saving" the token. */
232 int selector_start = 1;
234 /* FIXME: element can be even '*' --pasky */
236 while (scanner_has_tokens(scanner)) {
237 struct scanner_token *token = get_scanner_token(scanner);
238 struct scanner_token last_token;
239 struct css_selector *selector;
240 enum css_selector_relation reltype = CSR_ROOT;
241 enum css_selector_type seltype = CST_ELEMENT;
243 assert(token);
244 assert(!last_fragment);
247 if (token->type == '{'
248 || token->type == '}'
249 || token->type == ';')
250 break;
253 /* Examine the selector fragment */
255 if (token->type != CSS_TOKEN_IDENT) {
256 switch (token->type) {
257 case CSS_TOKEN_HASH:
258 case CSS_TOKEN_HEX_COLOR:
259 seltype = CST_ID;
260 reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY;
261 break;
263 case '.':
264 seltype = CST_CLASS;
265 reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY;
266 break;
268 case ':':
269 seltype = CST_PSEUDO;
270 reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY;
271 break;
273 case '>':
274 seltype = CST_ELEMENT;
275 reltype = CSR_PARENT;
276 break;
278 default:
279 /* FIXME: Temporary fix for this weird CSS
280 * precedence thing. ')' has higher than ','
281 * and it can cause problems when skipping
282 * here. The reason is for the function()
283 * parsing. Hmm... --jonas */
284 if (!skip_css_tokens(scanner, ','))
285 skip_scanner_token(scanner);
286 seltype = CST_INVALID;
287 break;
290 if (seltype == CST_INVALID)
291 continue;
293 /* Hexcolor and hash already contains the ident
294 * inside. */
295 if (token->type != CSS_TOKEN_HEX_COLOR
296 && token->type != CSS_TOKEN_HASH) {
297 token = get_next_scanner_token(scanner);
298 if (!token) break;
299 if (token->type != CSS_TOKEN_IDENT) /* wtf */
300 continue;
301 } else {
302 /* Skip the leading '#'. */
303 token->string++, token->length--;
306 } else {
307 if (pkg) reltype = CSR_ANCESTOR;
311 /* Look ahead at what's coming next */
313 copy_struct(&last_token, token);
314 /* Detect whether upcoming tokens are separated by
315 * whitespace or not (that's important for determining
316 * whether it's a combinator or specificitier). */
317 if (last_token.string + last_token.length < scanner->end) {
318 selector_start = last_token.string[last_token.length];
319 selector_start = (selector_start != '#'
320 && selector_start != '.'
321 && selector_start != ':');
322 } /* else it doesn't matter as we are gonna bail out anyway. */
324 token = get_next_scanner_token(scanner);
325 if (!token) break;
326 last_fragment = (token->type == ',' || token->type == '{');
329 /* Register the selector */
331 if (!pkg) {
332 selector = get_css_base_selector(
333 last_fragment ? css : NULL, seltype,
334 CSR_ROOT,
335 last_token.string, last_token.length);
336 if (!selector) continue;
338 pkg = mem_calloc(1, sizeof(*pkg));
339 if (!pkg) continue;
340 add_to_list(*selectors, pkg);
341 pkg->selector = selector;
343 } else if (reltype == CSR_SPECIFITY) {
344 /* We append under the last fragment. */
345 struct css_selector *base_sel = prev_specific_selector;
347 if (!base_sel) base_sel = prev_element_selector;
348 assert(base_sel);
350 selector = get_css_selector(&base_sel->leaves,
351 seltype, reltype,
352 last_token.string,
353 last_token.length);
354 if (!selector) continue;
356 if (last_chained_selector) {
357 /* The situation is like: 'div p#x', now it was
358 * 'p -> div', but we need to redo that as
359 * '(p ->) #x -> div'. */
360 del_from_list(last_chained_selector);
361 add_to_list(selector->leaves,
362 last_chained_selector);
365 if (pkg->selector == base_sel) {
366 /* This is still just specificitying offspring
367 * of the previous pkg->selector. */
368 pkg->selector = selector;
371 if (last_fragment) {
372 /* This is the last fragment of the selector
373 * chain, that means the last base fragment
374 * wasn't marked so and thus wasn't bound to
375 * the stylesheet. Let's do that now. */
376 assert(prev_element_selector);
377 prev_element_selector->relation = CSR_ROOT;
378 prev_element_selector =
379 reparent_selector(&css->selectors,
380 prev_element_selector,
381 &pkg->selector);
384 } else /* CSR_PARENT || CSR_ANCESTOR */ {
385 /* We - in the perlish speak - unshift in front
386 * of the previous selector fragment and reparent
387 * it to the upcoming one. */
388 selector = get_css_base_selector(
389 last_fragment ? css : NULL, seltype,
390 CSR_ROOT,
391 last_token.string, last_token.length);
392 if (!selector) continue;
394 assert(prev_element_selector);
395 add_to_list(selector->leaves, prev_element_selector);
396 last_chained_selector = prev_element_selector;
398 prev_element_selector->relation = reltype;
402 /* Record the selector fragment for future generations */
404 if (reltype == CSR_SPECIFITY) {
405 prev_specific_selector = selector;
406 } else {
407 prev_element_selector = selector;
408 prev_specific_selector = NULL;
412 /* What to do next */
414 if (last_fragment) {
415 /* Next selector coming, clean up. */
416 pkg = NULL; last_fragment = 0; selector_start = 1;
417 prev_element_selector = NULL;
418 prev_specific_selector = NULL;
419 last_chained_selector = NULL;
422 if (token->type == ',') {
423 /* Another selector hooked to these properties. */
424 skip_scanner_token(scanner);
426 } else if (token->type == '{') {
427 /* End of selector list. */
428 break;
430 } /* else Another selector fragment probably coming up. */
433 /* Wipe the selector we were currently composing, if any. */
434 if (pkg) {
435 if (prev_element_selector)
436 done_css_selector(prev_element_selector);
437 del_from_list(pkg);
438 mem_free(pkg);
443 /* Ruleset grammar:
445 * ruleset:
446 * selector [ ',' selector ]* '{' properties '}'
448 static void
449 css_parse_ruleset(struct css_stylesheet *css, struct scanner *scanner)
451 INIT_LIST_HEAD(selectors);
452 INIT_LIST_HEAD(properties);
453 struct selector_pkg *pkg;
455 css_parse_selector(css, scanner, &selectors);
456 if (list_empty(selectors)
457 || !skip_css_tokens(scanner, '{')) {
458 if (!list_empty(selectors)) free_list(selectors);
459 skip_css_tokens(scanner, '}');
460 return;
464 /* We don't handle the case where a property has already been added to
465 * a selector. That doesn't matter though, because the best one will be
466 * always the last one (FIXME: 'important!'), therefore the applier
467 * will take it last and it will have the "final" effect.
469 * So it's only a little waste and no real harm. The thing is, what do
470 * you do when you have 'background: #fff' and then 'background:
471 * x-repeat'? It would require yet another logic to handle merging of
472 * these etc and the induced overhead would in most cases mean more
473 * waste that having the property multiple times in a selector, I
474 * believe. --pasky */
476 pkg = selectors.next;
477 css_parse_properties(&properties, scanner);
479 skip_css_tokens(scanner, '}');
481 /* Mirror the properties to all the selectors. */
482 foreach (pkg, selectors) {
483 #ifdef DEBUG_CSS
484 DBG("Binding properties (!!%d) to selector %s (type %d, relation %d, children %d)",
485 !list_empty(properties),
486 pkg->selector->name, pkg->selector->type,
487 pkg->selector->relation,
488 !list_empty(pkg->selector->leaves));
489 #endif
490 add_selector_properties(pkg->selector, &properties);
492 free_list(selectors);
493 free_list(properties);
497 void
498 css_parse_stylesheet(struct css_stylesheet *css, struct uri *base_uri,
499 unsigned char *string, unsigned char *end)
501 struct scanner scanner;
503 init_scanner(&scanner, &css_scanner_info, string, end);
505 while (scanner_has_tokens(&scanner)) {
506 struct scanner_token *token = get_scanner_token(&scanner);
508 assert(token);
510 switch (token->type) {
511 case CSS_TOKEN_AT_KEYWORD:
512 case CSS_TOKEN_AT_CHARSET:
513 case CSS_TOKEN_AT_FONT_FACE:
514 case CSS_TOKEN_AT_IMPORT:
515 case CSS_TOKEN_AT_MEDIA:
516 case CSS_TOKEN_AT_PAGE:
517 css_parse_atrule(css, &scanner, base_uri);
518 break;
520 default:
521 /* And WHAT ELSE could it be?! */
522 css_parse_ruleset(css, &scanner);
525 #ifdef DEBUG_CSS
526 dump_css_selector_tree(&css->selectors);
527 WDBG("That's it.");
528 #endif