12 #include "document/css/parser.h"
13 #include "document/css/property.h"
14 #include "document/css/scanner.h"
15 #include "document/css/stylesheet.h"
16 #include "document/css/value.h"
17 #include "document/html/parser.h"
18 #include "util/color.h"
19 #include "util/lists.h"
20 #include "util/error.h"
21 #include "util/memory.h"
22 #include "util/string.h"
24 /* #define DEBUG_CSS */
/* Parse CSS property declarations from @scanner and add every property that
 * parses successfully to the @props list.
 *
 * As far as this listing shows, one loop iteration:
 *  - stops on a missing token or on a closing brace;
 *  - expects a CSS_TOKEN_IDENT that is followed by a colon, otherwise tokens
 *    are skipped;
 *  - walks the css_property_info[] table (terminated by an entry without a
 *    name) and compares each name with scanner_token_strlcasecmp();
 *  - allocates a struct css_property with mem_calloc(), copies type and
 *    value_type from the table entry and lets css_parse_value() fill in
 *    prop->value;
 *  - adds the property to @props and skips to the next semicolon.
 *
 * NOTE(review): the embedded listing numbers jump (57 -> 63, 65 -> 70,
 * 76 -> 80, 82 -> 86), so some statements, including the bodies of the
 * failure branches, are not visible here -- confirm them against the
 * complete file. */
28 css_parse_properties(struct list_head
*props
, struct scanner
*scanner
)
30 assert(props
&& scanner
);
32 while (scanner_has_tokens(scanner
)) {
33 struct css_property_info
*property_info
= NULL
;
34 struct css_property
*prop
;
35 struct scanner_token
*token
= get_scanner_token(scanner
);
38 if (!token
|| token
->type
== '}') break;
40 /* Extract property name. */
42 if (token
->type
!= CSS_TOKEN_IDENT
43 || !check_next_scanner_token(scanner
, ':')) {
44 /* Some pages use style="{ properties }", so we have to
45 * check what to skip to. */
46 if (token
->type
== '{') {
47 skip_scanner_token(scanner
);
49 skip_css_tokens(scanner
, ';');
54 for (i
= 0; css_property_info
[i
].name
; i
++) {
55 struct css_property_info
*info
= &css_property_info
[i
];
57 if (scanner_token_strlcasecmp(token
, info
->name
, -1)) {
63 /* Skip property name and separator and check for expression */
64 if (!skip_css_tokens(scanner
, ':')) {
65 assert(!scanner_has_tokens(scanner
));
70 /* Unknown property, check the next one. */
74 /* We might be on track of something, cook up the struct. */
76 prop
= mem_calloc(1, sizeof(*prop
));
80 prop
->type
= property_info
->type
;
81 prop
->value_type
= property_info
->value_type
;
82 if (!css_parse_value(property_info
, &prop
->value
, scanner
)) {
86 add_to_list(*props
, prop
);
88 /* Maybe we have something else to go yet? */
91 skip_css_tokens(scanner
, ';');
/* Skip a braced block: call skip_css_tokens(scanner, '{') and, when that
 * returns non-zero, skip_css_tokens(scanner, '}').
 *
 * The expansion is wrapped in do { } while (0) so that
 * `skip_css_block(scanner);' is exactly one statement. The previous bare
 * `if (...) ...;' form expanded to two statements when followed by a
 * semicolon, which breaks (or silently re-binds) a following `else'.
 *
 * TODO: We should handle skipping of blocks better, so that nested blocks
 * like "{ { } }" will be handled correctly. --jonas */
#define skip_css_block(scanner) \
	do { \
		if (skip_css_tokens(scanner, '{')) \
			skip_css_tokens(scanner, '}'); \
	} while (0)
107 * | media_types ',' <ident>
110 * '@charset' <string> ';'
111 * | '@import' <string> media_types ';'
112 * | '@import' <uri> media_types ';'
113 * | '@media' media_types '{' ruleset* '}'
114 * | '@page' <ident>? [':' <ident>]? '{' properties '}'
115 * | '@font-face' '{' properties '}'
/* Handle one @-rule, starting at the scanner's current token.
 *
 * Visible behaviour per token type:
 *  - CSS_TOKEN_AT_IMPORT: fetch the next token and, if it is a
 *    CSS_TOKEN_STRING or CSS_TOKEN_URL, pass its string and length together
 *    with @base_uri to the css->import() callback; then skip to the
 *    semicolon.
 *  - CSS_TOKEN_AT_CHARSET: skipped up to the semicolon.
 *  - CSS_TOKEN_AT_FONT_FACE, CSS_TOKEN_AT_MEDIA, CSS_TOKEN_AT_PAGE: the
 *    block is skipped with skip_css_block(); its contents are not parsed
 *    here.
 *  - CSS_TOKEN_AT_KEYWORD: tokens are fetched in a loop; a semicolon is
 *    skipped with skip_scanner_token() and a braced block with
 *    skip_css_block().
 *
 * NOTE(review): the INTERNAL() report at the end presumably belongs to the
 * default branch of the switch, but the default label, the break
 * statements and the closing braces are not visible in this listing (its
 * embedded line numbers have gaps) -- confirm against the complete file. */
118 css_parse_atrule(struct css_stylesheet
*css
, struct scanner
*scanner
,
119 struct uri
*base_uri
)
121 struct scanner_token
*token
= get_scanner_token(scanner
);
123 /* Skip skip skip that code */
124 switch (token
->type
) {
125 case CSS_TOKEN_AT_IMPORT
:
126 token
= get_next_scanner_token(scanner
);
129 if (token
->type
== CSS_TOKEN_STRING
130 || token
->type
== CSS_TOKEN_URL
) {
132 css
->import(css
, base_uri
, token
->string
, token
->length
);
134 skip_css_tokens(scanner
, ';');
137 case CSS_TOKEN_AT_CHARSET
:
138 skip_css_tokens(scanner
, ';');
141 case CSS_TOKEN_AT_FONT_FACE
:
142 case CSS_TOKEN_AT_MEDIA
:
143 case CSS_TOKEN_AT_PAGE
:
144 skip_css_block(scanner
);
147 case CSS_TOKEN_AT_KEYWORD
:
148 /* TODO: Unknown @-rule, so skip either until ';' or past the next block. */
149 while (scanner_has_tokens(scanner
)) {
150 token
= get_next_scanner_token(scanner
);
154 if (token
->type
== ';') {
155 skip_scanner_token(scanner
);
158 } else if (token
->type
== '{') {
159 skip_css_block(scanner
);
165 INTERNAL("@-rule parser called without atrule.");
/* List element wrapping one parsed selector. css_parse_selector() allocates
 * one of these per selector and adds it to its output list;
 * css_parse_ruleset() walks that list and binds the parsed properties to
 * each pkg->selector. */
170 struct selector_pkg
{
171 LIST_HEAD(struct selector_pkg
);
172 struct css_selector
*selector
;
/* Attach @selector to the list @sels, merging it into an already present
 * matching selector (the "twin") when find_css_selector() returns one.
 *
 * With a twin, @selector is merged into it by merge_css_selectors(), each
 * of @selector's leaves is re-parented under twin->leaves by a recursive
 * call, and @selector is released with done_css_selector(). Otherwise
 * @selector is unlinked from its current list (when selector->next is set)
 * and added to @sels.
 *
 * @watch is compared against @selector just before @selector is released.
 * NOTE(review): the statement guarded by that comparison is not visible in
 * this listing -- presumably it redirects *watch to the twin; confirm.
 * The remaining find_css_selector() arguments and the lines that choose
 * between the twin and no-twin paths are likewise missing from this view.
 *
 * Returns the twin if one was found, @selector otherwise. */
175 struct css_selector
*
176 reparent_selector(struct list_head
*sels
, struct css_selector
*selector
,
177 struct css_selector
**watch
)
179 struct css_selector
*twin
= find_css_selector(sels
, selector
->type
,
184 merge_css_selectors(twin
, selector
);
185 /* Reparent leaves. */
186 while (selector
->leaves
.next
!= &selector
->leaves
) {
187 struct css_selector
*leaf
= selector
->leaves
.next
;
189 reparent_selector(&twin
->leaves
, leaf
, watch
);
191 if (*watch
== selector
)
193 done_css_selector(selector
);
195 if (selector
->next
) del_from_list(selector
);
196 add_to_list(*sels
, selector
);
199 return twin
? twin
: selector
;
202 /* Our selector grammar:
205 * element_name? ('#' id)? ('.' class)? (':' pseudo_class)? \
206 * ((' ' | '>') selector)?
/* Parse the selector part of a ruleset from @scanner.
 *
 * Selectors are read fragment by fragment. A fragment that is not a plain
 * CSS_TOKEN_IDENT is classified in the switch below, which sets its
 * selector type and its relation to the previous fragment. For every
 * selector chain one struct selector_pkg is allocated and added to
 * @selectors, and pkg->selector is kept pointing at the selector
 * that the ruleset's properties are later bound to.
 *
 * A fragment followed by a comma or an opening brace is treated as the last
 * one of its chain (last_fragment); only then is @css, rather than NULL,
 * passed to get_css_base_selector(). After the loop, a selector that was
 * still being composed is released with done_css_selector().
 *
 * NOTE(review): this listing has gaps in its embedded line numbers, so
 * several case labels, break/continue/goto statements, call arguments and
 * closing braces are not visible. The exact control flow between the
 * visible statements has to be confirmed against the complete file. */
210 css_parse_selector(struct css_stylesheet
*css
, struct scanner
*scanner
,
211 struct list_head
*selectors
)
213 /* Shell for the last selector (the whole selector chain, that is). */
214 struct selector_pkg
*pkg
= NULL
;
215 /* In 'p#x.y i.z', it's NULL for 'p', 'p' for '#x', '.y' and 'i', and
217 struct css_selector
*prev_element_selector
= NULL
;
218 /* In 'p#x.y:q i', it's NULL for 'p' and '#x', '#x' for '.y', and '.y'
219 * for ':q', and again NULL for 'i'. */
220 struct css_selector
*prev_specific_selector
= NULL
;
221 /* In 'p#x.y div.z:a' it is NULL for 'p#x.y' and 'div', and 'p' for
222 * '.z' and ':a'. So the difference from @prev_element_selector is that
223 * it is changed after the current selector fragment is finished, not
224 * right after the base selector is loaded. So it is set differently
225 * for the '#x.y' and '.z:a' parts of selector. */
226 struct css_selector
*last_chained_selector
= NULL
;
227 /* In 'p#x.y div.z:a, i.b {}', it's set for ':a' and '.b'. */
228 int last_fragment
= 0;
229 /* In 'p#x .y', it's set for 'p' and '.y'. Note that it is always set in
230 * the previous iteration so it's valid for the current token only
231 * before "saving" the token. */
232 int selector_start
= 1;
234 /* FIXME: element can be even '*' --pasky */
236 while (scanner_has_tokens(scanner
)) {
237 struct scanner_token
*token
= get_scanner_token(scanner
);
238 struct scanner_token last_token
;
239 struct css_selector
*selector
;
240 enum css_selector_relation reltype
= CSR_ROOT
;
241 enum css_selector_type seltype
= CST_ELEMENT
;
244 assert(!last_fragment
);
247 if (token
->type
== '{'
248 || token
->type
== '}'
249 || token
->type
== ';')
253 /* Examine the selector fragment */
255 if (token
->type
!= CSS_TOKEN_IDENT
) {
256 switch (token
->type
) {
258 case CSS_TOKEN_HEX_COLOR
:
260 reltype
= selector_start
? CSR_ANCESTOR
: CSR_SPECIFITY
;
265 reltype
= selector_start
? CSR_ANCESTOR
: CSR_SPECIFITY
;
269 seltype
= CST_PSEUDO
;
270 reltype
= selector_start
? CSR_ANCESTOR
: CSR_SPECIFITY
;
274 seltype
= CST_ELEMENT
;
275 reltype
= CSR_PARENT
;
279 /* FIXME: Temporary fix for this weird CSS
280 * precedence thing. ')' has higher than ','
281 * and it can cause problems when skipping
282 * here. The reason is for the function()
283 * parsing. Hmm... --jonas */
284 if (!skip_css_tokens(scanner
, ','))
285 skip_scanner_token(scanner
);
286 seltype
= CST_INVALID
;
290 if (seltype
== CST_INVALID
)
293 /* Hexcolor and hash already contain the ident
295 if (token
->type
!= CSS_TOKEN_HEX_COLOR
296 && token
->type
!= CSS_TOKEN_HASH
) {
297 token
= get_next_scanner_token(scanner
);
299 if (token
->type
!= CSS_TOKEN_IDENT
) /* wtf */
302 /* Skip the leading '#'. */
303 token
->string
++, token
->length
--;
307 if (pkg
) reltype
= CSR_ANCESTOR
;
311 /* Look ahead at what's coming next */
313 copy_struct(&last_token
, token
);
314 /* Detect whether upcoming tokens are separated by
315 * whitespace or not (that's important for determining
316 * whether it's a combinator or specificitier). */
317 if (last_token
.string
+ last_token
.length
< scanner
->end
) {
318 selector_start
= last_token
.string
[last_token
.length
];
319 selector_start
= (selector_start
!= '#'
320 && selector_start
!= '.'
321 && selector_start
!= ':');
322 } /* else it doesn't matter as we are gonna bail out anyway. */
324 token
= get_next_scanner_token(scanner
);
326 last_fragment
= (token
->type
== ',' || token
->type
== '{');
329 /* Register the selector */
332 selector
= get_css_base_selector(
333 last_fragment
? css
: NULL
, seltype
,
335 last_token
.string
, last_token
.length
);
336 if (!selector
) continue;
338 pkg
= mem_calloc(1, sizeof(*pkg
));
340 add_to_list(*selectors
, pkg
);
341 pkg
->selector
= selector
;
343 } else if (reltype
== CSR_SPECIFITY
) {
344 /* We append under the last fragment. */
345 struct css_selector
*base_sel
= prev_specific_selector
;
347 if (!base_sel
) base_sel
= prev_element_selector
;
350 selector
= get_css_selector(&base_sel
->leaves
,
354 if (!selector
) continue;
356 if (last_chained_selector
) {
357 /* The situation is like: 'div p#x', now it was
358 * 'p -> div', but we need to redo that as
359 * '(p ->) #x -> div'. */
360 del_from_list(last_chained_selector
);
361 add_to_list(selector
->leaves
,
362 last_chained_selector
);
365 if (pkg
->selector
== base_sel
) {
366 /* This is still just specificitying offspring
367 * of the previous pkg->selector. */
368 pkg
->selector
= selector
;
372 /* This is the last fragment of the selector
373 * chain, that means the last base fragment
374 * wasn't marked so and thus wasn't bound to
375 * the stylesheet. Let's do that now. */
376 assert(prev_element_selector
);
377 prev_element_selector
->relation
= CSR_ROOT
;
378 prev_element_selector
=
379 reparent_selector(&css
->selectors
,
380 prev_element_selector
,
384 } else /* CSR_PARENT || CSR_ANCESTOR */ {
385 /* We - in the perlish speak - unshift in front
386 * of the previous selector fragment and reparent
387 * it to the upcoming one. */
388 selector
= get_css_base_selector(
389 last_fragment
? css
: NULL
, seltype
,
391 last_token
.string
, last_token
.length
);
392 if (!selector
) continue;
394 assert(prev_element_selector
);
395 add_to_list(selector
->leaves
, prev_element_selector
);
396 last_chained_selector
= prev_element_selector
;
398 prev_element_selector
->relation
= reltype
;
402 /* Record the selector fragment for future generations */
404 if (reltype
== CSR_SPECIFITY
) {
405 prev_specific_selector
= selector
;
407 prev_element_selector
= selector
;
408 prev_specific_selector
= NULL
;
412 /* What to do next */
415 /* Next selector coming, clean up. */
416 pkg
= NULL
; last_fragment
= 0; selector_start
= 1;
417 prev_element_selector
= NULL
;
418 prev_specific_selector
= NULL
;
419 last_chained_selector
= NULL
;
422 if (token
->type
== ',') {
423 /* Another selector hooked to these properties. */
424 skip_scanner_token(scanner
);
426 } else if (token
->type
== '{') {
427 /* End of selector list. */
430 } /* else Another selector fragment probably coming up. */
433 /* Wipe the selector we were currently composing, if any. */
435 if (prev_element_selector
)
436 done_css_selector(prev_element_selector
);
446 * selector [ ',' selector ]* '{' properties '}'
/* Parse one ruleset: a selector list followed by a braced property block.
 *
 * The selectors are collected into a local list by css_parse_selector().
 * If that list is empty, or no opening brace can be skipped to, the
 * selector list is freed and the input is skipped up to the closing brace.
 * Otherwise the properties are parsed once into a second local list,
 * bound to every collected selector with add_selector_properties(), and
 * both local lists are freed.
 *
 * NOTE(review): the early exit after the failure branch and the closing
 * braces are not visible in this listing -- confirm against the complete
 * file. */
449 css_parse_ruleset(struct css_stylesheet
*css
, struct scanner
*scanner
)
451 INIT_LIST_HEAD(selectors
);
452 INIT_LIST_HEAD(properties
);
453 struct selector_pkg
*pkg
;
455 css_parse_selector(css
, scanner
, &selectors
);
456 if (list_empty(selectors
)
457 || !skip_css_tokens(scanner
, '{')) {
458 if (!list_empty(selectors
)) free_list(selectors
);
459 skip_css_tokens(scanner
, '}');
464 /* We don't handle the case where a property has already been added to
465 * a selector. That doesn't matter though, because the best one will
466 * always be the last one (FIXME: '!important'), therefore the applier
467 * will take it last and it will have the "final" effect.
469 * So it's only a little waste and no real harm. The thing is, what do
470 * you do when you have 'background: #fff' and then 'background:
471 * x-repeat'? It would require yet another logic to handle merging of
472 * these etc and the induced overhead would in most cases mean more
473 * waste than having the property multiple times in a selector, I
474 * believe. --pasky */
/* NOTE(review): pkg is not read between this assignment and the foreach()
 * below, which presumably re-initialises it -- the assignment looks
 * redundant; confirm against the foreach() definition. */
476 pkg
= selectors
.next
;
477 css_parse_properties(&properties
, scanner
);
479 skip_css_tokens(scanner
, '}');
481 /* Mirror the properties to all the selectors. */
482 foreach (pkg
, selectors
) {
484 DBG("Binding properties (!!%d) to selector %s (type %d, relation %d, children %d)",
485 !list_empty(properties
),
486 pkg
->selector
->name
, pkg
->selector
->type
,
487 pkg
->selector
->relation
,
488 !list_empty(pkg
->selector
->leaves
));
490 add_selector_properties(pkg
->selector
, &properties
);
492 free_list(selectors
);
493 free_list(properties
);
/* Parse the stylesheet text delimited by @string and @end into @css.
 *
 * A scanner is initialised over the text with css_scanner_info. While it
 * has tokens, the current token's type selects the handler: the
 * CSS_TOKEN_AT_* types go to css_parse_atrule() (which also receives
 * @base_uri), and the remaining path calls css_parse_ruleset().
 * dump_css_selector_tree() is called on css->selectors at the end.
 *
 * NOTE(review): the default label, the break statements and any
 * conditional compilation around the DBG-style dump are not visible in
 * this listing (its embedded line numbers have gaps) -- confirm against
 * the complete file. */
498 css_parse_stylesheet(struct css_stylesheet
*css
, struct uri
*base_uri
,
499 unsigned char *string
, unsigned char *end
)
501 struct scanner scanner
;
503 init_scanner(&scanner
, &css_scanner_info
, string
, end
);
505 while (scanner_has_tokens(&scanner
)) {
506 struct scanner_token
*token
= get_scanner_token(&scanner
);
510 switch (token
->type
) {
511 case CSS_TOKEN_AT_KEYWORD
:
512 case CSS_TOKEN_AT_CHARSET
:
513 case CSS_TOKEN_AT_FONT_FACE
:
514 case CSS_TOKEN_AT_IMPORT
:
515 case CSS_TOKEN_AT_MEDIA
:
516 case CSS_TOKEN_AT_PAGE
:
517 css_parse_atrule(css
, &scanner
, base_uri
);
521 /* And WHAT ELSE could it be?! */
522 css_parse_ruleset(css
, &scanner
);
526 dump_css_selector_tree(&css
->selectors
);