src/document/css/parser.c

   1 /* CSS main parser */
   2
   3 #ifdef HAVE_CONFIG_H
   4 #include "config.h"
   5 #endif
   6
   7 #include <stdlib.h>
   8 #include <string.h>
   9
  10 #include "elinks.h"
  11
  12 #include "document/css/parser.h"
  13 #include "document/css/property.h"
  14 #include "document/css/scanner.h"
  15 #include "document/css/stylesheet.h"
  16 #include "document/css/value.h"
  17 #include "document/html/parser.h"
  18 #include "util/color.h"
  19 #include "util/lists.h"
  20 #include "util/error.h"
  21 #include "util/memory.h"
  22 #include "util/string.h"
  23
  24 /* #define DEBUG_CSS */
  25
  26
  27 void
  28 css_parse_properties(struct list_head *props, struct scanner *scanner)
  29 {
  30         assert(props && scanner);
  31
  32         while (scanner_has_tokens(scanner)) {
  33                 struct css_property_info *property_info = NULL;
  34                 struct css_property *prop;
  35                 struct scanner_token *token = get_scanner_token(scanner);
  36                 int i;
  37
  38                 if (!token || token->type == '}') break;
  39
  40                 /* Extract property name. */
  41
  42                 if (token->type != CSS_TOKEN_IDENT
  43                     || !check_next_scanner_token(scanner, ':')) {
  44                         /* Some use style="{ properties }" so we have to be
  45                          * check what to skip to. */
  46                         if (token->type == '{') {
  47                                 skip_scanner_token(scanner);
  48                         } else {
  49                                 skip_css_tokens(scanner, ';');
  50                         }
  51                         continue;
  52                 }
  53
  54                 for (i = 0; css_property_info[i].name; i++) {
  55                         struct css_property_info *info = &css_property_info[i];
  56
  57                         if (scanner_token_strlcasecmp(token, info->name, -1)) {
  58                                 property_info = info;
  59                                 break;
  60                         }
  61                 }
  62
  63                 /* Skip property name and separator and check for expression */
  64                 if (!skip_css_tokens(scanner, ':')) {
  65                         assert(!scanner_has_tokens(scanner));
  66                         break;
  67                 }
  68
  69                 if (!property_info) {
  70                         /* Unknown property, check the next one. */
  71                         goto ride_on;
  72                 }
  73
  74                 /* We might be on track of something, cook up the struct. */
  75
  76                 prop = mem_calloc(1, sizeof(*prop));
  77                 if (!prop) {
  78                         goto ride_on;
  79                 }
  80                 prop->type = property_info->type;
  81                 prop->value_type = property_info->value_type;
  82                 if (!css_parse_value(property_info, &prop->value, scanner)) {
  83                         mem_free(prop);
  84                         goto ride_on;
  85                 }
  86                 add_to_list(*props, prop);
  87
  88                 /* Maybe we have something else to go yet? */
  89
  90 ride_on:
  91                 skip_css_tokens(scanner, ';');
  92         }
  93 }
  94
  95
  96 /* TODO: We should handle support for skipping blocks better like "{ { } }"
  97  * will be handled correctly. --jonas */
  98 #define skip_css_block(scanner) \
  99         if (skip_css_tokens(scanner, '{')) skip_css_tokens(scanner, '}');
 100
 101
 102 /* Atrules grammer:
 103  *
 104  * media_types:
 105  *        <empty>
 106  *      | <ident>
 107  *      | media_types ',' <ident>
 108  *
 109  * atrule:
 110  *        '@charset' <string> ';'
 111  *      | '@import' <string> media_types ';'
 112  *      | '@import' <uri> media_types ';'
 113  *      | '@media' media_types '{' ruleset* '}'
 114  *      | '@page' <ident>? [':' <ident>]? '{' properties '}'
 115  *      | '@font-face' '{' properties '}'
 116  */
 117 static void
 118 css_parse_atrule(struct css_stylesheet *css, struct scanner *scanner,
 119                  struct uri *base_uri)
 120 {
 121         struct scanner_token *token = get_scanner_token(scanner);
 122
 123         /* Skip skip skip that code */
 124         switch (token->type) {
 125                 case CSS_TOKEN_AT_IMPORT:
 126                         token = get_next_scanner_token(scanner);
 127                         if (!token) break;
 128
 129                         if (token->type == CSS_TOKEN_STRING
 130                             || token->type == CSS_TOKEN_URL) {
 131                                 assert(css->import);
 132                                 css->import(css, base_uri, token->string, token->length);
 133                         }
 134                         skip_css_tokens(scanner, ';');
 135                         break;
 136
 137                 case CSS_TOKEN_AT_CHARSET:
 138                         skip_css_tokens(scanner, ';');
 139                         break;
 140
 141                 case CSS_TOKEN_AT_FONT_FACE:
 142                 case CSS_TOKEN_AT_MEDIA:
 143                 case CSS_TOKEN_AT_PAGE:
 144                         skip_css_block(scanner);
 145                         break;
 146
 147                 case CSS_TOKEN_AT_KEYWORD:
 148                         /* TODO: Unkown @-rule so either skip til ';' or next block. */
 149                         while (scanner_has_tokens(scanner)) {
 150                                 token = get_next_scanner_token(scanner);
 151
 152                                 if (!token) break;
 153
 154                                 if (token->type == ';') {
 155                                         skip_scanner_token(scanner);
 156                                         break;
 157
 158                                 } else if (token->type == '{') {
 159                                         skip_css_block(scanner);
 160                                         break;
 161                                 }
 162                         }
 163                         break;
 164                 default:
 165                         INTERNAL("@-rule parser called without atrule.");
 166         }
 167 }
 168
 169
/* List node wrapping one selector. css_parse_selector() allocates one per
 * selector chain and links it into the list it is given; css_parse_ruleset()
 * then walks that list to attach the ruleset's properties to each wrapped
 * selector and finally frees the nodes. */
struct selector_pkg {
        /* List linkage; declared first in the struct. */
        LIST_HEAD(struct selector_pkg);
        /* The selector the properties get bound to. */
        struct css_selector *selector;
};
 174
 175 struct css_selector *
 176 reparent_selector(struct list_head *sels, struct css_selector *selector,
 177                   struct css_selector **watch)
 178 {
 179         struct css_selector *twin = find_css_selector(sels, selector->type,
 180                                                       selector->relation,
 181                                                       selector->name, -1);
 182
 183         if (twin) {
 184                 merge_css_selectors(twin, selector);
 185                 /* Reparent leaves. */
 186                 while (selector->leaves.next != &selector->leaves) {
 187                         struct css_selector *leaf = selector->leaves.next;
 188
 189                         reparent_selector(&twin->leaves, leaf, watch);
 190                 }
 191                 if (*watch == selector)
 192                         *watch = twin;
 193                 done_css_selector(selector);
 194         } else {
 195                 if (selector->next) del_from_list(selector);
 196                 add_to_list(*sels, selector);
 197         }
 198
 199         return twin ? twin : selector;
 200 }
 201
/* Our selector grammar:
 *
 * selector:
 *        element_name? ('#' id)? ('.' class)? (':' pseudo_class)? \
 *                ((' ' | '>') selector)?
 *
 * Parses a comma-separated list of selectors from @scanner. Parsing stops
 * when the current token is '{', '}' or ';' (which is left unconsumed) or
 * when the scanner runs out of tokens. For every selector chain a struct
 * selector_pkg is linked into @selectors, with pkg->selector pointing at the
 * selector the ruleset's properties should be bound to. A chain that is still
 * incomplete when the loop ends is discarded again. @css is the stylesheet
 * that finished chains get registered with.
 */
static void
css_parse_selector(struct css_stylesheet *css, struct scanner *scanner,
                   struct list_head *selectors)
{
        /* Shell for the last selector (the whole selector chain, that is). */
        struct selector_pkg *pkg = NULL;
        /* In 'p#x.y i.z', it's NULL for 'p', 'p' for '#x', '.y' and 'i', and
         * 'i' for '.z'. */
        struct css_selector *prev_element_selector = NULL;
        /* In 'p#x.y:q i', it's NULL for 'p' and '#x', '#x' for '.y', and '.y'
         * for ':q', and again NULL for 'i'. */
        struct css_selector *prev_specific_selector = NULL;
        /* In 'p#x.y div.z:a' it is NULL for 'p#x.y' and 'div', and 'p' for
         * '.z' and ':a'. So the difference from @prev_element_selector is that
         * it is changed after the current selector fragment is finished, not
         * right after the base selector is loaded. So it is set differently
         * for the '#x.y' and '.z:a' parts of selector. */
        struct css_selector *last_chained_selector = NULL;
        /* In 'p#x.y div.z:a, i.b {}', it's set for ':a' and '.b'. */
        int last_fragment = 0;
        /* In 'p#x .y', it's set for 'p' and '.y'. Note that it is always set in
         * the previous iteration so it's valid for the current token only
         * before "saving" the token. */
        int selector_start = 1;

        /* FIXME: element can be even '*' --pasky */

        while (scanner_has_tokens(scanner)) {
                struct scanner_token *token = get_scanner_token(scanner);
                /* Copy of the fragment's name token, taken before the scanner
                 * is advanced to look at what follows. */
                struct scanner_token last_token;
                struct css_selector *selector;
                enum css_selector_relation reltype = CSR_ROOT;
                enum css_selector_type seltype = CST_ELEMENT;

                assert(token);
                /* last_fragment is reset at the bottom of every iteration
                 * that completes normally. */
                assert(!last_fragment);


                /* These tokens end the selector list; leave them for the
                 * caller. */
                if (token->type == '{'
                    || token->type == '}'
                    || token->type == ';')
                        break;


                /* Examine the selector fragment */

                if (token->type != CSS_TOKEN_IDENT) {
                        switch (token->type) {
                        case CSS_TOKEN_HASH:
                        case CSS_TOKEN_HEX_COLOR:
                                seltype = CST_ID;
                                reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY;
                                break;

                        case '.':
                                seltype = CST_CLASS;
                                reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY;
                                break;

                        case ':':
                                seltype = CST_PSEUDO;
                                reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY;
                                break;

                        case '>':
                                seltype = CST_ELEMENT;
                                reltype = CSR_PARENT;
                                break;

                        default:
                                /* FIXME: Temporary fix for this weird CSS
                                 * precedence thing. ')' has higher than ','
                                 * and it can cause problems when skipping
                                 * here. The reason is for the function()
                                 * parsing. Hmm... --jonas */
                                if (!skip_css_tokens(scanner, ','))
                                        skip_scanner_token(scanner);
                                seltype = CST_INVALID;
                                break;
                        }

                        /* Unsupported token; the skipping above already moved
                         * the scanner, so just try again. */
                        if (seltype == CST_INVALID)
                                continue;

                        /* Hexcolor and hash already contains the ident
                         * inside. */
                        if (token->type != CSS_TOKEN_HEX_COLOR
                            && token->type != CSS_TOKEN_HASH) {
                                /* '.', ':' and '>' are followed by the name in
                                 * a separate ident token. */
                                token = get_next_scanner_token(scanner);
                                if (!token) break;
                                if (token->type != CSS_TOKEN_IDENT) /* wtf */
                                        continue;
                        } else {
                                /* Skip the leading '#'. */
                                token->string++, token->length--;
                        }

                } else {
                        /* A bare element name inside an already started chain
                         * is treated as a descendant combinator. */
                        if (pkg) reltype = CSR_ANCESTOR;
                }


                /* Look ahead at what's coming next */

                copy_struct(&last_token, token);
                /* Detect whether upcoming tokens are separated by
                 * whitespace or not (that's important for determining
                 * whether it's a combinator or a specifier). This peeks at the
                 * character right behind the token; '#', '.' and ':' mean the
                 * next fragment further specifies the current one. */
                if (last_token.string + last_token.length < scanner->end) {
                        selector_start = last_token.string[last_token.length];
                        selector_start = (selector_start != '#'
                                          && selector_start != '.'
                                          && selector_start != ':');
                } /* else it doesn't matter as we are gonna bail out anyway. */

                token = get_next_scanner_token(scanner);
                if (!token) break;
                last_fragment = (token->type == ',' || token->type == '{');


                /* Register the selector */

                /* NOTE(review): every 'continue' in the three branches below
                 * runs after last_fragment has been computed. If it is set at
                 * that point, the assert(!last_fragment) at the top of the
                 * loop fires on the next iteration - confirm this is the
                 * intended handling of allocation failures. */
                if (!pkg) {
                        /* First fragment of a new chain. It is only handed
                         * the stylesheet when it is also the last one. */
                        selector = get_css_base_selector(
                                        last_fragment ? css : NULL, seltype,
                                        CSR_ROOT,
                                        last_token.string, last_token.length);
                        if (!selector) continue;

                        pkg = mem_calloc(1, sizeof(*pkg));
                        /* NOTE(review): nothing releases 'selector' on this
                         * path; presumably leaked when it was obtained with a
                         * NULL stylesheet - verify against
                         * get_css_base_selector(). */
                        if (!pkg) continue;
                        add_to_list(*selectors, pkg);
                        pkg->selector = selector;

                } else if (reltype == CSR_SPECIFITY) {
                        /* We append under the last fragment. */
                        struct css_selector *base_sel = prev_specific_selector;

                        if (!base_sel) base_sel = prev_element_selector;
                        assert(base_sel);

                        selector = get_css_selector(&base_sel->leaves,
                                                    seltype, reltype,
                                                    last_token.string,
                                                    last_token.length);
                        if (!selector) continue;

                        if (last_chained_selector) {
                                /* The situation is like: 'div p#x', now it was
                                 * 'p -> div', but we need to redo that as
                                 * '(p ->) #x -> div'. */
                                del_from_list(last_chained_selector);
                                add_to_list(selector->leaves,
                                            last_chained_selector);
                        }

                        if (pkg->selector == base_sel) {
                                /* This is still just specificitying offspring
                                 * of the previous pkg->selector. */
                                pkg->selector = selector;
                        }

                        if (last_fragment) {
                                /* This is the last fragment of the selector
                                 * chain, that means the last base fragment
                                 * wasn't marked so and thus wasn't bound to
                                 * the stylesheet. Let's do that now. */
                                assert(prev_element_selector);
                                prev_element_selector->relation = CSR_ROOT;
                                /* reparent_selector() may merge the base
                                 * fragment into an existing one; it keeps
                                 * pkg->selector up to date in that case. */
                                prev_element_selector =
                                        reparent_selector(&css->selectors,
                                                         prev_element_selector,
                                                         &pkg->selector);
                        }

                } else /* CSR_PARENT || CSR_ANCESTOR */ {
                        /* We - in the perlish speak - unshift in front
                         * of the previous selector fragment and reparent
                         * it to the upcoming one. */
                        selector = get_css_base_selector(
                                        last_fragment ? css : NULL, seltype,
                                        CSR_ROOT,
                                        last_token.string, last_token.length);
                        if (!selector) continue;

                        assert(prev_element_selector);
                        add_to_list(selector->leaves, prev_element_selector);
                        last_chained_selector = prev_element_selector;

                        prev_element_selector->relation = reltype;
                }


                /* Record the selector fragment for future generations */

                if (reltype == CSR_SPECIFITY) {
                        prev_specific_selector = selector;
                } else {
                        prev_element_selector = selector;
                        prev_specific_selector = NULL;
                }


                /* What to do next */

                if (last_fragment) {
                        /* Next selector coming, clean up. */
                        pkg = NULL; last_fragment = 0; selector_start = 1;
                        prev_element_selector = NULL;
                        prev_specific_selector = NULL;
                        last_chained_selector = NULL;
                }

                if (token->type == ',') {
                        /* Another selector hooked to these properties. */
                        skip_scanner_token(scanner);

                } else if (token->type == '{') {
                        /* End of selector list. */
                        break;

                } /* else Another selector fragment probably coming up. */
        }

        /* Wipe the selector we were currently composing, if any. pkg is only
         * non-NULL here when the loop ended in the middle of a chain. */
        if (pkg) {
                if (prev_element_selector)
                        done_css_selector(prev_element_selector);
                del_from_list(pkg);
                mem_free(pkg);
        }
}
 441
 442
 443 /* Ruleset grammar:
 444  *
 445  * ruleset:
 446  *        selector [ ',' selector ]* '{' properties '}'
 447  */
 448 static void
 449 css_parse_ruleset(struct css_stylesheet *css, struct scanner *scanner)
 450 {
 451         INIT_LIST_HEAD(selectors);
 452         INIT_LIST_HEAD(properties);
 453         struct selector_pkg *pkg;
 454
 455         css_parse_selector(css, scanner, &selectors);
 456         if (list_empty(selectors)
 457             || !skip_css_tokens(scanner, '{')) {
 458                 if (!list_empty(selectors)) free_list(selectors);
 459                 skip_css_tokens(scanner, '}');
 460                 return;
 461         }
 462
 463
 464         /* We don't handle the case where a property has already been added to
 465          * a selector. That doesn't matter though, because the best one will be
 466          * always the last one (FIXME: 'important!'), therefore the applier
 467          * will take it last and it will have the "final" effect.
 468          *
 469          * So it's only a little waste and no real harm. The thing is, what do
 470          * you do when you have 'background: #fff' and then 'background:
 471          * x-repeat'? It would require yet another logic to handle merging of
 472          * these etc and the induced overhead would in most cases mean more
 473          * waste that having the property multiple times in a selector, I
 474          * believe. --pasky */
 475
 476         pkg = selectors.next;
 477         css_parse_properties(&properties, scanner);
 478
 479         skip_css_tokens(scanner, '}');
 480
 481         /* Mirror the properties to all the selectors. */
 482         foreach (pkg, selectors) {
 483 #ifdef DEBUG_CSS
 484                 DBG("Binding properties (!!%d) to selector %s (type %d, relation %d, children %d)",
 485                         !list_empty(properties),
 486                         pkg->selector->name, pkg->selector->type,
 487                         pkg->selector->relation,
 488                         !list_empty(pkg->selector->leaves));
 489 #endif
 490                 add_selector_properties(pkg->selector, &properties);
 491         }
 492         free_list(selectors);
 493         free_list(properties);
 494 }
 495
 496
 497 void
 498 css_parse_stylesheet(struct css_stylesheet *css, struct uri *base_uri,
 499                      unsigned char *string, unsigned char *end)
 500 {
 501         struct scanner scanner;
 502
 503         init_scanner(&scanner, &css_scanner_info, string, end);
 504
 505         while (scanner_has_tokens(&scanner)) {
 506                 struct scanner_token *token = get_scanner_token(&scanner);
 507
 508                 assert(token);
 509
 510                 switch (token->type) {
 511                 case CSS_TOKEN_AT_KEYWORD:
 512                 case CSS_TOKEN_AT_CHARSET:
 513                 case CSS_TOKEN_AT_FONT_FACE:
 514                 case CSS_TOKEN_AT_IMPORT:
 515                 case CSS_TOKEN_AT_MEDIA:
 516                 case CSS_TOKEN_AT_PAGE:
 517                         css_parse_atrule(css, &scanner, base_uri);
 518                         break;
 519
 520                 default:
 521                         /* And WHAT ELSE could it be?! */
 522                         css_parse_ruleset(css, &scanner);
 523                 }
 524         }
 525 #ifdef DEBUG_CSS
 526         dump_css_selector_tree(&css->selectors);
 527         WDBG("That's it.");
 528 #endif
 529 }