khtml/html/htmlprospectivetokenizer.cpp

   1 /*
   2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
   3  *           (C) 2008 Germain Garand <germain@ebooksfrance.org>
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
  18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25  */
  26
  27 #include "config.h"
  28 #include "htmlprospectivetokenizer.h"
  29
  30 #include <QTime>
  31
  32 #include "html_headimpl.h"
  33 #include "html_documentimpl.h"
  34 #include "htmlparser.h"
  35 #include "dtd.h"
  36
  37 #include <misc/loader.h>
  38 #include <misc/htmlhashes.h>
  39 #include <khtmlview.h>
  40 #include <khtml_part.h>
  41 #include <xml/dom_docimpl.h>
  42 #include <css/csshelper.h>
  43 #include <ecma/kjs_proxy.h>
  44 #include <kglobal.h>
  45 #include <ctype.h>
  46 #include <assert.h>
  47 #include <QtCore/QVariant>
  48 #include <kdebug.h>
  49 #include <stdlib.h>
  50
#ifdef __GNUC__
// The main tokenizer includes this too, so we are getting two copies of the data. However, this way the code gets inlined.
#include "kentities.c"
#else
// Not inlined for non-GCC compilers: only declare the entity record and the
// lookup function here. consumeEntity() below calls kde_findEntity() and
// reads the 'code' member of the record it returns.
struct entity {
    const char* name;
    int code;
};
const struct entity *kde_findEntity (register const char *str, register unsigned int len);
#endif
  62
// Set to a non-zero value to compile in the debug output and timing code
// that is guarded by "#if PRELOAD_DEBUG" in this file.
#define PRELOAD_DEBUG 0

// Helpers used to store a code point above 0xFFFF as two 16-bit units.
// U16_TRAIL keeps the low 10 bits of 'sup' and ORs in 0xDC00.
// U16_LEAD shifts 'sup' right by 10 bits and adds 0xD7C0, which equals
// 0xD800 - (0x10000 >> 10).
#define U16_TRAIL(sup) (ushort)(((sup)&0x3ff)|0xdc00)
#define U16_LEAD(sup) (ushort)(((sup)>>10)+0xd7c0)
  67
  68 using namespace khtml;
  69
  70 ProspectiveTokenizer::ProspectiveTokenizer(DOM::DocumentImpl * doc)
  71     : m_inProgress(false)
  72     , m_tagName(32)
  73     , m_attributeName(32)
  74     , m_attributeValue(255)
  75     , m_cssRule(16)
  76     , m_cssRuleValue(255)
  77     , m_timeUsed(0)
  78     , m_document(doc)
  79 {
  80 #if PRELOAD_DEBUG
  81     kDebug() << "CREATING PRELOAD SCANNER FOR" << m_document << m_document->URL().prettyUrl();
  82 #endif
  83 }
  84
  85 ProspectiveTokenizer::~ProspectiveTokenizer()
  86 {
  87 #if PRELOAD_DEBUG
  88     fprintf(stderr, "DELETING PRELOAD SCANNER FOR %p\n", m_document);
  89     fprintf(stderr, "TOTAL TIME USED %dms\n", m_timeUsed);
  90 #endif
  91 }
  92
  93 void ProspectiveTokenizer::begin()
  94 {
  95     assert(!m_inProgress);
  96     reset();
  97     m_inProgress = true;
  98 }
  99
 100 void ProspectiveTokenizer::end()
 101 {
 102     assert(m_inProgress);
 103     m_inProgress = false;
 104 }
 105
 106 void ProspectiveTokenizer::reset()
 107 {
 108     m_source.clear();
 109
 110     m_state = Data;
 111     m_escape = false;
 112     m_contentModel = PCDATA;
 113     m_commentPos = 0;
 114
 115     m_closeTag = false;
 116     m_tagName.clear();
 117     m_attributeName.clear();
 118     m_attributeValue.clear();
 119     m_lastStartTag.clear();
 120     m_lastStartTagId = 0;
 121
 122     m_urlToLoad = "";
 123     m_linkIsStyleSheet = false;
 124     m_lastCharacterIndex = 0;
 125     clearLastCharacters();
 126
 127     m_cssState = CSSInitial;
 128     m_cssRule.clear();
 129     m_cssRuleValue.clear();
 130 }
 131
 132 void ProspectiveTokenizer::write(const TokenizerString& source)
 133 {
 134 #if PRELOAD_DEBUG
 135     QTime t;
 136     t.start();
 137 #endif
 138
 139     tokenize(source);
 140
 141 #if PRELOAD_DEBUG
 142     m_timeUsed += t.elapsed();
 143 #endif
 144 }
 145
 146 static inline bool isWhitespace(QChar c)
 147 {
 148     return c == ' ' || c == '\n' || c == '\r' || c == '\t';
 149 }
 150
 151 inline void ProspectiveTokenizer::clearLastCharacters()
 152 {
 153     memset(m_lastCharacters, 0, lastCharactersBufferSize * sizeof(QChar));
 154 }
 155
 156 inline void ProspectiveTokenizer::rememberCharacter(QChar c)
 157 {
 158     m_lastCharacterIndex = (m_lastCharacterIndex + 1) % lastCharactersBufferSize;
 159     m_lastCharacters[m_lastCharacterIndex] = c;
 160 }
 161
 162 inline bool ProspectiveTokenizer::lastCharactersMatch(const char* chars, unsigned count) const
 163 {
 164     unsigned pos = m_lastCharacterIndex;
 165     while (count) {
 166         if (chars[count - 1] != m_lastCharacters[pos])
 167             return false;
 168         --count;
 169         if (!pos)
 170             pos = lastCharactersBufferSize;
 171         --pos;
 172     }
 173     return true;
 174 }
 175
 176 static inline unsigned legalEntityFor(unsigned value)
 177 {
 178     // FIXME There is a table for more exceptions in the HTML5 specification.
 179     if (value == 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
 180         return 0xFFFD;
 181     return value;
 182 }
 183
 184 unsigned ProspectiveTokenizer::consumeEntity(TokenizerString& source, bool& notEnoughCharacters)
 185 {
 186     enum EntityState {
 187         Initial,
 188         NumberType,
 189         MaybeHex,
 190         Hex,
 191         Decimal,
 192         Named
 193     };
 194     EntityState entityState = Initial;
 195     unsigned result = 0;
 196     QVector<QChar> seenChars(10);
 197     QVector<char> entityName(10);
 198
 199     while (!source.isEmpty()) {
 200         seenChars.append(*source);
 201         ushort cc = source->unicode();
 202         switch (entityState) {
 203         case Initial:
 204             if (isWhitespace(cc) || cc == '<' || cc == '&')
 205                 return 0;
 206             else if (cc == '#')
 207                 entityState = NumberType;
 208             else if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
 209                 entityName.append(cc);
 210                 entityState = Named;
 211             } else
 212                 return 0;
 213             break;
 214         case NumberType:
 215             if (cc == 'x' || cc == 'X')
 216                 entityState = MaybeHex;
 217             else if (cc >= '0' && cc <= '9') {
 218                 entityState = Decimal;
 219                 result = cc - '0';
 220             } else {
 221                 source.push('#');
 222                 return 0;
 223             }
 224             break;
 225         case MaybeHex:
 226             if (cc >= '0' && cc <= '9')
 227                 result = cc - '0';
 228             else if (cc >= 'a' && cc <= 'f')
 229                 result = 10 + cc - 'a';
 230             else if (cc >= 'A' && cc <= 'F')
 231                 result = 10 + cc - 'A';
 232             else {
 233                 source.push(seenChars[1]);
 234                 source.push('#');
 235                 return 0;
 236             }
 237             entityState = Hex;
 238             break;
 239         case Hex:
 240             if (cc >= '0' && cc <= '9')
 241                 result = result * 16 + cc - '0';
 242             else if (cc >= 'a' && cc <= 'f')
 243                 result = result * 16 + 10 + cc - 'a';
 244             else if (cc >= 'A' && cc <= 'F')
 245                 result = result * 16 + 10 + cc - 'A';
 246             else if (cc == ';') {
 247                 source.advance();
 248                 return legalEntityFor(result);
 249             } else
 250                 return legalEntityFor(result);
 251             break;
 252         case Decimal:
 253             if (cc >= '0' && cc <= '9')
 254                 result = result * 10 + cc - '0';
 255             else if (cc == ';') {
 256                 source.advance();
 257                 return legalEntityFor(result);
 258             } else
 259                 return legalEntityFor(result);
 260             break;
 261         case Named:
 262             // This is the attribute only version, generic version matches somewhat differently
 263             while (entityName.size() <= 8) {
 264                 if (cc == ';') {
 265                     const entity* e = kde_findEntity(entityName.data(), entityName.size());
 266                     if (e) {
 267                         source.advance();
 268                         return e->code;
 269                     }
 270                     break;
 271                 }
 272                 if (!(cc >= 'a' && cc <= 'z') && !(cc >= 'A' && cc <= 'Z') && !(cc >= '0' && cc <= '9')) {
 273                     const entity* e = kde_findEntity(entityName.data(), entityName.size());
 274                     if (e)
 275                         return e->code;
 276                     break;
 277                 }
 278                 entityName.append(cc);
 279                 source.advance();
 280                 if (source.isEmpty())
 281                     goto outOfCharacters;
 282                 cc = source->unicode();
 283                 seenChars.append(cc);
 284             }
 285             if (seenChars.size() == 2)
 286                 source.push(seenChars[0]);
 287             else if (seenChars.size() == 3) {
 288                 source.push(seenChars[1]);
 289                 source.push(seenChars[0]);
 290             } else
 291                 source.prepend(TokenizerString(QString(seenChars.data(), seenChars.size() - 1)));
 292             return 0;
 293         }
 294         source.advance();
 295     }
 296 outOfCharacters:
 297     notEnoughCharacters = true;
 298     source.prepend(TokenizerString(QString(seenChars.data(), seenChars.size())));
 299     return 0;
 300 }
 301
/**
 * Appends @p source to the pending input and runs the tokenizer state
 * machine over everything that is available.
 *
 * The function returns early (keeping m_state and the unconsumed input)
 * whenever a state needs more characters than are currently buffered, so a
 * later call can pick up where this one stopped. Inside the switch, "break"
 * consumes the current character via the m_source.advance() at the bottom of
 * the loop, while "continue" re-examines the current input in the new state
 * without consuming anything.
 */
void ProspectiveTokenizer::tokenize(const TokenizerString& source)
{
    assert(m_inProgress);

    m_source.append(source);

    // This is a simplified HTML5 Tokenizer
    // http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
    while (!m_source.isEmpty()) {
        ushort cc = m_source->unicode();
        switch (m_state) {
        case Data:
            // Plain content. Characters are remembered so that the "<!--" and
            // "-->" escape markers can be recognised in RCDATA/CDATA content.
            while (1) {
                rememberCharacter(cc);
                if (cc == '&') {
                    if (m_contentModel == PCDATA || m_contentModel == RCDATA) {
                        m_state = EntityData;
                        break;
                    }
                } else if (cc == '-') {
                    if ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape) {
                        if (lastCharactersMatch("<!--", 4))
                            m_escape = true;
                    }
                } else if (cc == '<') {
                    if (m_contentModel == PCDATA || ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape)) {
                        m_state = TagOpen;
                        break;
                    }
                } else if (cc == '>') {
                     if ((m_contentModel == RCDATA || m_contentModel == CDATA) && m_escape) {
                         if (lastCharactersMatch("-->", 3))
                             m_escape = false;
                     }
                }
                emitCharacter(cc);
                m_source.advance();
                if (m_source.isEmpty())
                     return;
                cc = m_source->unicode();
            }
            break;
        case EntityData:
            // should try to consume the entity but we only care about entities in attributes
            m_state = Data;
            break;
        case TagOpen:
            // The character after '<'.
            if (m_contentModel == RCDATA || m_contentModel == CDATA) {
                // Only a close tag can follow here; anything else is data.
                if (cc == '/')
                    m_state = CloseTagOpen;
                else {
                    m_state = Data;
                    continue;
                }
            } else if (m_contentModel == PCDATA) {
                if (cc == '!')
                    m_state = MarkupDeclarationOpen;
                else if (cc == '/')
                    m_state = CloseTagOpen;
                else if (cc >= 'A' && cc <= 'Z') {
                    // Tag names are collected in lower case (+0x20 maps A-Z to a-z).
                    m_tagName.clear();
                    m_tagName.append(cc + 0x20);
                    m_closeTag = false;
                    m_state = TagName;
                } else if (cc >= 'a' && cc <= 'z') {
                    m_tagName.clear();
                    m_tagName.append(cc);
                    m_closeTag = false;
                    m_state = TagName;
                } else if (cc == '>') {
                    m_state = Data;
                } else if (cc == '?') {
                    m_state = BogusComment;
                } else {
                    m_state = Data;
                    continue;
                }
            }
            break;
        case CloseTagOpen:
            // The character after "</".
            if (m_contentModel == RCDATA || m_contentModel == CDATA) {
                // In RCDATA/CDATA only a close tag matching the last start
                // tag counts; otherwise the text is treated as data.
                if (!m_lastStartTag.size()) {
                    m_state = Data;
                    continue;
                }
                // Wait for more input until the whole name plus one
                // following character can be inspected.
                if ((unsigned)m_source.length() < m_lastStartTag.size() + 1)
                    return;
                QVector<QChar> tmpString;
                QChar tmpChar = 0;
                bool match = true;
                // Look ahead over the tag name and the character after it...
                for (unsigned n = 0; n < m_lastStartTag.size() + 1; n++) {
                    tmpChar = m_source->toLower();
                    if (n < m_lastStartTag.size() && tmpChar != m_lastStartTag[n])
                        match = false;
                    tmpString.append(tmpChar);
                    m_source.advance();
                }
                // ...then put the inspected (lower-cased) characters back.
                m_source.prepend(TokenizerString(QString(tmpString.data(), tmpString.size())));
                if (!match || (!isWhitespace(tmpChar) && tmpChar != '>' && tmpChar != '/')) {
                    m_state = Data;
                    continue;
                }
            }
            if (cc >= 'A' && cc <= 'Z') {
                m_tagName.clear();
                m_tagName.append(cc + 0x20);
                m_closeTag = true;
                m_state = TagName;
            } else if (cc >= 'a' && cc <= 'z') {
                m_tagName.clear();
                m_tagName.append(cc);
                m_closeTag = true;
                m_state = TagName;
            } else if (cc == '>') {
                m_state = Data;
            } else
                m_state = BogusComment;
            break;
        case TagName:
            // Collect the (lower-cased) tag name until whitespace, '/' or '>'.
            while (1) {
                if (isWhitespace(cc)) {
                    m_state = BeforeAttributeName;
                    break;
                }
                if (cc == '>') {
                    emitTag();
                    m_state = Data;
                    break;
                }
                if (cc == '/') {
                    m_state = BeforeAttributeName;
                    break;
                }
                if (cc >= 'A' && cc <= 'Z')
                    m_tagName.append(cc + 0x20);
                else
                    m_tagName.append(cc);
                m_source.advance();
                if (m_source.isEmpty())
                    return;
                cc = m_source->unicode();
            }
            break;
        case BeforeAttributeName:
            // Skip whitespace and '/'; any other character except '>' starts
            // a new attribute name.
            if (isWhitespace(cc))
                ;
            else if (cc == '>') {
                emitTag();
                m_state = Data;
            } else if (cc >= 'A' && cc <= 'Z') {
                m_attributeName.clear();
                m_attributeValue.clear();
                m_attributeName.append(cc + 0x20);
                m_state = AttributeName;
            } else if (cc == '/')
                ;
            else {
                m_attributeName.clear();
                m_attributeValue.clear();
                m_attributeName.append(cc);
                m_state = AttributeName;
            }
            break;
        case AttributeName:
            // Collect the (lower-cased) attribute name.
            while (1) {
                if (isWhitespace(cc)) {
                    m_state = AfterAttributeName;
                    break;
                }
                if (cc == '=') {
                    m_state = BeforeAttributeValue;
                    break;
                }
                if (cc == '>') {
                    emitTag();
                    m_state = Data;
                    break;
                }
                if (cc == '/') {
                    m_state = BeforeAttributeName;
                    break;
                }
                if (cc >= 'A' && cc <= 'Z')
                    m_attributeName.append(cc + 0x20);
                else
                    m_attributeName.append(cc);
                m_source.advance();
                if (m_source.isEmpty())
                    return;
                cc = m_source->unicode();
            }
            break;
        case AfterAttributeName:
            // Whitespace after a name: either '=' follows, or the attribute
            // has no value and the next attribute (or the tag end) begins.
            if (isWhitespace(cc))
                ;
            else if (cc == '=')
                m_state = BeforeAttributeValue;
            else if (cc == '>') {
                emitTag();
                m_state = Data;
            } else if (cc >= 'A' && cc <= 'Z') {
                m_attributeName.clear();
                m_attributeValue.clear();
                m_attributeName.append(cc + 0x20);
                m_state = AttributeName;
            } else if (cc == '/')
                m_state = BeforeAttributeName;
            else {
                m_attributeName.clear();
                m_attributeValue.clear();
                m_attributeName.append(cc);
                m_state = AttributeName;
            }
            break;
        case BeforeAttributeValue:
            // Decide how the value is quoted.
            if (isWhitespace(cc))
                ;
            else if (cc == '"')
                m_state = AttributeValueDoubleQuoted;
            else if (cc == '&') {
                // Let the unquoted-value state handle the '&' itself.
                m_state = AttributeValueUnquoted;
                continue;
            } else if (cc == '\'')
                m_state = AttributeValueSingleQuoted;
            else if (cc == '>') {
                emitTag();
                m_state = Data;
            } else {
                m_attributeValue.append(cc);
                m_state = AttributeValueUnquoted;
            }
            break;
        case AttributeValueDoubleQuoted:
            // Collect the value up to the closing double quote.
            while (1) {
                if (cc == '"') {
                    processAttribute();
                    m_state = BeforeAttributeName;
                    break;
                }
                if (cc == '&') {
                    // Remember where to return to after the entity.
                    m_stateBeforeEntityInAttributeValue = m_state;
                    m_state = EntityInAttributeValue;
                    break;
                }
                m_attributeValue.append(cc);
                m_source.advance();
                if (m_source.isEmpty())
                    return;
                cc = m_source->unicode();
            }
            break;
        case AttributeValueSingleQuoted:
            // Collect the value up to the closing single quote.
            while (1) {
                if (cc == '\'') {
                    processAttribute();
                    m_state = BeforeAttributeName;
                    break;
                }
                if (cc == '&') {
                    m_stateBeforeEntityInAttributeValue = m_state;
                    m_state = EntityInAttributeValue;
                    break;
                }
                m_attributeValue.append(cc);
                m_source.advance();
                if (m_source.isEmpty())
                    return;
                cc = m_source->unicode();
            }
            break;
        case AttributeValueUnquoted:
            // Collect the value up to whitespace or '>'.
            while (1) {
                if (isWhitespace(cc)) {
                    processAttribute();
                    m_state = BeforeAttributeName;
                    break;
                }
                if (cc == '&') {
                    m_stateBeforeEntityInAttributeValue = m_state;
                    m_state = EntityInAttributeValue;
                    break;
                }
                if (cc == '>') {
                    processAttribute();
                    emitTag();
                    m_state = Data;
                    break;
                }
                m_attributeValue.append(cc);
                m_source.advance();
                if (m_source.isEmpty())
                    return;
                cc = m_source->unicode();
            }
            break;
        case EntityInAttributeValue:
            {
                bool notEnoughCharacters = false;
                unsigned entity = consumeEntity(m_source, notEnoughCharacters);
                // Stay in this state and retry when more input arrives.
                if (notEnoughCharacters)
                    return;
                if (entity > 0xFFFF) {
                    // Does not fit into one 16-bit unit: store two units.
                    m_attributeValue.append(U16_LEAD(entity));
                    m_attributeValue.append(U16_TRAIL(entity));
                } else if (entity)
                    m_attributeValue.append(entity);
                else
                    // No entity recognised: keep the '&' as a literal character.
                    m_attributeValue.append('&');
            }
            m_state = m_stateBeforeEntityInAttributeValue;
            // "continue": this state does not advance the input itself.
            continue;
        case BogusComment:
            // Skip everything up to the next '>'.
            while (1) {
                if (cc == '>') {
                    m_state = Data;
                    break;
                }
                m_source.advance();
                if (m_source.isEmpty())
                    return;
                cc = m_source->unicode();
            }
            break;
        case MarkupDeclarationOpen: {
            // After "<!": only "--" (a comment) is recognised here.
            if (cc == '-') {
                // Need both dashes before a decision can be made.
                if (m_source.length() < 2)
                    return;
                m_source.advance();
                cc = m_source->unicode();
                if (cc == '-')
                    m_state = CommentStart;
                else {
                    m_state = BogusComment;
                    continue;
                }
            // If we cared about the DOCTYPE we would test to enter those states here
            } else {
                m_state = BogusComment;
                continue;
            }
            break;
        }
        case CommentStart:
            if (cc == '-')
                m_state = CommentStartDash;
            else if (cc == '>')
                m_state = Data;
            else
                m_state = Comment;
            break;
        case CommentStartDash:
            if (cc == '-')
                m_state = CommentEnd;
            else if (cc == '>')
                m_state = Data;
            else
                m_state = Comment;
            break;
        case Comment:
            // Skip comment text until a '-' that may start the terminator.
            while (1) {
                if (cc == '-') {
                    m_state = CommentEndDash;
                    break;
                }
                m_source.advance();
                if (m_source.isEmpty())
                    return;
                cc = m_source->unicode();
            }
            break;
        case CommentEndDash:
            if (cc == '-')
                m_state = CommentEnd;
            else
                m_state = Comment;
            break;
        case CommentEnd:
            // After "--": '>' ends the comment, further dashes keep waiting.
            if (cc == '>')
                m_state = Data;
            else if (cc == '-')
                ;
            else
                m_state = Comment;
            break;
        }
        // Consume the character that was just handled.
        m_source.advance();
    }
}
 690
 691 void ProspectiveTokenizer::processAttribute()
 692 {
 693     LocalName tagLocal = LocalName::fromString(DOMString(m_tagName.data(), m_tagName.size()).lower());
 694     uint tag = tagLocal.id();
 695     LocalName attrLocal = LocalName::fromString(DOMString(m_tagName.data(), m_tagName.size()).lower());
 696     uint attribute = attrLocal.id();
 697
 698     const DOMString value = DOMString(m_attributeValue.data(), m_attributeValue.size()); // ####
 699     switch (tag) {
 700       case ID_SCRIPT:
 701       case ID_IMAGE:
 702       case ID_IMG:
 703           if (attribute == ATTR_SRC && m_urlToLoad.isEmpty())
 704               m_urlToLoad = parseURL(value);
 705           break;
 706       case ID_LINK:
 707           if (attribute == ATTR_HREF && m_urlToLoad.isEmpty())
 708               m_urlToLoad = parseURL(value);
 709           else if (attribute == ATTR_REL) {
 710               QString val = value.string();
 711               m_linkIsStyleSheet = val.contains("styleSheet") && !val.contains("alternate") && !val.contains("icon");
 712           }
 713       default:
 714           break;
 715     }
 716 }
 717
 718 inline void ProspectiveTokenizer::emitCharacter(QChar c)
 719 {
 720     if (m_contentModel == CDATA && m_lastStartTagId == ID_STYLE)
 721         tokenizeCSS(c);
 722 }
 723
 724 inline void ProspectiveTokenizer::tokenizeCSS(QChar c)
 725 {
 726     // We are just interested in @import rules, no need for real tokenization here
 727     // Searching for other types of resources is probably low payoff
 728     switch (m_cssState) {
 729     case CSSInitial:
 730         if (c == '@')
 731             m_cssState = CSSRuleStart;
 732         else if (c == '/')
 733             m_cssState = CSSMaybeComment;
 734         break;
 735     case CSSMaybeComment:
 736         if (c == '*')
 737             m_cssState = CSSComment;
 738         else
 739             m_cssState = CSSInitial;
 740         break;
 741     case CSSComment:
 742         if (c == '*')
 743             m_cssState = CSSMaybeCommentEnd;
 744         break;
 745     case CSSMaybeCommentEnd:
 746         if (c == '/')
 747             m_cssState = CSSInitial;
 748         else if (c == '*')
 749             ;
 750         else
 751             m_cssState = CSSComment;
 752         break;
 753     case CSSRuleStart:
 754         if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 755             m_cssRule.clear();
 756             m_cssRuleValue.clear();
 757             m_cssRule.append(c);
 758             m_cssState = CSSRule;
 759         } else
 760             m_cssState = CSSInitial;
 761         break;
 762     case CSSRule:
 763         if (isWhitespace(c))
 764             m_cssState = CSSAfterRule;
 765         else if (c == ';')
 766             m_cssState = CSSInitial;
 767         else
 768             m_cssRule.append(c);
 769         break;
 770     case CSSAfterRule:
 771         if (isWhitespace(c))
 772             ;
 773         else if (c == ';')
 774             m_cssState = CSSInitial;
 775         else {
 776             m_cssState = CSSRuleValue;
 777             m_cssRuleValue.append(c);
 778         }
 779         break;
 780     case CSSRuleValue:
 781         if (isWhitespace(c))
 782             m_cssState = CSSAferRuleValue;
 783         else if (c == ';') {
 784             emitCSSRule();
 785             m_cssState = CSSInitial;
 786         } else
 787             m_cssRuleValue.append(c);
 788         break;
 789     case CSSAferRuleValue:
 790         if (isWhitespace(c))
 791             ;
 792         else if (c == ';') {
 793             emitCSSRule();
 794             m_cssState = CSSInitial;
 795         } else {
 796             // FIXME media rules
 797              m_cssState = CSSInitial;
 798         }
 799         break;
 800     }
 801 }
 802
 803 void ProspectiveTokenizer::emitTag()
 804 {
 805     if (m_closeTag) {
 806         m_contentModel = PCDATA;
 807         m_cssState = CSSInitial;
 808         clearLastCharacters();
 809         return;
 810     }
 811
 812     LocalName tagLocal = LocalName::fromString(DOMString(m_tagName.data(), m_tagName.size()));
 813     uint tag = tagLocal.id();
 814     m_lastStartTagId = tag;
 815     m_lastStartTag = m_tagName;
 816
 817     switch (tag) {
 818       case ID_TEXTAREA:
 819       case ID_TITLE:
 820         m_contentModel = RCDATA;
 821         break;
 822       case ID_STYLE:
 823       case ID_XMP:
 824       case ID_SCRIPT:
 825       case ID_IFRAME:
 826       case ID_NOEMBED:
 827       case ID_NOFRAMES:
 828         m_contentModel = CDATA;
 829         break;
 830       case ID_NOSCRIPT:
 831         // we wouldn't be here if scripts were disabled
 832         m_contentModel = CDATA;
 833         break;
 834       case ID_PLAINTEXT:
 835         m_contentModel = PLAINTEXT;
 836         break;
 837       default:
 838         m_contentModel = PCDATA;
 839     }
 840
 841     if (m_urlToLoad.isEmpty()) {
 842         m_linkIsStyleSheet = false;
 843         return;
 844     }
 845
 846     CachedObject* o = 0;
 847     if (tag == ID_SCRIPT)
 848          o = m_document->docLoader()->requestScript( m_urlToLoad, m_document->part()->encoding() );
 849     else if (tag == ID_IMAGE || tag == ID_IMG)
 850          o = m_document->docLoader()->requestImage( m_urlToLoad );
 851     else if (tag == ID_LINK && m_linkIsStyleSheet)
 852         o = m_document->docLoader()->requestStyleSheet( m_urlToLoad, m_document->part()->encoding() );
 853
 854     if (o)
 855         m_document->docLoader()->registerPreload( o );
 856
 857     m_urlToLoad = QString();
 858     m_linkIsStyleSheet = false;
 859 }
 860
 861 void ProspectiveTokenizer::emitCSSRule()
 862 {
 863     QString rule(m_cssRule.data(), m_cssRule.size());
 864     if (rule.toLower() == "import" && !m_cssRuleValue.isEmpty()) {
 865         DOMString value = DOMString(m_cssRuleValue.data(), m_cssRuleValue.size());
 866         DOMString url = parseURL(value);
 867         if (!url.isEmpty())
 868             m_document->docLoader()->registerPreload( m_document->docLoader()->requestStyleSheet( m_urlToLoad, m_document->part()->encoding() ) ); // #### charset
 869     }
 870     m_cssRule.clear();
 871     m_cssRuleValue.clear();
 872 }
 873