Rubber-stamped by Brady Eidson.
[webbrowser.git] / WebCore / html / HTMLParser.cpp
blob88c6eb148d01872d1b959dc8f5dede16bd3af50c
1 /*
2 Copyright (C) 1997 Martin Jones (mjones@kde.org)
3 (C) 1997 Torben Weis (weis@kde.org)
4 (C) 1999,2001 Lars Knoll (knoll@kde.org)
5 (C) 2000,2001 Dirk Mueller (mueller@kde.org)
6 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
7 Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Library General Public
11 License as published by the Free Software Foundation; either
12 version 2 of the License, or (at your option) any later version.
14 This library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Library General Public License for more details.
19 You should have received a copy of the GNU Library General Public License
20 along with this library; see the file COPYING.LIB. If not, write to
21 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA.
25 #include "config.h"
26 #include "HTMLParser.h"
28 #include "CharacterNames.h"
29 #include "CSSPropertyNames.h"
30 #include "CSSValueKeywords.h"
31 #include "ChromeClient.h"
32 #include "Comment.h"
33 #include "Console.h"
34 #include "DOMWindow.h"
35 #include "DocumentFragment.h"
36 #include "DocumentType.h"
37 #include "Frame.h"
38 #include "HTMLBodyElement.h"
39 #include "HTMLDocument.h"
40 #include "HTMLDivElement.h"
41 #include "HTMLDListElement.h"
42 #include "HTMLElementFactory.h"
43 #include "HTMLFormElement.h"
44 #include "HTMLHeadElement.h"
45 #include "HTMLHRElement.h"
46 #include "HTMLHtmlElement.h"
47 #include "HTMLIsIndexElement.h"
48 #include "HTMLMapElement.h"
49 #include "HTMLNames.h"
50 #include "HTMLParserQuirks.h"
51 #include "HTMLTableCellElement.h"
52 #include "HTMLTableRowElement.h"
53 #include "HTMLTableSectionElement.h"
54 #include "HTMLTokenizer.h"
55 #include "LocalizedStrings.h"
56 #include "Page.h"
57 #include "Settings.h"
58 #include "Text.h"
59 #include <wtf/StdLibExtras.h>
61 namespace WebCore {
63 using namespace HTMLNames;
// Nesting limit for redundant tags of the same type. Presumably the bound enforced by
// allowNestedRedundantTag(), whose body is not fully visible here - confirm.
65 static const unsigned cMaxRedundantTagDepth = 20;
// NOTE(review): not referenced in the visible part of this file; presumably bounds the
// residual-style handling - confirm against its users.
66 static const unsigned cResidualStyleMaxDepth = 200;
// Tag priorities at or above this value are subject to the depth cap applied by limitBlockDepth().
68 static const int minBlockLevelTagPriority = 3;
70 // A cap on the number of tags with priority minBlockLevelTagPriority or higher
71 // allowed in m_blockStack. The cap is enforced by adding such new elements as
72 // siblings instead of children once it is reached.
73 static const size_t cMaxBlockDepth = 4096;
75 struct HTMLStackElem : Noncopyable {
76 HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx)
77 : tagName(t)
78 , level(lvl)
79 , strayTableContent(false)
80 , node(n)
81 , didRefNode(r)
82 , next(nx)
86 void derefNode()
88 if (didRefNode)
89 node->deref();
92 AtomicString tagName;
93 int level;
94 bool strayTableContent;
95 Node* node;
96 bool didRefNode;
97 HTMLStackElem* next;
101 * The parser parses tokenized input into the document, building up the
102 * document tree. If the document is well-formed, parsing it is straightforward.
104 * Unfortunately, we have to handle many HTML documents that are not well-formed,
105 * so the parser has to be tolerant about errors.
107 * We have to take care of at least the following error conditions:
109 * 1. The element being added is explicitly forbidden inside some outer tag.
110 * In this case we should close all tags up to the one that forbids
111 * the element, and add it afterwards.
113 * 2. We are not allowed to add the element directly. It could be that
114 * the person writing the document forgot some tag in between (or that the
115 * tag in between is optional). This could be the case with the following
116 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?).
118 * 3. We want to add a block element inside an inline element. Close all
119 * inline elements up to the next higher block element.
121 * 4. If this doesn't help, close elements until we are allowed to add the
122 * element or ignore the tag.
126 HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors)
127 : m_document(doc)
128 , m_current(doc)
129 , m_didRefCurrent(false)
130 , m_blockStack(0)
131 , m_blocksInStack(0)
132 , m_hasPElementInScope(NotInScope)
133 , m_inBody(false)
134 , m_haveContent(false)
135 , m_haveFrameSet(false)
136 , m_isParsingFragment(false)
137 , m_reportErrors(reportErrors)
138 , m_handlingResidualStyleAcrossBlocks(false)
139 , m_inStrayTableContent(0)
140 , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0)
144 HTMLParser::HTMLParser(DocumentFragment* frag)
145 : m_document(frag->document())
146 , m_current(frag)
147 , m_didRefCurrent(true)
148 , m_blockStack(0)
149 , m_blocksInStack(0)
150 , m_hasPElementInScope(NotInScope)
151 , m_inBody(true)
152 , m_haveContent(false)
153 , m_haveFrameSet(false)
154 , m_isParsingFragment(true)
155 , m_reportErrors(false)
156 , m_handlingResidualStyleAcrossBlocks(false)
157 , m_inStrayTableContent(0)
158 , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0)
160 if (frag)
161 frag->ref();
164 HTMLParser::~HTMLParser()
166 freeBlock();
167 if (m_didRefCurrent)
168 m_current->deref();
171 void HTMLParser::reset()
173 ASSERT(!m_isParsingFragment);
175 setCurrent(m_document);
177 freeBlock();
179 m_inBody = false;
180 m_haveFrameSet = false;
181 m_haveContent = false;
182 m_inStrayTableContent = 0;
184 m_currentFormElement = 0;
185 m_currentMapElement = 0;
186 m_head = 0;
187 m_isindexElement = 0;
189 m_skipModeTag = nullAtom;
191 if (m_parserQuirks)
192 m_parserQuirks->reset();
195 void HTMLParser::setCurrent(Node* newCurrent)
197 bool didRefNewCurrent = newCurrent && newCurrent != m_document;
198 if (didRefNewCurrent)
199 newCurrent->ref();
200 if (m_didRefCurrent)
201 m_current->deref();
202 m_current = newCurrent;
203 m_didRefCurrent = didRefNewCurrent;
206 inline static int tagPriorityOfNode(Node* n)
208 return n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0;
211 inline void HTMLParser::limitBlockDepth(int tagPriority)
213 if (tagPriority >= minBlockLevelTagPriority) {
214 while (m_blocksInStack >= cMaxBlockDepth)
215 popBlock(m_blockStack->tagName);
219 inline bool HTMLParser::insertNodeAfterLimitBlockDepth(Node* n, bool flat)
221 limitBlockDepth(tagPriorityOfNode(n));
222 return insertNode(n, flat);
// Consumes one tokenizer token. Close tags are forwarded to processCloseTag(); text
// tokens and begin tags are turned into nodes and inserted through
// insertNodeAfterLimitBlockDepth().
//
// Returns the inserted node (for text: the last text node created), or 0 when the token
// was skipped, was a close tag, produced no node, or could not be inserted.
225 PassRefPtr<Node> HTMLParser::parseToken(Token* t)
// Skip mode: while m_skipModeTag is set, tokens are dropped unless they are the matching
// end tag (which ends skip mode) or their tag name equals the current node's local name.
227 if (!m_skipModeTag.isNull()) {
228 if (!t->beginTag && t->tagName == m_skipModeTag)
229 // Found the end tag for the current skip mode, so we're done skipping.
230 m_skipModeTag = nullAtom;
231 else if (m_current->localName() == t->tagName)
232 // Do not skip </iframe>.
233 // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag?
235 else
236 return 0;
239 // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.
240 if (t->isCloseTag(brTag) && m_document->inCompatMode()) {
241 reportError(MalformedBRError);
242 t->beginTag = true;
245 if (!t->beginTag) {
246 processCloseTag(t);
247 return 0;
250 // Ignore spaces, if we're not inside a paragraph or other inline code.
251 // Do not alter the text if it is part of a scriptTag.
252 if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) {
// Non-whitespace text in the body (outside <style>/<title> and outside skip mode) marks
// the document as having content.
253 if (m_inBody && !skipMode() && m_current->localName() != styleTag &&
254 m_current->localName() != titleTag && !t->text->containsOnlyWhitespace())
255 m_haveContent = true;
257 RefPtr<Node> n;
258 String text = t->text.get();
259 unsigned charsLeft = text.length();
// NOTE(review): this loop only ends if createWithLengthLimit() reduces charsLeft
// (presumably taken by reference) - confirm against Text.h.
260 while (charsLeft) {
261 // split large blocks of text to nodes of manageable size
262 n = Text::createWithLengthLimit(m_document, text, charsLeft);
263 if (!insertNodeAfterLimitBlockDepth(n.get(), t->selfClosingTag))
264 return 0;
266 return n;
269 RefPtr<Node> n = getNode(t);
270 // just to be sure, and to catch currently unimplemented stuff
271 if (!n)
272 return 0;
274 // set attributes
275 if (n->isHTMLElement()) {
276 HTMLElement* e = static_cast<HTMLElement*>(n.get());
277 e->setAttributeMap(t->attrs.get());
279 // take care of optional close tags
280 if (e->endTagRequirement() == TagStatusOptional)
281 popBlock(t->tagName);
283 // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing
284 // syntax was used, report an error.
285 if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) {
286 if (t->tagName == scriptTag)
287 reportError(IncorrectXMLCloseScriptWarning);
288 else
289 reportError(IncorrectXMLSelfCloseError, &t->tagName);
293 if (!insertNodeAfterLimitBlockDepth(n.get(), t->selfClosingTag)) {
294 // we couldn't insert the node
// Undo the bookkeeping done for the rejected node: drop its attribute map and forget it
// if the map/form/head pointers were set to it.
296 if (n->isElementNode()) {
297 Element* e = static_cast<Element*>(n.get());
298 e->setAttributeMap(0);
301 if (m_currentMapElement == n)
302 m_currentMapElement = 0;
304 if (m_currentFormElement == n)
305 m_currentFormElement = 0;
307 if (m_head == n)
308 m_head = 0;
310 return 0;
312 return n;
315 void HTMLParser::parseDoctypeToken(DoctypeToken* t)
317 // Ignore any doctype after the first. Ignore doctypes in fragments.
318 if (m_document->doctype() || m_isParsingFragment || m_current != m_document)
319 return;
321 // Make a new doctype node and set it as our doctype.
322 m_document->addChild(DocumentType::create(m_document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID)));
325 static bool isTableSection(const Node* n)
327 return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag);
330 static bool isTablePart(const Node* n)
332 return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) ||
333 isTableSection(n);
336 static bool isTableRelated(const Node* n)
338 return n->hasTagName(tableTag) || isTablePart(n);
341 static bool isScopingTag(const AtomicString& tagName)
343 return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag;
// Tries to add |n| as a child of the current node and, on success, makes it the new
// current node when it has a tag priority above 0 and |flat| is false.
//
// n:    node to insert; kept alive for the duration of the call by protectNode.
// flat: when true the node is never pushed on the block stack; it is attached (if
//       applicable) and finishParsingChildren() is called right away.
// Returns true when the node was inserted; false when the parser quirks veto it; the
// result of handleError() when m_current->addChild() refuses it.
346 bool HTMLParser::insertNode(Node* n, bool flat)
348 RefPtr<Node> protectNode(n);
350 const AtomicString& localName = n->localName();
352 // <table> is never allowed inside stray table content. Always pop out of the stray table content
353 // and close up the first table, and then start the second table as a sibling.
354 if (m_inStrayTableContent && localName == tableTag)
355 popBlock(tableTag);
357 if (m_parserQuirks && !m_parserQuirks->shouldInsertNode(m_current, n))
358 return false;
360 int tagPriority = tagPriorityOfNode(n);
362 // let's be stupid and just try to insert it.
363 // this should work if the document is well-formed
364 Node* newNode = m_current->addChild(n);
365 if (!newNode)
366 return handleError(n, flat, localName, tagPriority); // Try to handle the error.
368 // don't push elements without end tags (e.g., <img>) on the stack
// Captured before m_current is changed below, so it describes the parent of |n|.
369 bool parentAttached = m_current->attached();
370 if (tagPriority > 0 && !flat) {
371 if (newNode == m_current) {
372 // This case should only be hit when a demoted <form> is placed inside a table.
373 ASSERT(localName == formTag);
374 reportError(FormInsideTablePartError, &m_current->localName());
375 HTMLFormElement* form = static_cast<HTMLFormElement*>(n);
376 form->setDemoted(true);
377 } else {
378 // The pushBlock function transfers ownership of current to the block stack
379 // so we're guaranteed that m_didRefCurrent is false. The code below is an
380 // optimized version of setCurrent that takes advantage of that fact and also
381 // assumes that newNode is neither 0 nor a pointer to the document.
382 pushBlock(localName, tagPriority);
383 newNode->beginParsingChildren();
384 ASSERT(!m_didRefCurrent);
385 newNode->ref();
386 m_current = newNode;
387 m_didRefCurrent = true;
389 if (parentAttached && !n->attached() && !m_isParsingFragment)
390 n->attach();
391 } else {
392 if (parentAttached && !n->attached() && !m_isParsingFragment)
393 n->attach();
394 n->finishParsingChildren();
// Tell the frame loader once the <html> element has been inserted.
397 if (localName == htmlTag && m_document->frame())
398 m_document->frame()->loader()->dispatchDocumentElementAvailable();
400 return true;
// Recovery path used by insertNode() when m_current->addChild(n) refuses the node.
// The work follows the three numbered comments below:
//   1. special cases keyed on the tag of the node being inserted (|n|);
//   2. special cases keyed on the current node, which typically pop blocks or insert an
//      implied element and then set |handled|;
//   3. if nothing set |handled|, IgnoredContentError is reported and the node is dropped;
//      otherwise the insertion of |n| is retried.
//
// n:           node that could not be added to m_current.
// flat:        the caller's flat flag; only consulted on some paths (see notes below).
// localName:   local name of |n| as computed by the caller.
// tagPriority: tag priority of |n| as computed by the caller.
// Returns false when the node is dropped, true when it was placed or a retry succeeded.
403 bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority)
405 // Error handling code. This is just ad hoc handling of specific parent/child combinations.
// |e| has no initializer; every branch below assigns it before passing it to insertNode().
406 HTMLElement* e;
407 bool handled = false;
409 // 1. Check out the element's tag name to decide how to deal with errors.
410 if (n->isHTMLElement()) {
411 HTMLElement* h = static_cast<HTMLElement*>(n);
412 if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) {
413 if (m_inStrayTableContent && !isTableRelated(m_current)) {
414 reportError(MisplacedTablePartError, &localName, &m_current->localName());
415 // pop out to the nearest enclosing table-related tag.
416 while (m_blockStack && !isTableRelated(m_current))
417 popOneBlock();
// NOTE(review): the retry does not forward |flat|; it relies on the default value of
// insertNode()'s second parameter, which is declared outside this view.
418 return insertNode(n);
420 } else if (h->hasLocalName(headTag)) {
421 if (!m_current->isDocumentNode() && !m_current->hasTagName(htmlTag)) {
422 reportError(MisplacedHeadError);
423 return false;
425 } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) {
// <meta>, <link> and <base> are moved into the head, creating the head first if needed.
426 bool createdHead = false;
427 if (!m_head) {
428 createHead();
429 createdHead = true;
431 if (m_head) {
432 if (!createdHead)
433 reportError(MisplacedHeadContentError, &localName, &m_current->localName());
434 if (m_head->addChild(n)) {
435 if (!n->attached() && !m_isParsingFragment)
436 n->attach();
437 return true;
438 } else
439 return false;
441 } else if (h->hasLocalName(htmlTag)) {
442 if (!m_current->isDocumentNode() ) {
443 if (m_document->documentElement() && m_document->documentElement()->hasTagName(htmlTag)) {
444 reportError(RedundantHTMLBodyError, &localName);
445 // we have another <HTML> element.... apply attributes to existing one
446 // make sure we don't overwrite already existing attributes
447 NamedNodeMap* map = static_cast<Element*>(n)->attributes(true);
448 Element* existingHTML = static_cast<Element*>(m_document->documentElement());
449 NamedNodeMap* bmap = existingHTML->attributes(false);
450 for (unsigned l = 0; map && l < map->length(); ++l) {
451 Attribute* it = map->attributeItem(l);
452 if (!bmap->getAttributeItem(it->name()))
453 existingHTML->setAttribute(it->name(), it->value());
456 return false;
458 } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag) || h->hasLocalName(scriptTag)) {
// <title>, <style> and <script> are moved into the head and become the current node;
// if the head refuses them, skip mode is entered for that tag.
459 bool createdHead = false;
460 if (!m_head) {
461 createHead();
462 createdHead = true;
464 if (m_head) {
465 Node* newNode = m_head->addChild(n);
466 if (!newNode) {
467 setSkipMode(h->tagQName());
468 return false;
471 if (!createdHead)
472 reportError(MisplacedHeadContentError, &localName, &m_current->localName());
474 pushBlock(localName, tagPriority);
475 newNode->beginParsingChildren();
476 setCurrent(newNode);
477 if (!n->attached() && !m_isParsingFragment)
478 n->attach();
479 return true;
481 if (m_inBody) {
482 setSkipMode(h->tagQName());
483 return false;
485 } else if (h->hasLocalName(bodyTag)) {
486 if (m_inBody && m_document->body()) {
487 // we have another <BODY> element.... apply attributes to existing one
488 // make sure we don't overwrite already existing attributes
489 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
490 reportError(RedundantHTMLBodyError, &localName);
491 NamedNodeMap* map = static_cast<Element*>(n)->attributes(true);
492 Element* existingBody = m_document->body();
493 NamedNodeMap* bmap = existingBody->attributes(false);
494 for (unsigned l = 0; map && l < map->length(); ++l) {
495 Attribute* it = map->attributeItem(l);
496 if (!bmap->getAttributeItem(it->name()))
497 existingBody->setAttribute(it->name(), it->value());
499 return false;
501 else if (!m_current->isDocumentNode())
502 return false;
503 } else if (h->hasLocalName(areaTag)) {
504 if (m_currentMapElement) {
505 reportError(MisplacedAreaError, &m_current->localName());
// NOTE(review): the result of addChild() is not checked here, and the assignment to
// |handled| is immediately followed by a return, so it has no effect.
506 m_currentMapElement->addChild(n);
507 if (!n->attached() && !m_isParsingFragment)
508 n->attach();
509 handled = true;
510 return true;
512 return false;
513 } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) {
514 if (isTableRelated(m_current)) {
515 while (m_blockStack && isTablePart(m_current))
516 popOneBlock();
517 return insertNode(n);
520 } else if (n->isCommentNode() && !m_head)
521 return false;
523 // 2. Next we examine our currently active element to do some further error handling.
524 if (m_current->isHTMLElement()) {
525 HTMLElement* h = static_cast<HTMLElement*>(m_current);
526 const AtomicString& currentTagName = h->localName();
527 if (h->hasLocalName(htmlTag)) {
528 HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0;
529 if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) ||
530 elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) ||
531 elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) ||
532 elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) ||
533 elt->hasLocalName(baseTag))) {
534 if (!m_head) {
535 m_head = new HTMLHeadElement(headTag, m_document);
536 insertNode(m_head.get());
537 handled = true;
539 } else {
540 if (n->isTextNode()) {
541 Text* t = static_cast<Text*>(n);
542 if (t->containsOnlyWhitespace())
543 return false;
545 if (!m_haveFrameSet) {
546 // Ensure that head exists.
547 // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795>
548 if (shouldCreateImplicitHead(m_document))
549 createHead();
551 popBlock(headTag);
552 e = new HTMLBodyElement(bodyTag, m_document);
553 startBody();
554 insertNode(e);
555 handled = true;
556 } else
557 reportError(MisplacedFramesetContentError, &localName);
559 } else if (h->hasLocalName(headTag)) {
560 if (n->hasTagName(htmlTag))
561 return false;
562 else {
563 // This means the body starts here...
564 if (!m_haveFrameSet) {
565 ASSERT(currentTagName == headTag);
566 popBlock(currentTagName);
567 e = new HTMLBodyElement(bodyTag, m_document);
568 startBody();
569 insertNode(e);
570 handled = true;
571 } else
572 reportError(MisplacedFramesetContentError, &localName);
574 } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag)
575 || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) {
576 reportError(MisplacedContentRetryError, &localName, &currentTagName);
577 popBlock(currentTagName);
578 handled = true;
579 } else if (h->hasLocalName(captionTag)) {
580 // Illegal content in a caption. Close the caption and try again.
581 reportError(MisplacedCaptionContentError, &localName);
582 popBlock(currentTagName);
583 if (isTablePart(n))
584 return insertNode(n, flat);
585 } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) {
586 if (n->hasTagName(tableTag)) {
587 reportError(MisplacedTableError, &currentTagName);
588 if (m_isParsingFragment && !h->hasLocalName(tableTag))
589 // fragment may contain table parts without <table> ancestor, pop them one by one
590 popBlock(h->localName());
591 popBlock(localName); // end the table
592 handled = true; // ...and start a new one
593 } else {
594 ExceptionCode ec = 0;
595 Node* node = m_current;
596 Node* parent = node->parentNode();
597 // A script may have removed the current node's parent from the DOM
598 // http://bugs.webkit.org/show_bug.cgi?id=7137
599 // FIXME: we should do real recovery here and re-parent with the correct node.
600 if (!parent)
601 return false;
602 Node* grandparent = parent->parentNode();
604 if (n->isTextNode() ||
605 (h->hasLocalName(trTag) &&
606 isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) ||
607 ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) &&
608 !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) &&
609 parent->hasTagName(tableTag))) {
// Stray table content: pick the enclosing <table> (or the nearest ancestor standing in
// for it) and insert |n| in front of it.
610 node = (node->hasTagName(tableTag)) ? node :
611 ((node->hasTagName(trTag)) ? grandparent : parent);
612 // This can happen with fragments
613 if (!node)
614 return false;
// Note: this |parent| shadows the one declared above; it is the parent of the node
// selected just above, not of m_current.
615 Node* parent = node->parentNode();
616 if (!parent)
617 return false;
618 parent->insertBefore(n, node, ec);
619 if (!ec) {
620 reportError(StrayTableContentError, &localName, &currentTagName);
621 if (n->isHTMLElement() && tagPriority > 0 &&
622 !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden)
624 pushBlock(localName, tagPriority);
625 n->beginParsingChildren();
626 setCurrent(n);
627 m_inStrayTableContent++;
628 m_blockStack->strayTableContent = true;
630 return true;
634 if (!ec) {
// Insert the table part that is implied between the current node and |n|:
// <td> inside <tr>, <tbody> inside <table>, <tr> otherwise.
635 if (m_current->hasTagName(trTag)) {
636 reportError(TablePartRequiredError, &localName, &tdTag.localName());
637 e = new HTMLTableCellElement(tdTag, m_document);
638 } else if (m_current->hasTagName(tableTag)) {
639 // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>,
640 // and it isn't really a parse error per se.
641 e = new HTMLTableSectionElement(tbodyTag, m_document);
642 } else {
643 reportError(TablePartRequiredError, &localName, &trTag.localName());
644 e = new HTMLTableRowElement(trTag, m_document);
647 insertNode(e);
648 handled = true;
651 } else if (h->hasLocalName(objectTag)) {
652 reportError(MisplacedContentRetryError, &localName, &currentTagName);
653 popBlock(objectTag);
654 handled = true;
655 } else if (h->hasLocalName(pTag) || isHeaderTag(currentTagName)) {
656 if (!isInline(n)) {
657 popBlock(currentTagName);
658 handled = true;
660 } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) {
661 if (localName == optgroupTag) {
662 popBlock(currentTagName);
663 handled = true;
664 } else if (localName == selectTag) {
665 // IE treats a nested select as </select>. Let's do the same
666 popBlock(localName);
668 } else if (h->hasLocalName(selectTag)) {
669 if (localName == inputTag || localName == textareaTag) {
670 reportError(MisplacedContentRetryError, &localName, &currentTagName);
671 popBlock(currentTagName);
672 handled = true;
674 } else if (h->hasLocalName(colgroupTag)) {
675 popBlock(currentTagName);
676 handled = true;
677 } else if (!h->hasLocalName(bodyTag)) {
678 if (isInline(m_current)) {
679 popInlineBlocks();
680 handled = true;
683 } else if (m_current->isDocumentNode()) {
684 if (n->isTextNode()) {
685 Text* t = static_cast<Text*>(n);
686 if (t->containsOnlyWhitespace())
687 return false;
690 if (!m_document->documentElement()) {
691 e = new HTMLHtmlElement(htmlTag, m_document);
692 insertNode(e);
693 handled = true;
697 // 3. If we couldn't handle the error, just return false and attempt to error-correct again.
698 if (!handled) {
699 reportError(IgnoredContentError, &localName, &m_current->localName());
700 return false;
// Something was repaired above, so try the insertion again. As with the earlier retries,
// |flat| is not forwarded here.
702 return insertNode(n);
// Signature of the per-tag hooks that getNode() runs before creating a node. A hook may
// store a node in the RefPtr out-parameter; its bool result tells getNode() whether to
// go on and create the element through HTMLElementFactory.
705 typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&);
// Maps the impl pointer of a tag name (or of textAtom / commentAtom) to its hook.
706 typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap;
708 bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result)
710 result = Text::create(m_document, t->text.get());
711 return false;
714 bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result)
716 result = Comment::create(m_document, t->text.get());
717 return false;
720 bool HTMLParser::headCreateErrorCheck(Token*, RefPtr<Node>& result)
722 if (!m_head || m_current->localName() == htmlTag) {
723 m_head = new HTMLHeadElement(headTag, m_document);
724 result = m_head;
725 } else
726 reportError(MisplacedHeadError);
727 return false;
730 bool HTMLParser::bodyCreateErrorCheck(Token*, RefPtr<Node>&)
732 // body no longer allowed if we have a frameset
733 if (m_haveFrameSet)
734 return false;
736 // Ensure that head exists (unless parsing a fragment).
737 // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795>
738 if (!m_isParsingFragment && shouldCreateImplicitHead(m_document))
739 createHead();
741 popBlock(headTag);
742 startBody();
743 return true;
746 bool HTMLParser::framesetCreateErrorCheck(Token*, RefPtr<Node>&)
748 popBlock(headTag);
749 if (m_inBody && !m_haveFrameSet && !m_haveContent) {
750 popBlock(bodyTag);
751 // ### actually for IE document.body returns the now hidden "body" element
752 // we can't implement that behaviour now because it could cause too many
753 // regressions and the headaches are not worth the work as long as there is
754 // no site actually relying on that detail (Dirk)
755 if (m_document->body())
756 m_document->body()->setAttribute(styleAttr, "display:none");
757 m_inBody = false;
759 if ((m_haveContent || m_haveFrameSet) && m_current->localName() == htmlTag)
760 return false;
761 m_haveFrameSet = true;
762 startBody();
763 return true;
766 bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result)
768 // Only create a new form if we're not already inside one.
769 // This is consistent with other browsers' behavior.
770 if (!m_currentFormElement) {
771 m_currentFormElement = new HTMLFormElement(formTag, m_document);
772 result = m_currentFormElement;
773 pCloserCreateErrorCheck(t, result);
775 return false;
778 bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result)
780 RefPtr<Node> n = handleIsindex(t);
781 if (!m_inBody)
782 m_isindexElement = n.release();
783 else {
784 t->selfClosingTag = true;
785 result = n.release();
787 return false;
790 bool HTMLParser::selectCreateErrorCheck(Token*, RefPtr<Node>&)
792 return true;
795 bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result)
797 pCloserCreateErrorCheck(t, result);
798 popBlock(dtTag);
799 popBlock(ddTag);
800 return true;
803 bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result)
805 pCloserCreateErrorCheck(t, result);
806 popBlock(ddTag);
807 popBlock(dtTag);
808 return true;
811 bool HTMLParser::rpCreateErrorCheck(Token*, RefPtr<Node>&)
813 popBlock(rpTag);
814 popBlock(rtTag);
815 return true;
818 bool HTMLParser::rtCreateErrorCheck(Token*, RefPtr<Node>&)
820 popBlock(rpTag);
821 popBlock(rtTag);
822 return true;
825 bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>&)
827 popBlock(t->tagName);
828 return true;
831 bool HTMLParser::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result)
833 pCloserCreateErrorCheck(t, result);
834 popBlock(t->tagName);
835 return true;
838 bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>&)
840 return allowNestedRedundantTag(t->tagName);
843 bool HTMLParser::tableCellCreateErrorCheck(Token*, RefPtr<Node>&)
845 popBlock(tdTag);
846 popBlock(thTag);
847 return true;
850 bool HTMLParser::tableSectionCreateErrorCheck(Token*, RefPtr<Node>&)
852 popBlock(theadTag);
853 popBlock(tbodyTag);
854 popBlock(tfootTag);
855 return true;
858 bool HTMLParser::noembedCreateErrorCheck(Token*, RefPtr<Node>&)
860 setSkipMode(noembedTag);
861 return true;
864 bool HTMLParser::noframesCreateErrorCheck(Token*, RefPtr<Node>&)
866 setSkipMode(noframesTag);
867 return true;
870 bool HTMLParser::noscriptCreateErrorCheck(Token*, RefPtr<Node>&)
872 if (!m_isParsingFragment) {
873 Settings* settings = m_document->settings();
874 if (settings && settings->isJavaScriptEnabled())
875 setSkipMode(noscriptTag);
877 return true;
880 bool HTMLParser::pCloserCreateErrorCheck(Token*, RefPtr<Node>&)
882 if (hasPElementInScope())
883 popBlock(pTag);
884 return true;
887 bool HTMLParser::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&)
889 if (m_document->inCompatMode())
890 return true;
891 if (hasPElementInScope())
892 popBlock(pTag);
893 return true;
896 bool HTMLParser::mapCreateErrorCheck(Token*, RefPtr<Node>& result)
898 m_currentMapElement = new HTMLMapElement(mapTag, m_document);
899 result = m_currentMapElement;
900 return false;
// Creates the node for token |t|.
//
// A static map, filled on first use, associates tag names (plus textAtom and
// commentAtom) with a "create error check" member function. When the token's tag has
// such a hook it is run first: it may pop blocks, enter skip mode or build the node
// itself through |result|, and its return value decides whether the element is then
// created through HTMLElementFactory. Tags without a hook always go to the factory.
//
// Returns the node to insert. The result can be null when a hook returned false without
// producing a node (for example a nested <form>, or <body> after a frameset).
903 PassRefPtr<Node> HTMLParser::getNode(Token* t)
905 // Init our error handling table.
906 DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ());
907 if (gFunctionMap.isEmpty()) {
908 gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
909 gFunctionMap.set(addressTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
910 gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
911 gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
912 gFunctionMap.set(blockquoteTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
913 gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck);
914 gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
915 gFunctionMap.set(centerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
916 gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck);
917 gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck);
918 gFunctionMap.set(dirTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
919 gFunctionMap.set(divTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
920 gFunctionMap.set(dlTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
921 gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck);
922 gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck);
923 gFunctionMap.set(fieldsetTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
924 gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck);
925 gFunctionMap.set(h1Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
926 gFunctionMap.set(h2Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
927 gFunctionMap.set(h3Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
928 gFunctionMap.set(h4Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
929 gFunctionMap.set(h5Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
930 gFunctionMap.set(h6Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
931 gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck);
932 gFunctionMap.set(hrTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
933 gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
934 gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck);
935 gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck);
936 gFunctionMap.set(listingTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
937 gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck);
938 gFunctionMap.set(menuTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
939 gFunctionMap.set(navTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
940 gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
941 gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck);
942 gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck);
// The <noscript> hook is only registered when XHTMLMP support is not enabled.
943 #if !ENABLE(XHTMLMP)
944 gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck);
945 #endif
946 gFunctionMap.set(olTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
947 gFunctionMap.set(pTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
948 gFunctionMap.set(plaintextTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
949 gFunctionMap.set(preTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
950 gFunctionMap.set(rpTag.localName().impl(), &HTMLParser::rpCreateErrorCheck);
951 gFunctionMap.set(rtTag.localName().impl(), &HTMLParser::rtCreateErrorCheck);
952 gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
953 gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck);
954 gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
955 gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
956 gFunctionMap.set(tableTag.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck);
957 gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
958 gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
959 gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck);
960 gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
961 gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
962 gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
963 gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
964 gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
965 gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
966 gFunctionMap.set(ulTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
// |proceed| stays true for tags without a hook, so they are always created by the factory.
969 bool proceed = true;
970 RefPtr<Node> result;
971 if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl()))
972 proceed = (this->*errorCheckFunc)(t, result);
// When a hook returned true, anything it stored in |result| is replaced by the factory's element.
973 if (proceed)
974 result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), m_document, m_currentFormElement.get());
975 return result.release();
978 bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName)
980 // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
981 // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20
982 // nested tags of the same type before just ignoring them all together.
983 unsigned i = 0;
984 for (HTMLStackElem* curr = m_blockStack;
985 i < cMaxRedundantTagDepth && curr && curr->tagName == tagName;
986 curr = curr->next, i++) { }
987 return i != cMaxRedundantTagDepth;
990 void HTMLParser::processCloseTag(Token* t)
992 // Support for really broken html.
993 // we never close the body tag, since some stupid web pages close it before the actual end of the doc.
994 // let's rely on the end() call to close things.
995 if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom)
996 return;
998 bool checkForCloseTagErrors = true;
999 if (t->tagName == formTag && m_currentFormElement) {
1000 m_currentFormElement = 0;
1001 checkForCloseTagErrors = false;
1002 } else if (t->tagName == mapTag)
1003 m_currentMapElement = 0;
1004 else if (t->tagName == pTag)
1005 checkForCloseTagErrors = false;
1007 HTMLStackElem* oldElem = m_blockStack;
1008 popBlock(t->tagName, checkForCloseTagErrors);
1009 if (oldElem == m_blockStack && t->tagName == pTag) {
1010 // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat
1011 // this as a valid break, i.e., <p></p>. So go ahead and make the empty
1012 // paragraph.
1013 t->beginTag = true;
1014 parseToken(t);
1015 popBlock(t->tagName);
1016 reportError(StrayParagraphCloseError);
1020 bool HTMLParser::isHeaderTag(const AtomicString& tagName)
1022 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, headerTags, ());
1023 if (headerTags.isEmpty()) {
1024 headerTags.add(h1Tag.localName().impl());
1025 headerTags.add(h2Tag.localName().impl());
1026 headerTags.add(h3Tag.localName().impl());
1027 headerTags.add(h4Tag.localName().impl());
1028 headerTags.add(h5Tag.localName().impl());
1029 headerTags.add(h6Tag.localName().impl());
1032 return headerTags.contains(tagName.impl());
1035 bool HTMLParser::isInline(Node* node) const
1037 if (node->isTextNode())
1038 return true;
1040 if (node->isHTMLElement()) {
1041 HTMLElement* e = static_cast<HTMLElement*>(node);
1042 if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) ||
1043 e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) ||
1044 e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) ||
1045 e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) ||
1046 e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) ||
1047 e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) ||
1048 e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) ||
1049 e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) ||
1050 e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) ||
1051 e->hasLocalName(noembedTag))
1052 return true;
1053 #if !ENABLE(XHTMLMP)
1054 if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) {
1055 Settings* settings = m_document->settings();
1056 if (settings && settings->isJavaScriptEnabled())
1057 return true;
1059 #endif
1062 return false;
1065 bool HTMLParser::isResidualStyleTag(const AtomicString& tagName)
1067 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ());
1068 if (residualStyleTags.isEmpty()) {
1069 residualStyleTags.add(aTag.localName().impl());
1070 residualStyleTags.add(fontTag.localName().impl());
1071 residualStyleTags.add(ttTag.localName().impl());
1072 residualStyleTags.add(uTag.localName().impl());
1073 residualStyleTags.add(bTag.localName().impl());
1074 residualStyleTags.add(iTag.localName().impl());
1075 residualStyleTags.add(sTag.localName().impl());
1076 residualStyleTags.add(strikeTag.localName().impl());
1077 residualStyleTags.add(bigTag.localName().impl());
1078 residualStyleTags.add(smallTag.localName().impl());
1079 residualStyleTags.add(emTag.localName().impl());
1080 residualStyleTags.add(strongTag.localName().impl());
1081 residualStyleTags.add(dfnTag.localName().impl());
1082 residualStyleTags.add(codeTag.localName().impl());
1083 residualStyleTags.add(sampTag.localName().impl());
1084 residualStyleTags.add(kbdTag.localName().impl());
1085 residualStyleTags.add(varTag.localName().impl());
1086 residualStyleTags.add(nobrTag.localName().impl());
1089 return residualStyleTags.contains(tagName.impl());
1092 bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName)
1094 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ());
1095 if (unaffectedTags.isEmpty()) {
1096 unaffectedTags.add(bodyTag.localName().impl());
1097 unaffectedTags.add(tableTag.localName().impl());
1098 unaffectedTags.add(theadTag.localName().impl());
1099 unaffectedTags.add(tbodyTag.localName().impl());
1100 unaffectedTags.add(tfootTag.localName().impl());
1101 unaffectedTags.add(trTag.localName().impl());
1102 unaffectedTags.add(thTag.localName().impl());
1103 unaffectedTags.add(tdTag.localName().impl());
1104 unaffectedTags.add(captionTag.localName().impl());
1105 unaffectedTags.add(colgroupTag.localName().impl());
1106 unaffectedTags.add(colTag.localName().impl());
1107 unaffectedTags.add(optionTag.localName().impl());
1108 unaffectedTags.add(optgroupTag.localName().impl());
1109 unaffectedTags.add(selectTag.localName().impl());
1110 unaffectedTags.add(objectTag.localName().impl());
1111 unaffectedTags.add(datagridTag.localName().impl());
1112 unaffectedTags.add(datalistTag.localName().impl());
1115 return !unaffectedTags.contains(tagName.impl());
// Fixes up the tree and the block stack when a residual style element is closed
// while entries of a higher level are still open above it on the stack, e.g.
// <b>...<p>Foo</b>Goo</p>. |elem| is the stack entry of the element being closed.
// Each pass of the outer loop handles one higher-level entry; afterwards the
// remaining residual style tags above it are popped and reopened.
//
// Stack entries store the node that was current when they were pushed (see
// pushBlock), so the DOM node that belongs to an entry lives in the entry just
// above it, or in m_current for the top entry.
//
// NOTE(review): the early returns inside the loop leave
// m_handlingResidualStyleAcrossBlocks set to true; only the final statement
// clears it. Confirm whether that is intended.
1118 void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
1120 HTMLStackElem* maxElem = 0;
1121 bool finished = false;
1122 bool strayTableContent = elem->strayTableContent;
1124 m_handlingResidualStyleAcrossBlocks = true;
1125 while (!finished) {
1126 // Find the outermost element that crosses over to a higher level. If there exists another higher-level
1127 // element, we will do another pass, until we have corrected the innermost one.
1128 ExceptionCode ec = 0;
1129 HTMLStackElem* curr = m_blockStack;
1130 HTMLStackElem* prev = 0;
1131 HTMLStackElem* prevMaxElem = 0;
1132 maxElem = 0;
1133 finished = true;
1134 while (curr && curr != elem) {
1135 if (curr->level > elem->level) {
1136 if (!isAffectedByResidualStyle(curr->tagName))
1137 return;
1138 if (maxElem)
1139 // We will need another pass.
1140 finished = false;
1141 maxElem = curr;
1142 prevMaxElem = prev;
1145 prev = curr;
1146 curr = curr->next;
1149 if (!curr || !maxElem)
1150 return;
// The scan stopped at |elem|, so |prev| is the entry directly above it and holds
// the node of the element being closed. prevMaxElem is null when maxElem is the
// top of the stack, in which case its node is m_current.
1152 Node* residualElem = prev->node;
1153 Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current;
1154 Node* parentElem = elem->node;
1156 // Check to see if the reparenting that is going to occur is allowed according to the DOM.
1157 // FIXME: We should either always allow it or perform an additional fixup instead of
1158 // just bailing here.
1159 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
1160 if (!parentElem->childAllowed(blockElem))
1161 return;
1163 m_hasPElementInScope = Unknown;
1165 if (maxElem->node->parentNode() != elem->node) {
1166 // Walk the stack and remove any elements that aren't residual style tags. These
1167 // are basically just being closed up. Example:
1168 // <font><span>Moo<p>Goo</font></p>.
1169 // In the above example, the <span> doesn't need to be reopened. It can just close.
1170 HTMLStackElem* currElem = maxElem->next;
1171 HTMLStackElem* prevElem = maxElem;
1172 while (currElem != elem) {
1173 HTMLStackElem* nextElem = currElem->next;
1174 if (!isResidualStyleTag(currElem->tagName)) {
1175 prevElem->next = nextElem;
1176 prevElem->derefNode();
1177 prevElem->node = currElem->node;
1178 prevElem->didRefNode = currElem->didRefNode;
1179 delete currElem;
1181 else
1182 prevElem = currElem;
1183 currElem = nextElem;
1186 // We have to reopen residual tags in between maxElem and elem. An example of this case is:
1187 // <font><i>Moo<p>Foo</font>.
1188 // In this case, we need to transform the part before the <p> into:
1189 // <font><i>Moo</i></font><i>
1190 // so that the <i> will remain open. This involves the modification of elements
1191 // in the block stack.
1192 // This will also affect how we ultimately reparent the block, since we want it to end up
1193 // under the reopened residual tags (e.g., the <i> in the above example.)
1194 RefPtr<Node> prevNode = 0;
1195 currElem = maxElem;
1196 while (currElem->node != residualElem) {
1197 if (isResidualStyleTag(currElem->node->localName())) {
1198 // Create a clone of this element.
1199 // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.
1200 Node* currNode = currElem->node->cloneNode(false).releaseRef();
1201 reportError(ResidualStyleError, &currNode->localName());
1203 // Change the stack element's node to point to the clone.
1204 // The stack element adopts the reference we obtained above by calling releaseRef().
1205 currElem->derefNode();
1206 currElem->node = currNode;
1207 currElem->didRefNode = true;
1209 // Attach the previous node as a child of this new node.
1210 if (prevNode)
1211 currNode->appendChild(prevNode, ec);
1212 else // The new parent for the block element is going to be the innermost clone.
1213 parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though.
1215 prevNode = currNode;
1218 currElem = currElem->next;
1221 // Now append the chain of new residual style elements if one exists.
1222 if (prevNode)
1223 elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section.
1226 // Check if the block is still in the tree. If it isn't, then we don't
1227 // want to remove it from its parent (that would crash) or insert it into
1228 // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778
1229 bool isBlockStillInTree = blockElem->parentNode();
1231 // We need to make a clone of |residualElem| and place it just inside |blockElem|.
1232 // All content of |blockElem| is reparented to be under this clone. We then
1233 // reparent |blockElem| using real DOM calls so that attachment/detachment will
1234 // be performed to fix up the rendering tree.
1235 // So for this example: <b>...<p>Foo</b>Goo</p>
1236 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
1238 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
1239 if (isBlockStillInTree)
1240 blockElem->parentNode()->removeChild(blockElem, ec);
1242 Node* newNodePtr = 0;
1243 if (blockElem->firstChild()) {
1244 // Step 2: Clone |residualElem|.
1245 RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
1246 newNodePtr = newNode.get();
1247 reportError(ResidualStyleError, &newNode->localName());
1249 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
1250 // before we've put |newElem| into the document. That way we'll only do one attachment of all
1251 // the new content (instead of a bunch of individual attachments).
1252 Node* currNode = blockElem->firstChild();
1253 while (currNode) {
1254 Node* nextNode = currNode->nextSibling();
1255 newNode->appendChild(currNode, ec);
1256 currNode = nextNode;
1259 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
1260 // attachment can occur yet.
1261 blockElem->appendChild(newNode.release(), ec);
1262 } else
1263 finished = true;
1265 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
1266 if (isBlockStillInTree)
1267 parentElem->appendChild(blockElem, ec);
1269 // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update
1270 // the node associated with the previous stack element so that when it gets popped,
1271 // it doesn't make the residual element the next current node.
1272 HTMLStackElem* currElem = maxElem;
1273 HTMLStackElem* prevElem = 0;
1274 while (currElem != elem) {
1275 prevElem = currElem;
1276 currElem = currElem->next;
1278 prevElem->next = elem->next;
1279 prevElem->derefNode();
1280 prevElem->node = elem->node;
1281 prevElem->didRefNode = elem->didRefNode;
// |finished| is only false here when the scan above saw at least two higher-level
// entries (so prevMaxElem is non-null) and |blockElem| had children (so newNodePtr
// was set).
1282 if (!finished) {
1283 // Repurpose |elem| to represent |newNode| and insert it at the appropriate position
1284 // in the stack. We do not do this for the innermost block, because in that case the new
1285 // node is effectively no longer open.
1286 elem->next = maxElem;
1287 elem->node = prevMaxElem->node;
1288 elem->didRefNode = prevMaxElem->didRefNode;
1289 elem->strayTableContent = false;
1290 prevMaxElem->next = elem;
1291 ASSERT(newNodePtr);
1292 prevMaxElem->node = newNodePtr;
1293 newNodePtr->ref();
1294 prevMaxElem->didRefNode = true;
1295 } else
1296 delete elem;
1299 // FIXME: If we ever make a case like this work:
1300 // <table><b><i><form></b></form></i></table>
1301 // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.
1302 if (strayTableContent)
1303 m_inStrayTableContent--;
1305 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1306 // In the above example, Goo should stay italic.
1307 // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.
1309 HTMLStackElem* curr = m_blockStack;
1310 HTMLStackElem* residualStyleStack = 0;
1311 unsigned stackDepth = 1;
1312 unsigned redundantStyleCount = 0;
1313 while (curr && curr != maxElem) {
1314 // We will actually schedule this tag for reopening
1315 // after we complete the close of this entire block.
1316 if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
1317 // We've overloaded the use of stack elements and are just reusing the
1318 // struct with a slightly different meaning to the variables. Instead of chaining
1319 // from innermost to outermost, we build up a list of all the tags we need to reopen
1320 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1321 // to the outermost tag we need to reopen.
1322 // We also set curr->node to be the actual element that corresponds to the ID stored in
1323 // curr->id rather than the node that you should pop to when the element gets pulled off
1324 // the stack.
1325 if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
1326 redundantStyleCount++;
1327 else
1328 redundantStyleCount = 0;
1330 if (redundantStyleCount < cMaxRedundantTagDepth)
1331 moveOneBlockToStack(residualStyleStack);
1332 else
1333 popOneBlock();
1334 } else
1335 popOneBlock();
1337 curr = m_blockStack;
1340 reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content.
1342 m_handlingResidualStyleAcrossBlocks = false;
1345 void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent)
1347 // Loop for each tag that needs to be reopened.
1348 while (elem) {
1349 // Create a shallow clone of the DOM node for this element.
1350 RefPtr<Node> newNode = elem->node->cloneNode(false);
1351 reportError(ResidualStyleError, &newNode->localName());
1353 // Append the new node. In the malformed table case, we need to insert before the table,
1354 // which will be the last child.
1355 ExceptionCode ec = 0;
1356 if (malformedTableParent)
1357 malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec);
1358 else
1359 m_current->appendChild(newNode, ec);
1360 // FIXME: Is it really OK to ignore the exceptions here?
1362 // Now push a new stack element for this node we just created.
1363 pushBlock(elem->tagName, elem->level);
1364 newNode->beginParsingChildren();
1366 // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1367 // that it is inside a malformed table.
1368 m_blockStack->strayTableContent = malformedTableParent != 0;
1369 if (m_blockStack->strayTableContent)
1370 m_inStrayTableContent++;
1372 // Clear our malformed table parent variable.
1373 malformedTableParent = 0;
1375 // Update |current| manually to point to the new node.
1376 setCurrent(newNode.get());
1378 // Advance to the next tag that needs to be reopened.
1379 HTMLStackElem* next = elem->next;
1380 elem->derefNode();
1381 delete elem;
1382 elem = next;
1386 void HTMLParser::pushBlock(const AtomicString& tagName, int level)
1388 m_blockStack = new HTMLStackElem(tagName, level, m_current, m_didRefCurrent, m_blockStack);
1389 if (level >= minBlockLevelTagPriority)
1390 m_blocksInStack++;
1391 m_didRefCurrent = false;
1392 if (tagName == pTag)
1393 m_hasPElementInScope = InScope;
1394 else if (isScopingTag(tagName))
1395 m_hasPElementInScope = NotInScope;
// Closes the innermost open block named |tagName|, popping every entry above it
// on the block stack as well. Residual style tags popped along the way are
// collected and reopened afterwards. When no entry matches, a
// StrayCloseTagError is reported if |reportErrors| is set and nothing is popped.
1398 void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors)
1400 HTMLStackElem* elem = m_blockStack;
// Give the parser quirks object, when one is installed, the chance to veto the pop.
1402 if (m_parserQuirks && elem && !m_parserQuirks->shouldPopBlock(elem->tagName, tagName))
1403 return;
1405 int maxLevel = 0;
// Find the matching entry, tracking the highest level among the entries above it.
1407 while (elem && (elem->tagName != tagName)) {
1408 if (maxLevel < elem->level)
1409 maxLevel = elem->level;
1410 elem = elem->next;
1413 if (!elem) {
1414 if (reportErrors)
1415 reportError(StrayCloseTagError, &tagName, 0, true);
1416 return;
1419 if (maxLevel > elem->level) {
1420 // We didn't match because the tag is in a different scope, e.g.,
1421 // <b><p>Foo</b>. Try to correct the problem.
1422 if (!isResidualStyleTag(tagName))
1423 return;
1424 return handleResidualStyleCloseTagAcrossBlocks(elem);
1427 bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName);
1428 HTMLStackElem* residualStyleStack = 0;
1429 Node* malformedTableParent = 0;
// Second pass: pop from the top of the stack down to and including the match.
1431 elem = m_blockStack;
1432 unsigned stackDepth = 1;
1433 unsigned redundantStyleCount = 0;
1434 while (elem) {
1435 if (elem->tagName == tagName) {
1436 int strayTable = m_inStrayTableContent;
1437 popOneBlock();
1438 elem = 0;
1440 // This element was the root of some malformed content just inside an implicit or
1441 // explicit <tbody> or <tr>.
1442 // If we end up needing to reopen residual style tags, the root of the reopened chain
1443 // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1444 if (strayTable && (m_inStrayTableContent < strayTable) && residualStyleStack) {
1445 Node* curr = m_current;
1446 while (curr && !curr->hasTagName(tableTag))
1447 curr = curr->parentNode();
1448 malformedTableParent = curr ? curr->parentNode() : 0;
1451 else {
1452 if (m_currentFormElement && elem->tagName == formTag)
1453 // A <form> is being closed prematurely (and this is
1454 // malformed HTML). Set an attribute on the form to clear out its
1455 // bottom margin.
1456 m_currentFormElement->setMalformed(true);
1458 // Schedule this tag for reopening
1459 // after we complete the close of this entire block.
1460 if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
1461 // We've overloaded the use of stack elements and are just reusing the
1462 // struct with a slightly different meaning to the variables. Instead of chaining
1463 // from innermost to outermost, we build up a list of all the tags we need to reopen
1464 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1465 // to the outermost tag we need to reopen.
1466 // We also set elem->node to be the actual element that corresponds to the ID stored in
1467 // elem->id rather than the node that you should pop to when the element gets pulled off
1468 // the stack.
1469 if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
1470 redundantStyleCount++;
1471 else
1472 redundantStyleCount = 0;
1474 if (redundantStyleCount < cMaxRedundantTagDepth)
1475 moveOneBlockToStack(residualStyleStack);
1476 else
1477 popOneBlock();
1478 } else
1479 popOneBlock();
1480 elem = m_blockStack;
1484 reopenResidualStyleTags(residualStyleStack, malformedTableParent);
1487 inline HTMLStackElem* HTMLParser::popOneBlockCommon()
1489 HTMLStackElem* elem = m_blockStack;
1491 // Form elements restore their state during the parsing process.
1492 // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available.
1493 if (m_current && elem->node != m_current)
1494 m_current->finishParsingChildren();
1496 if (m_blockStack->level >= minBlockLevelTagPriority) {
1497 ASSERT(m_blocksInStack > 0);
1498 m_blocksInStack--;
1500 m_blockStack = elem->next;
1501 m_current = elem->node;
1502 m_didRefCurrent = elem->didRefNode;
1504 if (elem->strayTableContent)
1505 m_inStrayTableContent--;
1507 if (elem->tagName == pTag)
1508 m_hasPElementInScope = NotInScope;
1509 else if (isScopingTag(elem->tagName))
1510 m_hasPElementInScope = Unknown;
1512 return elem;
1515 void HTMLParser::popOneBlock()
1517 // Store the current node before popOneBlockCommon overwrites it.
1518 Node* lastCurrent = m_current;
1519 bool didRefLastCurrent = m_didRefCurrent;
1521 delete popOneBlockCommon();
1523 if (didRefLastCurrent)
1524 lastCurrent->deref();
1527 void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head)
1529 // We'll be using the stack element we're popping, but for the current node.
1530 // See the two callers for details.
1532 // Store the current node before popOneBlockCommon overwrites it.
1533 Node* lastCurrent = m_current;
1534 bool didRefLastCurrent = m_didRefCurrent;
1536 // Pop the block, but don't deref the current node as popOneBlock does because
1537 // we'll be using the pointer in the new stack element.
1538 HTMLStackElem* elem = popOneBlockCommon();
1540 // Transfer the current node into the stack element.
1541 // No need to deref the old elem->node because popOneBlockCommon transferred
1542 // it into the m_current/m_didRefCurrent fields.
1543 elem->node = lastCurrent;
1544 elem->didRefNode = didRefLastCurrent;
1545 elem->next = head;
1546 head = elem;
1549 void HTMLParser::checkIfHasPElementInScope()
1551 m_hasPElementInScope = NotInScope;
1552 HTMLStackElem* elem = m_blockStack;
1553 while (elem) {
1554 const AtomicString& tagName = elem->tagName;
1555 if (tagName == pTag) {
1556 m_hasPElementInScope = InScope;
1557 return;
1558 } else if (isScopingTag(tagName))
1559 return;
1560 elem = elem->next;
1564 void HTMLParser::popInlineBlocks()
1566 while (m_blockStack && isInline(m_current))
1567 popOneBlock();
1570 void HTMLParser::freeBlock()
1572 while (m_blockStack)
1573 popOneBlock();
1574 ASSERT(!m_blocksInStack);
1577 void HTMLParser::createHead()
1579 if (m_head)
1580 return;
1582 if (!m_document->documentElement()) {
1583 insertNode(new HTMLHtmlElement(htmlTag, m_document));
1584 ASSERT(m_document->documentElement());
1587 m_head = new HTMLHeadElement(headTag, m_document);
1588 HTMLElement* body = m_document->body();
1589 ExceptionCode ec = 0;
1590 m_document->documentElement()->insertBefore(m_head.get(), body, ec);
1591 if (ec)
1592 m_head = 0;
1594 // If the body does not exist yet, then the <head> should be pushed as the current block.
1595 if (m_head && !body) {
1596 pushBlock(m_head->localName(), m_head->tagPriority());
1597 setCurrent(m_head.get());
1601 PassRefPtr<Node> HTMLParser::handleIsindex(Token* t)
1603 RefPtr<Node> n = new HTMLDivElement(divTag, m_document);
1605 NamedMappedAttrMap* attrs = t->attrs.get();
1607 RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(isindexTag, m_document, m_currentFormElement.get());
1608 isIndex->setAttributeMap(attrs);
1609 isIndex->setAttribute(typeAttr, "khtml_isindex");
1611 String text = searchableIndexIntroduction();
1612 if (attrs) {
1613 if (Attribute* a = attrs->getAttributeItem(promptAttr))
1614 text = a->value().string() + " ";
1615 t->attrs = 0;
1618 n->addChild(new HTMLHRElement(hrTag, m_document));
1619 n->addChild(Text::create(m_document, text));
1620 n->addChild(isIndex.release());
1621 n->addChild(new HTMLHRElement(hrTag, m_document));
1623 return n.release();
1626 void HTMLParser::startBody()
1628 if (m_inBody)
1629 return;
1631 m_inBody = true;
1633 if (m_isindexElement) {
1634 insertNode(m_isindexElement.get(), true /* don't descend into this node */);
1635 m_isindexElement = 0;
1639 void HTMLParser::finished()
1641 // In the case of a completely empty document, here's the place to create the HTML element.
1642 if (m_current && m_current->isDocumentNode() && !m_document->documentElement())
1643 insertNode(new HTMLHtmlElement(htmlTag, m_document));
1645 // This ensures that "current" is not left pointing to a node when the document is destroyed.
1646 freeBlock();
1647 setCurrent(0);
1649 // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this.
1650 if (!m_isParsingFragment)
1651 m_document->finishedParsing();
1654 void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags)
1656 Frame* frame = m_document->frame();
1657 if (!frame)
1658 return;
1660 HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(m_document->tokenizer());
1661 int lineNumber = htmlTokenizer->lineNumber() + 1;
1663 AtomicString tag1;
1664 AtomicString tag2;
1665 if (tagName1) {
1666 if (*tagName1 == "#text")
1667 tag1 = "Text";
1668 else if (*tagName1 == "#comment")
1669 tag1 = "<!-- comment -->";
1670 else
1671 tag1 = (closeTags ? "</" : "<") + *tagName1 + ">";
1673 if (tagName2) {
1674 if (*tagName2 == "#text")
1675 tag2 = "Text";
1676 else if (*tagName2 == "#comment")
1677 tag2 = "<!-- comment -->";
1678 else
1679 tag2 = (closeTags ? "</" : "<") + *tagName2 + ">";
1682 const char* errorMsg = htmlParserErrorMessageTemplate(errorCode);
1683 if (!errorMsg)
1684 return;
1686 String message;
1687 if (htmlTokenizer->processingContentWrittenByScript())
1688 message += htmlParserDocumentWriteMessage();
1689 message += errorMsg;
1690 message.replace("%tag1", tag1);
1691 message.replace("%tag2", tag2);
1693 frame->domWindow()->console()->addMessage(HTMLMessageSource, LogMessageType,
1694 isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel,
1695 message, lineNumber, m_document->url().string());
#ifdef BUILDING_ON_LEOPARD
// Returns false only when the document's page has settings that request the
// Leopard Mail quirks; returns true otherwise, including when the document
// has no page or the page has no settings.
bool shouldCreateImplicitHead(Document* document)
{
    ASSERT(document);

    Settings* settings = 0;
    if (document->page())
        settings = document->page()->settings();

    if (!settings)
        return true;
    return !settings->needsLeopardMailQuirks();
}
#elif defined(BUILDING_ON_TIGER)
// Returns false only when the document's page has settings that request the
// Tiger Mail quirks; returns true otherwise, including when the document has
// no page or the page has no settings.
bool shouldCreateImplicitHead(Document* document)
{
    ASSERT(document);

    Settings* settings = 0;
    if (document->page())
        settings = document->page()->settings();

    if (!settings)
        return true;
    return !settings->needsTigerMailQuirks();
}
#endif