2 Copyright (C) 1997 Martin Jones (mjones@kde.org)
3 (C) 1997 Torben Weis (weis@kde.org)
4 (C) 1999,2001 Lars Knoll (knoll@kde.org)
5 (C) 2000,2001 Dirk Mueller (mueller@kde.org)
6 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
7 Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Library General Public
11 License as published by the Free Software Foundation; either
12 version 2 of the License, or (at your option) any later version.
14 This library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Library General Public License for more details.
19 You should have received a copy of the GNU Library General Public License
20 along with this library; see the file COPYING.LIB. If not, write to
21 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA.
26 #include "HTMLParser.h"
28 #include "CharacterNames.h"
29 #include "CSSPropertyNames.h"
30 #include "CSSValueKeywords.h"
31 #include "ChromeClient.h"
34 #include "DOMWindow.h"
35 #include "DocumentFragment.h"
36 #include "DocumentType.h"
38 #include "HTMLBodyElement.h"
39 #include "HTMLDocument.h"
40 #include "HTMLDivElement.h"
41 #include "HTMLDListElement.h"
42 #include "HTMLElementFactory.h"
43 #include "HTMLFormElement.h"
44 #include "HTMLHeadElement.h"
45 #include "HTMLHRElement.h"
46 #include "HTMLHtmlElement.h"
47 #include "HTMLIsIndexElement.h"
48 #include "HTMLMapElement.h"
49 #include "HTMLNames.h"
50 #include "HTMLParserQuirks.h"
51 #include "HTMLTableCellElement.h"
52 #include "HTMLTableRowElement.h"
53 #include "HTMLTableSectionElement.h"
54 #include "HTMLTokenizer.h"
55 #include "LocalizedStrings.h"
59 #include <wtf/StdLibExtras.h>
63 using namespace HTMLNames
;
// Nesting limit consulted by allowNestedRedundantTag(): once this many
// consecutive entries at the top of the block stack share a tag name, further
// tags of that name are refused.
static const unsigned cMaxRedundantTagDepth = 20;

// NOTE(review): not referenced in the visible part of this file; presumably a
// depth bound for residual-style handling -- confirm against its users.
static const unsigned cResidualStyleMaxDepth = 200;

// Smallest tag priority that limitBlockDepth() treats as subject to the
// block-depth cap below.
static const int minBlockLevelTagPriority = 3;

// A cap on the number of tags with priority minBlockLevelTagPriority or higher
// allowed in m_blockStack. The cap is enforced by adding such new elements as
// siblings instead of children once it is reached.
static const size_t cMaxBlockDepth = 4096;
75 struct HTMLStackElem
: Noncopyable
{
76 HTMLStackElem(const AtomicString
& t
, int lvl
, Node
* n
, bool r
, HTMLStackElem
* nx
)
79 , strayTableContent(false)
94 bool strayTableContent
;
101 * The parser parses tokenized input into the document, building up the
102 * document tree. If the document is well-formed, parsing it is straightforward.
104 * Unfortunately, we have to handle many HTML documents that are not well-formed,
105 * so the parser has to be tolerant about errors.
107 * We have to take care of at least the following error conditions:
109 * 1. The element being added is explicitly forbidden inside some outer tag.
110 * In this case we should close all tags up to the one that forbids
111 * the element, and add it afterwards.
113 * 2. We are not allowed to add the element directly. It could be that
114 * the person writing the document forgot some tag in between (or that the
115 * tag in between is optional). This could be the case with the following
116 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?).
118 * 3. We want to add a block element inside an inline element. Close all
119 * inline elements up to the next higher block element.
121 * 4. If this doesn't help, close elements until we are allowed to add the
122 * element or ignore the tag.
126 HTMLParser::HTMLParser(HTMLDocument
* doc
, bool reportErrors
)
129 , m_didRefCurrent(false)
132 , m_hasPElementInScope(NotInScope
)
134 , m_haveContent(false)
135 , m_haveFrameSet(false)
136 , m_isParsingFragment(false)
137 , m_reportErrors(reportErrors
)
138 , m_handlingResidualStyleAcrossBlocks(false)
139 , m_inStrayTableContent(0)
140 , m_parserQuirks(m_document
->page() ? m_document
->page()->chrome()->client()->createHTMLParserQuirks() : 0)
144 HTMLParser::HTMLParser(DocumentFragment
* frag
)
145 : m_document(frag
->document())
147 , m_didRefCurrent(true)
150 , m_hasPElementInScope(NotInScope
)
152 , m_haveContent(false)
153 , m_haveFrameSet(false)
154 , m_isParsingFragment(true)
155 , m_reportErrors(false)
156 , m_handlingResidualStyleAcrossBlocks(false)
157 , m_inStrayTableContent(0)
158 , m_parserQuirks(m_document
->page() ? m_document
->page()->chrome()->client()->createHTMLParserQuirks() : 0)
164 HTMLParser::~HTMLParser()
171 void HTMLParser::reset()
173 ASSERT(!m_isParsingFragment
);
175 setCurrent(m_document
);
180 m_haveFrameSet
= false;
181 m_haveContent
= false;
182 m_inStrayTableContent
= 0;
184 m_currentFormElement
= 0;
185 m_currentMapElement
= 0;
187 m_isindexElement
= 0;
189 m_skipModeTag
= nullAtom
;
192 m_parserQuirks
->reset();
195 void HTMLParser::setCurrent(Node
* newCurrent
)
197 bool didRefNewCurrent
= newCurrent
&& newCurrent
!= m_document
;
198 if (didRefNewCurrent
)
202 m_current
= newCurrent
;
203 m_didRefCurrent
= didRefNewCurrent
;
206 inline static int tagPriorityOfNode(Node
* n
)
208 return n
->isHTMLElement() ? static_cast<HTMLElement
*>(n
)->tagPriority() : 0;
211 inline void HTMLParser::limitBlockDepth(int tagPriority
)
213 if (tagPriority
>= minBlockLevelTagPriority
) {
214 while (m_blocksInStack
>= cMaxBlockDepth
)
215 popBlock(m_blockStack
->tagName
);
219 inline bool HTMLParser::insertNodeAfterLimitBlockDepth(Node
* n
, bool flat
)
221 limitBlockDepth(tagPriorityOfNode(n
));
222 return insertNode(n
, flat
);
225 PassRefPtr
<Node
> HTMLParser::parseToken(Token
* t
)
227 if (!m_skipModeTag
.isNull()) {
228 if (!t
->beginTag
&& t
->tagName
== m_skipModeTag
)
229 // Found the end tag for the current skip mode, so we're done skipping.
230 m_skipModeTag
= nullAtom
;
231 else if (m_current
->localName() == t
->tagName
)
232 // Do not skip </iframe>.
233 // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag?
239 // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.
240 if (t
->isCloseTag(brTag
) && m_document
->inCompatMode()) {
241 reportError(MalformedBRError
);
250 // Ignore spaces, if we're not inside a paragraph or other inline code.
251 // Do not alter the text if it is part of a scriptTag.
252 if (t
->tagName
== textAtom
&& t
->text
&& m_current
->localName() != scriptTag
) {
253 if (m_inBody
&& !skipMode() && m_current
->localName() != styleTag
&&
254 m_current
->localName() != titleTag
&& !t
->text
->containsOnlyWhitespace())
255 m_haveContent
= true;
258 String text
= t
->text
.get();
259 unsigned charsLeft
= text
.length();
261 // split large blocks of text to nodes of manageable size
262 n
= Text::createWithLengthLimit(m_document
, text
, charsLeft
);
263 if (!insertNodeAfterLimitBlockDepth(n
.get(), t
->selfClosingTag
))
269 RefPtr
<Node
> n
= getNode(t
);
270 // just to be sure, and to catch currently unimplemented stuff
275 if (n
->isHTMLElement()) {
276 HTMLElement
* e
= static_cast<HTMLElement
*>(n
.get());
277 e
->setAttributeMap(t
->attrs
.get());
279 // take care of optional close tags
280 if (e
->endTagRequirement() == TagStatusOptional
)
281 popBlock(t
->tagName
);
283 // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing
284 // syntax was used, report an error.
285 if (t
->brokenXMLStyle
&& e
->endTagRequirement() != TagStatusForbidden
) {
286 if (t
->tagName
== scriptTag
)
287 reportError(IncorrectXMLCloseScriptWarning
);
289 reportError(IncorrectXMLSelfCloseError
, &t
->tagName
);
293 if (!insertNodeAfterLimitBlockDepth(n
.get(), t
->selfClosingTag
)) {
294 // we couldn't insert the node
296 if (n
->isElementNode()) {
297 Element
* e
= static_cast<Element
*>(n
.get());
298 e
->setAttributeMap(0);
301 if (m_currentMapElement
== n
)
302 m_currentMapElement
= 0;
304 if (m_currentFormElement
== n
)
305 m_currentFormElement
= 0;
315 void HTMLParser::parseDoctypeToken(DoctypeToken
* t
)
317 // Ignore any doctype after the first. Ignore doctypes in fragments.
318 if (m_document
->doctype() || m_isParsingFragment
|| m_current
!= m_document
)
321 // Make a new doctype node and set it as our doctype.
322 m_document
->addChild(DocumentType::create(m_document
, String::adopt(t
->m_name
), String::adopt(t
->m_publicID
), String::adopt(t
->m_systemID
)));
325 static bool isTableSection(const Node
* n
)
327 return n
->hasTagName(tbodyTag
) || n
->hasTagName(tfootTag
) || n
->hasTagName(theadTag
);
330 static bool isTablePart(const Node
* n
)
332 return n
->hasTagName(trTag
) || n
->hasTagName(tdTag
) || n
->hasTagName(thTag
) ||
336 static bool isTableRelated(const Node
* n
)
338 return n
->hasTagName(tableTag
) || isTablePart(n
);
341 static bool isScopingTag(const AtomicString
& tagName
)
343 return tagName
== appletTag
|| tagName
== captionTag
|| tagName
== tdTag
|| tagName
== thTag
|| tagName
== buttonTag
|| tagName
== marqueeTag
|| tagName
== objectTag
|| tagName
== tableTag
|| tagName
== htmlTag
;
346 bool HTMLParser::insertNode(Node
* n
, bool flat
)
348 RefPtr
<Node
> protectNode(n
);
350 const AtomicString
& localName
= n
->localName();
352 // <table> is never allowed inside stray table content. Always pop out of the stray table content
353 // and close up the first table, and then start the second table as a sibling.
354 if (m_inStrayTableContent
&& localName
== tableTag
)
357 if (m_parserQuirks
&& !m_parserQuirks
->shouldInsertNode(m_current
, n
))
360 int tagPriority
= tagPriorityOfNode(n
);
362 // let's be stupid and just try to insert it.
363 // this should work if the document is well-formed
364 Node
* newNode
= m_current
->addChild(n
);
366 return handleError(n
, flat
, localName
, tagPriority
); // Try to handle the error.
368 // don't push elements without end tags (e.g., <img>) on the stack
369 bool parentAttached
= m_current
->attached();
370 if (tagPriority
> 0 && !flat
) {
371 if (newNode
== m_current
) {
372 // This case should only be hit when a demoted <form> is placed inside a table.
373 ASSERT(localName
== formTag
);
374 reportError(FormInsideTablePartError
, &m_current
->localName());
375 HTMLFormElement
* form
= static_cast<HTMLFormElement
*>(n
);
376 form
->setDemoted(true);
378 // The pushBlock function transfers ownership of current to the block stack
379 // so we're guaranteed that m_didRefCurrent is false. The code below is an
380 // optimized version of setCurrent that takes advantage of that fact and also
381 // assumes that newNode is neither 0 nor a pointer to the document.
382 pushBlock(localName
, tagPriority
);
383 newNode
->beginParsingChildren();
384 ASSERT(!m_didRefCurrent
);
387 m_didRefCurrent
= true;
389 if (parentAttached
&& !n
->attached() && !m_isParsingFragment
)
392 if (parentAttached
&& !n
->attached() && !m_isParsingFragment
)
394 n
->finishParsingChildren();
397 if (localName
== htmlTag
&& m_document
->frame())
398 m_document
->frame()->loader()->dispatchDocumentElementAvailable();
403 bool HTMLParser::handleError(Node
* n
, bool flat
, const AtomicString
& localName
, int tagPriority
)
405 // Error handling code. This is just ad hoc handling of specific parent/child combinations.
407 bool handled
= false;
409 // 1. Check out the element's tag name to decide how to deal with errors.
410 if (n
->isHTMLElement()) {
411 HTMLElement
* h
= static_cast<HTMLElement
*>(n
);
412 if (h
->hasLocalName(trTag
) || h
->hasLocalName(thTag
) || h
->hasLocalName(tdTag
)) {
413 if (m_inStrayTableContent
&& !isTableRelated(m_current
)) {
414 reportError(MisplacedTablePartError
, &localName
, &m_current
->localName());
415 // pop out to the nearest enclosing table-related tag.
416 while (m_blockStack
&& !isTableRelated(m_current
))
418 return insertNode(n
);
420 } else if (h
->hasLocalName(headTag
)) {
421 if (!m_current
->isDocumentNode() && !m_current
->hasTagName(htmlTag
)) {
422 reportError(MisplacedHeadError
);
425 } else if (h
->hasLocalName(metaTag
) || h
->hasLocalName(linkTag
) || h
->hasLocalName(baseTag
)) {
426 bool createdHead
= false;
433 reportError(MisplacedHeadContentError
, &localName
, &m_current
->localName());
434 if (m_head
->addChild(n
)) {
435 if (!n
->attached() && !m_isParsingFragment
)
441 } else if (h
->hasLocalName(htmlTag
)) {
442 if (!m_current
->isDocumentNode() ) {
443 if (m_document
->documentElement() && m_document
->documentElement()->hasTagName(htmlTag
)) {
444 reportError(RedundantHTMLBodyError
, &localName
);
445 // we have another <HTML> element.... apply attributes to existing one
446 // make sure we don't overwrite already existing attributes
447 NamedNodeMap
* map
= static_cast<Element
*>(n
)->attributes(true);
448 Element
* existingHTML
= static_cast<Element
*>(m_document
->documentElement());
449 NamedNodeMap
* bmap
= existingHTML
->attributes(false);
450 for (unsigned l
= 0; map
&& l
< map
->length(); ++l
) {
451 Attribute
* it
= map
->attributeItem(l
);
452 if (!bmap
->getAttributeItem(it
->name()))
453 existingHTML
->setAttribute(it
->name(), it
->value());
458 } else if (h
->hasLocalName(titleTag
) || h
->hasLocalName(styleTag
) || h
->hasLocalName(scriptTag
)) {
459 bool createdHead
= false;
465 Node
* newNode
= m_head
->addChild(n
);
467 setSkipMode(h
->tagQName());
472 reportError(MisplacedHeadContentError
, &localName
, &m_current
->localName());
474 pushBlock(localName
, tagPriority
);
475 newNode
->beginParsingChildren();
477 if (!n
->attached() && !m_isParsingFragment
)
482 setSkipMode(h
->tagQName());
485 } else if (h
->hasLocalName(bodyTag
)) {
486 if (m_inBody
&& m_document
->body()) {
487 // we have another <BODY> element.... apply attributes to existing one
488 // make sure we don't overwrite already existing attributes
489 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
490 reportError(RedundantHTMLBodyError
, &localName
);
491 NamedNodeMap
* map
= static_cast<Element
*>(n
)->attributes(true);
492 Element
* existingBody
= m_document
->body();
493 NamedNodeMap
* bmap
= existingBody
->attributes(false);
494 for (unsigned l
= 0; map
&& l
< map
->length(); ++l
) {
495 Attribute
* it
= map
->attributeItem(l
);
496 if (!bmap
->getAttributeItem(it
->name()))
497 existingBody
->setAttribute(it
->name(), it
->value());
501 else if (!m_current
->isDocumentNode())
503 } else if (h
->hasLocalName(areaTag
)) {
504 if (m_currentMapElement
) {
505 reportError(MisplacedAreaError
, &m_current
->localName());
506 m_currentMapElement
->addChild(n
);
507 if (!n
->attached() && !m_isParsingFragment
)
513 } else if (h
->hasLocalName(colgroupTag
) || h
->hasLocalName(captionTag
)) {
514 if (isTableRelated(m_current
)) {
515 while (m_blockStack
&& isTablePart(m_current
))
517 return insertNode(n
);
520 } else if (n
->isCommentNode() && !m_head
)
523 // 2. Next we examine our currently active element to do some further error handling.
524 if (m_current
->isHTMLElement()) {
525 HTMLElement
* h
= static_cast<HTMLElement
*>(m_current
);
526 const AtomicString
& currentTagName
= h
->localName();
527 if (h
->hasLocalName(htmlTag
)) {
528 HTMLElement
* elt
= n
->isHTMLElement() ? static_cast<HTMLElement
*>(n
) : 0;
529 if (elt
&& (elt
->hasLocalName(scriptTag
) || elt
->hasLocalName(styleTag
) ||
530 elt
->hasLocalName(metaTag
) || elt
->hasLocalName(linkTag
) ||
531 elt
->hasLocalName(objectTag
) || elt
->hasLocalName(embedTag
) ||
532 elt
->hasLocalName(titleTag
) || elt
->hasLocalName(isindexTag
) ||
533 elt
->hasLocalName(baseTag
))) {
535 m_head
= new HTMLHeadElement(headTag
, m_document
);
536 insertNode(m_head
.get());
540 if (n
->isTextNode()) {
541 Text
* t
= static_cast<Text
*>(n
);
542 if (t
->containsOnlyWhitespace())
545 if (!m_haveFrameSet
) {
546 // Ensure that head exists.
547 // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795>
548 if (shouldCreateImplicitHead(m_document
))
552 e
= new HTMLBodyElement(bodyTag
, m_document
);
557 reportError(MisplacedFramesetContentError
, &localName
);
559 } else if (h
->hasLocalName(headTag
)) {
560 if (n
->hasTagName(htmlTag
))
563 // This means the body starts here...
564 if (!m_haveFrameSet
) {
565 ASSERT(currentTagName
== headTag
);
566 popBlock(currentTagName
);
567 e
= new HTMLBodyElement(bodyTag
, m_document
);
572 reportError(MisplacedFramesetContentError
, &localName
);
574 } else if (h
->hasLocalName(addressTag
) || h
->hasLocalName(fontTag
)
575 || h
->hasLocalName(styleTag
) || h
->hasLocalName(titleTag
)) {
576 reportError(MisplacedContentRetryError
, &localName
, ¤tTagName
);
577 popBlock(currentTagName
);
579 } else if (h
->hasLocalName(captionTag
)) {
580 // Illegal content in a caption. Close the caption and try again.
581 reportError(MisplacedCaptionContentError
, &localName
);
582 popBlock(currentTagName
);
584 return insertNode(n
, flat
);
585 } else if (h
->hasLocalName(tableTag
) || h
->hasLocalName(trTag
) || isTableSection(h
)) {
586 if (n
->hasTagName(tableTag
)) {
587 reportError(MisplacedTableError
, ¤tTagName
);
588 if (m_isParsingFragment
&& !h
->hasLocalName(tableTag
))
589 // fragment may contain table parts without <table> ancestor, pop them one by one
590 popBlock(h
->localName());
591 popBlock(localName
); // end the table
592 handled
= true; // ...and start a new one
594 ExceptionCode ec
= 0;
595 Node
* node
= m_current
;
596 Node
* parent
= node
->parentNode();
597 // A script may have removed the current node's parent from the DOM
598 // http://bugs.webkit.org/show_bug.cgi?id=7137
599 // FIXME: we should do real recovery here and re-parent with the correct node.
602 Node
* grandparent
= parent
->parentNode();
604 if (n
->isTextNode() ||
605 (h
->hasLocalName(trTag
) &&
606 isTableSection(parent
) && grandparent
&& grandparent
->hasTagName(tableTag
)) ||
607 ((!n
->hasTagName(tdTag
) && !n
->hasTagName(thTag
) &&
608 !n
->hasTagName(formTag
) && !n
->hasTagName(scriptTag
)) && isTableSection(node
) &&
609 parent
->hasTagName(tableTag
))) {
610 node
= (node
->hasTagName(tableTag
)) ? node
:
611 ((node
->hasTagName(trTag
)) ? grandparent
: parent
);
612 // This can happen with fragments
615 Node
* parent
= node
->parentNode();
618 parent
->insertBefore(n
, node
, ec
);
620 reportError(StrayTableContentError
, &localName
, ¤tTagName
);
621 if (n
->isHTMLElement() && tagPriority
> 0 &&
622 !flat
&& static_cast<HTMLElement
*>(n
)->endTagRequirement() != TagStatusForbidden
)
624 pushBlock(localName
, tagPriority
);
625 n
->beginParsingChildren();
627 m_inStrayTableContent
++;
628 m_blockStack
->strayTableContent
= true;
635 if (m_current
->hasTagName(trTag
)) {
636 reportError(TablePartRequiredError
, &localName
, &tdTag
.localName());
637 e
= new HTMLTableCellElement(tdTag
, m_document
);
638 } else if (m_current
->hasTagName(tableTag
)) {
639 // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>,
640 // and it isn't really a parse error per se.
641 e
= new HTMLTableSectionElement(tbodyTag
, m_document
);
643 reportError(TablePartRequiredError
, &localName
, &trTag
.localName());
644 e
= new HTMLTableRowElement(trTag
, m_document
);
651 } else if (h
->hasLocalName(objectTag
)) {
652 reportError(MisplacedContentRetryError
, &localName
, ¤tTagName
);
655 } else if (h
->hasLocalName(pTag
) || isHeaderTag(currentTagName
)) {
657 popBlock(currentTagName
);
660 } else if (h
->hasLocalName(optionTag
) || h
->hasLocalName(optgroupTag
)) {
661 if (localName
== optgroupTag
) {
662 popBlock(currentTagName
);
664 } else if (localName
== selectTag
) {
665 // IE treats a nested select as </select>. Let's do the same
668 } else if (h
->hasLocalName(selectTag
)) {
669 if (localName
== inputTag
|| localName
== textareaTag
) {
670 reportError(MisplacedContentRetryError
, &localName
, ¤tTagName
);
671 popBlock(currentTagName
);
674 } else if (h
->hasLocalName(colgroupTag
)) {
675 popBlock(currentTagName
);
677 } else if (!h
->hasLocalName(bodyTag
)) {
678 if (isInline(m_current
)) {
683 } else if (m_current
->isDocumentNode()) {
684 if (n
->isTextNode()) {
685 Text
* t
= static_cast<Text
*>(n
);
686 if (t
->containsOnlyWhitespace())
690 if (!m_document
->documentElement()) {
691 e
= new HTMLHtmlElement(htmlTag
, m_document
);
697 // 3. If we couldn't handle the error, just return false and attempt to error-correct again.
699 reportError(IgnoredContentError
, &localName
, &m_current
->localName());
702 return insertNode(n
);
705 typedef bool (HTMLParser::*CreateErrorCheckFunc
)(Token
* t
, RefPtr
<Node
>&);
706 typedef HashMap
<AtomicStringImpl
*, CreateErrorCheckFunc
> FunctionMap
;
708 bool HTMLParser::textCreateErrorCheck(Token
* t
, RefPtr
<Node
>& result
)
710 result
= Text::create(m_document
, t
->text
.get());
714 bool HTMLParser::commentCreateErrorCheck(Token
* t
, RefPtr
<Node
>& result
)
716 result
= Comment::create(m_document
, t
->text
.get());
720 bool HTMLParser::headCreateErrorCheck(Token
*, RefPtr
<Node
>& result
)
722 if (!m_head
|| m_current
->localName() == htmlTag
) {
723 m_head
= new HTMLHeadElement(headTag
, m_document
);
726 reportError(MisplacedHeadError
);
730 bool HTMLParser::bodyCreateErrorCheck(Token
*, RefPtr
<Node
>&)
732 // body no longer allowed if we have a frameset
736 // Ensure that head exists (unless parsing a fragment).
737 // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795>
738 if (!m_isParsingFragment
&& shouldCreateImplicitHead(m_document
))
746 bool HTMLParser::framesetCreateErrorCheck(Token
*, RefPtr
<Node
>&)
749 if (m_inBody
&& !m_haveFrameSet
&& !m_haveContent
) {
751 // ### actually for IE document.body returns the now hidden "body" element
752 // we can't implement that behaviour now because it could cause too many
753 // regressions and the headaches are not worth the work as long as there is
754 // no site actually relying on that detail (Dirk)
755 if (m_document
->body())
756 m_document
->body()->setAttribute(styleAttr
, "display:none");
759 if ((m_haveContent
|| m_haveFrameSet
) && m_current
->localName() == htmlTag
)
761 m_haveFrameSet
= true;
766 bool HTMLParser::formCreateErrorCheck(Token
* t
, RefPtr
<Node
>& result
)
768 // Only create a new form if we're not already inside one.
769 // This is consistent with other browsers' behavior.
770 if (!m_currentFormElement
) {
771 m_currentFormElement
= new HTMLFormElement(formTag
, m_document
);
772 result
= m_currentFormElement
;
773 pCloserCreateErrorCheck(t
, result
);
778 bool HTMLParser::isindexCreateErrorCheck(Token
* t
, RefPtr
<Node
>& result
)
780 RefPtr
<Node
> n
= handleIsindex(t
);
782 m_isindexElement
= n
.release();
784 t
->selfClosingTag
= true;
785 result
= n
.release();
790 bool HTMLParser::selectCreateErrorCheck(Token
*, RefPtr
<Node
>&)
795 bool HTMLParser::ddCreateErrorCheck(Token
* t
, RefPtr
<Node
>& result
)
797 pCloserCreateErrorCheck(t
, result
);
803 bool HTMLParser::dtCreateErrorCheck(Token
* t
, RefPtr
<Node
>& result
)
805 pCloserCreateErrorCheck(t
, result
);
811 bool HTMLParser::rpCreateErrorCheck(Token
*, RefPtr
<Node
>&)
818 bool HTMLParser::rtCreateErrorCheck(Token
*, RefPtr
<Node
>&)
825 bool HTMLParser::nestedCreateErrorCheck(Token
* t
, RefPtr
<Node
>&)
827 popBlock(t
->tagName
);
831 bool HTMLParser::nestedPCloserCreateErrorCheck(Token
* t
, RefPtr
<Node
>& result
)
833 pCloserCreateErrorCheck(t
, result
);
834 popBlock(t
->tagName
);
838 bool HTMLParser::nestedStyleCreateErrorCheck(Token
* t
, RefPtr
<Node
>&)
840 return allowNestedRedundantTag(t
->tagName
);
843 bool HTMLParser::tableCellCreateErrorCheck(Token
*, RefPtr
<Node
>&)
850 bool HTMLParser::tableSectionCreateErrorCheck(Token
*, RefPtr
<Node
>&)
858 bool HTMLParser::noembedCreateErrorCheck(Token
*, RefPtr
<Node
>&)
860 setSkipMode(noembedTag
);
864 bool HTMLParser::noframesCreateErrorCheck(Token
*, RefPtr
<Node
>&)
866 setSkipMode(noframesTag
);
870 bool HTMLParser::noscriptCreateErrorCheck(Token
*, RefPtr
<Node
>&)
872 if (!m_isParsingFragment
) {
873 Settings
* settings
= m_document
->settings();
874 if (settings
&& settings
->isJavaScriptEnabled())
875 setSkipMode(noscriptTag
);
880 bool HTMLParser::pCloserCreateErrorCheck(Token
*, RefPtr
<Node
>&)
882 if (hasPElementInScope())
887 bool HTMLParser::pCloserStrictCreateErrorCheck(Token
*, RefPtr
<Node
>&)
889 if (m_document
->inCompatMode())
891 if (hasPElementInScope())
896 bool HTMLParser::mapCreateErrorCheck(Token
*, RefPtr
<Node
>& result
)
898 m_currentMapElement
= new HTMLMapElement(mapTag
, m_document
);
899 result
= m_currentMapElement
;
903 PassRefPtr
<Node
> HTMLParser::getNode(Token
* t
)
905 // Init our error handling table.
906 DEFINE_STATIC_LOCAL(FunctionMap
, gFunctionMap
, ());
907 if (gFunctionMap
.isEmpty()) {
908 gFunctionMap
.set(aTag
.localName().impl(), &HTMLParser::nestedCreateErrorCheck
);
909 gFunctionMap
.set(addressTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
910 gFunctionMap
.set(bTag
.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck
);
911 gFunctionMap
.set(bigTag
.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck
);
912 gFunctionMap
.set(blockquoteTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
913 gFunctionMap
.set(bodyTag
.localName().impl(), &HTMLParser::bodyCreateErrorCheck
);
914 gFunctionMap
.set(buttonTag
.localName().impl(), &HTMLParser::nestedCreateErrorCheck
);
915 gFunctionMap
.set(centerTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
916 gFunctionMap
.set(commentAtom
.impl(), &HTMLParser::commentCreateErrorCheck
);
917 gFunctionMap
.set(ddTag
.localName().impl(), &HTMLParser::ddCreateErrorCheck
);
918 gFunctionMap
.set(dirTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
919 gFunctionMap
.set(divTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
920 gFunctionMap
.set(dlTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
921 gFunctionMap
.set(dtTag
.localName().impl(), &HTMLParser::dtCreateErrorCheck
);
922 gFunctionMap
.set(formTag
.localName().impl(), &HTMLParser::formCreateErrorCheck
);
923 gFunctionMap
.set(fieldsetTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
924 gFunctionMap
.set(framesetTag
.localName().impl(), &HTMLParser::framesetCreateErrorCheck
);
925 gFunctionMap
.set(h1Tag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
926 gFunctionMap
.set(h2Tag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
927 gFunctionMap
.set(h3Tag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
928 gFunctionMap
.set(h4Tag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
929 gFunctionMap
.set(h5Tag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
930 gFunctionMap
.set(h6Tag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
931 gFunctionMap
.set(headTag
.localName().impl(), &HTMLParser::headCreateErrorCheck
);
932 gFunctionMap
.set(hrTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
933 gFunctionMap
.set(iTag
.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck
);
934 gFunctionMap
.set(isindexTag
.localName().impl(), &HTMLParser::isindexCreateErrorCheck
);
935 gFunctionMap
.set(liTag
.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck
);
936 gFunctionMap
.set(listingTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
937 gFunctionMap
.set(mapTag
.localName().impl(), &HTMLParser::mapCreateErrorCheck
);
938 gFunctionMap
.set(menuTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
939 gFunctionMap
.set(navTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
940 gFunctionMap
.set(nobrTag
.localName().impl(), &HTMLParser::nestedCreateErrorCheck
);
941 gFunctionMap
.set(noembedTag
.localName().impl(), &HTMLParser::noembedCreateErrorCheck
);
942 gFunctionMap
.set(noframesTag
.localName().impl(), &HTMLParser::noframesCreateErrorCheck
);
944 gFunctionMap
.set(noscriptTag
.localName().impl(), &HTMLParser::noscriptCreateErrorCheck
);
946 gFunctionMap
.set(olTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
947 gFunctionMap
.set(pTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
948 gFunctionMap
.set(plaintextTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
949 gFunctionMap
.set(preTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
950 gFunctionMap
.set(rpTag
.localName().impl(), &HTMLParser::rpCreateErrorCheck
);
951 gFunctionMap
.set(rtTag
.localName().impl(), &HTMLParser::rtCreateErrorCheck
);
952 gFunctionMap
.set(sTag
.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck
);
953 gFunctionMap
.set(selectTag
.localName().impl(), &HTMLParser::selectCreateErrorCheck
);
954 gFunctionMap
.set(smallTag
.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck
);
955 gFunctionMap
.set(strikeTag
.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck
);
956 gFunctionMap
.set(tableTag
.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck
);
957 gFunctionMap
.set(tbodyTag
.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck
);
958 gFunctionMap
.set(tdTag
.localName().impl(), &HTMLParser::tableCellCreateErrorCheck
);
959 gFunctionMap
.set(textAtom
.impl(), &HTMLParser::textCreateErrorCheck
);
960 gFunctionMap
.set(tfootTag
.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck
);
961 gFunctionMap
.set(thTag
.localName().impl(), &HTMLParser::tableCellCreateErrorCheck
);
962 gFunctionMap
.set(theadTag
.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck
);
963 gFunctionMap
.set(trTag
.localName().impl(), &HTMLParser::nestedCreateErrorCheck
);
964 gFunctionMap
.set(ttTag
.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck
);
965 gFunctionMap
.set(uTag
.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck
);
966 gFunctionMap
.set(ulTag
.localName().impl(), &HTMLParser::pCloserCreateErrorCheck
);
971 if (CreateErrorCheckFunc errorCheckFunc
= gFunctionMap
.get(t
->tagName
.impl()))
972 proceed
= (this->*errorCheckFunc
)(t
, result
);
974 result
= HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom
, t
->tagName
, xhtmlNamespaceURI
), m_document
, m_currentFormElement
.get());
975 return result
.release();
978 bool HTMLParser::allowNestedRedundantTag(const AtomicString
& tagName
)
980 // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
981 // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20
982 // nested tags of the same type before just ignoring them all together.
984 for (HTMLStackElem
* curr
= m_blockStack
;
985 i
< cMaxRedundantTagDepth
&& curr
&& curr
->tagName
== tagName
;
986 curr
= curr
->next
, i
++) { }
987 return i
!= cMaxRedundantTagDepth
;
990 void HTMLParser::processCloseTag(Token
* t
)
992 // Support for really broken html.
993 // we never close the body tag, since some stupid web pages close it before the actual end of the doc.
994 // let's rely on the end() call to close things.
995 if (t
->tagName
== htmlTag
|| t
->tagName
== bodyTag
|| t
->tagName
== commentAtom
)
998 bool checkForCloseTagErrors
= true;
999 if (t
->tagName
== formTag
&& m_currentFormElement
) {
1000 m_currentFormElement
= 0;
1001 checkForCloseTagErrors
= false;
1002 } else if (t
->tagName
== mapTag
)
1003 m_currentMapElement
= 0;
1004 else if (t
->tagName
== pTag
)
1005 checkForCloseTagErrors
= false;
1007 HTMLStackElem
* oldElem
= m_blockStack
;
1008 popBlock(t
->tagName
, checkForCloseTagErrors
);
1009 if (oldElem
== m_blockStack
&& t
->tagName
== pTag
) {
1010 // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat
1011 // this as a valid break, i.e., <p></p>. So go ahead and make the empty
1015 popBlock(t
->tagName
);
1016 reportError(StrayParagraphCloseError
);
1020 bool HTMLParser::isHeaderTag(const AtomicString
& tagName
)
1022 DEFINE_STATIC_LOCAL(HashSet
<AtomicStringImpl
*>, headerTags
, ());
1023 if (headerTags
.isEmpty()) {
1024 headerTags
.add(h1Tag
.localName().impl());
1025 headerTags
.add(h2Tag
.localName().impl());
1026 headerTags
.add(h3Tag
.localName().impl());
1027 headerTags
.add(h4Tag
.localName().impl());
1028 headerTags
.add(h5Tag
.localName().impl());
1029 headerTags
.add(h6Tag
.localName().impl());
1032 return headerTags
.contains(tagName
.impl());
1035 bool HTMLParser::isInline(Node
* node
) const
1037 if (node
->isTextNode())
1040 if (node
->isHTMLElement()) {
1041 HTMLElement
* e
= static_cast<HTMLElement
*>(node
);
1042 if (e
->hasLocalName(aTag
) || e
->hasLocalName(fontTag
) || e
->hasLocalName(ttTag
) ||
1043 e
->hasLocalName(uTag
) || e
->hasLocalName(bTag
) || e
->hasLocalName(iTag
) ||
1044 e
->hasLocalName(sTag
) || e
->hasLocalName(strikeTag
) || e
->hasLocalName(bigTag
) ||
1045 e
->hasLocalName(smallTag
) || e
->hasLocalName(emTag
) || e
->hasLocalName(strongTag
) ||
1046 e
->hasLocalName(dfnTag
) || e
->hasLocalName(codeTag
) || e
->hasLocalName(sampTag
) ||
1047 e
->hasLocalName(kbdTag
) || e
->hasLocalName(varTag
) || e
->hasLocalName(citeTag
) ||
1048 e
->hasLocalName(abbrTag
) || e
->hasLocalName(acronymTag
) || e
->hasLocalName(subTag
) ||
1049 e
->hasLocalName(supTag
) || e
->hasLocalName(spanTag
) || e
->hasLocalName(nobrTag
) ||
1050 e
->hasLocalName(noframesTag
) || e
->hasLocalName(nolayerTag
) ||
1051 e
->hasLocalName(noembedTag
))
1053 #if !ENABLE(XHTMLMP)
1054 if (e
->hasLocalName(noscriptTag
) && !m_isParsingFragment
) {
1055 Settings
* settings
= m_document
->settings();
1056 if (settings
&& settings
->isJavaScriptEnabled())
1065 bool HTMLParser::isResidualStyleTag(const AtomicString
& tagName
)
1067 DEFINE_STATIC_LOCAL(HashSet
<AtomicStringImpl
*>, residualStyleTags
, ());
1068 if (residualStyleTags
.isEmpty()) {
1069 residualStyleTags
.add(aTag
.localName().impl());
1070 residualStyleTags
.add(fontTag
.localName().impl());
1071 residualStyleTags
.add(ttTag
.localName().impl());
1072 residualStyleTags
.add(uTag
.localName().impl());
1073 residualStyleTags
.add(bTag
.localName().impl());
1074 residualStyleTags
.add(iTag
.localName().impl());
1075 residualStyleTags
.add(sTag
.localName().impl());
1076 residualStyleTags
.add(strikeTag
.localName().impl());
1077 residualStyleTags
.add(bigTag
.localName().impl());
1078 residualStyleTags
.add(smallTag
.localName().impl());
1079 residualStyleTags
.add(emTag
.localName().impl());
1080 residualStyleTags
.add(strongTag
.localName().impl());
1081 residualStyleTags
.add(dfnTag
.localName().impl());
1082 residualStyleTags
.add(codeTag
.localName().impl());
1083 residualStyleTags
.add(sampTag
.localName().impl());
1084 residualStyleTags
.add(kbdTag
.localName().impl());
1085 residualStyleTags
.add(varTag
.localName().impl());
1086 residualStyleTags
.add(nobrTag
.localName().impl());
1089 return residualStyleTags
.contains(tagName
.impl());
1092 bool HTMLParser::isAffectedByResidualStyle(const AtomicString
& tagName
)
1094 DEFINE_STATIC_LOCAL(HashSet
<AtomicStringImpl
*>, unaffectedTags
, ());
1095 if (unaffectedTags
.isEmpty()) {
1096 unaffectedTags
.add(bodyTag
.localName().impl());
1097 unaffectedTags
.add(tableTag
.localName().impl());
1098 unaffectedTags
.add(theadTag
.localName().impl());
1099 unaffectedTags
.add(tbodyTag
.localName().impl());
1100 unaffectedTags
.add(tfootTag
.localName().impl());
1101 unaffectedTags
.add(trTag
.localName().impl());
1102 unaffectedTags
.add(thTag
.localName().impl());
1103 unaffectedTags
.add(tdTag
.localName().impl());
1104 unaffectedTags
.add(captionTag
.localName().impl());
1105 unaffectedTags
.add(colgroupTag
.localName().impl());
1106 unaffectedTags
.add(colTag
.localName().impl());
1107 unaffectedTags
.add(optionTag
.localName().impl());
1108 unaffectedTags
.add(optgroupTag
.localName().impl());
1109 unaffectedTags
.add(selectTag
.localName().impl());
1110 unaffectedTags
.add(objectTag
.localName().impl());
1111 unaffectedTags
.add(datagridTag
.localName().impl());
1112 unaffectedTags
.add(datalistTag
.localName().impl());
1115 return !unaffectedTags
.contains(tagName
.impl());
1118 void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem
* elem
)
1120 HTMLStackElem
* maxElem
= 0;
1121 bool finished
= false;
1122 bool strayTableContent
= elem
->strayTableContent
;
1124 m_handlingResidualStyleAcrossBlocks
= true;
1126 // Find the outermost element that crosses over to a higher level. If there exists another higher-level
1127 // element, we will do another pass, until we have corrected the innermost one.
1128 ExceptionCode ec
= 0;
1129 HTMLStackElem
* curr
= m_blockStack
;
1130 HTMLStackElem
* prev
= 0;
1131 HTMLStackElem
* prevMaxElem
= 0;
1134 while (curr
&& curr
!= elem
) {
1135 if (curr
->level
> elem
->level
) {
1136 if (!isAffectedByResidualStyle(curr
->tagName
))
1139 // We will need another pass.
1149 if (!curr
|| !maxElem
)
1152 Node
* residualElem
= prev
->node
;
1153 Node
* blockElem
= prevMaxElem
? prevMaxElem
->node
: m_current
;
1154 Node
* parentElem
= elem
->node
;
1156 // Check to see if the reparenting that is going to occur is allowed according to the DOM.
1157 // FIXME: We should either always allow it or perform an additional fixup instead of
1158 // just bailing here.
1159 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
1160 if (!parentElem
->childAllowed(blockElem
))
1163 m_hasPElementInScope
= Unknown
;
1165 if (maxElem
->node
->parentNode() != elem
->node
) {
1166 // Walk the stack and remove any elements that aren't residual style tags. These
1167 // are basically just being closed up. Example:
1168 // <font><span>Moo<p>Goo</font></p>.
1169 // In the above example, the <span> doesn't need to be reopened. It can just close.
1170 HTMLStackElem
* currElem
= maxElem
->next
;
1171 HTMLStackElem
* prevElem
= maxElem
;
1172 while (currElem
!= elem
) {
1173 HTMLStackElem
* nextElem
= currElem
->next
;
1174 if (!isResidualStyleTag(currElem
->tagName
)) {
1175 prevElem
->next
= nextElem
;
1176 prevElem
->derefNode();
1177 prevElem
->node
= currElem
->node
;
1178 prevElem
->didRefNode
= currElem
->didRefNode
;
1182 prevElem
= currElem
;
1183 currElem
= nextElem
;
1186 // We have to reopen residual tags in between maxElem and elem. An example of this case is:
1187 // <font><i>Moo<p>Foo</font>.
1188 // In this case, we need to transform the part before the <p> into:
1189 // <font><i>Moo</i></font><i>
1190 // so that the <i> will remain open. This involves the modification of elements
1191 // in the block stack.
1192 // This will also affect how we ultimately reparent the block, since we want it to end up
1193 // under the reopened residual tags (e.g., the <i> in the above example.)
1194 RefPtr
<Node
> prevNode
= 0;
1196 while (currElem
->node
!= residualElem
) {
1197 if (isResidualStyleTag(currElem
->node
->localName())) {
1198 // Create a clone of this element.
1199 // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.
1200 Node
* currNode
= currElem
->node
->cloneNode(false).releaseRef();
1201 reportError(ResidualStyleError
, &currNode
->localName());
1203 // Change the stack element's node to point to the clone.
1204 // The stack element adopts the reference we obtained above by calling release().
1205 currElem
->derefNode();
1206 currElem
->node
= currNode
;
1207 currElem
->didRefNode
= true;
1209 // Attach the previous node as a child of this new node.
1211 currNode
->appendChild(prevNode
, ec
);
1212 else // The new parent for the block element is going to be the innermost clone.
1213 parentElem
= currNode
; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though.
1215 prevNode
= currNode
;
1218 currElem
= currElem
->next
;
1221 // Now append the chain of new residual style elements if one exists.
1223 elem
->node
->appendChild(prevNode
, ec
); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section.
1226 // Check if the block is still in the tree. If it isn't, then we don't
1227 // want to remove it from its parent (that would crash) or insert it into
1228 // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778
1229 bool isBlockStillInTree
= blockElem
->parentNode();
1231 // We need to make a clone of |residualElem| and place it just inside |blockElem|.
1232 // All content of |blockElem| is reparented to be under this clone. We then
1233 // reparent |blockElem| using real DOM calls so that attachment/detachment will
1234 // be performed to fix up the rendering tree.
1235 // So for this example: <b>...<p>Foo</b>Goo</p>
1236 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
1238 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
1239 if (isBlockStillInTree
)
1240 blockElem
->parentNode()->removeChild(blockElem
, ec
);
1242 Node
* newNodePtr
= 0;
1243 if (blockElem
->firstChild()) {
1244 // Step 2: Clone |residualElem|.
1245 RefPtr
<Node
> newNode
= residualElem
->cloneNode(false); // Shallow clone. We don't pick up the same kids.
1246 newNodePtr
= newNode
.get();
1247 reportError(ResidualStyleError
, &newNode
->localName());
1249 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
1250 // before we've put |newElem| into the document. That way we'll only do one attachment of all
1251 // the new content (instead of a bunch of individual attachments).
1252 Node
* currNode
= blockElem
->firstChild();
1254 Node
* nextNode
= currNode
->nextSibling();
1255 newNode
->appendChild(currNode
, ec
);
1256 currNode
= nextNode
;
1259 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
1260 // attachment can occur yet.
1261 blockElem
->appendChild(newNode
.release(), ec
);
1265 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
1266 if (isBlockStillInTree
)
1267 parentElem
->appendChild(blockElem
, ec
);
1269 // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update
1270 // the node associated with the previous stack element so that when it gets popped,
1271 // it doesn't make the residual element the next current node.
1272 HTMLStackElem
* currElem
= maxElem
;
1273 HTMLStackElem
* prevElem
= 0;
1274 while (currElem
!= elem
) {
1275 prevElem
= currElem
;
1276 currElem
= currElem
->next
;
1278 prevElem
->next
= elem
->next
;
1279 prevElem
->derefNode();
1280 prevElem
->node
= elem
->node
;
1281 prevElem
->didRefNode
= elem
->didRefNode
;
1283 // Repurpose |elem| to represent |newNode| and insert it at the appropriate position
1284 // in the stack. We do not do this for the innermost block, because in that case the new
1285 // node is effectively no longer open.
1286 elem
->next
= maxElem
;
1287 elem
->node
= prevMaxElem
->node
;
1288 elem
->didRefNode
= prevMaxElem
->didRefNode
;
1289 elem
->strayTableContent
= false;
1290 prevMaxElem
->next
= elem
;
1292 prevMaxElem
->node
= newNodePtr
;
1294 prevMaxElem
->didRefNode
= true;
1299 // FIXME: If we ever make a case like this work:
1300 // <table><b><i><form></b></form></i></table>
1301 // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.
1302 if (strayTableContent
)
1303 m_inStrayTableContent
--;
1305 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1306 // In the above example, Goo should stay italic.
1307 // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.
1309 HTMLStackElem
* curr
= m_blockStack
;
1310 HTMLStackElem
* residualStyleStack
= 0;
1311 unsigned stackDepth
= 1;
1312 unsigned redundantStyleCount
= 0;
1313 while (curr
&& curr
!= maxElem
) {
1314 // We will actually schedule this tag for reopening
1315 // after we complete the close of this entire block.
1316 if (isResidualStyleTag(curr
->tagName
) && stackDepth
++ < cResidualStyleMaxDepth
) {
1317 // We've overloaded the use of stack elements and are just reusing the
1318 // struct with a slightly different meaning to the variables. Instead of chaining
1319 // from innermost to outermost, we build up a list of all the tags we need to reopen
1320 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1321 // to the outermost tag we need to reopen.
1322 // We also set curr->node to be the actual element that corresponds to the ID stored in
1323 // curr->id rather than the node that you should pop to when the element gets pulled off
1325 if (residualStyleStack
&& curr
->tagName
== residualStyleStack
->tagName
&& curr
->node
->attributes()->mapsEquivalent(residualStyleStack
->node
->attributes()))
1326 redundantStyleCount
++;
1328 redundantStyleCount
= 0;
1330 if (redundantStyleCount
< cMaxRedundantTagDepth
)
1331 moveOneBlockToStack(residualStyleStack
);
1337 curr
= m_blockStack
;
1340 reopenResidualStyleTags(residualStyleStack
, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content.
1342 m_handlingResidualStyleAcrossBlocks
= false;
1345 void HTMLParser::reopenResidualStyleTags(HTMLStackElem
* elem
, Node
* malformedTableParent
)
1347 // Loop for each tag that needs to be reopened.
1349 // Create a shallow clone of the DOM node for this element.
1350 RefPtr
<Node
> newNode
= elem
->node
->cloneNode(false);
1351 reportError(ResidualStyleError
, &newNode
->localName());
1353 // Append the new node. In the malformed table case, we need to insert before the table,
1354 // which will be the last child.
1355 ExceptionCode ec
= 0;
1356 if (malformedTableParent
)
1357 malformedTableParent
->insertBefore(newNode
, malformedTableParent
->lastChild(), ec
);
1359 m_current
->appendChild(newNode
, ec
);
1360 // FIXME: Is it really OK to ignore the exceptions here?
1362 // Now push a new stack element for this node we just created.
1363 pushBlock(elem
->tagName
, elem
->level
);
1364 newNode
->beginParsingChildren();
1366 // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1367 // that it is inside a malformed table.
1368 m_blockStack
->strayTableContent
= malformedTableParent
!= 0;
1369 if (m_blockStack
->strayTableContent
)
1370 m_inStrayTableContent
++;
1372 // Clear our malformed table parent variable.
1373 malformedTableParent
= 0;
1375 // Update |current| manually to point to the new node.
1376 setCurrent(newNode
.get());
1378 // Advance to the next tag that needs to be reopened.
1379 HTMLStackElem
* next
= elem
->next
;
1386 void HTMLParser::pushBlock(const AtomicString
& tagName
, int level
)
1388 m_blockStack
= new HTMLStackElem(tagName
, level
, m_current
, m_didRefCurrent
, m_blockStack
);
1389 if (level
>= minBlockLevelTagPriority
)
1391 m_didRefCurrent
= false;
1392 if (tagName
== pTag
)
1393 m_hasPElementInScope
= InScope
;
1394 else if (isScopingTag(tagName
))
1395 m_hasPElementInScope
= NotInScope
;
1398 void HTMLParser::popBlock(const AtomicString
& tagName
, bool reportErrors
)
1400 HTMLStackElem
* elem
= m_blockStack
;
1402 if (m_parserQuirks
&& elem
&& !m_parserQuirks
->shouldPopBlock(elem
->tagName
, tagName
))
1407 while (elem
&& (elem
->tagName
!= tagName
)) {
1408 if (maxLevel
< elem
->level
)
1409 maxLevel
= elem
->level
;
1415 reportError(StrayCloseTagError
, &tagName
, 0, true);
1419 if (maxLevel
> elem
->level
) {
1420 // We didn't match because the tag is in a different scope, e.g.,
1421 // <b><p>Foo</b>. Try to correct the problem.
1422 if (!isResidualStyleTag(tagName
))
1424 return handleResidualStyleCloseTagAcrossBlocks(elem
);
1427 bool isAffectedByStyle
= isAffectedByResidualStyle(elem
->tagName
);
1428 HTMLStackElem
* residualStyleStack
= 0;
1429 Node
* malformedTableParent
= 0;
1431 elem
= m_blockStack
;
1432 unsigned stackDepth
= 1;
1433 unsigned redundantStyleCount
= 0;
1435 if (elem
->tagName
== tagName
) {
1436 int strayTable
= m_inStrayTableContent
;
1440 // This element was the root of some malformed content just inside an implicit or
1441 // explicit <tbody> or <tr>.
1442 // If we end up needing to reopen residual style tags, the root of the reopened chain
1443 // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1444 if (strayTable
&& (m_inStrayTableContent
< strayTable
) && residualStyleStack
) {
1445 Node
* curr
= m_current
;
1446 while (curr
&& !curr
->hasTagName(tableTag
))
1447 curr
= curr
->parentNode();
1448 malformedTableParent
= curr
? curr
->parentNode() : 0;
1452 if (m_currentFormElement
&& elem
->tagName
== formTag
)
1453 // A <form> is being closed prematurely (and this is
1454 // malformed HTML). Set an attribute on the form to clear out its
1456 m_currentFormElement
->setMalformed(true);
1458 // Schedule this tag for reopening
1459 // after we complete the close of this entire block.
1460 if (isAffectedByStyle
&& isResidualStyleTag(elem
->tagName
) && stackDepth
++ < cResidualStyleMaxDepth
) {
1461 // We've overloaded the use of stack elements and are just reusing the
1462 // struct with a slightly different meaning to the variables. Instead of chaining
1463 // from innermost to outermost, we build up a list of all the tags we need to reopen
1464 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1465 // to the outermost tag we need to reopen.
1466 // We also set elem->node to be the actual element that corresponds to the ID stored in
1467 // elem->id rather than the node that you should pop to when the element gets pulled off
1469 if (residualStyleStack
&& elem
->tagName
== residualStyleStack
->tagName
&& elem
->node
->attributes()->mapsEquivalent(residualStyleStack
->node
->attributes()))
1470 redundantStyleCount
++;
1472 redundantStyleCount
= 0;
1474 if (redundantStyleCount
< cMaxRedundantTagDepth
)
1475 moveOneBlockToStack(residualStyleStack
);
1480 elem
= m_blockStack
;
1484 reopenResidualStyleTags(residualStyleStack
, malformedTableParent
);
1487 inline HTMLStackElem
* HTMLParser::popOneBlockCommon()
1489 HTMLStackElem
* elem
= m_blockStack
;
1491 // Form elements restore their state during the parsing process.
1492 // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available.
1493 if (m_current
&& elem
->node
!= m_current
)
1494 m_current
->finishParsingChildren();
1496 if (m_blockStack
->level
>= minBlockLevelTagPriority
) {
1497 ASSERT(m_blocksInStack
> 0);
1500 m_blockStack
= elem
->next
;
1501 m_current
= elem
->node
;
1502 m_didRefCurrent
= elem
->didRefNode
;
1504 if (elem
->strayTableContent
)
1505 m_inStrayTableContent
--;
1507 if (elem
->tagName
== pTag
)
1508 m_hasPElementInScope
= NotInScope
;
1509 else if (isScopingTag(elem
->tagName
))
1510 m_hasPElementInScope
= Unknown
;
1515 void HTMLParser::popOneBlock()
1517 // Store the current node before popOneBlockCommon overwrites it.
1518 Node
* lastCurrent
= m_current
;
1519 bool didRefLastCurrent
= m_didRefCurrent
;
1521 delete popOneBlockCommon();
1523 if (didRefLastCurrent
)
1524 lastCurrent
->deref();
1527 void HTMLParser::moveOneBlockToStack(HTMLStackElem
*& head
)
1529 // We'll be using the stack element we're popping, but for the current node.
1530 // See the two callers for details.
1532 // Store the current node before popOneBlockCommon overwrites it.
1533 Node
* lastCurrent
= m_current
;
1534 bool didRefLastCurrent
= m_didRefCurrent
;
1536 // Pop the block, but don't deref the current node as popOneBlock does because
1537 // we'll be using the pointer in the new stack element.
1538 HTMLStackElem
* elem
= popOneBlockCommon();
1540 // Transfer the current node into the stack element.
1541 // No need to deref the old elem->node because popOneBlockCommon transferred
1542 // it into the m_current/m_didRefCurrent fields.
1543 elem
->node
= lastCurrent
;
1544 elem
->didRefNode
= didRefLastCurrent
;
1549 void HTMLParser::checkIfHasPElementInScope()
1551 m_hasPElementInScope
= NotInScope
;
1552 HTMLStackElem
* elem
= m_blockStack
;
1554 const AtomicString
& tagName
= elem
->tagName
;
1555 if (tagName
== pTag
) {
1556 m_hasPElementInScope
= InScope
;
1558 } else if (isScopingTag(tagName
))
1564 void HTMLParser::popInlineBlocks()
1566 while (m_blockStack
&& isInline(m_current
))
1570 void HTMLParser::freeBlock()
1572 while (m_blockStack
)
1574 ASSERT(!m_blocksInStack
);
1577 void HTMLParser::createHead()
1582 if (!m_document
->documentElement()) {
1583 insertNode(new HTMLHtmlElement(htmlTag
, m_document
));
1584 ASSERT(m_document
->documentElement());
1587 m_head
= new HTMLHeadElement(headTag
, m_document
);
1588 HTMLElement
* body
= m_document
->body();
1589 ExceptionCode ec
= 0;
1590 m_document
->documentElement()->insertBefore(m_head
.get(), body
, ec
);
1594 // If the body does not exist yet, then the <head> should be pushed as the current block.
1595 if (m_head
&& !body
) {
1596 pushBlock(m_head
->localName(), m_head
->tagPriority());
1597 setCurrent(m_head
.get());
1601 PassRefPtr
<Node
> HTMLParser::handleIsindex(Token
* t
)
1603 RefPtr
<Node
> n
= new HTMLDivElement(divTag
, m_document
);
1605 NamedMappedAttrMap
* attrs
= t
->attrs
.get();
1607 RefPtr
<HTMLIsIndexElement
> isIndex
= new HTMLIsIndexElement(isindexTag
, m_document
, m_currentFormElement
.get());
1608 isIndex
->setAttributeMap(attrs
);
1609 isIndex
->setAttribute(typeAttr
, "khtml_isindex");
1611 String text
= searchableIndexIntroduction();
1613 if (Attribute
* a
= attrs
->getAttributeItem(promptAttr
))
1614 text
= a
->value().string() + " ";
1618 n
->addChild(new HTMLHRElement(hrTag
, m_document
));
1619 n
->addChild(Text::create(m_document
, text
));
1620 n
->addChild(isIndex
.release());
1621 n
->addChild(new HTMLHRElement(hrTag
, m_document
));
1626 void HTMLParser::startBody()
1633 if (m_isindexElement
) {
1634 insertNode(m_isindexElement
.get(), true /* don't descend into this node */);
1635 m_isindexElement
= 0;
1639 void HTMLParser::finished()
1641 // In the case of a completely empty document, here's the place to create the HTML element.
1642 if (m_current
&& m_current
->isDocumentNode() && !m_document
->documentElement())
1643 insertNode(new HTMLHtmlElement(htmlTag
, m_document
));
1645 // This ensures that "current" is not left pointing to a node when the document is destroyed.
1649 // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this.
1650 if (!m_isParsingFragment
)
1651 m_document
->finishedParsing();
1654 void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode
, const AtomicString
* tagName1
, const AtomicString
* tagName2
, bool closeTags
)
1656 Frame
* frame
= m_document
->frame();
1660 HTMLTokenizer
* htmlTokenizer
= static_cast<HTMLTokenizer
*>(m_document
->tokenizer());
1661 int lineNumber
= htmlTokenizer
->lineNumber() + 1;
1666 if (*tagName1
== "#text")
1668 else if (*tagName1
== "#comment")
1669 tag1
= "<!-- comment -->";
1671 tag1
= (closeTags
? "</" : "<") + *tagName1
+ ">";
1674 if (*tagName2
== "#text")
1676 else if (*tagName2
== "#comment")
1677 tag2
= "<!-- comment -->";
1679 tag2
= (closeTags
? "</" : "<") + *tagName2
+ ">";
1682 const char* errorMsg
= htmlParserErrorMessageTemplate(errorCode
);
1687 if (htmlTokenizer
->processingContentWrittenByScript())
1688 message
+= htmlParserDocumentWriteMessage();
1689 message
+= errorMsg
;
1690 message
.replace("%tag1", tag1
);
1691 message
.replace("%tag2", tag2
);
1693 frame
->domWindow()->console()->addMessage(HTMLMessageSource
, LogMessageType
,
1694 isWarning(errorCode
) ? WarningMessageLevel
: ErrorMessageLevel
,
1695 message
, lineNumber
, m_document
->url().string());
1698 #ifdef BUILDING_ON_LEOPARD
1699 bool shouldCreateImplicitHead(Document
* document
)
1703 Settings
* settings
= document
->page() ? document
->page()->settings() : 0;
1704 return settings
? !settings
->needsLeopardMailQuirks() : true;
1706 #elif defined(BUILDING_ON_TIGER)
1707 bool shouldCreateImplicitHead(Document
* document
)
1711 Settings
* settings
= document
->page() ? document
->page()->settings() : 0;
1712 return settings
? !settings
->needsTigerMailQuirks() : true;