2 This file is part of the KDE libraries
4 Copyright (C) 1997 Martin Jones (mjones@kde.org)
5 (C) 1997 Torben Weis (weis@kde.org)
6 (C) 1999,2001 Lars Knoll (knoll@kde.org)
7 (C) 2000,2001 Dirk Mueller (mueller@kde.org)
8 (C) 2003 Apple Computer, Inc.
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Library General Public
12 License as published by the Free Software Foundation; either
13 version 2 of the License, or (at your option) any later version.
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Library General Public License for more details.
20 You should have received a copy of the GNU Library General Public License
21 along with this library; see the file COPYING.LIB. If not, write to
22 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 Boston, MA 02110-1301, USA.
25 //----------------------------------------------------------------------------
27 // KDE HTML Widget -- HTML Parser
28 // #define PARSER_DEBUG
30 #include "htmlparser.h"
32 #include <dom/dom_exception.h>
34 #include <html/html_baseimpl.h>
35 #include <html/html_blockimpl.h>
36 #include <html/html_canvasimpl.h>
37 #include <html/html_documentimpl.h>
38 #include <html/html_elementimpl.h>
39 #include <html/html_formimpl.h>
40 #include <html/html_headimpl.h>
41 #include <html/html_imageimpl.h>
42 #include <html/html_inlineimpl.h>
43 #include <html/html_listimpl.h>
44 #include <html/html_miscimpl.h>
45 #include <html/html_tableimpl.h>
46 #include <html/html_objectimpl.h>
47 #include <html/HTMLAudioElement.h>
48 #include <html/HTMLVideoElement.h>
49 #include <html/HTMLSourceElement.h>
50 #include <xml/dom_textimpl.h>
51 #include <xml/dom_nodeimpl.h>
52 #include <misc/htmlhashes.h>
53 #include <html/htmltokenizer.h>
54 #include <khtmlview.h>
55 #include <khtml_part.h>
56 #include <khtml_global.h>
57 #include <css/cssproperties.h>
58 #include <css/cssvalues.h>
59 #include <css/csshelper.h>
61 #include <rendering/render_object.h>
66 // Turn off inlining to avoid warning with newer gcc.
69 #include "doctypes.cpp"
73 using namespace khtml
;
76 static QString
getParserPrintableName(int id
)
78 if (id
>= ID_CLOSE_TAG
)
79 return "/" + getPrintableName(id
- ID_CLOSE_TAG
);
81 return getPrintableName(id
);
85 //----------------------------------------------------------------------------
93 HTMLStackElem( int _id
,
97 HTMLStackElem
* _next
)
101 strayTableContent(false),
110 void setNode(NodeImpl
* newNode
)
119 bool strayTableContent
;
128 * The parser parses tokenized input into the document, building up the
129 * document tree. If the document is wellformed, parsing it is
131 * Unfortunately, people can't write wellformed HTML documents, so the parser
132 * has to be tolerant about errors.
134 * We have to take care of the following error conditions:
135 * 1. The element being added is explicitly forbidden inside some outer tag.
136 * In this case we should close all tags up to the one that forbids
137 * the element, and add it afterwards.
138 * 2. We are not allowed to add the element directly. It could be that
139 * the person writing the document forgot some tag in between (or that the
140 * tag in between is optional...). This could be the case with the following
141 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?)
142 * 3. We want to add a block element inside an inline element. Close all
143 * inline elements up to the next higher block element.
144 * 4. If this doesn't help, close elements until we are allowed to add the
145 * element, or ignore the tag.
149 KHTMLParser::KHTMLParser( KHTMLView
*_parent
, DocumentImpl
*doc
)
151 //kDebug( 6035 ) << "parser constructor";
156 HTMLWidget
= _parent
;
162 // ID_CLOSE_TAG == Num of tags
163 forbiddenTag
= new ushort
[ID_CLOSE_TAG
+1];
168 KHTMLParser::KHTMLParser( DOM::DocumentFragmentImpl
*i
, DocumentImpl
*doc
)
173 forbiddenTag
= new ushort
[ID_CLOSE_TAG
+1];
185 KHTMLParser::~KHTMLParser()
188 kDebug( ) << "TIME: parsing time was = " << qt
.elapsed();
193 if (current
) current
->deref();
195 delete [] forbiddenTag
;
199 void KHTMLParser::reset()
201 setCurrent ( document
);
205 // before parsing no tags are forbidden...
206 memset(forbiddenTag
, 0, (ID_CLOSE_TAG
+1)*sizeof(ushort
));
209 haveFrameSet
= false;
214 inStrayTableContent
= 0;
226 void KHTMLParser::parseToken(Token
*t
)
228 if (t
->tid
> 2*ID_CLOSE_TAG
)
230 kDebug( 6035 ) << "Unknown tag!! tagID = " << t
->tid
;
234 if(t
->tid
== discard_until
)
237 // do not skip </iframe>
238 if ( discard_until
|| current
->id() + ID_CLOSE_TAG
!= t
->tid
)
243 kDebug( 6035 ) << "\n\n==> parser: processing token " << getParserPrintableName(t
->tid
) << "(" << t
->tid
<< ")"
244 << " current = " << getParserPrintableName(current
->id()) << "(" << current
->id() << ")" << endl
;
245 kDebug(6035) << "inline=" << m_inline
<< " inBody=" << inBody
<< " haveFrameSet=" << haveFrameSet
<< " haveContent=" << haveContent
;
248 // holy shit. apparently some sites use </br> instead of <br>
249 // be compatible with IE and NS
250 if(t
->tid
== ID_BR
+ID_CLOSE_TAG
&& document
->inCompatMode())
251 t
->tid
-= ID_CLOSE_TAG
;
253 if(t
->tid
> ID_CLOSE_TAG
)
259 // ignore spaces, if we're not inside a paragraph or other inline code
260 if( t
->tid
== ID_TEXT
&& t
->text
) {
261 if(inBody
&& !skipMode() &&
262 current
->id() != ID_STYLE
&& current
->id() != ID_TITLE
&&
263 current
->id() != ID_SCRIPT
&&
264 !t
->text
->containsOnlyWhitespace()) haveContent
= true;
267 kDebug(6035) << "length="<< t
->text
->l
<< " text='" << QString::fromRawData(t
->text
->s
, t
->text
->l
) << "'";
271 NodeImpl
*n
= getElement(t
);
272 // just to be sure, and to catch currently unimplemented stuff
277 if(n
->isElementNode() && t
->tid
!= ID_ISINDEX
)
279 ElementImpl
*e
= static_cast<ElementImpl
*>(n
);
280 e
->setAttributeMap(t
->attrs
);
283 // if this tag is forbidden inside the current context, pop
284 // blocks until we are allowed to add it...
285 while(blockStack
&& forbiddenTag
[t
->tid
]) {
287 kDebug( 6035 ) << "t->id: " << t
->tid
<< " is forbidden :-( ";
292 // sometimes flat doesn't make sense
299 // the tokenizer needs the feedback for space discarding
300 if ( tagPriority(t
->tid
) == 0 )
303 if ( !insertNode(n
, t
->flat
) ) {
304 // we couldn't insert the node...
306 kDebug( 6035 ) << "insertNode failed current=" << current
->id() << ", new=" << n
->id() << "!";
311 kDebug( 6035 ) << " --> resetting map!";
318 kDebug( 6035 ) << " --> resetting form!";
326 void KHTMLParser::parseDoctypeToken(DoctypeToken
* t
)
328 // Ignore any doctype after the first. TODO: It should also be ignored when processing a DocumentFragment
329 if (current
!= document
|| document
->doctype())
332 DocumentTypeImpl
* doctype
= new DocumentTypeImpl(document
->implementation(), document
, t
->name
, t
->publicID
, t
->systemID
);
333 if (!t
->internalSubset
.isEmpty())
334 doctype
->setInternalSubset(t
->internalSubset
);
335 document
->addChild(doctype
);
337 // Determine parse mode here
338 // This code more or less mimics Mozilla's implementation.
340 // There are three possible parse modes:
341 // COMPAT - quirks mode emulates WinIE
342 // and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
343 // be omitted from numbers.
344 // ALMOST STRICT - This mode is identical to strict mode
345 // except for its treatment of line-height in the inline box model. For
346 // now (until the inline box model is re-written), this mode is identical
347 // to STANDARDS mode.
348 // STRICT - no quirks apply. Web pages will obey the specifications to
351 if (!document
->isHTMLDocument()) // FIXME Could document be non-HTML?
353 DOM::HTMLDocumentImpl
* htmldoc
= static_cast<DOM::HTMLDocumentImpl
*> (document
);
354 if (t
->name
.toLower() == "html") {
355 if (!t
->internalSubset
.isEmpty() || t
->publicID
.isEmpty()) {
356 // Internal subsets always denote full standards, as does
357 // a doctype without a public ID.
358 htmldoc
->changeModes(DOM::DocumentImpl::Strict
, DOM::DocumentImpl::Html4
);
360 // We have to check a list of public IDs to see what we
362 QString lowerPubID
= t
->publicID
.toLower();
363 QByteArray pubIDStr
= lowerPubID
.toLocal8Bit();
365 // Look up the entry in our gperf-generated table.
366 const PubIDInfo
* doctypeEntry
= findDoctypeEntry(pubIDStr
.constData(), t
->publicID
.length());
368 // The DOCTYPE is not in the list. Assume strict mode.
369 // ### Doesn't make any sense, but it's what Mozilla does.
370 htmldoc
->changeModes(DOM::DocumentImpl::Strict
, DOM::DocumentImpl::Html4
);
372 switch ((!t
->systemID
.isEmpty()) ?
373 doctypeEntry
->mode_if_sysid
:
374 doctypeEntry
->mode_if_no_sysid
) {
375 case PubIDInfo::eQuirks3
:
376 htmldoc
->changeModes(DOM::DocumentImpl::Compat
, DOM::DocumentImpl::Html3
);
378 case PubIDInfo::eQuirks
:
379 htmldoc
->changeModes(DOM::DocumentImpl::Compat
, DOM::DocumentImpl::Html4
);
381 case PubIDInfo::eAlmostStandards
:
382 htmldoc
->changeModes(DOM::DocumentImpl::Transitional
, DOM::DocumentImpl::Html4
);
385 assert(!"Unknown parse mode");
390 // Malformed doctype implies quirks mode.
391 htmldoc
->changeModes(DOM::DocumentImpl::Compat
, DOM::DocumentImpl::Html3
);
395 static bool isTableRelatedTag(int id
)
397 return (id
== ID_TR
|| id
== ID_TD
|| id
== ID_TABLE
|| id
== ID_TBODY
|| id
== ID_TFOOT
|| id
== ID_THEAD
||
401 bool KHTMLParser::insertNode(NodeImpl
*n
, bool flat
)
405 // <table> is never allowed inside stray table content. Always pop out of the stray table content
406 // and close up the first table, and then start the second table as a sibling.
407 if (inStrayTableContent
&& id
== ID_TABLE
)
410 // let's be stupid and just try to insert it.
411 // this should work if the document is wellformed
413 NodeImpl
*tmp
= current
;
415 NodeImpl
*newNode
= current
->addChild(n
);
418 kDebug( 6035 ) << "added " << n
->nodeName().string() << " to " << tmp
->nodeName().string() << ", new current=" << newNode
->nodeName().string();
420 // We allow TABLE > FORM in dtd.cpp, but do not allow the form to have children in this case
421 if (current
->id() == ID_TABLE
&& id
== ID_FORM
) {
423 static_cast<HTMLFormElementImpl
*>(n
)->setMalformed(true);
426 // don't push elements without end tag on the stack
427 if(tagPriority(id
) != 0 && !flat
) {
429 if(!n
->attached() && HTMLWidget
)
432 if(n
->isInline()) m_inline
= true;
433 pushBlock(id
, tagPriority(id
));
434 setCurrent( newNode
);
437 if(!n
->attached() && HTMLWidget
)
439 if (n
->maintainsState()) {
440 document
->registerMaintainsState(n
);
441 document
->attemptRestoreState(n
);
445 if(n
->isInline()) m_inline
= true;
450 if(tagPriority(id
) == 0 && n
->renderer())
451 n
->renderer()->calcMinMaxWidth();
456 kDebug( 6035 ) << "ADDING NODE FAILED!!!! current = " << current
->nodeName().string() << ", new = " << n
->nodeName().string();
460 bool handled
= false;
462 // first switch on current element for elements with optional end-tag and inline-only content
463 switch(current
->id())
469 popBlock(current
->id());
470 return insertNode(n
);
477 // switch according to the element to insert
483 if (inStrayTableContent
&& !isTableRelatedTag(current
->id())) {
484 // pop out to the nearest enclosing table-related tag.
485 while (blockStack
&& !isTableRelatedTag(current
->id()))
487 return insertNode(n
);
491 // ### allow not having <HTML> in at all, as per HTML spec
492 if (!current
->isDocumentNode() && current
->id() != ID_HTML
)
505 if ( head
->addChild(n
) ) {
507 if(!n
->attached() && HTMLWidget
)
517 if (!current
->isDocumentNode() ) {
518 if ( doc()->documentElement()->id() == ID_HTML
) {
519 // we have another <HTML> element.... apply attributes to existing one
520 // make sure we don't overwrite already existing attributes
521 NamedAttrMapImpl
*map
= static_cast<ElementImpl
*>(n
)->attributes(true);
522 NamedAttrMapImpl
*bmap
= static_cast<ElementImpl
*>(doc()->documentElement())->attributes(false);
523 bool changed
= false;
524 for (unsigned long l
= 0; map
&& l
< map
->length(); ++l
) {
525 NodeImpl::Id attrId
= map
->idAt(l
);
526 DOMStringImpl
*attrValue
= map
->valueAt(l
);
527 changed
= !bmap
->getValue(attrId
);
528 bmap
->setValue(attrId
,attrValue
);
531 doc()->recalcStyle( NodeImpl::Inherit
);
541 DOM::NodeImpl
*newNode
= head
->addChild(n
);
543 pushBlock(id
, tagPriority(id
));
544 setCurrent ( newNode
);
546 if(!n
->attached() && HTMLWidget
)
551 kDebug( 6035 ) << "adding style before to body failed!!!!";
553 discard_until
= ID_STYLE
+ ID_CLOSE_TAG
;
558 discard_until
= id
+ ID_CLOSE_TAG
;
563 // if we failed to insert it, go into skip mode
564 discard_until
= id
+ ID_CLOSE_TAG
;
567 if(inBody
&& doc()->body()) {
568 // we have another <BODY> element.... apply attributes to existing one
569 // make sure we don't overwrite already existing attributes
570 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
571 NamedAttrMapImpl
*map
= static_cast<ElementImpl
*>(n
)->attributes(true);
572 NamedAttrMapImpl
*bmap
= doc()->body()->attributes(false);
573 bool changed
= false;
574 for (unsigned long l
= 0; map
&& l
< map
->length(); ++l
) {
575 NodeImpl::Id attrId
= map
->idAt(l
);
576 DOMStringImpl
*attrValue
= map
->valueAt(l
);
577 if ( !bmap
->getValue(attrId
) ) {
578 bmap
->setValue(attrId
,attrValue
);
583 doc()->recalcStyle( NodeImpl::Inherit
);
584 } else if ( current
->isDocumentNode() )
589 // the following is a hack to move non rendered elements
590 // outside of tables.
591 // needed for broken constructs like <table><form ...><tr>....
594 ElementImpl
*e
= static_cast<ElementImpl
*>(n
);
595 DOMString type
= e
->getAttribute(ATTR_TYPE
);
597 if ( strcasecmp( type
, "hidden" ) != 0 )
603 // Don't try to fit random white-space anywhere
604 TextImpl
*t
= static_cast<TextImpl
*>(n
);
605 if (t
->containsOnlyWhitespace())
607 // ignore text inside the following elements.
608 switch(current
->id())
620 if ( current
->id() == ID_DL
) {
621 e
= new HTMLGenericElementImpl( document
, ID_DD
);
627 e
= new HTMLDListElementImpl(document
);
628 if ( insertNode(e
) ) {
639 if(!n
->attached() && HTMLWidget
)
654 if (isTableRelatedTag(current
->id())) {
655 while (blockStack
&& current
->id() != ID_TABLE
&& isTableRelatedTag(current
->id()))
657 return insertNode(n
);
664 // switch on the currently active element
665 switch(current
->id())
680 head
= new HTMLHeadElementImpl(document
);
687 TextImpl
*t
= static_cast<TextImpl
*>(n
);
688 if (t
->containsOnlyWhitespace())
690 /* Fall through to default */
693 if ( haveFrameSet
) break;
694 e
= new HTMLBodyElementImpl(document
);
702 // we can get here only if the element is not allowed in head.
706 // This means the body starts here...
707 if ( haveFrameSet
) break;
709 e
= new HTMLBodyElementImpl(document
);
718 // Illegal content in a caption. Close the caption and try again.
719 popBlock(ID_CAPTION
);
727 return insertNode(n
, flat
);
738 popBlock(ID_TABLE
); // end the table
739 handled
= checkChild( current
->id(), id
, doc()->inStrictMode());
743 NodeImpl
*node
= current
;
744 NodeImpl
*parent
= node
->parentNode();
745 // A script may have removed the current node's parent from the DOM
746 // http://bugzilla.opendarwin.org/show_bug.cgi?id=7137
747 // FIXME: we should do real recovery here and re-parent with the correct node.
750 NodeImpl
*parentparent
= parent
->parentNode();
752 if (n
->isTextNode() ||
753 ( node
->id() == ID_TR
&&
754 ( parent
->id() == ID_THEAD
||
755 parent
->id() == ID_TBODY
||
756 parent
->id() == ID_TFOOT
) && parentparent
->id() == ID_TABLE
) ||
757 ( !checkChild( ID_TR
, id
) && ( node
->id() == ID_THEAD
|| node
->id() == ID_TBODY
|| node
->id() == ID_TFOOT
) &&
758 parent
->id() == ID_TABLE
) )
760 node
= (node
->id() == ID_TABLE
) ? node
:
761 ((node
->id() == ID_TR
) ? parentparent
: parent
);
762 NodeImpl
*parent
= node
->parentNode();
765 int exceptioncode
= 0;
767 kDebug( 6035 ) << "calling insertBefore(" << n
->nodeName().string() << "," << node
->nodeName().string() << ")";
769 parent
->insertBefore(n
, node
, exceptioncode
);
772 if (!n
->isTextNode())
774 kDebug(6035) << "adding content before table failed..";
777 if ( n
->isElementNode() && tagPriority(id
) != 0 &&
778 !flat
&& endTagRequirement(id
) != DOM::FORBIDDEN
) {
780 pushBlock(id
, tagPriority(id
));
782 inStrayTableContent
++;
783 blockStack
->strayTableContent
= true;
788 if ( current
->id() == ID_TR
)
789 e
= new HTMLTableCellElementImpl(document
, ID_TD
);
790 else if ( current
->id() == ID_TABLE
)
791 e
= new HTMLTableSectionElementImpl( document
, ID_TBODY
, true /* implicit */ );
793 e
= new HTMLTableRowElementImpl( document
);
802 discard_until
= id
+ ID_CLOSE_TAG
;
808 e
= new HTMLLIElementImpl(document
);
809 e
->addCSSProperty(CSS_PROP_LIST_STYLE_TYPE
, CSS_VAL_NONE
);
830 popBlock(current
->id());
836 if (id
== ID_OPTGROUP
)
838 popBlock(current
->id());
841 else if(id
== ID_SELECT
)
843 // IE treats a nested select as </select>. Let's do the same
844 popBlock( ID_SELECT
);
848 // head elements in the body should be ignored.
853 popBlock(current
->id());
857 if(current
->isDocumentNode())
859 DocumentImpl
* doc
= static_cast<DocumentImpl
*>(current
);
860 if (!doc
->documentElement()) {
861 e
= new HTMLHtmlElementImpl(document
);
866 else if(current
->isInline())
873 // if we couldn't handle the error, just rethrow the exception...
876 //kDebug( 6035 ) << "Exception handler failed in HTMLPArser::insertNode()";
880 return insertNode(n
);
885 NodeImpl
*KHTMLParser::getElement(Token
* t
)
892 n
= new HTMLHtmlElementImpl(document
);
895 if(!head
&& (current
->id() == ID_HTML
|| current
->isDocumentNode())) {
896 head
= new HTMLHeadElementImpl(document
);
901 // body no longer allowed if we have a frameset
902 if(haveFrameSet
) break;
904 n
= new HTMLBodyElementImpl(document
);
911 n
= new HTMLBaseElementImpl(document
);
914 n
= new HTMLLinkElementImpl(document
);
917 n
= new HTMLMetaElementImpl(document
);
920 n
= new HTMLStyleElementImpl(document
);
923 // only one non-empty <title> allowed
925 discard_until
= ID_TITLE
+ID_CLOSE_TAG
;
928 n
= new HTMLTitleElementImpl(document
);
929 // we'll set haveTitle when closing the tag
934 n
= new HTMLFrameElementImpl(document
);
938 if ( inBody
&& !haveFrameSet
&& !haveContent
&& !haveBody
) {
940 // ### actually for IE document.body returns the now hidden "body" element
941 // we can't implement that behavior now because it could cause too many
942 // regressions and the headaches are not worth the work as long as there is
943 // no site actually relying on that detail (Dirk)
944 if (static_cast<HTMLDocumentImpl
*>(document
)->body())
945 static_cast<HTMLDocumentImpl
*>(document
)->body()
946 ->addCSSProperty(CSS_PROP_DISPLAY
, CSS_VAL_NONE
);
949 if ( (haveBody
|| haveContent
|| haveFrameSet
) && current
->id() == ID_HTML
)
951 n
= new HTMLFrameSetElementImpl(document
);
955 // a bit of a special case, since the frame is inlined...
957 n
= new HTMLIFrameElementImpl(document
);
962 // thou shall not nest <form> - NS/IE quirk
964 n
= form
= new HTMLFormElementImpl(document
, false);
967 n
= new HTMLButtonElementImpl(document
, form
);
970 n
= new HTMLFieldSetElementImpl(document
, form
);
974 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() &&
975 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled() &&
976 !strcasecmp( t
->attrs
->getValue( ATTR_TYPE
), "image" ) )
978 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered( doc()->completeURL( khtml::parseURL(t
->attrs
->getValue(ATTR_SRC
)).string() ) ))
981 n
= new HTMLInputElementImpl(document
, form
);
984 n
= handleIsindex(t
);
992 n
= new HTMLKeygenElementImpl(document
, form
);
995 n
= new HTMLLabelElementImpl(document
);
998 n
= new HTMLLegendElementImpl(document
, form
);
1001 n
= new HTMLOptGroupElementImpl(document
, form
);
1004 popOptionalBlock(ID_OPTION
);
1005 n
= new HTMLOptionElementImpl(document
, form
);
1009 n
= new HTMLSelectElementImpl(document
, form
);
1012 n
= new HTMLTextAreaElementImpl(document
, form
);
1017 n
= new HTMLDListElementImpl(document
);
1020 popOptionalBlock(ID_DT
);
1021 popOptionalBlock(ID_DD
);
1022 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1025 popOptionalBlock(ID_DD
);
1026 popOptionalBlock(ID_DT
);
1027 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1031 n
= new HTMLUListElementImpl(document
);
1036 n
= new HTMLOListElementImpl(document
);
1040 n
= new HTMLDirectoryElementImpl(document
);
1043 n
= new HTMLMenuElementImpl(document
);
1046 popOptionalBlock(ID_LI
);
1047 n
= new HTMLLIElementImpl(document
);
1049 // formatting elements (block)
1051 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1055 n
= new HTMLLayerElementImpl(document
, t
->tid
);
1059 n
= new HTMLDivElementImpl(document
, t
->tid
);
1067 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1070 n
= new HTMLHRElementImpl(document
);
1076 n
= new HTMLPreElementImpl(document
, t
->tid
);
1081 n
= new HTMLBaseFontElementImpl(document
);
1084 n
= new HTMLFontElementImpl(document
);
1090 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1097 n
= new HTMLAnchorElementImpl(document
);
1104 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled()&&
1105 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled())
1107 QString url
= doc()->completeURL( khtml::parseURL(t
->attrs
->getValue(ATTR_SRC
)).string() );
1108 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url
))
1111 n
= new HTMLImageElementImpl(document
, form
);
1115 n
= new HTMLCanvasElementImpl(document
);
1119 map
= new HTMLMapElementImpl(document
);
1123 n
= new HTMLAreaElementImpl(document
);
1126 // objects, applets and scripts
1128 n
= new HTMLAppletElementImpl(document
);
1131 n
= new HTMLEmbedElementImpl(document
);
1134 n
= new HTMLObjectElementImpl(document
);
1137 n
= new HTMLParamElementImpl(document
);
1141 HTMLScriptElementImpl
*scriptElement
= new HTMLScriptElementImpl(document
);
1142 scriptElement
->setCreatedByParser(true);
1149 n
= new HTMLAudioElement(document
);
1152 n
= new HTMLVideoElement(document
);
1155 n
= new HTMLSourceElement(document
);
1160 n
= new HTMLTableElementImpl(document
);
1163 n
= new HTMLTableCaptionElementImpl(document
);
1167 n
= new HTMLTableColElementImpl(document
, t
->tid
);
1171 n
= new HTMLTableRowElementImpl(document
);
1177 n
= new HTMLTableCellElementImpl(document
, t
->tid
);
1182 popBlock( ID_THEAD
);
1183 popBlock( ID_TBODY
);
1184 popBlock( ID_TFOOT
);
1185 n
= new HTMLTableSectionElementImpl(document
, t
->tid
, false);
1190 n
= new HTMLBRElementImpl(document
);
1193 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1196 // elements with no special representation in the DOM
1201 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1232 if ( t
->tid
== ID_NOBR
|| t
->tid
== ID_WBR
)
1233 popOptionalBlock( t
->tid
);
1235 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1238 // these are special, and normally not rendered
1241 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1242 discard_until
= ID_NOEMBED
+ ID_CLOSE_TAG
;
1247 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1248 discard_until
= ID_NOFRAMES
+ ID_CLOSE_TAG
;
1253 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1254 if(HTMLWidget
&& HTMLWidget
->part()->jScriptEnabled())
1255 discard_until
= ID_NOSCRIPT
+ ID_CLOSE_TAG
;
1259 // discard_until = ID_NOLAYER + ID_CLOSE_TAG;
1263 n
= new HTMLMarqueeElementImpl(document
);
1267 // kDebug(6035) << "ID_TEXT: \"" << DOMString(t->text).string() << "\"";
1268 n
= new TextImpl(document
, t
->text
);
1271 n
= new CommentImpl(document
, t
->text
);
1274 n
= new HTMLGenericElementImpl(document
, t
->tid
);
1276 // kDebug( 6035 ) << "Unknown tag " << t->tid << "!";
1281 void KHTMLParser::processCloseTag(Token
*t
)
1283 // support for really broken html. Can't believe I'm supporting such crap (lars)
1286 case ID_HTML
+ID_CLOSE_TAG
:
1287 case ID_BODY
+ID_CLOSE_TAG
:
1288 // we never trust those close tags, since stupid webpages close
1291 case ID_FORM
+ID_CLOSE_TAG
:
1293 // this one is to get the right style on the body element
1295 case ID_MAP
+ID_CLOSE_TAG
:
1298 case ID_SELECT
+ID_CLOSE_TAG
:
1301 case ID_TITLE
+ID_CLOSE_TAG
:
1302 // Set haveTitle only if <title> isn't empty
1303 if ( current
->firstChild() )
1311 kDebug( 6035 ) << "added the following children to " << current
->nodeName().string();
1312 NodeImpl
*child
= current
->firstChild();
1315 kDebug( 6035 ) << " " << child
->nodeName().string();
1316 child
= child
->nextSibling();
1319 generateImpliedEndTags( t
->tid
- ID_CLOSE_TAG
);
1320 popBlock( t
->tid
- ID_CLOSE_TAG
);
1322 kDebug( 6035 ) << "closeTag --> current = " << current
->nodeName().string();
1326 bool KHTMLParser::isResidualStyleTag(int _id
)
1355 bool KHTMLParser::isAffectedByResidualStyle(int _id
)
1357 if (isResidualStyleTag(_id
))
1386 void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem
* elem
)
1388 // Find the element that crosses over to a higher level.
1389 // ### For now, if there is more than one, we will only make sure we close the residual style.
1390 int exceptionCode
= 0;
1391 HTMLStackElem
* curr
= blockStack
;
1392 HTMLStackElem
* maxElem
= 0;
1393 HTMLStackElem
* endElem
= 0;
1394 HTMLStackElem
* prev
= 0;
1395 HTMLStackElem
* prevMaxElem
= 0;
1396 bool advancedResidual
= false; // ### if set we only close the residual style
1397 while (curr
&& curr
!= elem
) {
1398 if (curr
->level
> elem
->level
) {
1399 if (!isAffectedByResidualStyle(curr
->id
)) return;
1400 if (maxElem
) advancedResidual
= true;
1411 if (!curr
|| !maxElem
) return;
1413 NodeImpl
* residualElem
= prev
->node
;
1414 NodeImpl
* blockElem
= prevMaxElem
? prevMaxElem
->node
: current
;
1415 RefPtr
<NodeImpl
> parentElem
= elem
->node
;
1417 // Check to see if the reparenting that is going to occur is allowed according to the DOM.
1418 // FIXME: We should either always allow it or perform an additional fixup instead of
1419 // just bailing here.
1420 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
1421 if (!parentElem
->childAllowed(blockElem
))
1424 if (maxElem
->node
->parentNode() != elem
->node
&& !advancedResidual
) {
1425 // Walk the stack and remove any elements that aren't residual style tags. These
1426 // are basically just being closed up. Example:
1427 // <font><span>Moo<p>Goo</font></p>.
1428 // In the above example, the <span> doesn't need to be reopened. It can just close.
1429 HTMLStackElem
* currElem
= maxElem
->next
;
1430 HTMLStackElem
* prevElem
= maxElem
;
1431 while (currElem
!= elem
) {
1432 HTMLStackElem
* nextElem
= currElem
->next
;
1433 if (!isResidualStyleTag(currElem
->id
)) {
1434 prevElem
->next
= nextElem
;
1435 prevElem
->setNode(currElem
->node
);
1439 prevElem
= currElem
;
1440 currElem
= nextElem
;
1443 // We have to reopen residual tags in between maxElem and elem. An example of this case is:
1444 // <font><i>Moo<p>Foo</font>.
1445 // In this case, we need to transform the part before the <p> into:
1446 // <font><i>Moo</i></font><i>
1447 // so that the <i> will remain open. This involves the modification of elements
1448 // in the block stack.
1449 // This will also affect how we ultimately reparent the block, since we want it to end up
1450 // under the reopened residual tags (e.g., the <i> in the above example.)
1451 RefPtr
<NodeImpl
> prevNode
= 0;
1452 RefPtr
<NodeImpl
> currNode
= 0;
1454 while (currElem
->node
!= residualElem
) {
1455 if (isResidualStyleTag(currElem
->node
->id())) {
1456 // Create a clone of this element.
1457 currNode
= currElem
->node
->cloneNode(false);
1458 currElem
->node
->close();
1459 removeForbidden(currElem
->id
, forbiddenTag
);
1461 // Change the stack element's node to point to the clone.
1462 currElem
->setNode(currNode
.get());
1464 // Attach the previous node as a child of this new node.
1466 currNode
->appendChild(prevNode
.get(), exceptionCode
);
1467 else // The new parent for the block element is going to be the innermost clone.
1468 parentElem
= currNode
;
1470 prevNode
= currNode
;
1473 currElem
= currElem
->next
;
1476 // Now append the chain of new residual style elements if one exists.
1478 elem
->node
->appendChild(prevNode
.get(), exceptionCode
);
1481 // We need to make a clone of |residualElem| and place it just inside |blockElem|.
1482 // All content of |blockElem| is reparented to be under this clone. We then
1483 // reparent |blockElem| using real DOM calls so that attachment/detachment will
1484 // be performed to fix up the rendering tree.
1485 // So for this example: <b>...<p>Foo</b>Goo</p>
1486 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
1488 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
1489 SharedPtr
<NodeImpl
> guard(blockElem
);
1490 blockElem
->parentNode()->removeChild(blockElem
, exceptionCode
);
1492 if (!advancedResidual
) {
1493 // Step 2: Clone |residualElem|.
1494 RefPtr
<NodeImpl
> newNode
= residualElem
->cloneNode(false); // Shallow clone. We don't pick up the same kids.
1496 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
1497 // before we've put |newNode| into the document. That way we'll only do one attachment of all
1498 // the new content (instead of a bunch of individual attachments).
1499 NodeImpl
* currNode
= blockElem
->firstChild();
1501 NodeImpl
* nextNode
= currNode
->nextSibling();
1502 SharedPtr
<NodeImpl
> guard(currNode
); //Protect from deletion while moving
1503 blockElem
->removeChild(currNode
, exceptionCode
);
1504 newNode
->appendChild(currNode
, exceptionCode
);
1505 currNode
= nextNode
;
1507 // TODO - To be replaced.
1508 // Re-register form elements with currently active form, step 1 will have removed them
1509 if (form
&& currNode
&& currNode
->isGenericFormElement())
1511 HTMLGenericFormElementImpl
*e
= static_cast<HTMLGenericFormElementImpl
*>(currNode
);
1512 form
->registerFormElement(e
);
1516 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
1517 // attachment can occur yet.
1518 blockElem
->appendChild(newNode
.get(), exceptionCode
);
1521 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
1522 parentElem
->appendChild(blockElem
, exceptionCode
);
1524 // Step 6: Elide |elem|, since it is effectively no longer open. Also update
1525 // the node associated with the previous stack element so that when it gets popped,
1526 // it doesn't make the residual element the next current node.
1527 HTMLStackElem
* currElem
= maxElem
;
1528 HTMLStackElem
* prevElem
= 0;
1529 while (currElem
!= elem
) {
1530 prevElem
= currElem
;
1531 currElem
= currElem
->next
;
1533 prevElem
->next
= elem
->next
;
1534 prevElem
->setNode(elem
->node
);
1537 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1538 // In the above example, Goo should stay italic.
1540 HTMLStackElem
* residualStyleStack
= 0;
1541 while (curr
&& curr
!= endElem
) {
1542 // We will actually schedule this tag for reopening
1543 // after we complete the close of this entire block.
1544 NodeImpl
* currNode
= current
;
1545 if (isResidualStyleTag(curr
->id
)) {
1546 // We've overloaded the use of stack elements and are just reusing the
1547 // struct with a slightly different meaning to the variables. Instead of chaining
1548 // from innermost to outermost, we build up a list of all the tags we need to reopen
1549 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1550 // to the outermost tag we need to reopen.
1551 // We also set curr->node to be the actual element that corresponds to the ID stored in
1552 // curr->id rather than the node that you should pop to when the element gets pulled off
1555 curr
->setNode(currNode
);
1556 curr
->next
= residualStyleStack
;
1557 residualStyleStack
= curr
;
1565 reopenResidualStyleTags(residualStyleStack
, 0); // FIXME: Deal with stray table content some day
1566 // if it becomes necessary to do so.
// Reopens residual style tags starting from the stack entry |elem|.
// For an entry, the visible code shallow-clones elem->node, inserts the clone into the
// tree, pushes a block with the entry's id and level, sets blockStack->strayTableContent,
// and makes the clone |current|.
//   elem                 - entry whose node, id, level and next fields are read below.
//   malformedTableParent - when non-null, the clone is inserted into it before its last
//                          child, and the strayTableContent flag is set to true.
// NOTE(review): this listing shows no braces and no loop header, and its embedded line
// numbers skip values, so some statements are probably not visible here -- confirm
// against the full file before relying on the control flow.
1569 void KHTMLParser::reopenResidualStyleTags(HTMLStackElem
* elem
, DOM::NodeImpl
* malformedTableParent
)
1571 // Loop for each tag that needs to be reopened.
1573 // Create a shallow clone of the DOM node for this element.
1574 RefPtr
<NodeImpl
> newNode
= elem
->node
->cloneNode(false);
1576 // Append the new node. In the malformed table case, we need to insert before the table,
1577 // which will be the last child.
1578 int exceptionCode
= 0;
1579 if (malformedTableParent
)
1580 malformedTableParent
->insertBefore(newNode
.get(), malformedTableParent
->lastChild(), exceptionCode
);
// NOTE(review): no `else` is visible ahead of this append; presumably it is the
// alternative taken when there is no malformed table parent -- confirm.
1582 current
->appendChild(newNode
.get(), exceptionCode
);
1583 // FIXME: Is it really OK to ignore the exceptions here?
1585 // Now push a new stack element for this node we just created.
1586 pushBlock(elem
->id
, elem
->level
);
1588 // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1589 // that it is inside a malformed table.
1590 blockStack
->strayTableContent
= malformedTableParent
!= 0;
// Keep the parser-wide counter in step with the flag that was just set.
1591 if (blockStack
->strayTableContent
)
1592 inStrayTableContent
++;
1594 // Clear our malformed table parent variable.
1595 malformedTableParent
= 0;
1597 // Update |current| manually to point to the new node.
1598 setCurrent(newNode
.get());
1600 // Advance to the next tag that needs to be reopened.
// elem->next is saved in |next|; NOTE(review): the statements that use |next| are not
// visible in this listing.
1601 HTMLStackElem
* next
= elem
->next
;
// Creates a stack entry for a newly opened tag.
//   _id    - tag id stored in the entry and passed to addForbidden().
//   _level - level stored in the entry.
// The entry is constructed from _id, _level, |current|, m_inline and the present
// blockStack; afterwards _id and forbiddenTag are handed to addForbidden().
// NOTE(review): the statement that installs the new entry as blockStack is not visible
// in this listing -- confirm against the full file.
1607 void KHTMLParser::pushBlock(int _id
, int _level
)
1609 HTMLStackElem
*Elem
= new HTMLStackElem(_id
, _level
, current
, m_inline
, blockStack
);
1612 addForbidden(_id
, forbiddenTag
);
// Walks the block stack from its top while the entry's id differs from |_id|.
//   _id - tag id at which the walk stops; its tagPriority() is the comparison level.
// For each visited entry the code tests whether endTagRequirement() reports
// DOM::OPTIONAL and whether the entry's level is not above that priority.
// NOTE(review): what is done with entries that pass the test, and how the walk
// advances, is not visible in this listing -- confirm against the full file.
1615 void KHTMLParser::generateImpliedEndTags( int _id
)
1617 HTMLStackElem
*Elem
= blockStack
;
1619 int level
= tagPriority(_id
);
1620 while( Elem
&& Elem
->id
!= _id
)
// The successor is read up front; presumably the current entry can be removed inside
// the loop body -- TODO confirm.
1622 HTMLStackElem
*NextElem
= Elem
->next
;
1623 if (endTagRequirement(Elem
->id
) == DOM::OPTIONAL
&& Elem
->level
<= level
) {
// Examines block stack entries, starting at blockStack, in relation to tag |_id|.
//   _id - tag id being looked for; its tagPriority() is the comparison level.
// Two tests are visible: whether an entry's id equals |_id|, and whether an entry has
// a level above that priority or is neither optional-end-tag (per endTagRequirement())
// nor a residual style tag. generateImpliedEndTags(_id) is called further down.
// NOTE(review): the loop header, the bodies of both tests and the condition guarding
// the generateImpliedEndTags() call are not visible in this listing -- confirm.
1632 void KHTMLParser::popOptionalBlock( int _id
)
1635 HTMLStackElem
*Elem
= blockStack
;
1637 int level
= tagPriority(_id
);
1640 if (Elem
->id
== _id
) {
1644 if (Elem
->level
> level
|| (endTagRequirement(Elem
->id
) != DOM::OPTIONAL
&& !isResidualStyleTag(Elem
->id
)) )
1650 generateImpliedEndTags(_id
);
// Handles the close of tag |_id| against the block stack.
//   _id - id of the tag being closed.
// Visible steps: search the stack from its top for an entry with id |_id| while tracking
// the largest level seen (maxLevel); if that exceeds the matched entry's level, the close
// crosses a scope and handleResidualStyleCloseTagAcrossBlocks() is involved; otherwise
// entries that are residual style tags are chained onto residualStyleStack and finally
// passed to reopenResidualStyleTags() together with malformedTableParent.
// NOTE(review): this listing shows almost no braces, and its embedded line numbers skip
// values (the declaration of maxLevel and the calls that actually pop entries are not
// visible), so the exact control flow must be confirmed against the full file.
1655 void KHTMLParser::popBlock( int _id
)
1657 HTMLStackElem
*Elem
= blockStack
;
1661 kDebug( 6035 ) << "popBlock(" << getParserPrintableName(_id
) << ")";
1663 kDebug( 6035) << " > " << getParserPrintableName(Elem
->id
);
// Search for the entry that matches |_id|, remembering the highest level passed over.
1669 while( Elem
&& (Elem
->id
!= _id
))
1671 if (maxLevel
< Elem
->level
)
1673 maxLevel
= Elem
->level
;
1680 if (maxLevel
> Elem
->level
) {
1681 // We didn't match because the tag is in a different scope, e.g.,
1682 // <b><p>Foo</b>. Try to correct the problem.
// NOTE(review): the embedded numbering skips a line between this test and the return
// below, so the statement this test guards is not visible here -- confirm.
1683 if (!isResidualStyleTag(_id
))
1685 return handleResidualStyleCloseTagAcrossBlocks(Elem
);
1688 bool isAffectedByStyle
= isAffectedByResidualStyle(Elem
->id
);
1689 HTMLStackElem
* residualStyleStack
= 0;
1690 NodeImpl
* malformedTableParent
= 0;
1696 if (Elem
->id
== _id
)
// Snapshot of the stray-table counter, compared against the live counter below.
1698 int strayTable
= inStrayTableContent
;
1702 // This element was the root of some malformed content just inside an implicit or
1703 // explicit <tbody> or <tr>.
1704 // If we end up needing to reopen residual style tags, the root of the reopened chain
1705 // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1706 if (strayTable
&& (inStrayTableContent
< strayTable
) && residualStyleStack
) {
// Climb from |current| to the nearest ancestor-or-self with id ID_TABLE; the parent of
// that node (or 0 when none is found) becomes malformedTableParent.
1707 NodeImpl
* curr
= current
;
1708 while (curr
&& curr
->id() != ID_TABLE
)
1709 curr
= curr
->parentNode();
1710 malformedTableParent
= curr
? curr
->parentNode() : 0;
1715 // Schedule this tag for reopening
1716 // after we complete the close of this entire block.
1717 NodeImpl
* currNode
= current
;
1718 if (isAffectedByStyle
&& isResidualStyleTag(Elem
->id
)) {
1719 // We've overloaded the use of stack elements and are just reusing the
1720 // struct with a slightly different meaning to the variables. Instead of chaining
1721 // from innermost to outermost, we build up a list of all the tags we need to reopen
1722 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1723 // to the outermost tag we need to reopen.
1724 // We also set Elem->node to be the actual element that corresponds to the ID stored in
1725 // Elem->id rather than the node that you should pop to when the element gets pulled off
// the stack (the sentence above is cut short in this listing; ending presumed).
1728 Elem
->next
= residualStyleStack
;
1729 Elem
->setNode(currNode
);
1730 residualStyleStack
= Elem
;
// Hand the collected chain, and the table parent if one was found, over for reopening.
1738 reopenResidualStyleTags(residualStyleStack
, malformedTableParent
);
// Pops the entry at the top of the block stack.
//   delBlock - NOTE(review): no use of this parameter is visible in this listing;
//              presumably it controls whether the popped entry is deleted -- confirm.
// Visible effects: optional state registration/restoration for |current| on the document,
// removeForbidden() for the entry's id, blockStack advanced to the entry's next,
// m_inline and |current| restored from the entry, the form flagged malformed in one
// case, and inStrayTableContent decremented when the entry was marked strayTableContent.
// NOTE(review): the preprocessor region below is only partly visible (no #else/#endif
// and no null check appear), and no braces are shown -- confirm against the full file.
1741 void KHTMLParser::popOneBlock(bool delBlock
)
1743 HTMLStackElem
*Elem
= blockStack
;
1745 // we should never get here, but some bad html might cause it.
1746 #ifndef PARSER_DEBUG
1749 kDebug( 6035 ) << "popping block: " << getParserPrintableName(Elem
->id
) << "(" << Elem
->id
<< ")";
// Only when the entry's saved node differs from |current| is the state handling
// below considered.
1753 if((Elem
->node
!= current
)) {
1754 if (current
->maintainsState() && document
){
1755 document
->registerMaintainsState(current
);
1756 document
->attemptRestoreState(current
);
1762 removeForbidden(Elem
->id
, forbiddenTag
);
// Unlink the entry from the stack.
1764 blockStack
= Elem
->next
;
1765 // we only set inline to false, if the element we close is a block level element.
1766 // This helps getting cases as <p><b>bla</b> <b>bla</b> right.
1768 m_inline
= Elem
->m_inline
;
// A form element being closed while stray table content is open is marked malformed.
1770 if (current
->id() == ID_FORM
&& form
&& inStrayTableContent
)
1771 form
->setMalformed(true);
// The node stored in the entry becomes |current| again.
1773 setCurrent( Elem
->node
);
1775 if (Elem
->strayTableContent
)
1776 inStrayTableContent
--;
// Loops while the block stack is non-empty and |current| is an inline node whose id is
// not ID_FONT.
// NOTE(review): the loop body is not visible in this listing; presumably it pops one
// block per iteration -- confirm against the full file.
1782 void KHTMLParser::popInlineBlocks()
1784 while(blockStack
&& current
->isInline() && current
->id() != ID_FONT
)
// NOTE(review): only the signature is visible in this listing. Judging by the name this
// presumably releases the block stack -- confirm against the full file.
1788 void KHTMLParser::freeBlock()
// Creates the document's <head> element.
// Visible steps: test whether |head| is already set or the document has no document
// element; allocate an HTMLHeadElementImpl; insert it into the document element before
// the body returned by doc()->body(); log when the insertion reports an exception code;
// and, if there is a head but no body, push a block for the head.
// NOTE(review): the statement guarded by the first test (presumably an early return) and
// the cleanup performed when insertion fails are not visible in this listing -- confirm
// against the full file.
1795 void KHTMLParser::createHead()
1797 if(head
|| !doc()->documentElement())
1800 head
= new HTMLHeadElementImpl(document
);
// |body| may be null here; it is both the insertion reference node and tested below.
1801 HTMLElementImpl
*body
= doc()->body();
1802 int exceptioncode
= 0;
1803 doc()->documentElement()->insertBefore(head
, body
, exceptioncode
);
1804 if ( exceptioncode
) {
1806 kDebug( 6035 ) << "creation of head failed!!!!:" << exceptioncode
;
1812 // If the body does not exist yet, then the <head> should be pushed as the current block.
1813 if (head
&& !body
) {
1814 pushBlock(head
->id(), tagPriority(head
->id()));
// Builds the replacement subtree for an <isindex> token.
//   t - the token; its attrs (when present) are queried for ATTR_PROMPT.
// Visible construction: a DIV node |n| that receives, in order, an HR element, a text
// node, an HTMLIsIndexElementImpl bound to |myform| with ATTR_TYPE set to
// "khtml_isindex", and a second HR element.
// Declared to return a NodeImpl*.
// NOTE(review): the declaration of |n|, the condition under which a new form is created,
// the use of the prompt attribute |a| and the return statement are not visible in this
// listing -- confirm against the full file.
1819 NodeImpl
*KHTMLParser::handleIsindex( Token
*t
)
// Start from the parser's current form.
1822 HTMLFormElementImpl
*myform
= form
;
// NOTE(review): presumably only executed when no form exists yet; the guard is not
// visible here.
1824 myform
= new HTMLFormElementImpl(document
, true);
1827 n
= new HTMLDivElementImpl( document
, ID_DIV
);
1828 NodeImpl
*child
= new HTMLHRElementImpl( document
);
1829 n
->addChild( child
);
// Optional prompt text supplied on the token; null when the token has no attributes.
1830 DOMStringImpl
* a
= t
->attrs
? t
->attrs
->getValue(ATTR_PROMPT
) : 0;
// Default, localized prompt text.
1831 DOMString text
= i18n("This is a searchable index. Enter search keywords: ");
1834 child
= new TextImpl(document
, text
.implementation());
1835 n
->addChild( child
);
1836 child
= new HTMLIsIndexElementImpl(document
, myform
);
1837 static_cast<ElementImpl
*>(child
)->setAttribute(ATTR_TYPE
, "khtml_isindex");
1838 n
->addChild( child
);
1839 child
= new HTMLHRElementImpl( document
);
1840 n
->addChild( child
);
1845 void KHTMLParser::startBody()
1852 insertNode( isindex
, true /* don't decend into this node */ );