don't discard iframe children.
[kdelibs.git] / khtml / html / htmlparser.cpp
blob4ef32b3acf573bfcfe19d3f801d153790bb36cda
1 /*
2 This file is part of the KDE libraries
4 Copyright (C) 1997 Martin Jones (mjones@kde.org)
5 (C) 1997 Torben Weis (weis@kde.org)
6 (C) 1999,2001 Lars Knoll (knoll@kde.org)
7 (C) 2000,2001 Dirk Mueller (mueller@kde.org)
8 (C) 2003 Apple Computer, Inc.
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Library General Public
12 License as published by the Free Software Foundation; either
13 version 2 of the License, or (at your option) any later version.
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Library General Public License for more details.
20 You should have received a copy of the GNU Library General Public License
21 along with this library; see the file COPYING.LIB. If not, write to
22 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 Boston, MA 02110-1301, USA.
25 //----------------------------------------------------------------------------
27 // KDE HTML Widget -- HTML Parser
28 // #define PARSER_DEBUG
30 #include "htmlparser.h"
32 #include <dom/dom_exception.h>
34 #include <html/html_baseimpl.h>
35 #include <html/html_blockimpl.h>
36 #include <html/html_canvasimpl.h>
37 #include <html/html_documentimpl.h>
38 #include <html/html_elementimpl.h>
39 #include <html/html_formimpl.h>
40 #include <html/html_headimpl.h>
41 #include <html/html_imageimpl.h>
42 #include <html/html_inlineimpl.h>
43 #include <html/html_listimpl.h>
44 #include <html/html_miscimpl.h>
45 #include <html/html_tableimpl.h>
46 #include <html/html_objectimpl.h>
47 #include <html/HTMLAudioElement.h>
48 #include <html/HTMLVideoElement.h>
49 #include <html/HTMLSourceElement.h>
50 #include <xml/dom_textimpl.h>
51 #include <xml/dom_nodeimpl.h>
52 #include <misc/htmlhashes.h>
53 #include <html/htmltokenizer.h>
54 #include <khtmlview.h>
55 #include <khtml_part.h>
56 #include <khtml_global.h>
57 #include <css/cssproperties.h>
58 #include <css/cssvalues.h>
59 #include <css/csshelper.h>
61 #include <rendering/render_object.h>
63 #include <kdebug.h>
64 #include <klocale.h>
66 // Turn off inlining to avoid warning with newer gcc.
67 #undef __inline
68 #define __inline
69 #include "doctypes.cpp"
70 #undef __inline
72 using namespace DOM;
73 using namespace khtml;
#ifdef PARSER_DEBUG
// Debug-only helper: printable name for a parser token id.
// Ids at or above ID_CLOSE_TAG denote closing tags; they are rendered as
// "/" followed by the name of the matching opening tag.
static QString getParserPrintableName(int id)
{
    if (id < ID_CLOSE_TAG)
        return getPrintableName(id);

    return "/" + getPrintableName(id - ID_CLOSE_TAG);
}
#endif
85 //----------------------------------------------------------------------------
87 /**
88 * @internal
90 class HTMLStackElem
92 public:
93 HTMLStackElem( int _id,
94 int _level,
95 DOM::NodeImpl *_node,
96 bool _inline_,
97 HTMLStackElem * _next )
99 id(_id),
100 level(_level),
101 strayTableContent(false),
102 m_inline(_inline_),
103 node(_node),
104 next(_next)
105 { node->ref(); }
107 ~HTMLStackElem()
108 { node->deref(); }
110 void setNode(NodeImpl* newNode)
112 newNode->ref();
113 node->deref();
114 node = newNode;
117 int id;
118 int level;
119 bool strayTableContent;
120 bool m_inline;
121 NodeImpl *node;
122 HTMLStackElem *next;
/**
 * @internal
 *
 * The parser parses tokenized input into the document, building up the
 * document tree. If the document is well-formed, parsing it is
 * straightforward.
 * Unfortunately, people can't write well-formed HTML documents, so the parser
 * has to be tolerant of errors.
 *
 * We have to take care of the following error conditions:
 * 1. The element being added is explicitly forbidden inside some outer tag.
 *    In this case we should close all tags up to the one which forbids
 *    the element, and add it afterwards.
 * 2. We are not allowed to add the element directly. It could be that
 *    the person writing the document forgot some tag in between (or that the
 *    tag in between is optional...). This could be the case with the following
 *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?)
 * 3. We want to add a block element inside an inline element. Close all
 *    inline elements up to the next higher block element.
 * 4. If this doesn't help, close elements until we are allowed to add the
 *    element, or ignore the tag.
 */
149 KHTMLParser::KHTMLParser( KHTMLView *_parent, DocumentImpl *doc)
151 //kDebug( 6035 ) << "parser constructor";
152 #if SPEED_DEBUG > 0
153 qt.start();
154 #endif
156 HTMLWidget = _parent;
157 document = doc;
159 blockStack = 0;
160 current = 0;
162 // ID_CLOSE_TAG == Num of tags
163 forbiddenTag = new ushort[ID_CLOSE_TAG+1];
165 reset();
168 KHTMLParser::KHTMLParser( DOM::DocumentFragmentImpl *i, DocumentImpl *doc )
170 HTMLWidget = 0;
171 document = doc;
173 forbiddenTag = new ushort[ID_CLOSE_TAG+1];
175 blockStack = 0;
176 current = 0;
178 reset();
180 setCurrent(i);
182 inBody = true;
185 KHTMLParser::~KHTMLParser()
187 #if SPEED_DEBUG > 0
188 kDebug( ) << "TIME: parsing time was = " << qt.elapsed();
189 #endif
191 freeBlock();
193 if (current) current->deref();
195 delete [] forbiddenTag;
196 delete isindex;
199 void KHTMLParser::reset()
201 setCurrent ( document );
203 freeBlock();
205 // before parsing no tags are forbidden...
206 memset(forbiddenTag, 0, (ID_CLOSE_TAG+1)*sizeof(ushort));
208 inBody = false;
209 haveFrameSet = false;
210 haveContent = false;
211 haveBody = false;
212 haveTitle = false;
213 inSelect = false;
214 inStrayTableContent = 0;
215 m_inline = false;
217 form = 0;
218 map = 0;
219 head = 0;
220 end = false;
221 isindex = 0;
223 discard_until = 0;
226 void KHTMLParser::parseToken(Token *t)
228 if (t->tid > 2*ID_CLOSE_TAG)
230 kDebug( 6035 ) << "Unknown tag!! tagID = " << t->tid;
231 return;
233 if(discard_until) {
234 if(t->tid == discard_until)
235 discard_until = 0;
237 // do not skip </iframe>
238 if ( discard_until || current->id() + ID_CLOSE_TAG != t->tid )
239 return;
242 #ifdef PARSER_DEBUG
243 kDebug( 6035 ) << "\n\n==> parser: processing token " << getParserPrintableName(t->tid) << "(" << t->tid << ")"
244 << " current = " << getParserPrintableName(current->id()) << "(" << current->id() << ")" << endl;
245 kDebug(6035) << "inline=" << m_inline << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << " haveContent=" << haveContent;
246 #endif
248 // holy shit. apparently some sites use </br> instead of <br>
249 // be compatible with IE and NS
250 if(t->tid == ID_BR+ID_CLOSE_TAG && document->inCompatMode())
251 t->tid -= ID_CLOSE_TAG;
253 if(t->tid > ID_CLOSE_TAG)
255 processCloseTag(t);
256 return;
259 // ignore spaces, if we're not inside a paragraph or other inline code
260 if( t->tid == ID_TEXT && t->text ) {
261 if(inBody && !skipMode() &&
262 current->id() != ID_STYLE && current->id() != ID_TITLE &&
263 current->id() != ID_SCRIPT &&
264 !t->text->containsOnlyWhitespace()) haveContent = true;
265 #ifdef PARSER_DEBUG
267 kDebug(6035) << "length="<< t->text->l << " text='" << QString::fromRawData(t->text->s, t->text->l) << "'";
268 #endif
271 NodeImpl *n = getElement(t);
272 // just to be sure, and to catch currently unimplemented stuff
273 if(!n)
274 return;
276 // set attributes
277 if(n->isElementNode() && t->tid != ID_ISINDEX)
279 ElementImpl *e = static_cast<ElementImpl *>(n);
280 e->setAttributeMap(t->attrs);
283 // if this tag is forbidden inside the current context, pop
284 // blocks until we are allowed to add it...
285 while(blockStack && forbiddenTag[t->tid]) {
286 #ifdef PARSER_DEBUG
287 kDebug( 6035 ) << "t->id: " << t->tid << " is forbidden :-( ";
288 #endif
289 popOneBlock();
292 // sometimes flat doesn't make sense
293 switch(t->tid) {
294 case ID_SELECT:
295 case ID_OPTION:
296 t->flat = false;
299 // the tokenizer needs the feedback for space discarding
300 if ( tagPriority(t->tid) == 0 )
301 t->flat = true;
303 if ( !insertNode(n, t->flat) ) {
304 // we couldn't insert the node...
305 #ifdef PARSER_DEBUG
306 kDebug( 6035 ) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!";
307 #endif
308 if (map == n)
310 #ifdef PARSER_DEBUG
311 kDebug( 6035 ) << " --> resetting map!";
312 #endif
313 map = 0;
315 if (form == n)
317 #ifdef PARSER_DEBUG
318 kDebug( 6035 ) << " --> resetting form!";
319 #endif
320 form = 0;
322 delete n;
326 void KHTMLParser::parseDoctypeToken(DoctypeToken* t)
328 // Ignore any doctype after the first. TODO It should be also ignored when processing DocumentFragment
329 if (current != document || document->doctype())
330 return;
332 DocumentTypeImpl* doctype = new DocumentTypeImpl(document->implementation(), document, t->name, t->publicID, t->systemID);
333 if (!t->internalSubset.isEmpty())
334 doctype->setInternalSubset(t->internalSubset);
335 document->addChild(doctype);
337 // Determine parse mode here
338 // This code more or less mimics Mozilla's implementation.
340 // There are three possible parse modes:
341 // COMPAT - quirks mode emulates WinIE
342 // and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
343 // be omitted from numbers.
344 // ALMOST STRICT - This mode is identical to strict mode
345 // except for its treatment of line-height in the inline box model. For
346 // now (until the inline box model is re-written), this mode is identical
347 // to STANDARDS mode.
348 // STRICT - no quirks apply. Web pages will obey the specifications to
349 // the letter.
351 if (!document->isHTMLDocument()) // FIXME Could document be non-HTML?
352 return;
353 DOM::HTMLDocumentImpl* htmldoc = static_cast<DOM::HTMLDocumentImpl*> (document);
354 if (t->name.toLower() == "html") {
355 if (!t->internalSubset.isEmpty() || t->publicID.isEmpty()) {
356 // Internal subsets always denote full standards, as does
357 // a doctype without a public ID.
358 htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
359 } else {
360 // We have to check a list of public IDs to see what we
361 // should do.
362 QString lowerPubID = t->publicID.toLower();
363 QByteArray pubIDStr = lowerPubID.toLocal8Bit();
365 // Look up the entry in our gperf-generated table.
366 const PubIDInfo* doctypeEntry = findDoctypeEntry(pubIDStr.constData(), t->publicID.length());
367 if (!doctypeEntry) {
368 // The DOCTYPE is not in the list. Assume strict mode.
369 // ### Doesn't make any sense, but it's what Mozilla does.
370 htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
371 } else {
372 switch ((!t->systemID.isEmpty()) ?
373 doctypeEntry->mode_if_sysid :
374 doctypeEntry->mode_if_no_sysid) {
375 case PubIDInfo::eQuirks3:
376 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
377 break;
378 case PubIDInfo::eQuirks:
379 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html4);
380 break;
381 case PubIDInfo::eAlmostStandards:
382 htmldoc->changeModes(DOM::DocumentImpl::Transitional, DOM::DocumentImpl::Html4);
383 break;
384 default:
385 assert(!"Unknown parse mode");
389 } else {
390 // Malformed doctype implies quirks mode.
391 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
395 static bool isTableRelatedTag(int id)
397 return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD ||
398 id == ID_TH);
401 bool KHTMLParser::insertNode(NodeImpl *n, bool flat)
403 int id = n->id();
405 // <table> is never allowed inside stray table content. Always pop out of the stray table content
406 // and close up the first table, and then start the second table as a sibling.
407 if (inStrayTableContent && id == ID_TABLE)
408 popBlock(ID_TABLE);
410 // let's be stupid and just try to insert it.
411 // this should work if the document is wellformed
412 #ifdef PARSER_DEBUG
413 NodeImpl *tmp = current;
414 #endif
415 NodeImpl *newNode = current->addChild(n);
416 if ( newNode ) {
417 #ifdef PARSER_DEBUG
418 kDebug( 6035 ) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string();
419 #endif
420 // We allow TABLE > FORM in dtd.cpp, but do not allow the form have children in this case
421 if (current->id() == ID_TABLE && id == ID_FORM) {
422 flat = true;
423 static_cast<HTMLFormElementImpl*>(n)->setMalformed(true);
426 // don't push elements without end tag on the stack
427 if(tagPriority(id) != 0 && !flat) {
428 #if SPEED_DEBUG < 2
429 if(!n->attached() && HTMLWidget )
430 n->attach();
431 #endif
432 if(n->isInline()) m_inline = true;
433 pushBlock(id, tagPriority(id));
434 setCurrent( newNode );
435 } else {
436 #if SPEED_DEBUG < 2
437 if(!n->attached() && HTMLWidget)
438 n->attach();
439 if (n->maintainsState()) {
440 document->registerMaintainsState(n);
441 document->attemptRestoreState(n);
443 n->close();
444 #endif
445 if(n->isInline()) m_inline = true;
449 #if SPEED_DEBUG < 1
450 if(tagPriority(id) == 0 && n->renderer())
451 n->renderer()->calcMinMaxWidth();
452 #endif
453 return true;
454 } else {
455 #ifdef PARSER_DEBUG
456 kDebug( 6035 ) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string();
457 #endif
458 // error handling...
459 HTMLElementImpl *e;
460 bool handled = false;
462 // first switch on current element for elements with optional end-tag and inline-only content
463 switch(current->id())
465 case ID_P:
466 case ID_DT:
467 if(!n->isInline())
469 popBlock(current->id());
470 return insertNode(n);
472 break;
473 default:
474 break;
477 // switch according to the element to insert
478 switch(id)
480 case ID_TR:
481 case ID_TH:
482 case ID_TD:
483 if (inStrayTableContent && !isTableRelatedTag(current->id())) {
484 // pop out to the nearest enclosing table-related tag.
485 while (blockStack && !isTableRelatedTag(current->id()))
486 popOneBlock();
487 return insertNode(n);
489 break;
490 case ID_HEAD:
491 // ### allow not having <HTML> in at all, as per HTML spec
492 if (!current->isDocumentNode() && current->id() != ID_HTML )
493 return false;
494 break;
495 case ID_COMMENT:
496 if( head )
497 break;
498 case ID_META:
499 case ID_LINK:
500 case ID_ISINDEX:
501 case ID_BASE:
502 if( !head )
503 createHead();
504 if( head ) {
505 if ( head->addChild(n) ) {
506 #if SPEED_DEBUG < 2
507 if(!n->attached() && HTMLWidget)
508 n->attach();
509 #endif
512 return true;
515 break;
516 case ID_HTML:
517 if (!current->isDocumentNode() ) {
518 if ( doc()->documentElement()->id() == ID_HTML) {
519 // we have another <HTML> element.... apply attributes to existing one
520 // make sure we don't overwrite already existing attributes
521 NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true);
522 NamedAttrMapImpl *bmap = static_cast<ElementImpl*>(doc()->documentElement())->attributes(false);
523 bool changed = false;
524 for (unsigned long l = 0; map && l < map->length(); ++l) {
525 NodeImpl::Id attrId = map->idAt(l);
526 DOMStringImpl *attrValue = map->valueAt(l);
527 changed = !bmap->getValue(attrId);
528 bmap->setValue(attrId,attrValue);
530 if ( changed )
531 doc()->recalcStyle( NodeImpl::Inherit );
533 return false;
535 break;
536 case ID_TITLE:
537 case ID_STYLE:
538 if ( !head )
539 createHead();
540 if ( head ) {
541 DOM::NodeImpl *newNode = head->addChild(n);
542 if ( newNode ) {
543 pushBlock(id, tagPriority(id));
544 setCurrent ( newNode );
545 #if SPEED_DEBUG < 2
546 if(!n->attached() && HTMLWidget)
547 n->attach();
548 #endif
549 } else {
550 #ifdef PARSER_DEBUG
551 kDebug( 6035 ) << "adding style before to body failed!!!!";
552 #endif
553 discard_until = ID_STYLE + ID_CLOSE_TAG;
554 return false;
556 return true;
557 } else if(inBody) {
558 discard_until = id + ID_CLOSE_TAG;
559 return false;
561 break;
562 case ID_SCRIPT:
563 // if we failed to insert it, go into skip mode
564 discard_until = id + ID_CLOSE_TAG;
565 break;
566 case ID_BODY:
567 if(inBody && doc()->body()) {
568 // we have another <BODY> element.... apply attributes to existing one
569 // make sure we don't overwrite already existing attributes
570 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
571 NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true);
572 NamedAttrMapImpl *bmap = doc()->body()->attributes(false);
573 bool changed = false;
574 for (unsigned long l = 0; map && l < map->length(); ++l) {
575 NodeImpl::Id attrId = map->idAt(l);
576 DOMStringImpl *attrValue = map->valueAt(l);
577 if ( !bmap->getValue(attrId) ) {
578 bmap->setValue(attrId,attrValue);
579 changed = true;
582 if ( changed )
583 doc()->recalcStyle( NodeImpl::Inherit );
584 } else if ( current->isDocumentNode() )
585 break;
586 return false;
587 break;
589 // the following is a hack to move non rendered elements
590 // outside of tables.
591 // needed for broken constructs like <table><form ...><tr>....
592 case ID_INPUT:
594 ElementImpl *e = static_cast<ElementImpl *>(n);
595 DOMString type = e->getAttribute(ATTR_TYPE);
597 if ( strcasecmp( type, "hidden" ) != 0 )
598 break;
599 // Fall through!
601 case ID_TEXT:
603 // Don't try to fit random white-space anywhere
604 TextImpl *t = static_cast<TextImpl *>(n);
605 if (t->containsOnlyWhitespace())
606 return false;
607 // ignore text inside the following elements.
608 switch(current->id())
610 case ID_SELECT:
611 return false;
612 default:
614 // fall through!!
616 break;
618 case ID_DL:
619 popBlock( ID_DT );
620 if ( current->id() == ID_DL ) {
621 e = new HTMLGenericElementImpl( document, ID_DD );
622 insertNode( e );
623 handled = true;
625 break;
626 case ID_DT:
627 e = new HTMLDListElementImpl(document);
628 if ( insertNode(e) ) {
629 insertNode(n);
630 return true;
632 break;
633 case ID_AREA:
635 if(map)
637 map->addChild(n);
638 #if SPEED_DEBUG < 2
639 if(!n->attached() && HTMLWidget)
640 n->attach();
641 #endif
642 handled = true;
643 return true;
645 else
646 return false;
649 case ID_THEAD:
650 case ID_TBODY:
651 case ID_TFOOT:
652 case ID_CAPTION:
653 case ID_COLGROUP: {
654 if (isTableRelatedTag(current->id())) {
655 while (blockStack && current->id() != ID_TABLE && isTableRelatedTag(current->id()))
656 popOneBlock();
657 return insertNode(n);
660 default:
661 break;
664 // switch on the currently active element
665 switch(current->id())
667 case ID_HTML:
668 switch(id)
670 case ID_SCRIPT:
671 case ID_STYLE:
672 case ID_META:
673 case ID_LINK:
674 case ID_OBJECT:
675 case ID_EMBED:
676 case ID_TITLE:
677 case ID_ISINDEX:
678 case ID_BASE:
679 if(!head) {
680 head = new HTMLHeadElementImpl(document);
681 e = head;
682 insertNode(e);
683 handled = true;
685 break;
686 case ID_TEXT: {
687 TextImpl *t = static_cast<TextImpl *>(n);
688 if (t->containsOnlyWhitespace())
689 return false;
690 /* Fall through to default */
692 default:
693 if ( haveFrameSet ) break;
694 e = new HTMLBodyElementImpl(document);
695 startBody();
696 insertNode(e);
697 handled = true;
698 break;
700 break;
701 case ID_HEAD:
702 // we can get here only if the element is not allowed in head.
703 if (id == ID_HTML)
704 return false;
705 else {
706 // This means the body starts here...
707 if ( haveFrameSet ) break;
708 popBlock(ID_HEAD);
709 e = new HTMLBodyElementImpl(document);
710 startBody();
711 insertNode(e);
712 handled = true;
714 break;
715 case ID_BODY:
716 break;
717 case ID_CAPTION:
718 // Illegal content in a caption. Close the caption and try again.
719 popBlock(ID_CAPTION);
720 switch( id ) {
721 case ID_THEAD:
722 case ID_TFOOT:
723 case ID_TBODY:
724 case ID_TR:
725 case ID_TD:
726 case ID_TH:
727 return insertNode(n, flat);
729 break;
730 case ID_TABLE:
731 case ID_THEAD:
732 case ID_TFOOT:
733 case ID_TBODY:
734 case ID_TR:
735 switch(id)
737 case ID_TABLE:
738 popBlock(ID_TABLE); // end the table
739 handled = checkChild( current->id(), id, doc()->inStrictMode());
740 break;
741 default:
743 NodeImpl *node = current;
744 NodeImpl *parent = node->parentNode();
745 // A script may have removed the current node's parent from the DOM
746 // http://bugzilla.opendarwin.org/show_bug.cgi?id=7137
747 // FIXME: we should do real recovery here and re-parent with the correct node.
748 if (!parent)
749 return false;
750 NodeImpl *parentparent = parent->parentNode();
752 if (n->isTextNode() ||
753 ( node->id() == ID_TR &&
754 ( parent->id() == ID_THEAD ||
755 parent->id() == ID_TBODY ||
756 parent->id() == ID_TFOOT ) && parentparent->id() == ID_TABLE ) ||
757 ( !checkChild( ID_TR, id ) && ( node->id() == ID_THEAD || node->id() == ID_TBODY || node->id() == ID_TFOOT ) &&
758 parent->id() == ID_TABLE ) )
760 node = (node->id() == ID_TABLE) ? node :
761 ((node->id() == ID_TR ) ? parentparent : parent);
762 NodeImpl *parent = node->parentNode();
763 if (!parent)
764 return false;
765 int exceptioncode = 0;
766 #ifdef PARSER_DEBUG
767 kDebug( 6035 ) << "calling insertBefore(" << n->nodeName().string() << "," << node->nodeName().string() << ")";
768 #endif
769 parent->insertBefore(n, node, exceptioncode);
770 if (exceptioncode) {
771 #ifndef PARSER_DEBUG
772 if (!n->isTextNode())
773 #endif
774 kDebug(6035) << "adding content before table failed..";
775 break;
777 if ( n->isElementNode() && tagPriority(id) != 0 &&
778 !flat && endTagRequirement(id) != DOM::FORBIDDEN ) {
780 pushBlock(id, tagPriority(id));
781 setCurrent ( n );
782 inStrayTableContent++;
783 blockStack->strayTableContent = true;
785 return true;
788 if ( current->id() == ID_TR )
789 e = new HTMLTableCellElementImpl(document, ID_TD);
790 else if ( current->id() == ID_TABLE )
791 e = new HTMLTableSectionElementImpl( document, ID_TBODY, true /* implicit */ );
792 else
793 e = new HTMLTableRowElementImpl( document );
795 insertNode(e);
796 handled = true;
797 break;
798 } // end default
799 } // end switch
800 break;
801 case ID_OBJECT:
802 discard_until = id + ID_CLOSE_TAG;
803 return false;
804 case ID_UL:
805 case ID_OL:
806 case ID_DIR:
807 case ID_MENU:
808 e = new HTMLLIElementImpl(document);
809 e->addCSSProperty(CSS_PROP_LIST_STYLE_TYPE, CSS_VAL_NONE);
810 insertNode(e);
811 handled = true;
812 break;
813 case ID_FORM:
814 popBlock(ID_FORM);
815 handled = true;
816 break;
817 case ID_SELECT:
818 if( n->isInline() )
819 return false;
820 break;
821 case ID_P:
822 case ID_H1:
823 case ID_H2:
824 case ID_H3:
825 case ID_H4:
826 case ID_H5:
827 case ID_H6:
828 if(!n->isInline())
830 popBlock(current->id());
831 handled = true;
833 break;
834 case ID_OPTION:
835 case ID_OPTGROUP:
836 if (id == ID_OPTGROUP)
838 popBlock(current->id());
839 handled = true;
841 else if(id == ID_SELECT)
843 // IE treats a nested select as </select>. Let's do the same
844 popBlock( ID_SELECT );
845 break;
847 break;
848 // head elements in the body should be ignored.
850 case ID_ADDRESS:
851 case ID_COLGROUP:
852 case ID_FONT:
853 popBlock(current->id());
854 handled = true;
855 break;
856 default:
857 if(current->isDocumentNode())
859 DocumentImpl* doc = static_cast<DocumentImpl*>(current);
860 if (!doc->documentElement()) {
861 e = new HTMLHtmlElementImpl(document);
862 insertNode(e);
863 handled = true;
866 else if(current->isInline())
868 popInlineBlocks();
869 handled = true;
873 // if we couldn't handle the error, just rethrow the exception...
874 if(!handled)
876 //kDebug( 6035 ) << "Exception handler failed in HTMLPArser::insertNode()";
877 return false;
880 return insertNode(n);
885 NodeImpl *KHTMLParser::getElement(Token* t)
887 NodeImpl *n = 0;
889 switch(t->tid)
891 case ID_HTML:
892 n = new HTMLHtmlElementImpl(document);
893 break;
894 case ID_HEAD:
895 if(!head && (current->id() == ID_HTML || current->isDocumentNode())) {
896 head = new HTMLHeadElementImpl(document);
897 n = head;
899 break;
900 case ID_BODY:
901 // body no longer allowed if we have a frameset
902 if(haveFrameSet) break;
903 popBlock(ID_HEAD);
904 n = new HTMLBodyElementImpl(document);
905 haveBody = true;
906 startBody();
907 break;
909 // head elements
910 case ID_BASE:
911 n = new HTMLBaseElementImpl(document);
912 break;
913 case ID_LINK:
914 n = new HTMLLinkElementImpl(document);
915 break;
916 case ID_META:
917 n = new HTMLMetaElementImpl(document);
918 break;
919 case ID_STYLE:
920 n = new HTMLStyleElementImpl(document);
921 break;
922 case ID_TITLE:
923 // only one non-empty <title> allowed
924 if (haveTitle) {
925 discard_until = ID_TITLE+ID_CLOSE_TAG;
926 break;
928 n = new HTMLTitleElementImpl(document);
929 // we'll set haveTitle when closing the tag
930 break;
932 // frames
933 case ID_FRAME:
934 n = new HTMLFrameElementImpl(document);
935 break;
936 case ID_FRAMESET:
937 popBlock(ID_HEAD);
938 if ( inBody && !haveFrameSet && !haveContent && !haveBody) {
939 popBlock( ID_BODY );
940 // ### actually for IE document.body returns the now hidden "body" element
941 // we can't implement that behavior now because it could cause too many
942 // regressions and the headaches are not worth the work as long as there is
943 // no site actually relying on that detail (Dirk)
944 if (static_cast<HTMLDocumentImpl*>(document)->body())
945 static_cast<HTMLDocumentImpl*>(document)->body()
946 ->addCSSProperty(CSS_PROP_DISPLAY, CSS_VAL_NONE);
947 inBody = false;
949 if ( (haveBody || haveContent || haveFrameSet) && current->id() == ID_HTML)
950 break;
951 n = new HTMLFrameSetElementImpl(document);
952 haveFrameSet = true;
953 startBody();
954 break;
955 // a bit a special case, since the frame is inlined...
956 case ID_IFRAME:
957 n = new HTMLIFrameElementImpl(document);
958 break;
960 // form elements
961 case ID_FORM:
962 // thou shall not nest <form> - NS/IE quirk
963 if (form) break;
964 n = form = new HTMLFormElementImpl(document, false);
965 break;
966 case ID_BUTTON:
967 n = new HTMLButtonElementImpl(document, form);
968 break;
969 case ID_FIELDSET:
970 n = new HTMLFieldSetElementImpl(document, form);
971 break;
972 case ID_INPUT:
973 if ( t->attrs &&
974 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() &&
975 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled() &&
976 !strcasecmp( t->attrs->getValue( ATTR_TYPE ), "image" ) )
978 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered( doc()->completeURL( khtml::parseURL(t->attrs->getValue(ATTR_SRC)).string() ) ))
979 return 0;
981 n = new HTMLInputElementImpl(document, form);
982 break;
983 case ID_ISINDEX:
984 n = handleIsindex(t);
985 if( !inBody ) {
986 isindex = n;
987 n = 0;
988 } else
989 t->flat = true;
990 break;
991 case ID_KEYGEN:
992 n = new HTMLKeygenElementImpl(document, form);
993 break;
994 case ID_LABEL:
995 n = new HTMLLabelElementImpl(document);
996 break;
997 case ID_LEGEND:
998 n = new HTMLLegendElementImpl(document, form);
999 break;
1000 case ID_OPTGROUP:
1001 n = new HTMLOptGroupElementImpl(document, form);
1002 break;
1003 case ID_OPTION:
1004 popOptionalBlock(ID_OPTION);
1005 n = new HTMLOptionElementImpl(document, form);
1006 break;
1007 case ID_SELECT:
1008 inSelect = true;
1009 n = new HTMLSelectElementImpl(document, form);
1010 break;
1011 case ID_TEXTAREA:
1012 n = new HTMLTextAreaElementImpl(document, form);
1013 break;
1015 // lists
1016 case ID_DL:
1017 n = new HTMLDListElementImpl(document);
1018 break;
1019 case ID_DD:
1020 popOptionalBlock(ID_DT);
1021 popOptionalBlock(ID_DD);
1022 n = new HTMLGenericElementImpl(document, t->tid);
1023 break;
1024 case ID_DT:
1025 popOptionalBlock(ID_DD);
1026 popOptionalBlock(ID_DT);
1027 n = new HTMLGenericElementImpl(document, t->tid);
1028 break;
1029 case ID_UL:
1031 n = new HTMLUListElementImpl(document);
1032 break;
1034 case ID_OL:
1036 n = new HTMLOListElementImpl(document);
1037 break;
1039 case ID_DIR:
1040 n = new HTMLDirectoryElementImpl(document);
1041 break;
1042 case ID_MENU:
1043 n = new HTMLMenuElementImpl(document);
1044 break;
1045 case ID_LI:
1046 popOptionalBlock(ID_LI);
1047 n = new HTMLLIElementImpl(document);
1048 break;
1049 // formatting elements (block)
1050 case ID_BLOCKQUOTE:
1051 n = new HTMLGenericElementImpl(document, t->tid);
1052 break;
1053 case ID_LAYER:
1054 case ID_ILAYER:
1055 n = new HTMLLayerElementImpl(document, t->tid);
1056 break;
1057 case ID_P:
1058 case ID_DIV:
1059 n = new HTMLDivElementImpl(document, t->tid);
1060 break;
1061 case ID_H1:
1062 case ID_H2:
1063 case ID_H3:
1064 case ID_H4:
1065 case ID_H5:
1066 case ID_H6:
1067 n = new HTMLGenericElementImpl(document, t->tid);
1068 break;
1069 case ID_HR:
1070 n = new HTMLHRElementImpl(document);
1071 break;
1072 case ID_PRE:
1073 case ID_XMP:
1074 case ID_PLAINTEXT:
1075 case ID_LISTING:
1076 n = new HTMLPreElementImpl(document, t->tid);
1077 break;
1079 // font stuff
1080 case ID_BASEFONT:
1081 n = new HTMLBaseFontElementImpl(document);
1082 break;
1083 case ID_FONT:
1084 n = new HTMLFontElementImpl(document);
1085 break;
1087 // ins/del
1088 case ID_DEL:
1089 case ID_INS:
1090 n = new HTMLGenericElementImpl(document, t->tid);
1091 break;
1093 // anchor
1094 case ID_A:
1095 popBlock(ID_A);
1097 n = new HTMLAnchorElementImpl(document);
1098 break;
1100 // images
1101 case ID_IMAGE:
1102 case ID_IMG:
1103 if (t->attrs&&
1104 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled()&&
1105 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled())
1107 QString url = doc()->completeURL( khtml::parseURL(t->attrs->getValue(ATTR_SRC)).string() );
1108 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url))
1109 return 0;
1111 n = new HTMLImageElementImpl(document, form);
1112 break;
1114 case ID_CANVAS:
1115 n = new HTMLCanvasElementImpl(document);
1116 break;
1118 case ID_MAP:
1119 map = new HTMLMapElementImpl(document);
1120 n = map;
1121 break;
1122 case ID_AREA:
1123 n = new HTMLAreaElementImpl(document);
1124 break;
1126 // objects, applets and scripts
1127 case ID_APPLET:
1128 n = new HTMLAppletElementImpl(document);
1129 break;
1130 case ID_EMBED:
1131 n = new HTMLEmbedElementImpl(document);
1132 break;
1133 case ID_OBJECT:
1134 n = new HTMLObjectElementImpl(document);
1135 break;
1136 case ID_PARAM:
1137 n = new HTMLParamElementImpl(document);
1138 break;
1139 case ID_SCRIPT:
1141 HTMLScriptElementImpl *scriptElement = new HTMLScriptElementImpl(document);
1142 scriptElement->setCreatedByParser(true);
1143 n = scriptElement;
1144 break;
1147 // media
1148 case ID_AUDIO:
1149 n = new HTMLAudioElement(document);
1150 break;
1151 case ID_VIDEO:
1152 n = new HTMLVideoElement(document);
1153 break;
1154 case ID_SOURCE:
1155 n = new HTMLSourceElement(document);
1156 break;
1158 // tables
1159 case ID_TABLE:
1160 n = new HTMLTableElementImpl(document);
1161 break;
1162 case ID_CAPTION:
1163 n = new HTMLTableCaptionElementImpl(document);
1164 break;
1165 case ID_COLGROUP:
1166 case ID_COL:
1167 n = new HTMLTableColElementImpl(document, t->tid);
1168 break;
1169 case ID_TR:
1170 popBlock(ID_TR);
1171 n = new HTMLTableRowElementImpl(document);
1172 break;
1173 case ID_TD:
1174 case ID_TH:
1175 popBlock(ID_TH);
1176 popBlock(ID_TD);
1177 n = new HTMLTableCellElementImpl(document, t->tid);
1178 break;
1179 case ID_TBODY:
1180 case ID_THEAD:
1181 case ID_TFOOT:
1182 popBlock( ID_THEAD );
1183 popBlock( ID_TBODY );
1184 popBlock( ID_TFOOT );
1185 n = new HTMLTableSectionElementImpl(document, t->tid, false);
1186 break;
1188 // inline elements
1189 case ID_BR:
1190 n = new HTMLBRElementImpl(document);
1191 break;
1192 case ID_Q:
1193 n = new HTMLGenericElementImpl(document, t->tid);
1194 break;
1196 // elements with no special representation in the DOM
1198 // block:
1199 case ID_ADDRESS:
1200 case ID_CENTER:
1201 n = new HTMLGenericElementImpl(document, t->tid);
1202 break;
1203 // inline
1204 // %fontstyle
1205 case ID_TT:
1206 case ID_U:
1207 case ID_B:
1208 case ID_I:
1209 case ID_S:
1210 case ID_STRIKE:
1211 case ID_BIG:
1212 case ID_SMALL:
1214 // %phrase
1215 case ID_EM:
1216 case ID_STRONG:
1217 case ID_DFN:
1218 case ID_CODE:
1219 case ID_SAMP:
1220 case ID_KBD:
1221 case ID_VAR:
1222 case ID_CITE:
1223 case ID_ABBR:
1224 case ID_ACRONYM:
1226 // %special
1227 case ID_SUB:
1228 case ID_SUP:
1229 case ID_SPAN:
1230 case ID_WBR:
1231 case ID_NOBR:
1232 if ( t->tid == ID_NOBR || t->tid == ID_WBR )
1233 popOptionalBlock( t->tid );
1234 case ID_BDO:
1235 n = new HTMLGenericElementImpl(document, t->tid);
1236 break;
1238 // these are special, and normally not rendered
1239 case ID_NOEMBED:
1240 if (!t->flat) {
1241 n = new HTMLGenericElementImpl(document, t->tid);
1242 discard_until = ID_NOEMBED + ID_CLOSE_TAG;
1244 return n;
1245 case ID_NOFRAMES:
1246 if (!t->flat) {
1247 n = new HTMLGenericElementImpl(document, t->tid);
1248 discard_until = ID_NOFRAMES + ID_CLOSE_TAG;
1250 return n;
1251 case ID_NOSCRIPT:
1252 if (!t->flat) {
1253 n = new HTMLGenericElementImpl(document, t->tid);
1254 if(HTMLWidget && HTMLWidget->part()->jScriptEnabled())
1255 discard_until = ID_NOSCRIPT + ID_CLOSE_TAG;
1257 return n;
1258 case ID_NOLAYER:
1259 // discard_until = ID_NOLAYER + ID_CLOSE_TAG;
1260 return 0;
1261 break;
1262 case ID_MARQUEE:
1263 n = new HTMLMarqueeElementImpl(document);
1264 break;
1265 // text
1266 case ID_TEXT:
1267 // kDebug(6035) << "ID_TEXT: \"" << DOMString(t->text).string() << "\"";
1268 n = new TextImpl(document, t->text);
1269 break;
1270 case ID_COMMENT:
1271 n = new CommentImpl(document, t->text);
1272 break;
1273 default:
1274 n = new HTMLGenericElementImpl(document, t->tid);
1275 break;
1276 // kDebug( 6035 ) << "Unknown tag " << t->tid << "!";
1278 return n;
1281 void KHTMLParser::processCloseTag(Token *t)
1283 // support for really broken html. Can't believe I'm supporting such crap (lars)
1284 switch(t->tid)
1286 case ID_HTML+ID_CLOSE_TAG:
1287 case ID_BODY+ID_CLOSE_TAG:
1288 // we never trust those close tags, since stupid webpages close
1289 // them prematurely
1290 return;
1291 case ID_FORM+ID_CLOSE_TAG:
1292 form = 0;
1293 // this one is to get the right style on the body element
1294 break;
1295 case ID_MAP+ID_CLOSE_TAG:
1296 map = 0;
1297 break;
1298 case ID_SELECT+ID_CLOSE_TAG:
1299 inSelect = false;
1300 break;
1301 case ID_TITLE+ID_CLOSE_TAG:
1302 // Set haveTitle only if <title> isn't empty
1303 if ( current->firstChild() )
1304 haveTitle = true;
1305 break;
1306 default:
1307 break;
1310 #ifdef PARSER_DEBUG
1311 kDebug( 6035 ) << "added the following children to " << current->nodeName().string();
1312 NodeImpl *child = current->firstChild();
1313 while(child != 0)
1315 kDebug( 6035 ) << " " << child->nodeName().string();
1316 child = child->nextSibling();
1318 #endif
1319 generateImpliedEndTags( t->tid - ID_CLOSE_TAG );
1320 popBlock( t->tid - ID_CLOSE_TAG );
1321 #ifdef PARSER_DEBUG
1322 kDebug( 6035 ) << "closeTag --> current = " << current->nodeName().string();
1323 #endif
1326 bool KHTMLParser::isResidualStyleTag(int _id)
1328 switch (_id) {
1329 case ID_A:
1330 case ID_B:
1331 case ID_BIG:
1332 case ID_EM:
1333 case ID_FONT:
1334 case ID_I:
1335 case ID_NOBR:
1336 case ID_S:
1337 case ID_SMALL:
1338 case ID_STRIKE:
1339 case ID_STRONG:
1340 case ID_TT:
1341 case ID_U:
1342 case ID_DFN:
1343 case ID_CODE:
1344 case ID_SAMP:
1345 case ID_KBD:
1346 case ID_VAR:
1347 case ID_DEL:
1348 case ID_INS:
1349 return true;
1350 default:
1351 return false;
1355 bool KHTMLParser::isAffectedByResidualStyle(int _id)
1357 if (isResidualStyleTag(_id))
1358 return true;
1360 switch (_id) {
1361 case ID_P:
1362 case ID_DIV:
1363 case ID_BLOCKQUOTE:
1364 case ID_ADDRESS:
1365 case ID_H1:
1366 case ID_H2:
1367 case ID_H3:
1368 case ID_H4:
1369 case ID_H5:
1370 case ID_H6:
1371 case ID_CENTER:
1372 case ID_UL:
1373 case ID_OL:
1374 case ID_LI:
1375 case ID_DL:
1376 case ID_DT:
1377 case ID_DD:
1378 case ID_PRE:
1379 case ID_LISTING:
1380 return true;
1381 default:
1382 return false;
1386 void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
1388 // Find the element that crosses over to a higher level.
1389 // ### For now, if there is more than one, we will only make sure we close the residual style.
1390 int exceptionCode = 0;
1391 HTMLStackElem* curr = blockStack;
1392 HTMLStackElem* maxElem = 0;
1393 HTMLStackElem* endElem = 0;
1394 HTMLStackElem* prev = 0;
1395 HTMLStackElem* prevMaxElem = 0;
1396 bool advancedResidual = false; // ### if set we only close the residual style
1397 while (curr && curr != elem) {
1398 if (curr->level > elem->level) {
1399 if (!isAffectedByResidualStyle(curr->id)) return;
1400 if (maxElem) advancedResidual = true;
1401 else
1402 endElem = curr;
1403 maxElem = curr;
1404 prevMaxElem = prev;
1407 prev = curr;
1408 curr = curr->next;
1411 if (!curr || !maxElem ) return;
1413 NodeImpl* residualElem = prev->node;
1414 NodeImpl* blockElem = prevMaxElem ? prevMaxElem->node : current;
1415 RefPtr<NodeImpl> parentElem = elem->node;
1417 // Check to see if the reparenting that is going to occur is allowed according to the DOM.
1418 // FIXME: We should either always allow it or perform an additional fixup instead of
1419 // just bailing here.
1420 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
1421 if (!parentElem->childAllowed(blockElem))
1422 return;
1424 if (maxElem->node->parentNode() != elem->node && !advancedResidual) {
1425 // Walk the stack and remove any elements that aren't residual style tags. These
1426 // are basically just being closed up. Example:
1427 // <font><span>Moo<p>Goo</font></p>.
1428 // In the above example, the <span> doesn't need to be reopened. It can just close.
1429 HTMLStackElem* currElem = maxElem->next;
1430 HTMLStackElem* prevElem = maxElem;
1431 while (currElem != elem) {
1432 HTMLStackElem* nextElem = currElem->next;
1433 if (!isResidualStyleTag(currElem->id)) {
1434 prevElem->next = nextElem;
1435 prevElem->setNode(currElem->node);
1436 delete currElem;
1438 else
1439 prevElem = currElem;
1440 currElem = nextElem;
1443 // We have to reopen residual tags in between maxElem and elem. An example of this case s:
1444 // <font><i>Moo<p>Foo</font>.
1445 // In this case, we need to transform the part before the <p> into:
1446 // <font><i>Moo</i></font><i>
1447 // so that the <i> will remain open. This involves the modification of elements
1448 // in the block stack.
1449 // This will also affect how we ultimately reparent the block, since we want it to end up
1450 // under the reopened residual tags (e.g., the <i> in the above example.)
1451 RefPtr<NodeImpl> prevNode = 0;
1452 RefPtr<NodeImpl> currNode = 0;
1453 currElem = maxElem;
1454 while (currElem->node != residualElem) {
1455 if (isResidualStyleTag(currElem->node->id())) {
1456 // Create a clone of this element.
1457 currNode = currElem->node->cloneNode(false);
1458 currElem->node->close();
1459 removeForbidden(currElem->id, forbiddenTag);
1461 // Change the stack element's node to point to the clone.
1462 currElem->setNode(currNode.get());
1464 // Attach the previous node as a child of this new node.
1465 if (prevNode)
1466 currNode->appendChild(prevNode.get(), exceptionCode);
1467 else // The new parent for the block element is going to be the innermost clone.
1468 parentElem = currNode;
1470 prevNode = currNode;
1473 currElem = currElem->next;
1476 // Now append the chain of new residual style elements if one exists.
1477 if (prevNode)
1478 elem->node->appendChild(prevNode.get(), exceptionCode);
1481 // We need to make a clone of |residualElem| and place it just inside |blockElem|.
1482 // All content of |blockElem| is reparented to be under this clone. We then
1483 // reparent |blockElem| using real DOM calls so that attachment/detachment will
1484 // be performed to fix up the rendering tree.
1485 // So for this example: <b>...<p>Foo</b>Goo</p>
1486 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
1488 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
1489 SharedPtr<NodeImpl> guard(blockElem);
1490 blockElem->parentNode()->removeChild(blockElem, exceptionCode);
1492 if (!advancedResidual) {
1493 // Step 2: Clone |residualElem|.
1494 RefPtr<NodeImpl> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
1496 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
1497 // before we've put |newElem| into the document. That way we'll only do one attachment of all
1498 // the new content (instead of a bunch of individual attachments).
1499 NodeImpl* currNode = blockElem->firstChild();
1500 while (currNode) {
1501 NodeImpl* nextNode = currNode->nextSibling();
1502 SharedPtr<NodeImpl> guard(currNode); //Protect from deletion while moving
1503 blockElem->removeChild(currNode, exceptionCode);
1504 newNode->appendChild(currNode, exceptionCode);
1505 currNode = nextNode;
1507 // TODO - To be replaced.
1508 // Re-register form elements with currently active form, step 1 will have removed them
1509 if (form && currNode && currNode->isGenericFormElement())
1511 HTMLGenericFormElementImpl *e = static_cast<HTMLGenericFormElementImpl *>(currNode);
1512 form->registerFormElement(e);
1516 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
1517 // attachment can occur yet.
1518 blockElem->appendChild(newNode.get(), exceptionCode);
1521 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
1522 parentElem->appendChild(blockElem, exceptionCode);
1524 // Step 6: Elide |elem|, since it is effectively no longer open. Also update
1525 // the node associated with the previous stack element so that when it gets popped,
1526 // it doesn't make the residual element the next current node.
1527 HTMLStackElem* currElem = maxElem;
1528 HTMLStackElem* prevElem = 0;
1529 while (currElem != elem) {
1530 prevElem = currElem;
1531 currElem = currElem->next;
1533 prevElem->next = elem->next;
1534 prevElem->setNode(elem->node);
1535 delete elem;
1537 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1538 // In the above example, Goo should stay italic.
1539 curr = blockStack;
1540 HTMLStackElem* residualStyleStack = 0;
1541 while (curr && curr != endElem) {
1542 // We will actually schedule this tag for reopening
1543 // after we complete the close of this entire block.
1544 NodeImpl* currNode = current;
1545 if (isResidualStyleTag(curr->id)) {
1546 // We've overloaded the use of stack elements and are just reusing the
1547 // struct with a slightly different meaning to the variables. Instead of chaining
1548 // from innermost to outermost, we build up a list of all the tags we need to reopen
1549 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1550 // to the outermost tag we need to reopen.
1551 // We also set curr->node to be the actual element that corresponds to the ID stored in
1552 // curr->id rather than the node that you should pop to when the element gets pulled off
1553 // the stack.
1554 popOneBlock(false);
1555 curr->setNode(currNode);
1556 curr->next = residualStyleStack;
1557 residualStyleStack = curr;
1559 else
1560 popOneBlock();
1562 curr = blockStack;
1565 reopenResidualStyleTags(residualStyleStack, 0); // FIXME: Deal with stray table content some day
1566 // if it becomes necessary to do so.
1569 void KHTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, DOM::NodeImpl* malformedTableParent)
1571 // Loop for each tag that needs to be reopened.
1572 while (elem) {
1573 // Create a shallow clone of the DOM node for this element.
1574 RefPtr<NodeImpl> newNode = elem->node->cloneNode(false);
1576 // Append the new node. In the malformed table case, we need to insert before the table,
1577 // which will be the last child.
1578 int exceptionCode = 0;
1579 if (malformedTableParent)
1580 malformedTableParent->insertBefore(newNode.get(), malformedTableParent->lastChild(), exceptionCode);
1581 else
1582 current->appendChild(newNode.get(), exceptionCode);
1583 // FIXME: Is it really OK to ignore the exceptions here?
1585 // Now push a new stack element for this node we just created.
1586 pushBlock(elem->id, elem->level);
1588 // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1589 // that it is inside a malformed table.
1590 blockStack->strayTableContent = malformedTableParent != 0;
1591 if (blockStack->strayTableContent)
1592 inStrayTableContent++;
1594 // Clear our malformed table parent variable.
1595 malformedTableParent = 0;
1597 // Update |current| manually to point to the new node.
1598 setCurrent(newNode.get());
1600 // Advance to the next tag that needs to be reopened.
1601 HTMLStackElem* next = elem->next;
1602 delete elem;
1603 elem = next;
1607 void KHTMLParser::pushBlock(int _id, int _level)
1609 HTMLStackElem *Elem = new HTMLStackElem(_id, _level, current, m_inline, blockStack);
1611 blockStack = Elem;
1612 addForbidden(_id, forbiddenTag);
1615 void KHTMLParser::generateImpliedEndTags( int _id )
1617 HTMLStackElem *Elem = blockStack;
1619 int level = tagPriority(_id);
1620 while( Elem && Elem->id != _id)
1622 HTMLStackElem *NextElem = Elem->next;
1623 if (endTagRequirement(Elem->id) == DOM::OPTIONAL && Elem->level <= level) {
1624 popOneBlock();
1626 else
1627 break;
1628 Elem = NextElem;
1632 void KHTMLParser::popOptionalBlock( int _id )
1634 bool found = false;
1635 HTMLStackElem *Elem = blockStack;
1637 int level = tagPriority(_id);
1638 while( Elem )
1640 if (Elem->id == _id) {
1641 found = true;
1642 break;
1644 if (Elem->level > level || (endTagRequirement(Elem->id) != DOM::OPTIONAL && !isResidualStyleTag(Elem->id)) )
1645 break;
1646 Elem = Elem->next;
1649 if (found) {
1650 generateImpliedEndTags(_id);
1651 popBlock(_id);
1655 void KHTMLParser::popBlock( int _id )
1657 HTMLStackElem *Elem = blockStack;
1658 int maxLevel = 0;
1660 #ifdef PARSER_DEBUG
1661 kDebug( 6035 ) << "popBlock(" << getParserPrintableName(_id) << ")";
1662 while(Elem) {
1663 kDebug( 6035) << " > " << getParserPrintableName(Elem->id);
1664 Elem = Elem->next;
1666 Elem = blockStack;
1667 #endif
1669 while( Elem && (Elem->id != _id))
1671 if (maxLevel < Elem->level)
1673 maxLevel = Elem->level;
1675 Elem = Elem->next;
1677 if (!Elem)
1678 return;
1680 if (maxLevel > Elem->level) {
1681 // We didn't match because the tag is in a different scope, e.g.,
1682 // <b><p>Foo</b>. Try to correct the problem.
1683 if (!isResidualStyleTag(_id))
1684 return;
1685 return handleResidualStyleCloseTagAcrossBlocks(Elem);
1688 bool isAffectedByStyle = isAffectedByResidualStyle(Elem->id);
1689 HTMLStackElem* residualStyleStack = 0;
1690 NodeImpl* malformedTableParent = 0;
1692 Elem = blockStack;
1694 while (Elem)
1696 if (Elem->id == _id)
1698 int strayTable = inStrayTableContent;
1699 popOneBlock();
1700 Elem = 0;
1702 // This element was the root of some malformed content just inside an implicit or
1703 // explicit <tbody> or <tr>.
1704 // If we end up needing to reopen residual style tags, the root of the reopened chain
1705 // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1706 if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
1707 NodeImpl* curr = current;
1708 while (curr && curr->id() != ID_TABLE)
1709 curr = curr->parentNode();
1710 malformedTableParent = curr ? curr->parentNode() : 0;
1713 else
1715 // Schedule this tag for reopening
1716 // after we complete the close of this entire block.
1717 NodeImpl* currNode = current;
1718 if (isAffectedByStyle && isResidualStyleTag(Elem->id)) {
1719 // We've overloaded the use of stack elements and are just reusing the
1720 // struct with a slightly different meaning to the variables. Instead of chaining
1721 // from innermost to outermost, we build up a list of all the tags we need to reopen
1722 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1723 // to the outermost tag we need to reopen.
1724 // We also set Elem->node to be the actual element that corresponds to the ID stored in
1725 // Elem->id rather than the node that you should pop to when the element gets pulled off
1726 // the stack.
1727 popOneBlock(false);
1728 Elem->next = residualStyleStack;
1729 Elem->setNode(currNode);
1730 residualStyleStack = Elem;
1732 else
1733 popOneBlock();
1734 Elem = blockStack;
1738 reopenResidualStyleTags(residualStyleStack, malformedTableParent);
1741 void KHTMLParser::popOneBlock(bool delBlock)
1743 HTMLStackElem *Elem = blockStack;
1745 // we should never get here, but some bad html might cause it.
1746 #ifndef PARSER_DEBUG
1747 if(!Elem) return;
1748 #else
1749 kDebug( 6035 ) << "popping block: " << getParserPrintableName(Elem->id) << "(" << Elem->id << ")";
1750 #endif
1752 #if SPEED_DEBUG < 1
1753 if((Elem->node != current)) {
1754 if (current->maintainsState() && document){
1755 document->registerMaintainsState(current);
1756 document->attemptRestoreState(current);
1758 current->close();
1760 #endif
1762 removeForbidden(Elem->id, forbiddenTag);
1764 blockStack = Elem->next;
1765 // we only set inline to false, if the element we close is a block level element.
1766 // This helps getting cases as <p><b>bla</b> <b>bla</b> right.
1768 m_inline = Elem->m_inline;
1770 if (current->id() == ID_FORM && form && inStrayTableContent)
1771 form->setMalformed(true);
1773 setCurrent( Elem->node );
1775 if (Elem->strayTableContent)
1776 inStrayTableContent--;
1778 if (delBlock)
1779 delete Elem;
1782 void KHTMLParser::popInlineBlocks()
1784 while(blockStack && current->isInline() && current->id() != ID_FONT)
1785 popOneBlock();
1788 void KHTMLParser::freeBlock()
1790 while (blockStack)
1791 popOneBlock();
1792 blockStack = 0;
1795 void KHTMLParser::createHead()
1797 if(head || !doc()->documentElement())
1798 return;
1800 head = new HTMLHeadElementImpl(document);
1801 HTMLElementImpl *body = doc()->body();
1802 int exceptioncode = 0;
1803 doc()->documentElement()->insertBefore(head, body, exceptioncode);
1804 if ( exceptioncode ) {
1805 #ifdef PARSER_DEBUG
1806 kDebug( 6035 ) << "creation of head failed!!!!:" << exceptioncode;
1807 #endif
1808 delete head;
1809 head = 0;
1812 // If the body does not exist yet, then the <head> should be pushed as the current block.
1813 if (head && !body) {
1814 pushBlock(head->id(), tagPriority(head->id()));
1815 setCurrent(head);
1819 NodeImpl *KHTMLParser::handleIsindex( Token *t )
1821 NodeImpl *n;
1822 HTMLFormElementImpl *myform = form;
1823 if ( !myform ) {
1824 myform = new HTMLFormElementImpl(document, true);
1825 n = myform;
1826 } else
1827 n = new HTMLDivElementImpl( document, ID_DIV );
1828 NodeImpl *child = new HTMLHRElementImpl( document );
1829 n->addChild( child );
1830 DOMStringImpl* a = t->attrs ? t->attrs->getValue(ATTR_PROMPT) : 0;
1831 DOMString text = i18n("This is a searchable index. Enter search keywords: ");
1832 if (a)
1833 text = a;
1834 child = new TextImpl(document, text.implementation());
1835 n->addChild( child );
1836 child = new HTMLIsIndexElementImpl(document, myform);
1837 static_cast<ElementImpl *>(child)->setAttribute(ATTR_TYPE, "khtml_isindex");
1838 n->addChild( child );
1839 child = new HTMLHRElementImpl( document );
1840 n->addChild( child );
1842 return n;
1845 void KHTMLParser::startBody()
1847 if(inBody) return;
1849 inBody = true;
1851 if( isindex ) {
1852 insertNode( isindex, true /* don't decend into this node */ );
1853 isindex = 0;