fix logic
[personal-kdelibs.git] / khtml / html / htmlparser.cpp
blobd55326352d4370a2efcd1fcb682654139b74591c
1 /*
2 This file is part of the KDE libraries
4 Copyright (C) 1997 Martin Jones (mjones@kde.org)
5 (C) 1997 Torben Weis (weis@kde.org)
6 (C) 1999,2001 Lars Knoll (knoll@kde.org)
7 (C) 2000,2001 Dirk Mueller (mueller@kde.org)
8 (C) 2003 Apple Computer, Inc.
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Library General Public
12 License as published by the Free Software Foundation; either
13 version 2 of the License, or (at your option) any later version.
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Library General Public License for more details.
20 You should have received a copy of the GNU Library General Public License
21 along with this library; see the file COPYING.LIB. If not, write to
22 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 Boston, MA 02110-1301, USA.
25 //----------------------------------------------------------------------------
27 // KDE HTML Widget -- HTML Parser
28 // #define PARSER_DEBUG
30 #include "htmlparser.h"
32 #include <dom/dom_exception.h>
34 #include <html/html_baseimpl.h>
35 #include <html/html_blockimpl.h>
36 #include <html/html_canvasimpl.h>
37 #include <html/html_documentimpl.h>
38 #include <html/html_elementimpl.h>
39 #include <html/html_formimpl.h>
40 #include <html/html_headimpl.h>
41 #include <html/html_imageimpl.h>
42 #include <html/html_inlineimpl.h>
43 #include <html/html_listimpl.h>
44 #include <html/html_miscimpl.h>
45 #include <html/html_tableimpl.h>
46 #include <html/html_objectimpl.h>
47 #include <html/HTMLAudioElement.h>
48 #include <html/HTMLVideoElement.h>
49 #include <html/HTMLSourceElement.h>
50 #include <xml/dom_textimpl.h>
51 #include <xml/dom_nodeimpl.h>
52 #include <misc/htmlhashes.h>
53 #include <html/htmltokenizer.h>
54 #include <khtmlview.h>
55 #include <khtml_part.h>
56 #include <khtml_global.h>
57 #include <css/cssproperties.h>
58 #include <css/cssvalues.h>
59 #include <css/csshelper.h>
61 #include <rendering/render_object.h>
63 #include <kdebug.h>
64 #include <klocale.h>
66 // Turn off inlining to avoid warning with newer gcc.
67 #undef __inline
68 #define __inline
69 #include "doctypes.cpp"
70 #undef __inline
72 using namespace DOM;
73 using namespace khtml;
#ifdef PARSER_DEBUG
// Debug helper: printable name for a token id. Ids at or above ID_CLOSE_TAG
// denote closing tags and are rendered as "/" followed by the tag name.
static QString getParserPrintableName(int id)
{
    if (id < ID_CLOSE_TAG)
        return getPrintableName(id);

    return "/" + getPrintableName(id - ID_CLOSE_TAG);
}
#endif
85 //----------------------------------------------------------------------------
87 /**
88 * @internal
90 class HTMLStackElem
92 public:
93 HTMLStackElem( int _id,
94 int _level,
95 DOM::NodeImpl *_node,
96 bool _inline_,
97 HTMLStackElem * _next )
99 id(_id),
100 level(_level),
101 strayTableContent(false),
102 m_inline(_inline_),
103 node(_node),
104 next(_next)
105 { node->ref(); }
107 ~HTMLStackElem()
108 { node->deref(); }
110 void setNode(NodeImpl* newNode)
112 newNode->ref();
113 node->deref();
114 node = newNode;
117 int id;
118 int level;
119 bool strayTableContent;
120 bool m_inline;
121 NodeImpl *node;
122 HTMLStackElem *next;
126 * @internal
128 * The parser parses tokenized input into the document, building up the
129 * document tree. If the document is wellformed, parsing it is
130 * straightforward.
131 * Unfortunately, people can't write wellformed HTML documents, so the parser
132 * has to be tolerant about errors.
134 * We have to take care of the following error conditions:
135 * 1. The element being added is explicitly forbidden inside some outer tag.
136 * In this case we should close all tags up to the one, which forbids
137 * the element, and add it afterwards.
138 * 2. We are not allowed to add the element directly. It could be, that
139 * the person writing the document forgot some tag inbetween (or that the
140 * tag inbetween is optional...) This could be the case with the following
141 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?)
142 * 3. We want to add a block element inside an inline element. Close all
143 * inline elements up to the next higher block element.
144 * 4. If this doesn't help close elements, until we are allowed to add the
145 * element or ignore the tag.
149 KHTMLParser::KHTMLParser( KHTMLView *_parent, DocumentImpl *doc)
151 //kDebug( 6035 ) << "parser constructor";
152 #if SPEED_DEBUG > 0
153 qt.start();
154 #endif
156 HTMLWidget = _parent;
157 document = doc;
159 blockStack = 0;
160 current = 0;
162 // ID_CLOSE_TAG == Num of tags
163 forbiddenTag = new ushort[ID_CLOSE_TAG+1];
165 reset();
168 KHTMLParser::KHTMLParser( DOM::DocumentFragmentImpl *i, DocumentImpl *doc )
170 HTMLWidget = 0;
171 document = doc;
173 forbiddenTag = new ushort[ID_CLOSE_TAG+1];
175 blockStack = 0;
176 current = 0;
178 reset();
180 setCurrent(i);
182 inBody = true;
185 KHTMLParser::~KHTMLParser()
187 #if SPEED_DEBUG > 0
188 kDebug( ) << "TIME: parsing time was = " << qt.elapsed();
189 #endif
191 freeBlock();
193 if (current) current->deref();
195 delete [] forbiddenTag;
196 delete isindex;
199 void KHTMLParser::reset()
201 setCurrent ( document );
203 freeBlock();
205 // before parsing no tags are forbidden...
206 memset(forbiddenTag, 0, (ID_CLOSE_TAG+1)*sizeof(ushort));
208 inBody = false;
209 haveFrameSet = false;
210 haveContent = false;
211 haveBody = false;
212 haveTitle = false;
213 inSelect = false;
214 inStrayTableContent = 0;
215 m_inline = false;
217 form = 0;
218 map = 0;
219 head = 0;
220 end = false;
221 isindex = 0;
223 discard_until = 0;
226 void KHTMLParser::parseToken(Token *t)
228 if (t->tid > 2*ID_CLOSE_TAG)
230 kDebug( 6035 ) << "Unknown tag!! tagID = " << t->tid;
231 return;
233 if(discard_until) {
234 if(t->tid == discard_until)
235 discard_until = 0;
237 // do not skip </iframe>
238 if ( discard_until || current->id() + ID_CLOSE_TAG != t->tid )
239 return;
242 #ifdef PARSER_DEBUG
243 kDebug( 6035 ) << "\n\n==> parser: processing token " << getParserPrintableName(t->tid) << "(" << t->tid << ")"
244 << " current = " << getParserPrintableName(current->id()) << "(" << current->id() << ")" << endl;
245 kDebug(6035) << "inline=" << m_inline << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << " haveContent=" << haveContent;
246 #endif
248 // holy shit. apparently some sites use </br> instead of <br>
249 // be compatible with IE and NS
250 if(t->tid == ID_BR+ID_CLOSE_TAG && document->inCompatMode())
251 t->tid -= ID_CLOSE_TAG;
253 if(t->tid > ID_CLOSE_TAG)
255 processCloseTag(t);
256 return;
259 // ignore spaces, if we're not inside a paragraph or other inline code
260 if( t->tid == ID_TEXT && t->text ) {
261 if(inBody && !skipMode() &&
262 current->id() != ID_STYLE && current->id() != ID_TITLE &&
263 current->id() != ID_SCRIPT &&
264 !t->text->containsOnlyWhitespace()) haveContent = true;
265 #ifdef PARSER_DEBUG
267 kDebug(6035) << "length="<< t->text->l << " text='" << QString::fromRawData(t->text->s, t->text->l) << "'";
268 #endif
271 NodeImpl *n = getElement(t);
272 // just to be sure, and to catch currently unimplemented stuff
273 if(!n)
274 return;
276 // set attributes
277 if(n->isElementNode() && t->tid != ID_ISINDEX)
279 ElementImpl *e = static_cast<ElementImpl *>(n);
280 e->setAttributeMap(t->attrs);
283 // if this tag is forbidden inside the current context, pop
284 // blocks until we are allowed to add it...
285 while(blockStack && forbiddenTag[t->tid]) {
286 #ifdef PARSER_DEBUG
287 kDebug( 6035 ) << "t->id: " << t->tid << " is forbidden :-( ";
288 #endif
289 popOneBlock();
292 // sometimes flat doesn't make sense
293 switch(t->tid) {
294 case ID_SELECT:
295 case ID_OPTION:
296 t->flat = false;
299 // the tokenizer needs the feedback for space discarding
300 if ( tagPriority(t->tid) == 0 )
301 t->flat = true;
303 if ( !insertNode(n, t->flat) ) {
304 // we couldn't insert the node...
305 #ifdef PARSER_DEBUG
306 kDebug( 6035 ) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!";
307 #endif
308 if (map == n)
310 #ifdef PARSER_DEBUG
311 kDebug( 6035 ) << " --> resetting map!";
312 #endif
313 map = 0;
315 if (form == n)
317 #ifdef PARSER_DEBUG
318 kDebug( 6035 ) << " --> resetting form!";
319 #endif
320 form = 0;
322 delete n;
326 void KHTMLParser::parseDoctypeToken(DoctypeToken* t)
328 // Ignore any doctype after the first. TODO It should be also ignored when processing DocumentFragment
329 if (current != document || document->doctype())
330 return;
332 DocumentTypeImpl* doctype = new DocumentTypeImpl(document->implementation(), document, t->name, t->publicID, t->systemID);
333 if (!t->internalSubset.isEmpty())
334 doctype->setInternalSubset(t->internalSubset);
335 document->addChild(doctype);
337 // Determine parse mode here
338 // This code more or less mimics Mozilla's implementation.
340 // There are three possible parse modes:
341 // COMPAT - quirks mode emulates WinIE
342 // and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
343 // be omitted from numbers.
344 // ALMOST STRICT - This mode is identical to strict mode
345 // except for its treatment of line-height in the inline box model. For
346 // now (until the inline box model is re-written), this mode is identical
347 // to STANDARDS mode.
348 // STRICT - no quirks apply. Web pages will obey the specifications to
349 // the letter.
351 if (!document->isHTMLDocument()) // FIXME Could document be non-HTML?
352 return;
353 DOM::HTMLDocumentImpl* htmldoc = static_cast<DOM::HTMLDocumentImpl*> (document);
354 if (t->name.toLower() == "html") {
355 if (!t->internalSubset.isEmpty() || t->publicID.isEmpty()) {
356 // Internal subsets always denote full standards, as does
357 // a doctype without a public ID.
358 htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
359 } else {
360 // We have to check a list of public IDs to see what we
361 // should do.
362 QString lowerPubID = t->publicID.toLower();
363 QByteArray pubIDStr = lowerPubID.toLocal8Bit();
365 // Look up the entry in our gperf-generated table.
366 const PubIDInfo* doctypeEntry = findDoctypeEntry(pubIDStr.constData(), t->publicID.length());
367 if (!doctypeEntry) {
368 // The DOCTYPE is not in the list. Assume strict mode.
369 // ### Doesn't make any sense, but it's what Mozilla does.
370 htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
371 } else {
372 switch ((!t->systemID.isEmpty()) ?
373 doctypeEntry->mode_if_sysid :
374 doctypeEntry->mode_if_no_sysid) {
375 case PubIDInfo::eQuirks3:
376 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
377 break;
378 case PubIDInfo::eQuirks:
379 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html4);
380 break;
381 case PubIDInfo::eAlmostStandards:
382 htmldoc->changeModes(DOM::DocumentImpl::Transitional, DOM::DocumentImpl::Html4);
383 break;
384 default:
385 assert(!"Unknown parse mode");
389 } else {
390 // Malformed doctype implies quirks mode.
391 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
395 static bool isTableRelatedTag(int id)
397 return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD ||
398 id == ID_TH);
401 bool KHTMLParser::insertNode(NodeImpl *n, bool flat)
403 int id = n->id();
405 // <table> is never allowed inside stray table content. Always pop out of the stray table content
406 // and close up the first table, and then start the second table as a sibling.
407 if (inStrayTableContent && id == ID_TABLE)
408 popBlock(ID_TABLE);
410 // let's be stupid and just try to insert it.
411 // this should work if the document is wellformed
412 #ifdef PARSER_DEBUG
413 NodeImpl *tmp = current;
414 #endif
415 NodeImpl *newNode = current->addChild(n);
416 if ( newNode ) {
417 #ifdef PARSER_DEBUG
418 kDebug( 6035 ) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string();
419 #endif
420 // We allow TABLE > FORM in dtd.cpp, but do not allow the form have children in this case
421 if (current->id() == ID_TABLE && id == ID_FORM) {
422 flat = true;
423 static_cast<HTMLFormElementImpl*>(n)->setMalformed(true);
426 // don't push elements without end tag on the stack
427 if(tagPriority(id) != 0 && !flat) {
428 #if SPEED_DEBUG < 2
429 if(!n->attached() && HTMLWidget )
430 n->attach();
431 #endif
432 if(n->isInline()) m_inline = true;
433 pushBlock(id, tagPriority(id));
434 setCurrent( newNode );
435 } else {
436 #if SPEED_DEBUG < 2
437 if(!n->attached() && HTMLWidget)
438 n->attach();
439 if (n->maintainsState()) {
440 document->registerMaintainsState(n);
441 document->attemptRestoreState(n);
443 n->close();
444 #endif
445 if(n->isInline()) m_inline = true;
449 #if SPEED_DEBUG < 1
450 if(tagPriority(id) == 0 && n->renderer())
451 n->renderer()->calcMinMaxWidth();
452 #endif
453 return true;
454 } else {
455 #ifdef PARSER_DEBUG
456 kDebug( 6035 ) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string();
457 #endif
458 // error handling...
459 HTMLElementImpl *e;
460 bool handled = false;
462 // first switch on current element for elements with optional end-tag and inline-only content
463 switch(current->id())
465 case ID_P:
466 case ID_DT:
467 if(!n->isInline())
469 popBlock(current->id());
470 return insertNode(n);
472 break;
473 default:
474 break;
477 // switch according to the element to insert
478 switch(id)
480 case ID_TR:
481 case ID_TH:
482 case ID_TD:
483 if (inStrayTableContent && !isTableRelatedTag(current->id())) {
484 // pop out to the nearest enclosing table-related tag.
485 while (blockStack && !isTableRelatedTag(current->id()))
486 popOneBlock();
487 return insertNode(n);
489 break;
490 case ID_HEAD:
491 // ### allow not having <HTML> in at all, as per HTML spec
492 if (!current->isDocumentNode() && current->id() != ID_HTML )
493 return false;
494 break;
495 case ID_COMMENT:
496 if( head )
497 break;
498 case ID_META:
499 case ID_LINK:
500 case ID_ISINDEX:
501 case ID_BASE:
502 if( !head )
503 createHead();
504 if( head ) {
505 if ( head->addChild(n) ) {
506 #if SPEED_DEBUG < 2
507 if(!n->attached() && HTMLWidget)
508 n->attach();
509 #endif
512 return true;
515 break;
516 case ID_HTML:
517 if (!current->isDocumentNode() ) {
518 if ( doc()->documentElement()->id() == ID_HTML) {
519 // we have another <HTML> element.... apply attributes to existing one
520 // make sure we don't overwrite already existing attributes
521 NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true);
522 NamedAttrMapImpl *bmap = static_cast<ElementImpl*>(doc()->documentElement())->attributes(false);
523 bool changed = false;
524 for (unsigned long l = 0; map && l < map->length(); ++l) {
525 NodeImpl::Id attrId = map->idAt(l);
526 DOMStringImpl *attrValue = map->valueAt(l);
527 changed = !bmap->getValue(attrId);
528 bmap->setValue(attrId,attrValue);
530 if ( changed )
531 doc()->recalcStyle( NodeImpl::Inherit );
533 return false;
535 break;
536 case ID_TITLE:
537 case ID_STYLE:
538 if ( !head )
539 createHead();
540 if ( head ) {
541 DOM::NodeImpl *newNode = head->addChild(n);
542 if ( newNode ) {
543 pushBlock(id, tagPriority(id));
544 setCurrent ( newNode );
545 #if SPEED_DEBUG < 2
546 if(!n->attached() && HTMLWidget)
547 n->attach();
548 #endif
549 } else {
550 #ifdef PARSER_DEBUG
551 kDebug( 6035 ) << "adding style before to body failed!!!!";
552 #endif
553 discard_until = ID_STYLE + ID_CLOSE_TAG;
554 return false;
556 return true;
557 } else if(inBody) {
558 discard_until = id + ID_CLOSE_TAG;
559 return false;
561 break;
562 case ID_SCRIPT:
563 // if we failed to insert it, go into skip mode
564 discard_until = id + ID_CLOSE_TAG;
565 break;
566 case ID_BODY:
567 if(inBody && doc()->body()) {
568 // we have another <BODY> element.... apply attributes to existing one
569 // make sure we don't overwrite already existing attributes
570 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
571 NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true);
572 NamedAttrMapImpl *bmap = doc()->body()->attributes(false);
573 bool changed = false;
574 for (unsigned long l = 0; map && l < map->length(); ++l) {
575 NodeImpl::Id attrId = map->idAt(l);
576 DOMStringImpl *attrValue = map->valueAt(l);
577 if ( !bmap->getValue(attrId) ) {
578 bmap->setValue(attrId,attrValue);
579 changed = true;
582 if ( changed )
583 doc()->recalcStyle( NodeImpl::Inherit );
584 } else if ( current->isDocumentNode() )
585 break;
586 return false;
587 break;
589 // the following is a hack to move non rendered elements
590 // outside of tables.
591 // needed for broken constructs like <table><form ...><tr>....
592 case ID_INPUT:
594 ElementImpl *e = static_cast<ElementImpl *>(n);
595 DOMString type = e->getAttribute(ATTR_TYPE);
597 if ( strcasecmp( type, "hidden" ) != 0 )
598 break;
599 // Fall through!
601 case ID_TEXT:
603 // Don't try to fit random white-space anywhere
604 TextImpl *t = static_cast<TextImpl *>(n);
605 if (t->containsOnlyWhitespace())
606 return false;
607 // ignore text inside the following elements.
608 switch(current->id())
610 case ID_SELECT:
611 return false;
612 default:
614 // fall through!!
616 break;
618 case ID_DL:
619 popBlock( ID_DT );
620 if ( current->id() == ID_DL ) {
621 e = new HTMLGenericElementImpl( document, ID_DD );
622 insertNode( e );
623 handled = true;
625 break;
626 case ID_DT:
627 e = new HTMLDListElementImpl(document);
628 if ( insertNode(e) ) {
629 insertNode(n);
630 return true;
632 break;
633 case ID_AREA:
635 if(map)
637 map->addChild(n);
638 #if SPEED_DEBUG < 2
639 if(!n->attached() && HTMLWidget)
640 n->attach();
641 #endif
642 handled = true;
643 return true;
645 else
646 return false;
649 case ID_THEAD:
650 case ID_TBODY:
651 case ID_TFOOT:
652 case ID_CAPTION:
653 case ID_COLGROUP: {
654 if (isTableRelatedTag(current->id())) {
655 while (blockStack && current->id() != ID_TABLE && isTableRelatedTag(current->id()))
656 popOneBlock();
657 return insertNode(n);
660 default:
661 break;
664 // switch on the currently active element
665 switch(current->id())
667 case ID_HTML:
668 switch(id)
670 case ID_SCRIPT:
671 case ID_STYLE:
672 case ID_META:
673 case ID_LINK:
674 case ID_OBJECT:
675 case ID_EMBED:
676 case ID_TITLE:
677 case ID_ISINDEX:
678 case ID_BASE:
679 if(!head) {
680 head = new HTMLHeadElementImpl(document);
681 e = head;
682 insertNode(e);
683 handled = true;
685 break;
686 case ID_TEXT: {
687 TextImpl *t = static_cast<TextImpl *>(n);
688 if (t->containsOnlyWhitespace())
689 return false;
690 /* Fall through to default */
692 default:
693 if ( haveFrameSet ) break;
694 e = new HTMLBodyElementImpl(document);
695 startBody();
696 insertNode(e);
697 handled = true;
698 break;
700 break;
701 case ID_HEAD:
702 // we can get here only if the element is not allowed in head.
703 if (id == ID_HTML)
704 return false;
705 else {
706 // This means the body starts here...
707 if ( haveFrameSet ) break;
708 popBlock(ID_HEAD);
709 e = new HTMLBodyElementImpl(document);
710 startBody();
711 insertNode(e);
712 handled = true;
714 break;
715 case ID_BODY:
716 break;
717 case ID_CAPTION:
718 // Illegal content in a caption. Close the caption and try again.
719 popBlock(ID_CAPTION);
720 switch( id ) {
721 case ID_THEAD:
722 case ID_TFOOT:
723 case ID_TBODY:
724 case ID_TR:
725 case ID_TD:
726 case ID_TH:
727 return insertNode(n, flat);
729 break;
730 case ID_TABLE:
731 case ID_THEAD:
732 case ID_TFOOT:
733 case ID_TBODY:
734 case ID_TR:
735 switch(id)
737 case ID_TABLE:
738 popBlock(ID_TABLE); // end the table
739 handled = checkChild( current->id(), id, doc()->inStrictMode());
740 break;
741 default:
743 NodeImpl *node = current;
744 NodeImpl *parent = node->parentNode();
745 // A script may have removed the current node's parent from the DOM
746 // http://bugzilla.opendarwin.org/show_bug.cgi?id=7137
747 // FIXME: we should do real recovery here and re-parent with the correct node.
748 if (!parent)
749 return false;
750 NodeImpl *parentparent = parent->parentNode();
752 if (n->isTextNode() ||
753 ( node->id() == ID_TR &&
754 ( parent->id() == ID_THEAD ||
755 parent->id() == ID_TBODY ||
756 parent->id() == ID_TFOOT ) && parentparent->id() == ID_TABLE ) ||
757 ( !checkChild( ID_TR, id ) && ( node->id() == ID_THEAD || node->id() == ID_TBODY || node->id() == ID_TFOOT ) &&
758 parent->id() == ID_TABLE ) )
760 node = (node->id() == ID_TABLE) ? node :
761 ((node->id() == ID_TR ) ? parentparent : parent);
762 NodeImpl *parent = node->parentNode();
763 if (!parent)
764 return false;
765 int exceptioncode = 0;
766 #ifdef PARSER_DEBUG
767 kDebug( 6035 ) << "calling insertBefore(" << n->nodeName().string() << "," << node->nodeName().string() << ")";
768 #endif
769 parent->insertBefore(n, node, exceptioncode);
770 if (exceptioncode) {
771 #ifndef PARSER_DEBUG
772 if (!n->isTextNode())
773 #endif
774 kDebug(6035) << "adding content before table failed..";
775 break;
777 if ( n->isElementNode() && tagPriority(id) != 0 &&
778 !flat && endTagRequirement(id) != DOM::FORBIDDEN ) {
780 pushBlock(id, tagPriority(id));
781 setCurrent ( n );
782 inStrayTableContent++;
783 blockStack->strayTableContent = true;
785 return true;
788 if ( current->id() == ID_TR )
789 e = new HTMLTableCellElementImpl(document, ID_TD);
790 else if ( current->id() == ID_TABLE )
791 e = new HTMLTableSectionElementImpl( document, ID_TBODY, true /* implicit */ );
792 else
793 e = new HTMLTableRowElementImpl( document );
795 insertNode(e);
796 handled = true;
797 break;
798 } // end default
799 } // end switch
800 break;
801 case ID_OBJECT:
802 discard_until = id + ID_CLOSE_TAG;
803 return false;
804 case ID_UL:
805 case ID_OL:
806 case ID_DIR:
807 case ID_MENU:
808 e = new HTMLLIElementImpl(document);
809 e->addCSSProperty(CSS_PROP_LIST_STYLE_TYPE, CSS_VAL_NONE);
810 insertNode(e);
811 handled = true;
812 break;
813 case ID_FORM:
814 popBlock(ID_FORM);
815 handled = true;
816 break;
817 case ID_SELECT:
818 if( n->isInline() )
819 return false;
820 break;
821 case ID_P:
822 case ID_H1:
823 case ID_H2:
824 case ID_H3:
825 case ID_H4:
826 case ID_H5:
827 case ID_H6:
828 if(!n->isInline())
830 popBlock(current->id());
831 handled = true;
833 break;
834 case ID_OPTION:
835 case ID_OPTGROUP:
836 if (id == ID_OPTGROUP)
838 popBlock(current->id());
839 handled = true;
841 else if(id == ID_SELECT)
843 // IE treats a nested select as </select>. Let's do the same
844 popBlock( ID_SELECT );
845 break;
847 break;
848 // head elements in the body should be ignored.
850 case ID_ADDRESS:
851 case ID_COLGROUP:
852 case ID_FONT:
853 popBlock(current->id());
854 handled = true;
855 break;
856 default:
857 if(current->isDocumentNode())
859 DocumentImpl* doc = static_cast<DocumentImpl*>(current);
860 if (!doc->documentElement()) {
861 e = new HTMLHtmlElementImpl(document);
862 insertNode(e);
863 handled = true;
866 else if(current->isInline())
868 popInlineBlocks();
869 handled = true;
873 // if we couldn't handle the error, just rethrow the exception...
874 if(!handled)
876 //kDebug( 6035 ) << "Exception handler failed in HTMLPArser::insertNode()";
877 return false;
880 return insertNode(n);
885 NodeImpl *KHTMLParser::getElement(Token* t)
887 NodeImpl *n = 0;
889 switch(t->tid)
891 case ID_HTML:
892 n = new HTMLHtmlElementImpl(document);
893 break;
894 case ID_HEAD:
895 if(!head && (current->id() == ID_HTML || current->isDocumentNode())) {
896 head = new HTMLHeadElementImpl(document);
897 n = head;
899 break;
900 case ID_BODY:
901 // body no longer allowed if we have a frameset
902 if(haveFrameSet) break;
903 popBlock(ID_HEAD);
904 n = new HTMLBodyElementImpl(document);
905 haveBody = true;
906 startBody();
907 break;
909 // head elements
910 case ID_BASE:
911 n = new HTMLBaseElementImpl(document);
912 break;
913 case ID_LINK:
914 n = new HTMLLinkElementImpl(document);
915 break;
916 case ID_META:
917 n = new HTMLMetaElementImpl(document);
918 break;
919 case ID_STYLE:
920 n = new HTMLStyleElementImpl(document);
921 break;
922 case ID_TITLE:
923 // only one non-empty <title> allowed
924 if (haveTitle) {
925 discard_until = ID_TITLE+ID_CLOSE_TAG;
926 break;
928 n = new HTMLTitleElementImpl(document);
929 // we'll set haveTitle when closing the tag
930 break;
932 // frames
933 case ID_FRAME:
934 n = new HTMLFrameElementImpl(document);
935 break;
936 case ID_FRAMESET:
937 popBlock(ID_HEAD);
938 if ( inBody && !haveFrameSet && !haveContent && !haveBody) {
939 popBlock( ID_BODY );
940 // ### actually for IE document.body returns the now hidden "body" element
941 // we can't implement that behavior now because it could cause too many
942 // regressions and the headaches are not worth the work as long as there is
943 // no site actually relying on that detail (Dirk)
944 if (static_cast<HTMLDocumentImpl*>(document)->body())
945 static_cast<HTMLDocumentImpl*>(document)->body()
946 ->addCSSProperty(CSS_PROP_DISPLAY, CSS_VAL_NONE);
947 inBody = false;
949 if ( (haveBody || haveContent || haveFrameSet) && current->id() == ID_HTML)
950 break;
951 n = new HTMLFrameSetElementImpl(document);
952 haveFrameSet = true;
953 startBody();
954 break;
955 // a bit a special case, since the frame is inlined...
956 case ID_IFRAME:
957 n = new HTMLIFrameElementImpl(document);
958 if (!t->flat) discard_until = ID_IFRAME+ID_CLOSE_TAG;
959 break;
961 // form elements
962 case ID_FORM:
963 // thou shall not nest <form> - NS/IE quirk
964 if (form) break;
965 n = form = new HTMLFormElementImpl(document, false);
966 break;
967 case ID_BUTTON:
968 n = new HTMLButtonElementImpl(document, form);
969 break;
970 case ID_FIELDSET:
971 n = new HTMLFieldSetElementImpl(document, form);
972 break;
973 case ID_INPUT:
974 if ( t->attrs &&
975 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() &&
976 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled() &&
977 !strcasecmp( t->attrs->getValue( ATTR_TYPE ), "image" ) )
979 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered( doc()->completeURL( khtml::parseURL(t->attrs->getValue(ATTR_SRC)).string() ) ))
980 return 0;
982 n = new HTMLInputElementImpl(document, form);
983 break;
984 case ID_ISINDEX:
985 n = handleIsindex(t);
986 if( !inBody ) {
987 isindex = n;
988 n = 0;
989 } else
990 t->flat = true;
991 break;
992 case ID_KEYGEN:
993 n = new HTMLKeygenElementImpl(document, form);
994 break;
995 case ID_LABEL:
996 n = new HTMLLabelElementImpl(document);
997 break;
998 case ID_LEGEND:
999 n = new HTMLLegendElementImpl(document, form);
1000 break;
1001 case ID_OPTGROUP:
1002 n = new HTMLOptGroupElementImpl(document, form);
1003 break;
1004 case ID_OPTION:
1005 popOptionalBlock(ID_OPTION);
1006 n = new HTMLOptionElementImpl(document, form);
1007 break;
1008 case ID_SELECT:
1009 inSelect = true;
1010 n = new HTMLSelectElementImpl(document, form);
1011 break;
1012 case ID_TEXTAREA:
1013 n = new HTMLTextAreaElementImpl(document, form);
1014 break;
1016 // lists
1017 case ID_DL:
1018 n = new HTMLDListElementImpl(document);
1019 break;
1020 case ID_DD:
1021 popOptionalBlock(ID_DT);
1022 popOptionalBlock(ID_DD);
1023 n = new HTMLGenericElementImpl(document, t->tid);
1024 break;
1025 case ID_DT:
1026 popOptionalBlock(ID_DD);
1027 popOptionalBlock(ID_DT);
1028 n = new HTMLGenericElementImpl(document, t->tid);
1029 break;
1030 case ID_UL:
1032 n = new HTMLUListElementImpl(document);
1033 break;
1035 case ID_OL:
1037 n = new HTMLOListElementImpl(document);
1038 break;
1040 case ID_DIR:
1041 n = new HTMLDirectoryElementImpl(document);
1042 break;
1043 case ID_MENU:
1044 n = new HTMLMenuElementImpl(document);
1045 break;
1046 case ID_LI:
1047 popOptionalBlock(ID_LI);
1048 n = new HTMLLIElementImpl(document);
1049 break;
1050 // formatting elements (block)
1051 case ID_BLOCKQUOTE:
1052 n = new HTMLGenericElementImpl(document, t->tid);
1053 break;
1054 case ID_LAYER:
1055 case ID_ILAYER:
1056 n = new HTMLLayerElementImpl(document, t->tid);
1057 break;
1058 case ID_P:
1059 case ID_DIV:
1060 n = new HTMLDivElementImpl(document, t->tid);
1061 break;
1062 case ID_H1:
1063 case ID_H2:
1064 case ID_H3:
1065 case ID_H4:
1066 case ID_H5:
1067 case ID_H6:
1068 n = new HTMLGenericElementImpl(document, t->tid);
1069 break;
1070 case ID_HR:
1071 n = new HTMLHRElementImpl(document);
1072 break;
1073 case ID_PRE:
1074 case ID_XMP:
1075 case ID_PLAINTEXT:
1076 case ID_LISTING:
1077 n = new HTMLPreElementImpl(document, t->tid);
1078 break;
1080 // font stuff
1081 case ID_BASEFONT:
1082 n = new HTMLBaseFontElementImpl(document);
1083 break;
1084 case ID_FONT:
1085 n = new HTMLFontElementImpl(document);
1086 break;
1088 // ins/del
1089 case ID_DEL:
1090 case ID_INS:
1091 n = new HTMLGenericElementImpl(document, t->tid);
1092 break;
1094 // anchor
1095 case ID_A:
1096 popBlock(ID_A);
1098 n = new HTMLAnchorElementImpl(document);
1099 break;
1101 // images
1102 case ID_IMAGE:
1103 case ID_IMG:
1104 if (t->attrs&&
1105 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled()&&
1106 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled())
1108 QString url = doc()->completeURL( khtml::parseURL(t->attrs->getValue(ATTR_SRC)).string() );
1109 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url))
1110 return 0;
1112 n = new HTMLImageElementImpl(document, form);
1113 break;
1115 case ID_CANVAS:
1116 n = new HTMLCanvasElementImpl(document);
1117 break;
1119 case ID_MAP:
1120 map = new HTMLMapElementImpl(document);
1121 n = map;
1122 break;
1123 case ID_AREA:
1124 n = new HTMLAreaElementImpl(document);
1125 break;
1127 // objects, applets and scripts
1128 case ID_APPLET:
1129 n = new HTMLAppletElementImpl(document);
1130 break;
1131 case ID_EMBED:
1132 n = new HTMLEmbedElementImpl(document);
1133 break;
1134 case ID_OBJECT:
1135 n = new HTMLObjectElementImpl(document);
1136 break;
1137 case ID_PARAM:
1138 n = new HTMLParamElementImpl(document);
1139 break;
1140 case ID_SCRIPT:
1142 HTMLScriptElementImpl *scriptElement = new HTMLScriptElementImpl(document);
1143 scriptElement->setCreatedByParser(true);
1144 n = scriptElement;
1145 break;
1148 // media
1149 case ID_AUDIO:
1150 n = new HTMLAudioElement(document);
1151 break;
1152 case ID_VIDEO:
1153 n = new HTMLVideoElement(document);
1154 break;
1155 case ID_SOURCE:
1156 n = new HTMLSourceElement(document);
1157 break;
1159 // tables
1160 case ID_TABLE:
1161 n = new HTMLTableElementImpl(document);
1162 break;
1163 case ID_CAPTION:
1164 n = new HTMLTableCaptionElementImpl(document);
1165 break;
1166 case ID_COLGROUP:
1167 case ID_COL:
1168 n = new HTMLTableColElementImpl(document, t->tid);
1169 break;
1170 case ID_TR:
1171 popBlock(ID_TR);
1172 n = new HTMLTableRowElementImpl(document);
1173 break;
1174 case ID_TD:
1175 case ID_TH:
1176 popBlock(ID_TH);
1177 popBlock(ID_TD);
1178 n = new HTMLTableCellElementImpl(document, t->tid);
1179 break;
1180 case ID_TBODY:
1181 case ID_THEAD:
1182 case ID_TFOOT:
1183 popBlock( ID_THEAD );
1184 popBlock( ID_TBODY );
1185 popBlock( ID_TFOOT );
1186 n = new HTMLTableSectionElementImpl(document, t->tid, false);
1187 break;
1189 // inline elements
1190 case ID_BR:
1191 n = new HTMLBRElementImpl(document);
1192 break;
1193 case ID_Q:
1194 n = new HTMLGenericElementImpl(document, t->tid);
1195 break;
1197 // elements with no special representation in the DOM
1199 // block:
1200 case ID_ADDRESS:
1201 case ID_CENTER:
1202 n = new HTMLGenericElementImpl(document, t->tid);
1203 break;
1204 // inline
1205 // %fontstyle
1206 case ID_TT:
1207 case ID_U:
1208 case ID_B:
1209 case ID_I:
1210 case ID_S:
1211 case ID_STRIKE:
1212 case ID_BIG:
1213 case ID_SMALL:
1215 // %phrase
1216 case ID_EM:
1217 case ID_STRONG:
1218 case ID_DFN:
1219 case ID_CODE:
1220 case ID_SAMP:
1221 case ID_KBD:
1222 case ID_VAR:
1223 case ID_CITE:
1224 case ID_ABBR:
1225 case ID_ACRONYM:
1227 // %special
1228 case ID_SUB:
1229 case ID_SUP:
1230 case ID_SPAN:
1231 case ID_WBR:
1232 case ID_NOBR:
1233 if ( t->tid == ID_NOBR || t->tid == ID_WBR )
1234 popOptionalBlock( t->tid );
1235 case ID_BDO:
1236 n = new HTMLGenericElementImpl(document, t->tid);
1237 break;
1239 // these are special, and normally not rendered
1240 case ID_NOEMBED:
1241 if (!t->flat) {
1242 n = new HTMLGenericElementImpl(document, t->tid);
1243 discard_until = ID_NOEMBED + ID_CLOSE_TAG;
1245 return n;
1246 case ID_NOFRAMES:
1247 if (!t->flat) {
1248 n = new HTMLGenericElementImpl(document, t->tid);
1249 discard_until = ID_NOFRAMES + ID_CLOSE_TAG;
1251 return n;
1252 case ID_NOSCRIPT:
1253 if (!t->flat) {
1254 n = new HTMLGenericElementImpl(document, t->tid);
1255 if(HTMLWidget && HTMLWidget->part()->jScriptEnabled())
1256 discard_until = ID_NOSCRIPT + ID_CLOSE_TAG;
1258 return n;
1259 case ID_NOLAYER:
1260 // discard_until = ID_NOLAYER + ID_CLOSE_TAG;
1261 return 0;
1262 break;
1263 case ID_MARQUEE:
1264 n = new HTMLMarqueeElementImpl(document);
1265 break;
1266 // text
1267 case ID_TEXT:
1268 // kDebug(6035) << "ID_TEXT: \"" << DOMString(t->text).string() << "\"";
1269 n = new TextImpl(document, t->text);
1270 break;
1271 case ID_COMMENT:
1272 n = new CommentImpl(document, t->text);
1273 break;
1274 default:
1275 n = new HTMLGenericElementImpl(document, t->tid);
1276 break;
1277 // kDebug( 6035 ) << "Unknown tag " << t->tid << "!";
1279 return n;
1282 void KHTMLParser::processCloseTag(Token *t)
1284 // support for really broken html. Can't believe I'm supporting such crap (lars)
1285 switch(t->tid)
1287 case ID_HTML+ID_CLOSE_TAG:
1288 case ID_BODY+ID_CLOSE_TAG:
1289 // we never trust those close tags, since stupid webpages close
1290 // them prematurely
1291 return;
1292 case ID_FORM+ID_CLOSE_TAG:
1293 form = 0;
1294 // this one is to get the right style on the body element
1295 break;
1296 case ID_MAP+ID_CLOSE_TAG:
1297 map = 0;
1298 break;
1299 case ID_SELECT+ID_CLOSE_TAG:
1300 inSelect = false;
1301 break;
1302 case ID_TITLE+ID_CLOSE_TAG:
1303 // Set haveTitle only if <title> isn't empty
1304 if ( current->firstChild() )
1305 haveTitle = true;
1306 break;
1307 default:
1308 break;
1311 #ifdef PARSER_DEBUG
1312 kDebug( 6035 ) << "added the following children to " << current->nodeName().string();
1313 NodeImpl *child = current->firstChild();
1314 while(child != 0)
1316 kDebug( 6035 ) << " " << child->nodeName().string();
1317 child = child->nextSibling();
1319 #endif
1320 generateImpliedEndTags( t->tid - ID_CLOSE_TAG );
1321 popBlock( t->tid - ID_CLOSE_TAG );
1322 #ifdef PARSER_DEBUG
1323 kDebug( 6035 ) << "closeTag --> current = " << current->nodeName().string();
1324 #endif
1327 bool KHTMLParser::isResidualStyleTag(int _id)
1329 switch (_id) {
1330 case ID_A:
1331 case ID_B:
1332 case ID_BIG:
1333 case ID_EM:
1334 case ID_FONT:
1335 case ID_I:
1336 case ID_NOBR:
1337 case ID_S:
1338 case ID_SMALL:
1339 case ID_STRIKE:
1340 case ID_STRONG:
1341 case ID_TT:
1342 case ID_U:
1343 case ID_DFN:
1344 case ID_CODE:
1345 case ID_SAMP:
1346 case ID_KBD:
1347 case ID_VAR:
1348 case ID_DEL:
1349 case ID_INS:
1350 return true;
1351 default:
1352 return false;
1356 bool KHTMLParser::isAffectedByResidualStyle(int _id)
1358 if (isResidualStyleTag(_id))
1359 return true;
1361 switch (_id) {
1362 case ID_P:
1363 case ID_DIV:
1364 case ID_BLOCKQUOTE:
1365 case ID_ADDRESS:
1366 case ID_H1:
1367 case ID_H2:
1368 case ID_H3:
1369 case ID_H4:
1370 case ID_H5:
1371 case ID_H6:
1372 case ID_CENTER:
1373 case ID_UL:
1374 case ID_OL:
1375 case ID_LI:
1376 case ID_DL:
1377 case ID_DT:
1378 case ID_DD:
1379 case ID_PRE:
1380 case ID_LISTING:
1381 return true;
1382 default:
1383 return false;
1387 void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
1389 // Find the element that crosses over to a higher level.
1390 // ### For now, if there is more than one, we will only make sure we close the residual style.
1391 int exceptionCode = 0;
1392 HTMLStackElem* curr = blockStack;
1393 HTMLStackElem* maxElem = 0;
1394 HTMLStackElem* endElem = 0;
1395 HTMLStackElem* prev = 0;
1396 HTMLStackElem* prevMaxElem = 0;
1397 bool advancedResidual = false; // ### if set we only close the residual style
1398 while (curr && curr != elem) {
1399 if (curr->level > elem->level) {
1400 if (!isAffectedByResidualStyle(curr->id)) return;
1401 if (maxElem) advancedResidual = true;
1402 else
1403 endElem = curr;
1404 maxElem = curr;
1405 prevMaxElem = prev;
1408 prev = curr;
1409 curr = curr->next;
1412 if (!curr || !maxElem ) return;
1414 NodeImpl* residualElem = prev->node;
1415 NodeImpl* blockElem = prevMaxElem ? prevMaxElem->node : current;
1416 RefPtr<NodeImpl> parentElem = elem->node;
1418 // Check to see if the reparenting that is going to occur is allowed according to the DOM.
1419 // FIXME: We should either always allow it or perform an additional fixup instead of
1420 // just bailing here.
1421 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
1422 if (!parentElem->childAllowed(blockElem))
1423 return;
1425 if (maxElem->node->parentNode() != elem->node && !advancedResidual) {
1426 // Walk the stack and remove any elements that aren't residual style tags. These
1427 // are basically just being closed up. Example:
1428 // <font><span>Moo<p>Goo</font></p>.
1429 // In the above example, the <span> doesn't need to be reopened. It can just close.
1430 HTMLStackElem* currElem = maxElem->next;
1431 HTMLStackElem* prevElem = maxElem;
1432 while (currElem != elem) {
1433 HTMLStackElem* nextElem = currElem->next;
1434 if (!isResidualStyleTag(currElem->id)) {
1435 prevElem->next = nextElem;
1436 prevElem->setNode(currElem->node);
1437 delete currElem;
1439 else
1440 prevElem = currElem;
1441 currElem = nextElem;
1444 // We have to reopen residual tags in between maxElem and elem. An example of this case s:
1445 // <font><i>Moo<p>Foo</font>.
1446 // In this case, we need to transform the part before the <p> into:
1447 // <font><i>Moo</i></font><i>
1448 // so that the <i> will remain open. This involves the modification of elements
1449 // in the block stack.
1450 // This will also affect how we ultimately reparent the block, since we want it to end up
1451 // under the reopened residual tags (e.g., the <i> in the above example.)
1452 RefPtr<NodeImpl> prevNode = 0;
1453 RefPtr<NodeImpl> currNode = 0;
1454 currElem = maxElem;
1455 while (currElem->node != residualElem) {
1456 if (isResidualStyleTag(currElem->node->id())) {
1457 // Create a clone of this element.
1458 currNode = currElem->node->cloneNode(false);
1459 currElem->node->close();
1460 removeForbidden(currElem->id, forbiddenTag);
1462 // Change the stack element's node to point to the clone.
1463 currElem->setNode(currNode.get());
1465 // Attach the previous node as a child of this new node.
1466 if (prevNode)
1467 currNode->appendChild(prevNode.get(), exceptionCode);
1468 else // The new parent for the block element is going to be the innermost clone.
1469 parentElem = currNode;
1471 prevNode = currNode;
1474 currElem = currElem->next;
1477 // Now append the chain of new residual style elements if one exists.
1478 if (prevNode)
1479 elem->node->appendChild(prevNode.get(), exceptionCode);
1482 // We need to make a clone of |residualElem| and place it just inside |blockElem|.
1483 // All content of |blockElem| is reparented to be under this clone. We then
1484 // reparent |blockElem| using real DOM calls so that attachment/detachment will
1485 // be performed to fix up the rendering tree.
1486 // So for this example: <b>...<p>Foo</b>Goo</p>
1487 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
1489 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
1490 SharedPtr<NodeImpl> guard(blockElem);
1491 blockElem->parentNode()->removeChild(blockElem, exceptionCode);
1493 if (!advancedResidual) {
1494 // Step 2: Clone |residualElem|.
1495 RefPtr<NodeImpl> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
1497 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
1498 // before we've put |newElem| into the document. That way we'll only do one attachment of all
1499 // the new content (instead of a bunch of individual attachments).
1500 NodeImpl* currNode = blockElem->firstChild();
1501 while (currNode) {
1502 NodeImpl* nextNode = currNode->nextSibling();
1503 SharedPtr<NodeImpl> guard(currNode); //Protect from deletion while moving
1504 blockElem->removeChild(currNode, exceptionCode);
1505 newNode->appendChild(currNode, exceptionCode);
1506 currNode = nextNode;
1508 // TODO - To be replaced.
1509 // Re-register form elements with currently active form, step 1 will have removed them
1510 if (form && currNode && currNode->isGenericFormElement())
1512 HTMLGenericFormElementImpl *e = static_cast<HTMLGenericFormElementImpl *>(currNode);
1513 form->registerFormElement(e);
1517 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
1518 // attachment can occur yet.
1519 blockElem->appendChild(newNode.get(), exceptionCode);
1522 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
1523 parentElem->appendChild(blockElem, exceptionCode);
1525 // Step 6: Elide |elem|, since it is effectively no longer open. Also update
1526 // the node associated with the previous stack element so that when it gets popped,
1527 // it doesn't make the residual element the next current node.
1528 HTMLStackElem* currElem = maxElem;
1529 HTMLStackElem* prevElem = 0;
1530 while (currElem != elem) {
1531 prevElem = currElem;
1532 currElem = currElem->next;
1534 prevElem->next = elem->next;
1535 prevElem->setNode(elem->node);
1536 delete elem;
1538 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1539 // In the above example, Goo should stay italic.
1540 curr = blockStack;
1541 HTMLStackElem* residualStyleStack = 0;
1542 while (curr && curr != endElem) {
1543 // We will actually schedule this tag for reopening
1544 // after we complete the close of this entire block.
1545 NodeImpl* currNode = current;
1546 if (isResidualStyleTag(curr->id)) {
1547 // We've overloaded the use of stack elements and are just reusing the
1548 // struct with a slightly different meaning to the variables. Instead of chaining
1549 // from innermost to outermost, we build up a list of all the tags we need to reopen
1550 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1551 // to the outermost tag we need to reopen.
1552 // We also set curr->node to be the actual element that corresponds to the ID stored in
1553 // curr->id rather than the node that you should pop to when the element gets pulled off
1554 // the stack.
1555 popOneBlock(false);
1556 curr->setNode(currNode);
1557 curr->next = residualStyleStack;
1558 residualStyleStack = curr;
1560 else
1561 popOneBlock();
1563 curr = blockStack;
1566 reopenResidualStyleTags(residualStyleStack, 0); // FIXME: Deal with stray table content some day
1567 // if it becomes necessary to do so.
1570 void KHTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, DOM::NodeImpl* malformedTableParent)
1572 // Loop for each tag that needs to be reopened.
1573 while (elem) {
1574 // Create a shallow clone of the DOM node for this element.
1575 RefPtr<NodeImpl> newNode = elem->node->cloneNode(false);
1577 // Append the new node. In the malformed table case, we need to insert before the table,
1578 // which will be the last child.
1579 int exceptionCode = 0;
1580 if (malformedTableParent)
1581 malformedTableParent->insertBefore(newNode.get(), malformedTableParent->lastChild(), exceptionCode);
1582 else
1583 current->appendChild(newNode.get(), exceptionCode);
1584 // FIXME: Is it really OK to ignore the exceptions here?
1586 // Now push a new stack element for this node we just created.
1587 pushBlock(elem->id, elem->level);
1589 // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1590 // that it is inside a malformed table.
1591 blockStack->strayTableContent = malformedTableParent != 0;
1592 if (blockStack->strayTableContent)
1593 inStrayTableContent++;
1595 // Clear our malformed table parent variable.
1596 malformedTableParent = 0;
1598 // Update |current| manually to point to the new node.
1599 setCurrent(newNode.get());
1601 // Advance to the next tag that needs to be reopened.
1602 HTMLStackElem* next = elem->next;
1603 delete elem;
1604 elem = next;
1608 void KHTMLParser::pushBlock(int _id, int _level)
1610 HTMLStackElem *Elem = new HTMLStackElem(_id, _level, current, m_inline, blockStack);
1612 blockStack = Elem;
1613 addForbidden(_id, forbiddenTag);
1616 void KHTMLParser::generateImpliedEndTags( int _id )
1618 HTMLStackElem *Elem = blockStack;
1620 int level = tagPriority(_id);
1621 while( Elem && Elem->id != _id)
1623 HTMLStackElem *NextElem = Elem->next;
1624 if (endTagRequirement(Elem->id) == DOM::OPTIONAL && Elem->level <= level) {
1625 popOneBlock();
1627 else
1628 break;
1629 Elem = NextElem;
1633 void KHTMLParser::popOptionalBlock( int _id )
1635 bool found = false;
1636 HTMLStackElem *Elem = blockStack;
1638 int level = tagPriority(_id);
1639 while( Elem )
1641 if (Elem->id == _id) {
1642 found = true;
1643 break;
1645 if (Elem->level > level || (endTagRequirement(Elem->id) != DOM::OPTIONAL && !isResidualStyleTag(Elem->id)) )
1646 break;
1647 Elem = Elem->next;
1650 if (found) {
1651 generateImpliedEndTags(_id);
1652 popBlock(_id);
1656 void KHTMLParser::popBlock( int _id )
1658 HTMLStackElem *Elem = blockStack;
1659 int maxLevel = 0;
1661 #ifdef PARSER_DEBUG
1662 kDebug( 6035 ) << "popBlock(" << getParserPrintableName(_id) << ")";
1663 while(Elem) {
1664 kDebug( 6035) << " > " << getParserPrintableName(Elem->id);
1665 Elem = Elem->next;
1667 Elem = blockStack;
1668 #endif
1670 while( Elem && (Elem->id != _id))
1672 if (maxLevel < Elem->level)
1674 maxLevel = Elem->level;
1676 Elem = Elem->next;
1678 if (!Elem)
1679 return;
1681 if (maxLevel > Elem->level) {
1682 // We didn't match because the tag is in a different scope, e.g.,
1683 // <b><p>Foo</b>. Try to correct the problem.
1684 if (!isResidualStyleTag(_id))
1685 return;
1686 return handleResidualStyleCloseTagAcrossBlocks(Elem);
1689 bool isAffectedByStyle = isAffectedByResidualStyle(Elem->id);
1690 HTMLStackElem* residualStyleStack = 0;
1691 NodeImpl* malformedTableParent = 0;
1693 Elem = blockStack;
1695 while (Elem)
1697 if (Elem->id == _id)
1699 int strayTable = inStrayTableContent;
1700 popOneBlock();
1701 Elem = 0;
1703 // This element was the root of some malformed content just inside an implicit or
1704 // explicit <tbody> or <tr>.
1705 // If we end up needing to reopen residual style tags, the root of the reopened chain
1706 // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1707 if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
1708 NodeImpl* curr = current;
1709 while (curr && curr->id() != ID_TABLE)
1710 curr = curr->parentNode();
1711 malformedTableParent = curr ? curr->parentNode() : 0;
1714 else
1716 // Schedule this tag for reopening
1717 // after we complete the close of this entire block.
1718 NodeImpl* currNode = current;
1719 if (isAffectedByStyle && isResidualStyleTag(Elem->id)) {
1720 // We've overloaded the use of stack elements and are just reusing the
1721 // struct with a slightly different meaning to the variables. Instead of chaining
1722 // from innermost to outermost, we build up a list of all the tags we need to reopen
1723 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1724 // to the outermost tag we need to reopen.
1725 // We also set Elem->node to be the actual element that corresponds to the ID stored in
1726 // Elem->id rather than the node that you should pop to when the element gets pulled off
1727 // the stack.
1728 popOneBlock(false);
1729 Elem->next = residualStyleStack;
1730 Elem->setNode(currNode);
1731 residualStyleStack = Elem;
1733 else
1734 popOneBlock();
1735 Elem = blockStack;
1739 reopenResidualStyleTags(residualStyleStack, malformedTableParent);
1742 void KHTMLParser::popOneBlock(bool delBlock)
1744 HTMLStackElem *Elem = blockStack;
1746 // we should never get here, but some bad html might cause it.
1747 #ifndef PARSER_DEBUG
1748 if(!Elem) return;
1749 #else
1750 kDebug( 6035 ) << "popping block: " << getParserPrintableName(Elem->id) << "(" << Elem->id << ")";
1751 #endif
1753 #if SPEED_DEBUG < 1
1754 if((Elem->node != current)) {
1755 if (current->maintainsState() && document){
1756 document->registerMaintainsState(current);
1757 document->attemptRestoreState(current);
1759 current->close();
1761 #endif
1763 removeForbidden(Elem->id, forbiddenTag);
1765 blockStack = Elem->next;
1766 // we only set inline to false, if the element we close is a block level element.
1767 // This helps getting cases as <p><b>bla</b> <b>bla</b> right.
1769 m_inline = Elem->m_inline;
1771 if (current->id() == ID_FORM && form && inStrayTableContent)
1772 form->setMalformed(true);
1774 setCurrent( Elem->node );
1776 if (Elem->strayTableContent)
1777 inStrayTableContent--;
1779 if (delBlock)
1780 delete Elem;
1783 void KHTMLParser::popInlineBlocks()
1785 while(blockStack && current->isInline() && current->id() != ID_FONT)
1786 popOneBlock();
1789 void KHTMLParser::freeBlock()
1791 while (blockStack)
1792 popOneBlock();
1793 blockStack = 0;
1796 void KHTMLParser::createHead()
1798 if(head || !doc()->documentElement())
1799 return;
1801 head = new HTMLHeadElementImpl(document);
1802 HTMLElementImpl *body = doc()->body();
1803 int exceptioncode = 0;
1804 doc()->documentElement()->insertBefore(head, body, exceptioncode);
1805 if ( exceptioncode ) {
1806 #ifdef PARSER_DEBUG
1807 kDebug( 6035 ) << "creation of head failed!!!!:" << exceptioncode;
1808 #endif
1809 delete head;
1810 head = 0;
1813 // If the body does not exist yet, then the <head> should be pushed as the current block.
1814 if (head && !body) {
1815 pushBlock(head->id(), tagPriority(head->id()));
1816 setCurrent(head);
1820 NodeImpl *KHTMLParser::handleIsindex( Token *t )
1822 NodeImpl *n;
1823 HTMLFormElementImpl *myform = form;
1824 if ( !myform ) {
1825 myform = new HTMLFormElementImpl(document, true);
1826 n = myform;
1827 } else
1828 n = new HTMLDivElementImpl( document, ID_DIV );
1829 NodeImpl *child = new HTMLHRElementImpl( document );
1830 n->addChild( child );
1831 DOMStringImpl* a = t->attrs ? t->attrs->getValue(ATTR_PROMPT) : 0;
1832 DOMString text = i18n("This is a searchable index. Enter search keywords: ");
1833 if (a)
1834 text = a;
1835 child = new TextImpl(document, text.implementation());
1836 n->addChild( child );
1837 child = new HTMLIsIndexElementImpl(document, myform);
1838 static_cast<ElementImpl *>(child)->setAttribute(ATTR_TYPE, "khtml_isindex");
1839 n->addChild( child );
1840 child = new HTMLHRElementImpl( document );
1841 n->addChild( child );
1843 return n;
1846 void KHTMLParser::startBody()
1848 if(inBody) return;
1850 inBody = true;
1852 if( isindex ) {
1853 insertNode( isindex, true /* don't decend into this node */ );
1854 isindex = 0;