fix logic
[personal-kdelibs.git] / khtml / xml / xml_tokenizer.cpp
blob4a1534cc8fe321e4e3d489c8ecffbf38a9efcd73
1 /**
2 * This file is part of the DOM implementation for KDE.
4 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
5 * Copyright (C) 2003 Apple Computer, Inc.
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
24 #include "xml_tokenizer.h"
25 #include "xml/dom_docimpl.h"
26 #include "xml/dom_textimpl.h"
27 #include "xml/dom_xmlimpl.h"
28 #include "html/html_tableimpl.h"
29 #include "html/html_headimpl.h"
30 #include "rendering/render_object.h"
31 #include "misc/htmltags.h"
32 #include "misc/htmlattrs.h"
33 #include "misc/loader.h"
35 #include "khtmlview.h"
36 #include "khtml_part.h"
37 #include <QtCore/QVariant>
38 #include <kdebug.h>
39 #include <klocale.h>
41 // SVG includes
42 #include "svg/SVGScriptElement.h"
43 #include "svg/XLinkNames.h"
45 using namespace DOM;
46 using namespace khtml;
48 XMLIncrementalSource::XMLIncrementalSource()
49 : QXmlInputSource(), m_pos( 0 ), m_unicode( 0 ),
50 m_finished( false ), m_paused(false)
54 void XMLIncrementalSource::fetchData()
56 //just a dummy to overwrite default behavior
59 QChar XMLIncrementalSource::next()
61 if ( m_finished )
62 return QXmlInputSource::EndOfDocument;
63 else if (m_paused || m_data.length() <= m_pos)
64 return QXmlInputSource::EndOfData;
65 else
66 return m_unicode[m_pos++];
69 void XMLIncrementalSource::setData( const QString& str )
71 m_data = str;
72 m_unicode = m_data.unicode();
73 m_pos = 0;
74 if ( !str.isEmpty() )
75 m_finished = false;
77 void XMLIncrementalSource::setData( const QByteArray& data )
79 setData( fromRawData( data, true ) );
82 void XMLIncrementalSource::appendXML( const QString& str )
84 m_data += str;
85 m_unicode = m_data.unicode();
88 QString XMLIncrementalSource::data() const
90 return m_data;
93 void XMLIncrementalSource::setFinished( bool finished )
95 m_finished = finished;
98 XMLHandler::XMLHandler(DocumentImpl *_doc, KHTMLView *_view)
99 : errorLine(0)
101 m_doc = _doc;
102 m_view = _view;
103 pushNode( _doc );
106 XMLHandler::~XMLHandler()
110 void XMLHandler::pushNode( NodeImpl *node )
112 m_nodes.push( node );
115 NodeImpl *XMLHandler::popNode()
117 return m_nodes.pop();
120 NodeImpl *XMLHandler::currentNode() const
122 if ( m_nodes.isEmpty() )
123 return 0;
124 else
125 return m_nodes.top();
128 QString XMLHandler::errorProtocol()
130 return errorProt;
134 bool XMLHandler::startDocument()
136 // at the beginning of parsing: do some initialization
137 errorProt = "";
138 state = StateInit;
140 return true;
143 bool XMLHandler::startPrefixMapping(const QString& prefix, const QString& uri)
145 namespaceInfo[prefix].push(uri);
146 return true;
149 bool XMLHandler::endPrefixMapping(const QString& prefix)
151 QStack<QString>& stack = namespaceInfo[prefix];
152 stack.pop();
153 if (stack.isEmpty())
154 namespaceInfo.remove(prefix);
155 return true;
158 void XMLHandler::fixUpNSURI(QString& uri, const QString& qname)
160 /* QXml does not resolve the namespaces of attributes in the same
161 tag that preceed the xmlns declaration. This fixes up that case */
162 if (uri.isEmpty() && qname.indexOf(':') != -1) {
163 QXmlNamespaceSupport ns;
164 QString localName, prefix;
165 ns.splitName(qname, prefix, localName);
166 if (namespaceInfo.contains(prefix)) {
167 uri = namespaceInfo[prefix].top();
172 bool XMLHandler::startElement( const QString& namespaceURI, const QString& /*localName*/,
173 const QString& qName, const QXmlAttributes& atts )
175 if (currentNode()->nodeType() == Node::TEXT_NODE)
176 exitText();
178 DOMString nsURI;
179 if (!namespaceURI.isNull())
180 nsURI = DOMString(namespaceURI);
181 else
182 // No namespace declared, default to the no namespace
183 nsURI = DOMString("");
184 ElementImpl *newElement = m_doc->createElementNS(nsURI,qName);
185 if (!newElement)
186 return false;
187 int i;
188 for (i = 0; i < atts.length(); i++) {
189 int exceptioncode = 0;
190 QString uriString = atts.uri(i);
191 QString qnString = atts.qName(i);
192 fixUpNSURI(uriString, qnString);
193 DOMString uri(uriString);
194 DOMString qn(qnString);
195 DOMString val(atts.value(i));
196 newElement->setAttributeNS(uri, qn, val, exceptioncode);
197 if (exceptioncode) // exception setting attributes
198 return false;
201 if (newElement->id() == ID_SCRIPT || newElement->id() == makeId(xhtmlNamespace, ID_SCRIPT))
202 static_cast<HTMLScriptElementImpl *>(newElement)->setCreatedByParser(true);
204 //this is tricky. in general the node doesn't have to attach to the one it's in. as far
205 //as standards go this is wrong, but there's literally thousands of documents where
206 //we see <p><ul>...</ul></p>. the following code is there for those cases.
207 //when we can't attach to the currently holding us node we try to attach to its parent
208 bool attached = false;
209 for ( NodeImpl *current = currentNode(); current; current = current->parent() ) {
210 attached = current->addChild( newElement );
211 if ( attached )
212 break;
214 if (attached) {
215 if (m_view && !newElement->attached() && !m_doc->hasPendingSheets())
216 newElement->attach();
217 pushNode( newElement );
218 return true;
220 else {
221 delete newElement;
222 return false;
225 // ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a
226 // single object implementing the Text interface that is the only child of the element."... do we
227 // need to ensure that empty elements always have an empty text child?
231 bool XMLHandler::endElement( const QString& /*namespaceURI*/, const QString& /*localName*/, const QString& /*qName*/ )
233 if (currentNode()->nodeType() == Node::TEXT_NODE)
234 exitText();
236 NodeImpl *node = popNode();
237 if ( node ) {
238 node->close();
239 while ( currentNode() && currentNode()->implicitNode() ) //for the implicit HTMLTableSectionElementImpl
240 popNode()->close();
241 } else
242 return false;
244 // if the node is a script element try to execute it immediately
245 if ((node->id() == ID_SCRIPT) || (node->id() == makeId(xhtmlNamespace, ID_SCRIPT)) || node->id() == WebCore::SVGNames::scriptTag.id())
246 static_cast<XMLTokenizer*>(m_doc->tokenizer())->executeScript(node);
248 return true;
252 bool XMLHandler::startCDATA()
254 if (currentNode()->nodeType() == Node::TEXT_NODE)
255 exitText();
257 NodeImpl *newNode = m_doc->createCDATASection(new DOMStringImpl(""));
258 if (currentNode()->addChild(newNode)) {
259 if (m_view && !newNode->attached() && !m_doc->hasPendingSheets())
260 newNode->attach();
261 pushNode( newNode );
262 return true;
264 else {
265 delete newNode;
266 return false;
271 bool XMLHandler::endCDATA()
273 popNode();
274 Q_ASSERT( currentNode() );
275 return currentNode();
278 bool XMLHandler::characters( const QString& ch )
280 if (currentNode()->nodeType() == Node::TEXT_NODE ||
281 currentNode()->nodeType() == Node::CDATA_SECTION_NODE ||
282 enterText()) {
283 int exceptioncode = 0;
284 static_cast<TextImpl*>(currentNode())->appendData(ch,exceptioncode);
285 if (exceptioncode)
286 return false;
287 return true;
289 else {
290 // Don't worry about white-space violating DTD
291 if (ch.trimmed().isEmpty()) return true;
293 return false;
298 bool XMLHandler::comment(const QString & ch)
300 if (currentNode()->nodeType() == Node::TEXT_NODE)
301 exitText();
302 // ### handle exceptions
303 currentNode()->addChild(m_doc->createComment(new DOMStringImpl(ch.unicode(), ch.length())));
304 return true;
307 bool XMLHandler::processingInstruction(const QString &target, const QString &data)
309 if (currentNode()->nodeType() == Node::TEXT_NODE)
310 exitText();
312 // Ignore XML target -- shouldn't be part of the DOM
313 if (target == "xml")
314 return true;
316 // ### handle exceptions
317 ProcessingInstructionImpl *pi =
318 m_doc->createProcessingInstruction(target, new DOMStringImpl(data.unicode(), data.length()));
319 currentNode()->addChild(pi);
320 pi->checkStyleSheet();
321 return true;
325 QString XMLHandler::errorString() const
327 // ### Make better error-messages
328 return i18n("the document is not in the correct file format");
332 bool XMLHandler::fatalError( const QXmlParseException& exception )
334 errorProt += i18n( "fatal parsing error: %1 in line %2, column %3" ,
335 exception.message() ,
336 exception.lineNumber() ,
337 exception.columnNumber() );
339 errorLine = exception.lineNumber();
340 errorCol = exception.columnNumber();
342 return false;
345 bool XMLHandler::enterText()
347 NodeImpl *newNode = m_doc->createTextNode("");
348 if (currentNode()->addChild(newNode)) {
349 pushNode( newNode );
350 return true;
352 else {
353 delete newNode;
354 return false;
358 void XMLHandler::exitText()
360 if ( m_view && !currentNode()->attached() && !m_doc->hasPendingSheets() )
361 currentNode()->attach();
362 popNode();
365 bool XMLHandler::attributeDecl(const QString &/*eName*/, const QString &/*aName*/, const QString &/*type*/,
366 const QString &/*valueDefault*/, const QString &/*value*/)
368 // qt's xml parser (as of 2.2.3) does not currently give us values for type, valueDefault and
369 // value. When it does, we can store these somewhere and have default attributes on elements
370 return true;
373 bool XMLHandler::externalEntityDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/)
375 // ### insert these too - is there anything special we have to do here?
376 return true;
379 bool XMLHandler::internalEntityDecl(const QString &name, const QString &value)
381 EntityImpl *e = new EntityImpl(m_doc,name);
382 // ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
383 e->addChild(m_doc->createTextNode(new DOMStringImpl(value.unicode(), value.length())));
384 if (m_doc->doctype())
385 static_cast<GenericRONamedNodeMapImpl*>(m_doc->doctype()->entities())->addNode(e);
386 return true;
389 bool XMLHandler::notationDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/)
391 // ### FIXME
392 // if (m_doc->document()->doctype()) {
393 // NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId);
394 // static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->notations())->addNode(n);
395 // }
396 return true;
399 bool XMLHandler::unparsedEntityDecl(const QString &/*name*/, const QString &/*publicId*/,
400 const QString &/*systemId*/, const QString &/*notationName*/)
402 // ###
403 return true;
407 //------------------------------------------------------------------------------
409 XMLTokenizer::XMLTokenizer(DOM::DocumentImpl *_doc, KHTMLView *_view)
410 : m_handler(_doc,_view)
412 m_doc = _doc;
413 m_view = _view;
414 m_cachedScript = 0;
415 m_noErrors = true;
416 m_executingScript = false;
417 m_explicitFinishParsingNeeded = false;
418 m_insideWrite = false;
419 m_reader.setContentHandler( &m_handler );
420 m_reader.setLexicalHandler( &m_handler );
421 m_reader.setErrorHandler( &m_handler );
422 m_reader.setDeclHandler( &m_handler );
423 m_reader.setDTDHandler( &m_handler );
424 m_reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
427 XMLTokenizer::~XMLTokenizer()
429 if (m_cachedScript)
430 m_cachedScript->deref(this);
434 void XMLTokenizer::begin()
436 // parse xml file
437 m_reader.parse( &m_source, true );
440 void XMLTokenizer::write( const TokenizerString &str, bool appendData )
442 if ( !m_noErrors && appendData )
443 return;
444 // check if we try to re-enter inside write()
445 // if so buffer the data
446 if (m_insideWrite) {
447 m_bufferedData.append(str.toString());
448 return;
450 m_insideWrite = true;
452 if ( appendData ) {
453 m_source.appendXML( str.toString() );
455 } else {
456 m_source.setData( str.toString() );
458 m_noErrors = m_reader.parseContinue();
460 // check if while parsing we tried to re-enter write() method so now we have some buffered data we need to write to document
461 while (m_noErrors && !m_bufferedData.isEmpty()) {
462 m_source.appendXML(m_bufferedData);
463 m_bufferedData.clear();
464 m_noErrors = m_reader.parseContinue();
466 // check if we need to call finish explicitly (see XMLTokenizer::finish() comment for details)
467 if (m_explicitFinishParsingNeeded)
468 finish();
469 m_insideWrite = false;
472 void XMLTokenizer::end()
474 m_source.setFinished( true );
475 //if ( m_noErrors )
476 //m_noErrors = m_reader.parseContinue();
477 emit finishedParsing();
480 void XMLTokenizer::finish()
482 if (m_executingScript) {
483 // still executing script, it can happen because of reentrancy, e.g. when we have alert() inside script and we got the rest of the data
484 m_explicitFinishParsingNeeded = true;
485 return;
487 m_source.setFinished( true );
488 if (!m_noErrors) {
489 // An error occurred during parsing of the code. Display an error page to the user (the DOM
490 // tree is created manually and includes an excerpt from the code where the error is located)
492 // ### for multiple error messages, display the code for each (can this happen?)
494 // Clear the document
495 int exceptioncode = 0;
496 while (m_doc->hasChildNodes())
497 static_cast<NodeImpl*>(m_doc)->removeChild(m_doc->firstChild(),exceptioncode);
499 QString line, errorLocPtr;
500 if ( m_handler.errorLine ) {
501 QString xmlCode = m_source.data();
502 QTextStream stream(&xmlCode, QIODevice::ReadOnly);
503 for (unsigned long lineno = 0; lineno < m_handler.errorLine-1; lineno++)
504 stream.readLine();
505 line = stream.readLine();
507 for (unsigned long colno = 0; colno < m_handler.errorCol-1; colno++)
508 errorLocPtr += ' ';
509 errorLocPtr += '^';
512 // Create elements for display
513 DocumentImpl *doc = m_doc;
514 NodeImpl *html = doc->createElementNS(XHTML_NAMESPACE,"html");
515 NodeImpl *body = doc->createElementNS(XHTML_NAMESPACE,"body");
516 NodeImpl *h1 = doc->createElementNS(XHTML_NAMESPACE,"h1");
517 NodeImpl *headingText = doc->createTextNode(i18n("XML parsing error"));
518 NodeImpl *errorText = doc->createTextNode(m_handler.errorProtocol());
519 NodeImpl *hr = 0;
520 NodeImpl *pre = 0;
521 NodeImpl *lineText = 0;
522 NodeImpl *errorLocText = 0;
523 if ( !line.isNull() ) {
524 hr = doc->createElementNS(XHTML_NAMESPACE,"hr");
525 pre = doc->createElementNS(XHTML_NAMESPACE,"pre");
526 lineText = doc->createTextNode(line+'\n');
527 errorLocText = doc->createTextNode(errorLocPtr);
530 // Construct DOM tree. We ignore exceptions as we assume they will not be thrown here (due to the
531 // fact we are using a known tag set)
532 doc->appendChild(html,exceptioncode);
533 html->appendChild(body,exceptioncode);
534 body->appendChild(h1,exceptioncode);
535 h1->appendChild(headingText,exceptioncode);
536 body->appendChild(errorText,exceptioncode);
537 body->appendChild(hr,exceptioncode);
538 body->appendChild(pre,exceptioncode);
539 if ( pre ) {
540 pre->appendChild(lineText,exceptioncode);
541 pre->appendChild(errorLocText,exceptioncode);
544 // Close the renderers so that they update their display correctly
545 // ### this should not be necessary, but requires changes in the rendering code...
546 h1->close();
547 if ( pre ) pre->close();
548 body->close();
550 m_doc->recalcStyle( NodeImpl::Inherit );
551 m_doc->updateRendering();
553 else {
554 // Parsing was successful, all scripts have finished downloading and executing,
555 // calculating the style for the document and close the last element
556 m_doc->updateStyleSelector();
559 // finished parsing, call end()
560 end();
563 void XMLTokenizer::notifyFinished(CachedObject *finishedObj)
565 // This is called when a script has finished loading that was requested from executeScript(). We execute
566 // the script, and then continue parsing of the document
567 if (finishedObj == m_cachedScript) {
568 DOMString scriptSource = m_cachedScript->script();
569 m_cachedScript->deref(this);
570 m_cachedScript = 0;
571 if (m_view) {
572 m_executingScript = true;
573 m_view->part()->executeScript(DOM::Node(), scriptSource.string());
574 m_executingScript = false;
576 // should continue parsing here after we fetched and executed the script
577 m_source.setPaused(false);
578 m_reader.parseContinue();
582 bool XMLTokenizer::isWaitingForScripts() const
584 return m_cachedScript != 0;
587 void XMLTokenizer::executeScript(NodeImpl* node)
589 ElementImpl* script = static_cast<ElementImpl*>(node);
590 DOMString scriptSrc;
591 if (node->id() == WebCore::SVGNames::scriptTag.id())
592 scriptSrc = script->getAttribute(WebCore::XLinkNames::hrefAttr.id());
593 else
594 scriptSrc = script->getAttribute(ATTR_SRC);
596 QString charset = script->getAttribute(ATTR_CHARSET).string();
598 if (!scriptSrc.isEmpty()) {
599 // we have a src attribute
600 m_cachedScript = m_doc->docLoader()->requestScript(scriptSrc, charset);
601 if (m_cachedScript) {
602 // pause parsing until we got script
603 m_source.setPaused();
604 m_cachedScript->ref(this); // the parsing will be continued once the script is fetched and executed in notifyFinished()
605 return;
607 } else {
608 // no src attribute - execute from contents of tag
609 QString scriptCode = "";
610 NodeImpl *child;
611 for (child = script->firstChild(); child; child = child->nextSibling()) {
612 if ( ( child->nodeType() == Node::TEXT_NODE || child->nodeType() == Node::CDATA_SECTION_NODE) &&
613 static_cast<TextImpl*>(child)->string() )
614 scriptCode += QString::fromRawData(static_cast<TextImpl*>(child)->string()->s,
615 static_cast<TextImpl*>(child)->string()->l);
617 // the script cannot do document.write until we support incremental parsing
618 // ### handle the case where the script deletes the node or redirects to
619 // another page, etc. (also in notifyFinished())
620 // ### the script may add another script node after this one which should be executed
621 if (m_view) {
622 m_executingScript = true;
623 m_view->part()->executeScript(DOM::Node(), scriptCode);
624 m_executingScript = false;
629 #include "xml_tokenizer.moc"