2 * This file is part of the DOM implementation for KDE.
4 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
5 * Copyright (C) 2003 Apple Computer, Inc.
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
24 #include "xml_tokenizer.h"
25 #include "xml/dom_docimpl.h"
26 #include "xml/dom_textimpl.h"
27 #include "xml/dom_xmlimpl.h"
28 #include "html/html_tableimpl.h"
29 #include "html/html_headimpl.h"
30 #include "rendering/render_object.h"
31 #include "misc/htmltags.h"
32 #include "misc/htmlattrs.h"
33 #include "misc/loader.h"
35 #include "khtmlview.h"
36 #include "khtml_part.h"
37 #include <QtCore/QVariant>
42 #include "svg/SVGScriptElement.h"
43 #include "svg/XLinkNames.h"
46 using namespace khtml
;
// Default constructor: starts with the read position (m_pos) and the character
// pointer (m_unicode) at zero, and with the finished and paused flags cleared.
48 XMLIncrementalSource::XMLIncrementalSource()
49 : QXmlInputSource(), m_pos( 0 ), m_unicode( 0 ),
50 m_finished( false ), m_paused(false)
// Deliberately has no statements: it exists only to replace the default
// fetchData() behaviour, so nothing is fetched here.
54 void XMLIncrementalSource::fetchData()
56 //just a dummy to override the default behavior
// Returns the next character for the XML reader:
//  - QXmlInputSource::EndOfDocument on the first branch
//    (NOTE(review): its condition is not visible in this excerpt; presumably
//    it tests m_finished - confirm);
//  - QXmlInputSource::EndOfData while the source is paused, or once m_pos has
//    reached the length of m_data;
//  - otherwise the character at m_pos, after which m_pos is advanced by one.
59 QChar
XMLIncrementalSource::next()
62 return QXmlInputSource::EndOfDocument
;
63 else if (m_paused
|| m_data
.length() <= m_pos
)
64 return QXmlInputSource::EndOfData
;
66 return m_unicode
[m_pos
++];
// Sets the source text from a QString. The visible statement re-points
// m_unicode at the character buffer returned by m_data.unicode().
// NOTE(review): the other statements of this function are not visible in this
// excerpt; presumably they store str in m_data and reset the read state - confirm.
69 void XMLIncrementalSource::setData( const QString
& str
)
72 m_unicode
= m_data
.unicode();
// Byte-array overload: converts data with fromRawData( data, true ) and
// passes the result on to setData().
77 void XMLIncrementalSource::setData( const QByteArray
& data
)
79 setData( fromRawData( data
, true ) );
// Supplies further XML text to the source. The visible statement refreshes
// m_unicode from m_data.unicode().
// NOTE(review): the statement that adds str to m_data is not visible in this
// excerpt - confirm against the full file.
82 void XMLIncrementalSource::appendXML( const QString
& str
)
85 m_unicode
= m_data
.unicode();
88 QString
XMLIncrementalSource::data() const
// Stores the given value in the m_finished flag.
93 void XMLIncrementalSource::setFinished( bool finished
)
95 m_finished
= finished
;
98 XMLHandler::XMLHandler(DocumentImpl
*_doc
, KHTMLView
*_view
)
106 XMLHandler::~XMLHandler()
// Pushes node onto the m_nodes stack, making it the node returned by m_nodes.top().
110 void XMLHandler::pushNode( NodeImpl
*node
)
112 m_nodes
.push( node
);
// Removes the top entry of the m_nodes stack and returns it.
115 NodeImpl
*XMLHandler::popNode()
117 return m_nodes
.pop();
// Returns the node on top of the m_nodes stack without removing it.
// NOTE(review): the value returned when m_nodes is empty is not visible in
// this excerpt - confirm against the full file.
120 NodeImpl
*XMLHandler::currentNode() const
122 if ( m_nodes
.isEmpty() )
125 return m_nodes
.top();
128 QString
XMLHandler::errorProtocol()
134 bool XMLHandler::startDocument()
136 // at the beginning of parsing: do some initialization
// Records a namespace declaration: pushes uri onto the stack that
// namespaceInfo keeps for prefix.
// NOTE(review): the return statement is not visible in this excerpt.
143 bool XMLHandler::startPrefixMapping(const QString
& prefix
, const QString
& uri
)
145 namespaceInfo
[prefix
].push(uri
);
// Ends a namespace declaration for prefix. The visible statements take a
// reference to the prefix's stack in namespaceInfo and remove the prefix
// entry from namespaceInfo.
// NOTE(review): the lines between those two statements (and the return) are
// not visible in this excerpt; presumably the stack is popped and the entry
// is removed only once the stack is empty - confirm.
149 bool XMLHandler::endPrefixMapping(const QString
& prefix
)
151 QStack
<QString
>& stack
= namespaceInfo
[prefix
];
154 namespaceInfo
.remove(prefix
);
158 void XMLHandler::fixUpNSURI(QString
& uri
, const QString
& qname
)
160 /* QXml does not resolve the namespaces of attributes in the same
161 tag that preceed the xmlns declaration. This fixes up that case */
162 if (uri
.isEmpty() && qname
.indexOf(':') != -1) {
163 QXmlNamespaceSupport ns
;
164 QString localName
, prefix
;
165 ns
.splitName(qname
, prefix
, localName
);
166 if (namespaceInfo
.contains(prefix
)) {
167 uri
= namespaceInfo
[prefix
].top();
// Handles an element start tag reported by the XML reader.
// Visible steps, in order:
//  - check whether the current node is a TEXT_NODE (the action taken is not
//    visible in this excerpt);
//  - choose the namespace URI: namespaceURI when it is not null, otherwise
//    the empty string;
//  - create the element with m_doc->createElementNS(nsURI, qName);
//  - for every attribute, patch its namespace URI with fixUpNSURI() and set
//    it on the element with setAttributeNS();
//  - mark HTML/XHTML script elements as created by the parser;
//  - offer the element to the current node and then to its ancestors via
//    addChild();
//  - attach the element when a view exists, it is not yet attached and no
//    style sheets are pending, then push it onto the node stack.
// NOTE(review): the declarations of nsURI and i, the handling of a non-zero
// exceptioncode, the body of the ancestor loop after addChild(), and the
// return statements are not visible in this excerpt.
172 bool XMLHandler::startElement( const QString
& namespaceURI
, const QString
& /*localName*/,
173 const QString
& qName
, const QXmlAttributes
& atts
)
175 if (currentNode()->nodeType() == Node::TEXT_NODE
)
179 if (!namespaceURI
.isNull())
180 nsURI
= DOMString(namespaceURI
);
182 // No namespace declared, default to the no namespace
183 nsURI
= DOMString("");
184 ElementImpl
*newElement
= m_doc
->createElementNS(nsURI
,qName
);
188 for (i
= 0; i
< atts
.length(); i
++) {
189 int exceptioncode
= 0;
190 QString uriString
= atts
.uri(i
);
191 QString qnString
= atts
.qName(i
);
192 fixUpNSURI(uriString
, qnString
);
193 DOMString
uri(uriString
);
194 DOMString
qn(qnString
);
195 DOMString
val(atts
.value(i
));
196 newElement
->setAttributeNS(uri
, qn
, val
, exceptioncode
);
197 if (exceptioncode
) // exception setting attributes
201 if (newElement
->id() == ID_SCRIPT
|| newElement
->id() == makeId(xhtmlNamespace
, ID_SCRIPT
))
202 static_cast<HTMLScriptElementImpl
*>(newElement
)->setCreatedByParser(true);
204 //this is tricky. in general the node doesn't have to attach to the one it's in. as far
205 //as standards go this is wrong, but there's literally thousands of documents where
206 //we see <p><ul>...</ul></p>. the following code is there for those cases.
207 //when we can't attach to the currently holding us node we try to attach to its parent
208 bool attached
= false;
209 for ( NodeImpl
*current
= currentNode(); current
; current
= current
->parent() ) {
210 attached
= current
->addChild( newElement
);
215 if (m_view
&& !newElement
->attached() && !m_doc
->hasPendingSheets())
216 newElement
->attach();
217 pushNode( newElement
);
225 // ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a
226 // single object implementing the Text interface that is the only child of the element."... do we
227 // need to ensure that empty elements always have an empty text child?
// Handles an element end tag. All three name parameters are unused.
// Visible steps: check whether the current node is a TEXT_NODE, pop the
// finished node from the node stack, loop while the current node exists and
// reports implicitNode(), and - when the popped node is a script element
// (ID_SCRIPT, the XHTML-namespaced ID_SCRIPT, or the SVG script tag) - hand it
// to the document's tokenizer via XMLTokenizer::executeScript().
// NOTE(review): the statements under the TEXT_NODE check, the body of the
// while loop and the return statement are not visible in this excerpt.
231 bool XMLHandler::endElement( const QString
& /*namespaceURI*/, const QString
& /*localName*/, const QString
& /*qName*/ )
233 if (currentNode()->nodeType() == Node::TEXT_NODE
)
236 NodeImpl
*node
= popNode();
239 while ( currentNode() && currentNode()->implicitNode() ) //for the implicit HTMLTableSectionElementImpl
244 // if the node is a script element try to execute it immediately
245 if ((node
->id() == ID_SCRIPT
) || (node
->id() == makeId(xhtmlNamespace
, ID_SCRIPT
)) || node
->id() == WebCore::SVGNames::scriptTag
.id())
246 static_cast<XMLTokenizer
*>(m_doc
->tokenizer())->executeScript(node
);
// Handles the start of a CDATA section: creates an empty CDATA section node
// with m_doc->createCDATASection() and tries to add it as a child of the
// current node. On success the visible condition checks for a view, an
// unattached node and no pending style sheets.
// NOTE(review): the statements under the TEXT_NODE check, under the innermost
// condition, and on the failure path are not visible in this excerpt.
252 bool XMLHandler::startCDATA()
254 if (currentNode()->nodeType() == Node::TEXT_NODE
)
257 NodeImpl
*newNode
= m_doc
->createCDATASection(new DOMStringImpl(""));
258 if (currentNode()->addChild(newNode
)) {
259 if (m_view
&& !newNode
->attached() && !m_doc
->hasPendingSheets())
// Handles the end of a CDATA section: asserts that a current node exists and
// returns currentNode() converted to bool (true when it is non-null).
// NOTE(review): one statement before the assertion is not visible in this excerpt.
271 bool XMLHandler::endCDATA()
274 Q_ASSERT( currentNode() );
275 return currentNode();
// Handles character data reported by the XML reader.
// When the current node is a TEXT_NODE or a CDATA_SECTION_NODE (a third
// alternative of the condition is not visible in this excerpt), ch is appended
// to it with TextImpl::appendData(). Further down, input that is empty after
// trimming returns true immediately.
// NOTE(review): the lines between those two parts and after the whitespace
// check are not visible in this excerpt.
278 bool XMLHandler::characters( const QString
& ch
)
280 if (currentNode()->nodeType() == Node::TEXT_NODE
||
281 currentNode()->nodeType() == Node::CDATA_SECTION_NODE
||
283 int exceptioncode
= 0;
284 static_cast<TextImpl
*>(currentNode())->appendData(ch
,exceptioncode
);
290 // Don't worry about white-space violating DTD
291 if (ch
.trimmed().isEmpty()) return true;
// Handles an XML comment: creates a comment node holding the text of ch and
// adds it as a child of the current node. The result of addChild() is not
// checked here.
// NOTE(review): the statement under the TEXT_NODE check and the return
// statement are not visible in this excerpt.
298 bool XMLHandler::comment(const QString
& ch
)
300 if (currentNode()->nodeType() == Node::TEXT_NODE
)
302 // ### handle exceptions
303 currentNode()->addChild(m_doc
->createComment(new DOMStringImpl(ch
.unicode(), ch
.length())));
// Handles a processing instruction: creates a ProcessingInstructionImpl for
// target with the text of data, adds it as a child of the current node, and
// calls checkStyleSheet() on it. The result of addChild() is not checked here.
// NOTE(review): the statement under the TEXT_NODE check, the code that
// implements the "Ignore XML target" comment, and the return statement are
// not visible in this excerpt.
307 bool XMLHandler::processingInstruction(const QString
&target
, const QString
&data
)
309 if (currentNode()->nodeType() == Node::TEXT_NODE
)
312 // Ignore XML target -- shouldn't be part of the DOM
316 // ### handle exceptions
317 ProcessingInstructionImpl
*pi
=
318 m_doc
->createProcessingInstruction(target
, new DOMStringImpl(data
.unicode(), data
.length()));
319 currentNode()->addChild(pi
);
320 pi
->checkStyleSheet();
// Returns a single generic, translated (i18n) error description; the message
// does not depend on what actually went wrong.
325 QString
XMLHandler::errorString() const
327 // ### Make better error-messages
328 return i18n("the document is not in the correct file format");
// Records a fatal parse error: appends a translated message containing the
// exception's message, line number and column number to errorProt, and stores
// the line and column in errorLine and errorCol.
// NOTE(review): the return statement is not visible in this excerpt.
332 bool XMLHandler::fatalError( const QXmlParseException
& exception
)
334 errorProt
+= i18n( "fatal parsing error: %1 in line %2, column %3" ,
335 exception
.message() ,
336 exception
.lineNumber() ,
337 exception
.columnNumber() );
339 errorLine
= exception
.lineNumber();
340 errorCol
= exception
.columnNumber();
// Creates an empty text node with m_doc->createTextNode("") and tries to add
// it as a child of the current node.
// NOTE(review): what happens on success and on failure of addChild() is not
// visible in this excerpt.
345 bool XMLHandler::enterText()
347 NodeImpl
*newNode
= m_doc
->createTextNode("");
348 if (currentNode()->addChild(newNode
)) {
// Attaches the current node when a view exists, the node is not attached yet
// and the document has no pending style sheets.
// NOTE(review): any statements after the attach() call are not visible in
// this excerpt.
358 void XMLHandler::exitText()
360 if ( m_view
&& !currentNode()->attached() && !m_doc
->hasPendingSheets() )
361 currentNode()->attach();
365 bool XMLHandler::attributeDecl(const QString
&/*eName*/, const QString
&/*aName*/, const QString
&/*type*/,
366 const QString
&/*valueDefault*/, const QString
&/*value*/)
368 // qt's xml parser (as of 2.2.3) does not currently give us values for type, valueDefault and
369 // value. When it does, we can store these somewhere and have default attributes on elements
373 bool XMLHandler::externalEntityDecl(const QString
&/*name*/, const QString
&/*publicId*/, const QString
&/*systemId*/)
375 // ### insert these too - is there anything special we have to do here?
// Handles an internal entity declaration: creates an EntityImpl called name,
// gives it a single text-node child holding value, and - when the document
// has a doctype - adds the entity to the doctype's entities map.
// NOTE(review): the return statement is not visible in this excerpt.
379 bool XMLHandler::internalEntityDecl(const QString
&name
, const QString
&value
)
381 EntityImpl
*e
= new EntityImpl(m_doc
,name
);
382 // ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
383 e
->addChild(m_doc
->createTextNode(new DOMStringImpl(value
.unicode(), value
.length())));
384 if (m_doc
->doctype())
385 static_cast<GenericRONamedNodeMapImpl
*>(m_doc
->doctype()->entities())->addNode(e
);
389 bool XMLHandler::notationDecl(const QString
&/*name*/, const QString
&/*publicId*/, const QString
&/*systemId*/)
392 // if (m_doc->document()->doctype()) {
393 // NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId);
394 // static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->notations())->addNode(n);
399 bool XMLHandler::unparsedEntityDecl(const QString
&/*name*/, const QString
&/*publicId*/,
400 const QString
&/*systemId*/, const QString
&/*notationName*/)
407 //------------------------------------------------------------------------------
// Constructs the tokenizer: builds m_handler from _doc and _view, clears the
// m_executingScript, m_explicitFinishParsingNeeded and m_insideWrite flags,
// registers m_handler with m_reader as content, lexical, error, declaration
// and DTD handler, and switches on the SAX "namespace-prefixes" feature.
// NOTE(review): the remaining initializers and the first statements of the
// body are not visible in this excerpt.
409 XMLTokenizer::XMLTokenizer(DOM::DocumentImpl
*_doc
, KHTMLView
*_view
)
410 : m_handler(_doc
,_view
)
416 m_executingScript
= false;
417 m_explicitFinishParsingNeeded
= false;
418 m_insideWrite
= false;
419 m_reader
.setContentHandler( &m_handler
);
420 m_reader
.setLexicalHandler( &m_handler
);
421 m_reader
.setErrorHandler( &m_handler
);
422 m_reader
.setDeclHandler( &m_handler
);
423 m_reader
.setDTDHandler( &m_handler
);
424 m_reader
.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
// Destructor: releases this object's reference on m_cachedScript.
// NOTE(review): the lines around the deref() call are not visible in this
// excerpt; presumably the call is guarded by a null check - confirm.
427 XMLTokenizer::~XMLTokenizer()
430 m_cachedScript
->deref(this);
// Starts parsing m_source with m_reader. The second argument (true) requests
// incremental parsing, which is continued later through parseContinue().
// NOTE(review): one statement before the parse() call is not visible in this excerpt.
434 void XMLTokenizer::begin()
437 m_reader
.parse( &m_source
, true );
// Feeds a piece of document text to the parser.
// Visible steps, in order:
//  - a check on "!m_noErrors && appendData" (its action is not visible here);
//  - re-entrancy handling: text arriving while write() is already running is
//    appended to m_bufferedData;
//  - m_insideWrite is set, the text is given to m_source with appendXML() or
//    setData(), and parsing continues with m_reader.parseContinue();
//  - while there are no errors and buffered text exists, the buffer is
//    appended to m_source, cleared, and parsing continues again;
//  - a check on m_explicitFinishParsingNeeded, after which m_insideWrite is cleared.
// NOTE(review): the conditions choosing between buffering, appendXML() and
// setData(), and the statements under the first and last checks, are not
// visible in this excerpt.
440 void XMLTokenizer::write( const TokenizerString
&str
, bool appendData
)
442 if ( !m_noErrors
&& appendData
)
444 // check if we try to re-enter inside write()
445 // if so buffer the data
447 m_bufferedData
.append(str
.toString());
450 m_insideWrite
= true;
453 m_source
.appendXML( str
.toString() );
456 m_source
.setData( str
.toString() );
458 m_noErrors
= m_reader
.parseContinue();
460 // check if while parsing we tried to re-enter write() method so now we have some buffered data we need to write to document
461 while (m_noErrors
&& !m_bufferedData
.isEmpty()) {
462 m_source
.appendXML(m_bufferedData
);
463 m_bufferedData
.clear();
464 m_noErrors
= m_reader
.parseContinue();
466 // check if we need to call finish explicitly (see XMLTokenizer::finish() comment for details)
467 if (m_explicitFinishParsingNeeded
)
469 m_insideWrite
= false;
// Marks the source as finished and emits finishedParsing(). The parser is not
// run again here: the parseContinue() call is commented out.
472 void XMLTokenizer::end()
474 m_source
.setFinished( true );
476 //m_noErrors = m_reader.parseContinue();
477 emit
finishedParsing();
// Finishes parsing of the document.
// Visible steps, in order:
//  - while a script is still executing, only m_explicitFinishParsingNeeded is
//    set (the rest of that branch is not visible in this excerpt);
//  - the source is marked as finished;
//  - error path: every child of the document is removed, the source line named
//    by m_handler.errorLine is read from the source text, and an XHTML error
//    page (html/body/h1, the error protocol text and - when a source line was
//    found - an hr plus a pre holding the line and an error-location string)
//    is built, after which style is recalculated and rendering updated;
//  - success path: m_doc->updateStyleSelector() is called.
// NOTE(review): the conditions separating the error and success paths, the
// loop bodies that skip lines and build errorLocPtr, the declarations of hr
// and pre, any guard around the pre->appendChild() calls, and the final call
// mentioned by the "call end()" comment are not visible in this excerpt.
480 void XMLTokenizer::finish()
482 if (m_executingScript
) {
483 // still executing script, it can happen because of reentrancy, e.g. when we have alert() inside script and we got the rest of the data
484 m_explicitFinishParsingNeeded
= true;
487 m_source
.setFinished( true );
489 // An error occurred during parsing of the code. Display an error page to the user (the DOM
490 // tree is created manually and includes an excerpt from the code where the error is located)
492 // ### for multiple error messages, display the code for each (can this happen?)
494 // Clear the document
495 int exceptioncode
= 0;
496 while (m_doc
->hasChildNodes())
497 static_cast<NodeImpl
*>(m_doc
)->removeChild(m_doc
->firstChild(),exceptioncode
);
499 QString line
, errorLocPtr
;
500 if ( m_handler
.errorLine
) {
501 QString xmlCode
= m_source
.data();
502 QTextStream
stream(&xmlCode
, QIODevice::ReadOnly
);
503 for (unsigned long lineno
= 0; lineno
< m_handler
.errorLine
-1; lineno
++)
505 line
= stream
.readLine();
507 for (unsigned long colno
= 0; colno
< m_handler
.errorCol
-1; colno
++)
512 // Create elements for display
513 DocumentImpl
*doc
= m_doc
;
514 NodeImpl
*html
= doc
->createElementNS(XHTML_NAMESPACE
,"html");
515 NodeImpl
*body
= doc
->createElementNS(XHTML_NAMESPACE
,"body");
516 NodeImpl
*h1
= doc
->createElementNS(XHTML_NAMESPACE
,"h1");
517 NodeImpl
*headingText
= doc
->createTextNode(i18n("XML parsing error"));
518 NodeImpl
*errorText
= doc
->createTextNode(m_handler
.errorProtocol());
521 NodeImpl
*lineText
= 0;
522 NodeImpl
*errorLocText
= 0;
523 if ( !line
.isNull() ) {
524 hr
= doc
->createElementNS(XHTML_NAMESPACE
,"hr");
525 pre
= doc
->createElementNS(XHTML_NAMESPACE
,"pre");
526 lineText
= doc
->createTextNode(line
+'\n');
527 errorLocText
= doc
->createTextNode(errorLocPtr
);
530 // Construct DOM tree. We ignore exceptions as we assume they will not be thrown here (due to the
531 // fact we are using a known tag set)
532 doc
->appendChild(html
,exceptioncode
);
533 html
->appendChild(body
,exceptioncode
);
534 body
->appendChild(h1
,exceptioncode
);
535 h1
->appendChild(headingText
,exceptioncode
);
536 body
->appendChild(errorText
,exceptioncode
);
537 body
->appendChild(hr
,exceptioncode
);
538 body
->appendChild(pre
,exceptioncode
);
540 pre
->appendChild(lineText
,exceptioncode
);
541 pre
->appendChild(errorLocText
,exceptioncode
);
544 // Close the renderers so that they update their display correctly
545 // ### this should not be necessary, but requires changes in the rendering code...
547 if ( pre
) pre
->close();
550 m_doc
->recalcStyle( NodeImpl::Inherit
);
551 m_doc
->updateRendering();
554 // Parsing was successful, all scripts have finished downloading and executing,
555 // calculating the style for the document and close the last element
556 m_doc
->updateStyleSelector();
559 // finished parsing, call end()
// Callback for a cached object that has finished loading. When finishedObj is
// the script stored in m_cachedScript, the script text is taken from it, this
// object's reference on it is released, the script is executed through
// m_view->part()->executeScript() with m_executingScript set for the duration,
// and parsing resumes: the source is un-paused and m_reader.parseContinue()
// is called.
// NOTE(review): one or two lines after the deref() call are not visible in
// this excerpt; presumably m_cachedScript is reset there - confirm.
563 void XMLTokenizer::notifyFinished(CachedObject
*finishedObj
)
565 // This is called when a script has finished loading that was requested from executeScript(). We execute
566 // the script, and then continue parsing of the document
567 if (finishedObj
== m_cachedScript
) {
568 DOMString scriptSource
= m_cachedScript
->script();
569 m_cachedScript
->deref(this);
572 m_executingScript
= true;
573 m_view
->part()->executeScript(DOM::Node(), scriptSource
.string());
574 m_executingScript
= false;
576 // should continue parsing here after we fetched and executed the script
577 m_source
.setPaused(false);
578 m_reader
.parseContinue();
// Returns true while m_cachedScript is set (non-null), false otherwise.
582 bool XMLTokenizer::isWaitingForScripts() const
584 return m_cachedScript
!= 0;
// Executes the script element given as node.
// Visible steps, in order:
//  - the script location is read from the XLink href attribute for an SVG
//    script tag, and from ATTR_SRC on the other branch; the charset comes
//    from ATTR_CHARSET;
//  - with a non-empty location, the script is requested from the document's
//    loader; when a cached script is returned, the source is paused and this
//    object registers itself on the script with ref(this), so that
//    notifyFinished() continues the work;
//  - on the inline path, the text of all TEXT_NODE and CDATA_SECTION_NODE
//    children with a non-null string is concatenated and executed through
//    m_view->part()->executeScript(), with m_executingScript set for the duration.
// NOTE(review): the declarations of scriptSrc and child, the else keywords
// separating the branches, and any guard around the m_view use are not
// visible in this excerpt.
587 void XMLTokenizer::executeScript(NodeImpl
* node
)
589 ElementImpl
* script
= static_cast<ElementImpl
*>(node
);
591 if (node
->id() == WebCore::SVGNames::scriptTag
.id())
592 scriptSrc
= script
->getAttribute(WebCore::XLinkNames::hrefAttr
.id());
594 scriptSrc
= script
->getAttribute(ATTR_SRC
);
596 QString charset
= script
->getAttribute(ATTR_CHARSET
).string();
598 if (!scriptSrc
.isEmpty()) {
599 // we have a src attribute
600 m_cachedScript
= m_doc
->docLoader()->requestScript(scriptSrc
, charset
);
601 if (m_cachedScript
) {
602 // pause parsing until we got script
603 m_source
.setPaused();
604 m_cachedScript
->ref(this); // the parsing will be continued once the script is fetched and executed in notifyFinished()
608 // no src attribute - execute from contents of tag
609 QString scriptCode
= "";
611 for (child
= script
->firstChild(); child
; child
= child
->nextSibling()) {
612 if ( ( child
->nodeType() == Node::TEXT_NODE
|| child
->nodeType() == Node::CDATA_SECTION_NODE
) &&
613 static_cast<TextImpl
*>(child
)->string() )
614 scriptCode
+= QString::fromRawData(static_cast<TextImpl
*>(child
)->string()->s
,
615 static_cast<TextImpl
*>(child
)->string()->l
);
617 // the script cannot do document.write until we support incremental parsing
618 // ### handle the case where the script deletes the node or redirects to
619 // another page, etc. (also in notifyFinished())
620 // ### the script may add another script node after this one which should be executed
622 m_executingScript
= true;
623 m_view
->part()->executeScript(DOM::Node(), scriptCode
);
624 m_executingScript
= false;
629 #include "xml_tokenizer.moc"