Version 6.4.0.3, tag libreoffice-6.4.0.3
[LibreOffice.git] / sax / source / fastparser / fastparser.cxx
blob9b35c1682be51ad7a44c2a644a673a46b0808b80
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sax/fastparser.hxx>
21 #include <sax/fastattribs.hxx>
22 #include <xml2utf.hxx>
24 #include <com/sun/star/io/XSeekable.hpp>
25 #include <com/sun/star/lang/DisposedException.hpp>
26 #include <com/sun/star/lang/IllegalArgumentException.hpp>
27 #include <com/sun/star/uno/XComponentContext.hpp>
28 #include <com/sun/star/xml/sax/FastToken.hpp>
29 #include <com/sun/star/xml/sax/SAXParseException.hpp>
30 #include <com/sun/star/xml/sax/XFastContextHandler.hpp>
31 #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
32 #include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
33 #include <cppuhelper/implbase.hxx>
34 #include <cppuhelper/supportsservice.hxx>
35 #include <cppuhelper/exc_hlp.hxx>
36 #include <osl/conditn.hxx>
37 #include <rtl/ref.hxx>
38 #include <rtl/ustrbuf.hxx>
39 #include <sal/log.hxx>
40 #include <salhelper/thread.hxx>
41 #include <tools/diagnose_ex.h>
43 #include <queue>
44 #include <memory>
45 #include <stack>
46 #include <unordered_map>
47 #include <vector>
48 #include <cassert>
49 #include <cstring>
50 #include <libxml/parser.h>
52 // Inverse of libxml's BAD_CAST.
53 #define XML_CAST( str ) reinterpret_cast< const sal_Char* >( str )
55 using namespace std;
56 using namespace ::osl;
57 using namespace ::cppu;
58 using namespace ::com::sun::star::uno;
59 using namespace ::com::sun::star::lang;
60 using namespace ::com::sun::star::xml::sax;
61 using namespace ::com::sun::star::io;
62 using namespace com::sun::star;
63 using namespace sax_fastparser;
65 namespace {
67 struct Event;
68 class FastLocatorImpl;
69 struct NamespaceDefine;
70 struct Entity;
72 typedef std::unordered_map< OUString, sal_Int32 > NamespaceMap;
74 struct EventList
76 std::vector<Event> maEvents;
77 bool mbIsAttributesEmpty;
/// Kind of callback an Event represents.
enum class CallbackType
{
    START_ELEMENT,
    END_ELEMENT,
    CHARACTERS,
    PROCESSING_INSTRUCTION,
    DONE,       ///< emitted by parse() once the input is exhausted
    EXCEPTION   ///< emitted by ParserThread::execute() when parse() threw
};
82 struct Event
84 CallbackType maType;
85 sal_Int32 mnElementToken;
86 OUString msNamespace;
87 OUString msElementName;
88 rtl::Reference< FastAttributeList > mxAttributes;
89 rtl::Reference< FastAttributeList > mxDeclAttributes;
90 OUString msChars;
93 struct NameWithToken
95 OUString const msName;
96 sal_Int32 const mnToken;
98 NameWithToken(const OUString& sName, sal_Int32 nToken) :
99 msName(sName), mnToken(nToken) {}
102 struct SaxContext
104 Reference< XFastContextHandler > mxContext;
105 sal_Int32 const mnElementToken;
106 OUString maNamespace;
107 OUString maElementName;
109 SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName ):
110 mnElementToken(nElementToken)
112 if (nElementToken == FastToken::DONTKNOW)
114 maNamespace = aNamespace;
115 maElementName = aElementName;
121 struct ParserData
123 css::uno::Reference< css::xml::sax::XFastDocumentHandler > mxDocumentHandler;
124 css::uno::Reference< css::xml::sax::XFastTokenHandler > mxTokenHandler;
125 FastTokenHandlerBase* mpTokenHandler;
126 css::uno::Reference< css::xml::sax::XErrorHandler > mxErrorHandler;
127 css::uno::Reference< css::xml::sax::XFastNamespaceHandler >mxNamespaceHandler;
129 ParserData();
132 struct NamespaceDefine
134 OString maPrefix;
135 sal_Int32 mnToken;
136 OUString maNamespaceURL;
138 NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {}
139 NamespaceDefine() : mnToken(-1) {}
142 // Entity binds all information needed for a single file | single call of parseStream
143 struct Entity : public ParserData
145 // Amount of work producer sends to consumer in one iteration:
146 static const size_t mnEventListSize = 1000;
148 // unique for each Entity instance:
150 // Number of valid events in mxProducedEvents:
151 size_t mnProducedEventsSize;
152 std::unique_ptr<EventList> mxProducedEvents;
153 std::queue<std::unique_ptr<EventList>> maPendingEvents;
154 std::queue<std::unique_ptr<EventList>> maUsedEvents;
155 osl::Mutex maEventProtector;
157 static const size_t mnEventLowWater = 4;
158 static const size_t mnEventHighWater = 8;
159 osl::Condition maConsumeResume;
160 osl::Condition maProduceResume;
161 // Event we use to store data if threading is disabled:
162 Event maSharedEvent;
164 // copied in copy constructor:
166 // Allow to disable threading for small documents:
167 bool mbEnableThreads;
168 css::xml::sax::InputSource maStructSource;
169 xmlParserCtxtPtr mpParser;
170 ::sax_expatwrap::XMLFile2UTFConverter maConverter;
172 // Exceptions cannot be thrown through the C-XmlParser (possible
173 // resource leaks), therefore any exception thrown by a UNO callback
174 // must be saved somewhere until the C-XmlParser is stopped.
175 css::uno::Any maSavedException;
176 osl::Mutex maSavedExceptionMutex;
177 void saveException( const Any & e );
178 // Thread-safe check if maSavedException has value
179 bool hasException();
180 void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
181 bool mbDuringParse );
183 std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack;
184 /* Context for main thread consuming events.
185 * startElement() stores the data, which characters() and endElement() uses
187 std::stack< SaxContext, std::vector<SaxContext> > maContextStack;
188 // Determines which elements of maNamespaceDefines are valid in current context
189 std::stack< sal_uInt32, std::vector<sal_uInt32> > maNamespaceCount;
190 std::vector< NamespaceDefine > maNamespaceDefines;
192 explicit Entity( const ParserData& rData );
193 Entity( const Entity& rEntity ) = delete;
194 Entity& operator=( const Entity& rEntity ) = delete;
195 void startElement( Event const *pEvent );
196 void characters( const OUString& sChars );
197 void endElement();
198 void processingInstruction( const OUString& rTarget, const OUString& rData );
199 EventList& getEventList();
200 Event& getEvent( CallbackType aType );
203 } // namespace
205 namespace sax_fastparser {
207 class FastSaxParserImpl
209 public:
210 explicit FastSaxParserImpl();
211 ~FastSaxParserImpl();
213 // XFastParser
214 /// @throws css::xml::sax::SAXException
215 /// @throws css::io::IOException
216 /// @throws css::uno::RuntimeException
217 void parseStream( const css::xml::sax::InputSource& aInputSource );
218 /// @throws css::uno::RuntimeException
219 void setFastDocumentHandler( const css::uno::Reference< css::xml::sax::XFastDocumentHandler >& Handler );
220 /// @throws css::uno::RuntimeException
221 void setTokenHandler( const css::uno::Reference< css::xml::sax::XFastTokenHandler >& Handler );
222 /// @throws css::lang::IllegalArgumentException
223 /// @throws css::uno::RuntimeException
224 void registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken );
225 /// @throws css::lang::IllegalArgumentException
226 /// @throws css::uno::RuntimeException
227 OUString const & getNamespaceURL( const OUString& rPrefix );
228 /// @throws css::uno::RuntimeException
229 void setErrorHandler( const css::uno::Reference< css::xml::sax::XErrorHandler >& Handler );
230 /// @throws css::uno::RuntimeException
231 void setNamespaceHandler( const css::uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler);
233 // called by the C callbacks of the expat parser
234 void callbackStartElement( const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
235 int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes );
236 void callbackEndElement();
237 void callbackCharacters( const xmlChar* s, int nLen );
238 void callbackProcessingInstruction( const xmlChar *target, const xmlChar *data );
240 void pushEntity(const ParserData&, xml::sax::InputSource const&);
241 void popEntity();
242 Entity& getEntity() { return *mpTop; }
243 void parse();
244 void produce( bool bForceFlush = false );
245 bool m_bIgnoreMissingNSDecl;
246 bool m_bDisableThreadedParser;
248 private:
249 bool consume(EventList&);
250 void deleteUsedEvents();
251 void sendPendingCharacters();
252 void addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes);
254 sal_Int32 GetToken( const xmlChar* pName, sal_Int32 nameLen );
255 /// @throws css::xml::sax::SAXException
256 sal_Int32 GetTokenWithPrefix( const xmlChar* pPrefix, int prefixLen, const xmlChar* pName, int nameLen );
257 /// @throws css::xml::sax::SAXException
258 OUString const & GetNamespaceURL( const OString& rPrefix );
259 sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL );
260 sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName, int nNameLen );
261 void DefineNamespace( const OString& rPrefix, const OUString& namespaceURL );
263 private:
264 osl::Mutex maMutex; ///< Protecting whole parseStream() execution
265 ::rtl::Reference< FastLocatorImpl > mxDocumentLocator;
266 NamespaceMap maNamespaceMap;
268 ParserData maData; /// Cached parser configuration for next call of parseStream().
270 Entity *mpTop; /// std::stack::top() is amazingly slow => cache this.
271 std::stack< Entity > maEntities; /// Entity stack for each call of parseStream().
272 std::vector<char> pendingCharacters; /// Data from characters() callback that needs to be sent.
275 } // namespace sax_fastparser
277 namespace {
279 class ParserThread: public salhelper::Thread
281 FastSaxParserImpl *mpParser;
282 public:
283 explicit ParserThread(FastSaxParserImpl *pParser): Thread("Parser"), mpParser(pParser) {}
284 private:
285 virtual void execute() override
289 mpParser->parse();
291 catch (...)
293 Entity &rEntity = mpParser->getEntity();
294 rEntity.getEvent( CallbackType::EXCEPTION );
295 mpParser->produce( true );
300 extern "C" {
302 static void call_callbackStartElement(void *userData, const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
303 int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes)
305 FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
306 pFastParser->callbackStartElement( localName, prefix, URI, numNamespaces, namespaces, numAttributes, attributes );
309 static void call_callbackEndElement(void *userData, const xmlChar* /*localName*/, const xmlChar* /*prefix*/, const xmlChar* /*URI*/)
311 FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
312 pFastParser->callbackEndElement();
315 static void call_callbackCharacters( void *userData , const xmlChar *s , int nLen )
317 FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
318 pFastParser->callbackCharacters( s, nLen );
321 static void call_callbackProcessingInstruction( void *userData, const xmlChar *target, const xmlChar *data )
323 FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
324 pFastParser->callbackProcessingInstruction( target, data );
329 class FastLocatorImpl : public WeakImplHelper< XLocator >
331 public:
332 explicit FastLocatorImpl(FastSaxParserImpl *p) : mpParser(p) {}
334 void dispose() { mpParser = nullptr; }
335 /// @throws RuntimeException
336 void checkDispose() const { if( !mpParser ) throw DisposedException(); }
338 //XLocator
339 virtual sal_Int32 SAL_CALL getColumnNumber() override;
340 virtual sal_Int32 SAL_CALL getLineNumber() override;
341 virtual OUString SAL_CALL getPublicId() override;
342 virtual OUString SAL_CALL getSystemId() override;
344 private:
345 FastSaxParserImpl *mpParser;
348 sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber()
350 checkDispose();
351 return xmlSAX2GetColumnNumber( mpParser->getEntity().mpParser );
354 sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber()
356 checkDispose();
357 return xmlSAX2GetLineNumber( mpParser->getEntity().mpParser );
360 OUString SAL_CALL FastLocatorImpl::getPublicId()
362 checkDispose();
363 return mpParser->getEntity().maStructSource.sPublicId;
366 OUString SAL_CALL FastLocatorImpl::getSystemId()
368 checkDispose();
369 return mpParser->getEntity().maStructSource.sSystemId;
372 ParserData::ParserData()
373 : mpTokenHandler( nullptr )
376 Entity::Entity(const ParserData& rData)
377 : ParserData(rData)
378 , mnProducedEventsSize(0)
379 , mxProducedEvents()
380 , mbEnableThreads(false)
381 , mpParser(nullptr)
385 void Entity::startElement( Event const *pEvent )
387 const sal_Int32& nElementToken = pEvent->mnElementToken;
388 const OUString& aNamespace = pEvent->msNamespace;
389 const OUString& aElementName = pEvent->msElementName;
391 // Use un-wrapped pointers to avoid significant acquire/release overhead
392 XFastContextHandler *pParentContext = nullptr;
393 if( !maContextStack.empty() )
395 pParentContext = maContextStack.top().mxContext.get();
396 if( !pParentContext )
398 maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) );
399 return;
403 maContextStack.push( SaxContext( nElementToken, aNamespace, aElementName ) );
407 Reference< XFastAttributeList > xAttr( pEvent->mxAttributes.get() );
408 Reference< XFastContextHandler > xContext;
410 if ( mxNamespaceHandler.is() )
412 const Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes();
413 for (const auto& rNSDeclAttrib : NSDeclAttribs)
415 mxNamespaceHandler->registerNamespace( rNSDeclAttrib.Name, rNSDeclAttrib.Value );
419 if( nElementToken == FastToken::DONTKNOW )
421 if( pParentContext )
422 xContext = pParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr );
423 else if( mxDocumentHandler.is() )
424 xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
426 if( xContext.is() )
428 xContext->startUnknownElement( aNamespace, aElementName, xAttr );
431 else
433 if( pParentContext )
434 xContext = pParentContext->createFastChildContext( nElementToken, xAttr );
435 else if( mxDocumentHandler.is() )
436 xContext = mxDocumentHandler->createFastChildContext( nElementToken, xAttr );
438 if( xContext.is() )
439 xContext->startFastElement( nElementToken, xAttr );
441 // swap the reference we own in to avoid referencing thrash.
442 maContextStack.top().mxContext = std::move( xContext );
444 catch (...)
446 saveException( ::cppu::getCaughtException() );
450 void Entity::characters( const OUString& sChars )
452 if (maContextStack.empty())
454 // Malformed XML stream !?
455 return;
458 XFastContextHandler * pContext( maContextStack.top().mxContext.get() );
459 if( pContext ) try
461 pContext->characters( sChars );
463 catch (...)
465 saveException( ::cppu::getCaughtException() );
469 void Entity::endElement()
471 if (maContextStack.empty())
473 // Malformed XML stream !?
474 return;
477 const SaxContext& aContext = maContextStack.top();
478 XFastContextHandler* pContext( aContext.mxContext.get() );
479 if( pContext )
482 sal_Int32 nElementToken = aContext.mnElementToken;
483 if( nElementToken != FastToken::DONTKNOW )
484 pContext->endFastElement( nElementToken );
485 else
486 pContext->endUnknownElement( aContext.maNamespace, aContext.maElementName );
488 catch (...)
490 saveException( ::cppu::getCaughtException() );
492 maContextStack.pop();
495 void Entity::processingInstruction( const OUString& rTarget, const OUString& rData )
497 if( mxDocumentHandler.is() ) try
499 mxDocumentHandler->processingInstruction( rTarget, rData );
501 catch (...)
503 saveException( ::cppu::getCaughtException() );
507 EventList& Entity::getEventList()
509 if (!mxProducedEvents)
511 osl::ClearableMutexGuard aGuard(maEventProtector);
512 if (!maUsedEvents.empty())
514 mxProducedEvents = std::move(maUsedEvents.front());
515 maUsedEvents.pop();
516 aGuard.clear(); // unlock
517 mnProducedEventsSize = 0;
519 if (!mxProducedEvents)
521 mxProducedEvents.reset(new EventList);
522 mxProducedEvents->maEvents.resize(mnEventListSize);
523 mxProducedEvents->mbIsAttributesEmpty = false;
524 mnProducedEventsSize = 0;
527 return *mxProducedEvents;
530 Event& Entity::getEvent( CallbackType aType )
532 if (!mbEnableThreads)
533 return maSharedEvent;
535 EventList& rEventList = getEventList();
536 if (mnProducedEventsSize == rEventList.maEvents.size())
538 SAL_WARN_IF(!maSavedException.hasValue(), "sax",
539 "Event vector should only exceed " << mnEventListSize <<
540 " temporarily while an exception is pending");
541 rEventList.maEvents.resize(mnProducedEventsSize + 1);
543 Event& rEvent = rEventList.maEvents[mnProducedEventsSize++];
544 rEvent.maType = aType;
545 return rEvent;
548 OUString lclGetErrorMessage( xmlParserCtxtPtr ctxt, const OUString& sSystemId, sal_Int32 nLine )
550 const sal_Char* pMessage;
551 xmlErrorPtr error = xmlCtxtGetLastError( ctxt );
552 if( error && error->message )
553 pMessage = error->message;
554 else
555 pMessage = "unknown error";
556 OUStringBuffer aBuffer( 128 );
557 aBuffer.append( "[" );
558 aBuffer.append( sSystemId );
559 aBuffer.append( " line " );
560 aBuffer.append( nLine );
561 aBuffer.append( "]: " );
562 aBuffer.appendAscii( pMessage );
563 return aBuffer.makeStringAndClear();
566 // throw an exception, but avoid callback if
567 // during a threaded produce
568 void Entity::throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
569 bool mbDuringParse )
571 // Error during parsing !
572 Any savedException;
574 osl::MutexGuard g(maSavedExceptionMutex);
575 if (maSavedException.hasValue())
577 savedException.setValue(&maSavedException, cppu::UnoType<decltype(maSavedException)>::get());
580 SAXParseException aExcept(
581 lclGetErrorMessage( mpParser,
582 xDocumentLocator->getSystemId(),
583 xDocumentLocator->getLineNumber() ),
584 Reference< XInterface >(),
585 savedException,
586 xDocumentLocator->getPublicId(),
587 xDocumentLocator->getSystemId(),
588 xDocumentLocator->getLineNumber(),
589 xDocumentLocator->getColumnNumber()
592 // error handler is set, it may throw the exception
593 if( !mbDuringParse || !mbEnableThreads )
595 if (mxErrorHandler.is() )
596 mxErrorHandler->fatalError( Any( aExcept ) );
599 // error handler has not thrown, but parsing must stop => throw ourselves
600 throw aExcept;
603 // In the single threaded case we emit events via our C
604 // callbacks, so any exception caught must be queued up until
605 // we can safely re-throw it from our C++ parent of parse()
607 // If multi-threaded, we need to push an EXCEPTION event, at
608 // which point we transfer ownership of maSavedException to
609 // the consuming thread.
610 void Entity::saveException( const Any & e )
612 // fdo#81214 - allow the parser to run on after an exception,
613 // unexpectedly some 'startElements' produce a UNO_QUERY_THROW
614 // for XComponent; and yet expect to continue parsing.
615 SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e));
616 osl::MutexGuard g(maSavedExceptionMutex);
617 if (maSavedException.hasValue())
619 SAL_INFO("sax.fastparser", "discarding exception, already have one");
621 else
623 maSavedException = e;
627 bool Entity::hasException()
629 osl::MutexGuard g(maSavedExceptionMutex);
630 return maSavedException.hasValue();
633 } // namespace
635 namespace sax_fastparser {
637 FastSaxParserImpl::FastSaxParserImpl() :
638 m_bIgnoreMissingNSDecl(false),
639 m_bDisableThreadedParser(false),
640 mpTop(nullptr)
642 mxDocumentLocator.set( new FastLocatorImpl( this ) );
645 FastSaxParserImpl::~FastSaxParserImpl()
647 if( mxDocumentLocator.is() )
648 mxDocumentLocator->dispose();
651 void FastSaxParserImpl::DefineNamespace( const OString& rPrefix, const OUString& namespaceURL )
653 Entity& rEntity = getEntity();
654 assert(!rEntity.maNamespaceCount.empty()); // need a context!
656 sal_uInt32 nOffset = rEntity.maNamespaceCount.top()++;
657 if( rEntity.maNamespaceDefines.size() <= nOffset )
658 rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
660 rEntity.maNamespaceDefines[nOffset] = NamespaceDefine( rPrefix, GetNamespaceToken( namespaceURL ), namespaceURL );
663 sal_Int32 FastSaxParserImpl::GetToken( const xmlChar* pName, sal_Int32 nameLen /* = 0 */ )
665 return FastTokenHandlerBase::getTokenFromChars( getEntity().mxTokenHandler,
666 getEntity().mpTokenHandler,
667 XML_CAST( pName ), nameLen ); // uses utf-8
670 sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( const xmlChar* pPrefix, int nPrefixLen, const xmlChar* pName, int nNameLen )
672 sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
674 Entity& rEntity = getEntity();
675 if (rEntity.maNamespaceCount.empty())
676 return nNamespaceToken;
678 sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
679 while( nNamespace-- )
681 const auto & rNamespaceDefine = rEntity.maNamespaceDefines[nNamespace];
682 const OString& rPrefix( rNamespaceDefine.maPrefix );
683 if( (rPrefix.getLength() == nPrefixLen) &&
684 rtl_str_reverseCompare_WithLength(rPrefix.pData->buffer, rPrefix.pData->length, XML_CAST( pPrefix ), nPrefixLen ) == 0 )
686 nNamespaceToken = rNamespaceDefine.mnToken;
687 break;
690 if( !nNamespace && !m_bIgnoreMissingNSDecl )
691 throw SAXException("No namespace defined for " + OUString(XML_CAST(pPrefix),
692 nPrefixLen, RTL_TEXTENCODING_UTF8), Reference< XInterface >(), Any());
695 if( nNamespaceToken != FastToken::DONTKNOW )
697 sal_Int32 nNameToken = GetToken( pName, nNameLen );
698 if( nNameToken != FastToken::DONTKNOW )
699 return nNamespaceToken | nNameToken;
702 return FastToken::DONTKNOW;
705 sal_Int32 FastSaxParserImpl::GetNamespaceToken( const OUString& rNamespaceURL )
707 NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) );
708 if( aIter != maNamespaceMap.end() )
709 return (*aIter).second;
710 else
711 return FastToken::DONTKNOW;
714 OUString const & FastSaxParserImpl::GetNamespaceURL( const OString& rPrefix )
716 Entity& rEntity = getEntity();
717 if( !rEntity.maNamespaceCount.empty() )
719 sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
720 while( nNamespace-- )
721 if( rEntity.maNamespaceDefines[nNamespace].maPrefix == rPrefix )
722 return rEntity.maNamespaceDefines[nNamespace].maNamespaceURL;
725 throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix),
726 Reference< XInterface >(), Any());
729 sal_Int32 FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName, int nNameLen )
731 if( nNamespaceToken != FastToken::DONTKNOW )
733 sal_Int32 nNameToken = GetToken( pName, nNameLen );
734 if( nNameToken != FastToken::DONTKNOW )
735 return nNamespaceToken | nNameToken;
738 return FastToken::DONTKNOW;
741 namespace
743 class ParserCleanup
745 private:
746 FastSaxParserImpl& m_rParser;
747 Entity& m_rEntity;
748 rtl::Reference<ParserThread> m_xParser;
749 public:
750 ParserCleanup(FastSaxParserImpl& rParser, Entity& rEntity)
751 : m_rParser(rParser)
752 , m_rEntity(rEntity)
755 ~ParserCleanup()
757 if (m_rEntity.mpParser)
759 if (m_rEntity.mpParser->myDoc)
760 xmlFreeDoc(m_rEntity.mpParser->myDoc);
761 xmlFreeParserCtxt(m_rEntity.mpParser);
763 joinThread();
764 m_rParser.popEntity();
766 void setThread(const rtl::Reference<ParserThread> &xParser)
768 m_xParser = xParser;
770 void joinThread()
772 if (m_xParser.is())
774 rtl::Reference<ParserThread> xToJoin = m_xParser;
775 m_xParser.clear();
776 xToJoin->join();
781 /***************
783 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
784 * the file-specific initialization work. (During a parser run, external files may be opened)
786 ****************/
787 void FastSaxParserImpl::parseStream(const InputSource& rStructSource)
789 xmlInitParser();
791 // Only one text at one time
792 MutexGuard guard( maMutex );
794 pushEntity(maData, rStructSource);
795 Entity& rEntity = getEntity();
796 ParserCleanup aEnsureFree(*this, rEntity);
798 // start the document
799 if( rEntity.mxDocumentHandler.is() )
801 Reference< XLocator > xLoc( mxDocumentLocator.get() );
802 rEntity.mxDocumentHandler->setDocumentLocator( xLoc );
803 rEntity.mxDocumentHandler->startDocument();
806 if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser)
808 Reference<css::io::XSeekable> xSeekable(rEntity.maStructSource.aInputStream, UNO_QUERY);
809 // available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams
810 rEntity.mbEnableThreads = (xSeekable.is() && xSeekable->getLength() > 10000)
811 || (rEntity.maStructSource.aInputStream->available() > 10000);
814 if (rEntity.mbEnableThreads)
816 rtl::Reference<ParserThread> xParser = new ParserThread(this);
817 xParser->launch();
818 aEnsureFree.setThread(xParser);
819 bool done = false;
820 do {
821 rEntity.maConsumeResume.wait();
822 rEntity.maConsumeResume.reset();
824 osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
825 while (!rEntity.maPendingEvents.empty())
827 if (rEntity.maPendingEvents.size() <= Entity::mnEventLowWater)
828 rEntity.maProduceResume.set(); // start producer again
830 std::unique_ptr<EventList> xEventList = std::move(rEntity.maPendingEvents.front());
831 rEntity.maPendingEvents.pop();
832 aGuard.clear(); // unlock
834 if (!consume(*xEventList))
835 done = true;
837 aGuard.reset(); // lock
839 if ( rEntity.maPendingEvents.size() <= Entity::mnEventLowWater )
841 aGuard.clear();
842 for (auto& rEvent : xEventList->maEvents)
844 if (rEvent.mxAttributes.is())
846 rEvent.mxAttributes->clear();
847 if( rEntity.mxNamespaceHandler.is() )
848 rEvent.mxDeclAttributes->clear();
850 xEventList->mbIsAttributesEmpty = true;
852 aGuard.reset();
855 rEntity.maUsedEvents.push(std::move(xEventList));
857 } while (!done);
858 aEnsureFree.joinThread();
859 deleteUsedEvents();
861 // callbacks used inside XML_Parse may have caught an exception
862 // No need to lock maSavedExceptionMutex here because parser
863 // thread is joined.
864 if( rEntity.maSavedException.hasValue() )
865 rEntity.throwException( mxDocumentLocator, true );
867 else
869 parse();
872 // finish document
873 if( rEntity.mxDocumentHandler.is() )
875 rEntity.mxDocumentHandler->endDocument();
879 void FastSaxParserImpl::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler )
881 maData.mxDocumentHandler = Handler;
884 void FastSaxParserImpl::setTokenHandler( const Reference< XFastTokenHandler >& xHandler )
886 maData.mxTokenHandler = xHandler;
887 maData.mpTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() );
890 void FastSaxParserImpl::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
892 if( NamespaceToken >= FastToken::NAMESPACE )
894 if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW )
896 maNamespaceMap[ NamespaceURL ] = NamespaceToken;
897 return;
900 throw IllegalArgumentException();
903 OUString const & FastSaxParserImpl::getNamespaceURL( const OUString& rPrefix )
907 return GetNamespaceURL( OUStringToOString( rPrefix, RTL_TEXTENCODING_UTF8 ) );
909 catch (const Exception&)
912 throw IllegalArgumentException();
915 void FastSaxParserImpl::setErrorHandler(const Reference< XErrorHandler > & Handler)
917 maData.mxErrorHandler = Handler;
920 void FastSaxParserImpl::setNamespaceHandler( const Reference< XFastNamespaceHandler >& Handler )
922 maData.mxNamespaceHandler = Handler;
925 void FastSaxParserImpl::deleteUsedEvents()
927 Entity& rEntity = getEntity();
928 osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
930 while (!rEntity.maUsedEvents.empty())
932 std::unique_ptr<EventList> xEventList = std::move(rEntity.maUsedEvents.front());
933 rEntity.maUsedEvents.pop();
935 aGuard.clear(); // unlock
937 xEventList.reset();
939 aGuard.reset(); // lock
943 void FastSaxParserImpl::produce( bool bForceFlush )
945 Entity& rEntity = getEntity();
946 if (bForceFlush ||
947 rEntity.mnProducedEventsSize >= Entity::mnEventListSize)
949 osl::ResettableMutexGuard aGuard(rEntity.maEventProtector);
951 while (rEntity.maPendingEvents.size() >= Entity::mnEventHighWater)
952 { // pause parsing for a bit
953 aGuard.clear(); // unlock
954 rEntity.maProduceResume.wait();
955 rEntity.maProduceResume.reset();
956 aGuard.reset(); // lock
959 rEntity.maPendingEvents.push(std::move(rEntity.mxProducedEvents));
960 assert(rEntity.mxProducedEvents.get() == nullptr);
962 aGuard.clear(); // unlock
964 rEntity.maConsumeResume.set();
968 bool FastSaxParserImpl::consume(EventList& rEventList)
970 Entity& rEntity = getEntity();
971 rEventList.mbIsAttributesEmpty = false;
972 for (auto& rEvent : rEventList.maEvents)
974 switch (rEvent.maType)
976 case CallbackType::START_ELEMENT:
977 rEntity.startElement( &rEvent );
978 break;
979 case CallbackType::END_ELEMENT:
980 rEntity.endElement();
981 break;
982 case CallbackType::CHARACTERS:
983 rEntity.characters( rEvent.msChars );
984 break;
985 case CallbackType::PROCESSING_INSTRUCTION:
986 rEntity.processingInstruction(
987 rEvent.msNamespace, rEvent.msElementName ); // ( target, data )
988 break;
989 case CallbackType::DONE:
990 return false;
991 case CallbackType::EXCEPTION:
992 rEntity.throwException( mxDocumentLocator, false );
993 [[fallthrough]]; // avoid unreachable code warning with some compilers
994 default:
995 assert(false);
996 return false;
999 return true;
1002 void FastSaxParserImpl::pushEntity(const ParserData& rEntityData,
1003 xml::sax::InputSource const& rSource)
1005 if (!rSource.aInputStream.is())
1006 throw SAXException("No input source", Reference<XInterface>(), Any());
1008 maEntities.emplace(rEntityData);
1009 mpTop = &maEntities.top();
1011 mpTop->maStructSource = rSource;
1013 mpTop->maConverter.setInputStream(mpTop->maStructSource.aInputStream);
1014 if (!mpTop->maStructSource.sEncoding.isEmpty())
1016 mpTop->maConverter.setEncoding(OUStringToOString(mpTop->maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US));
1020 void FastSaxParserImpl::popEntity()
1022 maEntities.pop();
1023 mpTop = !maEntities.empty() ? &maEntities.top() : nullptr;
1026 // starts parsing with actual parser !
1027 void FastSaxParserImpl::parse()
1029 const int BUFFER_SIZE = 16 * 1024;
1030 Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
1032 Entity& rEntity = getEntity();
1034 // set all necessary C-Callbacks
1035 static xmlSAXHandler callbacks;
1036 callbacks.startElementNs = call_callbackStartElement;
1037 callbacks.endElementNs = call_callbackEndElement;
1038 callbacks.characters = call_callbackCharacters;
1039 callbacks.processingInstruction = call_callbackProcessingInstruction;
1040 callbacks.initialized = XML_SAX2_MAGIC;
1041 int nRead = 0;
1044 nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
1045 if( nRead <= 0 )
1047 if( rEntity.mpParser != nullptr )
1049 if( xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), 0, 1 ) != XML_ERR_OK )
1050 rEntity.throwException( mxDocumentLocator, true );
1051 if (rEntity.hasException())
1052 rEntity.throwException(mxDocumentLocator, true);
1054 break;
1057 bool bContinue = true;
1058 if( rEntity.mpParser == nullptr )
1060 // create parser with proper encoding (needs the first chunk of data)
1061 rEntity.mpParser = xmlCreatePushParserCtxt( &callbacks, this,
1062 reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, nullptr );
1063 if( !rEntity.mpParser )
1064 throw SAXException("Couldn't create parser", Reference< XInterface >(), Any() );
1066 // Tell libxml2 parser to decode entities in attribute values.
1067 // coverity[unsafe_xml_parse_config] - entity support is required
1068 xmlCtxtUseOptions(rEntity.mpParser, XML_PARSE_NOENT);
1070 else
1072 bContinue = xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, 0 )
1073 == XML_ERR_OK;
1076 // callbacks used inside XML_Parse may have caught an exception
1077 if (!bContinue)
1079 rEntity.throwException( mxDocumentLocator, true );
1081 if (rEntity.hasException())
1083 rEntity.throwException( mxDocumentLocator, true );
1085 } while( nRead > 0 );
1086 rEntity.getEvent( CallbackType::DONE );
1087 if( rEntity.mbEnableThreads )
1088 produce( true );
1091 // The C-Callbacks
1092 void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
1093 int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes)
1095 if (!pendingCharacters.empty())
1096 sendPendingCharacters();
1097 Entity& rEntity = getEntity();
1098 if( rEntity.maNamespaceCount.empty() )
1100 rEntity.maNamespaceCount.push(0);
1101 DefineNamespace( "xml", "http://www.w3.org/XML/1998/namespace");
1103 else
1105 rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
1108 // create attribute map and process namespace instructions
1109 Event& rEvent = rEntity.getEvent( CallbackType::START_ELEMENT );
1110 bool bIsAttributesEmpty = false;
1111 if ( rEntity.mbEnableThreads )
1112 bIsAttributesEmpty = rEntity.getEventList().mbIsAttributesEmpty;
1114 if (rEvent.mxAttributes.is())
1116 if( !bIsAttributesEmpty )
1117 rEvent.mxAttributes->clear();
1119 else
1120 rEvent.mxAttributes.set(
1121 new FastAttributeList( rEntity.mxTokenHandler,
1122 rEntity.mpTokenHandler ) );
1124 if( rEntity.mxNamespaceHandler.is() )
1126 if (rEvent.mxDeclAttributes.is())
1128 if( !bIsAttributesEmpty )
1129 rEvent.mxDeclAttributes->clear();
1131 else
1132 rEvent.mxDeclAttributes.set(
1133 new FastAttributeList( rEntity.mxTokenHandler,
1134 rEntity.mpTokenHandler ) );
1137 OUString sNamespace;
1138 sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
1139 if (!rEntity.maNamespaceStack.empty())
1141 sNamespace = rEntity.maNamespaceStack.top().msName;
1142 nNamespaceToken = rEntity.maNamespaceStack.top().mnToken;
1147 /* #158414# Each element may define new namespaces, also for attributes.
1148 First, process all namespaces, second, process the attributes after namespaces
1149 have been initialized. */
1151 // #158414# first: get namespaces
1152 for (int i = 0; i < numNamespaces * 2; i += 2)
1154 // namespaces[] is (prefix/URI)
1155 if( namespaces[ i ] != nullptr )
1157 DefineNamespace( OString( XML_CAST( namespaces[ i ] )),
1158 OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ));
1159 if( rEntity.mxNamespaceHandler.is() )
1160 rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
1162 else
1164 // default namespace
1165 sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
1166 nNamespaceToken = GetNamespaceToken( sNamespace );
1167 if( rEntity.mxNamespaceHandler.is() )
1168 rEvent.mxDeclAttributes->addUnknown( "", OString( XML_CAST( namespaces[ i + 1 ] ) ) );
1172 if ( rEntity.mxTokenHandler.is() )
1174 // #158414# second: fill attribute list with other attributes
1175 rEvent.mxAttributes->reserve( numAttributes );
1176 for (int i = 0; i < numAttributes * 5; i += 5)
1178 // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd )
1179 if( attributes[ i + 1 ] != nullptr )
1181 sal_Int32 nAttributeToken = GetTokenWithPrefix( attributes[ i + 1 ], strlen( XML_CAST( attributes[ i + 1 ] )), attributes[ i ], strlen( XML_CAST( attributes[ i ] )));
1182 if( nAttributeToken != FastToken::DONTKNOW )
1183 rEvent.mxAttributes->add( nAttributeToken, XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] );
1184 else
1185 addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
1187 else
1189 sal_Int32 nAttributeToken = GetToken( attributes[ i ], strlen( XML_CAST( attributes[ i ] )));
1190 if( nAttributeToken != FastToken::DONTKNOW )
1191 rEvent.mxAttributes->add( nAttributeToken, XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] );
1192 else
1193 rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
1194 OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1198 if( prefix != nullptr )
1199 rEvent.mnElementToken = GetTokenWithPrefix( prefix, strlen( XML_CAST( prefix )), localName, strlen( XML_CAST( localName )));
1200 else if( !sNamespace.isEmpty() )
1201 rEvent.mnElementToken = GetTokenWithContextNamespace( nNamespaceToken, localName, strlen( XML_CAST( localName )));
1202 else
1203 rEvent.mnElementToken = GetToken( localName, strlen( XML_CAST( localName )));
1205 else
1207 for (int i = 0; i < numAttributes * 5; i += 5)
1209 if( attributes[ i + 1 ] != nullptr )
1210 addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
1211 else
1212 rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
1213 OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1216 rEvent.mnElementToken = FastToken::DONTKNOW;
1219 if( rEvent.mnElementToken == FastToken::DONTKNOW )
1221 OUString aElementPrefix;
1222 if( prefix != nullptr )
1224 if ( !m_bIgnoreMissingNSDecl || URI != nullptr )
1225 sNamespace = OUString( XML_CAST( URI ), strlen( XML_CAST( URI )), RTL_TEXTENCODING_UTF8 );
1226 else
1227 sNamespace.clear();
1228 nNamespaceToken = GetNamespaceToken( sNamespace );
1229 aElementPrefix = OUString( XML_CAST( prefix ), strlen( XML_CAST( prefix )), RTL_TEXTENCODING_UTF8 );
1231 const OUString& rElementLocalName = OUString( XML_CAST( localName ), strlen( XML_CAST( localName )), RTL_TEXTENCODING_UTF8 );
1232 rEvent.msNamespace = sNamespace;
1233 rEvent.msElementName = (aElementPrefix.isEmpty())? rElementLocalName : aElementPrefix + ":" + rElementLocalName;
1235 else // token is always preferred.
1236 rEvent.msElementName.clear();
1238 rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
1239 if (rEntity.mbEnableThreads)
1240 produce();
1241 else
1243 SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator->getLineNumber() << " column " << mxDocumentLocator->getColumnNumber() << " " << ( prefix ? XML_CAST(prefix) : "(null)" ) << ":" << localName);
1244 rEntity.startElement( &rEvent );
1247 catch (...)
1249 rEntity.saveException( ::cppu::getCaughtException() );
1253 void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes)
1255 OUString aNamespaceURI;
1256 if ( !m_bIgnoreMissingNSDecl || attributes[i + 2] != nullptr )
1257 aNamespaceURI = OUString( XML_CAST( attributes[ i + 2 ] ), strlen( XML_CAST( attributes[ i + 2 ] )), RTL_TEXTENCODING_UTF8 );
1258 const OString& rPrefix = OString( XML_CAST( attributes[ i + 1 ] ));
1259 const OString& rLocalName = OString( XML_CAST( attributes[ i ] ));
1260 OString aQualifiedName = (rPrefix.isEmpty())? rLocalName : rPrefix + ":" + rLocalName;
1261 xAttributes->addUnknown( aNamespaceURI, aQualifiedName,
1262 OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1265 void FastSaxParserImpl::callbackEndElement()
1267 if (!pendingCharacters.empty())
1268 sendPendingCharacters();
1269 Entity& rEntity = getEntity();
1270 SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount");
1271 if( !rEntity.maNamespaceCount.empty() )
1272 rEntity.maNamespaceCount.pop();
1274 SAL_WARN_IF(rEntity.maNamespaceStack.empty(), "sax", "Empty NamespaceStack");
1275 if( !rEntity.maNamespaceStack.empty() )
1276 rEntity.maNamespaceStack.pop();
1278 rEntity.getEvent( CallbackType::END_ELEMENT );
1279 if (rEntity.mbEnableThreads)
1280 produce();
1281 else
1282 rEntity.endElement();
1285 void FastSaxParserImpl::callbackCharacters( const xmlChar* s, int nLen )
1287 // SAX interface allows that the characters callback splits content of one XML node
1288 // (e.g. because there's an entity that needs decoding), however for consumers it's
1289 // simpler FastSaxParser's character callback provides the whole string at once,
1290 // so merge data from possible multiple calls and send them at once (before the element
1291 // ends or another one starts).
1293 // We use a std::vector<char> to avoid calling into the OUString constructor more than once when
1294 // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly
1295 // often in writer documents.
1296 int nOriginalLen = pendingCharacters.size();
1297 pendingCharacters.resize(nOriginalLen + nLen);
1298 memcpy(pendingCharacters.data() + nOriginalLen, s, nLen);
1301 void FastSaxParserImpl::sendPendingCharacters()
1303 Entity& rEntity = getEntity();
1304 OUString sChars( pendingCharacters.data(), pendingCharacters.size(), RTL_TEXTENCODING_UTF8 );
1305 if (rEntity.mbEnableThreads)
1307 Event& rEvent = rEntity.getEvent( CallbackType::CHARACTERS );
1308 rEvent.msChars = sChars;
1309 produce();
1311 else
1312 rEntity.characters( sChars );
1313 pendingCharacters.resize(0);
1316 void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar *target, const xmlChar *data )
1318 if (!pendingCharacters.empty())
1319 sendPendingCharacters();
1320 Entity& rEntity = getEntity();
1321 Event& rEvent = rEntity.getEvent( CallbackType::PROCESSING_INSTRUCTION );
1323 // This event is very rare, so no need to waste extra space for this
1324 // Using namespace and element strings to be target and data in that order.
1325 rEvent.msNamespace = OUString( XML_CAST( target ), strlen( XML_CAST( target ) ), RTL_TEXTENCODING_UTF8 );
1326 if ( data != nullptr )
1327 rEvent.msElementName = OUString( XML_CAST( data ), strlen( XML_CAST( data ) ), RTL_TEXTENCODING_UTF8 );
1328 else
1329 rEvent.msElementName.clear();
1331 if (rEntity.mbEnableThreads)
1332 produce();
1333 else
1334 rEntity.processingInstruction( rEvent.msNamespace, rEvent.msElementName );
1337 FastSaxParser::FastSaxParser() : mpImpl(new FastSaxParserImpl) {}
1339 FastSaxParser::~FastSaxParser()
1343 void SAL_CALL
1344 FastSaxParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments)
1346 if (rArguments.hasElements())
1348 OUString str;
1349 if ( rArguments[0] >>= str )
1351 if ( str == "IgnoreMissingNSDecl" )
1352 mpImpl->m_bIgnoreMissingNSDecl = true;
1353 else if ( str == "DoSmeplease" )
1354 ; //just ignore as this is already immune to billion laughs
1355 else if ( str == "DisableThreadedParser" )
1356 mpImpl->m_bDisableThreadedParser = true;
1357 else
1358 throw IllegalArgumentException();
1360 else
1361 throw IllegalArgumentException();
1365 void FastSaxParser::parseStream( const xml::sax::InputSource& aInputSource )
1367 mpImpl->parseStream(aInputSource);
1370 void FastSaxParser::setFastDocumentHandler( const uno::Reference<xml::sax::XFastDocumentHandler>& Handler )
1372 mpImpl->setFastDocumentHandler(Handler);
1375 void FastSaxParser::setTokenHandler( const uno::Reference<xml::sax::XFastTokenHandler>& Handler )
1377 mpImpl->setTokenHandler(Handler);
1380 void FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
1382 mpImpl->registerNamespace(NamespaceURL, NamespaceToken);
1385 OUString FastSaxParser::getNamespaceURL( const OUString& rPrefix )
1387 return mpImpl->getNamespaceURL(rPrefix);
1390 void FastSaxParser::setErrorHandler( const uno::Reference< xml::sax::XErrorHandler >& Handler )
1392 mpImpl->setErrorHandler(Handler);
1395 void FastSaxParser::setEntityResolver( const uno::Reference< xml::sax::XEntityResolver >& )
1397 // not implemented
1400 void FastSaxParser::setLocale( const lang::Locale& )
1402 // not implemented
1405 void FastSaxParser::setNamespaceHandler( const uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler)
1407 mpImpl->setNamespaceHandler(Handler);
1410 OUString FastSaxParser::getImplementationName()
1412 return "com.sun.star.comp.extensions.xml.sax.FastParser";
1415 sal_Bool FastSaxParser::supportsService( const OUString& ServiceName )
1417 return cppu::supportsService(this, ServiceName);
1420 uno::Sequence<OUString> FastSaxParser::getSupportedServiceNames()
1422 Sequence<OUString> seq { "com.sun.star.xml.sax.FastParser" };
1423 return seq;
1426 } // namespace sax_fastparser
1428 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
1429 com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation(
1430 css::uno::XComponentContext *,
1431 css::uno::Sequence<css::uno::Any> const &)
1433 return cppu::acquire(new FastSaxParser);
1436 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */