1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sax/fastparser.hxx>
21 #include <sax/fastattribs.hxx>
22 #include <xml2utf.hxx>
24 #include <com/sun/star/io/XSeekable.hpp>
25 #include <com/sun/star/lang/DisposedException.hpp>
26 #include <com/sun/star/lang/IllegalArgumentException.hpp>
27 #include <com/sun/star/uno/XComponentContext.hpp>
28 #include <com/sun/star/xml/sax/FastToken.hpp>
29 #include <com/sun/star/xml/sax/SAXParseException.hpp>
30 #include <com/sun/star/xml/sax/XFastContextHandler.hpp>
31 #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
32 #include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
33 #include <cppuhelper/implbase.hxx>
34 #include <cppuhelper/supportsservice.hxx>
35 #include <cppuhelper/exc_hlp.hxx>
36 #include <osl/conditn.hxx>
37 #include <rtl/ref.hxx>
38 #include <rtl/ustrbuf.hxx>
39 #include <sal/log.hxx>
40 #include <salhelper/thread.hxx>
41 #include <tools/diagnose_ex.h>
46 #include <unordered_map>
50 #include <libxml/parser.h>
52 // Inverse of libxml's BAD_CAST.
53 #define XML_CAST( str ) reinterpret_cast< const sal_Char* >( str )
56 using namespace ::osl
;
57 using namespace ::cppu
;
58 using namespace ::com::sun::star::uno
;
59 using namespace ::com::sun::star::lang
;
60 using namespace ::com::sun::star::xml::sax
;
61 using namespace ::com::sun::star::io
;
62 using namespace com::sun::star
;
63 using namespace sax_fastparser
;
68 class FastLocatorImpl
;
69 struct NamespaceDefine
;
72 typedef std::unordered_map
< OUString
, sal_Int32
> NamespaceMap
;
76 std::vector
<Event
> maEvents
;
77 bool mbIsAttributesEmpty
;
80 enum class CallbackType
{ START_ELEMENT
, END_ELEMENT
, CHARACTERS
, PROCESSING_INSTRUCTION
, DONE
, EXCEPTION
};
85 sal_Int32 mnElementToken
;
87 OUString msElementName
;
88 rtl::Reference
< FastAttributeList
> mxAttributes
;
89 rtl::Reference
< FastAttributeList
> mxDeclAttributes
;
95 OUString
const msName
;
96 sal_Int32
const mnToken
;
98 NameWithToken(const OUString
& sName
, sal_Int32 nToken
) :
99 msName(sName
), mnToken(nToken
) {}
104 Reference
< XFastContextHandler
> mxContext
;
105 sal_Int32
const mnElementToken
;
106 OUString maNamespace
;
107 OUString maElementName
;
109 SaxContext( sal_Int32 nElementToken
, const OUString
& aNamespace
, const OUString
& aElementName
):
110 mnElementToken(nElementToken
)
112 if (nElementToken
== FastToken::DONTKNOW
)
114 maNamespace
= aNamespace
;
115 maElementName
= aElementName
;
123 css::uno::Reference
< css::xml::sax::XFastDocumentHandler
> mxDocumentHandler
;
124 css::uno::Reference
< css::xml::sax::XFastTokenHandler
> mxTokenHandler
;
125 FastTokenHandlerBase
* mpTokenHandler
;
126 css::uno::Reference
< css::xml::sax::XErrorHandler
> mxErrorHandler
;
127 css::uno::Reference
< css::xml::sax::XFastNamespaceHandler
>mxNamespaceHandler
;
132 struct NamespaceDefine
136 OUString maNamespaceURL
;
138 NamespaceDefine( const OString
& rPrefix
, sal_Int32 nToken
, const OUString
& rNamespaceURL
) : maPrefix( rPrefix
), mnToken( nToken
), maNamespaceURL( rNamespaceURL
) {}
139 NamespaceDefine() : mnToken(-1) {}
142 // Entity binds all information needed for a single file | single call of parseStream
143 struct Entity
: public ParserData
145 // Amount of work producer sends to consumer in one iteration:
146 static const size_t mnEventListSize
= 1000;
148 // unique for each Entity instance:
150 // Number of valid events in mxProducedEvents:
151 size_t mnProducedEventsSize
;
152 std::unique_ptr
<EventList
> mxProducedEvents
;
153 std::queue
<std::unique_ptr
<EventList
>> maPendingEvents
;
154 std::queue
<std::unique_ptr
<EventList
>> maUsedEvents
;
155 osl::Mutex maEventProtector
;
157 static const size_t mnEventLowWater
= 4;
158 static const size_t mnEventHighWater
= 8;
159 osl::Condition maConsumeResume
;
160 osl::Condition maProduceResume
;
161 // Event we use to store data if threading is disabled:
164 // copied in copy constructor:
166 // Allow to disable threading for small documents:
167 bool mbEnableThreads
;
168 css::xml::sax::InputSource maStructSource
;
169 xmlParserCtxtPtr mpParser
;
170 ::sax_expatwrap::XMLFile2UTFConverter maConverter
;
172 // Exceptions cannot be thrown through the C-XmlParser (possible
173 // resource leaks), therefore any exception thrown by a UNO callback
174 // must be saved somewhere until the C-XmlParser is stopped.
175 css::uno::Any maSavedException
;
176 osl::Mutex maSavedExceptionMutex
;
177 void saveException( const Any
& e
);
178 // Thread-safe check if maSavedException has value
180 void throwException( const ::rtl::Reference
< FastLocatorImpl
> &xDocumentLocator
,
181 bool mbDuringParse
);
183 std::stack
< NameWithToken
, std::vector
<NameWithToken
> > maNamespaceStack
;
184 /* Context for main thread consuming events.
185 * startElement() stores the data, which characters() and endElement() uses
187 std::stack
< SaxContext
, std::vector
<SaxContext
> > maContextStack
;
188 // Determines which elements of maNamespaceDefines are valid in current context
189 std::stack
< sal_uInt32
, std::vector
<sal_uInt32
> > maNamespaceCount
;
190 std::vector
< NamespaceDefine
> maNamespaceDefines
;
192 explicit Entity( const ParserData
& rData
);
193 Entity( const Entity
& rEntity
) = delete;
194 Entity
& operator=( const Entity
& rEntity
) = delete;
195 void startElement( Event
const *pEvent
);
196 void characters( const OUString
& sChars
);
198 void processingInstruction( const OUString
& rTarget
, const OUString
& rData
);
199 EventList
& getEventList();
200 Event
& getEvent( CallbackType aType
);
205 namespace sax_fastparser
{
207 class FastSaxParserImpl
210 explicit FastSaxParserImpl();
211 ~FastSaxParserImpl();
214 /// @throws css::xml::sax::SAXException
215 /// @throws css::io::IOException
216 /// @throws css::uno::RuntimeException
217 void parseStream( const css::xml::sax::InputSource
& aInputSource
);
218 /// @throws css::uno::RuntimeException
219 void setFastDocumentHandler( const css::uno::Reference
< css::xml::sax::XFastDocumentHandler
>& Handler
);
220 /// @throws css::uno::RuntimeException
221 void setTokenHandler( const css::uno::Reference
< css::xml::sax::XFastTokenHandler
>& Handler
);
222 /// @throws css::lang::IllegalArgumentException
223 /// @throws css::uno::RuntimeException
224 void registerNamespace( const OUString
& NamespaceURL
, sal_Int32 NamespaceToken
);
225 /// @throws css::lang::IllegalArgumentException
226 /// @throws css::uno::RuntimeException
227 OUString
const & getNamespaceURL( const OUString
& rPrefix
);
228 /// @throws css::uno::RuntimeException
229 void setErrorHandler( const css::uno::Reference
< css::xml::sax::XErrorHandler
>& Handler
);
230 /// @throws css::uno::RuntimeException
231 void setNamespaceHandler( const css::uno::Reference
< css::xml::sax::XFastNamespaceHandler
>& Handler
);
// called by the C callbacks of the libxml2 parser
234 void callbackStartElement( const xmlChar
*localName
, const xmlChar
* prefix
, const xmlChar
* URI
,
235 int numNamespaces
, const xmlChar
** namespaces
, int numAttributes
, const xmlChar
**attributes
);
236 void callbackEndElement();
237 void callbackCharacters( const xmlChar
* s
, int nLen
);
238 void callbackProcessingInstruction( const xmlChar
*target
, const xmlChar
*data
);
240 void pushEntity(const ParserData
&, xml::sax::InputSource
const&);
242 Entity
& getEntity() { return *mpTop
; }
244 void produce( bool bForceFlush
= false );
245 bool m_bIgnoreMissingNSDecl
;
246 bool m_bDisableThreadedParser
;
249 bool consume(EventList
&);
250 void deleteUsedEvents();
251 void sendPendingCharacters();
252 void addUnknownElementWithPrefix(const xmlChar
**attributes
, int i
, rtl::Reference
< FastAttributeList
> const & xAttributes
);
254 sal_Int32
GetToken( const xmlChar
* pName
, sal_Int32 nameLen
);
255 /// @throws css::xml::sax::SAXException
256 sal_Int32
GetTokenWithPrefix( const xmlChar
* pPrefix
, int prefixLen
, const xmlChar
* pName
, int nameLen
);
257 /// @throws css::xml::sax::SAXException
258 OUString
const & GetNamespaceURL( const OString
& rPrefix
);
259 sal_Int32
GetNamespaceToken( const OUString
& rNamespaceURL
);
260 sal_Int32
GetTokenWithContextNamespace( sal_Int32 nNamespaceToken
, const xmlChar
* pName
, int nNameLen
);
261 void DefineNamespace( const OString
& rPrefix
, const OUString
& namespaceURL
);
264 osl::Mutex maMutex
; ///< Protecting whole parseStream() execution
265 ::rtl::Reference
< FastLocatorImpl
> mxDocumentLocator
;
266 NamespaceMap maNamespaceMap
;
268 ParserData maData
; /// Cached parser configuration for next call of parseStream().
270 Entity
*mpTop
; /// std::stack::top() is amazingly slow => cache this.
271 std::stack
< Entity
> maEntities
; /// Entity stack for each call of parseStream().
272 std::vector
<char> pendingCharacters
; /// Data from characters() callback that needs to be sent.
275 } // namespace sax_fastparser
279 class ParserThread
: public salhelper::Thread
281 FastSaxParserImpl
*mpParser
;
283 explicit ParserThread(FastSaxParserImpl
*pParser
): Thread("Parser"), mpParser(pParser
) {}
285 virtual void execute() override
293 Entity
&rEntity
= mpParser
->getEntity();
294 rEntity
.getEvent( CallbackType::EXCEPTION
);
295 mpParser
->produce( true );
302 static void call_callbackStartElement(void *userData
, const xmlChar
*localName
, const xmlChar
* prefix
, const xmlChar
* URI
,
303 int numNamespaces
, const xmlChar
** namespaces
, int numAttributes
, int /*defaultedAttributes*/, const xmlChar
**attributes
)
305 FastSaxParserImpl
* pFastParser
= static_cast<FastSaxParserImpl
*>( userData
);
306 pFastParser
->callbackStartElement( localName
, prefix
, URI
, numNamespaces
, namespaces
, numAttributes
, attributes
);
309 static void call_callbackEndElement(void *userData
, const xmlChar
* /*localName*/, const xmlChar
* /*prefix*/, const xmlChar
* /*URI*/)
311 FastSaxParserImpl
* pFastParser
= static_cast<FastSaxParserImpl
*>( userData
);
312 pFastParser
->callbackEndElement();
315 static void call_callbackCharacters( void *userData
, const xmlChar
*s
, int nLen
)
317 FastSaxParserImpl
* pFastParser
= static_cast<FastSaxParserImpl
*>( userData
);
318 pFastParser
->callbackCharacters( s
, nLen
);
321 static void call_callbackProcessingInstruction( void *userData
, const xmlChar
*target
, const xmlChar
*data
)
323 FastSaxParserImpl
* pFastParser
= static_cast<FastSaxParserImpl
*>( userData
);
324 pFastParser
->callbackProcessingInstruction( target
, data
);
329 class FastLocatorImpl
: public WeakImplHelper
< XLocator
>
332 explicit FastLocatorImpl(FastSaxParserImpl
*p
) : mpParser(p
) {}
334 void dispose() { mpParser
= nullptr; }
335 /// @throws RuntimeException
336 void checkDispose() const { if( !mpParser
) throw DisposedException(); }
339 virtual sal_Int32 SAL_CALL
getColumnNumber() override
;
340 virtual sal_Int32 SAL_CALL
getLineNumber() override
;
341 virtual OUString SAL_CALL
getPublicId() override
;
342 virtual OUString SAL_CALL
getSystemId() override
;
345 FastSaxParserImpl
*mpParser
;
348 sal_Int32 SAL_CALL
FastLocatorImpl::getColumnNumber()
351 return xmlSAX2GetColumnNumber( mpParser
->getEntity().mpParser
);
354 sal_Int32 SAL_CALL
FastLocatorImpl::getLineNumber()
357 return xmlSAX2GetLineNumber( mpParser
->getEntity().mpParser
);
360 OUString SAL_CALL
FastLocatorImpl::getPublicId()
363 return mpParser
->getEntity().maStructSource
.sPublicId
;
366 OUString SAL_CALL
FastLocatorImpl::getSystemId()
369 return mpParser
->getEntity().maStructSource
.sSystemId
;
372 ParserData::ParserData()
373 : mpTokenHandler( nullptr )
376 Entity::Entity(const ParserData
& rData
)
378 , mnProducedEventsSize(0)
380 , mbEnableThreads(false)
385 void Entity::startElement( Event
const *pEvent
)
387 const sal_Int32
& nElementToken
= pEvent
->mnElementToken
;
388 const OUString
& aNamespace
= pEvent
->msNamespace
;
389 const OUString
& aElementName
= pEvent
->msElementName
;
391 // Use un-wrapped pointers to avoid significant acquire/release overhead
392 XFastContextHandler
*pParentContext
= nullptr;
393 if( !maContextStack
.empty() )
395 pParentContext
= maContextStack
.top().mxContext
.get();
396 if( !pParentContext
)
398 maContextStack
.push( SaxContext(nElementToken
, aNamespace
, aElementName
) );
403 maContextStack
.push( SaxContext( nElementToken
, aNamespace
, aElementName
) );
407 Reference
< XFastAttributeList
> xAttr( pEvent
->mxAttributes
.get() );
408 Reference
< XFastContextHandler
> xContext
;
410 if ( mxNamespaceHandler
.is() )
412 const Sequence
< xml::Attribute
> NSDeclAttribs
= pEvent
->mxDeclAttributes
->getUnknownAttributes();
413 for (const auto& rNSDeclAttrib
: NSDeclAttribs
)
415 mxNamespaceHandler
->registerNamespace( rNSDeclAttrib
.Name
, rNSDeclAttrib
.Value
);
419 if( nElementToken
== FastToken::DONTKNOW
)
422 xContext
= pParentContext
->createUnknownChildContext( aNamespace
, aElementName
, xAttr
);
423 else if( mxDocumentHandler
.is() )
424 xContext
= mxDocumentHandler
->createUnknownChildContext( aNamespace
, aElementName
, xAttr
);
428 xContext
->startUnknownElement( aNamespace
, aElementName
, xAttr
);
434 xContext
= pParentContext
->createFastChildContext( nElementToken
, xAttr
);
435 else if( mxDocumentHandler
.is() )
436 xContext
= mxDocumentHandler
->createFastChildContext( nElementToken
, xAttr
);
439 xContext
->startFastElement( nElementToken
, xAttr
);
441 // swap the reference we own in to avoid referencing thrash.
442 maContextStack
.top().mxContext
= std::move( xContext
);
446 saveException( ::cppu::getCaughtException() );
450 void Entity::characters( const OUString
& sChars
)
452 if (maContextStack
.empty())
454 // Malformed XML stream !?
458 XFastContextHandler
* pContext( maContextStack
.top().mxContext
.get() );
461 pContext
->characters( sChars
);
465 saveException( ::cppu::getCaughtException() );
469 void Entity::endElement()
471 if (maContextStack
.empty())
473 // Malformed XML stream !?
477 const SaxContext
& aContext
= maContextStack
.top();
478 XFastContextHandler
* pContext( aContext
.mxContext
.get() );
482 sal_Int32 nElementToken
= aContext
.mnElementToken
;
483 if( nElementToken
!= FastToken::DONTKNOW
)
484 pContext
->endFastElement( nElementToken
);
486 pContext
->endUnknownElement( aContext
.maNamespace
, aContext
.maElementName
);
490 saveException( ::cppu::getCaughtException() );
492 maContextStack
.pop();
495 void Entity::processingInstruction( const OUString
& rTarget
, const OUString
& rData
)
497 if( mxDocumentHandler
.is() ) try
499 mxDocumentHandler
->processingInstruction( rTarget
, rData
);
503 saveException( ::cppu::getCaughtException() );
507 EventList
& Entity::getEventList()
509 if (!mxProducedEvents
)
511 osl::ClearableMutexGuard
aGuard(maEventProtector
);
512 if (!maUsedEvents
.empty())
514 mxProducedEvents
= std::move(maUsedEvents
.front());
516 aGuard
.clear(); // unlock
517 mnProducedEventsSize
= 0;
519 if (!mxProducedEvents
)
521 mxProducedEvents
.reset(new EventList
);
522 mxProducedEvents
->maEvents
.resize(mnEventListSize
);
523 mxProducedEvents
->mbIsAttributesEmpty
= false;
524 mnProducedEventsSize
= 0;
527 return *mxProducedEvents
;
530 Event
& Entity::getEvent( CallbackType aType
)
532 if (!mbEnableThreads
)
533 return maSharedEvent
;
535 EventList
& rEventList
= getEventList();
536 if (mnProducedEventsSize
== rEventList
.maEvents
.size())
538 SAL_WARN_IF(!maSavedException
.hasValue(), "sax",
539 "Event vector should only exceed " << mnEventListSize
<<
540 " temporarily while an exception is pending");
541 rEventList
.maEvents
.resize(mnProducedEventsSize
+ 1);
543 Event
& rEvent
= rEventList
.maEvents
[mnProducedEventsSize
++];
544 rEvent
.maType
= aType
;
548 OUString
lclGetErrorMessage( xmlParserCtxtPtr ctxt
, const OUString
& sSystemId
, sal_Int32 nLine
)
550 const sal_Char
* pMessage
;
551 xmlErrorPtr error
= xmlCtxtGetLastError( ctxt
);
552 if( error
&& error
->message
)
553 pMessage
= error
->message
;
555 pMessage
= "unknown error";
556 OUStringBuffer
aBuffer( 128 );
557 aBuffer
.append( "[" );
558 aBuffer
.append( sSystemId
);
559 aBuffer
.append( " line " );
560 aBuffer
.append( nLine
);
561 aBuffer
.append( "]: " );
562 aBuffer
.appendAscii( pMessage
);
563 return aBuffer
.makeStringAndClear();
566 // throw an exception, but avoid callback if
567 // during a threaded produce
568 void Entity::throwException( const ::rtl::Reference
< FastLocatorImpl
> &xDocumentLocator
,
571 // Error during parsing !
574 osl::MutexGuard
g(maSavedExceptionMutex
);
575 if (maSavedException
.hasValue())
577 savedException
.setValue(&maSavedException
, cppu::UnoType
<decltype(maSavedException
)>::get());
580 SAXParseException
aExcept(
581 lclGetErrorMessage( mpParser
,
582 xDocumentLocator
->getSystemId(),
583 xDocumentLocator
->getLineNumber() ),
584 Reference
< XInterface
>(),
586 xDocumentLocator
->getPublicId(),
587 xDocumentLocator
->getSystemId(),
588 xDocumentLocator
->getLineNumber(),
589 xDocumentLocator
->getColumnNumber()
592 // error handler is set, it may throw the exception
593 if( !mbDuringParse
|| !mbEnableThreads
)
595 if (mxErrorHandler
.is() )
596 mxErrorHandler
->fatalError( Any( aExcept
) );
599 // error handler has not thrown, but parsing must stop => throw ourselves
603 // In the single threaded case we emit events via our C
604 // callbacks, so any exception caught must be queued up until
605 // we can safely re-throw it from our C++ parent of parse()
607 // If multi-threaded, we need to push an EXCEPTION event, at
608 // which point we transfer ownership of maSavedException to
609 // the consuming thread.
610 void Entity::saveException( const Any
& e
)
612 // fdo#81214 - allow the parser to run on after an exception,
613 // unexpectedly some 'startElements' produce a UNO_QUERY_THROW
614 // for XComponent; and yet expect to continue parsing.
615 SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e
));
616 osl::MutexGuard
g(maSavedExceptionMutex
);
617 if (maSavedException
.hasValue())
619 SAL_INFO("sax.fastparser", "discarding exception, already have one");
623 maSavedException
= e
;
627 bool Entity::hasException()
629 osl::MutexGuard
g(maSavedExceptionMutex
);
630 return maSavedException
.hasValue();
635 namespace sax_fastparser
{
637 FastSaxParserImpl::FastSaxParserImpl() :
638 m_bIgnoreMissingNSDecl(false),
639 m_bDisableThreadedParser(false),
642 mxDocumentLocator
.set( new FastLocatorImpl( this ) );
645 FastSaxParserImpl::~FastSaxParserImpl()
647 if( mxDocumentLocator
.is() )
648 mxDocumentLocator
->dispose();
651 void FastSaxParserImpl::DefineNamespace( const OString
& rPrefix
, const OUString
& namespaceURL
)
653 Entity
& rEntity
= getEntity();
654 assert(!rEntity
.maNamespaceCount
.empty()); // need a context!
656 sal_uInt32 nOffset
= rEntity
.maNamespaceCount
.top()++;
657 if( rEntity
.maNamespaceDefines
.size() <= nOffset
)
658 rEntity
.maNamespaceDefines
.resize( rEntity
.maNamespaceDefines
.size() + 64 );
660 rEntity
.maNamespaceDefines
[nOffset
] = NamespaceDefine( rPrefix
, GetNamespaceToken( namespaceURL
), namespaceURL
);
663 sal_Int32
FastSaxParserImpl::GetToken( const xmlChar
* pName
, sal_Int32 nameLen
/* = 0 */ )
665 return FastTokenHandlerBase::getTokenFromChars( getEntity().mxTokenHandler
,
666 getEntity().mpTokenHandler
,
667 XML_CAST( pName
), nameLen
); // uses utf-8
670 sal_Int32
FastSaxParserImpl::GetTokenWithPrefix( const xmlChar
* pPrefix
, int nPrefixLen
, const xmlChar
* pName
, int nNameLen
)
672 sal_Int32 nNamespaceToken
= FastToken::DONTKNOW
;
674 Entity
& rEntity
= getEntity();
675 if (rEntity
.maNamespaceCount
.empty())
676 return nNamespaceToken
;
678 sal_uInt32 nNamespace
= rEntity
.maNamespaceCount
.top();
679 while( nNamespace
-- )
681 const auto & rNamespaceDefine
= rEntity
.maNamespaceDefines
[nNamespace
];
682 const OString
& rPrefix( rNamespaceDefine
.maPrefix
);
683 if( (rPrefix
.getLength() == nPrefixLen
) &&
684 rtl_str_reverseCompare_WithLength(rPrefix
.pData
->buffer
, rPrefix
.pData
->length
, XML_CAST( pPrefix
), nPrefixLen
) == 0 )
686 nNamespaceToken
= rNamespaceDefine
.mnToken
;
690 if( !nNamespace
&& !m_bIgnoreMissingNSDecl
)
691 throw SAXException("No namespace defined for " + OUString(XML_CAST(pPrefix
),
692 nPrefixLen
, RTL_TEXTENCODING_UTF8
), Reference
< XInterface
>(), Any());
695 if( nNamespaceToken
!= FastToken::DONTKNOW
)
697 sal_Int32 nNameToken
= GetToken( pName
, nNameLen
);
698 if( nNameToken
!= FastToken::DONTKNOW
)
699 return nNamespaceToken
| nNameToken
;
702 return FastToken::DONTKNOW
;
705 sal_Int32
FastSaxParserImpl::GetNamespaceToken( const OUString
& rNamespaceURL
)
707 NamespaceMap::iterator
aIter( maNamespaceMap
.find( rNamespaceURL
) );
708 if( aIter
!= maNamespaceMap
.end() )
709 return (*aIter
).second
;
711 return FastToken::DONTKNOW
;
714 OUString
const & FastSaxParserImpl::GetNamespaceURL( const OString
& rPrefix
)
716 Entity
& rEntity
= getEntity();
717 if( !rEntity
.maNamespaceCount
.empty() )
719 sal_uInt32 nNamespace
= rEntity
.maNamespaceCount
.top();
720 while( nNamespace
-- )
721 if( rEntity
.maNamespaceDefines
[nNamespace
].maPrefix
== rPrefix
)
722 return rEntity
.maNamespaceDefines
[nNamespace
].maNamespaceURL
;
725 throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix
),
726 Reference
< XInterface
>(), Any());
729 sal_Int32
FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken
, const xmlChar
* pName
, int nNameLen
)
731 if( nNamespaceToken
!= FastToken::DONTKNOW
)
733 sal_Int32 nNameToken
= GetToken( pName
, nNameLen
);
734 if( nNameToken
!= FastToken::DONTKNOW
)
735 return nNamespaceToken
| nNameToken
;
738 return FastToken::DONTKNOW
;
746 FastSaxParserImpl
& m_rParser
;
748 rtl::Reference
<ParserThread
> m_xParser
;
750 ParserCleanup(FastSaxParserImpl
& rParser
, Entity
& rEntity
)
757 if (m_rEntity
.mpParser
)
759 if (m_rEntity
.mpParser
->myDoc
)
760 xmlFreeDoc(m_rEntity
.mpParser
->myDoc
);
761 xmlFreeParserCtxt(m_rEntity
.mpParser
);
764 m_rParser
.popEntity();
766 void setThread(const rtl::Reference
<ParserThread
> &xParser
)
774 rtl::Reference
<ParserThread
> xToJoin
= m_xParser
;
783 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
784 * the file-specific initialization work. (During a parser run, external files may be opened)
787 void FastSaxParserImpl::parseStream(const InputSource
& rStructSource
)
// Only one document is parsed at a time
792 MutexGuard
guard( maMutex
);
794 pushEntity(maData
, rStructSource
);
795 Entity
& rEntity
= getEntity();
796 ParserCleanup
aEnsureFree(*this, rEntity
);
798 // start the document
799 if( rEntity
.mxDocumentHandler
.is() )
801 Reference
< XLocator
> xLoc( mxDocumentLocator
.get() );
802 rEntity
.mxDocumentHandler
->setDocumentLocator( xLoc
);
803 rEntity
.mxDocumentHandler
->startDocument();
806 if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser
)
808 Reference
<css::io::XSeekable
> xSeekable(rEntity
.maStructSource
.aInputStream
, UNO_QUERY
);
809 // available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams
810 rEntity
.mbEnableThreads
= (xSeekable
.is() && xSeekable
->getLength() > 10000)
811 || (rEntity
.maStructSource
.aInputStream
->available() > 10000);
814 if (rEntity
.mbEnableThreads
)
816 rtl::Reference
<ParserThread
> xParser
= new ParserThread(this);
818 aEnsureFree
.setThread(xParser
);
821 rEntity
.maConsumeResume
.wait();
822 rEntity
.maConsumeResume
.reset();
824 osl::ResettableMutexGuard
aGuard(rEntity
.maEventProtector
);
825 while (!rEntity
.maPendingEvents
.empty())
827 if (rEntity
.maPendingEvents
.size() <= Entity::mnEventLowWater
)
828 rEntity
.maProduceResume
.set(); // start producer again
830 std::unique_ptr
<EventList
> xEventList
= std::move(rEntity
.maPendingEvents
.front());
831 rEntity
.maPendingEvents
.pop();
832 aGuard
.clear(); // unlock
834 if (!consume(*xEventList
))
837 aGuard
.reset(); // lock
839 if ( rEntity
.maPendingEvents
.size() <= Entity::mnEventLowWater
)
842 for (auto& rEvent
: xEventList
->maEvents
)
844 if (rEvent
.mxAttributes
.is())
846 rEvent
.mxAttributes
->clear();
847 if( rEntity
.mxNamespaceHandler
.is() )
848 rEvent
.mxDeclAttributes
->clear();
850 xEventList
->mbIsAttributesEmpty
= true;
855 rEntity
.maUsedEvents
.push(std::move(xEventList
));
858 aEnsureFree
.joinThread();
861 // callbacks used inside XML_Parse may have caught an exception
862 // No need to lock maSavedExceptionMutex here because parser
864 if( rEntity
.maSavedException
.hasValue() )
865 rEntity
.throwException( mxDocumentLocator
, true );
873 if( rEntity
.mxDocumentHandler
.is() )
875 rEntity
.mxDocumentHandler
->endDocument();
879 void FastSaxParserImpl::setFastDocumentHandler( const Reference
< XFastDocumentHandler
>& Handler
)
881 maData
.mxDocumentHandler
= Handler
;
884 void FastSaxParserImpl::setTokenHandler( const Reference
< XFastTokenHandler
>& xHandler
)
886 maData
.mxTokenHandler
= xHandler
;
887 maData
.mpTokenHandler
= dynamic_cast< FastTokenHandlerBase
*>( xHandler
.get() );
890 void FastSaxParserImpl::registerNamespace( const OUString
& NamespaceURL
, sal_Int32 NamespaceToken
)
892 if( NamespaceToken
>= FastToken::NAMESPACE
)
894 if( GetNamespaceToken( NamespaceURL
) == FastToken::DONTKNOW
)
896 maNamespaceMap
[ NamespaceURL
] = NamespaceToken
;
900 throw IllegalArgumentException();
903 OUString
const & FastSaxParserImpl::getNamespaceURL( const OUString
& rPrefix
)
907 return GetNamespaceURL( OUStringToOString( rPrefix
, RTL_TEXTENCODING_UTF8
) );
909 catch (const Exception
&)
912 throw IllegalArgumentException();
915 void FastSaxParserImpl::setErrorHandler(const Reference
< XErrorHandler
> & Handler
)
917 maData
.mxErrorHandler
= Handler
;
920 void FastSaxParserImpl::setNamespaceHandler( const Reference
< XFastNamespaceHandler
>& Handler
)
922 maData
.mxNamespaceHandler
= Handler
;
925 void FastSaxParserImpl::deleteUsedEvents()
927 Entity
& rEntity
= getEntity();
928 osl::ResettableMutexGuard
aGuard(rEntity
.maEventProtector
);
930 while (!rEntity
.maUsedEvents
.empty())
932 std::unique_ptr
<EventList
> xEventList
= std::move(rEntity
.maUsedEvents
.front());
933 rEntity
.maUsedEvents
.pop();
935 aGuard
.clear(); // unlock
939 aGuard
.reset(); // lock
943 void FastSaxParserImpl::produce( bool bForceFlush
)
945 Entity
& rEntity
= getEntity();
947 rEntity
.mnProducedEventsSize
>= Entity::mnEventListSize
)
949 osl::ResettableMutexGuard
aGuard(rEntity
.maEventProtector
);
951 while (rEntity
.maPendingEvents
.size() >= Entity::mnEventHighWater
)
952 { // pause parsing for a bit
953 aGuard
.clear(); // unlock
954 rEntity
.maProduceResume
.wait();
955 rEntity
.maProduceResume
.reset();
956 aGuard
.reset(); // lock
959 rEntity
.maPendingEvents
.push(std::move(rEntity
.mxProducedEvents
));
960 assert(rEntity
.mxProducedEvents
.get() == nullptr);
962 aGuard
.clear(); // unlock
964 rEntity
.maConsumeResume
.set();
968 bool FastSaxParserImpl::consume(EventList
& rEventList
)
970 Entity
& rEntity
= getEntity();
971 rEventList
.mbIsAttributesEmpty
= false;
972 for (auto& rEvent
: rEventList
.maEvents
)
974 switch (rEvent
.maType
)
976 case CallbackType::START_ELEMENT
:
977 rEntity
.startElement( &rEvent
);
979 case CallbackType::END_ELEMENT
:
980 rEntity
.endElement();
982 case CallbackType::CHARACTERS
:
983 rEntity
.characters( rEvent
.msChars
);
985 case CallbackType::PROCESSING_INSTRUCTION
:
986 rEntity
.processingInstruction(
987 rEvent
.msNamespace
, rEvent
.msElementName
); // ( target, data )
989 case CallbackType::DONE
:
991 case CallbackType::EXCEPTION
:
992 rEntity
.throwException( mxDocumentLocator
, false );
993 [[fallthrough
]]; // avoid unreachable code warning with some compilers
1002 void FastSaxParserImpl::pushEntity(const ParserData
& rEntityData
,
1003 xml::sax::InputSource
const& rSource
)
1005 if (!rSource
.aInputStream
.is())
1006 throw SAXException("No input source", Reference
<XInterface
>(), Any());
1008 maEntities
.emplace(rEntityData
);
1009 mpTop
= &maEntities
.top();
1011 mpTop
->maStructSource
= rSource
;
1013 mpTop
->maConverter
.setInputStream(mpTop
->maStructSource
.aInputStream
);
1014 if (!mpTop
->maStructSource
.sEncoding
.isEmpty())
1016 mpTop
->maConverter
.setEncoding(OUStringToOString(mpTop
->maStructSource
.sEncoding
, RTL_TEXTENCODING_ASCII_US
));
1020 void FastSaxParserImpl::popEntity()
1023 mpTop
= !maEntities
.empty() ? &maEntities
.top() : nullptr;
1026 // starts parsing with actual parser !
1027 void FastSaxParserImpl::parse()
1029 const int BUFFER_SIZE
= 16 * 1024;
1030 Sequence
< sal_Int8
> seqOut( BUFFER_SIZE
);
1032 Entity
& rEntity
= getEntity();
1034 // set all necessary C-Callbacks
1035 static xmlSAXHandler callbacks
;
1036 callbacks
.startElementNs
= call_callbackStartElement
;
1037 callbacks
.endElementNs
= call_callbackEndElement
;
1038 callbacks
.characters
= call_callbackCharacters
;
1039 callbacks
.processingInstruction
= call_callbackProcessingInstruction
;
1040 callbacks
.initialized
= XML_SAX2_MAGIC
;
1044 nRead
= rEntity
.maConverter
.readAndConvert( seqOut
, BUFFER_SIZE
);
1047 if( rEntity
.mpParser
!= nullptr )
1049 if( xmlParseChunk( rEntity
.mpParser
, reinterpret_cast<const char*>(seqOut
.getConstArray()), 0, 1 ) != XML_ERR_OK
)
1050 rEntity
.throwException( mxDocumentLocator
, true );
1051 if (rEntity
.hasException())
1052 rEntity
.throwException(mxDocumentLocator
, true);
1057 bool bContinue
= true;
1058 if( rEntity
.mpParser
== nullptr )
1060 // create parser with proper encoding (needs the first chunk of data)
1061 rEntity
.mpParser
= xmlCreatePushParserCtxt( &callbacks
, this,
1062 reinterpret_cast<const char*>(seqOut
.getConstArray()), nRead
, nullptr );
1063 if( !rEntity
.mpParser
)
1064 throw SAXException("Couldn't create parser", Reference
< XInterface
>(), Any() );
1066 // Tell libxml2 parser to decode entities in attribute values.
1067 // coverity[unsafe_xml_parse_config] - entity support is required
1068 xmlCtxtUseOptions(rEntity
.mpParser
, XML_PARSE_NOENT
);
1072 bContinue
= xmlParseChunk( rEntity
.mpParser
, reinterpret_cast<const char*>(seqOut
.getConstArray()), nRead
, 0 )
1076 // callbacks used inside XML_Parse may have caught an exception
1079 rEntity
.throwException( mxDocumentLocator
, true );
1081 if (rEntity
.hasException())
1083 rEntity
.throwException( mxDocumentLocator
, true );
1085 } while( nRead
> 0 );
1086 rEntity
.getEvent( CallbackType::DONE
);
1087 if( rEntity
.mbEnableThreads
)
1092 void FastSaxParserImpl::callbackStartElement(const xmlChar
*localName
, const xmlChar
* prefix
, const xmlChar
* URI
,
1093 int numNamespaces
, const xmlChar
** namespaces
, int numAttributes
, const xmlChar
**attributes
)
1095 if (!pendingCharacters
.empty())
1096 sendPendingCharacters();
1097 Entity
& rEntity
= getEntity();
1098 if( rEntity
.maNamespaceCount
.empty() )
1100 rEntity
.maNamespaceCount
.push(0);
1101 DefineNamespace( "xml", "http://www.w3.org/XML/1998/namespace");
1105 rEntity
.maNamespaceCount
.push( rEntity
.maNamespaceCount
.top() );
1108 // create attribute map and process namespace instructions
1109 Event
& rEvent
= rEntity
.getEvent( CallbackType::START_ELEMENT
);
1110 bool bIsAttributesEmpty
= false;
1111 if ( rEntity
.mbEnableThreads
)
1112 bIsAttributesEmpty
= rEntity
.getEventList().mbIsAttributesEmpty
;
1114 if (rEvent
.mxAttributes
.is())
1116 if( !bIsAttributesEmpty
)
1117 rEvent
.mxAttributes
->clear();
1120 rEvent
.mxAttributes
.set(
1121 new FastAttributeList( rEntity
.mxTokenHandler
,
1122 rEntity
.mpTokenHandler
) );
1124 if( rEntity
.mxNamespaceHandler
.is() )
1126 if (rEvent
.mxDeclAttributes
.is())
1128 if( !bIsAttributesEmpty
)
1129 rEvent
.mxDeclAttributes
->clear();
1132 rEvent
.mxDeclAttributes
.set(
1133 new FastAttributeList( rEntity
.mxTokenHandler
,
1134 rEntity
.mpTokenHandler
) );
1137 OUString sNamespace
;
1138 sal_Int32 nNamespaceToken
= FastToken::DONTKNOW
;
1139 if (!rEntity
.maNamespaceStack
.empty())
1141 sNamespace
= rEntity
.maNamespaceStack
.top().msName
;
1142 nNamespaceToken
= rEntity
.maNamespaceStack
.top().mnToken
;
1147 /* #158414# Each element may define new namespaces, also for attributes.
1148 First, process all namespaces, second, process the attributes after namespaces
1149 have been initialized. */
1151 // #158414# first: get namespaces
1152 for (int i
= 0; i
< numNamespaces
* 2; i
+= 2)
1154 // namespaces[] is (prefix/URI)
1155 if( namespaces
[ i
] != nullptr )
1157 DefineNamespace( OString( XML_CAST( namespaces
[ i
] )),
1158 OUString( XML_CAST( namespaces
[ i
+ 1 ] ), strlen( XML_CAST( namespaces
[ i
+ 1 ] )), RTL_TEXTENCODING_UTF8
));
1159 if( rEntity
.mxNamespaceHandler
.is() )
1160 rEvent
.mxDeclAttributes
->addUnknown( OString( XML_CAST( namespaces
[ i
] ) ), OString( XML_CAST( namespaces
[ i
+ 1 ] ) ) );
1164 // default namespace
1165 sNamespace
= OUString( XML_CAST( namespaces
[ i
+ 1 ] ), strlen( XML_CAST( namespaces
[ i
+ 1 ] )), RTL_TEXTENCODING_UTF8
);
1166 nNamespaceToken
= GetNamespaceToken( sNamespace
);
1167 if( rEntity
.mxNamespaceHandler
.is() )
1168 rEvent
.mxDeclAttributes
->addUnknown( "", OString( XML_CAST( namespaces
[ i
+ 1 ] ) ) );
1172 if ( rEntity
.mxTokenHandler
.is() )
1174 // #158414# second: fill attribute list with other attributes
1175 rEvent
.mxAttributes
->reserve( numAttributes
);
1176 for (int i
= 0; i
< numAttributes
* 5; i
+= 5)
1178 // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd )
1179 if( attributes
[ i
+ 1 ] != nullptr )
1181 sal_Int32 nAttributeToken
= GetTokenWithPrefix( attributes
[ i
+ 1 ], strlen( XML_CAST( attributes
[ i
+ 1 ] )), attributes
[ i
], strlen( XML_CAST( attributes
[ i
] )));
1182 if( nAttributeToken
!= FastToken::DONTKNOW
)
1183 rEvent
.mxAttributes
->add( nAttributeToken
, XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] );
1185 addUnknownElementWithPrefix(attributes
, i
, rEvent
.mxAttributes
);
1189 sal_Int32 nAttributeToken
= GetToken( attributes
[ i
], strlen( XML_CAST( attributes
[ i
] )));
1190 if( nAttributeToken
!= FastToken::DONTKNOW
)
1191 rEvent
.mxAttributes
->add( nAttributeToken
, XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] );
1193 rEvent
.mxAttributes
->addUnknown( XML_CAST( attributes
[ i
] ),
1194 OString( XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] ));
1198 if( prefix
!= nullptr )
1199 rEvent
.mnElementToken
= GetTokenWithPrefix( prefix
, strlen( XML_CAST( prefix
)), localName
, strlen( XML_CAST( localName
)));
1200 else if( !sNamespace
.isEmpty() )
1201 rEvent
.mnElementToken
= GetTokenWithContextNamespace( nNamespaceToken
, localName
, strlen( XML_CAST( localName
)));
1203 rEvent
.mnElementToken
= GetToken( localName
, strlen( XML_CAST( localName
)));
1207 for (int i
= 0; i
< numAttributes
* 5; i
+= 5)
1209 if( attributes
[ i
+ 1 ] != nullptr )
1210 addUnknownElementWithPrefix(attributes
, i
, rEvent
.mxAttributes
);
1212 rEvent
.mxAttributes
->addUnknown( XML_CAST( attributes
[ i
] ),
1213 OString( XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] ));
1216 rEvent
.mnElementToken
= FastToken::DONTKNOW
;
1219 if( rEvent
.mnElementToken
== FastToken::DONTKNOW
)
1221 OUString aElementPrefix
;
1222 if( prefix
!= nullptr )
1224 if ( !m_bIgnoreMissingNSDecl
|| URI
!= nullptr )
1225 sNamespace
= OUString( XML_CAST( URI
), strlen( XML_CAST( URI
)), RTL_TEXTENCODING_UTF8
);
1228 nNamespaceToken
= GetNamespaceToken( sNamespace
);
1229 aElementPrefix
= OUString( XML_CAST( prefix
), strlen( XML_CAST( prefix
)), RTL_TEXTENCODING_UTF8
);
1231 const OUString
& rElementLocalName
= OUString( XML_CAST( localName
), strlen( XML_CAST( localName
)), RTL_TEXTENCODING_UTF8
);
1232 rEvent
.msNamespace
= sNamespace
;
1233 rEvent
.msElementName
= (aElementPrefix
.isEmpty())? rElementLocalName
: aElementPrefix
+ ":" + rElementLocalName
;
1235 else // token is always preferred.
1236 rEvent
.msElementName
.clear();
1238 rEntity
.maNamespaceStack
.push( NameWithToken(sNamespace
, nNamespaceToken
) );
1239 if (rEntity
.mbEnableThreads
)
1243 SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator
->getLineNumber() << " column " << mxDocumentLocator
->getColumnNumber() << " " << ( prefix
? XML_CAST(prefix
) : "(null)" ) << ":" << localName
);
1244 rEntity
.startElement( &rEvent
);
1249 rEntity
.saveException( ::cppu::getCaughtException() );
1253 void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar
**attributes
, int i
, rtl::Reference
< FastAttributeList
> const & xAttributes
)
1255 OUString aNamespaceURI
;
1256 if ( !m_bIgnoreMissingNSDecl
|| attributes
[i
+ 2] != nullptr )
1257 aNamespaceURI
= OUString( XML_CAST( attributes
[ i
+ 2 ] ), strlen( XML_CAST( attributes
[ i
+ 2 ] )), RTL_TEXTENCODING_UTF8
);
1258 const OString
& rPrefix
= OString( XML_CAST( attributes
[ i
+ 1 ] ));
1259 const OString
& rLocalName
= OString( XML_CAST( attributes
[ i
] ));
1260 OString aQualifiedName
= (rPrefix
.isEmpty())? rLocalName
: rPrefix
+ ":" + rLocalName
;
1261 xAttributes
->addUnknown( aNamespaceURI
, aQualifiedName
,
1262 OString( XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] ));
1265 void FastSaxParserImpl::callbackEndElement()
1267 if (!pendingCharacters
.empty())
1268 sendPendingCharacters();
1269 Entity
& rEntity
= getEntity();
1270 SAL_WARN_IF(rEntity
.maNamespaceCount
.empty(), "sax", "Empty NamespaceCount");
1271 if( !rEntity
.maNamespaceCount
.empty() )
1272 rEntity
.maNamespaceCount
.pop();
1274 SAL_WARN_IF(rEntity
.maNamespaceStack
.empty(), "sax", "Empty NamespaceStack");
1275 if( !rEntity
.maNamespaceStack
.empty() )
1276 rEntity
.maNamespaceStack
.pop();
1278 rEntity
.getEvent( CallbackType::END_ELEMENT
);
1279 if (rEntity
.mbEnableThreads
)
1282 rEntity
.endElement();
1285 void FastSaxParserImpl::callbackCharacters( const xmlChar
* s
, int nLen
)
1287 // SAX interface allows that the characters callback splits content of one XML node
1288 // (e.g. because there's an entity that needs decoding), however for consumers it's
1289 // simpler FastSaxParser's character callback provides the whole string at once,
1290 // so merge data from possible multiple calls and send them at once (before the element
1291 // ends or another one starts).
1293 // We use a std::vector<char> to avoid calling into the OUString constructor more than once when
1294 // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly
1295 // often in writer documents.
1296 int nOriginalLen
= pendingCharacters
.size();
1297 pendingCharacters
.resize(nOriginalLen
+ nLen
);
1298 memcpy(pendingCharacters
.data() + nOriginalLen
, s
, nLen
);
1301 void FastSaxParserImpl::sendPendingCharacters()
1303 Entity
& rEntity
= getEntity();
1304 OUString
sChars( pendingCharacters
.data(), pendingCharacters
.size(), RTL_TEXTENCODING_UTF8
);
1305 if (rEntity
.mbEnableThreads
)
1307 Event
& rEvent
= rEntity
.getEvent( CallbackType::CHARACTERS
);
1308 rEvent
.msChars
= sChars
;
1312 rEntity
.characters( sChars
);
1313 pendingCharacters
.resize(0);
1316 void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar
*target
, const xmlChar
*data
)
1318 if (!pendingCharacters
.empty())
1319 sendPendingCharacters();
1320 Entity
& rEntity
= getEntity();
1321 Event
& rEvent
= rEntity
.getEvent( CallbackType::PROCESSING_INSTRUCTION
);
1323 // This event is very rare, so no need to waste extra space for this
1324 // Using namespace and element strings to be target and data in that order.
1325 rEvent
.msNamespace
= OUString( XML_CAST( target
), strlen( XML_CAST( target
) ), RTL_TEXTENCODING_UTF8
);
1326 if ( data
!= nullptr )
1327 rEvent
.msElementName
= OUString( XML_CAST( data
), strlen( XML_CAST( data
) ), RTL_TEXTENCODING_UTF8
);
1329 rEvent
.msElementName
.clear();
1331 if (rEntity
.mbEnableThreads
)
1334 rEntity
.processingInstruction( rEvent
.msNamespace
, rEvent
.msElementName
);
1337 FastSaxParser::FastSaxParser() : mpImpl(new FastSaxParserImpl
) {}
1339 FastSaxParser::~FastSaxParser()
1344 FastSaxParser::initialize(css::uno::Sequence
< css::uno::Any
> const& rArguments
)
1346 if (rArguments
.hasElements())
1349 if ( rArguments
[0] >>= str
)
1351 if ( str
== "IgnoreMissingNSDecl" )
1352 mpImpl
->m_bIgnoreMissingNSDecl
= true;
1353 else if ( str
== "DoSmeplease" )
1354 ; //just ignore as this is already immune to billion laughs
1355 else if ( str
== "DisableThreadedParser" )
1356 mpImpl
->m_bDisableThreadedParser
= true;
1358 throw IllegalArgumentException();
1361 throw IllegalArgumentException();
1365 void FastSaxParser::parseStream( const xml::sax::InputSource
& aInputSource
)
1367 mpImpl
->parseStream(aInputSource
);
1370 void FastSaxParser::setFastDocumentHandler( const uno::Reference
<xml::sax::XFastDocumentHandler
>& Handler
)
1372 mpImpl
->setFastDocumentHandler(Handler
);
1375 void FastSaxParser::setTokenHandler( const uno::Reference
<xml::sax::XFastTokenHandler
>& Handler
)
1377 mpImpl
->setTokenHandler(Handler
);
1380 void FastSaxParser::registerNamespace( const OUString
& NamespaceURL
, sal_Int32 NamespaceToken
)
1382 mpImpl
->registerNamespace(NamespaceURL
, NamespaceToken
);
1385 OUString
FastSaxParser::getNamespaceURL( const OUString
& rPrefix
)
1387 return mpImpl
->getNamespaceURL(rPrefix
);
1390 void FastSaxParser::setErrorHandler( const uno::Reference
< xml::sax::XErrorHandler
>& Handler
)
1392 mpImpl
->setErrorHandler(Handler
);
1395 void FastSaxParser::setEntityResolver( const uno::Reference
< xml::sax::XEntityResolver
>& )
1400 void FastSaxParser::setLocale( const lang::Locale
& )
1405 void FastSaxParser::setNamespaceHandler( const uno::Reference
< css::xml::sax::XFastNamespaceHandler
>& Handler
)
1407 mpImpl
->setNamespaceHandler(Handler
);
1410 OUString
FastSaxParser::getImplementationName()
1412 return "com.sun.star.comp.extensions.xml.sax.FastParser";
1415 sal_Bool
FastSaxParser::supportsService( const OUString
& ServiceName
)
1417 return cppu::supportsService(this, ServiceName
);
1420 uno::Sequence
<OUString
> FastSaxParser::getSupportedServiceNames()
1422 Sequence
<OUString
> seq
{ "com.sun.star.xml.sax.FastParser" };
1426 } // namespace sax_fastparser
1428 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
1429 com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation(
1430 css::uno::XComponentContext
*,
1431 css::uno::Sequence
<css::uno::Any
> const &)
1433 return cppu::acquire(new FastSaxParser
);
1436 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */