1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sax/fastparser.hxx>
21 #include <sax/fastattribs.hxx>
22 #include <xml2utf.hxx>
24 #include <com/sun/star/io/XSeekable.hpp>
25 #include <com/sun/star/lang/DisposedException.hpp>
26 #include <com/sun/star/lang/IllegalArgumentException.hpp>
27 #include <com/sun/star/uno/XComponentContext.hpp>
28 #include <com/sun/star/xml/sax/FastToken.hpp>
29 #include <com/sun/star/xml/sax/SAXParseException.hpp>
30 #include <com/sun/star/xml/sax/XFastContextHandler.hpp>
31 #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
32 #include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
33 #include <cppuhelper/implbase.hxx>
34 #include <cppuhelper/supportsservice.hxx>
35 #include <cppuhelper/exc_hlp.hxx>
36 #include <osl/conditn.hxx>
37 #include <rtl/ref.hxx>
38 #include <rtl/ustrbuf.hxx>
39 #include <sal/log.hxx>
40 #include <salhelper/thread.hxx>
41 #include <tools/diagnose_ex.h>
46 #include <unordered_map>
50 #include <libxml/parser.h>
52 // Inverse of libxml's BAD_CAST.
53 #define XML_CAST( str ) reinterpret_cast< const sal_Char* >( str )
56 using namespace ::osl
;
57 using namespace ::cppu
;
58 using namespace ::com::sun::star::uno
;
59 using namespace ::com::sun::star::lang
;
60 using namespace ::com::sun::star::xml::sax
;
61 using namespace ::com::sun::star::io
;
62 using namespace com::sun::star
;
63 using namespace sax_fastparser
;
68 class FastLocatorImpl
;
69 struct NamespaceDefine
;
72 typedef std::unordered_map
< OUString
, sal_Int32
> NamespaceMap
;
76 std::vector
<Event
> maEvents
;
77 bool mbIsAttributesEmpty
;
/// Kind of a queued parser event; consume() dispatches on this value.
enum class CallbackType
{
    START_ELEMENT,
    END_ELEMENT,
    CHARACTERS,
    PROCESSING_INSTRUCTION,
    DONE,       ///< pushed by parse() once all input has been read
    EXCEPTION   ///< pushed by the parser thread when parse() threw
};
85 sal_Int32 mnElementToken
;
87 OUString msElementName
;
88 rtl::Reference
< FastAttributeList
> mxAttributes
;
89 rtl::Reference
< FastAttributeList
> mxDeclAttributes
;
95 OUString
const msName
;
96 sal_Int32
const mnToken
;
98 NameWithToken(const OUString
& sName
, sal_Int32 nToken
) :
99 msName(sName
), mnToken(nToken
) {}
104 Reference
< XFastContextHandler
> mxContext
;
105 sal_Int32
const mnElementToken
;
106 OUString maNamespace
;
107 OUString maElementName
;
109 SaxContext( sal_Int32 nElementToken
, const OUString
& aNamespace
, const OUString
& aElementName
):
110 mnElementToken(nElementToken
)
112 if (nElementToken
== FastToken::DONTKNOW
)
114 maNamespace
= aNamespace
;
115 maElementName
= aElementName
;
123 css::uno::Reference
< css::xml::sax::XFastDocumentHandler
> mxDocumentHandler
;
124 css::uno::Reference
< css::xml::sax::XFastTokenHandler
> mxTokenHandler
;
125 FastTokenHandlerBase
* mpTokenHandler
;
126 css::uno::Reference
< css::xml::sax::XErrorHandler
> mxErrorHandler
;
127 css::uno::Reference
< css::xml::sax::XFastNamespaceHandler
>mxNamespaceHandler
;
132 struct NamespaceDefine
136 OUString maNamespaceURL
;
138 NamespaceDefine( const OString
& rPrefix
, sal_Int32 nToken
, const OUString
& rNamespaceURL
) : maPrefix( rPrefix
), mnToken( nToken
), maNamespaceURL( rNamespaceURL
) {}
139 NamespaceDefine() : mnToken(-1) {}
142 // Entity binds all information needed for a single file | single call of parseStream
143 struct Entity
: public ParserData
145 // Amount of work producer sends to consumer in one iteration:
146 static const size_t mnEventListSize
= 1000;
148 // unique for each Entity instance:
150 // Number of valid events in mxProducedEvents:
151 size_t mnProducedEventsSize
;
152 std::unique_ptr
<EventList
> mxProducedEvents
;
153 std::queue
<std::unique_ptr
<EventList
>> maPendingEvents
;
154 std::queue
<std::unique_ptr
<EventList
>> maUsedEvents
;
155 osl::Mutex maEventProtector
;
157 static const size_t mnEventLowWater
= 4;
158 static const size_t mnEventHighWater
= 8;
159 osl::Condition maConsumeResume
;
160 osl::Condition maProduceResume
;
161 // Event we use to store data if threading is disabled:
164 // copied in copy constructor:
166 // Allow to disable threading for small documents:
167 bool mbEnableThreads
;
168 css::xml::sax::InputSource maStructSource
;
169 xmlParserCtxtPtr mpParser
;
170 ::sax_expatwrap::XMLFile2UTFConverter maConverter
;
172 // Exceptions cannot be thrown through the C-XmlParser (possible
173 // resource leaks), therefore any exception thrown by a UNO callback
174 // must be saved somewhere until the C-XmlParser is stopped.
175 css::uno::Any maSavedException
;
176 osl::Mutex maSavedExceptionMutex
;
177 void saveException( const Any
& e
);
178 void throwException( const ::rtl::Reference
< FastLocatorImpl
> &xDocumentLocator
,
179 bool mbDuringParse
);
181 std::stack
< NameWithToken
, std::vector
<NameWithToken
> > maNamespaceStack
;
182 /* Context for main thread consuming events.
183 * startElement() stores the data, which characters() and endElement() uses
185 std::stack
< SaxContext
, std::vector
<SaxContext
> > maContextStack
;
186 // Determines which elements of maNamespaceDefines are valid in current context
187 std::stack
< sal_uInt32
, std::vector
<sal_uInt32
> > maNamespaceCount
;
188 std::vector
< NamespaceDefine
> maNamespaceDefines
;
190 explicit Entity( const ParserData
& rData
);
191 Entity( const Entity
& rEntity
) = delete;
192 Entity
& operator=( const Entity
& rEntity
) = delete;
193 void startElement( Event
const *pEvent
);
194 void characters( const OUString
& sChars
);
196 void processingInstruction( const OUString
& rTarget
, const OUString
& rData
);
197 EventList
& getEventList();
198 Event
& getEvent( CallbackType aType
);
203 namespace sax_fastparser
{
205 class FastSaxParserImpl
208 explicit FastSaxParserImpl();
209 ~FastSaxParserImpl();
212 /// @throws css::xml::sax::SAXException
213 /// @throws css::io::IOException
214 /// @throws css::uno::RuntimeException
215 void parseStream( const css::xml::sax::InputSource
& aInputSource
);
216 /// @throws css::uno::RuntimeException
217 void setFastDocumentHandler( const css::uno::Reference
< css::xml::sax::XFastDocumentHandler
>& Handler
);
218 /// @throws css::uno::RuntimeException
219 void setTokenHandler( const css::uno::Reference
< css::xml::sax::XFastTokenHandler
>& Handler
);
220 /// @throws css::lang::IllegalArgumentException
221 /// @throws css::uno::RuntimeException
222 void registerNamespace( const OUString
& NamespaceURL
, sal_Int32 NamespaceToken
);
223 /// @throws css::lang::IllegalArgumentException
224 /// @throws css::uno::RuntimeException
225 OUString
const & getNamespaceURL( const OUString
& rPrefix
);
226 /// @throws css::uno::RuntimeException
227 void setErrorHandler( const css::uno::Reference
< css::xml::sax::XErrorHandler
>& Handler
);
228 /// @throws css::uno::RuntimeException
229 void setNamespaceHandler( const css::uno::Reference
< css::xml::sax::XFastNamespaceHandler
>& Handler
);
231 // called by the C callbacks of the expat parser
232 void callbackStartElement( const xmlChar
*localName
, const xmlChar
* prefix
, const xmlChar
* URI
,
233 int numNamespaces
, const xmlChar
** namespaces
, int numAttributes
, const xmlChar
**attributes
);
234 void callbackEndElement();
235 void callbackCharacters( const xmlChar
* s
, int nLen
);
236 void callbackProcessingInstruction( const xmlChar
*target
, const xmlChar
*data
);
238 void pushEntity(const ParserData
&, xml::sax::InputSource
const&);
240 Entity
& getEntity() { return *mpTop
; }
242 void produce( bool bForceFlush
= false );
243 bool m_bIgnoreMissingNSDecl
;
244 bool m_bDisableThreadedParser
;
247 bool consume(EventList
&);
248 void deleteUsedEvents();
249 void sendPendingCharacters();
250 void addUnknownElementWithPrefix(const xmlChar
**attributes
, int i
, rtl::Reference
< FastAttributeList
> const & xAttributes
);
252 sal_Int32
GetToken( const xmlChar
* pName
, sal_Int32 nameLen
);
253 /// @throws css::xml::sax::SAXException
254 sal_Int32
GetTokenWithPrefix( const xmlChar
* pPrefix
, int prefixLen
, const xmlChar
* pName
, int nameLen
);
255 /// @throws css::xml::sax::SAXException
256 OUString
const & GetNamespaceURL( const OString
& rPrefix
);
257 sal_Int32
GetNamespaceToken( const OUString
& rNamespaceURL
);
258 sal_Int32
GetTokenWithContextNamespace( sal_Int32 nNamespaceToken
, const xmlChar
* pName
, int nNameLen
);
259 void DefineNamespace( const OString
& rPrefix
, const OUString
& namespaceURL
);
262 osl::Mutex maMutex
; ///< Protecting whole parseStream() execution
263 ::rtl::Reference
< FastLocatorImpl
> mxDocumentLocator
;
264 NamespaceMap maNamespaceMap
;
266 ParserData maData
; /// Cached parser configuration for next call of parseStream().
268 Entity
*mpTop
; /// std::stack::top() is amazingly slow => cache this.
269 std::stack
< Entity
> maEntities
; /// Entity stack for each call of parseStream().
270 std::vector
<char> pendingCharacters
; /// Data from characters() callback that needs to be sent.
273 } // namespace sax_fastparser
277 class ParserThread
: public salhelper::Thread
279 FastSaxParserImpl
*mpParser
;
281 explicit ParserThread(FastSaxParserImpl
*pParser
): Thread("Parser"), mpParser(pParser
) {}
283 virtual void execute() override
291 Entity
&rEntity
= mpParser
->getEntity();
292 rEntity
.getEvent( CallbackType::EXCEPTION
);
293 mpParser
->produce( true );
300 static void call_callbackStartElement(void *userData
, const xmlChar
*localName
, const xmlChar
* prefix
, const xmlChar
* URI
,
301 int numNamespaces
, const xmlChar
** namespaces
, int numAttributes
, int /*defaultedAttributes*/, const xmlChar
**attributes
)
303 FastSaxParserImpl
* pFastParser
= static_cast<FastSaxParserImpl
*>( userData
);
304 pFastParser
->callbackStartElement( localName
, prefix
, URI
, numNamespaces
, namespaces
, numAttributes
, attributes
);
307 static void call_callbackEndElement(void *userData
, const xmlChar
* /*localName*/, const xmlChar
* /*prefix*/, const xmlChar
* /*URI*/)
309 FastSaxParserImpl
* pFastParser
= static_cast<FastSaxParserImpl
*>( userData
);
310 pFastParser
->callbackEndElement();
313 static void call_callbackCharacters( void *userData
, const xmlChar
*s
, int nLen
)
315 FastSaxParserImpl
* pFastParser
= static_cast<FastSaxParserImpl
*>( userData
);
316 pFastParser
->callbackCharacters( s
, nLen
);
319 static void call_callbackProcessingInstruction( void *userData
, const xmlChar
*target
, const xmlChar
*data
)
321 FastSaxParserImpl
* pFastParser
= static_cast<FastSaxParserImpl
*>( userData
);
322 pFastParser
->callbackProcessingInstruction( target
, data
);
327 class FastLocatorImpl
: public WeakImplHelper
< XLocator
>
330 explicit FastLocatorImpl(FastSaxParserImpl
*p
) : mpParser(p
) {}
332 void dispose() { mpParser
= nullptr; }
333 /// @throws RuntimeException
334 void checkDispose() const { if( !mpParser
) throw DisposedException(); }
337 virtual sal_Int32 SAL_CALL
getColumnNumber() override
;
338 virtual sal_Int32 SAL_CALL
getLineNumber() override
;
339 virtual OUString SAL_CALL
getPublicId() override
;
340 virtual OUString SAL_CALL
getSystemId() override
;
343 FastSaxParserImpl
*mpParser
;
346 sal_Int32 SAL_CALL
FastLocatorImpl::getColumnNumber()
349 return xmlSAX2GetColumnNumber( mpParser
->getEntity().mpParser
);
352 sal_Int32 SAL_CALL
FastLocatorImpl::getLineNumber()
355 return xmlSAX2GetLineNumber( mpParser
->getEntity().mpParser
);
358 OUString SAL_CALL
FastLocatorImpl::getPublicId()
361 return mpParser
->getEntity().maStructSource
.sPublicId
;
364 OUString SAL_CALL
FastLocatorImpl::getSystemId()
367 return mpParser
->getEntity().maStructSource
.sSystemId
;
370 ParserData::ParserData()
371 : mpTokenHandler( nullptr )
374 Entity::Entity(const ParserData
& rData
)
376 , mnProducedEventsSize(0)
378 , mbEnableThreads(false)
383 void Entity::startElement( Event
const *pEvent
)
385 const sal_Int32
& nElementToken
= pEvent
->mnElementToken
;
386 const OUString
& aNamespace
= pEvent
->msNamespace
;
387 const OUString
& aElementName
= pEvent
->msElementName
;
389 // Use un-wrapped pointers to avoid significant acquire/release overhead
390 XFastContextHandler
*pParentContext
= nullptr;
391 if( !maContextStack
.empty() )
393 pParentContext
= maContextStack
.top().mxContext
.get();
394 if( !pParentContext
)
396 maContextStack
.push( SaxContext(nElementToken
, aNamespace
, aElementName
) );
401 maContextStack
.push( SaxContext( nElementToken
, aNamespace
, aElementName
) );
405 Reference
< XFastAttributeList
> xAttr( pEvent
->mxAttributes
.get() );
406 Reference
< XFastContextHandler
> xContext
;
408 if ( mxNamespaceHandler
.is() )
410 const Sequence
< xml::Attribute
> NSDeclAttribs
= pEvent
->mxDeclAttributes
->getUnknownAttributes();
411 for (const auto& rNSDeclAttrib
: NSDeclAttribs
)
413 mxNamespaceHandler
->registerNamespace( rNSDeclAttrib
.Name
, rNSDeclAttrib
.Value
);
417 if( nElementToken
== FastToken::DONTKNOW
)
420 xContext
= pParentContext
->createUnknownChildContext( aNamespace
, aElementName
, xAttr
);
421 else if( mxDocumentHandler
.is() )
422 xContext
= mxDocumentHandler
->createUnknownChildContext( aNamespace
, aElementName
, xAttr
);
426 xContext
->startUnknownElement( aNamespace
, aElementName
, xAttr
);
432 xContext
= pParentContext
->createFastChildContext( nElementToken
, xAttr
);
433 else if( mxDocumentHandler
.is() )
434 xContext
= mxDocumentHandler
->createFastChildContext( nElementToken
, xAttr
);
437 xContext
->startFastElement( nElementToken
, xAttr
);
439 // swap the reference we own in to avoid referencing thrash.
440 maContextStack
.top().mxContext
= std::move( xContext
);
444 saveException( ::cppu::getCaughtException() );
448 void Entity::characters( const OUString
& sChars
)
450 if (maContextStack
.empty())
452 // Malformed XML stream !?
456 XFastContextHandler
* pContext( maContextStack
.top().mxContext
.get() );
459 pContext
->characters( sChars
);
463 saveException( ::cppu::getCaughtException() );
467 void Entity::endElement()
469 if (maContextStack
.empty())
471 // Malformed XML stream !?
475 const SaxContext
& aContext
= maContextStack
.top();
476 XFastContextHandler
* pContext( aContext
.mxContext
.get() );
480 sal_Int32 nElementToken
= aContext
.mnElementToken
;
481 if( nElementToken
!= FastToken::DONTKNOW
)
482 pContext
->endFastElement( nElementToken
);
484 pContext
->endUnknownElement( aContext
.maNamespace
, aContext
.maElementName
);
488 saveException( ::cppu::getCaughtException() );
490 maContextStack
.pop();
493 void Entity::processingInstruction( const OUString
& rTarget
, const OUString
& rData
)
495 if( mxDocumentHandler
.is() ) try
497 mxDocumentHandler
->processingInstruction( rTarget
, rData
);
501 saveException( ::cppu::getCaughtException() );
505 EventList
& Entity::getEventList()
507 if (!mxProducedEvents
)
509 osl::ClearableMutexGuard
aGuard(maEventProtector
);
510 if (!maUsedEvents
.empty())
512 mxProducedEvents
= std::move(maUsedEvents
.front());
514 aGuard
.clear(); // unlock
515 mnProducedEventsSize
= 0;
517 if (!mxProducedEvents
)
519 mxProducedEvents
.reset(new EventList
);
520 mxProducedEvents
->maEvents
.resize(mnEventListSize
);
521 mxProducedEvents
->mbIsAttributesEmpty
= false;
522 mnProducedEventsSize
= 0;
525 return *mxProducedEvents
;
528 Event
& Entity::getEvent( CallbackType aType
)
530 if (!mbEnableThreads
)
531 return maSharedEvent
;
533 EventList
& rEventList
= getEventList();
534 if (mnProducedEventsSize
== rEventList
.maEvents
.size())
536 SAL_WARN_IF(!maSavedException
.hasValue(), "sax",
537 "Event vector should only exceed " << mnEventListSize
<<
538 " temporarily while an exception is pending");
539 rEventList
.maEvents
.resize(mnProducedEventsSize
+ 1);
541 Event
& rEvent
= rEventList
.maEvents
[mnProducedEventsSize
++];
542 rEvent
.maType
= aType
;
546 OUString
lclGetErrorMessage( xmlParserCtxtPtr ctxt
, const OUString
& sSystemId
, sal_Int32 nLine
)
548 const sal_Char
* pMessage
;
549 xmlErrorPtr error
= xmlCtxtGetLastError( ctxt
);
550 if( error
&& error
->message
)
551 pMessage
= error
->message
;
553 pMessage
= "unknown error";
554 OUStringBuffer
aBuffer( 128 );
555 aBuffer
.append( "[" );
556 aBuffer
.append( sSystemId
);
557 aBuffer
.append( " line " );
558 aBuffer
.append( nLine
);
559 aBuffer
.append( "]: " );
560 aBuffer
.appendAscii( pMessage
);
561 return aBuffer
.makeStringAndClear();
564 // throw an exception, but avoid callback if
565 // during a threaded produce
566 void Entity::throwException( const ::rtl::Reference
< FastLocatorImpl
> &xDocumentLocator
,
569 // Error during parsing !
572 osl::MutexGuard
g(maSavedExceptionMutex
);
573 if (maSavedException
.hasValue())
575 savedException
.setValue(&maSavedException
, cppu::UnoType
<decltype(maSavedException
)>::get());
578 SAXParseException
aExcept(
579 lclGetErrorMessage( mpParser
,
580 xDocumentLocator
->getSystemId(),
581 xDocumentLocator
->getLineNumber() ),
582 Reference
< XInterface
>(),
584 xDocumentLocator
->getPublicId(),
585 xDocumentLocator
->getSystemId(),
586 xDocumentLocator
->getLineNumber(),
587 xDocumentLocator
->getColumnNumber()
590 // error handler is set, it may throw the exception
591 if( !mbDuringParse
|| !mbEnableThreads
)
593 if (mxErrorHandler
.is() )
594 mxErrorHandler
->fatalError( Any( aExcept
) );
597 // error handler has not thrown, but parsing must stop => throw ourselves
601 // In the single threaded case we emit events via our C
602 // callbacks, so any exception caught must be queued up until
603 // we can safely re-throw it from our C++ parent of parse()
605 // If multi-threaded, we need to push an EXCEPTION event, at
606 // which point we transfer ownership of maSavedException to
607 // the consuming thread.
608 void Entity::saveException( const Any
& e
)
610 // fdo#81214 - allow the parser to run on after an exception,
611 // unexpectedly some 'startElements' produce a UNO_QUERY_THROW
612 // for XComponent; and yet expect to continue parsing.
613 SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e
));
614 osl::MutexGuard
g(maSavedExceptionMutex
);
615 if (maSavedException
.hasValue())
617 SAL_INFO("sax.fastparser", "discarding exception, already have one");
621 maSavedException
= e
;
627 namespace sax_fastparser
{
629 FastSaxParserImpl::FastSaxParserImpl() :
630 m_bIgnoreMissingNSDecl(false),
631 m_bDisableThreadedParser(false),
634 mxDocumentLocator
.set( new FastLocatorImpl( this ) );
637 FastSaxParserImpl::~FastSaxParserImpl()
639 if( mxDocumentLocator
.is() )
640 mxDocumentLocator
->dispose();
643 void FastSaxParserImpl::DefineNamespace( const OString
& rPrefix
, const OUString
& namespaceURL
)
645 Entity
& rEntity
= getEntity();
646 assert(!rEntity
.maNamespaceCount
.empty()); // need a context!
648 sal_uInt32 nOffset
= rEntity
.maNamespaceCount
.top()++;
649 if( rEntity
.maNamespaceDefines
.size() <= nOffset
)
650 rEntity
.maNamespaceDefines
.resize( rEntity
.maNamespaceDefines
.size() + 64 );
652 rEntity
.maNamespaceDefines
[nOffset
] = NamespaceDefine( rPrefix
, GetNamespaceToken( namespaceURL
), namespaceURL
);
655 sal_Int32
FastSaxParserImpl::GetToken( const xmlChar
* pName
, sal_Int32 nameLen
/* = 0 */ )
657 return FastTokenHandlerBase::getTokenFromChars( getEntity().mxTokenHandler
,
658 getEntity().mpTokenHandler
,
659 XML_CAST( pName
), nameLen
); // uses utf-8
662 sal_Int32
FastSaxParserImpl::GetTokenWithPrefix( const xmlChar
* pPrefix
, int nPrefixLen
, const xmlChar
* pName
, int nNameLen
)
664 sal_Int32 nNamespaceToken
= FastToken::DONTKNOW
;
666 Entity
& rEntity
= getEntity();
667 if (rEntity
.maNamespaceCount
.empty())
668 return nNamespaceToken
;
670 sal_uInt32 nNamespace
= rEntity
.maNamespaceCount
.top();
671 while( nNamespace
-- )
673 const auto & rNamespaceDefine
= rEntity
.maNamespaceDefines
[nNamespace
];
674 const OString
& rPrefix( rNamespaceDefine
.maPrefix
);
675 if( (rPrefix
.getLength() == nPrefixLen
) &&
676 rtl_str_reverseCompare_WithLength(rPrefix
.pData
->buffer
, rPrefix
.pData
->length
, XML_CAST( pPrefix
), nPrefixLen
) == 0 )
678 nNamespaceToken
= rNamespaceDefine
.mnToken
;
682 if( !nNamespace
&& !m_bIgnoreMissingNSDecl
)
683 throw SAXException("No namespace defined for " + OUString(XML_CAST(pPrefix
),
684 nPrefixLen
, RTL_TEXTENCODING_UTF8
), Reference
< XInterface
>(), Any());
687 if( nNamespaceToken
!= FastToken::DONTKNOW
)
689 sal_Int32 nNameToken
= GetToken( pName
, nNameLen
);
690 if( nNameToken
!= FastToken::DONTKNOW
)
691 return nNamespaceToken
| nNameToken
;
694 return FastToken::DONTKNOW
;
697 sal_Int32
FastSaxParserImpl::GetNamespaceToken( const OUString
& rNamespaceURL
)
699 NamespaceMap::iterator
aIter( maNamespaceMap
.find( rNamespaceURL
) );
700 if( aIter
!= maNamespaceMap
.end() )
701 return (*aIter
).second
;
703 return FastToken::DONTKNOW
;
706 OUString
const & FastSaxParserImpl::GetNamespaceURL( const OString
& rPrefix
)
708 Entity
& rEntity
= getEntity();
709 if( !rEntity
.maNamespaceCount
.empty() )
711 sal_uInt32 nNamespace
= rEntity
.maNamespaceCount
.top();
712 while( nNamespace
-- )
713 if( rEntity
.maNamespaceDefines
[nNamespace
].maPrefix
== rPrefix
)
714 return rEntity
.maNamespaceDefines
[nNamespace
].maNamespaceURL
;
717 throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix
),
718 Reference
< XInterface
>(), Any());
721 sal_Int32
FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken
, const xmlChar
* pName
, int nNameLen
)
723 if( nNamespaceToken
!= FastToken::DONTKNOW
)
725 sal_Int32 nNameToken
= GetToken( pName
, nNameLen
);
726 if( nNameToken
!= FastToken::DONTKNOW
)
727 return nNamespaceToken
| nNameToken
;
730 return FastToken::DONTKNOW
;
738 FastSaxParserImpl
& m_rParser
;
740 rtl::Reference
<ParserThread
> m_xParser
;
742 ParserCleanup(FastSaxParserImpl
& rParser
, Entity
& rEntity
)
749 if (m_rEntity
.mpParser
)
751 if (m_rEntity
.mpParser
->myDoc
)
752 xmlFreeDoc(m_rEntity
.mpParser
->myDoc
);
753 xmlFreeParserCtxt(m_rEntity
.mpParser
);
756 m_rParser
.popEntity();
758 void setThread(const rtl::Reference
<ParserThread
> &xParser
)
766 rtl::Reference
<ParserThread
> xToJoin
= m_xParser
;
775 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
776 * the file-specific initialization work. (During a parser run, external files may be opened)
779 void FastSaxParserImpl::parseStream(const InputSource
& rStructSource
)
783 // Only one text at one time
784 MutexGuard
guard( maMutex
);
786 pushEntity(maData
, rStructSource
);
787 Entity
& rEntity
= getEntity();
788 ParserCleanup
aEnsureFree(*this, rEntity
);
790 // start the document
791 if( rEntity
.mxDocumentHandler
.is() )
793 Reference
< XLocator
> xLoc( mxDocumentLocator
.get() );
794 rEntity
.mxDocumentHandler
->setDocumentLocator( xLoc
);
795 rEntity
.mxDocumentHandler
->startDocument();
798 if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser
)
800 Reference
<css::io::XSeekable
> xSeekable(rEntity
.maStructSource
.aInputStream
, UNO_QUERY
);
801 // available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams
802 rEntity
.mbEnableThreads
= (xSeekable
.is() && xSeekable
->getLength() > 10000)
803 || (rEntity
.maStructSource
.aInputStream
->available() > 10000);
806 if (rEntity
.mbEnableThreads
)
808 rtl::Reference
<ParserThread
> xParser
= new ParserThread(this);
810 aEnsureFree
.setThread(xParser
);
813 rEntity
.maConsumeResume
.wait();
814 rEntity
.maConsumeResume
.reset();
816 osl::ResettableMutexGuard
aGuard(rEntity
.maEventProtector
);
817 while (!rEntity
.maPendingEvents
.empty())
819 if (rEntity
.maPendingEvents
.size() <= Entity::mnEventLowWater
)
820 rEntity
.maProduceResume
.set(); // start producer again
822 std::unique_ptr
<EventList
> xEventList
= std::move(rEntity
.maPendingEvents
.front());
823 rEntity
.maPendingEvents
.pop();
824 aGuard
.clear(); // unlock
826 if (!consume(*xEventList
))
829 aGuard
.reset(); // lock
831 if ( rEntity
.maPendingEvents
.size() <= Entity::mnEventLowWater
)
834 for (auto& rEvent
: xEventList
->maEvents
)
836 if (rEvent
.mxAttributes
.is())
838 rEvent
.mxAttributes
->clear();
839 if( rEntity
.mxNamespaceHandler
.is() )
840 rEvent
.mxDeclAttributes
->clear();
842 xEventList
->mbIsAttributesEmpty
= true;
847 rEntity
.maUsedEvents
.push(std::move(xEventList
));
850 aEnsureFree
.joinThread();
853 // callbacks used inside XML_Parse may have caught an exception
854 // No need to lock maSavedExceptionMutex here because parser
856 if( rEntity
.maSavedException
.hasValue() )
857 rEntity
.throwException( mxDocumentLocator
, true );
865 if( rEntity
.mxDocumentHandler
.is() )
867 rEntity
.mxDocumentHandler
->endDocument();
871 void FastSaxParserImpl::setFastDocumentHandler( const Reference
< XFastDocumentHandler
>& Handler
)
873 maData
.mxDocumentHandler
= Handler
;
876 void FastSaxParserImpl::setTokenHandler( const Reference
< XFastTokenHandler
>& xHandler
)
878 maData
.mxTokenHandler
= xHandler
;
879 maData
.mpTokenHandler
= dynamic_cast< FastTokenHandlerBase
*>( xHandler
.get() );
882 void FastSaxParserImpl::registerNamespace( const OUString
& NamespaceURL
, sal_Int32 NamespaceToken
)
884 if( NamespaceToken
>= FastToken::NAMESPACE
)
886 if( GetNamespaceToken( NamespaceURL
) == FastToken::DONTKNOW
)
888 maNamespaceMap
[ NamespaceURL
] = NamespaceToken
;
892 throw IllegalArgumentException();
895 OUString
const & FastSaxParserImpl::getNamespaceURL( const OUString
& rPrefix
)
899 return GetNamespaceURL( OUStringToOString( rPrefix
, RTL_TEXTENCODING_UTF8
) );
901 catch (const Exception
&)
904 throw IllegalArgumentException();
907 void FastSaxParserImpl::setErrorHandler(const Reference
< XErrorHandler
> & Handler
)
909 maData
.mxErrorHandler
= Handler
;
912 void FastSaxParserImpl::setNamespaceHandler( const Reference
< XFastNamespaceHandler
>& Handler
)
914 maData
.mxNamespaceHandler
= Handler
;
917 void FastSaxParserImpl::deleteUsedEvents()
919 Entity
& rEntity
= getEntity();
920 osl::ResettableMutexGuard
aGuard(rEntity
.maEventProtector
);
922 while (!rEntity
.maUsedEvents
.empty())
924 std::unique_ptr
<EventList
> xEventList
= std::move(rEntity
.maUsedEvents
.front());
925 rEntity
.maUsedEvents
.pop();
927 aGuard
.clear(); // unlock
931 aGuard
.reset(); // lock
935 void FastSaxParserImpl::produce( bool bForceFlush
)
937 Entity
& rEntity
= getEntity();
939 rEntity
.mnProducedEventsSize
>= Entity::mnEventListSize
)
941 osl::ResettableMutexGuard
aGuard(rEntity
.maEventProtector
);
943 while (rEntity
.maPendingEvents
.size() >= Entity::mnEventHighWater
)
944 { // pause parsing for a bit
945 aGuard
.clear(); // unlock
946 rEntity
.maProduceResume
.wait();
947 rEntity
.maProduceResume
.reset();
948 aGuard
.reset(); // lock
951 rEntity
.maPendingEvents
.push(std::move(rEntity
.mxProducedEvents
));
952 assert(rEntity
.mxProducedEvents
.get() == nullptr);
954 aGuard
.clear(); // unlock
956 rEntity
.maConsumeResume
.set();
960 bool FastSaxParserImpl::consume(EventList
& rEventList
)
962 Entity
& rEntity
= getEntity();
963 rEventList
.mbIsAttributesEmpty
= false;
964 for (auto& rEvent
: rEventList
.maEvents
)
966 switch (rEvent
.maType
)
968 case CallbackType::START_ELEMENT
:
969 rEntity
.startElement( &rEvent
);
971 case CallbackType::END_ELEMENT
:
972 rEntity
.endElement();
974 case CallbackType::CHARACTERS
:
975 rEntity
.characters( rEvent
.msChars
);
977 case CallbackType::PROCESSING_INSTRUCTION
:
978 rEntity
.processingInstruction(
979 rEvent
.msNamespace
, rEvent
.msElementName
); // ( target, data )
981 case CallbackType::DONE
:
983 case CallbackType::EXCEPTION
:
984 rEntity
.throwException( mxDocumentLocator
, false );
985 [[fallthrough
]]; // avoid unreachable code warning with some compilers
994 void FastSaxParserImpl::pushEntity(const ParserData
& rEntityData
,
995 xml::sax::InputSource
const& rSource
)
997 if (!rSource
.aInputStream
.is())
998 throw SAXException("No input source", Reference
<XInterface
>(), Any());
1000 maEntities
.emplace(rEntityData
);
1001 mpTop
= &maEntities
.top();
1003 mpTop
->maStructSource
= rSource
;
1005 mpTop
->maConverter
.setInputStream(mpTop
->maStructSource
.aInputStream
);
1006 if (!mpTop
->maStructSource
.sEncoding
.isEmpty())
1008 mpTop
->maConverter
.setEncoding(OUStringToOString(mpTop
->maStructSource
.sEncoding
, RTL_TEXTENCODING_ASCII_US
));
1012 void FastSaxParserImpl::popEntity()
1015 mpTop
= !maEntities
.empty() ? &maEntities
.top() : nullptr;
1018 // starts parsing with actual parser !
1019 void FastSaxParserImpl::parse()
1021 const int BUFFER_SIZE
= 16 * 1024;
1022 Sequence
< sal_Int8
> seqOut( BUFFER_SIZE
);
1024 Entity
& rEntity
= getEntity();
1026 // set all necessary C-Callbacks
1027 static xmlSAXHandler callbacks
;
1028 callbacks
.startElementNs
= call_callbackStartElement
;
1029 callbacks
.endElementNs
= call_callbackEndElement
;
1030 callbacks
.characters
= call_callbackCharacters
;
1031 callbacks
.processingInstruction
= call_callbackProcessingInstruction
;
1032 callbacks
.initialized
= XML_SAX2_MAGIC
;
1036 nRead
= rEntity
.maConverter
.readAndConvert( seqOut
, BUFFER_SIZE
);
1039 if( rEntity
.mpParser
!= nullptr )
1041 if( xmlParseChunk( rEntity
.mpParser
, reinterpret_cast<const char*>(seqOut
.getConstArray()), 0, 1 ) != XML_ERR_OK
)
1042 rEntity
.throwException( mxDocumentLocator
, true );
1047 bool bContinue
= true;
1048 if( rEntity
.mpParser
== nullptr )
1050 // create parser with proper encoding (needs the first chunk of data)
1051 rEntity
.mpParser
= xmlCreatePushParserCtxt( &callbacks
, this,
1052 reinterpret_cast<const char*>(seqOut
.getConstArray()), nRead
, nullptr );
1053 if( !rEntity
.mpParser
)
1054 throw SAXException("Couldn't create parser", Reference
< XInterface
>(), Any() );
1056 // Tell libxml2 parser to decode entities in attribute values.
1057 // coverity[unsafe_xml_parse_config] - entity support is required
1058 xmlCtxtUseOptions(rEntity
.mpParser
, XML_PARSE_NOENT
);
1062 bContinue
= xmlParseChunk( rEntity
.mpParser
, reinterpret_cast<const char*>(seqOut
.getConstArray()), nRead
, 0 )
1066 // callbacks used inside XML_Parse may have caught an exception
1069 rEntity
.throwException( mxDocumentLocator
, true );
1071 osl::ClearableMutexGuard
g(rEntity
.maSavedExceptionMutex
);
1072 if (rEntity
.maSavedException
.hasValue())
1075 rEntity
.throwException( mxDocumentLocator
, true );
1077 } while( nRead
> 0 );
1078 rEntity
.getEvent( CallbackType::DONE
);
1079 if( rEntity
.mbEnableThreads
)
1084 void FastSaxParserImpl::callbackStartElement(const xmlChar
*localName
, const xmlChar
* prefix
, const xmlChar
* URI
,
1085 int numNamespaces
, const xmlChar
** namespaces
, int numAttributes
, const xmlChar
**attributes
)
1087 if (!pendingCharacters
.empty())
1088 sendPendingCharacters();
1089 Entity
& rEntity
= getEntity();
1090 if( rEntity
.maNamespaceCount
.empty() )
1092 rEntity
.maNamespaceCount
.push(0);
1093 DefineNamespace( "xml", "http://www.w3.org/XML/1998/namespace");
1097 rEntity
.maNamespaceCount
.push( rEntity
.maNamespaceCount
.top() );
1100 // create attribute map and process namespace instructions
1101 Event
& rEvent
= rEntity
.getEvent( CallbackType::START_ELEMENT
);
1102 bool bIsAttributesEmpty
= false;
1103 if ( rEntity
.mbEnableThreads
)
1104 bIsAttributesEmpty
= rEntity
.getEventList().mbIsAttributesEmpty
;
1106 if (rEvent
.mxAttributes
.is())
1108 if( !bIsAttributesEmpty
)
1109 rEvent
.mxAttributes
->clear();
1112 rEvent
.mxAttributes
.set(
1113 new FastAttributeList( rEntity
.mxTokenHandler
,
1114 rEntity
.mpTokenHandler
) );
1116 if( rEntity
.mxNamespaceHandler
.is() )
1118 if (rEvent
.mxDeclAttributes
.is())
1120 if( !bIsAttributesEmpty
)
1121 rEvent
.mxDeclAttributes
->clear();
1124 rEvent
.mxDeclAttributes
.set(
1125 new FastAttributeList( rEntity
.mxTokenHandler
,
1126 rEntity
.mpTokenHandler
) );
1129 OUString sNamespace
;
1130 sal_Int32 nNamespaceToken
= FastToken::DONTKNOW
;
1131 if (!rEntity
.maNamespaceStack
.empty())
1133 sNamespace
= rEntity
.maNamespaceStack
.top().msName
;
1134 nNamespaceToken
= rEntity
.maNamespaceStack
.top().mnToken
;
1139 /* #158414# Each element may define new namespaces, also for attributes.
1140 First, process all namespaces, second, process the attributes after namespaces
1141 have been initialized. */
1143 // #158414# first: get namespaces
1144 for (int i
= 0; i
< numNamespaces
* 2; i
+= 2)
1146 // namespaces[] is (prefix/URI)
1147 if( namespaces
[ i
] != nullptr )
1149 DefineNamespace( OString( XML_CAST( namespaces
[ i
] )),
1150 OUString( XML_CAST( namespaces
[ i
+ 1 ] ), strlen( XML_CAST( namespaces
[ i
+ 1 ] )), RTL_TEXTENCODING_UTF8
));
1151 if( rEntity
.mxNamespaceHandler
.is() )
1152 rEvent
.mxDeclAttributes
->addUnknown( OString( XML_CAST( namespaces
[ i
] ) ), OString( XML_CAST( namespaces
[ i
+ 1 ] ) ) );
1156 // default namespace
1157 sNamespace
= OUString( XML_CAST( namespaces
[ i
+ 1 ] ), strlen( XML_CAST( namespaces
[ i
+ 1 ] )), RTL_TEXTENCODING_UTF8
);
1158 nNamespaceToken
= GetNamespaceToken( sNamespace
);
1159 if( rEntity
.mxNamespaceHandler
.is() )
1160 rEvent
.mxDeclAttributes
->addUnknown( "", OString( XML_CAST( namespaces
[ i
+ 1 ] ) ) );
1164 if ( rEntity
.mxTokenHandler
.is() )
1166 // #158414# second: fill attribute list with other attributes
1167 rEvent
.mxAttributes
->reserve( numAttributes
);
1168 for (int i
= 0; i
< numAttributes
* 5; i
+= 5)
1170 // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd )
1171 if( attributes
[ i
+ 1 ] != nullptr )
1173 sal_Int32 nAttributeToken
= GetTokenWithPrefix( attributes
[ i
+ 1 ], strlen( XML_CAST( attributes
[ i
+ 1 ] )), attributes
[ i
], strlen( XML_CAST( attributes
[ i
] )));
1174 if( nAttributeToken
!= FastToken::DONTKNOW
)
1175 rEvent
.mxAttributes
->add( nAttributeToken
, XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] );
1177 addUnknownElementWithPrefix(attributes
, i
, rEvent
.mxAttributes
);
1181 sal_Int32 nAttributeToken
= GetToken( attributes
[ i
], strlen( XML_CAST( attributes
[ i
] )));
1182 if( nAttributeToken
!= FastToken::DONTKNOW
)
1183 rEvent
.mxAttributes
->add( nAttributeToken
, XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] );
1185 rEvent
.mxAttributes
->addUnknown( XML_CAST( attributes
[ i
] ),
1186 OString( XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] ));
1190 if( prefix
!= nullptr )
1191 rEvent
.mnElementToken
= GetTokenWithPrefix( prefix
, strlen( XML_CAST( prefix
)), localName
, strlen( XML_CAST( localName
)));
1192 else if( !sNamespace
.isEmpty() )
1193 rEvent
.mnElementToken
= GetTokenWithContextNamespace( nNamespaceToken
, localName
, strlen( XML_CAST( localName
)));
1195 rEvent
.mnElementToken
= GetToken( localName
, strlen( XML_CAST( localName
)));
1199 for (int i
= 0; i
< numAttributes
* 5; i
+= 5)
1201 if( attributes
[ i
+ 1 ] != nullptr )
1202 addUnknownElementWithPrefix(attributes
, i
, rEvent
.mxAttributes
);
1204 rEvent
.mxAttributes
->addUnknown( XML_CAST( attributes
[ i
] ),
1205 OString( XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] ));
1208 rEvent
.mnElementToken
= FastToken::DONTKNOW
;
1211 if( rEvent
.mnElementToken
== FastToken::DONTKNOW
)
1213 OUString aElementPrefix
;
1214 if( prefix
!= nullptr )
1216 if ( !m_bIgnoreMissingNSDecl
|| URI
!= nullptr )
1217 sNamespace
= OUString( XML_CAST( URI
), strlen( XML_CAST( URI
)), RTL_TEXTENCODING_UTF8
);
1220 nNamespaceToken
= GetNamespaceToken( sNamespace
);
1221 aElementPrefix
= OUString( XML_CAST( prefix
), strlen( XML_CAST( prefix
)), RTL_TEXTENCODING_UTF8
);
1223 const OUString
& rElementLocalName
= OUString( XML_CAST( localName
), strlen( XML_CAST( localName
)), RTL_TEXTENCODING_UTF8
);
1224 rEvent
.msNamespace
= sNamespace
;
1225 rEvent
.msElementName
= (aElementPrefix
.isEmpty())? rElementLocalName
: aElementPrefix
+ ":" + rElementLocalName
;
1227 else // token is always preferred.
1228 rEvent
.msElementName
.clear();
1230 rEntity
.maNamespaceStack
.push( NameWithToken(sNamespace
, nNamespaceToken
) );
1231 if (rEntity
.mbEnableThreads
)
1235 SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator
->getLineNumber() << " column " << mxDocumentLocator
->getColumnNumber() << " " << ( prefix
? XML_CAST(prefix
) : "(null)" ) << ":" << localName
);
1236 rEntity
.startElement( &rEvent
);
1241 rEntity
.saveException( ::cppu::getCaughtException() );
1245 void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar
**attributes
, int i
, rtl::Reference
< FastAttributeList
> const & xAttributes
)
1247 OUString aNamespaceURI
;
1248 if ( !m_bIgnoreMissingNSDecl
|| attributes
[i
+ 2] != nullptr )
1249 aNamespaceURI
= OUString( XML_CAST( attributes
[ i
+ 2 ] ), strlen( XML_CAST( attributes
[ i
+ 2 ] )), RTL_TEXTENCODING_UTF8
);
1250 const OString
& rPrefix
= OString( XML_CAST( attributes
[ i
+ 1 ] ));
1251 const OString
& rLocalName
= OString( XML_CAST( attributes
[ i
] ));
1252 OString aQualifiedName
= (rPrefix
.isEmpty())? rLocalName
: rPrefix
+ ":" + rLocalName
;
1253 xAttributes
->addUnknown( aNamespaceURI
, aQualifiedName
,
1254 OString( XML_CAST( attributes
[ i
+ 3 ] ), attributes
[ i
+ 4 ] - attributes
[ i
+ 3 ] ));
1257 void FastSaxParserImpl::callbackEndElement()
1259 if (!pendingCharacters
.empty())
1260 sendPendingCharacters();
1261 Entity
& rEntity
= getEntity();
1262 SAL_WARN_IF(rEntity
.maNamespaceCount
.empty(), "sax", "Empty NamespaceCount");
1263 if( !rEntity
.maNamespaceCount
.empty() )
1264 rEntity
.maNamespaceCount
.pop();
1266 SAL_WARN_IF(rEntity
.maNamespaceStack
.empty(), "sax", "Empty NamespaceStack");
1267 if( !rEntity
.maNamespaceStack
.empty() )
1268 rEntity
.maNamespaceStack
.pop();
1270 rEntity
.getEvent( CallbackType::END_ELEMENT
);
1271 if (rEntity
.mbEnableThreads
)
1274 rEntity
.endElement();
1277 void FastSaxParserImpl::callbackCharacters( const xmlChar
* s
, int nLen
)
1279 // SAX interface allows that the characters callback splits content of one XML node
1280 // (e.g. because there's an entity that needs decoding), however for consumers it's
1281 // simpler FastSaxParser's character callback provides the whole string at once,
1282 // so merge data from possible multiple calls and send them at once (before the element
1283 // ends or another one starts).
1285 // We use a std::vector<char> to avoid calling into the OUString constructor more than once when
1286 // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly
1287 // often in writer documents.
1288 int nOriginalLen
= pendingCharacters
.size();
1289 pendingCharacters
.resize(nOriginalLen
+ nLen
);
1290 memcpy(pendingCharacters
.data() + nOriginalLen
, s
, nLen
);
1293 void FastSaxParserImpl::sendPendingCharacters()
1295 Entity
& rEntity
= getEntity();
1296 OUString
sChars( pendingCharacters
.data(), pendingCharacters
.size(), RTL_TEXTENCODING_UTF8
);
1297 if (rEntity
.mbEnableThreads
)
1299 Event
& rEvent
= rEntity
.getEvent( CallbackType::CHARACTERS
);
1300 rEvent
.msChars
= sChars
;
1304 rEntity
.characters( sChars
);
1305 pendingCharacters
.resize(0);
1308 void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar
*target
, const xmlChar
*data
)
1310 if (!pendingCharacters
.empty())
1311 sendPendingCharacters();
1312 Entity
& rEntity
= getEntity();
1313 Event
& rEvent
= rEntity
.getEvent( CallbackType::PROCESSING_INSTRUCTION
);
1315 // This event is very rare, so no need to waste extra space for this
1316 // Using namespace and element strings to be target and data in that order.
1317 rEvent
.msNamespace
= OUString( XML_CAST( target
), strlen( XML_CAST( target
) ), RTL_TEXTENCODING_UTF8
);
1318 if ( data
!= nullptr )
1319 rEvent
.msElementName
= OUString( XML_CAST( data
), strlen( XML_CAST( data
) ), RTL_TEXTENCODING_UTF8
);
1321 rEvent
.msElementName
.clear();
1323 if (rEntity
.mbEnableThreads
)
1326 rEntity
.processingInstruction( rEvent
.msNamespace
, rEvent
.msElementName
);
1329 FastSaxParser::FastSaxParser() : mpImpl(new FastSaxParserImpl
) {}
1331 FastSaxParser::~FastSaxParser()
1336 FastSaxParser::initialize(css::uno::Sequence
< css::uno::Any
> const& rArguments
)
1338 if (rArguments
.hasElements())
1341 if ( rArguments
[0] >>= str
)
1343 if ( str
== "IgnoreMissingNSDecl" )
1344 mpImpl
->m_bIgnoreMissingNSDecl
= true;
1345 else if ( str
== "DoSmeplease" )
1346 ; //just ignore as this is already immune to billion laughs
1347 else if ( str
== "DisableThreadedParser" )
1348 mpImpl
->m_bDisableThreadedParser
= true;
1350 throw IllegalArgumentException();
1353 throw IllegalArgumentException();
1357 void FastSaxParser::parseStream( const xml::sax::InputSource
& aInputSource
)
1359 mpImpl
->parseStream(aInputSource
);
1362 void FastSaxParser::setFastDocumentHandler( const uno::Reference
<xml::sax::XFastDocumentHandler
>& Handler
)
1364 mpImpl
->setFastDocumentHandler(Handler
);
1367 void FastSaxParser::setTokenHandler( const uno::Reference
<xml::sax::XFastTokenHandler
>& Handler
)
1369 mpImpl
->setTokenHandler(Handler
);
1372 void FastSaxParser::registerNamespace( const OUString
& NamespaceURL
, sal_Int32 NamespaceToken
)
1374 mpImpl
->registerNamespace(NamespaceURL
, NamespaceToken
);
1377 OUString
FastSaxParser::getNamespaceURL( const OUString
& rPrefix
)
1379 return mpImpl
->getNamespaceURL(rPrefix
);
1382 void FastSaxParser::setErrorHandler( const uno::Reference
< xml::sax::XErrorHandler
>& Handler
)
1384 mpImpl
->setErrorHandler(Handler
);
1387 void FastSaxParser::setEntityResolver( const uno::Reference
< xml::sax::XEntityResolver
>& )
1392 void FastSaxParser::setLocale( const lang::Locale
& )
1397 void FastSaxParser::setNamespaceHandler( const uno::Reference
< css::xml::sax::XFastNamespaceHandler
>& Handler
)
1399 mpImpl
->setNamespaceHandler(Handler
);
1402 OUString
FastSaxParser::getImplementationName()
1404 return "com.sun.star.comp.extensions.xml.sax.FastParser";
1407 sal_Bool
FastSaxParser::supportsService( const OUString
& ServiceName
)
1409 return cppu::supportsService(this, ServiceName
);
1412 uno::Sequence
<OUString
> FastSaxParser::getSupportedServiceNames()
1414 Sequence
<OUString
> seq
{ "com.sun.star.xml.sax.FastParser" };
1418 } // namespace sax_fastparser
1420 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
1421 com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation(
1422 css::uno::XComponentContext
*,
1423 css::uno::Sequence
<css::uno::Any
> const &)
1425 return cppu::acquire(new FastSaxParser
);
1428 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */