tdf#130857 qt weld: Support mail merge "Server Auth" dialog
[LibreOffice.git] / sax / source / fastparser / fastparser.cxx
blob8815cd58329e1fe3ac0f03db80b2f86931d3c6b3
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <sax/fastparser.hxx>
21 #include <sax/fastattribs.hxx>
22 #include <utility>
23 #include <xml2utf.hxx>
25 #include <com/sun/star/io/XSeekable.hpp>
26 #include <com/sun/star/lang/DisposedException.hpp>
27 #include <com/sun/star/lang/IllegalArgumentException.hpp>
28 #include <com/sun/star/uno/XComponentContext.hpp>
29 #include <com/sun/star/xml/sax/FastToken.hpp>
30 #include <com/sun/star/xml/sax/SAXParseException.hpp>
31 #include <com/sun/star/xml/sax/XFastContextHandler.hpp>
32 #include <cppuhelper/implbase.hxx>
33 #include <cppuhelper/supportsservice.hxx>
34 #include <cppuhelper/exc_hlp.hxx>
35 #include <osl/conditn.hxx>
36 #include <rtl/ref.hxx>
37 #include <sal/log.hxx>
38 #include <salhelper/thread.hxx>
39 #include <comphelper/diagnose_ex.hxx>
40 #include <o3tl/string_view.hxx>
42 #include <queue>
43 #include <memory>
44 #include <mutex>
45 #include <optional>
46 #include <stack>
47 #include <string_view>
48 #include <unordered_map>
49 #include <vector>
50 #include <cassert>
51 #include <cstring>
52 #include <libxml/parser.h>
54 // Inverse of libxml's BAD_CAST.
55 #define XML_CAST( str ) reinterpret_cast< const char* >( str )
57 using namespace ::osl;
58 using namespace ::cppu;
59 using namespace ::com::sun::star::uno;
60 using namespace ::com::sun::star::lang;
61 using namespace ::com::sun::star::xml::sax;
62 using namespace ::com::sun::star::io;
63 using namespace com::sun::star;
64 using namespace sax_fastparser;
66 static void NormalizeURI( OUString& rName );
68 namespace {
70 struct Event;
71 class FastLocatorImpl;
72 struct NamespaceDefine;
73 struct Entity;
75 typedef std::unordered_map< OUString, sal_Int32 > NamespaceMap;
77 struct EventList
79 std::vector<Event> maEvents;
80 bool mbIsAttributesEmpty;
/// Kind of a queued parser event; consume() dispatches on this value.
enum class CallbackType
{
    START_ELEMENT,
    END_ELEMENT,
    CHARACTERS,
    PROCESSING_INSTRUCTION,
    DONE,      ///< consume() returns false when it sees this event
    EXCEPTION  ///< consume() calls Entity::throwException() for this event
};
85 struct Event
87 CallbackType maType;
88 sal_Int32 mnElementToken;
89 OUString msNamespace;
90 OUString msElementName;
91 rtl::Reference< FastAttributeList > mxAttributes;
92 rtl::Reference< FastAttributeList > mxDeclAttributes;
93 OUString msChars;
96 struct NameWithToken
98 OUString msName;
99 sal_Int32 mnToken;
101 NameWithToken(OUString sName, sal_Int32 nToken) :
102 msName(std::move(sName)), mnToken(nToken) {}
105 struct SaxContext
107 Reference< XFastContextHandler > mxContext;
108 sal_Int32 mnElementToken;
109 std::optional<OUString> moNamespace;
110 std::optional<OUString> moElementName;
112 SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName ):
113 mnElementToken(nElementToken)
115 if (nElementToken == FastToken::DONTKNOW)
117 moNamespace = aNamespace;
118 moElementName = aElementName;
123 struct ParserData
125 css::uno::Reference< css::xml::sax::XFastDocumentHandler > mxDocumentHandler;
126 rtl::Reference<FastTokenHandlerBase> mxTokenHandler;
127 css::uno::Reference< css::xml::sax::XErrorHandler > mxErrorHandler;
128 css::uno::Reference< css::xml::sax::XFastNamespaceHandler >mxNamespaceHandler;
130 ParserData();
133 struct NamespaceDefine
135 OString maPrefix;
136 sal_Int32 mnToken;
137 OUString maNamespaceURL;
139 NamespaceDefine( OString aPrefix, sal_Int32 nToken, OUString aNamespaceURL )
140 : maPrefix(std::move( aPrefix )), mnToken( nToken ), maNamespaceURL(std::move( aNamespaceURL )) {}
141 NamespaceDefine() : mnToken(-1) {}
144 // Entity binds all information needed for a single file | single call of parseStream
145 struct Entity : public ParserData
147 // Amount of work producer sends to consumer in one iteration:
148 static const size_t mnEventListSize = 1000;
150 // unique for each Entity instance:
152 // Number of valid events in mxProducedEvents:
153 size_t mnProducedEventsSize;
154 std::optional<EventList> mxProducedEvents;
155 std::queue<EventList> maPendingEvents;
156 std::queue<EventList> maUsedEvents;
157 std::mutex maEventProtector;
159 static const size_t mnEventLowWater = 4;
160 static const size_t mnEventHighWater = 8;
161 osl::Condition maConsumeResume;
162 osl::Condition maProduceResume;
163 // Event we use to store data if threading is disabled:
164 Event maSharedEvent;
166 // copied in copy constructor:
168 // Allow to disable threading for small documents:
169 bool mbEnableThreads;
170 css::xml::sax::InputSource maStructSource;
171 xmlParserCtxtPtr mpParser;
172 ::sax_expatwrap::XMLFile2UTFConverter maConverter;
174 // Exceptions cannot be thrown through the C-XmlParser (possible
175 // resource leaks), therefore any exception thrown by a UNO callback
176 // must be saved somewhere until the C-XmlParser is stopped.
177 css::uno::Any maSavedException;
178 std::mutex maSavedExceptionMutex;
179 void saveException( const Any & e );
180 // Thread-safe check if maSavedException has value
181 bool hasException();
182 void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
183 bool mbDuringParse );
185 std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack;
186 /* Context for main thread consuming events.
187 * startElement() stores the data, which characters() and endElement() uses
189 std::stack< SaxContext, std::vector<SaxContext> > maContextStack;
190 // Determines which elements of maNamespaceDefines are valid in current context
191 std::stack< sal_uInt32, std::vector<sal_uInt32> > maNamespaceCount;
192 std::vector< NamespaceDefine > maNamespaceDefines;
194 explicit Entity( const ParserData& rData );
195 Entity( const Entity& rEntity ) = delete;
196 Entity& operator=( const Entity& rEntity ) = delete;
197 void startElement( Event const *pEvent );
198 void characters( const OUString& sChars );
199 void endElement();
200 void processingInstruction( const OUString& rTarget, const OUString& rData );
201 EventList& getEventList();
202 Event& getEvent( CallbackType aType );
205 // Stuff for custom entity names
206 struct ReplacementPair
208 OUString name;
209 OUString replacement;
211 inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs)
213 return lhs.name < rhs.name;
215 inline bool operator<(const ReplacementPair& lhs, const char* rhs)
217 return lhs.name.compareToAscii(rhs) < 0;
220 } // namespace
222 namespace sax_fastparser {
224 class FastSaxParserImpl
226 public:
227 explicit FastSaxParserImpl();
228 ~FastSaxParserImpl();
230 private:
231 std::vector<ReplacementPair> m_Replacements;
232 std::vector<xmlEntityPtr> m_TemporalEntities;
234 public:
235 // XFastParser
236 /// @throws css::xml::sax::SAXException
237 /// @throws css::io::IOException
238 /// @throws css::uno::RuntimeException
239 void parseStream( const css::xml::sax::InputSource& aInputSource );
240 /// @throws css::uno::RuntimeException
241 void setFastDocumentHandler( const css::uno::Reference< css::xml::sax::XFastDocumentHandler >& Handler );
242 /// @throws css::uno::RuntimeException
243 void setTokenHandler( const css::uno::Reference< css::xml::sax::XFastTokenHandler >& Handler );
244 /// @throws css::lang::IllegalArgumentException
245 /// @throws css::uno::RuntimeException
246 void registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken );
247 /// @throws css::lang::IllegalArgumentException
248 /// @throws css::uno::RuntimeException
249 OUString const & getNamespaceURL( std::u16string_view rPrefix );
250 /// @throws css::uno::RuntimeException
251 void setErrorHandler( const css::uno::Reference< css::xml::sax::XErrorHandler >& Handler );
252 /// @throws css::uno::RuntimeException
253 void setNamespaceHandler( const css::uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler);
254 // Fake DTD file
255 void setCustomEntityNames(
256 const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements);
258 // called by the C callbacks of the expat parser
259 void callbackStartElement( const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
260 int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes );
261 void callbackEndElement();
262 void callbackCharacters( const xmlChar* s, int nLen );
263 void callbackProcessingInstruction( const xmlChar *target, const xmlChar *data );
264 xmlEntityPtr callbackGetEntity( const xmlChar *name );
266 void pushEntity(const ParserData&, xml::sax::InputSource const&);
267 void popEntity();
268 Entity& getEntity() { return *mpTop; }
269 void parse();
270 void produce( bool bForceFlush = false );
271 bool m_bIgnoreMissingNSDecl;
272 bool m_bDisableThreadedParser;
274 private:
275 bool consume(EventList&);
276 void deleteUsedEvents();
277 void sendPendingCharacters();
278 void addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes);
280 sal_Int32 GetToken( const xmlChar* pName );
281 /// @throws css::xml::sax::SAXException
282 sal_Int32 GetTokenWithPrefix( const xmlChar* pPrefix, const xmlChar* pName );
283 /// @throws css::xml::sax::SAXException
284 OUString const & GetNamespaceURL( std::string_view rPrefix );
285 sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL );
286 sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName );
287 void DefineNamespace( const OString& rPrefix, const OUString& namespaceURL );
289 private:
290 std::mutex maMutex; ///< Protecting whole parseStream() execution
291 ::rtl::Reference< FastLocatorImpl > mxDocumentLocator;
292 NamespaceMap maNamespaceMap;
294 ParserData maData; /// Cached parser configuration for next call of parseStream().
296 Entity *mpTop; /// std::stack::top() is amazingly slow => cache this.
297 std::stack< Entity > maEntities; /// Entity stack for each call of parseStream().
298 std::vector<char> pendingCharacters; /// Data from characters() callback that needs to be sent.
301 } // namespace sax_fastparser
303 namespace {
305 class ParserThread: public salhelper::Thread
307 FastSaxParserImpl *mpParser;
308 public:
309 explicit ParserThread(FastSaxParserImpl *pParser): Thread("Parser"), mpParser(pParser) {}
310 private:
311 virtual void execute() override
315 mpParser->parse();
317 catch (...)
319 Entity &rEntity = mpParser->getEntity();
320 rEntity.getEvent( CallbackType::EXCEPTION );
321 mpParser->produce( true );
326 extern "C" {
328 static void call_callbackStartElement(void *userData, const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
329 int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes)
331 FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
332 pFastParser->callbackStartElement( localName, prefix, URI, numNamespaces, namespaces, numAttributes, attributes );
335 static void call_callbackEndElement(void *userData, const xmlChar* /*localName*/, const xmlChar* /*prefix*/, const xmlChar* /*URI*/)
337 FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
338 pFastParser->callbackEndElement();
341 static void call_callbackCharacters( void *userData , const xmlChar *s , int nLen )
343 FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
344 pFastParser->callbackCharacters( s, nLen );
347 static void call_callbackProcessingInstruction( void *userData, const xmlChar *target, const xmlChar *data )
349 FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
350 pFastParser->callbackProcessingInstruction( target, data );
353 static xmlEntityPtr call_callbackGetEntity( void *userData, const xmlChar *name)
355 FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData );
356 return pFastParser->callbackGetEntity( name );
361 class FastLocatorImpl : public WeakImplHelper< XLocator >
363 public:
364 explicit FastLocatorImpl(FastSaxParserImpl *p) : mpParser(p) {}
366 void dispose() { mpParser = nullptr; }
367 /// @throws RuntimeException
368 void checkDispose() const { if( !mpParser ) throw DisposedException(); }
370 //XLocator
371 virtual sal_Int32 SAL_CALL getColumnNumber() override;
372 virtual sal_Int32 SAL_CALL getLineNumber() override;
373 virtual OUString SAL_CALL getPublicId() override;
374 virtual OUString SAL_CALL getSystemId() override;
376 private:
377 FastSaxParserImpl *mpParser;
380 sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber()
382 checkDispose();
383 return xmlSAX2GetColumnNumber( mpParser->getEntity().mpParser );
386 sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber()
388 checkDispose();
389 return xmlSAX2GetLineNumber( mpParser->getEntity().mpParser );
392 OUString SAL_CALL FastLocatorImpl::getPublicId()
394 checkDispose();
395 return mpParser->getEntity().maStructSource.sPublicId;
398 OUString SAL_CALL FastLocatorImpl::getSystemId()
400 checkDispose();
401 return mpParser->getEntity().maStructSource.sSystemId;
404 ParserData::ParserData()
407 Entity::Entity(const ParserData& rData)
408 : ParserData(rData)
409 , mnProducedEventsSize(0)
410 , mbEnableThreads(false)
411 , mpParser(nullptr)
415 void Entity::startElement( Event const *pEvent )
417 const sal_Int32& nElementToken = pEvent->mnElementToken;
418 const OUString& aNamespace = pEvent->msNamespace;
419 const OUString& aElementName = pEvent->msElementName;
421 // Use un-wrapped pointers to avoid significant acquire/release overhead
422 XFastContextHandler *pParentContext = nullptr;
423 if( !maContextStack.empty() )
425 pParentContext = maContextStack.top().mxContext.get();
426 if( !pParentContext )
428 maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) );
429 return;
433 maContextStack.push( SaxContext( nElementToken, aNamespace, aElementName ) );
437 const Reference< XFastAttributeList > xAttr( pEvent->mxAttributes );
438 Reference< XFastContextHandler > xContext;
440 if ( mxNamespaceHandler.is() )
442 const Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes();
443 for (const auto& rNSDeclAttrib : NSDeclAttribs)
445 mxNamespaceHandler->registerNamespace( rNSDeclAttrib.Name, rNSDeclAttrib.Value );
449 if( nElementToken == FastToken::DONTKNOW )
451 if( pParentContext )
452 xContext = pParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr );
453 else if( mxDocumentHandler.is() )
454 xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
456 if( xContext.is() )
458 xContext->startUnknownElement( aNamespace, aElementName, xAttr );
461 else
463 if( pParentContext )
464 xContext = pParentContext->createFastChildContext( nElementToken, xAttr );
465 else if( mxDocumentHandler.is() )
466 xContext = mxDocumentHandler->createFastChildContext( nElementToken, xAttr );
468 if( xContext.is() )
469 xContext->startFastElement( nElementToken, xAttr );
471 // swap the reference we own in to avoid referencing thrash.
472 maContextStack.top().mxContext = std::move( xContext );
474 catch (...)
476 saveException( ::cppu::getCaughtException() );
480 void Entity::characters( const OUString& sChars )
482 if (maContextStack.empty())
484 // Malformed XML stream !?
485 return;
488 XFastContextHandler * pContext( maContextStack.top().mxContext.get() );
489 if( pContext ) try
491 pContext->characters( sChars );
493 catch (...)
495 saveException( ::cppu::getCaughtException() );
499 void Entity::endElement()
501 if (maContextStack.empty())
503 // Malformed XML stream !?
504 return;
507 const SaxContext& aContext = maContextStack.top();
508 XFastContextHandler* pContext( aContext.mxContext.get() );
509 if( pContext )
512 sal_Int32 nElementToken = aContext.mnElementToken;
513 if( nElementToken != FastToken::DONTKNOW )
514 pContext->endFastElement( nElementToken );
515 else
516 pContext->endUnknownElement( *aContext.moNamespace, *aContext.moElementName );
518 catch (...)
520 saveException( ::cppu::getCaughtException() );
522 maContextStack.pop();
525 void Entity::processingInstruction( const OUString& rTarget, const OUString& rData )
527 if( mxDocumentHandler.is() ) try
529 mxDocumentHandler->processingInstruction( rTarget, rData );
531 catch (...)
533 saveException( ::cppu::getCaughtException() );
537 EventList& Entity::getEventList()
539 if (!mxProducedEvents)
541 std::unique_lock aGuard(maEventProtector);
542 if (!maUsedEvents.empty())
544 mxProducedEvents = std::move(maUsedEvents.front());
545 maUsedEvents.pop();
546 aGuard.unlock(); // unlock
547 mnProducedEventsSize = 0;
549 if (!mxProducedEvents)
551 mxProducedEvents.emplace();
552 mxProducedEvents->maEvents.resize(mnEventListSize);
553 mxProducedEvents->mbIsAttributesEmpty = false;
554 mnProducedEventsSize = 0;
557 return *mxProducedEvents;
560 Event& Entity::getEvent( CallbackType aType )
562 if (!mbEnableThreads)
563 return maSharedEvent;
565 EventList& rEventList = getEventList();
566 if (mnProducedEventsSize == rEventList.maEvents.size())
568 SAL_WARN_IF(!maSavedException.hasValue(), "sax",
569 "Event vector should only exceed " << mnEventListSize <<
570 " temporarily while an exception is pending");
571 rEventList.maEvents.resize(mnProducedEventsSize + 1);
573 Event& rEvent = rEventList.maEvents[mnProducedEventsSize++];
574 rEvent.maType = aType;
575 return rEvent;
578 OUString lclGetErrorMessage( xmlParserCtxtPtr ctxt, std::u16string_view sSystemId, sal_Int32 nLine )
580 const char* pMessage;
581 const xmlError* error = xmlCtxtGetLastError( ctxt );
582 if( error && error->message )
583 pMessage = error->message;
584 else
585 pMessage = "unknown error";
586 return OUString::Concat("[") + sSystemId + " line " + OUString::number(nLine) + "]: " +
587 OUString(pMessage, strlen(pMessage), RTL_TEXTENCODING_ASCII_US);
590 // throw an exception, but avoid callback if
591 // during a threaded produce
592 void Entity::throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
593 bool mbDuringParse )
595 // Error during parsing !
596 Any savedException;
598 std::scoped_lock g(maSavedExceptionMutex);
599 if (maSavedException.hasValue())
601 savedException.setValue(&maSavedException, cppu::UnoType<decltype(maSavedException)>::get());
604 SAXParseException aExcept(
605 lclGetErrorMessage( mpParser,
606 xDocumentLocator->getSystemId(),
607 xDocumentLocator->getLineNumber() ),
608 Reference< XInterface >(),
609 savedException,
610 xDocumentLocator->getPublicId(),
611 xDocumentLocator->getSystemId(),
612 xDocumentLocator->getLineNumber(),
613 xDocumentLocator->getColumnNumber()
616 // error handler is set, it may throw the exception
617 if( !mbDuringParse || !mbEnableThreads )
619 if (mxErrorHandler.is() )
620 mxErrorHandler->fatalError( Any( aExcept ) );
623 // error handler has not thrown, but parsing must stop => throw ourselves
624 throw aExcept;
627 // In the single threaded case we emit events via our C
628 // callbacks, so any exception caught must be queued up until
629 // we can safely re-throw it from our C++ parent of parse()
631 // If multi-threaded, we need to push an EXCEPTION event, at
632 // which point we transfer ownership of maSavedException to
633 // the consuming thread.
634 void Entity::saveException( const Any & e )
636 // fdo#81214 - allow the parser to run on after an exception,
637 // unexpectedly some 'startElements' produce a UNO_QUERY_THROW
638 // for XComponent; and yet expect to continue parsing.
639 SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e));
640 std::scoped_lock g(maSavedExceptionMutex);
641 if (maSavedException.hasValue())
643 SAL_INFO("sax.fastparser", "discarding exception, already have one");
645 else
647 maSavedException = e;
651 bool Entity::hasException()
653 std::scoped_lock g(maSavedExceptionMutex);
654 return maSavedException.hasValue();
657 } // namespace
659 namespace sax_fastparser {
661 FastSaxParserImpl::FastSaxParserImpl() :
662 m_bIgnoreMissingNSDecl(false),
663 m_bDisableThreadedParser(false),
664 mpTop(nullptr)
666 mxDocumentLocator.set( new FastLocatorImpl( this ) );
669 FastSaxParserImpl::~FastSaxParserImpl()
671 if( mxDocumentLocator.is() )
672 mxDocumentLocator->dispose();
673 for (auto& entity : m_TemporalEntities)
675 if (!entity)
676 continue;
677 xmlNodePtr pPtr = reinterpret_cast<xmlNodePtr>(entity);
678 xmlUnlinkNode(pPtr);
679 xmlFreeNode(pPtr);
683 void FastSaxParserImpl::DefineNamespace( const OString& rPrefix, const OUString& namespaceURL )
685 Entity& rEntity = getEntity();
686 assert(!rEntity.maNamespaceCount.empty()); // need a context!
688 sal_uInt32 nOffset = rEntity.maNamespaceCount.top()++;
689 if( rEntity.maNamespaceDefines.size() <= nOffset )
690 rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
692 rEntity.maNamespaceDefines[nOffset] = NamespaceDefine( rPrefix, GetNamespaceToken( namespaceURL ), namespaceURL );
695 sal_Int32 FastSaxParserImpl::GetToken(const xmlChar* pName)
697 return FastTokenHandlerBase::getTokenFromChars( getEntity(). mxTokenHandler.get(),
698 XML_CAST( pName ) ); // uses utf-8
701 sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( const xmlChar* pPrefix, const xmlChar* pName )
703 Entity& rEntity = getEntity();
704 if (rEntity.maNamespaceCount.empty())
705 return FastToken::DONTKNOW;
707 std::string_view sPrefix(XML_CAST(pPrefix));
708 sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
709 while( nNamespace-- )
711 const auto & rNamespaceDefine = rEntity.maNamespaceDefines[nNamespace];
712 if( rNamespaceDefine.maPrefix == sPrefix )
713 return GetTokenWithContextNamespace(rNamespaceDefine.mnToken, pName);
716 if (!m_bIgnoreMissingNSDecl)
717 throw SAXException("No namespace defined for " + OStringToOUString(sPrefix,
718 RTL_TEXTENCODING_UTF8), {}, {});
720 return FastToken::DONTKNOW;
723 sal_Int32 FastSaxParserImpl::GetNamespaceToken( const OUString& rNamespaceURL )
725 NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) );
726 if( aIter != maNamespaceMap.end() )
727 return (*aIter).second;
728 else
729 return FastToken::DONTKNOW;
732 OUString const & FastSaxParserImpl::GetNamespaceURL( std::string_view rPrefix )
734 Entity& rEntity = getEntity();
735 if( !rEntity.maNamespaceCount.empty() )
737 sal_uInt32 nNamespace = rEntity.maNamespaceCount.top();
738 while( nNamespace-- )
739 if( rEntity.maNamespaceDefines[nNamespace].maPrefix == rPrefix )
740 return rEntity.maNamespaceDefines[nNamespace].maNamespaceURL;
743 throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix),
744 Reference< XInterface >(), Any());
747 sal_Int32 FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName )
749 if( nNamespaceToken != FastToken::DONTKNOW )
751 sal_Int32 nNameToken = GetToken( pName );
752 if( nNameToken != FastToken::DONTKNOW )
753 return nNamespaceToken | nNameToken;
756 return FastToken::DONTKNOW;
759 namespace
761 class ParserCleanup
763 private:
764 FastSaxParserImpl& m_rParser;
765 Entity& m_rEntity;
766 rtl::Reference<ParserThread> m_xParser;
767 public:
768 ParserCleanup(FastSaxParserImpl& rParser, Entity& rEntity)
769 : m_rParser(rParser)
770 , m_rEntity(rEntity)
773 ~ParserCleanup()
775 if (m_rEntity.mpParser)
777 if (m_rEntity.mpParser->myDoc)
778 xmlFreeDoc(m_rEntity.mpParser->myDoc);
779 xmlFreeParserCtxt(m_rEntity.mpParser);
781 joinThread();
782 m_rParser.popEntity();
784 void setThread(const rtl::Reference<ParserThread> &xParser)
786 m_xParser = xParser;
788 void joinThread()
790 if (m_xParser.is())
792 rtl::Reference<ParserThread> xToJoin = m_xParser;
793 m_xParser.clear();
794 xToJoin->join();
799 /***************
801 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
802 * the file-specific initialization work. (During a parser run, external files may be opened)
804 ****************/
805 void FastSaxParserImpl::parseStream(const InputSource& rStructSource)
807 xmlInitParser();
809 // Only one text at one time
810 std::unique_lock guard( maMutex );
812 pushEntity(maData, rStructSource);
813 Entity& rEntity = getEntity();
814 ParserCleanup aEnsureFree(*this, rEntity);
816 // start the document
817 if( rEntity.mxDocumentHandler.is() )
819 rEntity.mxDocumentHandler->setDocumentLocator( mxDocumentLocator );
820 rEntity.mxDocumentHandler->startDocument();
823 #ifdef EMSCRIPTEN
824 rEntity.mbEnableThreads = false;
825 #else
826 if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser)
828 Reference<css::io::XSeekable> xSeekable(rEntity.maStructSource.aInputStream, UNO_QUERY);
829 // available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams
830 rEntity.mbEnableThreads = (xSeekable.is() && xSeekable->getLength() > 10000)
831 || (rEntity.maStructSource.aInputStream->available() > 10000);
833 #endif
835 if (rEntity.mbEnableThreads)
837 rtl::Reference<ParserThread> xParser = new ParserThread(this);
838 xParser->launch();
839 aEnsureFree.setThread(xParser);
840 bool done = false;
841 do {
842 rEntity.maConsumeResume.wait();
843 rEntity.maConsumeResume.reset();
845 std::unique_lock aGuard(rEntity.maEventProtector);
846 while (!rEntity.maPendingEvents.empty())
848 if (rEntity.maPendingEvents.size() <= Entity::mnEventLowWater)
849 rEntity.maProduceResume.set(); // start producer again
851 EventList aEventList = std::move(rEntity.maPendingEvents.front());
852 rEntity.maPendingEvents.pop();
853 aGuard.unlock(); // unlock
855 if (!consume(aEventList))
856 done = true;
858 aGuard.lock(); // lock
860 if ( rEntity.maPendingEvents.size() <= Entity::mnEventLowWater )
862 aGuard.unlock();
863 for (auto& rEvent : aEventList.maEvents)
865 if (rEvent.mxAttributes.is())
867 rEvent.mxAttributes->clear();
868 if( rEntity.mxNamespaceHandler.is() )
869 rEvent.mxDeclAttributes->clear();
871 aEventList.mbIsAttributesEmpty = true;
873 aGuard.lock();
876 rEntity.maUsedEvents.push(std::move(aEventList));
878 } while (!done);
879 aEnsureFree.joinThread();
880 deleteUsedEvents();
882 // callbacks used inside XML_Parse may have caught an exception
883 // No need to lock maSavedExceptionMutex here because parser
884 // thread is joined.
885 if( rEntity.maSavedException.hasValue() )
886 rEntity.throwException( mxDocumentLocator, true );
888 else
890 parse();
893 // finish document
894 if( rEntity.mxDocumentHandler.is() )
896 rEntity.mxDocumentHandler->endDocument();
900 void FastSaxParserImpl::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler )
902 maData.mxDocumentHandler = Handler;
905 void FastSaxParserImpl::setTokenHandler( const Reference< XFastTokenHandler >& xHandler )
907 assert( dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ) && "we expect this handler to be a subclass of FastTokenHandlerBase" );
908 maData.mxTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() );
911 void FastSaxParserImpl::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
913 if( NamespaceToken < FastToken::NAMESPACE )
914 throw IllegalArgumentException("Invalid namespace token " + OUString::number(NamespaceToken), css::uno::Reference<css::uno::XInterface >(), 0);
916 if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW )
918 maNamespaceMap[ NamespaceURL ] = NamespaceToken;
919 return;
921 throw IllegalArgumentException("namespace URL is already registered: " + NamespaceURL, css::uno::Reference<css::uno::XInterface >(), 0);
924 OUString const & FastSaxParserImpl::getNamespaceURL( std::u16string_view rPrefix )
928 return GetNamespaceURL( OUStringToOString( rPrefix, RTL_TEXTENCODING_UTF8 ) );
930 catch (const Exception&)
933 throw IllegalArgumentException();
936 void FastSaxParserImpl::setErrorHandler(const Reference< XErrorHandler > & Handler)
938 maData.mxErrorHandler = Handler;
941 void FastSaxParserImpl::setNamespaceHandler( const Reference< XFastNamespaceHandler >& Handler )
943 maData.mxNamespaceHandler = Handler;
946 void FastSaxParserImpl::setCustomEntityNames(
947 const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
949 m_Replacements.resize(replacements.size());
950 for (size_t i = 0; i < replacements.size(); ++i)
952 m_Replacements[i].name = replacements[i].First;
953 m_Replacements[i].replacement = replacements[i].Second;
955 if (m_Replacements.size() > 1)
956 std::sort(m_Replacements.begin(), m_Replacements.end());
959 void FastSaxParserImpl::deleteUsedEvents()
961 Entity& rEntity = getEntity();
962 std::unique_lock aGuard(rEntity.maEventProtector);
964 while (!rEntity.maUsedEvents.empty())
966 { // the block makes sure that aEventList is destructed outside the lock
967 EventList aEventList = std::move(rEntity.maUsedEvents.front());
968 rEntity.maUsedEvents.pop();
970 aGuard.unlock(); // unlock
973 aGuard.lock(); // lock
977 void FastSaxParserImpl::produce( bool bForceFlush )
979 Entity& rEntity = getEntity();
980 if (!(bForceFlush ||
981 rEntity.mnProducedEventsSize >= Entity::mnEventListSize))
982 return;
984 std::unique_lock aGuard(rEntity.maEventProtector);
986 while (rEntity.maPendingEvents.size() >= Entity::mnEventHighWater)
987 { // pause parsing for a bit
988 aGuard.unlock(); // unlock
989 rEntity.maProduceResume.wait();
990 rEntity.maProduceResume.reset();
991 aGuard.lock(); // lock
994 rEntity.maPendingEvents.push(std::move(*rEntity.mxProducedEvents));
995 rEntity.mxProducedEvents.reset();
996 assert(!rEntity.mxProducedEvents);
998 aGuard.unlock(); // unlock
1000 rEntity.maConsumeResume.set();
1003 bool FastSaxParserImpl::consume(EventList& rEventList)
1005 Entity& rEntity = getEntity();
1006 rEventList.mbIsAttributesEmpty = false;
1007 for (auto& rEvent : rEventList.maEvents)
1009 switch (rEvent.maType)
1011 case CallbackType::START_ELEMENT:
1012 rEntity.startElement( &rEvent );
1013 break;
1014 case CallbackType::END_ELEMENT:
1015 rEntity.endElement();
1016 break;
1017 case CallbackType::CHARACTERS:
1018 rEntity.characters( rEvent.msChars );
1019 break;
1020 case CallbackType::PROCESSING_INSTRUCTION:
1021 rEntity.processingInstruction(
1022 rEvent.msNamespace, rEvent.msElementName ); // ( target, data )
1023 break;
1024 case CallbackType::DONE:
1025 return false;
1026 case CallbackType::EXCEPTION:
1027 rEntity.throwException( mxDocumentLocator, false );
1028 [[fallthrough]]; // avoid unreachable code warning with some compilers
1029 default:
1030 assert(false);
1031 return false;
1034 return true;
1037 void FastSaxParserImpl::pushEntity(const ParserData& rEntityData,
1038 xml::sax::InputSource const& rSource)
1040 if (!rSource.aInputStream.is())
1041 throw SAXException(u"No input source"_ustr, Reference<XInterface>(), Any());
1043 maEntities.emplace(rEntityData);
1044 mpTop = &maEntities.top();
1046 mpTop->maStructSource = rSource;
1048 mpTop->maConverter.setInputStream(mpTop->maStructSource.aInputStream);
1049 if (!mpTop->maStructSource.sEncoding.isEmpty())
1051 mpTop->maConverter.setEncoding(OUStringToOString(mpTop->maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US));
1055 void FastSaxParserImpl::popEntity()
1057 maEntities.pop();
1058 mpTop = !maEntities.empty() ? &maEntities.top() : nullptr;
1061 // starts parsing with actual parser !
1062 void FastSaxParserImpl::parse()
1064 const int BUFFER_SIZE = 16 * 1024;
1065 Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
1067 Entity& rEntity = getEntity();
1069 // set all necessary C-Callbacks
1070 static xmlSAXHandler callbacks;
1071 callbacks.startElementNs = call_callbackStartElement;
1072 callbacks.endElementNs = call_callbackEndElement;
1073 callbacks.characters = call_callbackCharacters;
1074 callbacks.processingInstruction = call_callbackProcessingInstruction;
1075 callbacks.getEntity = call_callbackGetEntity;
1076 callbacks.initialized = XML_SAX2_MAGIC;
1077 int nRead = 0;
1080 nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
1081 if( nRead <= 0 )
1083 if( rEntity.mpParser != nullptr )
1085 if( xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), 0, 1 ) != XML_ERR_OK )
1086 rEntity.throwException( mxDocumentLocator, true );
1087 if (rEntity.hasException())
1088 rEntity.throwException(mxDocumentLocator, true);
1090 break;
1093 bool bContinue = true;
1094 if( rEntity.mpParser == nullptr )
1096 // create parser with proper encoding (needs the first chunk of data)
1097 rEntity.mpParser = xmlCreatePushParserCtxt( &callbacks, this,
1098 reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, nullptr );
1099 if( !rEntity.mpParser )
1100 throw SAXException(u"Couldn't create parser"_ustr, Reference< XInterface >(), Any() );
1102 // Tell libxml2 parser to decode entities in attribute values.
1103 // Also allow XML attribute values which are larger than 10MB, because this used to work
1104 // with expat.
1105 // coverity[unsafe_xml_parse_config] - entity support is required
1106 xmlCtxtUseOptions(rEntity.mpParser, XML_PARSE_NOENT | XML_PARSE_HUGE);
1108 else
1110 bContinue = xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, 0 )
1111 == XML_ERR_OK;
1114 // callbacks used inside XML_Parse may have caught an exception
1115 if (!bContinue)
1117 rEntity.throwException( mxDocumentLocator, true );
1119 if (rEntity.hasException())
1121 rEntity.throwException( mxDocumentLocator, true );
1123 } while( nRead > 0 );
1124 rEntity.getEvent( CallbackType::DONE );
1125 if( rEntity.mbEnableThreads )
1126 produce( true );
1129 // The C-Callbacks
// Start-of-element callback (presumably reached from libxml2 through
// call_callbackStartElement, which parse() registers as startElementNs).
// Steps, in order:
//  - flush any buffered character data,
//  - push a namespace count (and define the "xml" prefix for the first element),
//  - obtain a START_ELEMENT event and prepare its attribute list(s),
//  - register the namespaces declared on this element, then convert the attributes,
//  - resolve the element token, or keep namespace + name strings if it is unknown,
//  - push the element's namespace on maNamespaceStack,
//  - hand the event over via produce() in threaded mode, else call
//    rEntity.startElement() directly.
// Anything caught by the catch (...) at the end is stored with rEntity.saveException().
1130 void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
1131 int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes)
1133 if (!pendingCharacters.empty())
1134 sendPendingCharacters();
1135 Entity& rEntity = getEntity();
1136 if( rEntity.maNamespaceCount.empty() )
1138 rEntity.maNamespaceCount.push(0);
1139 DefineNamespace( "xml"_ostr, u"http://www.w3.org/XML/1998/namespace"_ustr);
1141 else
1143 rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
1146 // create attribute map and process namespace instructions
1147 Event& rEvent = rEntity.getEvent( CallbackType::START_ELEMENT );
// In threaded mode the event list's mbIsAttributesEmpty flag decides whether
// an existing attribute list still has to be cleared before it is reused.
1148 bool bIsAttributesEmpty = false;
1149 if ( rEntity.mbEnableThreads )
1150 bIsAttributesEmpty = rEntity.getEventList().mbIsAttributesEmpty;
// Reuse the event's attribute list when it already exists, otherwise create one.
1152 if (rEvent.mxAttributes.is())
1154 if( !bIsAttributesEmpty )
1155 rEvent.mxAttributes->clear();
1157 else
1158 rEvent.mxAttributes.set(
1159 new FastAttributeList( rEntity.mxTokenHandler.get() ) );
// The list of namespace declarations is only maintained when a namespace handler is set.
1161 if( rEntity.mxNamespaceHandler.is() )
1163 if (rEvent.mxDeclAttributes.is())
1165 if( !bIsAttributesEmpty )
1166 rEvent.mxDeclAttributes->clear();
1168 else
1169 rEvent.mxDeclAttributes.set(
1170 new FastAttributeList( rEntity.mxTokenHandler.get() ) );
// Start from the namespace on top of maNamespaceStack, if there is one.
1173 OUString sNamespace;
1174 sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
1175 if (!rEntity.maNamespaceStack.empty())
1177 sNamespace = rEntity.maNamespaceStack.top().msName;
1178 nNamespaceToken = rEntity.maNamespaceStack.top().mnToken;
1183 /* #158414# Each element may define new namespaces, also for attributes.
1184 First, process all namespaces, second, process the attributes after namespaces
1185 have been initialized. */
1187 // #158414# first: get namespaces
1188 for (int i = 0; i < numNamespaces * 2; i += 2)
1190 // namespaces[] is (prefix/URI)
1191 if( namespaces[ i ] != nullptr )
1193 OString aPrefix( XML_CAST( namespaces[ i ] ));
1194 OUString namespaceURL( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
1195 NormalizeURI( namespaceURL );
1196 DefineNamespace(aPrefix, namespaceURL);
1197 if( rEntity.mxNamespaceHandler.is() )
1198 rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
1200 else
1202 // default namespace
1203 sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
1204 NormalizeURI( sNamespace );
1205 nNamespaceToken = GetNamespaceToken( sNamespace );
1206 if( rEntity.mxNamespaceHandler.is() )
1207 rEvent.mxDeclAttributes->addUnknown( ""_ostr, OString( XML_CAST( namespaces[ i + 1 ] ) ) );
// With a token handler, attributes and the element name are mapped to tokens;
// without one, every attribute is stored as unknown and the element token is DONTKNOW.
1211 if ( rEntity.mxTokenHandler.is() )
1213 // #158414# second: fill attribute list with other attributes
1214 rEvent.mxAttributes->reserve( numAttributes );
1215 for (int i = 0; i < numAttributes * 5; i += 5)
1217 // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd )
1218 if( attributes[ i + 1 ] != nullptr )
1220 sal_Int32 nAttributeToken = GetTokenWithPrefix(attributes[ i + 1 ], attributes[ i ]);
1221 if( nAttributeToken != FastToken::DONTKNOW )
1222 rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) );
1223 else
1224 addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
1226 else
1228 sal_Int32 nAttributeToken = GetToken(attributes[ i ]);
1229 if( nAttributeToken != FastToken::DONTKNOW )
1230 rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) );
1231 else
1233 SAL_WARN("xmloff", "unknown attribute " << XML_CAST( attributes[ i ] ) << "=" <<
1234 OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1235 rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
1236 OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
// Element token: an explicit prefix wins, then the current (default) namespace,
// then a plain lookup of the local name.
1241 if( prefix != nullptr )
1242 rEvent.mnElementToken = GetTokenWithPrefix(prefix, localName);
1243 else if( !sNamespace.isEmpty() )
1244 rEvent.mnElementToken = GetTokenWithContextNamespace(nNamespaceToken, localName);
1245 else
1246 rEvent.mnElementToken = GetToken(localName);
1248 else
1250 for (int i = 0; i < numAttributes * 5; i += 5)
1252 if( attributes[ i + 1 ] != nullptr )
1253 addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
1254 else
1255 rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
1256 OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1259 rEvent.mnElementToken = FastToken::DONTKNOW;
// Unknown element: keep its namespace and (possibly prefixed) name as strings.
// A prefixed element without a URI throws unless m_bIgnoreMissingNSDecl is set.
1262 if( rEvent.mnElementToken == FastToken::DONTKNOW )
1264 OUString aElementPrefix;
1265 if( prefix != nullptr )
1267 aElementPrefix = OUString( XML_CAST( prefix ), strlen( XML_CAST( prefix )), RTL_TEXTENCODING_UTF8 );
1268 if ( URI != nullptr )
1269 sNamespace = OUString( XML_CAST( URI ), strlen( XML_CAST( URI )), RTL_TEXTENCODING_UTF8 );
1270 else if ( m_bIgnoreMissingNSDecl )
1271 sNamespace.clear();
1272 else
1273 throw SAXException("No namespace defined for " + aElementPrefix, {}, {});
1274 nNamespaceToken = GetNamespaceToken( sNamespace );
1276 OUString aElementLocalName( XML_CAST( localName ), strlen( XML_CAST( localName )), RTL_TEXTENCODING_UTF8 );
1277 rEvent.msNamespace = sNamespace;
1278 if( aElementPrefix.isEmpty() )
1279 rEvent.msElementName = std::move(aElementLocalName);
1280 else
1281 rEvent.msElementName = aElementPrefix + ":" + aElementLocalName;
1283 else // token is always preferred.
1284 rEvent.msElementName.clear();
// Remember this element's namespace; callbackEndElement() pops it again.
1286 rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
1287 if (rEntity.mbEnableThreads)
1288 produce();
1289 else
1291 SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator->getLineNumber() << " column " << mxDocumentLocator->getColumnNumber() << " " << ( prefix ? XML_CAST(prefix) : "(null)" ) << ":" << localName);
1292 rEntity.startElement( &rEvent );
// Exceptions are not propagated from here; they are stored on the entity.
1295 catch (...)
1297 rEntity.saveException( ::cppu::getCaughtException() );
1301 void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes)
1303 OUString aNamespaceURI;
1304 if ( !m_bIgnoreMissingNSDecl || attributes[i + 2] != nullptr )
1305 aNamespaceURI = OUString( XML_CAST( attributes[ i + 2 ] ), strlen( XML_CAST( attributes[ i + 2 ] )), RTL_TEXTENCODING_UTF8 );
1306 const OString aPrefix( XML_CAST( attributes[ i + 1 ] ));
1307 const OString aLocalName( XML_CAST( attributes[ i ] ));
1308 OString aQualifiedName = (aPrefix.isEmpty())? aLocalName : aPrefix + ":" + aLocalName;
1309 xAttributes->addUnknown( aNamespaceURI, aQualifiedName,
1310 OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
1311 SAL_INFO("xmloff", "unknown element " << aQualifiedName << " " << aNamespaceURI);
1314 void FastSaxParserImpl::callbackEndElement()
1316 if (!pendingCharacters.empty())
1317 sendPendingCharacters();
1318 Entity& rEntity = getEntity();
1319 SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount");
1320 if( !rEntity.maNamespaceCount.empty() )
1321 rEntity.maNamespaceCount.pop();
1323 SAL_WARN_IF(rEntity.maNamespaceStack.empty(), "sax", "Empty NamespaceStack");
1324 if( !rEntity.maNamespaceStack.empty() )
1325 rEntity.maNamespaceStack.pop();
1327 rEntity.getEvent( CallbackType::END_ELEMENT );
1328 if (rEntity.mbEnableThreads)
1329 produce();
1330 else
1331 rEntity.endElement();
1334 void FastSaxParserImpl::callbackCharacters( const xmlChar* s, int nLen )
1336 // SAX interface allows that the characters callback splits content of one XML node
1337 // (e.g. because there's an entity that needs decoding), however for consumers it's
1338 // simpler FastSaxParser's character callback provides the whole string at once,
1339 // so merge data from possible multiple calls and send them at once (before the element
1340 // ends or another one starts).
1342 // We use a std::vector<char> to avoid calling into the OUString constructor more than once when
1343 // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly
1344 // often in writer documents.
1345 int nOriginalLen = pendingCharacters.size();
1346 pendingCharacters.resize(nOriginalLen + nLen);
1347 memcpy(pendingCharacters.data() + nOriginalLen, s, nLen);
1350 void FastSaxParserImpl::sendPendingCharacters()
1352 Entity& rEntity = getEntity();
1353 OUString sChars( pendingCharacters.data(), pendingCharacters.size(), RTL_TEXTENCODING_UTF8 );
1354 if (rEntity.mbEnableThreads)
1356 Event& rEvent = rEntity.getEvent( CallbackType::CHARACTERS );
1357 rEvent.msChars = std::move(sChars);
1358 produce();
1360 else
1361 rEntity.characters( sChars );
1362 pendingCharacters.resize(0);
1365 void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar *target, const xmlChar *data )
1367 if (!pendingCharacters.empty())
1368 sendPendingCharacters();
1369 Entity& rEntity = getEntity();
1370 Event& rEvent = rEntity.getEvent( CallbackType::PROCESSING_INSTRUCTION );
1372 // This event is very rare, so no need to waste extra space for this
1373 // Using namespace and element strings to be target and data in that order.
1374 rEvent.msNamespace = OUString( XML_CAST( target ), strlen( XML_CAST( target ) ), RTL_TEXTENCODING_UTF8 );
1375 if ( data != nullptr )
1376 rEvent.msElementName = OUString( XML_CAST( data ), strlen( XML_CAST( data ) ), RTL_TEXTENCODING_UTF8 );
1377 else
1378 rEvent.msElementName.clear();
1380 if (rEntity.mbEnableThreads)
1381 produce();
1382 else
1383 rEntity.processingInstruction( rEvent.msNamespace, rEvent.msElementName );
1386 xmlEntityPtr FastSaxParserImpl::callbackGetEntity( const xmlChar *name )
1388 if( !name )
1389 return xmlGetPredefinedEntity(name);
1390 const char* dname = XML_CAST(name);
1391 int lname = strlen(dname);
1392 if( lname == 0 )
1393 return xmlGetPredefinedEntity(name);
1394 if (m_Replacements.size() > 0)
1396 auto it = std::lower_bound(m_Replacements.begin(), m_Replacements.end(), dname);
1397 if (it != m_Replacements.end() && it->name.compareToAscii(dname) == 0)
1399 xmlEntityPtr entpt = xmlNewEntity(
1400 nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr,
1401 BAD_CAST(OUStringToOString(it->replacement, RTL_TEXTENCODING_UTF8).getStr()));
1402 m_TemporalEntities.push_back(entpt);
1403 return entpt;
1406 if( lname < 2 )
1407 return xmlGetPredefinedEntity(name);
1408 if ( dname[0] == '#' )
1410 sal_uInt32 cval = 0;
1411 if( dname[1] == 'x' || dname[1] == 'X' )
1413 if( lname < 3 )
1414 return xmlGetPredefinedEntity(name);
1415 cval = static_cast<sal_uInt32>( strtoul( dname + 2, nullptr, 16 ) );
1416 if( cval == 0 )
1417 return xmlGetPredefinedEntity(name);
1418 OUString vname( &cval, 1 );
1419 xmlEntityPtr entpt
1420 = xmlNewEntity(nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr,
1421 BAD_CAST(OUStringToOString(vname, RTL_TEXTENCODING_UTF8).getStr()));
1422 m_TemporalEntities.push_back(entpt);
1423 return entpt;
1425 else
1427 cval = static_cast<sal_uInt32>( strtoul( dname + 2, nullptr, 10 ) );
1428 if( cval == 0 )
1429 return xmlGetPredefinedEntity(name);
1430 OUString vname(&cval, 1);
1431 xmlEntityPtr entpt
1432 = xmlNewEntity(nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr,
1433 BAD_CAST(OUStringToOString(vname, RTL_TEXTENCODING_UTF8).getStr()));
1434 m_TemporalEntities.push_back(entpt);
1435 return entpt;
1438 return xmlGetPredefinedEntity(name);
1441 FastSaxParser::FastSaxParser() : mpImpl(new FastSaxParserImpl) {}
1443 FastSaxParser::~FastSaxParser()
1447 void SAL_CALL
1448 FastSaxParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments)
1450 if (!rArguments.hasElements())
1451 return;
1453 OUString str;
1454 if ( !(rArguments[0] >>= str) )
1455 throw IllegalArgumentException();
1457 if ( str == "IgnoreMissingNSDecl" )
1458 mpImpl->m_bIgnoreMissingNSDecl = true;
1459 else if ( str == "DoSmeplease" )
1460 ; //just ignore as this is already immune to billion laughs
1461 else if ( str == "DisableThreadedParser" )
1462 mpImpl->m_bDisableThreadedParser = true;
1463 else
1464 throw IllegalArgumentException();
1468 void FastSaxParser::parseStream( const xml::sax::InputSource& aInputSource )
1470 mpImpl->parseStream(aInputSource);
1473 void FastSaxParser::setFastDocumentHandler( const uno::Reference<xml::sax::XFastDocumentHandler>& Handler )
1475 mpImpl->setFastDocumentHandler(Handler);
1478 void FastSaxParser::setTokenHandler( const uno::Reference<xml::sax::XFastTokenHandler>& Handler )
1480 mpImpl->setTokenHandler(Handler);
1483 void FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
1485 mpImpl->registerNamespace(NamespaceURL, NamespaceToken);
1488 OUString FastSaxParser::getNamespaceURL( const OUString& rPrefix )
1490 return mpImpl->getNamespaceURL(rPrefix);
1493 void FastSaxParser::setErrorHandler( const uno::Reference< xml::sax::XErrorHandler >& Handler )
1495 mpImpl->setErrorHandler(Handler);
1498 void FastSaxParser::setEntityResolver( const uno::Reference< xml::sax::XEntityResolver >& )
1500 // not implemented
1503 void FastSaxParser::setLocale( const lang::Locale& )
1505 // not implemented
1508 void FastSaxParser::setNamespaceHandler( const uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler)
1510 mpImpl->setNamespaceHandler(Handler);
1513 OUString FastSaxParser::getImplementationName()
1515 return u"com.sun.star.comp.extensions.xml.sax.FastParser"_ustr;
1518 void FastSaxParser::setCustomEntityNames(
1519 const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
1521 mpImpl->setCustomEntityNames(replacements);
1524 sal_Bool FastSaxParser::supportsService( const OUString& ServiceName )
1526 return cppu::supportsService(this, ServiceName);
1529 uno::Sequence<OUString> FastSaxParser::getSupportedServiceNames()
1531 return { u"com.sun.star.xml.sax.FastParser"_ustr };
1534 } // namespace sax_fastparser
1536 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
1537 com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation(
1538 css::uno::XComponentContext *,
1539 css::uno::Sequence<css::uno::Any> const &)
1541 return cppu::acquire(new FastSaxParser);
1544 // ----------------------------------------------------------
1545 // copy of the code in xmloff/source/core/namespace.cxx, which adds namespace aliases
1546 // for various dodgy namespace decls in the wild.
1548 static bool NormalizeW3URI( OUString& rName );
1549 static bool NormalizeOasisURN( OUString& rName );
1551 static void NormalizeURI( OUString& rName )
1553 // try OASIS + W3 URI normalization
1554 bool bSuccess = NormalizeOasisURN( rName );
1555 if( ! bSuccess )
1556 NormalizeW3URI( rName );
// String constants used by NormalizeW3URI() and NormalizeOasisURN() below.
// Prefix and suffix that NormalizeW3URI() matches, and the name it substitutes:
1559 constexpr OUStringLiteral XML_URI_W3_PREFIX(u"http://www.w3.org/");
1560 constexpr OUStringLiteral XML_URI_XFORMS_SUFFIX(u"/xforms");
1561 constexpr OUStringLiteral XML_N_XFORMS_1_0(u"http://www.w3.org/2002/xforms");
// Names that NormalizeOasisURN() replaces by their *_COMPAT counterpart:
1562 constexpr OUStringLiteral XML_N_SVG(u"http://www.w3.org/2000/svg");
1563 constexpr OUStringLiteral XML_N_SVG_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0");
1564 constexpr OUStringLiteral XML_N_FO(u"http://www.w3.org/1999/XSL/Format");
1565 constexpr OUStringLiteral XML_N_FO_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0");
// Both SMIL spellings (with and without trailing slash) map to XML_N_SMIL_COMPAT:
1566 constexpr OUStringLiteral XML_N_SMIL(u"http://www.w3.org/2001/SMIL20/");
1567 constexpr OUStringLiteral XML_N_SMIL_OLD(u"http://www.w3.org/2001/SMIL20");
1568 constexpr OUStringLiteral XML_N_SMIL_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0");
// Building blocks of the generic OASIS URN pattern handled by NormalizeOasisURN():
1569 constexpr OUStringLiteral XML_URN_OASIS_NAMES_TC(u"urn:oasis:names:tc");
1570 constexpr OUStringLiteral XML_XMLNS(u"xmlns");
1571 constexpr OUStringLiteral XML_OPENDOCUMENT(u"opendocument");
1572 constexpr OUStringLiteral XML_1_0(u"1.0");
1574 static bool NormalizeW3URI( OUString& rName )
1576 // check if URI matches:
1577 // http://www.w3.org/[0-9]*/[:letter:]*
1578 // (year)/(WG name)
1579 // For the following WG/standards names:
1580 // - xforms
1582 bool bSuccess = false;
1583 const OUString sURIPrefix = XML_URI_W3_PREFIX;
1584 if( rName.startsWith( sURIPrefix ) )
1586 const OUString sURISuffix = XML_URI_XFORMS_SUFFIX ;
1587 sal_Int32 nCompareFrom = rName.getLength() - sURISuffix.getLength();
1588 if( rName.subView( nCompareFrom ) == sURISuffix )
1590 // found W3 prefix, and xforms suffix
1591 rName = XML_N_XFORMS_1_0;
1592 bSuccess = true;
1595 return bSuccess;
1598 static bool NormalizeOasisURN( OUString& rName )
1600 // #i38644#
1601 // we exported the wrong namespace for smil, so we correct this here on load
1602 // for older documents
1603 if( rName == XML_N_SVG )
1605 rName = XML_N_SVG_COMPAT;
1606 return true;
1608 else if( rName == XML_N_FO )
1610 rName = XML_N_FO_COMPAT;
1611 return true;
1613 else if( rName == XML_N_SMIL || rName == XML_N_SMIL_OLD )
1615 rName = XML_N_SMIL_COMPAT;
1616 return true;
1620 // Check if URN matches
1621 // :urn:oasis:names:tc:[^:]*:xmlns:[^:]*:1.[^:]*
1622 // |---| |---| |-----|
1623 // TC-Id Sub-Id Version
1625 sal_Int32 nNameLen = rName.getLength();
1626 // :urn:oasis:names:tc.*
1627 const OUString aOasisURN = XML_URN_OASIS_NAMES_TC;
1628 if( !rName.startsWith( aOasisURN ) )
1629 return false;
1631 // :urn:oasis:names:tc:.*
1632 sal_Int32 nPos = aOasisURN.getLength();
1633 if( nPos >= nNameLen || rName[nPos] != ':' )
1634 return false;
1636 // :urn:oasis:names:tc:[^:]:.*
1637 sal_Int32 nTCIdStart = nPos+1;
1638 sal_Int32 nTCIdEnd = rName.indexOf( ':', nTCIdStart );
1639 if( -1 == nTCIdEnd )
1640 return false;
1642 // :urn:oasis:names:tc:[^:]:xmlns.*
1643 nPos = nTCIdEnd + 1;
1644 std::u16string_view sTmp( rName.subView( nPos ) );
1645 const OUString aXMLNS = XML_XMLNS;
1646 if( !o3tl::starts_with(sTmp, aXMLNS ) )
1647 return false;
1649 // :urn:oasis:names:tc:[^:]:xmlns:.*
1650 nPos += aXMLNS.getLength();
1651 if( nPos >= nNameLen || rName[nPos] != ':' )
1652 return false;
1654 // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:.*
1655 nPos = rName.indexOf( ':', nPos+1 );
1656 if( -1 == nPos )
1657 return false;
1659 // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:[^:][^:][^:][^:]*
1660 sal_Int32 nVersionStart = nPos+1;
1661 if( nVersionStart+2 >= nNameLen ||
1662 -1 != rName.indexOf( ':', nVersionStart ) )
1663 return false;
1665 // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:1\.[^:][^:]*
1666 if( rName[nVersionStart] != '1' || rName[nVersionStart+1] != '.' )
1667 return false;
1669 // replace [tcid] with current TCID and version with current version.
1671 rName = rName.subView( 0, nTCIdStart ) +
1672 XML_OPENDOCUMENT +
1673 rName.subView( nTCIdEnd, nVersionStart-nTCIdEnd ) +
1674 XML_1_0;
1676 return true;
1680 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */