Bump version to 24.04.3.4
[LibreOffice.git] / unoxml / source / dom / documentbuilder.cxx
blob3898d58e4be4a932653e0c61f83bffc0b86c226c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "documentbuilder.hxx"
22 #include <string.h>
24 #include <libxml/xmlerror.h>
25 #include <libxml/parser.h>
27 #include <memory>
29 #include <sal/log.hxx>
30 #include <comphelper/diagnose_ex.hxx>
32 #include <comphelper/processfactory.hxx>
33 #include <cppuhelper/implbase.hxx>
34 #include <cppuhelper/supportsservice.hxx>
36 #include <com/sun/star/xml/sax/SAXParseException.hpp>
37 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
38 #include <com/sun/star/task/XInteractionHandler.hpp>
39 #include <com/sun/star/ucb/SimpleFileAccess.hpp>
41 #include <ucbhelper/content.hxx>
42 #include <ucbhelper/commandenvironment.hxx>
44 #include "document.hxx"
46 using namespace css::io;
47 using namespace css::lang;
48 using namespace css::ucb;
49 using namespace css::uno;
50 using namespace css::xml::dom;
51 using namespace css::xml::sax;
52 using namespace ucbhelper;
53 using css::task::XInteractionHandler;
54 using css::xml::sax::InputSource;
57 namespace DOM
59 namespace {
61 class CDefaultEntityResolver : public cppu::WeakImplHelper< XEntityResolver >
63 public:
64 virtual InputSource SAL_CALL resolveEntity( const OUString& sPublicId, const OUString& sSystemId ) override
66 InputSource is;
67 is.sPublicId = sPublicId;
68 is.sSystemId = sSystemId;
69 is.sEncoding.clear();
71 try {
72 Reference< XCommandEnvironment > aEnvironment(
73 new CommandEnvironment(Reference< XInteractionHandler >(),
74 Reference< XProgressHandler >() ));
75 Content aContent(sSystemId, aEnvironment, comphelper::getProcessComponentContext());
77 is.aInputStream = aContent.openStream();
78 } catch (const css::uno::Exception&) {
79 TOOLS_WARN_EXCEPTION( "unoxml", "exception in default entity resolver");
80 is.aInputStream.clear();
82 return is;
89 CDocumentBuilder::CDocumentBuilder()
90 : m_xEntityResolver(new CDefaultEntityResolver)
92 // init libxml. libxml will protect itself against multiple
93 // initializations so there is no problem here if this gets
94 // called multiple times.
95 xmlInitParser();
98 Sequence< OUString > SAL_CALL CDocumentBuilder::getSupportedServiceNames()
100 return { "com.sun.star.xml.dom.DocumentBuilder" };
103 OUString SAL_CALL CDocumentBuilder::getImplementationName()
105 return "com.sun.star.comp.xml.dom.DocumentBuilder";
108 sal_Bool SAL_CALL CDocumentBuilder::supportsService(const OUString& aServiceName)
110 return cppu::supportsService(this, aServiceName);
113 Reference< XDOMImplementation > SAL_CALL CDocumentBuilder::getDOMImplementation()
116 return Reference< XDOMImplementation >();
119 sal_Bool SAL_CALL CDocumentBuilder::isNamespaceAware()
121 return true;
124 sal_Bool SAL_CALL CDocumentBuilder::isValidating()
126 return false;
129 Reference< XDocument > SAL_CALL CDocumentBuilder::newDocument()
131 std::scoped_lock const g(m_Mutex);
133 // create a new document
134 xmlDocPtr pDocument = xmlNewDoc(reinterpret_cast<const xmlChar*>("1.0"));
135 return CDocument::CreateCDocument(pDocument);
138 static OUString make_error_message(xmlParserCtxtPtr ctxt)
140 return OUString(ctxt->lastError.message, strlen(ctxt->lastError.message), RTL_TEXTENCODING_ASCII_US) +
141 "Line: " +
142 OUString::number(static_cast<sal_Int32>(ctxt->lastError.line)) +
143 "\nColumn: " +
144 OUString::number(static_cast<sal_Int32>(ctxt->lastError.int2));
147 // -- callbacks and context struct for parsing from stream
148 // -- c-linkage, so the callbacks can be used by libxml
149 extern "C" {
151 namespace {
153 // context struct passed to IO functions
154 typedef struct context {
155 Reference< XInputStream > rInputStream;
156 bool close;
157 bool freeOnClose;
158 } context_t;
162 static int xmlIO_read_func( void *context, char *buffer, int len)
164 // get the context...
165 context_t *pctx = static_cast<context_t*>(context);
166 if (!pctx->rInputStream.is())
167 return -1;
168 try {
169 // try to read the requested number of bytes
170 Sequence< sal_Int8 > chunk(len);
171 int nread = pctx->rInputStream->readBytes(chunk, len);
173 // copy bytes to the provided buffer
174 memcpy(buffer, chunk.getConstArray(), nread);
175 return nread;
176 } catch (const css::uno::Exception&) {
177 TOOLS_WARN_EXCEPTION( "unoxml", "");
178 return -1;
182 static int xmlIO_close_func(void* context)
184 // get the context...
185 context_t *pctx = static_cast<context_t*>(context);
186 if (!pctx->rInputStream.is())
187 return 0;
190 if (pctx->close)
191 pctx->rInputStream->closeInput();
192 if (pctx->freeOnClose)
193 delete pctx;
194 return 0;
195 } catch (const css::uno::Exception&) {
196 TOOLS_WARN_EXCEPTION( "unoxml", "");
197 return -1;
201 static xmlParserInputPtr resolve_func(void *ctx,
202 const xmlChar *publicId,
203 const xmlChar *systemId)
205 // get the CDocumentBuilder object
206 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(ctx);
207 CDocumentBuilder *builder = static_cast< CDocumentBuilder* >(ctxt->_private);
208 Reference< XEntityResolver > resolver = builder->getEntityResolver();
209 OUString sysid;
210 if (systemId != nullptr)
211 sysid = OUString(reinterpret_cast<char const *>(systemId), strlen(reinterpret_cast<char const *>(systemId)), RTL_TEXTENCODING_UTF8);
212 OUString pubid;
213 if (publicId != nullptr)
214 pubid = OUString(reinterpret_cast<char const *>(publicId), strlen(reinterpret_cast<char const *>(publicId)), RTL_TEXTENCODING_UTF8);
216 // resolve the entity
217 InputSource src = resolver->resolveEntity(pubid, sysid);
219 // create IO context on heap because this call will no longer be on the stack
220 // when IO is actually performed through the callbacks. The close function must
221 // free the memory which is indicated by the freeOnClose field in the context struct
222 context_t *c = new context_t;
223 c->rInputStream = src.aInputStream;
224 c->close = true;
225 c->freeOnClose = true;
227 // set up the inputBuffer and inputPtr for libxml
228 xmlParserInputBufferPtr pBuffer =
229 xmlParserInputBufferCreateIO(xmlIO_read_func, xmlIO_close_func, c, XML_CHAR_ENCODING_NONE);
230 xmlParserInputPtr pInput =
231 xmlNewIOInputStream(ctxt, pBuffer, XML_CHAR_ENCODING_NONE);
232 return pInput;
#if 0
// Disabled: external entity loader that forwards to resolve_func, passing
// the URL as system id and no public id.
static xmlParserInputPtr external_entity_loader(const char *URL, const char * /*ID*/, xmlParserCtxtPtr ctxt)
{
    const xmlChar * const pSystemId = reinterpret_cast<const xmlChar*>(URL);
    return resolve_func(ctxt, nullptr, pSystemId);
}
#endif
243 // default warning handler does not trigger assertion
244 static void warning_func(void * ctx, const char * /*msg*/, ...)
248 xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx);
250 SAL_INFO(
251 "unoxml",
252 "libxml2 warning: "
253 << make_error_message(pctx));
255 CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private);
257 if (pDocBuilder->getErrorHandler().is()) // if custom error handler is set (using setErrorHandler ())
259 // Prepare SAXParseException to be passed to custom XErrorHandler::warning function
260 css::xml::sax::SAXParseException saxex(make_error_message(pctx), {}, {}, {}, {},
261 pctx->lastError.line, pctx->lastError.int2);
263 // Call custom warning function
264 pDocBuilder->getErrorHandler()->warning(::css::uno::Any(saxex));
267 catch (const css::uno::Exception &)
269 // Protect lib2xml from UNO Exception
270 TOOLS_WARN_EXCEPTION("unoxml", "DOM::warning_func");
274 // default error handler triggers assertion
275 static void error_func(void * ctx, const char * /*msg*/, ...)
279 xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx);
280 SAL_WARN(
281 "unoxml",
282 "libxml2 error: "
283 << make_error_message(pctx));
285 CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private);
287 if (pDocBuilder->getErrorHandler().is()) // if custom error handler is set (using setErrorHandler ())
289 // Prepare SAXParseException to be passed to custom XErrorHandler::error function
290 css::xml::sax::SAXParseException saxex(make_error_message(pctx), {}, {}, {}, {},
291 pctx->lastError.line, pctx->lastError.int2);
293 // Call custom warning function
294 pDocBuilder->getErrorHandler()->error(::css::uno::Any(saxex));
297 catch (const css::uno::Exception &)
299 // Protect lib2xml from UNO Exception
300 TOOLS_WARN_EXCEPTION("unoxml", "DOM::error_func");
303 } // extern "C"
305 static void throwEx(xmlParserCtxtPtr ctxt)
307 css::xml::sax::SAXParseException saxex(make_error_message(ctxt), {}, {}, {}, {},
308 ctxt->lastError.line, ctxt->lastError.int2);
309 throw saxex;
312 namespace {
314 struct XmlFreeParserCtxt {
315 void operator ()(xmlParserCtxt * p) const { xmlFreeParserCtxt(p); }
320 Reference< XDocument > SAL_CALL CDocumentBuilder::parse(const Reference< XInputStream >& is)
322 if (!is.is()) {
323 throw RuntimeException();
326 std::scoped_lock const g(m_Mutex);
328 // IO context struct. Must outlive pContext, as destroying that via
329 // xmlFreeParserCtxt may still access this context_t
330 context_t c;
331 c.rInputStream = is;
332 // we did not open the stream, thus we do not close it.
333 c.close = false;
334 c.freeOnClose = false;
336 std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext(
337 xmlNewParserCtxt());
339 // register error functions to prevent errors being printed
340 // on the console
341 pContext->_private = this;
342 pContext->sax->error = error_func;
343 pContext->sax->warning = warning_func;
344 pContext->sax->resolveEntity = resolve_func;
346 xmlDocPtr const pDoc = xmlCtxtReadIO(pContext.get(),
347 xmlIO_read_func, xmlIO_close_func, &c, nullptr, nullptr, 0);
349 if (pDoc == nullptr) {
350 throwEx(pContext.get());
352 return CDocument::CreateCDocument(pDoc);
355 Reference< XDocument > SAL_CALL CDocumentBuilder::parseURI(const OUString& sUri)
357 std::scoped_lock const g(m_Mutex);
359 std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext(
360 xmlNewParserCtxt());
361 pContext->_private = this;
362 pContext->sax->error = error_func;
363 pContext->sax->warning = warning_func;
364 pContext->sax->resolveEntity = resolve_func;
365 // xmlSetExternalEntityLoader(external_entity_loader);
366 OString oUri = OUStringToOString(sUri, RTL_TEXTENCODING_UTF8);
367 char *uri = const_cast<char*>(oUri.getStr());
368 xmlDocPtr pDoc = xmlCtxtReadFile(pContext.get(), uri, nullptr, 0);
370 Reference< XDocument > xRet;
372 // if we failed to parse the URI as a simple file, lets try via a ucb stream.
373 // For Android file:///assets/ URLs which must go via the osl/ file API.
374 if (pDoc == nullptr) {
375 Reference < XSimpleFileAccess3 > xStreamAccess(
376 SimpleFileAccess::create( comphelper::getProcessComponentContext() ) );
377 Reference< XInputStream > xInStream = xStreamAccess->openFileRead( sUri );
378 if (!xInStream.is())
379 throwEx(pContext.get());
381 // loop over every layout entry in current file
382 xRet = parse( xInStream );
384 xInStream->closeInput();
385 xInStream.clear();
387 } else
388 xRet = CDocument::CreateCDocument(pDoc).get();
390 return xRet;
393 void SAL_CALL
394 CDocumentBuilder::setEntityResolver(Reference< XEntityResolver > const& xER)
396 std::scoped_lock const g(m_Mutex);
398 m_xEntityResolver = xER;
401 Reference< XEntityResolver > CDocumentBuilder::getEntityResolver()
403 std::scoped_lock const g(m_Mutex);
405 return m_xEntityResolver;
408 void SAL_CALL
409 CDocumentBuilder::setErrorHandler(Reference< XErrorHandler > const& xEH)
411 std::scoped_lock const g(m_Mutex);
413 m_xErrorHandler = xEH;
417 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
418 unoxml_CDocumentBuilder_get_implementation(
419 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
421 return cppu::acquire(new DOM::CDocumentBuilder());
424 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */