Bump version to 6.4-15
[LibreOffice.git] / unoxml / source / dom / documentbuilder.cxx
blob3a66b08e89d18e0aa7a90d8716aa692a3c02e543
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "documentbuilder.hxx"
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdarg.h>
26 #include <libxml/xmlerror.h>
27 #include <libxml/tree.h>
29 #include <memory>
31 #include <rtl/alloc.h>
32 #include <rtl/ustrbuf.hxx>
33 #include <osl/diagnose.h>
34 #include <sal/log.hxx>
35 #include <tools/diagnose_ex.h>
37 #include <comphelper/processfactory.hxx>
38 #include <cppuhelper/implbase.hxx>
39 #include <cppuhelper/supportsservice.hxx>
41 #include <com/sun/star/xml/sax/SAXParseException.hpp>
42 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
43 #include <com/sun/star/task/XInteractionHandler.hpp>
44 #include <com/sun/star/ucb/SimpleFileAccess.hpp>
46 #include <ucbhelper/content.hxx>
47 #include <ucbhelper/commandenvironment.hxx>
49 #include <node.hxx>
50 #include "document.hxx"
52 using namespace css::io;
53 using namespace css::lang;
54 using namespace css::ucb;
55 using namespace css::uno;
56 using namespace css::xml::dom;
57 using namespace css::xml::sax;
58 using namespace ucbhelper;
59 using css::task::XInteractionHandler;
60 using css::xml::sax::InputSource;
63 namespace DOM
66 class CDefaultEntityResolver : public cppu::WeakImplHelper< XEntityResolver >
68 public:
69 virtual InputSource SAL_CALL resolveEntity( const OUString& sPublicId, const OUString& sSystemId ) override
71 InputSource is;
72 is.sPublicId = sPublicId;
73 is.sSystemId = sSystemId;
74 is.sEncoding.clear();
76 try {
77 Reference< XCommandEnvironment > aEnvironment(
78 new CommandEnvironment(Reference< XInteractionHandler >(),
79 Reference< XProgressHandler >() ));
80 Content aContent(sSystemId, aEnvironment, comphelper::getProcessComponentContext());
82 is.aInputStream = aContent.openStream();
83 } catch (const css::uno::Exception&) {
84 OSL_FAIL("exception in default entity resolver");
85 is.aInputStream.clear();
87 return is;
92 CDocumentBuilder::CDocumentBuilder()
93 : m_xEntityResolver(new CDefaultEntityResolver)
95 // init libxml. libxml will protect itself against multiple
96 // initializations so there is no problem here if this gets
97 // called multiple times.
98 xmlInitParser();
101 Reference< XInterface > CDocumentBuilder::_getInstance(const Reference< XMultiServiceFactory >& )
103 return static_cast< XDocumentBuilder* >(new CDocumentBuilder);
106 OUString CDocumentBuilder::_getImplementationName()
108 return "com.sun.star.comp.xml.dom.DocumentBuilder";
110 Sequence<OUString> CDocumentBuilder::_getSupportedServiceNames()
112 return { "com.sun.star.xml.dom.DocumentBuilder" };
115 Sequence< OUString > SAL_CALL CDocumentBuilder::getSupportedServiceNames()
117 return CDocumentBuilder::_getSupportedServiceNames();
120 OUString SAL_CALL CDocumentBuilder::getImplementationName()
122 return CDocumentBuilder::_getImplementationName();
125 sal_Bool SAL_CALL CDocumentBuilder::supportsService(const OUString& aServiceName)
127 return cppu::supportsService(this, aServiceName);
130 Reference< XDOMImplementation > SAL_CALL CDocumentBuilder::getDOMImplementation()
133 return Reference< XDOMImplementation >();
136 sal_Bool SAL_CALL CDocumentBuilder::isNamespaceAware()
138 return true;
141 sal_Bool SAL_CALL CDocumentBuilder::isValidating()
143 return false;
146 Reference< XDocument > SAL_CALL CDocumentBuilder::newDocument()
148 ::osl::MutexGuard const g(m_Mutex);
150 // create a new document
151 xmlDocPtr pDocument = xmlNewDoc(reinterpret_cast<const xmlChar*>("1.0"));
152 Reference< XDocument > const xRet(
153 CDocument::CreateCDocument(pDocument).get());
154 return xRet;
157 static OUString make_error_message(xmlParserCtxtPtr ctxt)
159 OUStringBuffer buf;
160 buf.appendAscii(ctxt->lastError.message);
161 buf.append("Line: ");
162 buf.append(static_cast<sal_Int32>(ctxt->lastError.line));
163 buf.append("\nColumn: ");
164 buf.append(static_cast<sal_Int32>(ctxt->lastError.int2));
165 OUString msg = buf.makeStringAndClear();
166 return msg;
169 // -- callbacks and context struct for parsing from stream
170 // -- c-linkage, so the callbacks can be used by libxml
171 extern "C" {
173 // context struct passed to IO functions
174 typedef struct context {
175 Reference< XInputStream > rInputStream;
176 bool close;
177 bool freeOnClose;
178 } context_t;
180 static int xmlIO_read_func( void *context, char *buffer, int len)
182 // get the context...
183 context_t *pctx = static_cast<context_t*>(context);
184 if (!pctx->rInputStream.is())
185 return -1;
186 try {
187 // try to read the requested number of bytes
188 Sequence< sal_Int8 > chunk(len);
189 int nread = pctx->rInputStream->readBytes(chunk, len);
191 // copy bytes to the provided buffer
192 memcpy(buffer, chunk.getConstArray(), nread);
193 return nread;
194 } catch (const css::uno::Exception&) {
195 TOOLS_WARN_EXCEPTION( "unoxml", "");
196 return -1;
200 static int xmlIO_close_func(void* context)
202 // get the context...
203 context_t *pctx = static_cast<context_t*>(context);
204 if (!pctx->rInputStream.is())
205 return 0;
208 if (pctx->close)
209 pctx->rInputStream->closeInput();
210 if (pctx->freeOnClose)
211 delete pctx;
212 return 0;
213 } catch (const css::uno::Exception&) {
214 TOOLS_WARN_EXCEPTION( "unoxml", "");
215 return -1;
219 static xmlParserInputPtr resolve_func(void *ctx,
220 const xmlChar *publicId,
221 const xmlChar *systemId)
223 // get the CDocumentBuilder object
224 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(ctx);
225 CDocumentBuilder *builder = static_cast< CDocumentBuilder* >(ctxt->_private);
226 Reference< XEntityResolver > resolver = builder->getEntityResolver();
227 OUString sysid;
228 if (systemId != nullptr)
229 sysid = OUString(reinterpret_cast<char const *>(systemId), strlen(reinterpret_cast<char const *>(systemId)), RTL_TEXTENCODING_UTF8);
230 OUString pubid;
231 if (publicId != nullptr)
232 pubid = OUString(reinterpret_cast<char const *>(publicId), strlen(reinterpret_cast<char const *>(publicId)), RTL_TEXTENCODING_UTF8);
234 // resolve the entity
235 InputSource src = resolver->resolveEntity(pubid, sysid);
237 // create IO context on heap because this call will no longer be on the stack
238 // when IO is actually performed through the callbacks. The close function must
239 // free the memory which is indicated by the freeOnClose field in the context struct
240 context_t *c = new context_t;
241 c->rInputStream = src.aInputStream;
242 c->close = true;
243 c->freeOnClose = true;
245 // set up the inputBuffer and inputPtr for libxml
246 xmlParserInputBufferPtr pBuffer =
247 xmlParserInputBufferCreateIO(xmlIO_read_func, xmlIO_close_func, c, XML_CHAR_ENCODING_NONE);
248 xmlParserInputPtr pInput =
249 xmlNewIOInputStream(ctxt, pBuffer, XML_CHAR_ENCODING_NONE);
250 return pInput;
#if 0
// Compiled out. Would forward to resolve_func, passing the URL as the
// system id and no public id.
static xmlParserInputPtr external_entity_loader(const char *URL, const char * /*ID*/, xmlParserCtxtPtr ctxt)
{
    return resolve_func(ctxt, nullptr, reinterpret_cast<const xmlChar*>(URL));
}
#endif
261 // default warning handler does not trigger assertion
262 static void warning_func(void * ctx, const char * /*msg*/, ...)
266 xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx);
268 SAL_INFO(
269 "unoxml",
270 "libxml2 warning: "
271 << make_error_message(pctx));
273 CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private);
275 if (pDocBuilder->getErrorHandler().is()) // if custom error handler is set (using setErrorHandler ())
277 // Prepare SAXParseException to be passed to custom XErrorHandler::warning function
278 css::xml::sax::SAXParseException saxex;
279 saxex.Message = make_error_message(pctx);
280 saxex.LineNumber = static_cast<sal_Int32>(pctx->lastError.line);
281 saxex.ColumnNumber = static_cast<sal_Int32>(pctx->lastError.int2);
283 // Call custom warning function
284 pDocBuilder->getErrorHandler()->warning(::css::uno::Any(saxex));
287 catch (const css::uno::Exception &)
289 // Protect lib2xml from UNO Exception
290 TOOLS_WARN_EXCEPTION("unoxml", "DOM::warning_func");
294 // default error handler triggers assertion
295 static void error_func(void * ctx, const char * /*msg*/, ...)
299 xmlParserCtxtPtr const pctx = static_cast<xmlParserCtxtPtr>(ctx);
300 SAL_WARN(
301 "unoxml",
302 "libxml2 error: "
303 << make_error_message(pctx));
305 CDocumentBuilder * const pDocBuilder = static_cast<CDocumentBuilder*>(pctx->_private);
307 if (pDocBuilder->getErrorHandler().is()) // if custom error handler is set (using setErrorHandler ())
309 // Prepare SAXParseException to be passed to custom XErrorHandler::error function
310 css::xml::sax::SAXParseException saxex;
311 saxex.Message = make_error_message(pctx);
312 saxex.LineNumber = static_cast<sal_Int32>(pctx->lastError.line);
313 saxex.ColumnNumber = static_cast<sal_Int32>(pctx->lastError.int2);
315 // Call custom warning function
316 pDocBuilder->getErrorHandler()->error(::css::uno::Any(saxex));
319 catch (const css::uno::Exception &)
321 // Protect lib2xml from UNO Exception
322 TOOLS_WARN_EXCEPTION("unoxml", "DOM::error_func");
325 } // extern "C"
327 static void throwEx(xmlParserCtxtPtr ctxt)
329 css::xml::sax::SAXParseException saxex;
330 saxex.Message = make_error_message(ctxt);
331 saxex.LineNumber = static_cast<sal_Int32>(ctxt->lastError.line);
332 saxex.ColumnNumber = static_cast<sal_Int32>(ctxt->lastError.int2);
333 throw saxex;
336 namespace {
338 struct XmlFreeParserCtxt {
339 void operator ()(xmlParserCtxt * p) const { xmlFreeParserCtxt(p); }
344 Reference< XDocument > SAL_CALL CDocumentBuilder::parse(const Reference< XInputStream >& is)
346 if (!is.is()) {
347 throw RuntimeException();
350 ::osl::MutexGuard const g(m_Mutex);
352 // IO context struct. Must outlive pContext, as destroying that via
353 // xmlFreeParserCtxt may still access this context_t
354 context_t c;
355 c.rInputStream = is;
356 // we did not open the stream, thus we do not close it.
357 c.close = false;
358 c.freeOnClose = false;
360 std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext(
361 xmlNewParserCtxt());
363 // register error functions to prevent errors being printed
364 // on the console
365 pContext->_private = this;
366 pContext->sax->error = error_func;
367 pContext->sax->warning = warning_func;
368 pContext->sax->resolveEntity = resolve_func;
370 xmlDocPtr const pDoc = xmlCtxtReadIO(pContext.get(),
371 xmlIO_read_func, xmlIO_close_func, &c, nullptr, nullptr, 0);
373 if (pDoc == nullptr) {
374 throwEx(pContext.get());
376 Reference< XDocument > const xRet(
377 CDocument::CreateCDocument(pDoc).get());
378 return xRet;
381 Reference< XDocument > SAL_CALL CDocumentBuilder::parseURI(const OUString& sUri)
383 ::osl::MutexGuard const g(m_Mutex);
385 std::unique_ptr<xmlParserCtxt, XmlFreeParserCtxt> const pContext(
386 xmlNewParserCtxt());
387 pContext->_private = this;
388 pContext->sax->error = error_func;
389 pContext->sax->warning = warning_func;
390 pContext->sax->resolveEntity = resolve_func;
391 // xmlSetExternalEntityLoader(external_entity_loader);
392 OString oUri = OUStringToOString(sUri, RTL_TEXTENCODING_UTF8);
393 char *uri = const_cast<char*>(oUri.getStr());
394 xmlDocPtr pDoc = xmlCtxtReadFile(pContext.get(), uri, nullptr, 0);
396 Reference< XDocument > xRet;
398 // if we failed to parse the URI as a simple file, lets try via a ucb stream.
399 // For Android file:///assets/ URLs which must go via the osl/ file API.
400 if (pDoc == nullptr) {
401 Reference < XSimpleFileAccess3 > xStreamAccess(
402 SimpleFileAccess::create( comphelper::getProcessComponentContext() ) );
403 Reference< XInputStream > xInStream = xStreamAccess->openFileRead( sUri );
404 if (!xInStream.is())
405 throwEx(pContext.get());
407 // loop over every layout entry in current file
408 xRet = parse( xInStream );
410 xInStream->closeInput();
411 xInStream.clear();
413 } else
414 xRet = CDocument::CreateCDocument(pDoc).get();
416 return xRet;
419 void SAL_CALL
420 CDocumentBuilder::setEntityResolver(Reference< XEntityResolver > const& xER)
422 ::osl::MutexGuard const g(m_Mutex);
424 m_xEntityResolver = xER;
427 Reference< XEntityResolver > CDocumentBuilder::getEntityResolver()
429 ::osl::MutexGuard const g(m_Mutex);
431 return m_xEntityResolver;
434 void SAL_CALL
435 CDocumentBuilder::setErrorHandler(Reference< XErrorHandler > const& xEH)
437 ::osl::MutexGuard const g(m_Mutex);
439 m_xErrorHandler = xEH;
443 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */