Version 6.4.0.3, tag libreoffice-6.4.0.3
[LibreOffice.git] / filter / source / xsltfilter / LibXSLTTransformer.cxx
blob42980eeeaff7fe3aa90368a79fa2847445cc8731
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 /*
4 * This file is part of the LibreOffice project.
6 * This Source Code Form is subject to the terms of the Mozilla Public
7 * License, v. 2.0. If a copy of the MPL was not distributed with this
8 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 */
11 #include <algorithm>
12 #include <cstdio>
13 #include <cstring>
14 #include <map>
15 #include <utility>
16 #include <vector>
17 #include <iostream>
18 #include <libxml/parser.h>
19 #include <libxml/tree.h>
20 #include <libxml/xmlIO.h>
21 #include <libxml/xpath.h>
22 #include <libxml/xpathInternals.h>
23 #include <libxml/xmlstring.h>
24 #include <libxslt/transform.h>
25 #include <libxslt/xsltutils.h>
26 #include <libxslt/variables.h>
27 #include <libxslt/extensions.h>
28 #include <libexslt/exslt.h>
30 #include <cppuhelper/factory.hxx>
32 #include <osl/module.h>
33 #include <osl/file.hxx>
34 #include <osl/process.h>
35 #include <com/sun/star/lang/XComponent.hpp>
36 #include <com/sun/star/lang/XInitialization.hpp>
37 #include <com/sun/star/uno/Any.hxx>
38 #include <com/sun/star/beans/NamedValue.hpp>
39 #include <com/sun/star/io/XInputStream.hpp>
40 #include <com/sun/star/io/XOutputStream.hpp>
41 #include <com/sun/star/io/XActiveDataSource.hpp>
42 #include <com/sun/star/io/XActiveDataSink.hpp>
43 #include <com/sun/star/io/XActiveDataControl.hpp>
44 #include <com/sun/star/io/XStreamListener.hpp>
46 #include "LibXSLTTransformer.hxx"
47 #include "OleHandler.hxx"
48 #include <memory>
50 using namespace ::cppu;
51 using namespace ::osl;
52 using namespace ::com::sun::star::beans;
53 using namespace ::com::sun::star::io;
54 using namespace ::com::sun::star::uno;
55 using namespace ::com::sun::star::lang;
56 using namespace ::com::sun::star::registry;
57 using ::std::pair;
59 namespace XSLT
61 const char* const LibXSLTTransformer::PARAM_SOURCE_URL = "sourceURL";
62 const char* const LibXSLTTransformer::PARAM_SOURCE_BASE_URL =
63 "sourceBaseURL";
64 const char* const LibXSLTTransformer::PARAM_TARGET_URL = "targetURL";
65 const char* const LibXSLTTransformer::PARAM_TARGET_BASE_URL =
66 "targetBaseURL";
67 const char* const LibXSLTTransformer::PARAM_DOCTYPE_PUBLIC = "publicType";
69 const sal_Int32 Reader::OUTPUT_BUFFER_SIZE = 4096;
71 const sal_Int32 Reader::INPUT_BUFFER_SIZE = 4096;
73 /**
74 * ParserInputBufferCallback forwards IO call-backs to libxml stream IO.
76 struct ParserInputBufferCallback
78 static int
79 on_read(void * context, char * buffer, int len)
81 Reader * tmp = static_cast<Reader*> (context);
82 return tmp->read(buffer, len);
84 static int
85 on_close(void * )
87 return 0;
90 /**
91 * ParserOutputBufferCallback forwards IO call-backs to libxml stream IO.
93 struct ParserOutputBufferCallback
95 static int
96 on_write(void * context, const char * buffer, int len)
98 Reader * tmp = static_cast<Reader*> (context);
99 return tmp->write(buffer, len);
101 static int
102 on_close(void * context)
104 Reader * tmp = static_cast<Reader*> (context);
105 tmp->closeOutput();
106 return 0;
110 * ExtFuncOleCB forwards XPath extension function calls registered with libxslt to the OleHandler instance that actually
111 * provides the implementation for those functions.
113 * The OLE extension module currently supplies two functions
114 * insertByName: registers an OLE object to be later inserted into the output tree.
115 * getByName: reads a previously registered OLE object and returns a base64 encoded string representation.
117 struct ExtFuncOleCB
119 static void *
120 init(xsltTransformContextPtr, const xmlChar*)
122 return nullptr;
124 static void
125 insertByName(xmlXPathParserContextPtr ctxt, int nargs)
127 xsltTransformContextPtr tctxt;
128 void *data;
129 if (nargs != 2) {
130 xsltGenericError(xsltGenericErrorContext,
131 "insertByName: requires exactly 2 arguments\n");
132 return;
134 tctxt = xsltXPathGetTransformContext(ctxt);
135 if (tctxt == nullptr) {
136 xsltGenericError(xsltGenericErrorContext,
137 "xsltExtFunctionTest: failed to get the transformation context\n");
138 return;
140 // XXX: someone with better knowledge of libxslt might come up with a better
141 // idea to pass the OleHandler than by attaching it to tctxt->_private. See also
142 // below.
143 data = tctxt->_private;
144 if (data == nullptr) {
145 xsltGenericError(xsltGenericErrorContext,
146 "xsltExtFunctionTest: failed to get module data\n");
147 return;
149 OleHandler * oh = static_cast<OleHandler*> (data);
151 xmlXPathObjectPtr value = valuePop(ctxt);
152 value = ensureStringValue(value, ctxt);
153 xmlXPathObjectPtr streamName = valuePop(ctxt);
154 streamName = ensureStringValue(streamName, ctxt);
156 oh->insertByName(OStringToOUString(reinterpret_cast<char*>(streamName->stringval), RTL_TEXTENCODING_UTF8),
157 OString(reinterpret_cast<char*>(value->stringval)));
158 valuePush(ctxt, xmlXPathNewCString(""));
161 static xmlXPathObjectPtr ensureStringValue(xmlXPathObjectPtr obj, const xmlXPathParserContextPtr ctxt)
163 if (obj->type != XPATH_STRING) {
164 valuePush(ctxt, obj);
165 xmlXPathStringFunction(ctxt, 1);
166 obj = valuePop(ctxt);
168 return obj;
171 static void getByName(xmlXPathParserContextPtr ctxt, int nargs)
173 xsltTransformContextPtr tctxt;
174 void *data;
175 if (nargs != 1) {
176 xsltGenericError(xsltGenericErrorContext,
177 "getByName: requires exactly 1 argument\n");
178 return;
181 tctxt = xsltXPathGetTransformContext(ctxt);
182 if (tctxt == nullptr) {
183 xsltGenericError(xsltGenericErrorContext,
184 "xsltExtFunctionTest: failed to get the transformation context\n");
185 return;
187 // XXX: someone with better knowledge of libxslt might come up with a better
188 // idea to pass the OleHandler than by attaching it to tctxt->_private
189 data = tctxt->_private;
190 if (data == nullptr) {
191 xsltGenericError(xsltGenericErrorContext,
192 "xsltExtFunctionTest: failed to get module data\n");
193 return;
195 OleHandler * oh = static_cast<OleHandler*> (data);
196 xmlXPathObjectPtr streamName = valuePop(ctxt);
197 streamName = ensureStringValue(streamName, ctxt);
198 const OString content = oh->getByName(OStringToOUString(reinterpret_cast<char*>(streamName->stringval), RTL_TEXTENCODING_UTF8));
199 valuePush(ctxt, xmlXPathNewCString(content.getStr()));
200 xmlXPathFreeObject(streamName);
204 Reader::Reader(LibXSLTTransformer* transformer) :
205 Thread("LibXSLTTransformer"), m_transformer(transformer),
206 m_readBuf(INPUT_BUFFER_SIZE), m_writeBuf(OUTPUT_BUFFER_SIZE),
207 m_tcontext(nullptr)
209 LIBXML_TEST_VERSION;
214 Reader::read(char * buffer, int len)
216 // const char *ptr = (const char *) context;
217 if (buffer == nullptr || len < 0)
218 return -1;
219 sal_Int32 n;
220 css::uno::Reference<XInputStream> xis = m_transformer->getInputStream();
221 n = xis->readBytes(m_readBuf, len);
222 if (n > 0)
224 memcpy(buffer, m_readBuf.getArray(), n);
226 return n;
230 Reader::write(const char * buffer, int len)
232 if (buffer == nullptr || len < 0)
233 return -1;
234 if (len > 0)
236 css::uno::Reference<XOutputStream> xos = m_transformer->getOutputStream();
237 sal_Int32 writeLen = len;
238 sal_Int32 bufLen = ::std::min(writeLen, OUTPUT_BUFFER_SIZE);
239 const sal_uInt8* memPtr =
240 reinterpret_cast<const sal_uInt8*> (buffer);
241 while (writeLen > 0)
243 sal_Int32 n = ::std::min(writeLen, bufLen);
244 m_writeBuf.realloc(n);
245 memcpy(m_writeBuf.getArray(), memPtr,
246 static_cast<size_t> (n));
247 xos->writeBytes(m_writeBuf);
248 memPtr += n;
249 writeLen -= n;
252 return len;
255 void
256 Reader::closeOutput()
258 css::uno::Reference<XOutputStream> xos = m_transformer->getOutputStream();
259 if (xos.is())
261 xos->flush();
262 xos->closeOutput();
264 m_transformer->done();
267 void
268 Reader::execute()
270 OSL_ASSERT(m_transformer != nullptr);
271 OSL_ASSERT(m_transformer->getInputStream().is());
272 OSL_ASSERT(m_transformer->getOutputStream().is());
273 OSL_ASSERT(!m_transformer->getStyleSheetURL().isEmpty());
274 ::std::map<const char*, OString> pmap = m_transformer->getParameters();
275 ::std::vector< const char* > params( pmap.size() * 2 + 1 ); // build parameters
276 int paramIndex = 0;
277 for (auto const& elem : pmap)
279 params[paramIndex++] = elem.first;
280 params[paramIndex++] = elem.second.getStr();
282 params[paramIndex] = nullptr;
283 xmlDocPtr doc = xmlReadIO(&ParserInputBufferCallback::on_read,
284 &ParserInputBufferCallback::on_close,
285 static_cast<void*> (this), nullptr, nullptr, 0);
286 xsltStylesheetPtr styleSheet = xsltParseStylesheetFile(
287 reinterpret_cast<const xmlChar *>(m_transformer->getStyleSheetURL().getStr()));
288 xmlDocPtr result = nullptr;
289 exsltRegisterAll();
290 registerExtensionModule();
291 #ifdef DEBUG_FILTER_LIBXSLTTRANSFORMER
292 xsltSetGenericDebugFunc(stderr, NULL);
293 xsltDebugDumpExtensions(NULL);
294 #endif
295 std::unique_ptr<OleHandler> oh(new OleHandler(m_transformer->getComponentContext()));
296 if (styleSheet)
298 xsltTransformContextPtr tcontext = xsltNewTransformContext(
299 styleSheet, doc);
301 std::scoped_lock<std::mutex> g(m_mutex);
302 m_tcontext = tcontext;
304 oh->registercontext(m_tcontext);
305 xsltQuoteUserParams(m_tcontext, params.data());
306 result = xsltApplyStylesheetUser(styleSheet, doc, nullptr, nullptr, nullptr,
307 m_tcontext);
310 if (result)
312 xmlCharEncodingHandlerPtr encoder = xmlGetCharEncodingHandler(
313 XML_CHAR_ENCODING_UTF8);
314 xmlOutputBufferPtr outBuf = xmlAllocOutputBuffer(encoder);
315 outBuf->context = static_cast<void *> (this);
316 outBuf->writecallback = &ParserOutputBufferCallback::on_write;
317 outBuf->closecallback = &ParserOutputBufferCallback::on_close;
318 xsltSaveResultTo(outBuf, result, styleSheet);
319 (void)xmlOutputBufferClose(outBuf);
321 else
323 xmlErrorPtr lastErr = xmlGetLastError();
324 OUString msg;
325 if (lastErr)
326 msg = OStringToOUString(lastErr->message, RTL_TEXTENCODING_UTF8);
327 else
328 msg = "Unknown XSLT transformation error";
330 m_transformer->error(msg);
332 closeOutput();
333 oh.reset();
334 xsltFreeStylesheet(styleSheet);
335 xsltTransformContextPtr tcontext = nullptr;
337 std::scoped_lock<std::mutex> g(m_mutex);
338 std::swap(m_tcontext, tcontext);
340 xsltFreeTransformContext(tcontext);
341 xmlFreeDoc(doc);
342 xmlFreeDoc(result);
345 void
346 Reader::registerExtensionModule()
348 const xmlChar* oleModuleURI = reinterpret_cast<const xmlChar *>(EXT_MODULE_OLE_URI);
349 xsltRegisterExtModule(oleModuleURI, &ExtFuncOleCB::init, nullptr);
350 xsltRegisterExtModuleFunction(
351 reinterpret_cast<const xmlChar*>("insertByName"),
352 oleModuleURI,
353 &ExtFuncOleCB::insertByName);
354 xsltRegisterExtModuleFunction(
355 reinterpret_cast<const xmlChar*>("getByName"),
356 oleModuleURI,
357 &ExtFuncOleCB::getByName);
361 void Reader::forceStateStopped()
363 std::scoped_lock<std::mutex> g(m_mutex);
364 if (!m_tcontext)
365 return;
366 //tdf#100057 If we force a cancel, libxslt will of course just keep on going unless something
367 //tells it to stop. Here we force the stopped state so that libxslt will stop processing
368 //and so Reader::execute will complete and we can join cleanly
369 m_tcontext->state = XSLT_STATE_STOPPED;
372 Reader::~Reader()
376 LibXSLTTransformer::LibXSLTTransformer(
377 const css::uno::Reference<XComponentContext> & rxContext) :
378 m_xContext(rxContext)
382 void
383 LibXSLTTransformer::setInputStream(
384 const css::uno::Reference<XInputStream>& inputStream)
386 m_rInputStream = inputStream;
389 css::uno::Reference<XInputStream>
390 LibXSLTTransformer::getInputStream()
392 return m_rInputStream;
395 void
396 LibXSLTTransformer::setOutputStream(
397 const css::uno::Reference<XOutputStream>& outputStream)
399 m_rOutputStream = outputStream;
402 css::uno::Reference<XOutputStream>
403 LibXSLTTransformer::getOutputStream()
405 return m_rOutputStream;
408 void
409 LibXSLTTransformer::addListener(const css::uno::Reference<XStreamListener>& listener)
411 m_listeners.push_front(listener);
414 void
415 LibXSLTTransformer::removeListener(
416 const css::uno::Reference<XStreamListener>& listener)
418 m_listeners.erase( std::remove(m_listeners.begin(), m_listeners.end(), listener ), m_listeners.end() );
421 void
422 LibXSLTTransformer::start()
424 for (const css::uno::Reference<XStreamListener>& xl : m_listeners)
426 xl->started();
428 OSL_ENSURE(!m_Reader.is(), "Somebody forgot to call terminate *and* holds a reference to this LibXSLTTransformer instance");
429 m_Reader = new Reader(this);
430 m_Reader->launch();
433 void
434 LibXSLTTransformer::error(const OUString& msg)
436 Any arg;
437 arg <<= Exception(msg, *this);
438 for (const css::uno::Reference<XStreamListener>& xl : m_listeners)
440 if (xl.is())
442 xl->error(arg);
447 void
448 LibXSLTTransformer::done()
450 for (const css::uno::Reference<XStreamListener>& xl : m_listeners)
452 if (xl.is())
454 xl->closed();
459 void
460 LibXSLTTransformer::terminate()
462 if (m_Reader.is())
464 m_Reader->terminate();
465 m_Reader->forceStateStopped();
466 m_Reader->join();
468 m_Reader.clear();
469 m_parameters.clear();
472 void
473 LibXSLTTransformer::initialize(const Sequence<Any>& args)
475 Sequence<Any> params;
476 if (!(args[0] >>= params))
477 { // backward compatibility for old clients using createInstance
478 params = args;
480 xmlSubstituteEntitiesDefault(0);
481 m_parameters.clear();
482 for (int i = 0; i < params.getLength(); i++)
484 NamedValue nv;
485 params[i] >>= nv;
486 OString nameUTF8 = OUStringToOString(nv.Name,
487 RTL_TEXTENCODING_UTF8);
488 OUString value;
489 OString valueUTF8;
490 if (nv.Value >>= value)
492 valueUTF8 = OUStringToOString(value,
493 RTL_TEXTENCODING_UTF8);
495 else
497 // ignore non-string parameters
498 continue;
500 if (nameUTF8 == "StylesheetURL")
502 m_styleSheetURL = valueUTF8;
504 else if (nameUTF8 == "SourceURL")
506 m_parameters.insert(pair<const char*, OString> (
507 PARAM_SOURCE_URL, valueUTF8));
509 else if (nameUTF8 == "SourceBaseURL")
511 m_parameters.insert(pair<const char*, OString> (
512 PARAM_SOURCE_BASE_URL, valueUTF8));
514 else if (nameUTF8 == "TargetURL")
516 m_parameters.insert(pair<const char*, OString> (
517 PARAM_TARGET_URL, valueUTF8));
519 else if (nameUTF8 == "TargetBaseURL")
521 m_parameters.insert(pair<const char*, OString> (
522 PARAM_TARGET_BASE_URL, valueUTF8));
524 else if (nameUTF8 == "DoctypePublic")
526 m_parameters.insert(pair<const char*, OString> (
527 PARAM_DOCTYPE_PUBLIC, valueUTF8));
532 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */