Avoid potential negative array index access to cached text.
[LibreOffice.git] / helpcompiler / source / HelpCompiler.cxx
blob e874ed94b58f713114c3f049604c85cb68b61353
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <algorithm>
#include <memory>
#include <sstream>
#include <HelpCompiler.hxx>
#include <BasCodeTagger.hxx>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/transform.h>
#include <rtl/character.hxx>
#include <sal/log.hxx>
#include <utility>

34 HelpCompiler::HelpCompiler(StreamTable &in_streamTable, fs::path in_inputFile,
35 fs::path in_src, fs::path in_zipdir, fs::path in_resCompactStylesheet,
36 fs::path in_resEmbStylesheet, std::string in_module, std::string in_lang,
37 bool in_bExtensionMode)
38 : streamTable(in_streamTable), inputFile(std::move(in_inputFile)),
39 src(std::move(in_src)), zipdir(std::move(in_zipdir)), module(std::move(in_module)), lang(std::move(in_lang)), resCompactStylesheet(std::move(in_resCompactStylesheet)),
40 resEmbStylesheet(std::move(in_resEmbStylesheet)), bExtensionMode( in_bExtensionMode )
42 #ifdef __GNUC__
43 #pragma GCC diagnostic push
44 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
45 #endif
46 #if defined(_MSC_VER)
47 #pragma warning(push)
48 #pragma warning(disable:4996)
49 #endif
50 xmlKeepBlanksDefaultValue = 0;
51 #if defined(_MSC_VER)
52 #pragma warning(pop)
53 #endif
54 #ifdef __GNUC__
55 #pragma GCC diagnostic pop
56 #endif
57 char* os = getenv("OS");
58 if (os)
60 gui = (strcmp(os, "WNT") == 0 ? "WIN" : (strcmp(os, "MACOSX") == 0 ? "MAC" : "UNIX"));
64 void HelpCompiler::tagBasicCodeExamples( xmlDocPtr doc )
66 try
68 BasicCodeTagger bct( doc );
69 bct.tagBasicCodes();
71 catch ( BasicCodeTagger::TaggerException &ex )
73 if ( ex != BasicCodeTagger::EMPTY_DOCUMENT )
74 throw;
78 xmlDocPtr HelpCompiler::compactXhpForJar( xmlDocPtr doc )
80 static xsltStylesheetPtr compact = nullptr;
81 static const char *params[2 + 1];
82 params[0] = nullptr;
83 xmlDocPtr compacted;
85 if (!compact)
87 compact = xsltParseStylesheetFile(BAD_CAST(resCompactStylesheet.native_file_string().c_str()));
90 compacted = xsltApplyStylesheet(compact, doc, params);
91 return compacted;
94 void HelpCompiler::saveXhpForJar( xmlDocPtr doc, const fs::path &filePath )
96 //save processed xhp document in ziptmp<module>_<lang>/text directory
97 #ifdef _WIN32
98 std::string pathSep = "\\";
99 #else
100 std::string pathSep = "/";
101 #endif
102 const std::string& sourceXhpPath = filePath.native_file_string();
103 std::string zipdirPath = zipdir.native_file_string();
104 const std::string srcdirPath( src.native_file_string() );
105 // srcdirPath contains trailing /, but we want the file path with / at the beginning
106 std::string jarXhpPath = sourceXhpPath.substr( srcdirPath.length() - 1 );
107 std::string xhpFileName = jarXhpPath.substr( jarXhpPath.rfind( pathSep ) + 1 );
108 jarXhpPath = jarXhpPath.substr( 0, jarXhpPath.rfind( pathSep ) );
109 if ( !jarXhpPath.compare( 1, 11, "text" + pathSep + "sbasic" ) )
111 tagBasicCodeExamples( doc );
113 if ( !jarXhpPath.compare( 1, 11, "text" + pathSep + "shared" ) )
115 const size_t pos = zipdirPath.find( "ziptmp" );
116 if ( pos != std::string::npos )
117 zipdirPath.replace( pos + 6, module.length(), "shared" );
119 xmlDocPtr compacted = compactXhpForJar( doc );
120 fs::create_directory( fs::path( zipdirPath + jarXhpPath, fs::native ) );
121 if ( -1 == xmlSaveFormatFileEnc( (zipdirPath + jarXhpPath + pathSep + xhpFileName).c_str(), compacted, "utf-8", 0 ) )
122 std::cerr << "Error saving file to " << (zipdirPath + jarXhpPath + pathSep + xhpFileName).c_str() << std::endl;
123 xmlFreeDoc(compacted);
126 xmlDocPtr HelpCompiler::getSourceDocument(const fs::path &filePath)
128 xmlDocPtr res;
129 if (bExtensionMode)
131 // this is the mode when used within LibreOffice for importing help
132 // bundled with an extension
133 res = xmlParseFile(filePath.native_file_string().c_str());
135 else
137 // this is the mode when used at build time to generate LibreOffice
138 // help from its xhp source
139 static xsltStylesheetPtr cur = nullptr;
140 static const char *params[2 + 1];
141 if (!cur)
143 static std::string fsroot('\'' + src.toUTF8() + '\'');
145 cur = xsltParseStylesheetFile(BAD_CAST(resEmbStylesheet.native_file_string().c_str()));
147 int nbparams = 0;
148 params[nbparams++] = "fsroot";
149 params[nbparams++] = fsroot.c_str();
150 params[nbparams] = nullptr;
152 xmlDocPtr doc = xmlParseFile(filePath.native_file_string().c_str());
154 saveXhpForJar( doc, filePath );
156 res = xsltApplyStylesheet(cur, doc, params);
157 xmlFreeDoc(doc);
159 return res;
162 // returns a node representing the whole stuff compiled for the current
163 // application.
164 xmlNodePtr HelpCompiler::clone(xmlNodePtr node, const std::string& appl)
166 xmlNodePtr root = xmlCopyNode(node, 2);
167 if (node->xmlChildrenNode)
169 xmlNodePtr list = node->xmlChildrenNode;
170 while (list)
172 if (strcmp(reinterpret_cast<const char*>(list->name), "switchinline") == 0 || strcmp(reinterpret_cast<const char*>(list->name), "switch") == 0)
174 std::string tmp;
175 xmlChar * prop = xmlGetProp(list, reinterpret_cast<xmlChar const *>("select"));
176 if (prop != nullptr)
178 if (strcmp(reinterpret_cast<char *>(prop), "sys") == 0)
180 tmp = gui;
182 else if (strcmp(reinterpret_cast<char *>(prop), "appl") == 0)
184 tmp = appl;
186 xmlFree(prop);
188 if (!tmp.empty())
190 bool isCase=false;
191 xmlNodePtr caseList=list->xmlChildrenNode;
192 while (caseList)
194 xmlChar* select = xmlGetProp(caseList, BAD_CAST("select"));
195 if (select)
197 if (!strcmp(reinterpret_cast<char*>(select), tmp.c_str()) && !isCase)
199 isCase=true;
200 xmlNodePtr clp = caseList->xmlChildrenNode;
201 while (clp)
203 xmlAddChild(root, clone(clp, appl));
204 clp = clp->next;
207 xmlFree(select);
209 else
211 if ((strcmp(reinterpret_cast<const char*>(caseList->name), "defaultinline") != 0) && (strcmp(reinterpret_cast<const char*>(caseList->name), "default") != 0))
213 xmlAddChild(root, clone(caseList, appl));
215 else
217 if (!isCase)
219 xmlNodePtr clp = caseList->xmlChildrenNode;
220 while (clp)
222 xmlAddChild(root, clone(clp, appl));
223 clp = clp->next;
228 caseList = caseList->next;
232 else
234 xmlAddChild(root, clone(list, appl));
236 list = list->next;
239 return root;
242 namespace {
244 class myparser
246 public:
247 std::string documentId;
248 std::string fileName;
249 std::string title;
250 std::unique_ptr< std::vector<std::string> > hidlist;
251 std::unique_ptr<Hashtable> keywords;
252 std::unique_ptr<Stringtable> helptexts;
253 private:
254 std::vector<std::string> extendedHelpText;
255 public:
256 myparser(std::string indocumentId, std::string infileName,
257 std::string intitle) : documentId(std::move(indocumentId)), fileName(std::move(infileName)),
258 title(std::move(intitle))
260 hidlist.reset(new std::vector<std::string>);
261 keywords.reset(new Hashtable);
262 helptexts.reset(new Stringtable);
264 void traverse( xmlNodePtr parentNode );
265 private:
266 std::string dump(xmlNodePtr node);
271 std::string myparser::dump(xmlNodePtr node)
273 std::string app;
274 if (node->xmlChildrenNode)
276 xmlNodePtr list = node->xmlChildrenNode;
277 while (list)
279 app += dump(list);
280 list = list->next;
283 if (xmlNodeIsText(node))
285 xmlChar *pContent = xmlNodeGetContent(node);
286 app += std::string(reinterpret_cast<char*>(pContent));
287 xmlFree(pContent);
289 return app;
// Removes leading and trailing space characters (' ' only, no other
// whitespace) from `str` in place. A string that is empty or consists solely
// of spaces becomes empty.
static void trim(std::string& str)
{
    std::string::size_type pos = str.find_last_not_of(' ');
    if(pos != std::string::npos)
    {
        // cut the tail first, then the head
        str.erase(pos + 1);
        pos = str.find_first_not_of(' ');
        if(pos != std::string::npos)
            str.erase(0, pos);
    }
    else
        str.clear();
}

306 void myparser::traverse( xmlNodePtr parentNode )
308 // traverse all nodes that belong to the parent
309 xmlNodePtr test ;
310 for (test = parentNode->xmlChildrenNode; test; test = test->next)
312 if (fileName.empty() && !strcmp(reinterpret_cast<const char*>(test->name), "filename"))
314 xmlNodePtr node = test->xmlChildrenNode;
315 if (xmlNodeIsText(node))
317 xmlChar *pContent = xmlNodeGetContent(node);
318 fileName = std::string(reinterpret_cast<char*>(pContent));
319 xmlFree(pContent);
322 else if (title.empty() && !strcmp(reinterpret_cast<const char*>(test->name), "title"))
324 title = dump(test);
325 if (title.empty())
326 title = "<notitle>";
328 else if (!strcmp(reinterpret_cast<const char*>(test->name), "bookmark"))
330 xmlChar* branchxml = xmlGetProp(test, BAD_CAST("branch"));
331 if (branchxml == nullptr) {
332 throw HelpProcessingException(
333 HelpProcessingErrorClass::XmlParsing, "bookmark lacks branch attribute");
335 std::string branch(reinterpret_cast<char*>(branchxml));
336 xmlFree (branchxml);
337 xmlChar* idxml = xmlGetProp(test, BAD_CAST("id"));
338 if (idxml == nullptr) {
339 throw HelpProcessingException(
340 HelpProcessingErrorClass::XmlParsing, "bookmark lacks id attribute");
342 std::string anchor(reinterpret_cast<char*>(idxml));
343 xmlFree (idxml);
345 if (branch.compare(0, 3, "hid") == 0)
347 size_t index = branch.find('/');
348 if (index != std::string::npos)
350 auto hid = branch.substr(1 + index);
351 // one shall serve as a documentId
352 if (documentId.empty())
353 documentId = hid;
354 extendedHelpText.push_back(hid);
355 HCDBG(std::cerr << "hid pushback" << (anchor.empty() ? hid : hid + "#" + anchor) << std::endl);
356 hidlist->push_back( anchor.empty() ? hid : hid + "#" + anchor);
358 else
359 continue;
361 else if (branch.compare("index") == 0)
363 LinkedList ll;
365 for (xmlNodePtr nd = test->xmlChildrenNode; nd; nd = nd->next)
367 if (strcmp(reinterpret_cast<const char*>(nd->name), "bookmark_value"))
368 continue;
370 std::string embedded;
371 xmlChar* embeddedxml = xmlGetProp(nd, BAD_CAST("embedded"));
372 if (embeddedxml)
374 embedded = std::string(reinterpret_cast<char*>(embeddedxml));
375 xmlFree (embeddedxml);
376 std::transform (embedded.begin(), embedded.end(),
377 embedded.begin(), tocharlower);
380 bool isEmbedded = !embedded.empty() && embedded.compare("true") == 0;
381 if (isEmbedded)
382 continue;
384 std::string keyword = dump(nd);
385 size_t keywordSem = keyword.find(';');
386 if (keywordSem != std::string::npos)
388 std::string tmppre =
389 keyword.substr(0,keywordSem);
390 trim(tmppre);
391 std::string tmppos =
392 keyword.substr(1+keywordSem);
393 trim(tmppos);
394 keyword = tmppre + ";" + tmppos;
396 ll.push_back(keyword);
398 if (!ll.empty())
399 (*keywords)[anchor] = ll;
401 else if (branch.compare("contents") == 0)
403 // currently not used
406 else if (!strcmp(reinterpret_cast<const char*>(test->name), "ahelp"))
408 //tool-tip
409 std::string text = dump(test);
410 std::replace(text.begin(), text.end(), '\n', ' ');
411 trim(text);
413 //tool-tip target
414 std::string hidstr("."); //. == previous seen hid bookmarks
415 xmlChar* hid = xmlGetProp(test, BAD_CAST("hid"));
416 if (hid)
418 hidstr = std::string(reinterpret_cast<char*>(hid));
419 xmlFree (hid);
422 if (hidstr != "." && !hidstr.empty()) //simple case of explicitly named target
424 assert(!hidstr.empty());
425 (*helptexts)[hidstr] = text;
427 else //apply to list of "current" hids determined by recent bookmarks that have hid in their branch
429 //TODO: make these asserts and flush out all our broken help ids
430 SAL_WARN_IF(hidstr.empty(), "helpcompiler", "hid='' for text:" << text);
431 SAL_WARN_IF(!hidstr.empty() && extendedHelpText.empty(), "helpcompiler", "hid='.' with no hid bookmark branches in file: " << fileName + " for text: " << text);
432 for (const std::string& name : extendedHelpText)
434 (*helptexts)[name] = text;
437 extendedHelpText.clear();
439 // traverse children
440 traverse(test);
444 void HelpCompiler::compile()
446 // we now have the jaroutputstream, which will contain the document.
447 // now determine the document as a dom tree in variable docResolved
449 xmlDocPtr docResolvedOrg = getSourceDocument(inputFile);
451 // now add path to the document
452 // resolve the dom
454 if (!docResolvedOrg)
456 std::stringstream aStrStream;
457 aStrStream << "ERROR: file not existing: " << inputFile.native_file_string().c_str() << std::endl;
458 throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() );
461 std::string documentId;
462 std::string fileName;
463 std::string title;
464 // returns a clone of the document with switch-cases resolved
465 std::string appl = module.substr(1);
466 for (char & i : appl)
468 i=rtl::toAsciiUpperCase(static_cast<unsigned char>(i));
470 xmlNodePtr docResolved = clone(xmlDocGetRootElement(docResolvedOrg), appl);
471 myparser aparser(documentId, fileName, title);
472 aparser.traverse(docResolved);
473 documentId = aparser.documentId;
474 fileName = aparser.fileName;
475 title = aparser.title;
477 HCDBG(std::cerr << documentId << " : " << fileName << " : " << title << std::endl);
479 xmlDocPtr docResolvedDoc = xmlCopyDoc(docResolvedOrg, false);
480 xmlDocSetRootElement(docResolvedDoc, docResolved);
482 streamTable.dropappl();
483 streamTable.appl_doc = docResolvedDoc;
484 streamTable.appl_hidlist = std::move(aparser.hidlist);
485 streamTable.appl_helptexts = std::move(aparser.helptexts);
486 streamTable.appl_keywords = std::move(aparser.keywords);
488 streamTable.document_path = fileName;
489 streamTable.document_title = title;
490 std::string actMod = module;
492 if ( !bExtensionMode && !fileName.empty())
494 if (fileName.compare(0, 6, "/text/") == 0)
496 actMod = fileName.substr(strlen("/text/"));
497 actMod = actMod.substr(0, actMod.find('/'));
500 streamTable.document_module = actMod;
501 xmlFreeDoc(docResolvedOrg);
504 namespace fs
506 void create_directory(const fs::path& indexDirName)
508 HCDBG(
509 std::cerr << "creating " <<
510 OUStringToOString(indexDirName.data, RTL_TEXTENCODING_UTF8).getStr()
511 << std::endl
513 osl::Directory::createPath(indexDirName.data);
516 void copy(const fs::path &src, const fs::path &dest)
518 osl::File::copy(src.data, dest.data);
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */