calc: on editing invalidation of view with different zoom is wrong
[LibreOffice.git] / sc / source / ui / dataprovider / htmldataprovider.cxx
blob3f3300d320f103559bf2d43d120c8817c84091a9
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include "htmldataprovider.hxx"
11 #include <datamapper.hxx>
12 #include <datatransformation.hxx>
13 #include <salhelper/thread.hxx>
14 #include <utility>
15 #include <vcl/svapp.hxx>
16 #include <tools/stream.hxx>
18 #include <libxml/HTMLparser.h>
20 #include <libxml/xpath.h>
22 #include <comphelper/string.hxx>
24 namespace sc {
26 class HTMLFetchThread : public salhelper::Thread
28 ScDocument& mrDocument;
29 OUString maURL;
30 OUString maID;
31 const std::vector<std::shared_ptr<sc::DataTransformation>> maDataTransformations;
32 std::function<void()> maImportFinishedHdl;
34 void handleTable(xmlNodePtr pTable);
35 void handleRow(xmlNodePtr pRow, SCROW nRow);
36 void skipHeadBody(xmlNodePtr pSkip, SCROW& rRow);
37 void handleCell(xmlNodePtr pCell, SCROW nRow, SCCOL nCol);
39 public:
40 HTMLFetchThread(ScDocument& rDoc, const OUString&, const OUString& rID, std::function<void()> aImportFinishedHdl,
41 std::vector<std::shared_ptr<sc::DataTransformation>>&& rTransformations);
43 virtual void execute() override;
46 HTMLFetchThread::HTMLFetchThread(
47 ScDocument& rDoc, const OUString& rURL, const OUString& rID,
48 std::function<void()> aImportFinishedHdl,
49 std::vector<std::shared_ptr<sc::DataTransformation>>&& rTransformations)
50 : salhelper::Thread("HTML Fetch Thread")
51 , mrDocument(rDoc)
52 , maURL(rURL)
53 , maID(rID)
54 , maDataTransformations(std::move(rTransformations))
55 , maImportFinishedHdl(std::move(aImportFinishedHdl))
59 namespace {
61 OString toString(const xmlChar* pStr)
63 return OString(reinterpret_cast<const char*>(pStr), xmlStrlen(pStr));
66 OUString trim_string(const OUString& aStr)
68 OUString aOldString;
69 OUString aString = aStr;
72 aOldString = aString;
73 aString = comphelper::string::strip(aString, ' ');
74 aString = comphelper::string::strip(aString, '\n');
75 aString = comphelper::string::strip(aString, '\r');
76 aString = comphelper::string::strip(aString, '\t');
78 while (aOldString != aString);
80 return aString;
83 OUString get_node_str(xmlNodePtr pNode)
85 OUStringBuffer aStr;
86 for (xmlNodePtr cur_node = pNode->children; cur_node; cur_node = cur_node->next)
88 if (cur_node->type == XML_TEXT_NODE)
90 OUString aString = OStringToOUString(toString(cur_node->content), RTL_TEXTENCODING_UTF8);
91 aStr.append(trim_string(aString));
93 else if (cur_node->type == XML_ELEMENT_NODE)
95 aStr.append(get_node_str(cur_node));
99 return aStr.makeStringAndClear();
104 void HTMLFetchThread::handleCell(xmlNodePtr pCellNode, SCROW nRow, SCCOL nCol)
106 OUStringBuffer aStr;
107 for (xmlNodePtr cur_node = pCellNode->children; cur_node; cur_node = cur_node->next)
109 if (cur_node->type == XML_TEXT_NODE)
111 OUString aString = OStringToOUString(toString(cur_node->content), RTL_TEXTENCODING_UTF8);
112 aStr.append(trim_string(aString));
114 else if (cur_node->type == XML_ELEMENT_NODE)
116 aStr.append(get_node_str(cur_node));
120 if (!aStr.isEmpty())
122 OUString aCellStr = aStr.makeStringAndClear();
123 mrDocument.SetString(nCol, nRow, 0, aCellStr);
127 void HTMLFetchThread::handleRow(xmlNodePtr pRowNode, SCROW nRow)
129 sal_Int32 nCol = 0;
130 for (xmlNodePtr cur_node = pRowNode->children; cur_node; cur_node = cur_node->next)
132 if (cur_node->type == XML_ELEMENT_NODE)
134 OString aNodeName = toString(cur_node->name);
135 if (aNodeName == "td" || aNodeName == "th")
137 handleCell(cur_node, nRow, nCol);
138 ++nCol;
144 void HTMLFetchThread::skipHeadBody(xmlNodePtr pSkipElement, SCROW& rRow)
146 for (xmlNodePtr cur_node = pSkipElement->children; cur_node; cur_node = cur_node->next)
148 if (cur_node->type == XML_ELEMENT_NODE)
150 OString aNodeName = toString(cur_node->name);
151 if (aNodeName == "tr")
153 handleRow(cur_node, rRow);
154 ++rRow;
161 void HTMLFetchThread::handleTable(xmlNodePtr pTable)
163 sal_Int32 nRow = 0;
164 for (xmlNodePtr cur_node = pTable->children; cur_node; cur_node = cur_node->next)
166 if (cur_node->type == XML_ELEMENT_NODE)
168 OString aNodeName = toString(cur_node->name);
169 if (aNodeName == "tr")
171 handleRow(cur_node, nRow);
172 ++nRow;
174 else if (aNodeName == "thead" || aNodeName == "tbody")
176 skipHeadBody(cur_node, nRow);
182 void HTMLFetchThread::execute()
184 OStringBuffer aBuffer(64000);
185 DataProvider::FetchStreamFromURL(maURL, aBuffer);
187 if (aBuffer.isEmpty())
188 return;
190 htmlDocPtr pHtmlPtr = htmlParseDoc(reinterpret_cast<xmlChar*>(const_cast<char*>(aBuffer.getStr())), nullptr);
192 OString aID = OUStringToOString(maID, RTL_TEXTENCODING_UTF8);
193 xmlXPathContextPtr pXmlXpathCtx = xmlXPathNewContext(pHtmlPtr);
194 xmlXPathObjectPtr pXmlXpathObj = xmlXPathEvalExpression(BAD_CAST(aID.getStr()), pXmlXpathCtx);
196 if (!pXmlXpathObj)
198 xmlXPathFreeContext(pXmlXpathCtx);
199 return;
201 xmlNodeSetPtr pXmlNodes = pXmlXpathObj->nodesetval;
203 if (!pXmlNodes)
205 xmlXPathFreeNodeSetList(pXmlXpathObj);
206 xmlXPathFreeContext(pXmlXpathCtx);
207 return;
210 if (pXmlNodes->nodeNr == 0)
212 xmlXPathFreeNodeSet(pXmlNodes);
213 xmlXPathFreeNodeSetList(pXmlXpathObj);
214 xmlXPathFreeContext(pXmlXpathCtx);
215 return;
218 xmlNodePtr pNode = pXmlNodes->nodeTab[0];
219 handleTable(pNode);
221 xmlXPathFreeNodeSet(pXmlNodes);
222 xmlXPathFreeNodeSetList(pXmlXpathObj);
223 xmlXPathFreeContext(pXmlXpathCtx);
225 for (auto& itr : maDataTransformations)
227 itr->Transform(mrDocument);
230 SolarMutexGuard aGuard;
231 maImportFinishedHdl();
234 HTMLDataProvider::HTMLDataProvider(ScDocument* pDoc, sc::ExternalDataSource& rDataSource):
235 DataProvider(rDataSource),
236 mpDocument(pDoc)
240 HTMLDataProvider::~HTMLDataProvider()
242 if (mxHTMLFetchThread.is())
244 SolarMutexReleaser aReleaser;
245 mxHTMLFetchThread->join();
249 void HTMLDataProvider::Import()
251 // already importing data
252 if (mpDoc)
253 return;
255 mpDoc.reset(new ScDocument(SCDOCMODE_CLIP));
256 mpDoc->ResetClip(mpDocument, SCTAB(0));
257 mxHTMLFetchThread = new HTMLFetchThread(*mpDoc, mrDataSource.getURL(), mrDataSource.getID(),
258 std::bind(&HTMLDataProvider::ImportFinished, this), std::vector(mrDataSource.getDataTransformation()));
259 mxHTMLFetchThread->launch();
261 if (mbDeterministic)
263 SolarMutexReleaser aReleaser;
264 mxHTMLFetchThread->join();
268 void HTMLDataProvider::ImportFinished()
270 mrDataSource.getDBManager()->WriteToDoc(*mpDoc);
273 const OUString& HTMLDataProvider::GetURL() const
275 return mrDataSource.getURL();
280 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */