Version 6.4.0.0.beta1, tag libreoffice-6.4.0.0.beta1
[LibreOffice.git] / writerfilter / source / rtftok / rtftokenizer.cxx
blob 0b39232cd98387889543464607cc8b572d6bc20a
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include "rtftokenizer.hxx"
11 #include <tools/stream.hxx>
12 #include <svx/dialmgr.hxx>
13 #include <svx/strings.hrc>
14 #include <rtl/strbuf.hxx>
15 #include <rtl/ustrbuf.hxx>
16 #include <rtl/character.hxx>
17 #include <sal/log.hxx>
18 #include "rtfskipdestination.hxx"
19 #include <com/sun/star/io/BufferSizeExceededException.hpp>
20 #include <com/sun/star/task/XStatusIndicator.hpp>
21 #include <osl/diagnose.h>
22 #include <filter/msfilter/rtfutil.hxx>
24 using namespace com::sun::star;
26 namespace writerfilter
28 namespace rtftok
30 std::unordered_map<OString, RTFSymbol> RTFTokenizer::s_aRTFControlWords;
31 bool RTFTokenizer::s_bControlWordsInitialised;
32 std::vector<RTFMathSymbol> RTFTokenizer::s_aRTFMathControlWords;
33 bool RTFTokenizer::s_bMathControlWordsSorted;
35 RTFTokenizer::RTFTokenizer(RTFListener& rImport, SvStream* pInStream,
36 uno::Reference<task::XStatusIndicator> const& xStatusIndicator)
37 : m_rImport(rImport)
38 , m_pInStream(pInStream)
39 , m_xStatusIndicator(xStatusIndicator)
40 , m_nGroup(0)
41 , m_nLineNumber(0)
42 , m_nLineStartPos(0)
43 , m_nGroupStart(0)
45 if (!RTFTokenizer::s_bControlWordsInitialised)
47 RTFTokenizer::s_bControlWordsInitialised = true;
48 for (int i = 0; i < nRTFControlWords; ++i)
49 s_aRTFControlWords.emplace(OString(aRTFControlWords[i].GetKeyword()),
50 aRTFControlWords[i]);
52 if (!RTFTokenizer::s_bMathControlWordsSorted)
54 RTFTokenizer::s_bMathControlWordsSorted = true;
55 s_aRTFMathControlWords = std::vector<RTFMathSymbol>(
56 aRTFMathControlWords, aRTFMathControlWords + nRTFMathControlWords);
57 std::sort(s_aRTFMathControlWords.begin(), s_aRTFMathControlWords.end());
61 RTFTokenizer::~RTFTokenizer() = default;
63 RTFError RTFTokenizer::resolveParse()
65 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC);
66 char ch;
67 RTFError ret;
68 // for hex chars
69 int b = 0;
70 int count = 2;
71 std::size_t nPercentSize = 0;
72 sal_uInt64 nLastPos = 0;
74 if (m_xStatusIndicator.is())
76 OUString sDocLoad(SvxResId(RID_SVXSTR_DOC_LOAD));
78 sal_uInt64 const nCurrentPos = Strm().Tell();
79 sal_uInt64 const nEndPos = nCurrentPos + Strm().remainingSize();
80 m_xStatusIndicator->start(sDocLoad, nEndPos);
81 nPercentSize = nEndPos / 100;
83 nLastPos = nCurrentPos;
84 m_xStatusIndicator->setValue(nLastPos);
87 while (Strm().ReadChar(ch), !Strm().eof())
89 //SAL_INFO("writerfilter", OSL_THIS_FUNC << ": parsing character '" << ch << "'");
91 sal_uInt64 const nCurrentPos = Strm().Tell();
92 if (m_xStatusIndicator.is() && nCurrentPos > (nLastPos + nPercentSize))
94 nLastPos = nCurrentPos;
95 m_xStatusIndicator->setValue(nLastPos);
98 if (m_nGroup < 0)
99 return RTFError::GROUP_UNDER;
100 if (m_nGroup > 0 && m_rImport.getInternalState() == RTFInternalState::BIN)
102 ret = m_rImport.resolveChars(ch);
103 if (ret != RTFError::OK)
104 return ret;
106 else
108 switch (ch)
110 case '{':
111 m_nGroupStart = Strm().Tell() - 1;
112 ret = m_rImport.pushState();
113 if (ret != RTFError::OK)
114 return ret;
115 break;
116 case '}':
117 ret = m_rImport.popState();
118 if (ret != RTFError::OK)
119 return ret;
120 if (m_nGroup == 0)
122 if (m_rImport.isSubstream())
123 m_rImport.finishSubstream();
124 return RTFError::OK;
126 break;
127 case '\\':
128 ret = resolveKeyword();
129 if (ret != RTFError::OK)
130 return ret;
131 break;
132 case 0x0d:
133 break; // ignore this
134 case 0x0a:
135 m_nLineNumber++;
136 m_nLineStartPos = nCurrentPos;
137 break;
138 default:
139 if (m_nGroup == 0)
140 return RTFError::CHAR_OVER;
141 if (m_rImport.getInternalState() == RTFInternalState::NORMAL)
143 ret = m_rImport.resolveChars(ch);
144 if (ret != RTFError::OK)
145 return ret;
147 else
149 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC << ": hex internal state");
150 b = b << 4;
151 sal_Int8 parsed = msfilter::rtfutil::AsHex(ch);
152 if (parsed == -1)
153 return RTFError::HEX_INVALID;
154 b += parsed;
155 count--;
156 if (!count)
158 ret = m_rImport.resolveChars(b);
159 if (ret != RTFError::OK)
160 return ret;
161 count = 2;
162 b = 0;
163 m_rImport.setInternalState(RTFInternalState::NORMAL);
166 break;
171 if (m_nGroup < 0)
172 return RTFError::GROUP_UNDER;
173 if (m_nGroup > 0)
174 return RTFError::GROUP_OVER;
175 return RTFError::OK;
178 void RTFTokenizer::pushGroup() { m_nGroup++; }
180 void RTFTokenizer::popGroup() { m_nGroup--; }
182 RTFError RTFTokenizer::resolveKeyword()
184 char ch;
185 OStringBuffer aBuf(32);
186 bool bNeg = false;
187 bool bParam = false;
188 int nParam = 0;
190 Strm().ReadChar(ch);
191 if (Strm().eof())
192 return RTFError::UNEXPECTED_EOF;
194 if (!rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
196 aBuf.append(ch);
197 OString aKeyword = aBuf.makeStringAndClear();
198 // control symbols aren't followed by a space, so we can return here
199 // without doing any SeekRel()
200 return dispatchKeyword(aKeyword, bParam, nParam);
202 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
204 aBuf.append(ch);
205 if (aBuf.getLength() > 32)
206 // See RTF spec v1.9.1, page 7
207 // A control word's name cannot be longer than 32 letters.
208 throw io::BufferSizeExceededException();
209 Strm().ReadChar(ch);
210 if (Strm().eof())
212 ch = ' ';
213 break;
217 if (ch == '-')
219 // in case we'll have a parameter, that will be negative
220 bNeg = true;
221 Strm().ReadChar(ch);
222 if (Strm().eof())
223 return RTFError::UNEXPECTED_EOF;
225 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)))
227 OStringBuffer aParameter;
229 // we have a parameter
230 bParam = true;
231 while (rtl::isAsciiDigit(static_cast<unsigned char>(ch)))
233 aParameter.append(ch);
234 Strm().ReadChar(ch);
235 if (Strm().eof())
237 ch = ' ';
238 break;
241 nParam = aParameter.makeStringAndClear().toInt32();
242 if (bNeg)
243 nParam = -nParam;
245 if (ch != ' ')
246 Strm().SeekRel(-1);
247 OString aKeyword = aBuf.makeStringAndClear();
248 return dispatchKeyword(aKeyword, bParam, nParam);
251 bool RTFTokenizer::lookupMathKeyword(RTFMathSymbol& rSymbol)
253 auto low
254 = std::lower_bound(s_aRTFMathControlWords.begin(), s_aRTFMathControlWords.end(), rSymbol);
255 if (low == s_aRTFMathControlWords.end() || rSymbol < *low)
256 return false;
257 rSymbol = *low;
258 return true;
261 RTFError RTFTokenizer::dispatchKeyword(OString const& rKeyword, bool bParam, int nParam)
263 if (m_rImport.getDestination() == Destination::SKIP)
265 // skip binary data explicitly, to not trip over rtf markup
266 // control characters
267 if (rKeyword == "bin" && nParam > 0)
268 Strm().SeekRel(nParam);
269 return RTFError::OK;
271 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC << ": keyword '\\" << rKeyword << "' with param? "
272 << (bParam ? 1 : 0) << " param val: '"
273 << (bParam ? nParam : 0) << "'");
274 auto findIt = s_aRTFControlWords.find(rKeyword);
275 if (findIt == s_aRTFControlWords.end())
277 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC << ": unknown keyword '\\" << rKeyword << "'");
278 RTFSkipDestination aSkip(m_rImport);
279 aSkip.setParsed(false);
280 return RTFError::OK;
283 RTFError ret;
284 RTFSymbol const& rSymbol = findIt->second;
285 switch (rSymbol.GetControlType())
287 case CONTROL_FLAG:
288 // flags ignore any parameter by definition
289 ret = m_rImport.dispatchFlag(rSymbol.GetIndex());
290 if (ret != RTFError::OK)
291 return ret;
292 break;
293 case CONTROL_DESTINATION:
294 // same for destinations
295 ret = m_rImport.dispatchDestination(rSymbol.GetIndex());
296 if (ret != RTFError::OK)
297 return ret;
298 break;
299 case CONTROL_SYMBOL:
300 // and symbols
301 ret = m_rImport.dispatchSymbol(rSymbol.GetIndex());
302 if (ret != RTFError::OK)
303 return ret;
304 break;
305 case CONTROL_TOGGLE:
306 ret = m_rImport.dispatchToggle(rSymbol.GetIndex(), bParam, nParam);
307 if (ret != RTFError::OK)
308 return ret;
309 break;
310 case CONTROL_VALUE:
311 if (!bParam)
312 nParam = rSymbol.GetDefValue();
313 ret = m_rImport.dispatchValue(rSymbol.GetIndex(), nParam);
314 if (ret != RTFError::OK)
315 return ret;
316 break;
319 return RTFError::OK;
322 OUString RTFTokenizer::getPosition()
324 OUStringBuffer aRet;
325 aRet.append(m_nLineNumber + 1);
326 aRet.append(",");
327 aRet.append(sal_Int32(Strm().Tell() - m_nLineStartPos + 1));
328 return aRet.makeStringAndClear();
331 } // namespace rtftok
332 } // namespace writerfilter
334 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */