Avoid potential negative array index access to cached text.
[LibreOffice.git] / writerfilter / source / rtftok / rtftokenizer.cxx
blob420c6d36f0c3c9acc6516dfd32cb45f8ee01664f
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include "rtftokenizer.hxx"
11 #include <o3tl/string_view.hxx>
12 #include <tools/stream.hxx>
13 #include <svx/dialmgr.hxx>
14 #include <svx/strings.hrc>
15 #include <rtl/strbuf.hxx>
16 #include <rtl/character.hxx>
17 #include <sal/log.hxx>
18 #include "rtfskipdestination.hxx"
19 #include <com/sun/star/io/BufferSizeExceededException.hpp>
20 #include <com/sun/star/task/XStatusIndicator.hpp>
21 #include <filter/msfilter/rtfutil.hxx>
23 using namespace com::sun::star;
25 namespace writerfilter::rtftok
27 std::unordered_map<OString, RTFSymbol> RTFTokenizer::s_aRTFControlWords;
28 bool RTFTokenizer::s_bControlWordsInitialised;
29 std::vector<RTFMathSymbol> RTFTokenizer::s_aRTFMathControlWords;
30 bool RTFTokenizer::s_bMathControlWordsSorted;
32 RTFTokenizer::RTFTokenizer(RTFListener& rImport, SvStream* pInStream,
33 uno::Reference<task::XStatusIndicator> const& xStatusIndicator)
34 : m_rImport(rImport)
35 , m_pInStream(pInStream)
36 , m_xStatusIndicator(xStatusIndicator)
37 , m_nGroup(0)
38 , m_nLineNumber(0)
39 , m_nLineStartPos(0)
40 , m_nGroupStart(0)
42 if (!RTFTokenizer::s_bControlWordsInitialised)
44 RTFTokenizer::s_bControlWordsInitialised = true;
45 for (int i = 0; i < nRTFControlWords; ++i)
46 s_aRTFControlWords.emplace(OString(aRTFControlWords[i].GetKeyword()),
47 aRTFControlWords[i]);
49 if (!RTFTokenizer::s_bMathControlWordsSorted)
51 RTFTokenizer::s_bMathControlWordsSorted = true;
52 s_aRTFMathControlWords = std::vector<RTFMathSymbol>(
53 aRTFMathControlWords, aRTFMathControlWords + nRTFMathControlWords);
54 std::sort(s_aRTFMathControlWords.begin(), s_aRTFMathControlWords.end());
58 RTFTokenizer::~RTFTokenizer() = default;
60 RTFError RTFTokenizer::resolveParse()
62 SAL_INFO("writerfilter.rtf", __func__);
63 char ch;
64 RTFError ret;
65 // for hex chars
66 int b = 0;
67 int count = 2;
68 std::size_t nPercentSize = 0;
69 sal_uInt64 nLastPos = 0;
71 if (m_xStatusIndicator.is())
73 OUString sDocLoad(SvxResId(RID_SVXSTR_DOC_LOAD));
75 sal_uInt64 const nCurrentPos = Strm().Tell();
76 sal_uInt64 const nEndPos = nCurrentPos + Strm().remainingSize();
77 m_xStatusIndicator->start(sDocLoad, nEndPos);
78 nPercentSize = nEndPos / 100;
80 nLastPos = nCurrentPos;
81 m_xStatusIndicator->setValue(nLastPos);
84 while (Strm().ReadChar(ch), !Strm().eof())
86 //SAL_INFO("writerfilter", __func__ << ": parsing character '" << ch << "'");
88 sal_uInt64 const nCurrentPos = Strm().Tell();
89 if (m_xStatusIndicator.is() && nCurrentPos > (nLastPos + nPercentSize))
91 nLastPos = nCurrentPos;
92 m_xStatusIndicator->setValue(nLastPos);
95 if (m_nGroup < 0)
96 return RTFError::GROUP_UNDER;
97 if (m_nGroup > 0 && m_rImport.getInternalState() == RTFInternalState::BIN)
99 ret = m_rImport.resolveChars(ch);
100 if (ret != RTFError::OK)
101 return ret;
103 else
105 switch (ch)
107 case '{':
108 m_nGroupStart = Strm().Tell() - 1;
109 ret = m_rImport.pushState();
110 if (ret != RTFError::OK)
111 return ret;
112 break;
113 case '}':
114 ret = m_rImport.popState();
115 if (ret != RTFError::OK)
116 return ret;
117 if (m_nGroup == 0)
119 if (m_rImport.isSubstream())
120 m_rImport.finishSubstream();
121 return RTFError::OK;
123 break;
124 case '\\':
125 ret = resolveKeyword();
126 if (ret != RTFError::OK)
127 return ret;
128 break;
129 case 0x0d:
130 break; // ignore this
131 case 0x0a:
132 m_nLineNumber++;
133 m_nLineStartPos = nCurrentPos;
134 break;
135 default:
136 if (m_nGroup == 0)
137 return RTFError::CHAR_OVER;
138 if (m_rImport.getInternalState() == RTFInternalState::NORMAL)
140 ret = m_rImport.resolveChars(ch);
141 if (ret != RTFError::OK)
142 return ret;
144 else
146 SAL_INFO("writerfilter.rtf", __func__ << ": hex internal state");
147 // Assume that \'<number><junk> means \'0<number>.
148 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch))
149 || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
151 b = b << 4;
152 sal_Int8 parsed = msfilter::rtfutil::AsHex(ch);
153 if (parsed == -1)
154 return RTFError::HEX_INVALID;
155 b += parsed;
157 count--;
158 if (!count)
160 ret = m_rImport.resolveChars(b);
161 if (ret != RTFError::OK)
162 return ret;
163 count = 2;
164 b = 0;
165 m_rImport.setInternalState(RTFInternalState::NORMAL);
168 break;
173 if (m_nGroup < 0)
174 return RTFError::GROUP_UNDER;
175 if (m_nGroup > 0)
176 return RTFError::GROUP_OVER;
177 return RTFError::OK;
180 void RTFTokenizer::pushGroup() { m_nGroup++; }
182 void RTFTokenizer::popGroup() { m_nGroup--; }
184 RTFError RTFTokenizer::resolveKeyword()
186 char ch;
188 Strm().ReadChar(ch);
189 if (Strm().eof())
190 return RTFError::UNEXPECTED_EOF;
192 if (!rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
194 // control symbols aren't followed by a space, so we can return here
195 // without doing any SeekRel()
196 return dispatchKeyword(OString(ch), false, 0);
198 OStringBuffer aBuf(32);
199 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
201 aBuf.append(ch);
202 if (aBuf.getLength() > 32)
203 // See RTF spec v1.9.1, page 7
204 // A control word's name cannot be longer than 32 letters.
205 throw io::BufferSizeExceededException();
206 Strm().ReadChar(ch);
207 if (Strm().eof())
209 ch = ' ';
210 break;
214 bool bNeg = false;
215 if (ch == '-')
217 // in case we'll have a parameter, that will be negative
218 bNeg = true;
219 Strm().ReadChar(ch);
220 if (Strm().eof())
221 return RTFError::UNEXPECTED_EOF;
223 bool bParam = false;
224 int nParam = 0;
225 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)))
227 OStringBuffer aParameter;
229 // we have a parameter
230 bParam = true;
231 while (rtl::isAsciiDigit(static_cast<unsigned char>(ch)))
233 aParameter.append(ch);
234 Strm().ReadChar(ch);
235 if (Strm().eof())
237 ch = ' ';
238 break;
241 nParam = o3tl::toInt32(aParameter);
242 if (bNeg)
243 nParam = -nParam;
245 if (ch != ' ')
246 Strm().SeekRel(-1);
247 OString aKeyword = aBuf.makeStringAndClear();
248 return dispatchKeyword(aKeyword, bParam, nParam);
251 bool RTFTokenizer::lookupMathKeyword(RTFMathSymbol& rSymbol)
253 auto low
254 = std::lower_bound(s_aRTFMathControlWords.begin(), s_aRTFMathControlWords.end(), rSymbol);
255 if (low == s_aRTFMathControlWords.end() || rSymbol < *low)
256 return false;
257 rSymbol = *low;
258 return true;
261 RTFError RTFTokenizer::dispatchKeyword(OString const& rKeyword, bool bParam, int nParam)
263 if (m_rImport.getDestination() == Destination::SKIP)
265 // skip binary data explicitly, to not trip over rtf markup
266 // control characters
267 if (rKeyword == "bin" && nParam > 0)
268 Strm().SeekRel(nParam);
269 return RTFError::OK;
271 SAL_INFO("writerfilter.rtf", __func__ << ": keyword '\\" << rKeyword << "' with param? "
272 << (bParam ? 1 : 0) << " param val: '"
273 << (bParam ? nParam : 0) << "'");
274 auto findIt = s_aRTFControlWords.find(rKeyword);
275 if (findIt == s_aRTFControlWords.end())
277 SAL_INFO("writerfilter.rtf", __func__ << ": unknown keyword '\\" << rKeyword << "'");
278 RTFSkipDestination aSkip(m_rImport);
279 aSkip.setParsed(false);
280 return RTFError::OK;
283 RTFError ret;
284 RTFSymbol const& rSymbol = findIt->second;
285 switch (rSymbol.GetControlType())
287 case RTFControlType::FLAG:
288 // flags ignore any parameter by definition
289 ret = m_rImport.dispatchFlag(rSymbol.GetIndex());
290 if (ret != RTFError::OK)
291 return ret;
292 break;
293 case RTFControlType::DESTINATION:
294 // same for destinations
295 ret = m_rImport.dispatchDestination(rSymbol.GetIndex());
296 if (ret != RTFError::OK)
297 return ret;
298 break;
299 case RTFControlType::SYMBOL:
300 // and symbols
301 ret = m_rImport.dispatchSymbol(rSymbol.GetIndex());
302 if (ret != RTFError::OK)
303 return ret;
304 break;
305 case RTFControlType::TOGGLE:
306 ret = m_rImport.dispatchToggle(rSymbol.GetIndex(), bParam, nParam);
307 if (ret != RTFError::OK)
308 return ret;
309 break;
310 case RTFControlType::VALUE:
311 if (!bParam)
312 nParam = rSymbol.GetDefValue();
313 ret = m_rImport.dispatchValue(rSymbol.GetIndex(), nParam);
314 if (ret != RTFError::OK)
315 return ret;
316 break;
319 return RTFError::OK;
322 OUString RTFTokenizer::getPosition()
324 return OUString::number(m_nLineNumber + 1) + ","
325 + OUString::number(Strm().Tell() - m_nLineStartPos + 1);
328 } // namespace writerfilter::rtftok
330 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */