lok: vcl: fix multiple floatwin removal case more robustly.
[LibreOffice.git] / writerfilter / source / rtftok / rtftokenizer.cxx
blob310adc82db236d53efc87c06282c72b18b484cd3
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include "rtftokenizer.hxx"
11 #include <tools/stream.hxx>
12 #include <svx/dialmgr.hxx>
13 #include <svx/strings.hrc>
14 #include <rtl/strbuf.hxx>
15 #include <rtl/ustrbuf.hxx>
16 #include <rtl/character.hxx>
17 #include <sal/log.hxx>
18 #include "rtfskipdestination.hxx"
19 #include <com/sun/star/io/BufferSizeExceededException.hpp>
20 #include <com/sun/star/task/XStatusIndicator.hpp>
21 #include <osl/diagnose.h>
22 #include <filter/msfilter/rtfutil.hxx>
24 using namespace com::sun::star;
26 namespace writerfilter
28 namespace rtftok
// Definitions of the static lookup tables shared by all RTFTokenizer
// instances. The constructor copies the raw keyword arrays into these vectors
// and sorts them the first time a tokenizer is created; the two bool flags
// record that this has already been done for the respective table.
30 std::vector<RTFSymbol> RTFTokenizer::s_aRTFControlWords;
31 bool RTFTokenizer::s_bControlWordsSorted;
32 std::vector<RTFMathSymbol> RTFTokenizer::s_aRTFMathControlWords;
33 bool RTFTokenizer::s_bMathControlWordsSorted;
35 RTFTokenizer::RTFTokenizer(RTFListener& rImport, SvStream* pInStream,
36 uno::Reference<task::XStatusIndicator> const& xStatusIndicator)
37 : m_rImport(rImport)
38 , m_pInStream(pInStream)
39 , m_xStatusIndicator(xStatusIndicator)
40 , m_nGroup(0)
41 , m_nLineNumber(0)
42 , m_nLineStartPos(0)
43 , m_nGroupStart(0)
45 if (!RTFTokenizer::s_bControlWordsSorted)
47 RTFTokenizer::s_bControlWordsSorted = true;
48 s_aRTFControlWords
49 = std::vector<RTFSymbol>(aRTFControlWords, aRTFControlWords + nRTFControlWords);
50 std::sort(s_aRTFControlWords.begin(), s_aRTFControlWords.end());
52 if (!RTFTokenizer::s_bMathControlWordsSorted)
54 RTFTokenizer::s_bMathControlWordsSorted = true;
55 s_aRTFMathControlWords = std::vector<RTFMathSymbol>(
56 aRTFMathControlWords, aRTFMathControlWords + nRTFMathControlWords);
57 std::sort(s_aRTFMathControlWords.begin(), s_aRTFMathControlWords.end());
// Compiler-generated destructor; defined here in the implementation file
// rather than inline in the header.
61 RTFTokenizer::~RTFTokenizer() = default;
63 RTFError RTFTokenizer::resolveParse()
65 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC);
66 char ch;
67 RTFError ret;
68 // for hex chars
69 int b = 0, count = 2;
70 std::size_t nPercentSize = 0;
71 sal_uInt64 nLastPos = 0;
73 if (m_xStatusIndicator.is())
75 OUString sDocLoad(SvxResId(RID_SVXSTR_DOC_LOAD));
77 sal_uInt64 const nCurrentPos = Strm().Tell();
78 sal_uInt64 const nEndPos = nCurrentPos + Strm().remainingSize();
79 m_xStatusIndicator->start(sDocLoad, nEndPos);
80 nPercentSize = nEndPos / 100;
82 nLastPos = nCurrentPos;
83 m_xStatusIndicator->setValue(nLastPos);
86 while (Strm().ReadChar(ch), !Strm().eof())
88 //SAL_INFO("writerfilter", OSL_THIS_FUNC << ": parsing character '" << ch << "'");
90 sal_uInt64 const nCurrentPos = Strm().Tell();
91 if (m_xStatusIndicator.is() && nCurrentPos > (nLastPos + nPercentSize))
93 nLastPos = nCurrentPos;
94 m_xStatusIndicator->setValue(nLastPos);
97 if (m_nGroup < 0)
98 return RTFError::GROUP_UNDER;
99 if (m_nGroup > 0 && m_rImport.getInternalState() == RTFInternalState::BIN)
101 ret = m_rImport.resolveChars(ch);
102 if (ret != RTFError::OK)
103 return ret;
105 else
107 switch (ch)
109 case '{':
110 m_nGroupStart = Strm().Tell() - 1;
111 ret = m_rImport.pushState();
112 if (ret != RTFError::OK)
113 return ret;
114 break;
115 case '}':
116 ret = m_rImport.popState();
117 if (ret != RTFError::OK)
118 return ret;
119 if (m_nGroup == 0)
121 if (m_rImport.isSubstream())
122 m_rImport.finishSubstream();
123 return RTFError::OK;
125 break;
126 case '\\':
127 ret = resolveKeyword();
128 if (ret != RTFError::OK)
129 return ret;
130 break;
131 case 0x0d:
132 break; // ignore this
133 case 0x0a:
134 m_nLineNumber++;
135 m_nLineStartPos = nCurrentPos;
136 break;
137 default:
138 if (m_nGroup == 0)
139 return RTFError::CHAR_OVER;
140 if (m_rImport.getInternalState() == RTFInternalState::NORMAL)
142 ret = m_rImport.resolveChars(ch);
143 if (ret != RTFError::OK)
144 return ret;
146 else
148 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC << ": hex internal state");
149 b = b << 4;
150 sal_Int8 parsed = msfilter::rtfutil::AsHex(ch);
151 if (parsed == -1)
152 return RTFError::HEX_INVALID;
153 b += parsed;
154 count--;
155 if (!count)
157 ret = m_rImport.resolveChars(b);
158 if (ret != RTFError::OK)
159 return ret;
160 count = 2;
161 b = 0;
162 m_rImport.setInternalState(RTFInternalState::NORMAL);
165 break;
170 if (m_nGroup < 0)
171 return RTFError::GROUP_UNDER;
172 if (m_nGroup > 0)
173 return RTFError::GROUP_OVER;
174 return RTFError::OK;
177 void RTFTokenizer::pushGroup() { m_nGroup++; }
179 void RTFTokenizer::popGroup() { m_nGroup--; }
181 RTFError RTFTokenizer::resolveKeyword()
183 char ch;
184 OStringBuffer aBuf;
185 bool bNeg = false;
186 bool bParam = false;
187 int nParam = 0;
189 Strm().ReadChar(ch);
190 if (Strm().eof())
191 return RTFError::UNEXPECTED_EOF;
193 if (!rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
195 aBuf.append(ch);
196 OString aKeyword = aBuf.makeStringAndClear();
197 // control symbols aren't followed by a space, so we can return here
198 // without doing any SeekRel()
199 return dispatchKeyword(aKeyword, bParam, nParam);
201 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
203 aBuf.append(ch);
204 Strm().ReadChar(ch);
205 if (Strm().eof())
207 ch = ' ';
208 break;
211 if (aBuf.getLength() > 32)
212 // See RTF spec v1.9.1, page 7
213 // A control word's name cannot be longer than 32 letters.
214 throw io::BufferSizeExceededException();
216 if (ch == '-')
218 // in case we'll have a parameter, that will be negative
219 bNeg = true;
220 Strm().ReadChar(ch);
221 if (Strm().eof())
222 return RTFError::UNEXPECTED_EOF;
224 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)))
226 OStringBuffer aParameter;
228 // we have a parameter
229 bParam = true;
230 while (rtl::isAsciiDigit(static_cast<unsigned char>(ch)))
232 aParameter.append(ch);
233 Strm().ReadChar(ch);
234 if (Strm().eof())
236 ch = ' ';
237 break;
240 nParam = aParameter.makeStringAndClear().toInt32();
241 if (bNeg)
242 nParam = -nParam;
244 if (ch != ' ')
245 Strm().SeekRel(-1);
246 OString aKeyword = aBuf.makeStringAndClear();
247 return dispatchKeyword(aKeyword, bParam, nParam);
250 bool RTFTokenizer::lookupMathKeyword(RTFMathSymbol& rSymbol)
252 auto low
253 = std::lower_bound(s_aRTFMathControlWords.begin(), s_aRTFMathControlWords.end(), rSymbol);
254 if (low == s_aRTFMathControlWords.end() || rSymbol < *low)
255 return false;
256 rSymbol = *low;
257 return true;
260 RTFError RTFTokenizer::dispatchKeyword(OString const& rKeyword, bool bParam, int nParam)
262 if (m_rImport.getDestination() == Destination::SKIP)
264 // skip binary data explicitly, to not trip over rtf markup
265 // control characters
266 if (rKeyword == "bin" && nParam > 0)
267 Strm().SeekRel(nParam);
268 return RTFError::OK;
270 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC << ": keyword '\\" << rKeyword << "' with param? "
271 << (bParam ? 1 : 0) << " param val: '"
272 << (bParam ? nParam : 0) << "'");
273 RTFSymbol aSymbol;
274 aSymbol.sKeyword = rKeyword.getStr();
275 auto low = std::lower_bound(s_aRTFControlWords.begin(), s_aRTFControlWords.end(), aSymbol);
276 int i = low - s_aRTFControlWords.begin();
277 if (low == s_aRTFControlWords.end() || aSymbol < *low)
279 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC << ": unknown keyword '\\" << rKeyword << "'");
280 RTFSkipDestination aSkip(m_rImport);
281 aSkip.setParsed(false);
282 return RTFError::OK;
285 RTFError ret;
286 switch (s_aRTFControlWords[i].nControlType)
288 case CONTROL_FLAG:
289 // flags ignore any parameter by definition
290 ret = m_rImport.dispatchFlag(s_aRTFControlWords[i].nIndex);
291 if (ret != RTFError::OK)
292 return ret;
293 break;
294 case CONTROL_DESTINATION:
295 // same for destinations
296 ret = m_rImport.dispatchDestination(s_aRTFControlWords[i].nIndex);
297 if (ret != RTFError::OK)
298 return ret;
299 break;
300 case CONTROL_SYMBOL:
301 // and symbols
302 ret = m_rImport.dispatchSymbol(s_aRTFControlWords[i].nIndex);
303 if (ret != RTFError::OK)
304 return ret;
305 break;
306 case CONTROL_TOGGLE:
307 ret = m_rImport.dispatchToggle(s_aRTFControlWords[i].nIndex, bParam, nParam);
308 if (ret != RTFError::OK)
309 return ret;
310 break;
311 case CONTROL_VALUE:
312 if (!bParam)
313 nParam = s_aRTFControlWords[i].nDefValue;
314 ret = m_rImport.dispatchValue(s_aRTFControlWords[i].nIndex, nParam);
315 if (ret != RTFError::OK)
316 return ret;
317 break;
320 return RTFError::OK;
323 OUString RTFTokenizer::getPosition()
325 OUStringBuffer aRet;
326 aRet.append(m_nLineNumber + 1);
327 aRet.append(",");
328 aRet.append(sal_Int32(Strm().Tell() - m_nLineStartPos + 1));
329 return aRet.makeStringAndClear();
332 } // namespace rtftok
333 } // namespace writerfilter
335 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */