1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include "rtftokenizer.hxx"
11 #include <o3tl/string_view.hxx>
12 #include <tools/stream.hxx>
13 #include <svx/dialmgr.hxx>
14 #include <svx/strings.hrc>
15 #include <rtl/strbuf.hxx>
16 #include <rtl/character.hxx>
17 #include <sal/log.hxx>
18 #include "rtfskipdestination.hxx"
19 #include <com/sun/star/io/BufferSizeExceededException.hpp>
20 #include <com/sun/star/task/XStatusIndicator.hpp>
21 #include <filter/msfilter/rtfutil.hxx>
23 using namespace com::sun::star
;
25 namespace writerfilter::rtftok
27 std::unordered_map
<OString
, RTFSymbol
> RTFTokenizer::s_aRTFControlWords
;
28 bool RTFTokenizer::s_bControlWordsInitialised
;
29 std::vector
<RTFMathSymbol
> RTFTokenizer::s_aRTFMathControlWords
;
30 bool RTFTokenizer::s_bMathControlWordsSorted
;
// Constructs a tokenizer for the given input stream and status indicator and,
// on first use, fills the class-wide control word tables.
//
// rImport          - listener of type RTFListener (its use in this
//                    constructor is on lines not visible in this listing).
// pInStream        - stream pointer stored in m_pInStream.
// xStatusIndicator - stored in m_xStatusIndicator; resolveParse() only uses
//                    it when .is() is true.
//
// NOTE(review): several original lines (part of the initializer list, the
// second emplace() argument, the braces) are not visible in this listing.
32 RTFTokenizer::RTFTokenizer(RTFListener
& rImport
, SvStream
* pInStream
,
33 uno::Reference
<task::XStatusIndicator
> const& xStatusIndicator
)
35 , m_pInStream(pInStream
)
36 , m_xStatusIndicator(xStatusIndicator
)
// One-time setup: index the aRTFControlWords array by keyword. The static
// flag is set first, so later constructor calls skip this work.
42 if (!RTFTokenizer::s_bControlWordsInitialised
)
44 RTFTokenizer::s_bControlWordsInitialised
= true;
45 for (int i
= 0; i
< nRTFControlWords
; ++i
)
46 s_aRTFControlWords
.emplace(OString(aRTFControlWords
[i
].GetKeyword()),
// One-time setup: copy the math control word array into a vector and sort
// it; lookupMathKeyword() runs std::lower_bound over this vector.
49 if (!RTFTokenizer::s_bMathControlWordsSorted
)
51 RTFTokenizer::s_bMathControlWordsSorted
= true;
52 s_aRTFMathControlWords
= std::vector
<RTFMathSymbol
>(
53 aRTFMathControlWords
, aRTFMathControlWords
+ nRTFMathControlWords
);
54 std::sort(s_aRTFMathControlWords
.begin(), s_aRTFMathControlWords
.end());
// Compiler-generated destructor, defined out of line in this translation unit.
58 RTFTokenizer::~RTFTokenizer() = default;
// Main tokenizer loop: reads the stream one character at a time until end of
// stream, keeps the status indicator (if any) up to date and forwards
// characters and keywords to m_rImport.
//
// Returns an RTFError. The exits visible here are GROUP_UNDER, CHAR_OVER,
// HEX_INVALID and GROUP_OVER; the lines following each `ret != RTFError::OK`
// check are not visible (presumably they return ret - TODO confirm).
//
// NOTE(review): the declarations of ch, ret and b, the switch/case labels and
// the conditions guarding several returns are not visible in this listing.
60 RTFError
RTFTokenizer::resolveParse()
62 SAL_INFO("writerfilter.rtf", __func__
);
68 std::size_t nPercentSize
= 0;
69 sal_uInt64 nLastPos
= 0;
// With a status indicator present: start it with the stream end position as
// its range and use one hundredth of that as the minimum update step.
71 if (m_xStatusIndicator
.is())
73 OUString
sDocLoad(SvxResId(RID_SVXSTR_DOC_LOAD
));
75 sal_uInt64
const nCurrentPos
= Strm().Tell();
76 sal_uInt64
const nEndPos
= nCurrentPos
+ Strm().remainingSize();
77 m_xStatusIndicator
->start(sDocLoad
, nEndPos
);
78 nPercentSize
= nEndPos
/ 100;
80 nLastPos
= nCurrentPos
;
81 m_xStatusIndicator
->setValue(nLastPos
);
// Comma operator: read one character, then test for end of stream; the loop
// stops as soon as a read hits EOF.
84 while (Strm().ReadChar(ch
), !Strm().eof())
86 //SAL_INFO("writerfilter", __func__ << ": parsing character '" << ch << "'");
88 sal_uInt64
const nCurrentPos
= Strm().Tell();
// Refresh the indicator only after the position has advanced by more than
// nPercentSize since the last update.
89 if (m_xStatusIndicator
.is() && nCurrentPos
> (nLastPos
+ nPercentSize
))
91 nLastPos
= nCurrentPos
;
92 m_xStatusIndicator
->setValue(nLastPos
);
96 return RTFError::GROUP_UNDER
;
// Inside a group and in BIN internal state: the character is handed to
// resolveChars().
97 if (m_nGroup
> 0 && m_rImport
.getInternalState() == RTFInternalState::BIN
)
99 ret
= m_rImport
.resolveChars(ch
);
100 if (ret
!= RTFError::OK
)
// Record the position of the character just read as the group start, then
// push a new state (presumably the '{' case - label not visible).
108 m_nGroupStart
= Strm().Tell() - 1;
109 ret
= m_rImport
.pushState();
110 if (ret
!= RTFError::OK
)
// Pop a state; if the listener is processing a substream, finish it
// (presumably the '}' case - label not visible).
114 ret
= m_rImport
.popState();
115 if (ret
!= RTFError::OK
)
119 if (m_rImport
.isSubstream())
120 m_rImport
.finishSubstream();
// Let resolveKeyword() read and dispatch the keyword that follows.
125 ret
= resolveKeyword();
126 if (ret
!= RTFError::OK
)
130 break; // ignore this
// Remember where the current line starts; getPosition() subtracts this from
// the stream position.
133 m_nLineStartPos
= nCurrentPos
;
137 return RTFError::CHAR_OVER
;
// NORMAL internal state: forward the character unchanged.
138 if (m_rImport
.getInternalState() == RTFInternalState::NORMAL
)
140 ret
= m_rImport
.resolveChars(ch
);
141 if (ret
!= RTFError::OK
)
146 SAL_INFO("writerfilter.rtf", __func__
<< ": hex internal state");
147 // Assume that \'<number><junk> means \'0<number>.
148 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch
))
149 || (ch
>= 'a' && ch
<= 'f') || (ch
>= 'A' && ch
<= 'F'))
// Convert the hex digit character to its numeric value.
152 sal_Int8 parsed
= msfilter::rtfutil::AsHex(ch
);
154 return RTFError::HEX_INVALID
;
// Forward the value b (its computation is not visible here) and switch the
// listener back to NORMAL state.
160 ret
= m_rImport
.resolveChars(b
);
161 if (ret
!= RTFError::OK
)
165 m_rImport
.setInternalState(RTFInternalState::NORMAL
);
174 return RTFError::GROUP_UNDER
;
176 return RTFError::GROUP_OVER
;
180 void RTFTokenizer::pushGroup() { m_nGroup
++; }
182 void RTFTokenizer::popGroup() { m_nGroup
--; }
// Reads one control symbol or control word (with optional numeric parameter)
// from the stream and passes it to dispatchKeyword(). Called from
// resolveParse().
//
// Returns RTFError::UNEXPECTED_EOF on the visible EOF exits, otherwise the
// result of dispatchKeyword().
// Throws io::BufferSizeExceededException when the buffered name is longer
// than 32 characters.
//
// NOTE(review): the stream reads, the appends to aBuf, the declarations of
// ch/bParam/nParam and the negative-parameter handling are not visible in
// this listing.
184 RTFError
RTFTokenizer::resolveKeyword()
190 return RTFError::UNEXPECTED_EOF
;
// A non-letter first character is dispatched on its own, without parameter.
192 if (!rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
194 // control symbols aren't followed by a space, so we can return here
195 // without doing any SeekRel()
196 return dispatchKeyword(OString(ch
), false, 0);
// Buffer for the keyword name, created with an initial capacity of 32.
198 OStringBuffer
aBuf(32);
199 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
202 if (aBuf
.getLength() > 32)
203 // See RTF spec v1.9.1, page 7
204 // A control word's name cannot be longer than 32 letters.
205 throw io::BufferSizeExceededException();
217 // in case we'll have a parameter, that will be negative
221 return RTFError::UNEXPECTED_EOF
;
// Collect consecutive digits into aParameter and convert them to an integer.
225 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch
)))
227 OStringBuffer aParameter
;
229 // we have a parameter
231 while (rtl::isAsciiDigit(static_cast<unsigned char>(ch
)))
233 aParameter
.append(ch
);
241 nParam
= o3tl::toInt32(aParameter
);
// Turn the buffered name into an OString and dispatch it.
247 OString aKeyword
= aBuf
.makeStringAndClear();
248 return dispatchKeyword(aKeyword
, bParam
, nParam
);
// Searches s_aRTFMathControlWords (sorted in the constructor) for rSymbol
// using std::lower_bound.
//
// NOTE(review): the declaration receiving the lower_bound result and
// everything after the final condition (the return values and any update of
// rSymbol) are not visible in this listing.
251 bool RTFTokenizer::lookupMathKeyword(RTFMathSymbol
& rSymbol
)
254 = std::lower_bound(s_aRTFMathControlWords
.begin(), s_aRTFMathControlWords
.end(), rSymbol
);
// No match: the search ran off the end, or the element found orders after
// rSymbol.
255 if (low
== s_aRTFMathControlWords
.end() || rSymbol
< *low
)
// Looks up rKeyword in s_aRTFControlWords and forwards it to the matching
// m_rImport.dispatch*() method according to the symbol's control type.
//
// rKeyword - keyword text used as the lookup key.
// bParam   - whether a parameter was given; passed on to dispatchToggle()
//            and used in logging.
// nParam   - parameter value; passed on to dispatchToggle()/dispatchValue().
//
// NOTE(review): the declaration of ret, the return/break statements and the
// lines following each `ret != RTFError::OK` check are not visible in this
// listing.
261 RTFError
RTFTokenizer::dispatchKeyword(OString
const& rKeyword
, bool bParam
, int nParam
)
// While the listener's destination is SKIP, a "bin" keyword with a positive
// parameter makes the stream seek forward by that many positions.
263 if (m_rImport
.getDestination() == Destination::SKIP
)
265 // skip binary data explicitly, to not trip over rtf markup
266 // control characters
267 if (rKeyword
== "bin" && nParam
> 0)
268 Strm().SeekRel(nParam
);
271 SAL_INFO("writerfilter.rtf", __func__
<< ": keyword '\\" << rKeyword
<< "' with param? "
272 << (bParam
? 1 : 0) << " param val: '"
273 << (bParam
? nParam
: 0) << "'");
274 auto findIt
= s_aRTFControlWords
.find(rKeyword
);
// Unknown keyword: log it and create an RTFSkipDestination marked as not
// parsed.
275 if (findIt
== s_aRTFControlWords
.end())
277 SAL_INFO("writerfilter.rtf", __func__
<< ": unknown keyword '\\" << rKeyword
<< "'");
278 RTFSkipDestination
aSkip(m_rImport
);
279 aSkip
.setParsed(false);
// Known keyword: route it by control type.
284 RTFSymbol
const& rSymbol
= findIt
->second
;
285 switch (rSymbol
.GetControlType())
287 case RTFControlType::FLAG
:
288 // flags ignore any parameter by definition
289 ret
= m_rImport
.dispatchFlag(rSymbol
.GetIndex());
290 if (ret
!= RTFError::OK
)
293 case RTFControlType::DESTINATION
:
294 // same for destinations
295 ret
= m_rImport
.dispatchDestination(rSymbol
.GetIndex());
296 if (ret
!= RTFError::OK
)
299 case RTFControlType::SYMBOL
:
301 ret
= m_rImport
.dispatchSymbol(rSymbol
.GetIndex());
302 if (ret
!= RTFError::OK
)
305 case RTFControlType::TOGGLE
:
306 ret
= m_rImport
.dispatchToggle(rSymbol
.GetIndex(), bParam
, nParam
);
307 if (ret
!= RTFError::OK
)
310 case RTFControlType::VALUE
:
// Replace nParam with the symbol's default value (presumably only when no
// parameter was given - the guarding line is not visible; TODO confirm).
312 nParam
= rSymbol
.GetDefValue();
313 ret
= m_rImport
.dispatchValue(rSymbol
.GetIndex(), nParam
);
314 if (ret
!= RTFError::OK
)
322 OUString
RTFTokenizer::getPosition()
324 return OUString::number(m_nLineNumber
+ 1) + ","
325 + OUString::number(Strm().Tell() - m_nLineStartPos
+ 1);
328 } // namespace writerfilter::rtftok
330 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */