1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include "rtftokenizer.hxx"
11 #include <tools/stream.hxx>
12 #include <svx/dialmgr.hxx>
13 #include <svx/strings.hrc>
14 #include <rtl/strbuf.hxx>
15 #include <rtl/ustrbuf.hxx>
16 #include <rtl/character.hxx>
17 #include <sal/log.hxx>
18 #include "rtfskipdestination.hxx"
19 #include <com/sun/star/io/BufferSizeExceededException.hpp>
20 #include <com/sun/star/task/XStatusIndicator.hpp>
21 #include <osl/diagnose.h>
22 #include <filter/msfilter/rtfutil.hxx>
24 using namespace com::sun::star
;
26 namespace writerfilter
// Keyword -> RTFSymbol lookup table. It is filled from aRTFControlWords by the
// RTFTokenizer constructor and queried with find() in dispatchKeyword().
30 std::unordered_map
<OString
, RTFSymbol
> RTFTokenizer::s_aRTFControlWords
;
// Set to true by the constructor when it populates s_aRTFControlWords, so the
// population loop runs only while this flag is still false.
31 bool RTFTokenizer::s_bControlWordsInitialised
;
// Copy of the aRTFMathControlWords array. The constructor sorts it with
// std::sort so that lookupMathKeyword() can search it with std::lower_bound.
32 std::vector
<RTFMathSymbol
> RTFTokenizer::s_aRTFMathControlWords
;
// Set to true by the constructor when it builds and sorts
// s_aRTFMathControlWords, so that work is done only while the flag is false.
33 bool RTFTokenizer::s_bMathControlWordsSorted
;
// Constructs a tokenizer.
//
// rImport:          the listener that receives the parsed RTF events.
//                   NOTE(review): its member initializer is not visible in this
//                   excerpt; other methods use m_rImport, so presumably it is
//                   bound to that member -- confirm.
// pInStream:        the stream to tokenize, stored in m_pInStream.
// xStatusIndicator: progress indicator, stored in m_xStatusIndicator; it is
//                   tested with is() before use in resolveParse().
//
// Besides initializing members, the constructor performs the setup of the
// static keyword tables, each guarded by its own static flag.
35 RTFTokenizer::RTFTokenizer(RTFListener
& rImport
, SvStream
* pInStream
,
36 uno::Reference
<task::XStatusIndicator
> const& xStatusIndicator
)
38 , m_pInStream(pInStream
)
39 , m_xStatusIndicator(xStatusIndicator
)
// Fill the keyword -> symbol map from aRTFControlWords, keyed by each entry's
// GetKeyword(); skipped once the flag has been set.
45 if (!RTFTokenizer::s_bControlWordsInitialised
)
47 RTFTokenizer::s_bControlWordsInitialised
= true;
48 for (int i
= 0; i
< nRTFControlWords
; ++i
)
49 s_aRTFControlWords
.emplace(OString(aRTFControlWords
[i
].GetKeyword()),
// Copy the nRTFMathControlWords entries of aRTFMathControlWords into the
// static vector and sort them; lookupMathKeyword() relies on that ordering.
52 if (!RTFTokenizer::s_bMathControlWordsSorted
)
54 RTFTokenizer::s_bMathControlWordsSorted
= true;
55 s_aRTFMathControlWords
= std::vector
<RTFMathSymbol
>(
56 aRTFMathControlWords
, aRTFMathControlWords
+ nRTFMathControlWords
);
57 std::sort(s_aRTFMathControlWords
.begin(), s_aRTFMathControlWords
.end());
// Defaulted destructor: no explicit cleanup code is written for the tokenizer.
61 RTFTokenizer::~RTFTokenizer() = default;
// Main tokenizer loop. Reads the stream one character at a time and forwards
// the result to m_rImport: pushState()/popState(), keywords through
// resolveKeyword(), and characters through resolveChars(). While doing so it
// keeps the status indicator (if any) up to date.
//
// Returns an RTFError. The result of each m_rImport / resolveKeyword() call is
// compared against RTFError::OK, and GROUP_UNDER, GROUP_OVER, CHAR_OVER and
// HEX_INVALID are returned directly from here.
// NOTE(review): the conditions guarding those direct returns, the case labels
// and the declarations of ch / ret / b are not visible in this excerpt.
63 RTFError
RTFTokenizer::resolveParse()
65 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC
);
// Progress reporting state: the step size and the last reported position.
71 std::size_t nPercentSize
= 0;
72 sal_uInt64 nLastPos
= 0;
74 if (m_xStatusIndicator
.is())
76 OUString
sDocLoad(SvxResId(RID_SVXSTR_DOC_LOAD
));
// The indicator range is the absolute end position of the stream (current
// position plus what remains); one step is a hundredth of that range.
78 sal_uInt64
const nCurrentPos
= Strm().Tell();
79 sal_uInt64
const nEndPos
= nCurrentPos
+ Strm().remainingSize();
80 m_xStatusIndicator
->start(sDocLoad
, nEndPos
);
81 nPercentSize
= nEndPos
/ 100;
83 nLastPos
= nCurrentPos
;
84 m_xStatusIndicator
->setValue(nLastPos
);
// Comma operator: read the next character, then loop while the stream has not
// reached EOF.
87 while (Strm().ReadChar(ch
), !Strm().eof())
89 //SAL_INFO("writerfilter", OSL_THIS_FUNC << ": parsing character '" << ch << "'");
91 sal_uInt64
const nCurrentPos
= Strm().Tell();
// Only touch the indicator after advancing by more than one step since the
// last update.
92 if (m_xStatusIndicator
.is() && nCurrentPos
> (nLastPos
+ nPercentSize
))
94 nLastPos
= nCurrentPos
;
95 m_xStatusIndicator
->setValue(nLastPos
);
99 return RTFError::GROUP_UNDER
;
// Inside a group and in the BIN internal state the character is handed
// straight to resolveChars().
100 if (m_nGroup
> 0 && m_rImport
.getInternalState() == RTFInternalState::BIN
)
102 ret
= m_rImport
.resolveChars(ch
);
103 if (ret
!= RTFError::OK
)
// Tell() is already one past the character just read, hence the -1.
111 m_nGroupStart
= Strm().Tell() - 1;
112 ret
= m_rImport
.pushState();
113 if (ret
!= RTFError::OK
)
117 ret
= m_rImport
.popState();
118 if (ret
!= RTFError::OK
)
122 if (m_rImport
.isSubstream())
123 m_rImport
.finishSubstream();
128 ret
= resolveKeyword();
129 if (ret
!= RTFError::OK
)
133 break; // ignore this
// Remembered so that getPosition() can report an offset relative to it.
136 m_nLineStartPos
= nCurrentPos
;
140 return RTFError::CHAR_OVER
;
// NORMAL internal state: forward the character as it is.
141 if (m_rImport
.getInternalState() == RTFInternalState::NORMAL
)
143 ret
= m_rImport
.resolveChars(ch
);
144 if (ret
!= RTFError::OK
)
149 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC
<< ": hex internal state");
// Hex internal state: convert the character to its hex digit value.
151 sal_Int8 parsed
= msfilter::rtfutil::AsHex(ch
);
153 return RTFError::HEX_INVALID
;
158 ret
= m_rImport
.resolveChars(b
);
159 if (ret
!= RTFError::OK
)
// The hex value has been forwarded; switch back to the NORMAL state.
163 m_rImport
.setInternalState(RTFInternalState::NORMAL
);
172 return RTFError::GROUP_UNDER
;
174 return RTFError::GROUP_OVER
;
178 void RTFTokenizer::pushGroup() { m_nGroup
++; }
180 void RTFTokenizer::popGroup() { m_nGroup
--; }
// Reads one control symbol or control word from the stream, together with an
// optional numeric parameter, and passes it on to dispatchKeyword().
//
// Returns the result of dispatchKeyword(), or RTFError::UNEXPECTED_EOF from the
// two early-return paths below (their conditions are not visible in this
// excerpt).
// Throws io::BufferSizeExceededException when the collected keyword grows
// beyond 32 characters.
182 RTFError
RTFTokenizer::resolveKeyword()
// Keyword buffer, pre-sized to 32 characters.
185 OStringBuffer
aBuf(32);
192 return RTFError::UNEXPECTED_EOF
;
// First character is not an ASCII letter: dispatch immediately as a control
// symbol.
194 if (!rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
197 OString aKeyword
= aBuf
.makeStringAndClear();
198 // control symbols aren't followed by a space, so we can return here
199 // without doing any SeekRel()
200 return dispatchKeyword(aKeyword
, bParam
, nParam
);
// Control word: keep going for as long as ASCII letters follow.
202 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
205 if (aBuf
.getLength() > 32)
206 // See RTF spec v1.9.1, page 7
207 // A control word's name cannot be longer than 32 letters.
208 throw io::BufferSizeExceededException();
219 // in case we'll have a parameter, that will be negative
223 return RTFError::UNEXPECTED_EOF
;
225 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch
)))
227 OStringBuffer aParameter
;
229 // we have a parameter
// Gather the consecutive ASCII digits of the parameter.
231 while (rtl::isAsciiDigit(static_cast<unsigned char>(ch
)))
233 aParameter
.append(ch
);
// Convert the collected digits to a 32-bit integer.
241 nParam
= aParameter
.makeStringAndClear().toInt32();
247 OString aKeyword
= aBuf
.makeStringAndClear();
248 return dispatchKeyword(aKeyword
, bParam
, nParam
);
// Searches the sorted s_aRTFMathControlWords vector for rSymbol using
// std::lower_bound, which compares entries with RTFMathSymbol's operator<.
//
// rSymbol: the symbol to look up; taken by non-const reference.
// NOTE(review): the return statements and any write-back into rSymbol are not
// visible in this excerpt -- presumably the found entry is copied into rSymbol
// and true is returned; confirm.
251 bool RTFTokenizer::lookupMathKeyword(RTFMathSymbol
& rSymbol
)
254 = std::lower_bound(s_aRTFMathControlWords
.begin(), s_aRTFMathControlWords
.end(), rSymbol
);
// lower_bound yields the first entry that is not less than rSymbol. It is a
// real match only if it exists and rSymbol is not less than it either, so this
// condition identifies the "not found" case.
255 if (low
== s_aRTFMathControlWords
.end() || rSymbol
< *low
)
// Looks rKeyword up in s_aRTFControlWords and forwards it to the m_rImport
// handler that matches the symbol's control type (dispatchFlag,
// dispatchDestination, dispatchSymbol, dispatchToggle or dispatchValue).
//
// rKeyword: the keyword text; the log output prefixes it with a backslash.
// bParam:   whether a parameter accompanied the keyword; logged and passed to
//           dispatchToggle().
// nParam:   the parameter value; used as the SeekRel() distance for "bin" and
//           passed to dispatchToggle() / dispatchValue().
//
// The result of each dispatch call is compared against RTFError::OK.
// NOTE(review): most case labels and the return statements are not visible in
// this excerpt.
261 RTFError
RTFTokenizer::dispatchKeyword(OString
const& rKeyword
, bool bParam
, int nParam
)
263 if (m_rImport
.getDestination() == Destination::SKIP
)
265 // skip binary data explicitly, to not trip over rtf markup
266 // control characters
267 if (rKeyword
== "bin" && nParam
> 0)
268 Strm().SeekRel(nParam
);
271 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC
<< ": keyword '\\" << rKeyword
<< "' with param? "
272 << (bParam
? 1 : 0) << " param val: '"
273 << (bParam
? nParam
: 0) << "'");
274 auto findIt
= s_aRTFControlWords
.find(rKeyword
);
275 if (findIt
== s_aRTFControlWords
.end())
277 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC
<< ": unknown keyword '\\" << rKeyword
<< "'");
// Unknown keyword: create a skip-destination helper and flag it as not parsed.
278 RTFSkipDestination
aSkip(m_rImport
);
279 aSkip
.setParsed(false);
// Known keyword: dispatch on the control type stored in the table entry.
284 RTFSymbol
const& rSymbol
= findIt
->second
;
285 switch (rSymbol
.GetControlType())
288 // flags ignore any parameter by definition
289 ret
= m_rImport
.dispatchFlag(rSymbol
.GetIndex());
290 if (ret
!= RTFError::OK
)
293 case CONTROL_DESTINATION
:
294 // same for destinations
295 ret
= m_rImport
.dispatchDestination(rSymbol
.GetIndex());
296 if (ret
!= RTFError::OK
)
301 ret
= m_rImport
.dispatchSymbol(rSymbol
.GetIndex());
302 if (ret
!= RTFError::OK
)
306 ret
= m_rImport
.dispatchToggle(rSymbol
.GetIndex(), bParam
, nParam
);
307 if (ret
!= RTFError::OK
)
// The symbol's default value replaces nParam here.
// NOTE(review): the guard for this assignment is not visible; presumably it
// applies only when no parameter was given -- confirm.
312 nParam
= rSymbol
.GetDefValue();
313 ret
= m_rImport
.dispatchValue(rSymbol
.GetIndex(), nParam
);
314 if (ret
!= RTFError::OK
)
// Builds a string describing the current position in the stream from two
// numbers: m_nLineNumber + 1 and the distance of the current stream position
// from m_nLineStartPos, plus one.
// NOTE(review): the declaration of aRet and any separator text appended between
// the two numbers are not visible in this excerpt.
322 OUString
RTFTokenizer::getPosition()
// The stored line number is reported with 1 added.
325 aRet
.append(m_nLineNumber
+ 1);
// Offset within the line: bytes since the recorded line start, plus one,
// narrowed to sal_Int32.
327 aRet
.append(sal_Int32(Strm().Tell() - m_nLineStartPos
+ 1));
328 return aRet
.makeStringAndClear();
331 } // namespace rtftok
332 } // namespace writerfilter
334 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */