1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include "rtftokenizer.hxx"
11 #include <tools/stream.hxx>
12 #include <svx/dialmgr.hxx>
13 #include <svx/strings.hrc>
14 #include <rtl/strbuf.hxx>
15 #include <rtl/ustrbuf.hxx>
16 #include <rtl/character.hxx>
17 #include <sal/log.hxx>
18 #include "rtfskipdestination.hxx"
19 #include <com/sun/star/io/BufferSizeExceededException.hpp>
20 #include <com/sun/star/task/XStatusIndicator.hpp>
21 #include <osl/diagnose.h>
22 #include <filter/msfilter/rtfutil.hxx>
24 using namespace com::sun::star
;
26 namespace writerfilter
// Out-of-class definitions of RTFTokenizer's static lookup state.
// The RTFTokenizer constructor populates and sorts the two vectors the first
// time it runs, using the two bool flags to make sure that happens only once.

// Sorted table of control words; searched with std::lower_bound in
// dispatchKeyword().
30 std::vector
<RTFSymbol
> RTFTokenizer::s_aRTFControlWords
;
// Set to true by the constructor once the control word table has been set up.
31 bool RTFTokenizer::s_bControlWordsSorted
;
// Sorted table of math control words; searched with std::lower_bound in
// lookupMathKeyword().
32 std::vector
<RTFMathSymbol
> RTFTokenizer::s_aRTFMathControlWords
;
// Set to true by the constructor once the math control word table has been
// set up.
33 bool RTFTokenizer::s_bMathControlWordsSorted
;
// Constructs a tokenizer.
//
// rImport          - listener object; only its use as a constructor argument
//                    is visible in this listing.
// pInStream        - input stream, stored in m_pInStream.
// xStatusIndicator - optional progress indicator, stored in
//                    m_xStatusIndicator (resolveParse() checks .is() before
//                    using it).
//
// NOTE(review): the embedded original line numbers skip (36 -> 38,
// 39 -> 45, ...), so the start of the member-initialiser list, the braces and
// any further initialisers are not visible in this listing.
35 RTFTokenizer::RTFTokenizer(RTFListener
& rImport
, SvStream
* pInStream
,
36 uno::Reference
<task::XStatusIndicator
> const& xStatusIndicator
)
38 , m_pInStream(pInStream
)
39 , m_xStatusIndicator(xStatusIndicator
)
// One-time setup of the control word table: build a vector from the
// aRTFControlWords array (nRTFControlWords entries) and sort it so that
// dispatchKeyword() can search it with std::lower_bound.
// NOTE(review): the left-hand side of the assignment on original line 49 is
// not visible here; presumably s_aRTFControlWords - confirm.
45 if (!RTFTokenizer::s_bControlWordsSorted
)
47 RTFTokenizer::s_bControlWordsSorted
= true;
49 = std::vector
<RTFSymbol
>(aRTFControlWords
, aRTFControlWords
+ nRTFControlWords
);
50 std::sort(s_aRTFControlWords
.begin(), s_aRTFControlWords
.end());
// Same one-time setup for the math control word table, which
// lookupMathKeyword() searches with std::lower_bound.
52 if (!RTFTokenizer::s_bMathControlWordsSorted
)
54 RTFTokenizer::s_bMathControlWordsSorted
= true;
55 s_aRTFMathControlWords
= std::vector
<RTFMathSymbol
>(
56 aRTFMathControlWords
, aRTFMathControlWords
+ nRTFMathControlWords
);
57 std::sort(s_aRTFMathControlWords
.begin(), s_aRTFMathControlWords
.end());
// Defaulted destructor, defined out of line in this translation unit.
61 RTFTokenizer::~RTFTokenizer() = default;
// Main parse loop: reads the stream one character at a time until EOF and
// forwards characters, state changes and keywords to m_rImport.
//
// Returns an RTFError. The failure values visible in this listing are
// GROUP_UNDER, CHAR_OVER, HEX_INVALID and GROUP_OVER; results of the
// m_rImport calls and of resolveKeyword() are also compared against
// RTFError::OK.
//
// NOTE(review): the embedded original line numbers skip in many places
// (e.g. 94 -> 98, 102 -> 110), so braces, the declarations of ch / ret / b,
// the conditions guarding several returns and what looks like a
// per-character switch are not visible in this listing.
63 RTFError
RTFTokenizer::resolveParse()
65 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC
);
// Progress bookkeeping: step size between indicator updates and the stream
// position of the last update.
70 std::size_t nPercentSize
= 0;
71 sal_uInt64 nLastPos
= 0;
// Progress reporting is only set up when an indicator was supplied.
73 if (m_xStatusIndicator
.is())
75 OUString
sDocLoad(SvxResId(RID_SVXSTR_DOC_LOAD
));
// The indicator's range is the absolute end offset of the stream
// (current position plus what remains).
77 sal_uInt64
const nCurrentPos
= Strm().Tell();
78 sal_uInt64
const nEndPos
= nCurrentPos
+ Strm().remainingSize();
79 m_xStatusIndicator
->start(sDocLoad
, nEndPos
);
// One hundredth of the end offset is used as the update step.
80 nPercentSize
= nEndPos
/ 100;
82 nLastPos
= nCurrentPos
;
83 m_xStatusIndicator
->setValue(nLastPos
);
// Read one character per iteration; because of the comma operator the loop
// condition is only the eof() check made after the read.
86 while (Strm().ReadChar(ch
), !Strm().eof())
88 //SAL_INFO("writerfilter", OSL_THIS_FUNC << ": parsing character '" << ch << "'");
90 sal_uInt64
const nCurrentPos
= Strm().Tell();
// Update the indicator only after the stream has advanced by more than
// nPercentSize since the last update.
91 if (m_xStatusIndicator
.is() && nCurrentPos
> (nLastPos
+ nPercentSize
))
93 nLastPos
= nCurrentPos
;
94 m_xStatusIndicator
->setValue(nLastPos
);
// NOTE(review): the condition guarding this return is not visible here.
98 return RTFError::GROUP_UNDER
;
// At group depth > 0 with the importer in the BIN internal state; the
// resolveChars(ch) call that follows presumably forms this branch's body -
// confirm against the full source.
99 if (m_nGroup
> 0 && m_rImport
.getInternalState() == RTFInternalState::BIN
)
101 ret
= m_rImport
.resolveChars(ch
);
102 if (ret
!= RTFError::OK
)
// Tell() is already one past the character just read, hence the "- 1".
110 m_nGroupStart
= Strm().Tell() - 1;
111 ret
= m_rImport
.pushState();
112 if (ret
!= RTFError::OK
)
116 ret
= m_rImport
.popState();
117 if (ret
!= RTFError::OK
)
121 if (m_rImport
.isSubstream())
122 m_rImport
.finishSubstream();
127 ret
= resolveKeyword();
128 if (ret
!= RTFError::OK
)
132 break; // ignore this
// Remember where the current line starts; getPosition() subtracts this from
// Tell() when it formats the position.
135 m_nLineStartPos
= nCurrentPos
;
// NOTE(review): the condition guarding this return is not visible here.
139 return RTFError::CHAR_OVER
;
// Importer in the NORMAL internal state; the resolveChars(ch) call follows.
140 if (m_rImport
.getInternalState() == RTFInternalState::NORMAL
)
142 ret
= m_rImport
.resolveChars(ch
);
143 if (ret
!= RTFError::OK
)
148 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC
<< ": hex internal state");
// Convert the character with msfilter::rtfutil::AsHex.
// NOTE(review): how `parsed` is validated and combined into `b` is not
// visible in this listing.
150 sal_Int8 parsed
= msfilter::rtfutil::AsHex(ch
);
152 return RTFError::HEX_INVALID
;
157 ret
= m_rImport
.resolveChars(b
);
158 if (ret
!= RTFError::OK
)
// Switch the importer back to the NORMAL internal state.
162 m_rImport
.setInternalState(RTFInternalState::NORMAL
);
// NOTE(review): the conditions selecting between these final returns are not
// visible in this listing.
171 return RTFError::GROUP_UNDER
;
173 return RTFError::GROUP_OVER
;
177 void RTFTokenizer::pushGroup() { m_nGroup
++; }
179 void RTFTokenizer::popGroup() { m_nGroup
--; }
// Collects a keyword into aBuf and, if one follows, a numeric parameter into
// nParam, then forwards them to dispatchKeyword().
//
// Returns the result of dispatchKeyword(), or RTFError::UNEXPECTED_EOF at the
// two return sites visible below.
// Throws io::BufferSizeExceededException when the keyword buffer grows beyond
// 32 characters.
//
// NOTE(review): the embedded original line numbers skip (181 -> 191,
// 201 -> 211, ...), so the declarations of ch / aBuf / bParam / nParam, the
// stream reads, the loop bodies and the EOF checks guarding the
// UNEXPECTED_EOF returns are not visible in this listing.
181 RTFError
RTFTokenizer::resolveKeyword()
191 return RTFError::UNEXPECTED_EOF
;
// Non-alphabetic character: the keyword is dispatched immediately
// (see the original comment below about control symbols).
193 if (!rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
196 OString aKeyword
= aBuf
.makeStringAndClear();
197 // control symbols aren't followed by a space, so we can return here
198 // without doing any SeekRel()
199 return dispatchKeyword(aKeyword
, bParam
, nParam
);
// Loop while the current character is an ASCII letter.
201 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch
)))
// Enforce the length limit on control word names (see spec reference below).
211 if (aBuf
.getLength() > 32)
212 // See RTF spec v1.9.1, page 7
213 // A control word's name cannot be longer than 32 letters.
214 throw io::BufferSizeExceededException();
218 // in case we'll have a parameter, that will be negative
222 return RTFError::UNEXPECTED_EOF
;
// A digit here starts the numeric parameter.
224 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch
)))
226 OStringBuffer aParameter
;
228 // we have a parameter
// Append digits to aParameter while the current character is an ASCII digit.
230 while (rtl::isAsciiDigit(static_cast<unsigned char>(ch
)))
232 aParameter
.append(ch
);
// Convert the collected digits to an integer.
240 nParam
= aParameter
.makeStringAndClear().toInt32();
246 OString aKeyword
= aBuf
.makeStringAndClear();
247 return dispatchKeyword(aKeyword
, bParam
, nParam
);
// Searches the sorted s_aRTFMathControlWords table for rSymbol using
// std::lower_bound.
//
// rSymbol - the symbol to look for; taken by non-const reference.
//
// NOTE(review): the declaration of `low`, both return statements and any
// write-back into rSymbol are not visible in this listing (the embedded
// original line numbers skip 251-252 and 255-258); presumably returns false
// when not found and true otherwise - confirm.
250 bool RTFTokenizer::lookupMathKeyword(RTFMathSymbol
& rSymbol
)
253 = std::lower_bound(s_aRTFMathControlWords
.begin(), s_aRTFMathControlWords
.end(), rSymbol
);
// Standard lower_bound miss test: either past the end, or the element found
// orders strictly after rSymbol (i.e. no equivalent entry exists).
254 if (low
== s_aRTFMathControlWords
.end() || rSymbol
< *low
)
// Looks rKeyword up in the sorted s_aRTFControlWords table and forwards it to
// the m_rImport dispatch method selected by the entry's nControlType.
//
// rKeyword - keyword text.
// bParam   - whether a parameter was supplied; logged and forwarded to
//            dispatchToggle().
// nParam   - parameter value; also used as the byte count to skip for "bin"
//            while the importer's destination is SKIP.
//
// Returns an RTFError; each dispatch result is compared against RTFError::OK.
//
// NOTE(review): the embedded original line numbers skip (262 -> 264,
// 281 -> 286, ...), so braces, most case labels, the declarations of
// aSymbol / ret and all return statements are not visible in this listing.
260 RTFError
RTFTokenizer::dispatchKeyword(OString
const& rKeyword
, bool bParam
, int nParam
)
// Handling while the importer's current destination is SKIP.
262 if (m_rImport
.getDestination() == Destination::SKIP
)
264 // skip binary data explicitly, to not trip over rtf markup
265 // control characters
266 if (rKeyword
== "bin" && nParam
> 0)
267 Strm().SeekRel(nParam
);
270 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC
<< ": keyword '\\" << rKeyword
<< "' with param? "
271 << (bParam
? 1 : 0) << " param val: '"
272 << (bParam
? nParam
: 0) << "'");
// Build a search key from the keyword and binary-search the sorted table.
274 aSymbol
.sKeyword
= rKeyword
.getStr();
275 auto low
= std::lower_bound(s_aRTFControlWords
.begin(), s_aRTFControlWords
.end(), aSymbol
);
// Index of the lower_bound position in the table.
276 int i
= low
- s_aRTFControlWords
.begin();
// Standard lower_bound miss test: no equivalent entry in the table.
277 if (low
== s_aRTFControlWords
.end() || aSymbol
< *low
)
279 SAL_INFO("writerfilter.rtf", OSL_THIS_FUNC
<< ": unknown keyword '\\" << rKeyword
<< "'");
// Unknown keyword: create an RTFSkipDestination for the importer and mark it
// as not parsed.
280 RTFSkipDestination
aSkip(m_rImport
);
281 aSkip
.setParsed(false);
// Dispatch on the control type of the matched table entry.
286 switch (s_aRTFControlWords
[i
].nControlType
)
289 // flags ignore any parameter by definition
290 ret
= m_rImport
.dispatchFlag(s_aRTFControlWords
[i
].nIndex
);
291 if (ret
!= RTFError::OK
)
294 case CONTROL_DESTINATION
:
295 // same for destinations
296 ret
= m_rImport
.dispatchDestination(s_aRTFControlWords
[i
].nIndex
);
297 if (ret
!= RTFError::OK
)
302 ret
= m_rImport
.dispatchSymbol(s_aRTFControlWords
[i
].nIndex
);
303 if (ret
!= RTFError::OK
)
// Toggles receive both the "has parameter" flag and the parameter value.
307 ret
= m_rImport
.dispatchToggle(s_aRTFControlWords
[i
].nIndex
, bParam
, nParam
);
308 if (ret
!= RTFError::OK
)
// Replace nParam with the table entry's default value before dispatching.
// NOTE(review): the condition guarding this assignment is not visible here;
// presumably it applies only when no parameter was supplied - confirm.
313 nParam
= s_aRTFControlWords
[i
].nDefValue
;
314 ret
= m_rImport
.dispatchValue(s_aRTFControlWords
[i
].nIndex
, nParam
);
315 if (ret
!= RTFError::OK
)
// Builds a string describing the current stream position from
// m_nLineNumber + 1 and the offset of Tell() relative to m_nLineStartPos,
// plus one.
//
// NOTE(review): the declaration of aRet and original line 327 (between the
// two appends) are not visible in this listing; presumably a separator is
// appended there - confirm.
323 OUString
RTFTokenizer::getPosition()
// m_nLineNumber plus one.
326 aRet
.append(m_nLineNumber
+ 1);
// Offset from m_nLineStartPos (recorded in resolveParse()), plus one.
328 aRet
.append(sal_Int32(Strm().Tell() - m_nLineStartPos
+ 1));
329 return aRet
.makeStringAndClear();
332 } // namespace rtftok
333 } // namespace writerfilter
335 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */