Avoid potential negative array index access to cached text.
[LibreOffice.git] / writerfilter / source / rtftok / rtfdispatchsymbol.cxx
blob b40fd55dde9b3ea37b7414684656abeaaf9de8f1
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include "rtfdocumentimpl.hxx"
12 #include <com/sun/star/io/WrongFormatException.hpp>
13 #include <svl/lngmisc.hxx>
15 #include <ooxml/resourceids.hxx>
17 #include <sal/log.hxx>
19 #include "rtfreferenceproperties.hxx"
20 #include "rtfskipdestination.hxx"
22 using namespace com::sun::star;
24 namespace writerfilter::rtftok
// Handles a single RTF control symbol identified by nKeyword.
//
// Visible flow:
//  - marks that a section is needed (setNeedSect) and calls checkUnicode(),
//    passing bHex = false only for HEXCHAR;
//  - LINE is emitted through singleChar('\n', true) and returns immediately;
//  - TAB, BACKSLASH, LBRACE, RBRACE and the dash / bullet / quote keywords are
//    mapped to one byte that is decoded as Windows-1252 and handed to text();
//  - every remaining keyword is handled by the second switch.
//
// Returns RTFError::OK on every path visible here. The NESTROW branch throws
// io::WrongFormatException when the table buffer stack does not allow closing
// a nested row.
//
// NOTE(review): the number leading each line and the gaps in that numbering
// suggest this listing came from a web view that dropped blank and brace-only
// lines; block scoping below is therefore inferred - confirm against the
// repository copy before relying on it.
26 RTFError RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword)
28 setNeedSect(true);
29 if (nKeyword != RTFKeyword::HEXCHAR)
30 checkUnicode(/*bUnicode =*/true, /*bHex =*/true);
31 else
32 checkUnicode(/*bUnicode =*/true, /*bHex =*/false);
// aSkip is adjusted further down: setReset(false) for IGNORE and
// setParsed(false) for keywords that no case handles.
33 RTFSkipDestination aSkip(*this);
35 if (RTFKeyword::LINE == nKeyword)
37 // very special handling since text() will eat lone '\n'
38 singleChar('\n', /*bRunProps=*/true);
39 return RTFError::OK;
41 // Trivial symbols
// cCh holds a single byte; 0 means "not a trivial symbol".
42 sal_uInt8 cCh = 0;
43 switch (nKeyword)
45 case RTFKeyword::TAB:
46 cCh = '\t';
47 break;
48 case RTFKeyword::BACKSLASH:
49 cCh = '\\';
50 break;
51 case RTFKeyword::LBRACE:
52 cCh = '{';
53 break;
54 case RTFKeyword::RBRACE:
55 cCh = '}';
56 break;
// The values 145-151 below are bytes that get decoded with
// RTL_TEXTENCODING_MS_1252 further down, not Unicode code points.
57 case RTFKeyword::EMDASH:
58 cCh = 151;
59 break;
60 case RTFKeyword::ENDASH:
61 cCh = 150;
62 break;
63 case RTFKeyword::BULLET:
64 cCh = 149;
65 break;
66 case RTFKeyword::LQUOTE:
67 cCh = 145;
68 break;
69 case RTFKeyword::RQUOTE:
70 cCh = 146;
71 break;
72 case RTFKeyword::LDBLQUOTE:
73 cCh = 147;
74 break;
75 case RTFKeyword::RDBLQUOTE:
76 cCh = 148;
77 break;
78 default:
79 break;
// A non-zero cCh was set by one of the cases above: convert that one byte to
// an OUString, send it as text and finish.
81 if (cCh > 0)
83 OUString aStr(OStringToOUString(OStringChar(char(cCh)), RTL_TEXTENCODING_MS_1252));
84 text(aStr);
85 return RTFError::OK;
// Everything that is not a trivial symbol is handled here.
88 switch (nKeyword)
90 case RTFKeyword::IGNORE:
92 m_bSkipUnknown = true;
93 aSkip.setReset(false);
94 return RTFError::OK;
// NOTE(review): this break directly follows the return above and appears
// unreachable.
96 break;
97 case RTFKeyword::PAR:
99 if (m_aStates.top().getDestination() == Destination::FOOTNOTESEPARATOR)
100 break; // just ignore it - only thing we read in here is CHFTNSEP
101 checkFirstRun();
102 checkNeedPap();
103 runProps(); // tdf#152872 paragraph marker formatting
// With no current buffer the paragraph break is performed right away; the
// else-branch further down queues a BUFFER_PAR entry in the current buffer
// instead, unless the destination is SHAPETEXT.
104 if (!m_aStates.top().getCurrentBuffer())
106 parBreak();
107 // Not in table? Reset max width.
108 if (m_nCellxMax)
110 // Was in table, but not anymore -> tblEnd.
111 RTFSprms aAttributes;
112 RTFSprms aSprms;
113 aSprms.set(NS_ooxml::LN_tblEnd, new RTFValue(1));
114 writerfilter::Reference<Properties>::Pointer_t pProperties
115 = new RTFReferenceProperties(std::move(aAttributes), std::move(aSprms));
116 Mapper().props(pProperties);
118 m_nCellxMax = 0;
120 else if (m_aStates.top().getDestination() != Destination::SHAPETEXT)
122 RTFValue::Pointer_t pValue;
123 m_aStates.top().getCurrentBuffer()->push_back(Buf_t(BUFFER_PAR, pValue, nullptr));
125 // but don't emit properties yet, since they may change till the first text token arrives
126 m_bNeedPap = true;
127 if (!m_aStates.top().getFrame().hasProperties())
128 m_bNeedPar = false;
129 m_bNeedFinalPar = false;
131 break;
132 case RTFKeyword::SECT:
134 m_bHadSect = true;
135 if (m_bIgnoreNextContSectBreak || m_aStates.top().getFrame().hasProperties())
137 // testContSectionPageBreak: need \par now
// Re-enters this function for PAR instead of emitting a section break, then
// clears the one-shot flag.
138 dispatchSymbol(RTFKeyword::PAR);
139 m_bIgnoreNextContSectBreak = false;
141 else
143 bool bPendingFloatingTable = false;
144 RTFValue::Pointer_t pTblpPr
145 = m_aStates.top().getTableRowSprms().find(NS_ooxml::LN_CT_TblPrBase_tblpPr);
146 if (pTblpPr)
148 // We have a pending floating table, provide an anchor for it still in this
149 // section.
150 bPendingFloatingTable = true;
153 if (m_bNeedCr || bPendingFloatingTable)
154 { // tdf#158586 don't dispatch \par here, it eats deferred page breaks
155 setNeedPar(true);
158 sectBreak();
159 if (m_nResetBreakOnSectBreak != RTFKeyword::invalid)
161 // this should run on _second_ \sect after \page
162 dispatchFlag(m_nResetBreakOnSectBreak); // lazy reset
163 m_nResetBreakOnSectBreak = RTFKeyword::invalid;
164 m_bNeedSect = false; // dispatchSymbol set it
166 setNeedPar(true); // testFdo52052: need \par at end of document
167 // testNestedTable: but not m_bNeedCr, that creates a page break
170 break;
// The next three cases each send one predefined string (SVT_HARD_SPACE,
// SVT_HARD_HYPHEN, SVT_SOFT_HYPHEN) through text().
171 case RTFKeyword::NOBREAK:
173 OUString aStr(SVT_HARD_SPACE);
174 text(aStr);
176 break;
177 case RTFKeyword::NOBRKHYPH:
179 OUString aStr(SVT_HARD_HYPHEN);
180 text(aStr);
182 break;
183 case RTFKeyword::OPTHYPH:
185 OUString aStr(SVT_SOFT_HYPHEN);
186 text(aStr);
188 break;
// HEXCHAR only switches the top state to the HEX internal state; this is the
// keyword for which checkUnicode() was called with bHex = false at the top.
189 case RTFKeyword::HEXCHAR:
190 m_aStates.top().setInternalState(RTFInternalState::HEX);
191 break;
192 case RTFKeyword::CELL:
193 case RTFKeyword::NESTCELL:
195 checkFirstRun();
196 if (m_bNeedPap)
198 // There were no runs in the cell, so we need to send paragraph and character properties here.
199 auto pPValue = new RTFValue(m_aStates.top().getParagraphAttributes(),
200 m_aStates.top().getParagraphSprms());
201 bufferProperties(m_aTableBufferStack.back(), pPValue, nullptr);
202 auto pCValue = new RTFValue(m_aStates.top().getCharacterAttributes(),
203 m_aStates.top().getCharacterSprms());
204 bufferProperties(m_aTableBufferStack.back(), pCValue, nullptr);
// Append a BUFFER_CELLEND marker to the last buffer on the table buffer stack.
207 RTFValue::Pointer_t pValue;
208 m_aTableBufferStack.back().emplace_back(Buf_t(BUFFER_CELLEND, pValue, nullptr));
209 m_bNeedPap = true;
211 break;
212 case RTFKeyword::NESTROW:
// Capture the last table buffer together with the nested cell data into a
// TableRowBuffer and fill in its paragraph / frame / row properties.
214 tools::SvRef<TableRowBuffer> const pBuffer(
215 new TableRowBuffer(m_aTableBufferStack.back(), m_aNestedTableCellsSprms,
216 m_aNestedTableCellsAttributes, m_nNestedCells));
217 prepareProperties(m_aStates.top(), pBuffer->GetParaProperties(),
218 pBuffer->GetFrameProperties(), pBuffer->GetRowProperties(),
219 m_nNestedCells, m_nNestedCurrentCellX - m_nNestedTRLeft);
// Reject the input when only one table buffer is on the stack or the top
// state has no current buffer.
221 if (m_aTableBufferStack.size() == 1 || !m_aStates.top().getCurrentBuffer())
223 throw io::WrongFormatException("mismatch between \\itap and number of \\nestrow",
224 nullptr);
226 assert(m_aStates.top().getCurrentBuffer() == &m_aTableBufferStack.back());
227 // note: there may be several states pointing to table buffer!
// Re-point every such state at the second-to-last buffer before the last one
// is popped.
228 for (std::size_t i = 0; i < m_aStates.size(); ++i)
230 if (m_aStates[i].getCurrentBuffer() == &m_aTableBufferStack.back())
232 m_aStates[i].setCurrentBuffer(
233 &m_aTableBufferStack[m_aTableBufferStack.size() - 2]);
// Drop the last buffer and record the captured row in the buffer that is now
// last, as a BUFFER_NESTROW entry.
236 m_aTableBufferStack.pop_back();
237 m_aTableBufferStack.back().emplace_back(
238 Buf_t(BUFFER_NESTROW, RTFValue::Pointer_t(), pBuffer));
// Reset the nested-cell bookkeeping.
240 m_aNestedTableCellsSprms.clear();
241 m_aNestedTableCellsAttributes.clear();
242 m_nNestedCells = 0;
243 m_bNeedPap = true;
245 break;
246 case RTFKeyword::ROW:
248 if (m_aStates.top().getTableRowWidthAfter() > 0)
250 // Add fake cellx / cell, RTF equivalent of
251 // OOXMLFastContextHandlerTextTableRow::handleGridAfter().
252 auto pXValue = new RTFValue(m_aStates.top().getTableRowWidthAfter());
253 m_aStates.top().getTableRowSprms().set(NS_ooxml::LN_CT_TblGridBase_gridCol, pXValue,
254 RTFOverwrite::NO_APPEND);
255 dispatchSymbol(RTFKeyword::CELL);
257 // Adjust total width, which is done in the \cellx handler for normal cells.
258 m_nTopLevelCurrentCellX += m_aStates.top().getTableRowWidthAfter();
260 m_aStates.top().setTableRowWidthAfter(0);
// bRestored remembers whether restoreTableRowProperties() ran, so that
// resetTableRowProperties() can be called at the end of this case.
263 bool bRestored = false;
264 // Ending a row, but no cells defined?
265 // See if there was an invalid table row reset, so we can restore cell infos to help invalid documents.
266 if (!m_nTopLevelCurrentCellX && m_nBackupTopLevelCurrentCellX)
268 restoreTableRowProperties();
269 bRestored = true;
272 // If the right edge of the last cell (row width) is smaller than the width of some other row, mimic WW8TabDesc::CalcDefaults(): resize the last cell
273 const int MINLAY = 23; // sw/inc/swtypes.hxx, minimal possible size of frames.
274 if ((m_nCellxMax - m_nTopLevelCurrentCellX) >= MINLAY)
// Replace the last gridCol value with itself plus the missing width, then
// treat the row as reaching m_nCellxMax.
276 auto pXValueLast = m_aStates.top().getTableRowSprms().find(
277 NS_ooxml::LN_CT_TblGridBase_gridCol, false);
278 const int nXValueLast = pXValueLast ? pXValueLast->getInt() : 0;
279 auto pXValue = new RTFValue(nXValueLast + m_nCellxMax - m_nTopLevelCurrentCellX);
280 m_aStates.top().getTableRowSprms().eraseLast(NS_ooxml::LN_CT_TblGridBase_gridCol);
281 m_aStates.top().getTableRowSprms().set(NS_ooxml::LN_CT_TblGridBase_gridCol, pXValue,
282 RTFOverwrite::NO_APPEND);
283 m_nTopLevelCurrentCellX = m_nCellxMax;
286 if (m_nTopLevelCells)
288 // Make a backup before we start popping elements
289 m_aTableInheritingCellsSprms = m_aTopLevelTableCellsSprms;
290 m_aTableInheritingCellsAttributes = m_aTopLevelTableCellsAttributes;
291 m_nInheritingCells = m_nTopLevelCells;
293 else
295 // No table definition? Then inherit from the previous row
296 m_aTopLevelTableCellsSprms = m_aTableInheritingCellsSprms;
297 m_aTopLevelTableCellsAttributes = m_aTableInheritingCellsAttributes;
298 m_nTopLevelCells = m_nInheritingCells;
// Shrink the stack to a single buffer; states that referenced a dropped
// buffer are re-pointed at the front buffer.
301 while (m_aTableBufferStack.size() > 1)
303 SAL_WARN("writerfilter.rtf", "dropping extra table buffer");
304 // note: there may be several states pointing to table buffer!
305 for (std::size_t i = 0; i < m_aStates.size(); ++i)
307 if (m_aStates[i].getCurrentBuffer() == &m_aTableBufferStack.back())
309 m_aStates[i].setCurrentBuffer(&m_aTableBufferStack.front());
312 m_aTableBufferStack.pop_back();
315 replayRowBuffer(m_aTableBufferStack.back(), m_aTopLevelTableCellsSprms,
316 m_aTopLevelTableCellsAttributes, m_nTopLevelCells);
318 // The scope of the table cell defaults is one row.
319 m_aDefaultState.getTableCellSprms().clear();
320 m_aStates.top().getTableCellSprms() = m_aDefaultState.getTableCellSprms();
321 m_aStates.top().getTableCellAttributes() = m_aDefaultState.getTableCellAttributes();
// Build the paragraph / frame / row properties for this row and send them.
323 writerfilter::Reference<Properties>::Pointer_t paraProperties;
324 writerfilter::Reference<Properties>::Pointer_t frameProperties;
325 writerfilter::Reference<Properties>::Pointer_t rowProperties;
326 prepareProperties(m_aStates.top(), paraProperties, frameProperties, rowProperties,
327 m_nTopLevelCells, m_nTopLevelCurrentCellX - m_nTopLevelTRLeft);
328 sendProperties(paraProperties, frameProperties, rowProperties);
// The row is finished: request fresh paragraph properties, empty the
// remaining buffer and reset the top-level cell count.
330 m_bNeedPap = true;
331 m_bNeedFinalPar = true;
332 m_aTableBufferStack.back().clear();
333 m_nTopLevelCells = 0;
335 if (bRestored)
336 // We restored cell definitions, clear these now.
337 // This is necessary, as later cell definitions want to overwrite the restored ones.
338 resetTableRowProperties();
340 break;
341 case RTFKeyword::COLUMN:
343 bool bColumns = false; // If we have multiple columns
344 RTFValue::Pointer_t pCols
345 = m_aStates.top().getSectionSprms().find(NS_ooxml::LN_EG_SectPrContents_cols);
346 if (pCols)
348 RTFValue::Pointer_t pNum = pCols->getAttributes().find(NS_ooxml::LN_CT_Columns_num);
349 if (pNum && pNum->getInt() > 1)
350 bColumns = true;
352 checkFirstRun();
// With more than one column, the single byte 0xe (sBreak) is sent in its own
// character group; otherwise the symbol is handled like PAGE.
353 if (bColumns)
355 sal_uInt8 const sBreak[] = { 0xe };
356 Mapper().startCharacterGroup();
357 Mapper().text(sBreak, 1);
358 Mapper().endCharacterGroup();
360 else
361 dispatchSymbol(RTFKeyword::PAGE);
363 break;
364 case RTFKeyword::CHFTN:
366 if (m_aStates.top().getCurrentBuffer() == &m_aSuperBuffer)
367 // Stop buffering, there will be no custom mark for this footnote or endnote.
368 m_aStates.top().setCurrentBuffer(nullptr);
369 break;
371 case RTFKeyword::PAGE:
373 // Ignore page breaks inside tables.
374 if (m_aStates.top().getCurrentBuffer() == &m_aTableBufferStack.back())
375 break;
377 // If we're inside a continuous section, we should send a section break, not a page one.
378 RTFValue::Pointer_t pBreak
379 = m_aStates.top().getSectionSprms().find(NS_ooxml::LN_EG_SectPrContents_type);
380 // Unless we're on a title page.
381 RTFValue::Pointer_t pTitlePg
382 = m_aStates.top().getSectionSprms().find(NS_ooxml::LN_EG_SectPrContents_titlePg);
383 if (((pBreak
384 && pBreak->getInt()
385 == static_cast<sal_Int32>(NS_ooxml::LN_Value_ST_SectionMark_continuous)
386 && m_bHadSect) // tdf#158983 before first \sect, ignore \sbknone!
387 || m_nResetBreakOnSectBreak == RTFKeyword::SBKNONE)
388 && !(pTitlePg && pTitlePg->getInt()))
390 if (m_bWasInFrame)
392 dispatchSymbol(RTFKeyword::PAR);
393 m_bWasInFrame = false;
395 sectBreak();
396 // note: this will not affect the following section break
397 // but the one just pushed
398 dispatchFlag(RTFKeyword::SBKPAGE);
399 if (m_bNeedPar)
400 dispatchSymbol(RTFKeyword::PAR);
// Consumed by the SECT case, which then dispatches PAR instead of breaking
// the section.
401 m_bIgnoreNextContSectBreak = true;
402 // arrange to clean up the synthetic RTFKeyword::SBKPAGE
403 m_nResetBreakOnSectBreak = RTFKeyword::SBKNONE;
405 else
// m_bFirstRun is copied before checkFirstRun() is called, so the test below
// uses the value from before that call.
407 bool bFirstRun = m_bFirstRun;
408 checkFirstRun();
409 if (bFirstRun || m_bNeedCr)
411 // Only send the paragraph properties early if we'll create a new paragraph in a
412 // bit anyway.
413 checkNeedPap();
414 // flush previously deferred break - needed for testFdo49893_2
415 // which has consecutive \page with no text between
416 sal_Unicode const nothing[] = { 0 /*MSVC doesn't allow it to be empty*/ };
417 Mapper().utext(nothing, 0);
// The single byte 0xc (sBreak) is sent as text for the page break.
419 sal_uInt8 const sBreak[] = { 0xc };
420 Mapper().text(sBreak, 1);
421 // testFdo81892 don't do another \par break directly; because of
422 // GetSplitPgBreakAndParaMark() it does finishParagraph *twice*
423 m_bNeedCr = true;
426 break;
427 case RTFKeyword::CHPGN:
// Emits the text "PAGE" between the cFieldStart, cFieldSep and cFieldEnd
// marker characters.
429 OUString aStr("PAGE");
430 singleChar(cFieldStart);
431 text(aStr);
432 singleChar(cFieldSep, true);
433 singleChar(cFieldEnd);
435 break;
436 case RTFKeyword::CHFTNSEP:
// Sends the single code unit 0x3 through Mapper().utext().
438 static const sal_Unicode uFtnEdnSep = 0x3;
439 Mapper().utext(&uFtnEdnSep, 1);
441 break;
442 default:
// Unhandled keyword: log it and mark aSkip as not parsed.
444 SAL_INFO("writerfilter.rtf",
445 "TODO handle symbol '" << keywordToString(nKeyword) << "'");
446 aSkip.setParsed(false);
448 break;
450 return RTFError::OK;
453 } // namespace writerfilter::rtftok
455 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */