tdf#130857 qt weld: Implement QtInstanceWidget::strip_mnemonic
[LibreOffice.git] / sc / source / ui / docshell / impex.cxx
blobb7a27b5b67116391ba4a3e1b8ec6972f1914b195
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <comphelper/processfactory.hxx>
21 #include <i18nlangtag/languagetag.hxx>
22 #include <i18nutil/unicode.hxx>
23 #include <sot/formats.hxx>
24 #include <sfx2/mieclip.hxx>
25 #include <com/sun/star/i18n/CalendarFieldIndex.hpp>
26 #include <sal/log.hxx>
27 #include <unotools/charclass.hxx>
28 #include <osl/module.hxx>
29 #include <o3tl/string_view.hxx>
31 #include <global.hxx>
32 #include <docsh.hxx>
33 #include <undoblk.hxx>
34 #include <rangenam.hxx>
35 #include <tabvwsh.hxx>
36 #include <filter.hxx>
37 #include <asciiopt.hxx>
38 #include <formulacell.hxx>
39 #include <cellform.hxx>
40 #include <progress.hxx>
41 #include <scitems.hxx>
42 #include <editable.hxx>
43 #include <compiler.hxx>
44 #include <warnbox.hxx>
45 #include <clipparam.hxx>
46 #include <impex.hxx>
47 #include <editutil.hxx>
48 #include <patattr.hxx>
49 #include <docpool.hxx>
50 #include <stringutil.hxx>
51 #include <cellvalue.hxx>
52 #include <tokenarray.hxx>
53 #include <documentimport.hxx>
54 #include <refundo.hxx>
55 #include <mtvelements.hxx>
57 #include <globstr.hrc>
58 #include <scresid.hxx>
59 #include <o3tl/safeint.hxx>
60 #include <tools/svlibrary.h>
61 #include <comphelper/configuration.hxx>
62 #include <vcl/svapp.hxx>
63 #include <vcl/weld.hxx>
64 #include <editeng/editobj.hxx>
65 #include <svl/numformat.hxx>
66 #include <rtl/character.hxx>
67 #include <rtl/math.hxx>
68 #include <sax/tools/converter.hxx>
70 #include <memory>
71 #include <string_view>
73 #include <unicode/uchar.h>
75 #include <osl/endian.h>
76 #include <osl/file.hxx>
78 // We don't want to end up with 2GB read in one line just because of malformed
79 // multiline fields, so chop it _somewhere_, which is twice supported columns
80 // times arbitrary maximum cell content length, 2*1024*64K=128M, and because
81 // it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of
82 // luck anyway.
// Upper bound, in UTF-16 code units, on the content that lcl_appendLineData()
// will accumulate in a single field before truncating.
83 constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16;
// Upper bound handed to ReadUniOrByteStringLine() when reading one text line.
// NOTE(review): the "1024" in the arithmetic above presumably reflects an older
// MAXCOLCOUNT value - confirm against the current definition.
84 constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit;
namespace
{
    // Escape sequence that lcl_UnescapeSylk() turns back into a line feed.
    constexpr char SYLK_LF[] = "\x1b :";
}

namespace {

/// Generator/version of a SYLK stream; the enumerators are ordered, and the
/// scanners below compare them with >= to pick the matching escaping rules.
enum class SylkVersion
{
    SCALC3,     ///< Wrote wrongly quoted strings and unescaped semicolons.
    OOO32,      ///< Correct strings, plus multiline content.
    OWN,        ///< Place our new versions, if any, before this value.
    OTHER       ///< Assume that aliens wrote correct strings.
};

}

103 // Whole document without Undo
104 ScImportExport::ScImportExport( ScDocument& r )
105 : pDocSh( r.GetDocumentShell() ), rDoc( r ),
106 nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
107 cSep( '\t' ), cStr( '"' ),
108 bFormulas( false ), bIncludeFiltered( true ),
109 bAll( true ), bSingle( true ), bUndo( false ),
110 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
111 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
113 pUndoDoc = nullptr;
114 pExtOptions = nullptr;
117 // Insert am current cell without range(es)
118 ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt )
119 : pDocSh( r.GetDocumentShell() ), rDoc( r ),
120 aRange( rPt ),
121 nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
122 cSep( '\t' ), cStr( '"' ),
123 bFormulas( false ), bIncludeFiltered( true ),
124 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
125 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
126 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
128 pUndoDoc = nullptr;
129 pExtOptions = nullptr;
132 // ctor with a range is only used for export
133 //! ctor with a string (and bSingle=true) is also used for DdeSetData
134 ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange )
135 : pDocSh( r.GetDocumentShell() ), rDoc( r ),
136 aRange( rRange ),
137 nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
138 cSep( '\t' ), cStr( '"' ),
139 bFormulas( false ), bIncludeFiltered( true ),
140 bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ),
141 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
142 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
144 pUndoDoc = nullptr;
145 pExtOptions = nullptr;
146 // Only one sheet (table) supported
147 aRange.aEnd.SetTab( aRange.aStart.Tab() );
150 // Evaluate input string - either range, cell or the whole document (when error)
151 // If a View exists, the TabNo of the view will be used.
152 ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos )
153 : pDocSh( r.GetDocumentShell() ), rDoc( r ),
154 nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
155 cSep( '\t' ), cStr( '"' ),
156 bFormulas( false ), bIncludeFiltered( true ),
157 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
158 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
159 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
161 pUndoDoc = nullptr;
162 pExtOptions = nullptr;
164 SCTAB nTab = ScDocShell::GetCurTab();
165 aRange.aStart.SetTab( nTab );
166 OUString aPos( rPos );
167 // Named range?
168 ScRangeName* pRange = rDoc.GetRangeName();
169 if (pRange)
171 const ScRangeData* pData = pRange->findByUpperName(ScGlobal::getCharClass().uppercase(aPos));
172 if (pData)
174 if( pData->HasType( ScRangeData::Type::RefArea )
175 || pData->HasType( ScRangeData::Type::AbsArea )
176 || pData->HasType( ScRangeData::Type::AbsPos ) )
178 aPos = pData->GetSymbol();
182 formula::FormulaGrammar::AddressConvention eConv = rDoc.GetAddressConvention();
183 // Range?
184 if (aRange.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
185 bSingle = false;
186 // Cell?
187 else if (aRange.aStart.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
188 aRange.aEnd = aRange.aStart;
189 else
190 bAll = true;
193 ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE
195 pUndoDoc.reset();
196 pExtOptions.reset();
199 void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt )
201 if ( pExtOptions )
202 *pExtOptions = rOpt;
203 else
204 pExtOptions.reset(new ScAsciiOptions( rOpt ));
206 // "normal" Options
208 cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false);
209 cStr = rOpt.GetTextSep();
212 void ScImportExport::SetFilterOptions(const OUString& rFilterOptions)
214 maFilterOptions = rFilterOptions;
217 bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat )
219 return nFormat == SotClipboardFormatId::STRING
220 || nFormat == SotClipboardFormatId::STRING_TSVC
221 || nFormat == SotClipboardFormatId::SYLK
222 || nFormat == SotClipboardFormatId::LINK
223 || nFormat == SotClipboardFormatId::HTML
224 || nFormat == SotClipboardFormatId::HTML_SIMPLE
225 || nFormat == SotClipboardFormatId::DIF;
228 // Prepare for Undo
229 bool ScImportExport::StartPaste()
231 if ( !bAll )
233 ScEditableTester aTester( rDoc, aRange, sc::EditAction::Unknown );
234 if ( !aTester.IsEditable() )
236 std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(ScDocShell::GetActiveDialogParent(),
237 VclMessageType::Info, VclButtonsType::Ok,
238 ScResId(aTester.GetMessageId())));
239 xInfoBox->run();
240 return false;
243 if( bUndo && pDocSh && rDoc.IsUndoEnabled())
245 pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO ));
246 pUndoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
247 rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc);
249 return true;
252 // Create Undo/Redo actions, Invalidate/Repaint
253 void ScImportExport::EndPaste(bool bAutoRowHeight)
255 bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight(
256 aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() );
258 if( pUndoDoc && rDoc.IsUndoEnabled() && pDocSh )
260 ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO ));
261 pRedoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
262 rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc);
263 ScMarkData aDestMark(pRedoDoc->GetSheetLimits());
264 aDestMark.SetMarkArea(aRange);
265 pDocSh->GetUndoManager()->AddUndoAction(
266 std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr));
268 pUndoDoc.reset();
269 if( pDocSh )
271 if (!bHeight)
272 pDocSh->PostPaint( aRange, PaintPartFlags::Grid );
273 pDocSh->SetDocumentModified();
275 ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell();
276 if ( pViewSh )
277 pViewSh->UpdateInputHandler();
281 bool ScImportExport::ExportData( std::u16string_view rMimeType,
282 css::uno::Any & rValue )
284 SvMemoryStream aStrm;
285 SotClipboardFormatId fmtId = SotExchange::GetFormatIdFromMimeType(rMimeType);
286 if (fmtId == SotClipboardFormatId::STRING)
287 aStrm.SetStreamCharSet(RTL_TEXTENCODING_UNICODE);
288 // mba: no BaseURL for data exchange
289 if (ExportStream(aStrm, OUString(), fmtId))
291 if (fmtId == SotClipboardFormatId::STRING)
293 assert(aStrm.TellEnd() % sizeof(sal_Unicode) == 0);
294 rValue <<= OUString(static_cast<const sal_Unicode*>(aStrm.GetData()),
295 aStrm.TellEnd() / sizeof(sal_Unicode));
297 else
299 aStrm.WriteUChar(0);
300 rValue <<= css::uno::Sequence<sal_Int8>(static_cast<sal_Int8 const*>(aStrm.GetData()),
301 aStrm.TellEnd());
303 return true;
305 return false;
308 bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt )
310 switch ( nFmt )
312 // formats supporting unicode
313 case SotClipboardFormatId::STRING :
314 case SotClipboardFormatId::STRING_TSVC :
316 ScImportStringStream aStrm( rText);
317 return ImportStream( aStrm, OUString(), nFmt );
318 // ImportStream must handle RTL_TEXTENCODING_UNICODE
320 default:
322 rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
323 OString aTmp( rText.getStr(), rText.getLength(), eEnc );
324 SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(char), StreamMode::READ );
325 aStrm.SetStreamCharSet( eEnc );
326 SetNoEndianSwap( aStrm ); //! no swapping in memory
327 return ImportStream( aStrm, OUString(), nFmt );
332 bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt )
334 if ( nFmt != SotClipboardFormatId::STRING && nFmt != SotClipboardFormatId::STRING_TSVC )
336 SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]");
337 rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
338 OString aTmp;
339 bool bOk = ExportByteString( aTmp, eEnc, nFmt );
340 rText = OStringToOUString( aTmp, eEnc );
341 return bOk;
343 // nSizeLimit not needed for OUString
345 SvMemoryStream aStrm;
346 aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
347 SetNoEndianSwap( aStrm ); //! no swapping in memory
348 // mba: no BaseURL for data exc
349 if( ExportStream( aStrm, OUString(), nFmt ) )
351 aStrm.WriteUInt16( 0 );
352 rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) );
353 return true;
355 rText.clear();
356 return false;
358 // ExportStream must handle RTL_TEXTENCODING_UNICODE
361 bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt )
363 OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" );
364 if ( eEnc == RTL_TEXTENCODING_UNICODE )
365 eEnc = osl_getThreadTextEncoding();
367 if (!nSizeLimit)
368 nSizeLimit = SAL_MAX_UINT16;
370 SvMemoryStream aStrm;
371 aStrm.SetStreamCharSet( eEnc );
372 SetNoEndianSwap( aStrm ); //! no swapping in memory
373 // mba: no BaseURL for data exchange
374 if( ExportStream( aStrm, OUString(), nFmt ) )
376 aStrm.WriteChar( 0 );
377 if( aStrm.TellEnd() <= nSizeLimit )
379 rText = static_cast<const char*>(aStrm.GetData());
380 return true;
383 rText.clear();
384 return false;
387 bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
389 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
391 if( ExtText2Doc( rStrm ) ) // evaluate pExtOptions
392 return true;
394 if( nFmt == SotClipboardFormatId::SYLK )
396 if( Sylk2Doc( rStrm ) )
397 return true;
399 if( nFmt == SotClipboardFormatId::DIF )
401 if( Dif2Doc( rStrm ) )
402 return true;
404 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
406 if( RTF2Doc( rStrm, rBaseURL ) )
407 return true;
409 if( nFmt == SotClipboardFormatId::LINK )
410 return true; // Link-Import?
411 if ( nFmt == SotClipboardFormatId::HTML )
413 if( HTML2Doc( rStrm, rBaseURL ) )
414 return true;
416 if ( nFmt == SotClipboardFormatId::HTML_SIMPLE )
418 MSE40HTMLClipFormatObj aMSE40ClpObj; // needed to skip the header data
419 SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm );
420 if ( pHTML && HTML2Doc( *pHTML, rBaseURL ) )
421 return true;
424 return false;
427 bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
429 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
431 if( Doc2Text( rStrm ) )
432 return true;
434 if( nFmt == SotClipboardFormatId::SYLK )
436 if( Doc2Sylk( rStrm ) )
437 return true;
439 if( nFmt == SotClipboardFormatId::DIF )
441 if( Doc2Dif( rStrm ) )
442 return true;
444 if( nFmt == SotClipboardFormatId::LINK && !bAll )
446 OUString aDocName;
447 if ( rDoc.IsClipboard() )
448 aDocName = ScGlobal::GetClipDocName();
449 else
451 ScDocShell* pShell = rDoc.GetDocumentShell();
452 if (pShell)
453 aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME );
456 OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" );
457 if( !aDocName.isEmpty() )
459 // Always use Calc A1 syntax for paste link.
460 OUString aRefName;
461 ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D;
462 if( bSingle )
463 aRefName = aRange.aStart.Format(nFlags, &rDoc, formula::FormulaGrammar::CONV_OOO);
464 else
466 if( aRange.aStart.Tab() != aRange.aEnd.Tab() )
467 nFlags |= ScRefFlags::TAB2_3D;
468 aRefName = aRange.Format(rDoc, nFlags, formula::FormulaGrammar::CONV_OOO);
470 OUString aAppName = Application::GetAppName();
472 // extra bits are used to tell the client to prefer external
473 // reference link.
474 return TransferableDataHelper::WriteDDELink(rStrm, aAppName, aDocName, aRefName,
475 u"calc:extref");
478 if( nFmt == SotClipboardFormatId::HTML )
480 if( Doc2HTML( rStrm, rBaseURL ) )
481 return true;
483 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
485 if( Doc2RTF( rStrm ) )
486 return true;
489 return false;
492 // tdf#104927
493 // http://www.unicode.org/reports/tr11/
494 sal_Int32 ScImportExport::CountVisualWidth(std::u16string_view rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth)
496 sal_Int32 nWidth = 0;
497 while(nIdx < static_cast<sal_Int32>(rStr.size()) && nWidth < nMaxWidth)
499 sal_uInt32 nCode = o3tl::iterateCodePoints(rStr, &nIdx);
501 auto nEaWidth = u_getIntPropertyValue(nCode, UCHAR_EAST_ASIAN_WIDTH);
502 if (nEaWidth == U_EA_FULLWIDTH || nEaWidth == U_EA_WIDE)
503 nWidth += 2;
504 else if (!u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
505 nWidth += 1;
508 if (nIdx < static_cast<sal_Int32>(rStr.size()))
510 sal_Int32 nTmpIdx = nIdx;
511 sal_uInt32 nCode = o3tl::iterateCodePoints(rStr, &nTmpIdx);
513 if (u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
514 nIdx = nTmpIdx;
516 return nWidth;
519 sal_Int32 ScImportExport::CountVisualWidth(std::u16string_view rStr)
521 sal_Int32 nIdx = 0;
522 return CountVisualWidth(rStr, nIdx, SAL_MAX_INT32);
525 void ScImportExport::SetNoEndianSwap( SvStream& rStrm )
527 #ifdef OSL_BIGENDIAN
528 rStrm.SetEndian( SvStreamEndian::BIG );
529 #else
530 rStrm.SetEndian( SvStreamEndian::LITTLE );
531 #endif
534 static inline bool lcl_isFieldEnd( sal_Unicode c, const sal_Unicode* pSeps )
536 return !c || ScGlobal::UnicodeStrChr( pSeps, c);
namespace {

/// Classification of a quote character; produced by lcl_isFieldEndQuote()
/// and lcl_isEscapedOrFieldEndQuote().
enum QuoteType
{
    FIELDSTART_QUOTE,   ///< first quote in a field
    FIRST_QUOTE,        ///< first of a doubled quote
    SECOND_QUOTE,       ///< second of a doubled quote
    FIELDEND_QUOTE,     ///< quote that ends the field
    DONTKNOW_QUOTE      ///< unescaped quote not considered an end of field
};

}

552 /** Determine if *p is a quote that ends a quoted field.
554 Precondition: we are parsing a quoted field already and *p is a quote.
556 @return
557 FIELDEND_QUOTE if end of field quote
558 DONTKNOW_QUOTE anything else
560 static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep )
562 // Due to broken CSV generators that don't double embedded quotes check if
563 // a field separator immediately or with trailing spaces follows the quote,
564 // only then end the field, or at end of string.
565 constexpr sal_Unicode cBlank = ' ';
566 if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank))
567 return FIELDEND_QUOTE;
568 // Detect a possible blank separator if it's not already in the list (which
569 // was checked right above for p[1]==cBlank).
570 const bool bBlankSep = (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank);
571 while (p[1] == cBlank)
572 ++p;
573 if (lcl_isFieldEnd( p[1], pSeps))
574 return FIELDEND_QUOTE;
575 // Extended separator detection after a closing quote (with or without
576 // blanks). Note that nQuotes is incremented *after* the call so is not yet
577 // even here, and that with separator detection we reach here only if
578 // lcl_isEscapedOrFieldEndQuote() did not already detect FIRST_QUOTE or
579 // SECOND_QUOTE for an escaped embedded quote, thus nQuotes does not have
580 // to be checked.
581 if (!rcDetectSep)
583 static constexpr sal_Unicode vSep[] = { ',', '\t', ';' };
584 for (const sal_Unicode c : vSep)
586 if (p[1] == c)
588 rcDetectSep = c;
589 return FIELDEND_QUOTE;
593 // Blank separator is least significant, after others.
594 if (bBlankSep)
596 rcDetectSep = cBlank;
597 return FIELDEND_QUOTE;
599 return DONTKNOW_QUOTE;
602 /** Determine if *p is a quote that is escaped by being doubled or ends a
603 quoted field.
605 Precondition: *p is a quote.
607 @param nQuotes
608 Quote characters encountered so far.
609 Odd (after opening quote) means either no embedded quotes or only quote
610 pairs so far.
611 Even means either not in a quoted field or already one quote
612 encountered, the first of a pair.
614 @return
615 FIELDSTART_QUOTE if first quote in a field, either starting content or
616 embedded so caller should check beforehand.
617 FIRST_QUOTE if first of a doubled quote
618 SECOND_QUOTE if second of a doubled quote
619 FIELDEND_QUOTE if end of field quote
620 DONTKNOW_QUOTE if an unescaped quote we don't consider as end of field,
621 do not increment nQuotes in caller then!
623 static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p,
624 const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep )
626 if ((nQuotes & 1) == 0)
628 if (p[-1] == cStr)
629 return SECOND_QUOTE;
630 else
632 SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?");
633 return FIELDSTART_QUOTE;
636 if (p[1] == cStr)
637 return FIRST_QUOTE;
638 return lcl_isFieldEndQuote( p, pSeps, rcDetectSep);
641 /** Append characters of [p1,p2) to rField.
643 @returns TRUE if ok; FALSE if data overflow, truncated
645 static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 )
647 if (rField.getLength() + (p2 - p1) <= nArbitraryCellLengthLimit)
649 rField += std::u16string_view( p1, p2 - p1 );
650 return true;
652 else
654 SAL_WARN( "sc", "lcl_appendLineData: data overflow");
655 rField += std::u16string_view( p1, nArbitraryCellLengthLimit - rField.getLength() );
656 return false;
namespace {

/// How lcl_ScanString() treats doubled quote characters.
enum class DoubledQuoteMode
{
    KEEP_ALL,   ///< both are taken, additionally start and end quote are included in string
    ESCAPE,     ///< escaped quote, one is taken, one ignored
};

}

670 /** Scan for a quoted string.
672 Precondition: initial current position *p is a cStr quote.
674 For DoubledQuoteMode::ESCAPE, if after the closing quote there is a field
675 end (with or without trailing blanks and as determined by
676 lcl_isFieldEndQuote()), then the content is appended to rField with quotes
677 processed and removed. Else if no field end after the quoted string was
678 detected, nothing is appended and processing continues and is repeated
679 until the next quote. If no closing quote at a field end was found at all,
680 nothing is appended and the initial position is returned and caller has to
681 decide, usually just taking all as literal data.
683 For DoubledQuoteMode::KEEP_ALL, the string up to and including the closing
684 quote is appended to rField and the next position returned, regardless
685 whether there is a field separator following or not.
688 static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rField,
689 const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell )
691 OUString aString;
692 bool bClosingQuote = (eMode == DoubledQuoteMode::KEEP_ALL);
693 const sal_Unicode* const pStart = p;
694 if (eMode != DoubledQuoteMode::KEEP_ALL)
695 p++; //! jump over opening quote
696 bool bCont;
699 bCont = false;
700 const sal_Unicode* p0 = p;
701 for( ;; )
703 if (!*p)
705 // Encountering end of data after an opening quote is not a
706 // quoted string, ReadCsvLine() concatenated lines with '\n'
707 // for a properly quoted embedded linefeed.
708 if (eMode == DoubledQuoteMode::KEEP_ALL)
709 // Caller would append that data anyway, so we can do it
710 // already here.
711 break;
713 return pStart;
716 if( *p == cStr )
718 if ( *++p != cStr )
720 // break or continue for loop
721 if (eMode == DoubledQuoteMode::ESCAPE)
723 sal_Unicode cDetectSep = 0xffff; // No separator detection here.
724 if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE)
726 bClosingQuote = true;
727 break;
729 else
730 continue;
732 else
733 break;
735 // doubled quote char
736 switch ( eMode )
738 case DoubledQuoteMode::KEEP_ALL :
739 p++; // both for us (not breaking for-loop)
740 break;
741 case DoubledQuoteMode::ESCAPE :
742 p++; // one for us (breaking for-loop)
743 bCont = true; // and more
744 break;
746 if ( eMode == DoubledQuoteMode::ESCAPE )
747 break;
749 else
750 p++;
752 if ( p0 < p )
754 if (!lcl_appendLineData( aString, p0, ((eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr)) ? p-1 : p)))
755 rbOverflowCell = true;
757 } while ( bCont );
759 if (!bClosingQuote)
760 return pStart;
762 if (!aString.isEmpty())
763 rField += aString;
765 return p;
768 static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion )
770 // Older versions didn't escape the semicolon.
771 // Older versions quoted the string and doubled embedded quotes, but not
772 // the semicolons, which was plain wrong.
773 if (eVersion >= SylkVersion::OOO32)
774 rString = rString.replaceAll(";;", ";");
775 else
776 rString = rString.replaceAll("\"\"", "\"");
778 rString = rString.replaceAll(SYLK_LF, "\n");
781 static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p,
782 OUString& rString, SylkVersion eVersion )
784 const sal_Unicode* pStartQuote = p;
785 const sal_Unicode* pEndQuote = nullptr;
786 while( *(++p) )
788 if( *p == '"' )
790 pEndQuote = p;
791 if (eVersion >= SylkVersion::OOO32)
793 if (*(p+1) == ';')
795 if (*(p+2) == ';')
797 p += 2; // escaped ';'
798 pEndQuote = nullptr;
800 else
801 break; // end field
804 else
806 if (*(p+1) == '"')
808 ++p; // escaped '"'
809 pEndQuote = nullptr;
811 else if (*(p+1) == ';')
812 break; // end field
816 if (!pEndQuote)
817 pEndQuote = p; // Take all data as string.
818 rString += std::u16string_view(pStartQuote + 1, pEndQuote - pStartQuote - 1 );
819 lcl_UnescapeSylk( rString, eVersion);
820 return p;
823 static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p,
824 OUString& rString, SylkVersion eVersion )
826 const sal_Unicode* pStart = p;
827 if (eVersion >= SylkVersion::OOO32)
829 while (*p)
831 if (*p == ';')
833 if (*(p+1) == ';')
834 ++p; // escaped ';'
835 else
836 break; // end field
838 ++p;
840 rString += std::u16string_view( pStart, p - pStart);
841 lcl_UnescapeSylk( rString, eVersion);
843 else
845 // Nasty. If in old versions the formula contained a semicolon, it was
846 // quoted and embedded quotes were doubled, but semicolons were not. If
847 // there was no semicolon, it could still contain quotes and doubled
848 // embedded quotes if it was something like ="a""b", which was saved as
849 // E"a""b" as is and has to be preserved, even if older versions
850 // couldn't even load it correctly. However, theoretically another
851 // field might follow and thus the line contain a semicolon again, such
852 // as ...;E"a""b";...
853 bool bQuoted = false;
854 if (*p == '"')
856 // May be a quoted expression or just a string constant expression
857 // with quotes.
858 while (*(++p))
860 if (*p == '"')
862 if (*(p+1) == '"')
863 ++p; // escaped '"'
864 else
865 break; // closing '"', had no ';' yet
867 else if (*p == ';')
869 bQuoted = true; // ';' within quoted expression
870 break;
873 p = pStart;
875 if (bQuoted)
876 p = lcl_ScanSylkString( p, rString, eVersion);
877 else
879 while (*p && *p != ';')
880 ++p;
881 rString += std::u16string_view( pStart, p - pStart);
884 return p;
887 static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc )
889 if (cEsc)
891 // the goal is to replace cStr by cStr+cStr
892 OUString strFrom(cEsc);
893 OUString strTo = strFrom + strFrom;
894 rString = rString.replaceAll(strFrom, strTo);
897 if (cQuote)
899 rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote);
902 rStrm.WriteUnicodeOrByteText(rString);
905 bool ScImportExport::Text2Doc( SvStream& rStrm )
907 bool bOk = true;
909 sal_Unicode pSeps[2];
910 pSeps[0] = cSep;
911 pSeps[1] = 0;
913 ScSetStringParam aSetStringParam;
914 aSetStringParam.mbCheckLinkFormula = true;
916 SCCOL nStartCol = aRange.aStart.Col();
917 SCROW nStartRow = aRange.aStart.Row();
918 SCCOL nEndCol = aRange.aEnd.Col();
919 SCROW nEndRow = aRange.aEnd.Row();
920 sal_uInt64 nOldPos = rStrm.Tell();
921 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
922 bool bData = !bSingle;
923 if( !bSingle)
924 bOk = StartPaste();
926 while( bOk )
928 OUString aLine;
929 OUString aCell;
930 SCROW nRow = nStartRow;
931 rStrm.Seek( nOldPos );
932 for( ;; )
934 rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit );
935 // tdf#125440 When inserting tab separated string, consider quotes as field markers
936 DoubledQuoteMode mode = aLine.indexOf("\t") >= 0 ? DoubledQuoteMode::ESCAPE : DoubledQuoteMode::KEEP_ALL;
937 if( rStrm.eof() )
938 break;
939 SCCOL nCol = nStartCol;
940 const sal_Unicode* p = aLine.getStr();
941 while( *p )
943 aCell.clear();
944 const sal_Unicode* q = p;
945 if (*p == cStr)
947 // Look for a pairing quote.
948 q = p = lcl_ScanString( p, aCell, pSeps, cStr, mode, bOverflowCell );
950 // All until next separator.
951 while (*p && *p != cSep)
952 ++p;
953 if (!lcl_appendLineData( aCell, q, p))
954 bOverflowCell = true; // display warning on import
955 if (*p)
956 ++p;
957 if (rDoc.ValidCol(nCol) && rDoc.ValidRow(nRow) )
959 if( bSingle )
961 if (nCol>nEndCol) nEndCol = nCol;
962 if (nRow>nEndRow) nEndRow = nRow;
964 if( bData && nCol <= nEndCol && nRow <= nEndRow )
965 rDoc.SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam );
967 else // too many columns/rows
969 if (!rDoc.ValidRow(nRow))
970 bOverflowRow = true; // display warning on import
971 if (!rDoc.ValidCol(nCol))
972 bOverflowCol = true; // display warning on import
974 ++nCol;
976 ++nRow;
979 if( !bData )
981 aRange.aEnd.SetCol( nEndCol );
982 aRange.aEnd.SetRow( nEndRow );
983 bOk = StartPaste();
984 bData = true;
986 else
987 break;
990 EndPaste();
991 if (bOk && mbImportBroadcast)
993 rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
994 pDocSh->PostDataChanged();
997 return bOk;
1000 // Extended Ascii-Import
1002 static bool lcl_PutString(
1003 ScDocumentImport& rDocImport, bool bUseDocImport,
1004 SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat,
1005 SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bDetectSciNumFormat, bool bEvaluateFormulas, bool bSkipEmptyCells,
1006 const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar,
1007 const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar )
1009 ScDocument& rDoc = rDocImport.getDoc();
1010 bool bMultiLine = false;
1011 if ( nColFormat == SC_COL_SKIP || !rDoc.ValidCol(nCol) || !rDoc.ValidRow(nRow) )
1012 return bMultiLine;
1013 if ( rStr.isEmpty() )
1015 if ( !bSkipEmptyCells )
1016 { // delete destination cell
1017 if ( bUseDocImport )
1018 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr );
1019 else
1020 rDoc.SetString( nCol, nRow, nTab, rStr );
1022 return false;
1025 const bool bForceFormulaText = (!bEvaluateFormulas && rStr[0] == '=');
1026 if (nColFormat == SC_COL_TEXT || bForceFormulaText)
1028 if ( bUseDocImport )
1030 double fDummy;
1031 sal_uInt32 nIndex = 0;
1032 if (bForceFormulaText || rDoc.GetFormatTable()->IsNumberFormat(rStr, nIndex, fDummy))
1034 // Set the format of this cell to Text.
1035 // This is only necessary for ScDocumentImport,
1036 // ScDocument::SetTextCell() forces it by ScSetStringParam.
1037 sal_uInt32 nFormat = rDoc.GetFormatTable()->GetStandardFormat(SvNumFormatType::TEXT);
1038 ScPatternAttr aNewAttrs(rDoc.getCellAttributeHelper());
1039 SfxItemSet& rSet = aNewAttrs.GetItemSet();
1040 rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) );
1041 rDoc.ApplyPattern(nCol, nRow, nTab, aNewAttrs);
1043 if (ScStringUtil::isMultiline(rStr))
1045 ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
1046 rEngine.SetTextCurrentDefaults(rStr);
1047 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1048 return true;
1050 else
1052 rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr);
1053 return false;
1056 else
1058 rDoc.SetTextCell(ScAddress(nCol, nRow, nTab), rStr);
1059 return bMultiLine;
1063 if ( nColFormat == SC_COL_ENGLISH )
1065 //! SetString with Extra-Flag ???
1067 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1068 sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US);
1069 double fVal;
1070 if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) )
1072 // Numberformat will not be set to English
1073 if ( bUseDocImport )
1074 rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal );
1075 else
1076 rDoc.SetValue( nCol, nRow, nTab, fVal );
1077 return bMultiLine;
1079 // else, continue with SetString
1081 else if ( nColFormat != SC_COL_STANDARD ) // Datumformats
1083 const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t
1084 const sal_Int32 nLen = rStr.getLength();
1085 sal_Int32 nStart[nMaxNumberParts];
1086 sal_Int32 nEnd[nMaxNumberParts];
1088 bool bIso;
1089 sal_uInt16 nDP, nMP, nYP;
1090 switch ( nColFormat )
1092 case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; bIso = true; break;
1093 case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; bIso = false; break;
1094 case SC_COL_DMY:
1095 default: nDP = 0; nMP = 1; nYP = 2; bIso = false; break;
1098 sal_uInt16 nFound = 0;
1099 bool bInNum = false;
1100 for (sal_Int32 nPos = 0; nPos < nLen && (bInNum || nFound < nMaxNumberParts); ++nPos)
1102 bool bLetter = false;
1103 if (rtl::isAsciiDigit(rStr[nPos]) ||
1104 (((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1))
1105 && (bLetter = ScGlobal::getCharClass().isLetterNumeric( rStr, nPos))))
1107 if (!bInNum)
1109 bInNum = true;
1110 nStart[nFound] = nPos;
1111 ++nFound;
1113 nEnd[nFound-1] = nPos;
1114 if (bIso && (bLetter || (2 <= nFound && nFound <= 6 && nPos > nStart[nFound-1] + 1)))
1115 // Each M,D,h,m,s at most 2 digits.
1116 bIso = false;
1118 else
1120 bInNum = false;
1121 if (bIso)
1123 // ([+-])YYYY-MM-DD([T ]hh:mm(:ss(.fff)))(([+-])TZ)
1124 // XXX NOTE: timezone is accepted here, but number
1125 // formatter parser will not, so the end result will be
1126 // type Text to preserve timezone information.
1127 switch (rStr[nPos])
1129 case '+':
1130 if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
1131 // Accept timezone offset.
1133 else if (nPos > 0)
1134 // Accept one leading sign.
1135 bIso = false;
1136 break;
1137 case '-':
1138 if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
1139 // Accept timezone offset.
1141 else if (nFound == 0 && nPos > 0)
1142 // Accept one leading sign.
1143 bIso = false;
1144 else if (nFound < 1 || 2 < nFound || nPos != nEnd[nFound-1] + 1)
1145 // Not immediately after 1 or 1-2
1146 bIso = false;
1147 break;
1148 case 'T':
1149 case ' ':
1150 if (nFound != 3 || nPos != nEnd[nFound-1] + 1)
1151 // Not immediately after 1-2-3
1152 bIso = false;
1153 break;
1154 case ':':
1155 if (nFound < 4 || 5 < nFound || nPos != nEnd[nFound-1] + 1)
1156 // Not at 1-2-3T4:5:
1157 bIso = false;
1158 break;
1159 case '.':
1160 case ',':
1161 if (nFound != 6 || nPos != nEnd[nFound-1] + 1)
1162 // Not at 1-2-3T4:5:6.
1163 bIso = false;
1164 break;
1165 case 'Z':
1166 if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
1167 // Accept Zero timezone.
1169 else
1170 bIso = false;
1171 break;
1172 default:
1173 bIso = false;
1179 if (nFound < 3)
1180 bIso = false;
1182 if (bIso)
1184 // Leave conversion and detection of various possible number
1185 // formats to the number formatter. ISO is recognized in any locale
1186 // so we can directly use the document's formatter.
1187 sal_uInt32 nFormat = 0;
1188 double fVal = 0.0;
1189 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1190 if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal))
1192 if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE)
1194 ScAddress aPos(nCol,nRow,nTab);
1195 if (bUseDocImport)
1196 rDocImport.setNumericCell(aPos, fVal);
1197 else
1198 rDoc.SetValue(aPos, fVal);
1199 rDoc.SetNumberFormat(aPos, nFormat);
1201 return bMultiLine; // success
1204 // If we reach here it is type Text (e.g. timezone or trailing
1205 // characters). Handled below.
1208 if ( nFound == 1 )
1210 // try to break one number (without separators) into date fields
1212 sal_Int32 nDateStart = nStart[0];
1213 sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart;
1215 if ( nDateLen >= 5 && nDateLen <= 8 &&
1216 ScGlobal::getCharClass().isNumeric( rStr.copy( nDateStart, nDateLen ) ) )
1218 // 6 digits: 2 each for day, month, year
1219 // 8 digits: 4 for year, 2 each for day and month
1220 // 5 or 7 digits: first field is shortened by 1
1222 bool bLongYear = ( nDateLen >= 7 );
1223 bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 );
1225 sal_uInt16 nFieldStart = nDateStart;
1226 for (sal_uInt16 nPos=0; nPos<3; nPos++)
1228 sal_uInt16 nFieldEnd = nFieldStart + 1; // default: 2 digits
1229 if ( bLongYear && nPos == nYP )
1230 nFieldEnd += 2; // 2 extra digits for long year
1231 if ( bShortFirst && nPos == 0 )
1232 --nFieldEnd; // first field shortened?
1234 nStart[nPos] = nFieldStart;
1235 nEnd[nPos] = nFieldEnd;
1236 nFieldStart = nFieldEnd + 1;
1238 nFound = 3;
1242 if (!bIso && nFound >= 3)
1244 using namespace ::com::sun::star;
1245 bool bSecondCal = false;
1246 sal_uInt16 nDay = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nDP], nEnd[nDP]+1-nStart[nDP] )));
1247 sal_uInt16 nYear = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nYP], nEnd[nYP]+1-nStart[nYP] )));
1248 OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] );
1249 sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32());
1250 if (!nMonth)
1252 static constexpr OUString aSepShortened = u"SEP"_ustr;
1253 uno::Sequence< i18n::CalendarItem2 > xMonths;
1254 sal_Int32 i, nMonthCount;
1255 // first test all month names from local international
1256 xMonths = rCalendar.getMonths();
1257 nMonthCount = xMonths.getLength();
1258 for (i=0; i<nMonthCount && !nMonth; i++)
1260 if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) ||
1261 rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) )
1262 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1263 else if ( i == 8 && rTransliteration.isEqual( u"SEPT"_ustr,
1264 xMonths[i].AbbrevName ) &&
1265 rTransliteration.isEqual( aMStr, aSepShortened ) )
1266 { // correct English abbreviation is SEPT,
1267 // but data mostly contains SEP only
1268 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1271 // if none found, then test english month names
1272 if ( !nMonth && pSecondCalendar && pSecondTransliteration )
1274 xMonths = pSecondCalendar->getMonths();
1275 nMonthCount = xMonths.getLength();
1276 for (i=0; i<nMonthCount && !nMonth; i++)
1278 if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) ||
1279 pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) )
1281 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1282 bSecondCal = true;
1284 else if ( i == 8 && pSecondTransliteration->isEqual(
1285 aMStr, aSepShortened ) )
1286 { // correct English abbreviation is SEPT,
1287 // but data mostly contains SEP only
1288 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1289 bSecondCal = true;
1295 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1296 if ( nYear < 100 )
1297 nYear = pDocFormatter->ExpandTwoDigitYear( nYear );
1299 CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar);
1300 sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear();
1301 if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths )
1303 --nMonth;
1304 pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay );
1305 pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth );
1306 pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear );
1307 sal_Int16 nHour, nMinute, nSecond;
1308 // #i14974# The imported value should have no fractional value, so set the
1309 // time fields to zero (ICU calendar instance defaults to current date/time)
1310 nHour = nMinute = nSecond = 0;
1311 if (nFound > 3)
1312 nHour = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[3], nEnd[3]+1-nStart[3])));
1313 if (nFound > 4)
1314 nMinute = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[4], nEnd[4]+1-nStart[4])));
1315 if (nFound > 5)
1316 nSecond = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[5], nEnd[5]+1-nStart[5])));
1317 // do not use calendar's milliseconds, to avoid fractional part truncation
1318 double fFrac = 0.0;
1319 if (nFound > 6)
1321 sal_Unicode cDec = '.';
1322 OUString aT = OUStringChar(cDec) + rStr.subView( nStart[6], nEnd[6]+1-nStart[6]);
1323 rtl_math_ConversionStatus eStatus;
1324 double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus );
1325 if (eStatus == rtl_math_ConversionStatus_Ok)
1326 fFrac = fV / 86400.0;
1328 sal_Int32 nPos;
1329 if (nFound > 3 && 1 <= nHour && nHour <= 12 // nHour 0 and >=13 can't be AM/PM
1330 && (nPos = nEnd[nFound-1] + 1) < nLen)
1332 // Dreaded AM/PM may be following.
1333 while (nPos < nLen && rStr[nPos] == ' ')
1334 ++nPos;
1335 if (nPos < nLen)
1337 sal_Int32 nStop = nPos;
1338 while (nStop < nLen && rStr[nStop] != ' ')
1339 ++nStop;
1340 OUString aAmPm = rStr.copy( nPos, nStop - nPos);
1341 // For AM only 12 needs to be treated, whereas for PM
1342 // it must not. Check both, locale and second/English
1343 // strings.
1344 if (nHour == 12 &&
1345 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimeAM()) ||
1346 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, u"AM"_ustr))))
1348 nHour = 0;
1350 else if (nHour < 12 &&
1351 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimePM()) ||
1352 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, u"PM"_ustr))))
1354 nHour += 12;
1358 pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour );
1359 pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute );
1360 pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond );
1361 pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 );
1362 if ( pCalendar->isValid() )
1364 // Whole days diff.
1365 double fDiff = DateTime::Sub( DateTime(pDocFormatter->GetNullDate()),
1366 pCalendar->getEpochStart());
1367 // #i14974# must use getLocalDateTime to get the same
1368 // date values as set above
1369 double fDays = pCalendar->getLocalDateTime() + fFrac;
1370 fDays -= fDiff;
1372 LanguageType eLatin, eCjk, eCtl;
1373 rDoc.GetLanguage( eLatin, eCjk, eCtl );
1374 LanguageType eDocLang = eLatin; //! which language for date formats?
1376 SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE);
1377 sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang );
1378 // maybe there is a special format including seconds or milliseconds
1379 if (nFound > 5)
1380 nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang);
1382 ScAddress aPos(nCol,nRow,nTab);
1383 if ( bUseDocImport )
1384 rDocImport.setNumericCell(aPos, fDays);
1385 else
1386 rDoc.SetValue( aPos, fDays );
1387 rDoc.SetNumberFormat(aPos, nFormat);
1389 return bMultiLine; // success
1395 // Standard or date not determined -> SetString / EditCell
1396 if( rStr.indexOf( '\n' ) == -1 )
1398 if (!bDetectNumFormat && nColFormat == SC_COL_STANDARD)
1400 // Import a strict ISO 8601 date(+time) string even without
1401 // "Detect special numbers" or "Date (YMD)".
1404 // Simple pre-check before calling more expensive parser.
1405 // ([+-])(Y)YYYY-MM-DD
1406 if (rStr.getLength() < 10)
1407 break;
1408 const sal_Int32 n1 = rStr.indexOf('-', 1);
1409 if (n1 < 4)
1410 break;
1411 const sal_Int32 n2 = rStr.indexOf('-', n1 + 1);
1412 if (n2 < 7 || n1 + 3 < n2)
1413 break;
1415 css::util::DateTime aDateTime;
1416 if (!sax::Converter::parseDateTime( aDateTime, rStr))
1417 break;
1419 sal_uInt32 nFormat = 0;
1420 double fVal = 0.0;
1421 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1422 if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal))
1424 if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE)
1426 ScAddress aPos(nCol,nRow,nTab);
1427 if (bUseDocImport)
1428 rDocImport.setNumericCell(aPos, fVal);
1429 else
1430 rDoc.SetValue(aPos, fVal);
1431 rDoc.SetNumberFormat(aPos, nFormat);
1433 return bMultiLine; // success
1437 while(false);
1440 ScSetStringParam aParam;
1441 aParam.mpNumFormatter = pFormatter;
1442 aParam.mbDetectNumberFormat = bDetectNumFormat;
1443 aParam.mbDetectScientificNumberFormat = bDetectSciNumFormat;
1444 aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly;
1445 aParam.mbHandleApostrophe = false;
1446 aParam.mbCheckLinkFormula = true;
1447 if ( bUseDocImport )
1448 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam);
1449 else
1450 rDoc.SetString( nCol, nRow, nTab, rStr, &aParam );
1452 else
1454 bMultiLine = true;
1455 ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
1456 rEngine.SetTextCurrentDefaults(rStr);
1457 if ( bUseDocImport )
1458 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1459 else
1460 rDoc.SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() );
1462 return bMultiLine;
1465 static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext,
1466 bool& rbIsQuoted, bool& rbOverflowCell )
1468 sal_Int32 nLen = rLine.getLength();
1469 if (nNext > nLen)
1470 nNext = nLen;
1471 if ( nNext <= nStart )
1472 return OUString();
1474 const sal_Unicode* pStr = rLine.getStr();
1476 sal_Int32 nSpace = nNext;
1477 while ( nSpace > nStart && pStr[nSpace-1] == ' ' )
1478 --nSpace;
1480 rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"');
1481 if (rbIsQuoted)
1483 bool bFits = (nSpace - nStart - 3 <= nArbitraryCellLengthLimit);
1484 if (bFits)
1485 return rLine.copy(nStart+1, std::max< sal_Int32 >(0, nSpace-nStart-2));
1486 else
1488 SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
1489 rbOverflowCell = true;
1490 return rLine.copy(nStart+1, nArbitraryCellLengthLimit);
1493 else
1495 bool bFits = (nSpace - nStart <= nArbitraryCellLengthLimit);
1496 if (bFits)
1497 return rLine.copy(nStart, nSpace-nStart);
1498 else
1500 SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
1501 rbOverflowCell = true;
1502 return rLine.copy(nStart, nArbitraryCellLengthLimit);
1507 bool ScImportExport::ExtText2Doc( SvStream& rStrm )
1509 if (!pExtOptions)
1510 return Text2Doc( rStrm );
1512 sal_uInt64 const nOldPos = rStrm.Tell();
1513 sal_uInt64 const nRemaining = rStrm.remainingSize();
1514 std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh,
1515 ScResId( STR_LOAD_DOC ), nRemaining, true ));
1516 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
1517 // tdf#82254 - check whether to include a byte-order-mark in the output
1518 if (nOldPos != rStrm.Tell())
1519 mbIncludeBOM = true;
1521 SCCOL nStartCol = aRange.aStart.Col();
1522 SCCOL nEndCol = aRange.aEnd.Col();
1523 SCROW nStartRow = aRange.aStart.Row();
1524 const SCTAB nTab = aRange.aStart.Tab();
1526 bool bFixed = pExtOptions->IsFixedLen();
1527 OUString aSeps = pExtOptions->GetFieldSeps(); // Need non-const for ReadCsvLine(),
1528 const sal_Unicode* pSeps = aSeps.getStr(); // but it will be const anyway (asserted below).
1529 bool bMerge = pExtOptions->IsMergeSeps();
1530 bool bRemoveSpace = pExtOptions->IsRemoveSpace();
1531 sal_uInt16 nInfoCount = pExtOptions->GetInfoCount();
1532 const sal_Int32* pColStart = pExtOptions->GetColStart();
1533 const sal_uInt8* pColFormat = pExtOptions->GetColFormat();
1534 tools::Long nSkipLines = pExtOptions->GetStartRow();
1536 LanguageType eDocLang = pExtOptions->GetLanguage();
1537 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang);
1538 bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber();
1539 bool bDetectSciNumFormat = pExtOptions->IsDetectScientificNumber();
1540 bool bEvaluateFormulas = pExtOptions->IsEvaluateFormulas();
1541 bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells();
1543 // For date recognition
1544 ::utl::TransliterationWrapper aTransliteration(
1545 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE );
1546 aTransliteration.loadModuleIfNeeded( eDocLang );
1547 CalendarWrapper aCalendar( comphelper::getProcessComponentContext() );
1548 aCalendar.loadDefaultCalendar(
1549 LanguageTag::convertToLocale( eDocLang ) );
1550 std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration;
1551 std::unique_ptr< CalendarWrapper > pEnglishCalendar;
1552 if ( eDocLang != LANGUAGE_ENGLISH_US )
1554 pEnglishTransliteration.reset(new ::utl::TransliterationWrapper (
1555 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ));
1556 aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US );
1557 pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() ));
1558 pEnglishCalendar->loadDefaultCalendar(
1559 LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) );
1562 OUString aLine;
1563 OUString aCell;
1564 sal_uInt16 i;
1565 SCROW nRow = nStartRow;
1566 sal_Unicode cDetectSep = 0xffff; // No separator detection here.
1568 while(--nSkipLines>0)
1570 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored
1571 if ( rStrm.eof() )
1572 break;
1575 // Determine range for Undo.
1576 // We don't need this during import of a file to a new sheet or document...
1577 bool bDetermineRange = bUndo;
1578 bool bColumnsAreDetermined = false;
1580 // Row heights don't need to be adjusted on the fly if EndPaste() is called
1581 // afterwards, which happens only if bDetermineRange. This variable also
1582 // survives the toggle of bDetermineRange down at the end of the do{} loop.
1583 bool bRangeIsDetermined = bDetermineRange;
1585 bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText();
1587 sal_uInt64 nOriginalStreamPos = rStrm.Tell();
1589 SCROW nFirstUpdateRowHeight = SCROW_MAX;
1590 SCROW nLastUpdateRowHeight = -1;
1592 ScDocumentImport aDocImport(rDoc);
1595 const SCCOL nLastCol = nEndCol; // tdf#129701 preserve value of nEndCol
1596 for( ;; )
1598 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep);
1599 if ( rStrm.eof() && aLine.isEmpty() )
1600 break;
1602 assert(pSeps == aSeps.getStr());
1604 if ( nRow > rDoc.MaxRow() )
1606 bOverflowRow = true; // display warning on import
1607 break; // for
1610 if (!bDetermineRange)
1611 EmbeddedNullTreatment( aLine);
1613 sal_Int32 nLineLen = aLine.getLength();
1614 SCCOL nCol = nStartCol;
1615 bool bMultiLine = false;
1616 if ( bFixed ) // Fixed line length
1618 if (bDetermineRange)
1620 if (!bColumnsAreDetermined)
1622 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it
1623 // is only an overflow if there is really data following to
1624 // be put behind the last column, which doesn't happen if
1625 // info is SC_COL_SKIP.
1626 for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
1628 const sal_uInt8 nFmt = pColFormat[i];
1629 if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either
1631 if (nCol > rDoc.MaxCol())
1632 bOverflowCol = true; // display warning on import
1633 ++nCol;
1636 bColumnsAreDetermined = true;
1639 else
1641 sal_Int32 nStartIdx = 0;
1642 // Same maxcol+1 check reason as above.
1643 for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
1645 sal_Int32 nNextIdx = nStartIdx;
1646 if (i + 1 < nInfoCount)
1647 CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] );
1648 else
1649 nNextIdx = nLineLen;
1650 sal_uInt8 nFmt = pColFormat[i];
1651 if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either
1653 if (nCol > rDoc.MaxCol())
1654 bOverflowCol = true; // display warning on import
1655 else
1657 bool bIsQuoted = false;
1658 aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell );
1659 if (bIsQuoted && bQuotedAsText)
1660 nFmt = SC_COL_TEXT;
1662 bMultiLine |= lcl_PutString(
1663 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1664 &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
1665 aTransliteration, aCalendar,
1666 pEnglishTransliteration.get(), pEnglishCalendar.get());
1668 ++nCol;
1670 nStartIdx = nNextIdx;
1674 else // Search for the separator
1676 SCCOL nSourceCol = 0;
1677 sal_uInt16 nInfoStart = 0;
1678 const sal_Unicode* p = aLine.getStr();
1679 // tdf#129701 if there is only one column, and user wants to treat empty cells,
1680 // we need to detect *p = null
1681 bool bIsLastColEmpty = !(*p) && !bSkipEmptyCells && !bDetermineRange;
1682 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1683 // overflow if there is really data following to be put behind
1684 // the last column, which doesn't happen if info is
1685 // SC_COL_SKIP.
1686 while ( (*p || bIsLastColEmpty) && nCol <= rDoc.MaxCol()+1)
1688 bool bIsQuoted = false;
1689 p = ScImportExport::ScanNextFieldFromString( p, aCell,
1690 cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace );
1692 sal_uInt8 nFmt = SC_COL_STANDARD;
1693 for ( i=nInfoStart; i<nInfoCount; i++ )
1695 if ( pColStart[i] == nSourceCol + 1 ) // pColStart is 1-based
1697 nFmt = pColFormat[i];
1698 nInfoStart = i + 1; // ColInfos are in succession
1699 break; // for
1702 if ( nFmt != SC_COL_SKIP )
1704 if (nCol > rDoc.MaxCol())
1705 bOverflowCol = true; // display warning on import
1706 else if (!bDetermineRange)
1708 if (bIsQuoted && bQuotedAsText)
1709 nFmt = SC_COL_TEXT;
1711 bMultiLine |= lcl_PutString(
1712 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1713 &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
1714 aTransliteration, aCalendar,
1715 pEnglishTransliteration.get(), pEnglishCalendar.get());
1717 ++nCol;
1718 if (bIsLastColEmpty)
1720 bIsLastColEmpty = false; // toggle to stop
1722 else
1724 // tdf#129701 detect if there is a last empty column when we need it
1725 bIsLastColEmpty = (nCol == nLastCol) && !(*p) && !bSkipEmptyCells && !bDetermineRange;
1729 ++nSourceCol;
1732 if (nEndCol < nCol)
1733 nEndCol = nCol; //! points to the next free or even rDoc.MaxCol()+2
1735 if (!bDetermineRange)
1737 if (bMultiLine && !bRangeIsDetermined && pDocSh)
1738 { // Adjust just once at the end for a whole range.
1739 nFirstUpdateRowHeight = std::min( nFirstUpdateRowHeight, nRow );
1740 nLastUpdateRowHeight = std::max( nLastUpdateRowHeight, nRow );
1742 xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos );
1744 ++nRow;
1746 // so far nRow/nEndCol pointed to the next free
1747 if (nRow > nStartRow)
1748 --nRow;
1749 if (nEndCol > nStartCol)
1750 nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), rDoc.MaxCol());
1752 if (bDetermineRange)
1754 aRange.aEnd.SetCol( nEndCol );
1755 aRange.aEnd.SetRow( nRow );
1757 if ( !mbApi && nStartCol != nEndCol &&
1758 !rDoc.IsBlockEmpty( nStartCol + 1, nStartRow, nEndCol, nRow, nTab ) )
1760 ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent());
1761 if (aBox.run() != RET_YES)
1763 return false;
1767 rStrm.Seek( nOriginalStreamPos );
1768 nRow = nStartRow;
1769 if (!StartPaste())
1771 EndPaste(false);
1772 return false;
1776 bDetermineRange = !bDetermineRange; // toggle
1777 } while (!bDetermineRange);
1779 if ( !mbOverwriting )
1780 aDocImport.finalize();
1782 xProgress.reset(); // make room for AdjustRowHeight progress
1784 if( nFirstUpdateRowHeight < nLastUpdateRowHeight && pDocSh )
1785 pDocSh->AdjustRowHeight( nFirstUpdateRowHeight, nLastUpdateRowHeight, nTab);
1787 if (bRangeIsDetermined)
1788 EndPaste(false);
1790 if (mbImportBroadcast && !mbOverwriting)
1792 rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
1793 pDocSh->PostDataChanged();
1795 return true;
1798 void ScImportExport::EmbeddedNullTreatment( OUString & rStr )
1800 // A nasty workaround for data with embedded NULL characters. As long as we
1801 // can't handle them properly as cell content (things assume 0-terminated
1802 // strings at too many places) simply strip all NULL characters from raw
1803 // data. Excel does the same. See fdo#57841 for sample data.
1805 // The normal case is no embedded NULL, check first before de-/allocating
1806 // ustring stuff.
1807 sal_Unicode cNull = 0;
1808 if (sal_Int32 pos = rStr.indexOf(cNull); pos >= 0)
1810 rStr = rStr.replaceAll(std::u16string_view(&cNull, 1), u"", pos);
1814 const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p,
1815 OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted,
1816 bool& rbOverflowCell, bool bRemoveSpace )
1818 rbIsQuoted = false;
1819 rField.clear();
1820 const sal_Unicode cBlank = ' ';
1821 if (cStr && !ScGlobal::UnicodeStrChr(pSeps, cBlank))
1823 // Cope with broken generators that put leading blanks before a quoted
1824 // field, like "field1", "field2", "..."
1825 // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180
1826 const sal_Unicode* pb = p;
1827 while (*pb == cBlank)
1828 ++pb;
1829 if (*pb == cStr)
1830 p = pb;
1832 if (cStr && *p == cStr) // String in quotes
1834 rbIsQuoted = true;
1835 const sal_Unicode* p1;
1836 p1 = p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell );
1837 while (!lcl_isFieldEnd( *p, pSeps))
1838 p++;
1839 // Append remaining unquoted and undelimited data (dirty, dirty) to
1840 // this field.
1841 if (p > p1)
1843 const sal_Unicode* ptrim_f = p;
1844 if ( bRemoveSpace )
1846 while ( ptrim_f > p1 && ( *(ptrim_f - 1) == cBlank ) )
1847 --ptrim_f;
1849 if (!lcl_appendLineData( rField, p1, ptrim_f))
1850 rbOverflowCell = true;
1852 if( *p )
1853 p++;
1855 else // up to delimiter
1857 const sal_Unicode* p0 = p;
1858 while (!lcl_isFieldEnd( *p, pSeps))
1859 p++;
1860 const sal_Unicode* ptrim_i = p0;
1861 const sal_Unicode* ptrim_f = p; // [ptrim_i,ptrim_f) is cell data after trimming
1862 if ( bRemoveSpace )
1864 while ( ptrim_i < ptrim_f && *ptrim_i == cBlank )
1865 ++ptrim_i;
1866 while ( ptrim_f > ptrim_i && ( *(ptrim_f - 1) == cBlank ) )
1867 --ptrim_f;
1869 if (!lcl_appendLineData( rField, ptrim_i, ptrim_f))
1870 rbOverflowCell = true;
1871 if( *p )
1872 p++;
1874 if ( bMergeSeps ) // skip following delimiters
1876 while (*p && ScGlobal::UnicodeStrChr( pSeps, *p))
1877 p++;
1879 return p;
1882 namespace {
1885 * Check if a given string has any line break characters or separators.
1887 * @param rStr string to inspect.
1888 * @param cSep separator character.
1890 bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep )
1892 const sal_Unicode* p = rStr.getStr();
1893 for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p)
1895 sal_Unicode c = *p;
1896 if (c == cSep)
1897 // separator found.
1898 return true;
1900 switch (c)
1902 case '\n':
1903 case '\r':
1904 // line break found.
1905 return true;
1906 default:
1910 return false;
1915 bool ScImportExport::Doc2Text( SvStream& rStrm )
1917 SCCOL nCol;
1918 SCROW nRow;
1919 SCCOL nStartCol = aRange.aStart.Col();
1920 SCROW nStartRow = aRange.aStart.Row();
1921 SCTAB nStartTab = aRange.aStart.Tab();
1922 SCCOL nEndCol = aRange.aEnd.Col();
1923 SCROW nEndRow = aRange.aEnd.Row();
1924 SCTAB nEndTab = aRange.aEnd.Tab();
1926 if (!rDoc.GetClipParam().isMultiRange() && nStartTab == nEndTab)
1927 if (!rDoc.ShrinkToDataArea( nStartTab, nStartCol, nStartRow, nEndCol, nEndRow ))
1928 return false;
1930 OUString aCellStr;
1932 bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF);
1934 // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab.
1935 std::vector< sc::ColumnBlockPosition > blockPos( nEndCol - nStartCol + 1 );
1936 for( SCCOL i = nStartCol; i <= nEndCol; ++i )
1937 rDoc.InitColumnBlockPosition( blockPos[ i - nStartCol ], nStartTab, i );
1938 for (nRow = nStartRow; nRow <= nEndRow; nRow++)
1940 if (bIncludeFiltered || !rDoc.RowFiltered( nRow, nStartTab ))
1942 for (nCol = nStartCol; nCol <= nEndCol; nCol++)
1944 ScAddress aPos(nCol, nRow, nStartTab);
1945 sal_uInt32 nNumFmt = rDoc.GetNumberFormat(ScRange(aPos));
1946 ScInterpreterContext& rContext = rDoc.GetNonThreadedContext();
1948 ScRefCellValue aCell(rDoc, aPos, blockPos[ nCol - nStartCol ]);
1949 switch (aCell.getType())
1951 case CELLTYPE_FORMULA:
1953 if (bFormulas)
1955 aCellStr = aCell.getFormula()->GetFormula();
1956 if( aCellStr.indexOf( cSep ) != -1 )
1957 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1958 else
1959 rStrm.WriteUnicodeOrByteText(aCellStr);
1961 else
1963 const Color* pColor;
1964 aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, &rContext, rDoc);
1966 bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1967 if( bMultiLineText )
1969 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1970 aCellStr = aCellStr.replaceAll( "\n", " " );
1971 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1972 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1975 if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1976 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1978 if( mExportTextOptions.mbAddQuotes && ( aCellStr.indexOf( cSep ) != -1 ) )
1979 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1980 else
1981 rStrm.WriteUnicodeOrByteText(aCellStr);
1984 break;
1985 case CELLTYPE_VALUE:
1987 const Color* pColor;
1988 aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, &rContext, rDoc);
1989 rStrm.WriteUnicodeOrByteText(aCellStr);
1991 break;
1992 case CELLTYPE_NONE:
1993 break;
1994 default:
1996 const Color* pColor;
1997 aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, &rContext, rDoc);
1999 bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
2000 if( bMultiLineText )
2002 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
2003 aCellStr = aCellStr.replaceAll( "\n", " " );
2004 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
2005 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
2008 if( mExportTextOptions.mcSeparatorConvertTo && cSep )
2009 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
2011 if( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aCellStr, cSep) )
2012 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
2013 else
2014 rStrm.WriteUnicodeOrByteText(aCellStr);
2017 if( nCol < nEndCol )
2018 rStrm.WriteUnicodeOrByteText(rtl::OUStringChar(cSep));
2020 // Do not append a line feed for one single cell.
2021 // NOTE: this Doc2Text() is only called for clipboard via
2022 // ScImportExport::ExportStream().
2023 if (nStartRow != nEndRow || nStartCol != nEndCol)
2024 endlub(rStrm);
2025 if( rStrm.GetError() != ERRCODE_NONE )
2026 break;
2027 if( nSizeLimit && rStrm.Tell() > nSizeLimit )
2028 break;
2032 return rStrm.GetError() == ERRCODE_NONE;
2035 bool ScImportExport::Sylk2Doc( SvStream& rStrm )
2037 bool bOk = true;
2038 bool bMyDoc = false;
2039 SylkVersion eVersion = SylkVersion::OTHER;
2041 // US-English separators for StringToDouble
2042 sal_Unicode const cDecSep = '.';
2043 sal_Unicode const cGrpSep = ',';
2045 SCCOL nStartCol = aRange.aStart.Col();
2046 SCROW nStartRow = aRange.aStart.Row();
2047 SCCOL nEndCol = aRange.aEnd.Col();
2048 SCROW nEndRow = aRange.aEnd.Row();
2049 sal_uInt64 nOldPos = rStrm.Tell();
2050 bool bData = !bSingle;
2051 ::std::vector< sal_uInt32 > aFormats;
2053 if( !bSingle)
2054 bOk = StartPaste();
2056 while( bOk )
2058 OUString aLine;
2059 OUString aText;
2060 OStringBuffer aByteLine;
2061 SCCOL nCol = nStartCol;
2062 SCROW nRow = nStartRow;
2063 SCCOL nRefCol = nCol;
2064 SCROW nRefRow = nRow;
2065 rStrm.Seek( nOldPos );
2066 for( ;; )
2068 //! allow unicode
2069 (void)rStrm.ReadLine( aByteLine );
2070 aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet());
2071 if( rStrm.eof() )
2072 break;
2073 bool bInvalidCol = false;
2074 bool bInvalidRow = false;
2075 const sal_Unicode* p = aLine.getStr();
2076 sal_Unicode cTag = *p++;
2077 if( cTag == 'C' ) // Content
2079 if( *p++ != ';' )
2080 return false;
2082 bool bInvalidRefCol = false;
2083 bool bInvalidRefRow = false;
2084 while( *p )
2086 sal_Unicode ch = *p++;
2087 ch = ScGlobal::ToUpperAlpha( ch );
2088 switch( ch )
2090 case 'X':
2092 bInvalidCol = false;
2093 bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nCol);
2094 if (bFail || nCol < 0 || rDoc.MaxCol() < nCol)
2096 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
2097 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol());
2098 bInvalidCol = bOverflowCol = true;
2100 break;
2102 case 'Y':
2104 bInvalidRow = false;
2105 bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRow);
2106 if (bFail || nRow < 0 || nMaxImportRow < nRow)
2108 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
2109 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow);
2110 bInvalidRow = bOverflowRow = true;
2112 break;
2114 case 'C':
2116 bInvalidRefCol = false;
2117 bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nRefCol);
2118 if (bFail || nRefCol < 0 || rDoc.MaxCol() < nRefCol)
2120 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol);
2121 nRefCol = std::clamp<SCCOL>(nRefCol, 0, rDoc.MaxCol());
2122 bInvalidRefCol = bOverflowCol = true;
2124 break;
2126 case 'R':
2128 bInvalidRefRow = false;
2129 bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRefRow);
2130 if (bFail || nRefRow < 0 || nMaxImportRow < nRefRow)
2132 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow);
2133 nRefRow = std::clamp<SCROW>(nRefRow, 0, nMaxImportRow);
2134 bInvalidRefRow = bOverflowRow = true;
2136 break;
2138 case 'K':
2140 if( !bSingle &&
2141 ( nCol < nStartCol || nCol > nEndCol
2142 || nRow < nStartRow || nRow > nEndRow
2143 || nCol > rDoc.MaxCol() || nRow > nMaxImportRow
2144 || bInvalidCol || bInvalidRow ) )
2145 break;
2146 if( !bData )
2148 if( nRow > nEndRow )
2149 nEndRow = nRow;
2150 if( nCol > nEndCol )
2151 nEndCol = nCol;
2152 break;
2154 bool bText;
2155 if( *p == '"' )
2157 bText = true;
2158 aText.clear();
2159 p = lcl_ScanSylkString( p, aText, eVersion);
2161 else
2162 bText = false;
2163 const sal_Unicode* q = p;
2164 while( *q && *q != ';' )
2165 q++;
2166 if ( (*q != ';' || *(q+1) != 'I') && !bInvalidCol && !bInvalidRow )
2167 { // don't ignore value
2168 if( bText )
2170 rDoc.EnsureTable(aRange.aStart.Tab());
2171 rDoc.SetTextCell(
2172 ScAddress(nCol, nRow, aRange.aStart.Tab()), aText);
2174 else
2176 double fVal = rtl_math_uStringToDouble( p,
2177 aLine.getStr() + aLine.getLength(),
2178 cDecSep, cGrpSep, nullptr, nullptr );
2179 rDoc.SetValue( nCol, nRow, aRange.aStart.Tab(), fVal );
2183 break;
2184 case 'E':
2185 case 'M':
2187 if ( ch == 'M' )
2189 if ( nRefCol < nCol )
2190 nRefCol = nCol;
2191 if ( nRefRow < nRow )
2192 nRefRow = nRow;
2193 if ( !bData )
2195 if( nRefRow > nEndRow )
2196 nEndRow = nRefRow;
2197 if( nRefCol > nEndCol )
2198 nEndCol = nRefCol;
2201 if( !bMyDoc || !bData )
2202 break;
2203 aText = "=";
2204 p = lcl_ScanSylkFormula( p, aText, eVersion);
2206 if (bInvalidCol || bInvalidRow || (ch == 'M' && (bInvalidRefCol || bInvalidRefRow)))
2207 break;
2209 ScAddress aPos( nCol, nRow, aRange.aStart.Tab() );
2210 /* FIXME: do we want GRAM_ODFF_A1 instead? At the
2211 * end it probably should be GRAM_ODFF_R1C1, since
2212 * R1C1 is what Excel writes in SYLK, or even
2213 * better GRAM_ENGLISH_XL_R1C1. */
2214 const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1;
2215 ScCompiler aComp(rDoc, aPos, eGrammar);
2216 std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray
2217 rDoc.CheckLinkFormulaNeedingCheck(*xCode);
2218 if ( ch == 'M' )
2220 ScMarkData aMark(rDoc.GetSheetLimits());
2221 aMark.SelectTable( aPos.Tab(), true );
2222 rDoc.InsertMatrixFormula( nCol, nRow, nRefCol,
2223 nRefRow, aMark, OUString(), xCode.get() );
2225 else
2227 ScFormulaCell* pFCell = new ScFormulaCell(
2228 rDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE);
2229 rDoc.SetFormulaCell(aPos, pFCell);
2232 break;
2234 while( *p && *p != ';' )
2235 p++;
2236 if( *p )
2237 p++;
2240 else if( cTag == 'F' ) // Format
2242 if( *p++ != ';' )
2243 return false;
2244 sal_Int32 nFormat = -1;
2245 while( *p )
2247 sal_Unicode ch = *p++;
2248 ch = ScGlobal::ToUpperAlpha( ch );
2249 switch( ch )
2251 case 'X':
2253 bInvalidCol = false;
2254 bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nCol);
2255 if (bFail || nCol < 0 || rDoc.MaxCol() < nCol)
2257 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
2258 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol());
2259 bInvalidCol = bOverflowCol = true;
2261 break;
2263 case 'Y':
2265 bInvalidRow = false;
2266 bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRow);
2267 if (bFail || nRow < 0 || nMaxImportRow < nRow)
2269 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
2270 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow);
2271 bInvalidRow = bOverflowRow = true;
2273 break;
2275 case 'P' :
2276 if ( bData )
2278 // F;P<n> sets format code of P;P<code> at
2279 // current position, or at ;X;Y if specified.
2280 // Note that ;X;Y may appear after ;P
2281 const sal_Unicode* p0 = p;
2282 while( *p && *p != ';' )
2283 p++;
2284 OUString aNumber(p0, p - p0);
2285 nFormat = aNumber.toInt32();
2287 break;
2289 while( *p && *p != ';' )
2290 p++;
2291 if( *p )
2292 p++;
2294 if ( !bData )
2296 if( nRow > nEndRow )
2297 nEndRow = nRow;
2298 if( nCol > nEndCol )
2299 nEndCol = nCol;
2301 if ( 0 <= nFormat && o3tl::make_unsigned(nFormat) < aFormats.size() && !bInvalidCol && !bInvalidRow )
2303 sal_uInt32 nKey = aFormats[nFormat];
2304 rDoc.ApplyAttr( nCol, nRow, aRange.aStart.Tab(),
2305 SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) );
2308 else if( cTag == 'P' )
2310 if ( bData && *p == ';' && *(p+1) == 'P' )
2312 OUString aCode( p+2 );
2314 sal_uInt32 nKey;
2315 sal_Int32 nCheckPos;
2317 if (aCode.getLength() > 2048 && comphelper::IsFuzzing())
2319 // consider an excessive length as a failure when fuzzing
2320 nCheckPos = 1;
2322 else
2324 // unescape doubled semicolons
2325 aCode = aCode.replaceAll(";;", ";");
2326 // get rid of Xcl escape characters
2327 aCode = aCode.replaceAll("\x1b", "");
2328 SvNumFormatType nType;
2329 rDoc.GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey,
2330 LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false);
2333 if ( nCheckPos )
2334 nKey = 0;
2336 aFormats.push_back( nKey );
2339 else if (cTag == 'I' && *p == 'D' && aLine.getLength() > 4)
2341 aLine = aLine.copy(4);
2342 if (aLine == "CALCOOO32")
2343 eVersion = SylkVersion::OOO32;
2344 else if (aLine == "SCALC3")
2345 eVersion = SylkVersion::SCALC3;
2346 bMyDoc = (eVersion <= SylkVersion::OWN);
2348 else if( cTag == 'E' ) // End
2349 break;
2351 if( !bData )
2353 aRange.aEnd.SetCol( nEndCol );
2354 aRange.aEnd.SetRow( nEndRow );
2355 bOk = StartPaste();
2356 bData = true;
2358 else
2359 break;
2362 EndPaste();
2363 return bOk;
2366 bool ScImportExport::Doc2Sylk( SvStream& rStrm )
2368 SCCOL nCol;
2369 SCROW nRow;
2370 SCCOL nStartCol = aRange.aStart.Col();
2371 SCROW nStartRow = aRange.aStart.Row();
2372 SCCOL nEndCol = aRange.aEnd.Col();
2373 SCROW nEndRow = aRange.aEnd.Row();
2374 OUString aCellStr;
2375 OUString aValStr;
2376 rStrm.WriteUnicodeOrByteText(u"ID;PCALCOOO32");
2377 endlub(rStrm);
2379 for (nRow = nStartRow; nRow <= nEndRow; nRow++)
2381 for (nCol = nStartCol; nCol <= nEndCol; nCol++)
2383 OUString aBufStr;
2384 double nVal;
2385 bool bForm = false;
2386 SCROW r = nRow - nStartRow + 1;
2387 SCCOL c = nCol - nStartCol + 1;
2388 ScRefCellValue aCell(rDoc, ScAddress(nCol, nRow, aRange.aStart.Tab()));
2389 CellType eType = aCell.getType();
2390 switch( eType )
2392 case CELLTYPE_FORMULA:
2393 bForm = bFormulas;
2394 if( rDoc.HasValueData( nCol, nRow, aRange.aStart.Tab()) )
2395 goto hasvalue;
2396 else
2397 goto hasstring;
2399 case CELLTYPE_VALUE:
2400 hasvalue:
2401 nVal = rDoc.GetValue( nCol, nRow, aRange.aStart.Tab() );
2403 aValStr = ::rtl::math::doubleToUString( nVal,
2404 rtl_math_StringFormat_Automatic,
2405 rtl_math_DecimalPlaces_Max, '.', true );
2407 aBufStr = "C;X"
2408 + OUString::number( c )
2409 + ";Y"
2410 + OUString::number( r )
2411 + ";K"
2412 + aValStr;
2413 rStrm.WriteUnicodeOrByteText(aBufStr);
2414 goto checkformula;
2416 case CELLTYPE_STRING:
2417 case CELLTYPE_EDIT:
2418 hasstring:
2419 aCellStr = rDoc.GetString(nCol, nRow, aRange.aStart.Tab());
2420 aCellStr = aCellStr.replaceAll("\n", SYLK_LF);
2422 aBufStr = "C;X"
2423 + OUString::number( c )
2424 + ";Y"
2425 + OUString::number( r )
2426 + ";K";
2427 rStrm.WriteUnicodeOrByteText(aBufStr);
2428 lcl_WriteString( rStrm, aCellStr, '"', ';' );
2430 checkformula:
2431 if( bForm )
2433 const ScFormulaCell* pFCell = aCell.getFormula();
2434 switch ( pFCell->GetMatrixFlag() )
2436 case ScMatrixMode::Reference :
2437 aCellStr.clear();
2438 break;
2439 default:
2440 aCellStr = pFCell->GetFormula( formula::FormulaGrammar::GRAM_PODF_A1);
2441 /* FIXME: do we want GRAM_ODFF_A1 instead? At
2442 * the end it probably should be
2443 * GRAM_ODFF_R1C1, since R1C1 is what Excel
2444 * writes in SYLK, or even better
2445 * GRAM_ENGLISH_XL_R1C1. */
2447 if ( pFCell->GetMatrixFlag() != ScMatrixMode::NONE &&
2448 aCellStr.startsWith("{") &&
2449 aCellStr.endsWith("}") )
2450 { // cut off matrix {} characters
2451 aCellStr = aCellStr.copy(1, aCellStr.getLength()-2);
2453 if ( aCellStr[0] == '=' )
2454 aCellStr = aCellStr.copy(1);
2455 OUString aPrefix;
2456 switch ( pFCell->GetMatrixFlag() )
2458 case ScMatrixMode::Formula :
2459 { // diff expression with 'M' M$-extension
2460 SCCOL nC;
2461 SCROW nR;
2462 pFCell->GetMatColsRows( nC, nR );
2463 nC += c - 1;
2464 nR += r - 1;
2465 aPrefix = ";R"
2466 + OUString::number( nR )
2467 + ";C"
2468 + OUString::number( nC )
2469 + ";M";
2471 break;
2472 case ScMatrixMode::Reference :
2473 { // diff expression with 'I' M$-extension
2474 ScAddress aPos;
2475 (void)pFCell->GetMatrixOrigin( rDoc, aPos );
2476 aPrefix = ";I;R"
2477 + OUString::number( aPos.Row() - nStartRow + 1 )
2478 + ";C"
2479 + OUString::number( aPos.Col() - nStartCol + 1 );
2481 break;
2482 default:
2483 // formula Expression
2484 aPrefix = ";E";
2486 rStrm.WriteUnicodeOrByteText(aPrefix);
2487 if ( !aCellStr.isEmpty() )
2488 lcl_WriteString( rStrm, aCellStr, 0, ';' );
2490 endlub(rStrm);
2491 break;
2493 default:
2495 // added to avoid warnings
2500 rStrm.WriteUnicodeOrByteText(u"E");
2501 endlub(rStrm);
2502 return rStrm.GetError() == ERRCODE_NONE;
2505 bool ScImportExport::Doc2HTML( SvStream& rStream, const OUString& rBaseURL )
2507 std::optional<SvFileStream> oStream;
2508 char* pEnv = getenv("SC_DEBUG_HTML_COPY_TO");
2509 if (pEnv)
2511 OUString aURL;
2512 osl::FileBase::getFileURLFromSystemPath(OUString::fromUtf8(pEnv), aURL);
2513 oStream.emplace(aURL, StreamMode::WRITE);
2515 SvStream& rStrm = pEnv ? *oStream : rStream;
2516 // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options
2517 ScFormatFilter::Get().ScExportHTML( rStrm, rBaseURL, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll,
2518 aStreamPath, aNonConvertibleChars, maFilterOptions );
2519 return rStrm.GetError() == ERRCODE_NONE;
2522 bool ScImportExport::Doc2RTF( SvStream& rStrm )
2524 // rtl_TextEncoding is ignored in ScExportRTF
2525 ScFormatFilter::Get().ScExportRTF( rStrm, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW );
2526 return rStrm.GetError() == ERRCODE_NONE;
2529 bool ScImportExport::Doc2Dif( SvStream& rStrm )
2531 // for DIF in the clipboard, IBM_850 is always used
2532 ScFormatFilter::Get().ScExportDif( rStrm, &rDoc, aRange, RTL_TEXTENCODING_IBM_850 );
2533 return true;
2536 bool ScImportExport::Dif2Doc( SvStream& rStrm )
2538 SCTAB nTab = aRange.aStart.Tab();
2539 ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) );
2540 pImportDoc->InitUndo( rDoc, nTab, nTab );
2542 // for DIF in the clipboard, IBM_850 is always used
2543 ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 );
2545 SCCOL nEndCol;
2546 SCROW nEndRow;
2547 pImportDoc->GetCellArea( nTab, nEndCol, nEndRow );
2548 // if there are no cells in the imported content, nEndCol/nEndRow may be before the start
2549 if ( nEndCol < aRange.aStart.Col() )
2550 nEndCol = aRange.aStart.Col();
2551 if ( nEndRow < aRange.aStart.Row() )
2552 nEndRow = aRange.aStart.Row();
2553 aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab );
2555 bool bOk = StartPaste();
2556 if (bOk)
2558 InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2559 rDoc.DeleteAreaTab( aRange, nFlags );
2560 pImportDoc->CopyToDocument(aRange, nFlags, false, rDoc);
2561 EndPaste();
2564 return bOk;
2567 bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL )
2569 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateRTFImport( &rDoc, aRange );
2570 if (!pImp)
2571 return false;
2572 pImp->Read( rStrm, rBaseURL );
2573 aRange = pImp->GetRange();
2575 bool bOk = StartPaste();
2576 if (bOk)
2578 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2579 rDoc.DeleteAreaTab( aRange, nFlags );
2580 pImp->WriteToDocument();
2581 EndPaste();
2583 return bOk;
2586 bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL )
2588 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateHTMLImport( &rDoc, rBaseURL, aRange);
2589 if (!pImp)
2590 return false;
2592 // If this is set, read from this file, instead of the real clipboard during paste.
2593 char* pEnv = getenv("SC_DEBUG_HTML_PASTE_FROM");
2594 if (pEnv)
2596 OUString aURL;
2597 osl::FileBase::getFileURLFromSystemPath(OUString::fromUtf8(pEnv), aURL);
2598 SvFileStream aStream(aURL, StreamMode::READ);
2599 pImp->Read( aStream, rBaseURL );
2601 else
2603 pImp->Read( rStrm, rBaseURL );
2606 aRange = pImp->GetRange();
2608 bool bOk = StartPaste();
2609 if (bOk)
2611 // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in
2612 // a Draw Layer but no Draw View -> create Draw Layer and View here
2613 if (pDocSh)
2614 pDocSh->MakeDrawLayer();
2616 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2617 rDoc.DeleteAreaTab( aRange, nFlags );
2619 if (pExtOptions)
2621 // Pick up import options if available.
2622 LanguageType eLang = pExtOptions->GetLanguage();
2623 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang);
2624 bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber();
2625 bool bScientificNumber = pExtOptions->IsDetectScientificNumber();
2626 pImp->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber, bScientificNumber);
2628 else
2629 // Regular import, with no options.
2630 pImp->WriteToDocument();
2632 EndPaste();
2634 return bOk;
#ifndef DISABLE_DYNLOADING

// Empty function whose address is passed to osl::Module::loadRelative() in
// ScFormatFilter::Get() when loading the filter library.
extern "C"
{
static void thisModule()
{
}
}

#else

// Without dynamic loading the factory function is referenced directly.
extern "C"
{
ScFormatFilterPlugin* ScFilterCreate();
}

#endif
2649 typedef ScFormatFilterPlugin * (*FilterFn)();
2650 ScFormatFilterPlugin &ScFormatFilter::Get()
2652 static ScFormatFilterPlugin *plugin = []()
2654 #ifndef DISABLE_DYNLOADING
2655 OUString sFilterLib(SVLIBRARY("scfilt"));
2656 static ::osl::Module aModule;
2657 bool bLoaded = aModule.is();
2658 if (!bLoaded)
2659 bLoaded = aModule.loadRelative(&thisModule, sFilterLib);
2660 if (!bLoaded)
2661 bLoaded = aModule.load(sFilterLib);
2662 if (bLoaded)
2664 oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" );
2665 if (fn != nullptr)
2666 return reinterpret_cast<FilterFn>(fn)();
2668 assert(false);
2669 return static_cast<ScFormatFilterPlugin*>(nullptr);
2670 #else
2671 return ScFilterCreate();
2672 #endif
2673 }();
2675 return *plugin;
2678 // Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
2679 // array.
2680 static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
2681 sal_Unicode c )
2683 while (*pStr)
2685 if (*pStr == c)
2686 return pStr;
2687 ++pStr;
2689 return nullptr;
2692 ScImportStringStream::ScImportStringStream( const OUString& rStr )
2693 : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()),
2694 rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ)
2696 SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
2697 #ifdef OSL_BIGENDIAN
2698 SetEndian(SvStreamEndian::BIG);
2699 #else
2700 SetEndian(SvStreamEndian::LITTLE);
2701 #endif
// Reads one logical CSV line from rStream and returns it.
//
// The first physical line is always read with ReadUniOrByteStringLine(). If
// bEmbeddedLineBreak is set, the text is scanned for cFieldQuote characters
// and, as long as the number of counted quotes is odd, further physical lines
// are appended, joined with "\n". Appending stops at end of stream, once the
// accumulated text reaches nArbitraryLineLengthLimit, when the quote state is
// DONTKNOW_QUOTE, or when nMaxSourceLines is > 0 and that many lines have been
// scanned. If the quote count is still odd afterwards, the result is cut back
// to the length recorded at the most recent field-start quote and the stream
// is repositioned to the stream position recorded at that time.
//
// rFieldSeparators  separator characters; a detected separator is appended
// cFieldQuote       the quote character
// rcDetectSep       if 0 on entry (and bEmbeddedLineBreak is set), separator
//                   detection with one re-read of the line is allowed;
//                   presumably set by lcl_isEscapedOrFieldEndQuote() - verify
// nMaxSourceLines   limit of physical lines to join; 0 means no limit
2704 OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
2705 OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep, sal_uInt32 nMaxSourceLines )
// FORBID: never re-read. ALLOW: a re-read may still be triggered.
// RETRY: re-read requested. RETRIED: re-read done, no further one.
2707 enum RetryState
2709 FORBID,
2710 ALLOW,
2711 RETRY,
2712 RETRIED
2713 } eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0 ? RetryState::ALLOW : RetryState::FORBID);
// Remember where the line starts so that a retry can seek back to it.
2715 sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW ? rStream.Tell() : 0);
2717 Label_RetryWithNewSep:
2719 if (eRetryState == RetryState::RETRY)
2721 eRetryState = RetryState::RETRIED;
2722 rStream.Seek( nStreamPos);
2725 OUString aStr;
2726 rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
2728 if (bEmbeddedLineBreak)
// Fallback result if a quoted field is never closed; both values are
// refreshed whenever a field-start quote is seen below.
2730 sal_Int32 nFirstLineLength = aStr.getLength();
2731 sal_uInt64 nFirstLineStreamPos = rStream.Tell();
2732 sal_uInt32 nLine = 0;
2734 const sal_Unicode* pSeps = rFieldSeparators.getStr();
2736 QuoteType eQuoteState = FIELDEND_QUOTE;
2737 bool bFieldStart = true;
// nLastOffset: scanning resumes here after another line was appended, so
// already scanned text is not scanned again.
2739 sal_Int32 nLastOffset = 0;
2740 sal_Int32 nQuotes = 0;
2741 while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit)
2743 const sal_Unicode * p = aStr.getStr() + nLastOffset;
2744 const sal_Unicode * const pStop = aStr.getStr() + aStr.getLength();
2745 while (p < pStop)
2747 if (!*p)
2749 // Skip embedded null-characters. They don't change
2750 // anything and are handled at a higher level.
2751 ++p;
2752 continue;
// At least one quote has been counted so far.
2755 if (nQuotes)
2757 if (*p == cFieldQuote)
2759 if (bFieldStart)
2761 ++nQuotes;
2762 bFieldStart = false;
2763 eQuoteState = FIELDSTART_QUOTE;
2764 nFirstLineLength = aStr.getLength();
2765 nFirstLineStreamPos = rStream.Tell();
2767 // Do not detect a FIELDSTART_QUOTE if not in
2768 // bFieldStart mode, in which case for unquoted content
2769 // we are in FIELDEND_QUOTE state.
2770 else if (eQuoteState != FIELDEND_QUOTE)
2772 eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep);
// A separator was reported and a retry is still allowed: add it to the
// separators and start over from the beginning of the line.
2774 if (eRetryState == RetryState::ALLOW && rcDetectSep)
2776 eRetryState = RetryState::RETRY;
2777 rFieldSeparators += OUStringChar(rcDetectSep);
2778 pSeps = rFieldSeparators.getStr();
2779 goto Label_RetryWithNewSep;
2782 // DONTKNOW_QUOTE is an embedded unescaped quote we
2783 // don't count for pairing.
2784 if (eQuoteState != DONTKNOW_QUOTE)
2785 ++nQuotes;
2788 else if (eQuoteState == FIELDEND_QUOTE)
2790 if (bFieldStart)
2791 // If blank is a separator it starts a field, if it
2792 // is not and thus maybe leading before quote we
2793 // are still at start of field regarding quotes.
2794 bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2795 else
2796 bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
// No quote has been counted yet.
2799 else
2801 if (*p == cFieldQuote && bFieldStart)
2803 nQuotes = 1;
2804 eQuoteState = FIELDSTART_QUOTE;
2805 bFieldStart = false;
2806 nFirstLineLength = aStr.getLength();
2807 nFirstLineStreamPos = rStream.Tell();
2809 else if (eQuoteState == FIELDEND_QUOTE)
2811 // This also skips leading blanks at beginning of line
2812 // if followed by a quote. It's debatable whether we
2813 // actually want that or not, but congruent with what
2814 // ScanNextFieldFromString() does.
2815 if (bFieldStart)
2816 bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2817 else
2818 bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2821 // A quote character inside a field content does not start
2822 // a quote.
2823 ++p;
// An even quote count means no quoted field is open: the line is complete.
2826 if ((nQuotes & 1) == 0)
2827 // We still have a (theoretical?) problem here if due to
2828 // nArbitraryLineLengthLimit (or nMaxSourceLines below) we
2829 // split a string right between a doubled quote pair.
2830 break;
2831 else if (eQuoteState == DONTKNOW_QUOTE)
2832 // A single unescaped quote somewhere in a quote started
2833 // field, most likely that was not meant to have embedded
2834 // linefeeds either.
2835 break;
2836 else if (++nLine >= nMaxSourceLines && nMaxSourceLines > 0)
2837 // Unconditionally increment nLine even if nMaxSourceLines==0
2838 // so it can be observed in debugger.
2839 break;
2840 else
// A quoted field is still open: append the next physical line. A line read
// that leaves the stream at eof is not appended.
2842 nLastOffset = aStr.getLength();
2843 OUString aNext;
2844 rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
2845 if (!rStream.eof())
2846 aStr += "\n" + aNext;
2849 if (nQuotes & 1)
2851 // No closing quote at all. A single quote at field start => no
2852 // embedded linefeeds for that field, take only first logical line.
2853 aStr = aStr.copy( 0, nFirstLineLength);
2854 rStream.Seek( nFirstLineStreamPos);
2857 return aStr;
2860 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */