/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <comphelper/processfactory.hxx>
21 #include <i18nlangtag/languagetag.hxx>
22 #include <i18nutil/unicode.hxx>
23 #include <sot/formats.hxx>
24 #include <sfx2/mieclip.hxx>
25 #include <com/sun/star/i18n/CalendarFieldIndex.hpp>
26 #include <sal/log.hxx>
27 #include <unotools/charclass.hxx>
28 #include <osl/module.hxx>
29 #include <o3tl/string_view.hxx>
33 #include <undoblk.hxx>
34 #include <rangenam.hxx>
35 #include <tabvwsh.hxx>
37 #include <asciiopt.hxx>
38 #include <formulacell.hxx>
39 #include <cellform.hxx>
40 #include <progress.hxx>
41 #include <scitems.hxx>
42 #include <editable.hxx>
43 #include <compiler.hxx>
44 #include <warnbox.hxx>
45 #include <clipparam.hxx>
47 #include <editutil.hxx>
48 #include <patattr.hxx>
49 #include <docpool.hxx>
50 #include <stringutil.hxx>
51 #include <cellvalue.hxx>
52 #include <tokenarray.hxx>
53 #include <documentimport.hxx>
54 #include <refundo.hxx>
55 #include <mtvelements.hxx>
57 #include <globstr.hrc>
58 #include <scresid.hxx>
59 #include <o3tl/safeint.hxx>
60 #include <tools/svlibrary.h>
61 #include <comphelper/configuration.hxx>
62 #include <vcl/svapp.hxx>
63 #include <vcl/weld.hxx>
64 #include <editeng/editobj.hxx>
65 #include <svl/numformat.hxx>
66 #include <rtl/character.hxx>
67 #include <rtl/math.hxx>
68 #include <sax/tools/converter.hxx>
71 #include <string_view>
73 #include <unicode/uchar.h>
75 #include <osl/endian.h>
76 #include <osl/file.hxx>
// We don't want to end up with 2GB read in one line just because of malformed
// multiline fields, so chop it _somewhere_, which is twice supported columns
// times arbitrary maximum cell content length, 2*1024*64K=128M, and because
// it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of
// luck anyway.
83 constexpr sal_Int32 nArbitraryCellLengthLimit
= SAL_MAX_UINT16
;
84 constexpr sal_Int32 nArbitraryLineLengthLimit
= 2 * MAXCOLCOUNT
* nArbitraryCellLengthLimit
;
// Character sequence (ESC, blank, colon) that stands for a line feed in SYLK
// content; lcl_UnescapeSylk() turns it back into "\n".
const char SYLK_LF[] = "\x1b :";
/** Generation of the producer that wrote a SYLK file.

    The enumerators are declared from oldest to newest so that code can
    compare them with relational operators (e.g. eVersion >= OOO32).
 */
enum class SylkVersion
{
    SCALC3,  // Wrote wrongly quoted strings and unescaped semicolons.
    OOO32,   // Correct strings, plus multiline content.
    OWN,     // Place our new versions, if any, before this value.
    OTHER    // Assume that aliens wrote correct strings.
};
103 // Whole document without Undo
104 ScImportExport::ScImportExport( ScDocument
& r
)
105 : pDocSh( r
.GetDocumentShell() ), rDoc( r
),
106 nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc
.MaxRow() : SCROWS32K
),
107 cSep( '\t' ), cStr( '"' ),
108 bFormulas( false ), bIncludeFiltered( true ),
109 bAll( true ), bSingle( true ), bUndo( false ),
110 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
111 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
114 pExtOptions
= nullptr;
// Insert at current cell without range(s)
118 ScImportExport::ScImportExport( ScDocument
& r
, const ScAddress
& rPt
)
119 : pDocSh( r
.GetDocumentShell() ), rDoc( r
),
121 nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc
.MaxRow() : SCROWS32K
),
122 cSep( '\t' ), cStr( '"' ),
123 bFormulas( false ), bIncludeFiltered( true ),
124 bAll( false ), bSingle( true ), bUndo( pDocSh
!= nullptr ),
125 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
126 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
129 pExtOptions
= nullptr;
132 // ctor with a range is only used for export
133 //! ctor with a string (and bSingle=true) is also used for DdeSetData
134 ScImportExport::ScImportExport( ScDocument
& r
, const ScRange
& rRange
)
135 : pDocSh( r
.GetDocumentShell() ), rDoc( r
),
137 nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc
.MaxRow() : SCROWS32K
),
138 cSep( '\t' ), cStr( '"' ),
139 bFormulas( false ), bIncludeFiltered( true ),
140 bAll( false ), bSingle( false ), bUndo( pDocSh
!= nullptr ),
141 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
142 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
145 pExtOptions
= nullptr;
146 // Only one sheet (table) supported
147 aRange
.aEnd
.SetTab( aRange
.aStart
.Tab() );
150 // Evaluate input string - either range, cell or the whole document (when error)
151 // If a View exists, the TabNo of the view will be used.
152 ScImportExport::ScImportExport( ScDocument
& r
, const OUString
& rPos
)
153 : pDocSh( r
.GetDocumentShell() ), rDoc( r
),
154 nSizeLimit( 0 ), nMaxImportRow(!comphelper::IsFuzzing() ? rDoc
.MaxRow() : SCROWS32K
),
155 cSep( '\t' ), cStr( '"' ),
156 bFormulas( false ), bIncludeFiltered( true ),
157 bAll( false ), bSingle( true ), bUndo( pDocSh
!= nullptr ),
158 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
159 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
162 pExtOptions
= nullptr;
164 SCTAB nTab
= ScDocShell::GetCurTab();
165 aRange
.aStart
.SetTab( nTab
);
166 OUString
aPos( rPos
);
168 ScRangeName
* pRange
= rDoc
.GetRangeName();
171 const ScRangeData
* pData
= pRange
->findByUpperName(ScGlobal::getCharClass().uppercase(aPos
));
174 if( pData
->HasType( ScRangeData::Type::RefArea
)
175 || pData
->HasType( ScRangeData::Type::AbsArea
)
176 || pData
->HasType( ScRangeData::Type::AbsPos
) )
178 aPos
= pData
->GetSymbol();
182 formula::FormulaGrammar::AddressConvention eConv
= rDoc
.GetAddressConvention();
184 if (aRange
.Parse(aPos
, rDoc
, eConv
) & ScRefFlags::VALID
)
187 else if (aRange
.aStart
.Parse(aPos
, rDoc
, eConv
) & ScRefFlags::VALID
)
188 aRange
.aEnd
= aRange
.aStart
;
193 ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE
199 void ScImportExport::SetExtOptions( const ScAsciiOptions
& rOpt
)
204 pExtOptions
.reset(new ScAsciiOptions( rOpt
));
208 cSep
= ScAsciiOptions::GetWeightedFieldSep( rOpt
.GetFieldSeps(), false);
209 cStr
= rOpt
.GetTextSep();
212 void ScImportExport::SetFilterOptions(const OUString
& rFilterOptions
)
214 maFilterOptions
= rFilterOptions
;
217 bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat
)
219 return nFormat
== SotClipboardFormatId::STRING
220 || nFormat
== SotClipboardFormatId::STRING_TSVC
221 || nFormat
== SotClipboardFormatId::SYLK
222 || nFormat
== SotClipboardFormatId::LINK
223 || nFormat
== SotClipboardFormatId::HTML
224 || nFormat
== SotClipboardFormatId::HTML_SIMPLE
225 || nFormat
== SotClipboardFormatId::DIF
;
229 bool ScImportExport::StartPaste()
233 ScEditableTester
aTester( rDoc
, aRange
, sc::EditAction::Unknown
);
234 if ( !aTester
.IsEditable() )
236 std::unique_ptr
<weld::MessageDialog
> xInfoBox(Application::CreateMessageDialog(ScDocShell::GetActiveDialogParent(),
237 VclMessageType::Info
, VclButtonsType::Ok
,
238 ScResId(aTester
.GetMessageId())));
243 if( bUndo
&& pDocSh
&& rDoc
.IsUndoEnabled())
245 pUndoDoc
.reset(new ScDocument( SCDOCMODE_UNDO
));
246 pUndoDoc
->InitUndo( rDoc
, aRange
.aStart
.Tab(), aRange
.aEnd
.Tab() );
247 rDoc
.CopyToDocument(aRange
, InsertDeleteFlags::ALL
| InsertDeleteFlags::NOCAPTIONS
, false, *pUndoDoc
);
252 // Create Undo/Redo actions, Invalidate/Repaint
253 void ScImportExport::EndPaste(bool bAutoRowHeight
)
255 bool bHeight
= bAutoRowHeight
&& pDocSh
&& pDocSh
->AdjustRowHeight(
256 aRange
.aStart
.Row(), aRange
.aEnd
.Row(), aRange
.aStart
.Tab() );
258 if( pUndoDoc
&& rDoc
.IsUndoEnabled() && pDocSh
)
260 ScDocumentUniquePtr
pRedoDoc(new ScDocument( SCDOCMODE_UNDO
));
261 pRedoDoc
->InitUndo( rDoc
, aRange
.aStart
.Tab(), aRange
.aEnd
.Tab() );
262 rDoc
.CopyToDocument(aRange
, InsertDeleteFlags::ALL
| InsertDeleteFlags::NOCAPTIONS
, false, *pRedoDoc
);
263 ScMarkData
aDestMark(pRedoDoc
->GetSheetLimits());
264 aDestMark
.SetMarkArea(aRange
);
265 pDocSh
->GetUndoManager()->AddUndoAction(
266 std::make_unique
<ScUndoPaste
>(pDocSh
, aRange
, aDestMark
, std::move(pUndoDoc
), std::move(pRedoDoc
), InsertDeleteFlags::ALL
, nullptr));
272 pDocSh
->PostPaint( aRange
, PaintPartFlags::Grid
);
273 pDocSh
->SetDocumentModified();
275 ScTabViewShell
* pViewSh
= ScTabViewShell::GetActiveViewShell();
277 pViewSh
->UpdateInputHandler();
281 bool ScImportExport::ExportData( std::u16string_view rMimeType
,
282 css::uno::Any
& rValue
)
284 SvMemoryStream aStrm
;
285 SotClipboardFormatId fmtId
= SotExchange::GetFormatIdFromMimeType(rMimeType
);
286 if (fmtId
== SotClipboardFormatId::STRING
)
287 aStrm
.SetStreamCharSet(RTL_TEXTENCODING_UNICODE
);
288 // mba: no BaseURL for data exchange
289 if (ExportStream(aStrm
, OUString(), fmtId
))
291 if (fmtId
== SotClipboardFormatId::STRING
)
293 assert(aStrm
.TellEnd() % sizeof(sal_Unicode
) == 0);
294 rValue
<<= OUString(static_cast<const sal_Unicode
*>(aStrm
.GetData()),
295 aStrm
.TellEnd() / sizeof(sal_Unicode
));
300 rValue
<<= css::uno::Sequence
<sal_Int8
>(static_cast<sal_Int8
const*>(aStrm
.GetData()),
308 bool ScImportExport::ImportString( const OUString
& rText
, SotClipboardFormatId nFmt
)
312 // formats supporting unicode
313 case SotClipboardFormatId::STRING
:
314 case SotClipboardFormatId::STRING_TSVC
:
316 ScImportStringStream
aStrm( rText
);
317 return ImportStream( aStrm
, OUString(), nFmt
);
318 // ImportStream must handle RTL_TEXTENCODING_UNICODE
322 rtl_TextEncoding eEnc
= osl_getThreadTextEncoding();
323 OString
aTmp( rText
.getStr(), rText
.getLength(), eEnc
);
324 SvMemoryStream
aStrm( const_cast<char *>(aTmp
.getStr()), aTmp
.getLength() * sizeof(char), StreamMode::READ
);
325 aStrm
.SetStreamCharSet( eEnc
);
326 SetNoEndianSwap( aStrm
); //! no swapping in memory
327 return ImportStream( aStrm
, OUString(), nFmt
);
332 bool ScImportExport::ExportString( OUString
& rText
, SotClipboardFormatId nFmt
)
334 if ( nFmt
!= SotClipboardFormatId::STRING
&& nFmt
!= SotClipboardFormatId::STRING_TSVC
)
336 SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]");
337 rtl_TextEncoding eEnc
= osl_getThreadTextEncoding();
339 bool bOk
= ExportByteString( aTmp
, eEnc
, nFmt
);
340 rText
= OStringToOUString( aTmp
, eEnc
);
343 // nSizeLimit not needed for OUString
345 SvMemoryStream aStrm
;
346 aStrm
.SetStreamCharSet( RTL_TEXTENCODING_UNICODE
);
347 SetNoEndianSwap( aStrm
); //! no swapping in memory
        // mba: no BaseURL for data exchange
349 if( ExportStream( aStrm
, OUString(), nFmt
) )
351 aStrm
.WriteUInt16( 0 );
352 rText
= OUString( static_cast<const sal_Unicode
*>(aStrm
.GetData()) );
358 // ExportStream must handle RTL_TEXTENCODING_UNICODE
361 bool ScImportExport::ExportByteString( OString
& rText
, rtl_TextEncoding eEnc
, SotClipboardFormatId nFmt
)
363 OSL_ENSURE( eEnc
!= RTL_TEXTENCODING_UNICODE
, "ScImportExport::ExportByteString: Unicode not supported" );
364 if ( eEnc
== RTL_TEXTENCODING_UNICODE
)
365 eEnc
= osl_getThreadTextEncoding();
368 nSizeLimit
= SAL_MAX_UINT16
;
370 SvMemoryStream aStrm
;
371 aStrm
.SetStreamCharSet( eEnc
);
372 SetNoEndianSwap( aStrm
); //! no swapping in memory
373 // mba: no BaseURL for data exchange
374 if( ExportStream( aStrm
, OUString(), nFmt
) )
376 aStrm
.WriteChar( 0 );
377 if( aStrm
.TellEnd() <= nSizeLimit
)
379 rText
= static_cast<const char*>(aStrm
.GetData());
387 bool ScImportExport::ImportStream( SvStream
& rStrm
, const OUString
& rBaseURL
, SotClipboardFormatId nFmt
)
389 if( nFmt
== SotClipboardFormatId::STRING
|| nFmt
== SotClipboardFormatId::STRING_TSVC
)
391 if( ExtText2Doc( rStrm
) ) // evaluate pExtOptions
394 if( nFmt
== SotClipboardFormatId::SYLK
)
396 if( Sylk2Doc( rStrm
) )
399 if( nFmt
== SotClipboardFormatId::DIF
)
401 if( Dif2Doc( rStrm
) )
404 if( nFmt
== SotClipboardFormatId::RTF
|| nFmt
== SotClipboardFormatId::RICHTEXT
)
406 if( RTF2Doc( rStrm
, rBaseURL
) )
409 if( nFmt
== SotClipboardFormatId::LINK
)
410 return true; // Link-Import?
411 if ( nFmt
== SotClipboardFormatId::HTML
)
413 if( HTML2Doc( rStrm
, rBaseURL
) )
416 if ( nFmt
== SotClipboardFormatId::HTML_SIMPLE
)
418 MSE40HTMLClipFormatObj aMSE40ClpObj
; // needed to skip the header data
419 SvStream
* pHTML
= aMSE40ClpObj
.IsValid( rStrm
);
420 if ( pHTML
&& HTML2Doc( *pHTML
, rBaseURL
) )
427 bool ScImportExport::ExportStream( SvStream
& rStrm
, const OUString
& rBaseURL
, SotClipboardFormatId nFmt
)
429 if( nFmt
== SotClipboardFormatId::STRING
|| nFmt
== SotClipboardFormatId::STRING_TSVC
)
431 if( Doc2Text( rStrm
) )
434 if( nFmt
== SotClipboardFormatId::SYLK
)
436 if( Doc2Sylk( rStrm
) )
439 if( nFmt
== SotClipboardFormatId::DIF
)
441 if( Doc2Dif( rStrm
) )
444 if( nFmt
== SotClipboardFormatId::LINK
&& !bAll
)
447 if ( rDoc
.IsClipboard() )
448 aDocName
= ScGlobal::GetClipDocName();
451 ScDocShell
* pShell
= rDoc
.GetDocumentShell();
453 aDocName
= pShell
->GetTitle( SFX_TITLE_FULLNAME
);
456 OSL_ENSURE( !aDocName
.isEmpty(), "ClipBoard document has no name! :-/" );
457 if( !aDocName
.isEmpty() )
459 // Always use Calc A1 syntax for paste link.
461 ScRefFlags nFlags
= ScRefFlags::VALID
| ScRefFlags::TAB_3D
;
463 aRefName
= aRange
.aStart
.Format(nFlags
, &rDoc
, formula::FormulaGrammar::CONV_OOO
);
466 if( aRange
.aStart
.Tab() != aRange
.aEnd
.Tab() )
467 nFlags
|= ScRefFlags::TAB2_3D
;
468 aRefName
= aRange
.Format(rDoc
, nFlags
, formula::FormulaGrammar::CONV_OOO
);
470 OUString aAppName
= Application::GetAppName();
472 // extra bits are used to tell the client to prefer external
474 return TransferableDataHelper::WriteDDELink(rStrm
, aAppName
, aDocName
, aRefName
,
478 if( nFmt
== SotClipboardFormatId::HTML
)
480 if( Doc2HTML( rStrm
, rBaseURL
) )
483 if( nFmt
== SotClipboardFormatId::RTF
|| nFmt
== SotClipboardFormatId::RICHTEXT
)
485 if( Doc2RTF( rStrm
) )
493 // http://www.unicode.org/reports/tr11/
494 sal_Int32
ScImportExport::CountVisualWidth(std::u16string_view rStr
, sal_Int32
& nIdx
, sal_Int32 nMaxWidth
)
496 sal_Int32 nWidth
= 0;
497 while(nIdx
< static_cast<sal_Int32
>(rStr
.size()) && nWidth
< nMaxWidth
)
499 sal_uInt32 nCode
= o3tl::iterateCodePoints(rStr
, &nIdx
);
501 auto nEaWidth
= u_getIntPropertyValue(nCode
, UCHAR_EAST_ASIAN_WIDTH
);
502 if (nEaWidth
== U_EA_FULLWIDTH
|| nEaWidth
== U_EA_WIDE
)
504 else if (!u_getIntPropertyValue(nCode
, UCHAR_DEFAULT_IGNORABLE_CODE_POINT
))
508 if (nIdx
< static_cast<sal_Int32
>(rStr
.size()))
510 sal_Int32 nTmpIdx
= nIdx
;
511 sal_uInt32 nCode
= o3tl::iterateCodePoints(rStr
, &nTmpIdx
);
513 if (u_getIntPropertyValue(nCode
, UCHAR_DEFAULT_IGNORABLE_CODE_POINT
))
519 sal_Int32
ScImportExport::CountVisualWidth(std::u16string_view rStr
)
522 return CountVisualWidth(rStr
, nIdx
, SAL_MAX_INT32
);
525 void ScImportExport::SetNoEndianSwap( SvStream
& rStrm
)
528 rStrm
.SetEndian( SvStreamEndian::BIG
);
530 rStrm
.SetEndian( SvStreamEndian::LITTLE
);
534 static inline bool lcl_isFieldEnd( sal_Unicode c
, const sal_Unicode
* pSeps
)
536 return !c
|| ScGlobal::UnicodeStrChr( pSeps
, c
);
/** Determine if *p is a quote that ends a quoted field.

    Precondition: we are parsing a quoted field already and *p is a quote.

    @return
        FIELDEND_QUOTE  if end of field quote
        DONTKNOW_QUOTE  anything else
 */
560 static QuoteType
lcl_isFieldEndQuote( const sal_Unicode
* p
, const sal_Unicode
* pSeps
, sal_Unicode
& rcDetectSep
)
562 // Due to broken CSV generators that don't double embedded quotes check if
563 // a field separator immediately or with trailing spaces follows the quote,
564 // only then end the field, or at end of string.
565 constexpr sal_Unicode cBlank
= ' ';
566 if (p
[1] == cBlank
&& ScGlobal::UnicodeStrChr( pSeps
, cBlank
))
567 return FIELDEND_QUOTE
;
568 // Detect a possible blank separator if it's not already in the list (which
569 // was checked right above for p[1]==cBlank).
570 const bool bBlankSep
= (p
[1] == cBlank
&& !rcDetectSep
&& p
[2] && p
[2] != cBlank
);
571 while (p
[1] == cBlank
)
573 if (lcl_isFieldEnd( p
[1], pSeps
))
574 return FIELDEND_QUOTE
;
    // Extended separator detection after a closing quote (with or without
    // blanks). Note that nQuotes is incremented *after* the call so is not yet
    // even here, and that with separator detection we reach here only if
    // lcl_isEscapedOrFieldEndQuote() did not already detect FIRST_QUOTE or
    // SECOND_QUOTE for an escaped embedded quote, thus nQuotes does not have
    // to be checked.
583 static constexpr sal_Unicode vSep
[] = { ',', '\t', ';' };
584 for (const sal_Unicode c
: vSep
)
589 return FIELDEND_QUOTE
;
593 // Blank separator is least significant, after others.
596 rcDetectSep
= cBlank
;
597 return FIELDEND_QUOTE
;
599 return DONTKNOW_QUOTE
;
/** Determine if *p is a quote that is escaped by being doubled or ends a
    quoted field.

    Precondition: *p is a quote.

    @param nQuotes
        Quote characters encountered so far.
        Odd (after opening quote) means either no embedded quotes or only quote
        pairs.
        Even means either not in a quoted field or already one quote
        encountered, the first of a pair.

    @return
        FIELDSTART_QUOTE  if first quote in a field, either starting content or
                          embedded so caller should check beforehand.
        FIRST_QUOTE       if first of a doubled quote
        SECOND_QUOTE      if second of a doubled quote
        FIELDEND_QUOTE    if end of field quote
        DONTKNOW_QUOTE    if an unescaped quote we don't consider as end of field,
                          do not increment nQuotes in caller then!
 */
623 static QuoteType
lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes
, const sal_Unicode
* p
,
624 const sal_Unicode
* pSeps
, sal_Unicode cStr
, sal_Unicode
& rcDetectSep
)
626 if ((nQuotes
& 1) == 0)
632 SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?");
633 return FIELDSTART_QUOTE
;
638 return lcl_isFieldEndQuote( p
, pSeps
, rcDetectSep
);
641 /** Append characters of [p1,p2) to rField.
643 @returns TRUE if ok; FALSE if data overflow, truncated
645 static bool lcl_appendLineData( OUString
& rField
, const sal_Unicode
* p1
, const sal_Unicode
* p2
)
647 if (rField
.getLength() + (p2
- p1
) <= nArbitraryCellLengthLimit
)
649 rField
+= std::u16string_view( p1
, p2
- p1
);
654 SAL_WARN( "sc", "lcl_appendLineData: data overflow");
655 rField
+= std::u16string_view( p1
, nArbitraryCellLengthLimit
- rField
.getLength() );
/** How a doubled quote character inside a quoted string is treated while
    scanning. */
enum class DoubledQuoteMode
{
    KEEP_ALL,   // both are taken, additionally start and end quote are included in string
    ESCAPE,     // escaped quote, one is taken, one ignored
};
/** Scan for a quoted string.

    Precondition: initial current position *p is a cStr quote.

    For DoubledQuoteMode::ESCAPE, if after the closing quote there is a field
    end (with or without trailing blanks and as determined by
    lcl_isFieldEndQuote()), then the content is appended to rField with quotes
    processed and removed. Else if no field end after the quoted string was
    detected, nothing is appended and processing continues and is repeated
    until the next quote. If no closing quote at a field end was found at all,
    nothing is appended and the initial position is returned and caller has to
    decide, usually just taking all as literal data.

    For DoubledQuoteMode::KEEP_ALL, the string up to and including the closing
    quote is appended to rField and the next position returned, regardless
    of whether there is a field separator following or not.
 */
688 static const sal_Unicode
* lcl_ScanString( const sal_Unicode
* p
, OUString
& rField
,
689 const sal_Unicode
* pSeps
, sal_Unicode cStr
, DoubledQuoteMode eMode
, bool& rbOverflowCell
)
692 bool bClosingQuote
= (eMode
== DoubledQuoteMode::KEEP_ALL
);
693 const sal_Unicode
* const pStart
= p
;
694 if (eMode
!= DoubledQuoteMode::KEEP_ALL
)
695 p
++; //! jump over opening quote
700 const sal_Unicode
* p0
= p
;
705 // Encountering end of data after an opening quote is not a
706 // quoted string, ReadCsvLine() concatenated lines with '\n'
707 // for a properly quoted embedded linefeed.
708 if (eMode
== DoubledQuoteMode::KEEP_ALL
)
709 // Caller would append that data anyway, so we can do it
720 // break or continue for loop
721 if (eMode
== DoubledQuoteMode::ESCAPE
)
723 sal_Unicode cDetectSep
= 0xffff; // No separator detection here.
724 if (lcl_isFieldEndQuote( p
-1, pSeps
, cDetectSep
) == FIELDEND_QUOTE
)
726 bClosingQuote
= true;
735 // doubled quote char
738 case DoubledQuoteMode::KEEP_ALL
:
739 p
++; // both for us (not breaking for-loop)
741 case DoubledQuoteMode::ESCAPE
:
742 p
++; // one for us (breaking for-loop)
743 bCont
= true; // and more
746 if ( eMode
== DoubledQuoteMode::ESCAPE
)
754 if (!lcl_appendLineData( aString
, p0
, ((eMode
!= DoubledQuoteMode::KEEP_ALL
&& (*p
|| *(p
-1) == cStr
)) ? p
-1 : p
)))
755 rbOverflowCell
= true;
762 if (!aString
.isEmpty())
768 static void lcl_UnescapeSylk( OUString
& rString
, SylkVersion eVersion
)
770 // Older versions didn't escape the semicolon.
771 // Older versions quoted the string and doubled embedded quotes, but not
772 // the semicolons, which was plain wrong.
773 if (eVersion
>= SylkVersion::OOO32
)
774 rString
= rString
.replaceAll(";;", ";");
776 rString
= rString
.replaceAll("\"\"", "\"");
778 rString
= rString
.replaceAll(SYLK_LF
, "\n");
781 static const sal_Unicode
* lcl_ScanSylkString( const sal_Unicode
* p
,
782 OUString
& rString
, SylkVersion eVersion
)
784 const sal_Unicode
* pStartQuote
= p
;
785 const sal_Unicode
* pEndQuote
= nullptr;
791 if (eVersion
>= SylkVersion::OOO32
)
797 p
+= 2; // escaped ';'
811 else if (*(p
+1) == ';')
817 pEndQuote
= p
; // Take all data as string.
818 rString
+= std::u16string_view(pStartQuote
+ 1, pEndQuote
- pStartQuote
- 1 );
819 lcl_UnescapeSylk( rString
, eVersion
);
823 static const sal_Unicode
* lcl_ScanSylkFormula( const sal_Unicode
* p
,
824 OUString
& rString
, SylkVersion eVersion
)
826 const sal_Unicode
* pStart
= p
;
827 if (eVersion
>= SylkVersion::OOO32
)
840 rString
+= std::u16string_view( pStart
, p
- pStart
);
841 lcl_UnescapeSylk( rString
, eVersion
);
845 // Nasty. If in old versions the formula contained a semicolon, it was
846 // quoted and embedded quotes were doubled, but semicolons were not. If
847 // there was no semicolon, it could still contain quotes and doubled
848 // embedded quotes if it was something like ="a""b", which was saved as
849 // E"a""b" as is and has to be preserved, even if older versions
850 // couldn't even load it correctly. However, theoretically another
851 // field might follow and thus the line contain a semicolon again, such
852 // as ...;E"a""b";...
853 bool bQuoted
= false;
856 // May be a quoted expression or just a string constant expression
865 break; // closing '"', had no ';' yet
869 bQuoted
= true; // ';' within quoted expression
876 p
= lcl_ScanSylkString( p
, rString
, eVersion
);
879 while (*p
&& *p
!= ';')
881 rString
+= std::u16string_view( pStart
, p
- pStart
);
887 static void lcl_WriteString( SvStream
& rStrm
, OUString
& rString
, sal_Unicode cQuote
, sal_Unicode cEsc
)
891 // the goal is to replace cStr by cStr+cStr
892 OUString
strFrom(cEsc
);
893 OUString strTo
= strFrom
+ strFrom
;
894 rString
= rString
.replaceAll(strFrom
, strTo
);
899 rString
= OUStringChar(cQuote
) + rString
+ OUStringChar(cQuote
);
902 rStrm
.WriteUnicodeOrByteText(rString
);
905 bool ScImportExport::Text2Doc( SvStream
& rStrm
)
909 sal_Unicode pSeps
[2];
913 ScSetStringParam aSetStringParam
;
914 aSetStringParam
.mbCheckLinkFormula
= true;
916 SCCOL nStartCol
= aRange
.aStart
.Col();
917 SCROW nStartRow
= aRange
.aStart
.Row();
918 SCCOL nEndCol
= aRange
.aEnd
.Col();
919 SCROW nEndRow
= aRange
.aEnd
.Row();
920 sal_uInt64 nOldPos
= rStrm
.Tell();
921 rStrm
.StartReadingUnicodeText( rStrm
.GetStreamCharSet() );
922 bool bData
= !bSingle
;
930 SCROW nRow
= nStartRow
;
931 rStrm
.Seek( nOldPos
);
934 rStrm
.ReadUniOrByteStringLine( aLine
, rStrm
.GetStreamCharSet(), nArbitraryLineLengthLimit
);
935 // tdf#125440 When inserting tab separated string, consider quotes as field markers
936 DoubledQuoteMode mode
= aLine
.indexOf("\t") >= 0 ? DoubledQuoteMode::ESCAPE
: DoubledQuoteMode::KEEP_ALL
;
939 SCCOL nCol
= nStartCol
;
940 const sal_Unicode
* p
= aLine
.getStr();
944 const sal_Unicode
* q
= p
;
947 // Look for a pairing quote.
948 q
= p
= lcl_ScanString( p
, aCell
, pSeps
, cStr
, mode
, bOverflowCell
);
950 // All until next separator.
951 while (*p
&& *p
!= cSep
)
953 if (!lcl_appendLineData( aCell
, q
, p
))
954 bOverflowCell
= true; // display warning on import
957 if (rDoc
.ValidCol(nCol
) && rDoc
.ValidRow(nRow
) )
961 if (nCol
>nEndCol
) nEndCol
= nCol
;
962 if (nRow
>nEndRow
) nEndRow
= nRow
;
964 if( bData
&& nCol
<= nEndCol
&& nRow
<= nEndRow
)
965 rDoc
.SetString( nCol
, nRow
, aRange
.aStart
.Tab(), aCell
, &aSetStringParam
);
967 else // too many columns/rows
969 if (!rDoc
.ValidRow(nRow
))
970 bOverflowRow
= true; // display warning on import
971 if (!rDoc
.ValidCol(nCol
))
972 bOverflowCol
= true; // display warning on import
981 aRange
.aEnd
.SetCol( nEndCol
);
982 aRange
.aEnd
.SetRow( nEndRow
);
991 if (bOk
&& mbImportBroadcast
)
993 rDoc
.BroadcastCells(aRange
, SfxHintId::ScDataChanged
);
994 pDocSh
->PostDataChanged();
1000 // Extended Ascii-Import
1002 static bool lcl_PutString(
1003 ScDocumentImport
& rDocImport
, bool bUseDocImport
,
1004 SCCOL nCol
, SCROW nRow
, SCTAB nTab
, const OUString
& rStr
, sal_uInt8 nColFormat
,
1005 SvNumberFormatter
* pFormatter
, bool bDetectNumFormat
, bool bDetectSciNumFormat
, bool bEvaluateFormulas
, bool bSkipEmptyCells
,
1006 const ::utl::TransliterationWrapper
& rTransliteration
, CalendarWrapper
& rCalendar
,
1007 const ::utl::TransliterationWrapper
* pSecondTransliteration
, CalendarWrapper
* pSecondCalendar
)
1009 ScDocument
& rDoc
= rDocImport
.getDoc();
1010 bool bMultiLine
= false;
1011 if ( nColFormat
== SC_COL_SKIP
|| !rDoc
.ValidCol(nCol
) || !rDoc
.ValidRow(nRow
) )
1013 if ( rStr
.isEmpty() )
1015 if ( !bSkipEmptyCells
)
1016 { // delete destination cell
1017 if ( bUseDocImport
)
1018 rDocImport
.setAutoInput(ScAddress(nCol
, nRow
, nTab
), rStr
);
1020 rDoc
.SetString( nCol
, nRow
, nTab
, rStr
);
1025 const bool bForceFormulaText
= (!bEvaluateFormulas
&& rStr
[0] == '=');
1026 if (nColFormat
== SC_COL_TEXT
|| bForceFormulaText
)
1028 if ( bUseDocImport
)
1031 sal_uInt32 nIndex
= 0;
1032 if (bForceFormulaText
|| rDoc
.GetFormatTable()->IsNumberFormat(rStr
, nIndex
, fDummy
))
1034 // Set the format of this cell to Text.
1035 // This is only necessary for ScDocumentImport,
1036 // ScDocument::SetTextCell() forces it by ScSetStringParam.
1037 sal_uInt32 nFormat
= rDoc
.GetFormatTable()->GetStandardFormat(SvNumFormatType::TEXT
);
1038 ScPatternAttr
aNewAttrs(rDoc
.getCellAttributeHelper());
1039 SfxItemSet
& rSet
= aNewAttrs
.GetItemSet();
1040 rSet
.Put( SfxUInt32Item(ATTR_VALUE_FORMAT
, nFormat
) );
1041 rDoc
.ApplyPattern(nCol
, nRow
, nTab
, aNewAttrs
);
1043 if (ScStringUtil::isMultiline(rStr
))
1045 ScFieldEditEngine
& rEngine
= rDoc
.GetEditEngine();
1046 rEngine
.SetTextCurrentDefaults(rStr
);
1047 rDocImport
.setEditCell(ScAddress(nCol
, nRow
, nTab
), rEngine
.CreateTextObject());
1052 rDocImport
.setStringCell(ScAddress(nCol
, nRow
, nTab
), rStr
);
1058 rDoc
.SetTextCell(ScAddress(nCol
, nRow
, nTab
), rStr
);
1063 if ( nColFormat
== SC_COL_ENGLISH
)
1065 //! SetString with Extra-Flag ???
1067 SvNumberFormatter
* pDocFormatter
= rDoc
.GetFormatTable();
1068 sal_uInt32 nEnglish
= pDocFormatter
->GetStandardIndex(LANGUAGE_ENGLISH_US
);
1070 if ( pDocFormatter
->IsNumberFormat( rStr
, nEnglish
, fVal
) )
1072 // Numberformat will not be set to English
1073 if ( bUseDocImport
)
1074 rDocImport
.setNumericCell( ScAddress( nCol
, nRow
, nTab
), fVal
);
1076 rDoc
.SetValue( nCol
, nRow
, nTab
, fVal
);
1079 // else, continue with SetString
1081 else if ( nColFormat
!= SC_COL_STANDARD
) // Datumformats
1083 const sal_uInt16 nMaxNumberParts
= 7; // Y-M-D h:m:s.t
1084 const sal_Int32 nLen
= rStr
.getLength();
1085 sal_Int32 nStart
[nMaxNumberParts
];
1086 sal_Int32 nEnd
[nMaxNumberParts
];
1089 sal_uInt16 nDP
, nMP
, nYP
;
1090 switch ( nColFormat
)
1092 case SC_COL_YMD
: nDP
= 2; nMP
= 1; nYP
= 0; bIso
= true; break;
1093 case SC_COL_MDY
: nDP
= 1; nMP
= 0; nYP
= 2; bIso
= false; break;
1095 default: nDP
= 0; nMP
= 1; nYP
= 2; bIso
= false; break;
1098 sal_uInt16 nFound
= 0;
1099 bool bInNum
= false;
1100 for (sal_Int32 nPos
= 0; nPos
< nLen
&& (bInNum
|| nFound
< nMaxNumberParts
); ++nPos
)
1102 bool bLetter
= false;
1103 if (rtl::isAsciiDigit(rStr
[nPos
]) ||
1104 (((!bInNum
&& nFound
==nMP
) || (bInNum
&& nFound
==nMP
+1))
1105 && (bLetter
= ScGlobal::getCharClass().isLetterNumeric( rStr
, nPos
))))
1110 nStart
[nFound
] = nPos
;
1113 nEnd
[nFound
-1] = nPos
;
1114 if (bIso
&& (bLetter
|| (2 <= nFound
&& nFound
<= 6 && nPos
> nStart
[nFound
-1] + 1)))
1115 // Each M,D,h,m,s at most 2 digits.
1123 // ([+-])YYYY-MM-DD([T ]hh:mm(:ss(.fff)))(([+-])TZ)
1124 // XXX NOTE: timezone is accepted here, but number
1125 // formatter parser will not, so the end result will be
1126 // type Text to preserve timezone information.
1130 if (nFound
>= 5 && nPos
== nEnd
[nFound
-1] + 1)
1131 // Accept timezone offset.
1134 // Accept one leading sign.
1138 if (nFound
>= 5 && nPos
== nEnd
[nFound
-1] + 1)
1139 // Accept timezone offset.
1141 else if (nFound
== 0 && nPos
> 0)
1142 // Accept one leading sign.
1144 else if (nFound
< 1 || 2 < nFound
|| nPos
!= nEnd
[nFound
-1] + 1)
1145 // Not immediately after 1 or 1-2
1150 if (nFound
!= 3 || nPos
!= nEnd
[nFound
-1] + 1)
1151 // Not immediately after 1-2-3
1155 if (nFound
< 4 || 5 < nFound
|| nPos
!= nEnd
[nFound
-1] + 1)
1156 // Not at 1-2-3T4:5:
1161 if (nFound
!= 6 || nPos
!= nEnd
[nFound
-1] + 1)
1162 // Not at 1-2-3T4:5:6.
1166 if (nFound
>= 5 && nPos
== nEnd
[nFound
-1] + 1)
1167 // Accept Zero timezone.
1184 // Leave conversion and detection of various possible number
1185 // formats to the number formatter. ISO is recognized in any locale
1186 // so we can directly use the document's formatter.
1187 sal_uInt32 nFormat
= 0;
1189 SvNumberFormatter
* pDocFormatter
= rDoc
.GetFormatTable();
1190 if (pDocFormatter
->IsNumberFormat( rStr
, nFormat
, fVal
))
1192 if (pDocFormatter
->GetType(nFormat
) & SvNumFormatType::DATE
)
1194 ScAddress
aPos(nCol
,nRow
,nTab
);
1196 rDocImport
.setNumericCell(aPos
, fVal
);
1198 rDoc
.SetValue(aPos
, fVal
);
1199 rDoc
.SetNumberFormat(aPos
, nFormat
);
1201 return bMultiLine
; // success
1204 // If we reach here it is type Text (e.g. timezone or trailing
1205 // characters). Handled below.
1210 // try to break one number (without separators) into date fields
1212 sal_Int32 nDateStart
= nStart
[0];
1213 sal_Int32 nDateLen
= nEnd
[0] + 1 - nDateStart
;
1215 if ( nDateLen
>= 5 && nDateLen
<= 8 &&
1216 ScGlobal::getCharClass().isNumeric( rStr
.copy( nDateStart
, nDateLen
) ) )
1218 // 6 digits: 2 each for day, month, year
1219 // 8 digits: 4 for year, 2 each for day and month
1220 // 5 or 7 digits: first field is shortened by 1
1222 bool bLongYear
= ( nDateLen
>= 7 );
1223 bool bShortFirst
= ( nDateLen
== 5 || nDateLen
== 7 );
1225 sal_uInt16 nFieldStart
= nDateStart
;
1226 for (sal_uInt16 nPos
=0; nPos
<3; nPos
++)
1228 sal_uInt16 nFieldEnd
= nFieldStart
+ 1; // default: 2 digits
1229 if ( bLongYear
&& nPos
== nYP
)
1230 nFieldEnd
+= 2; // 2 extra digits for long year
1231 if ( bShortFirst
&& nPos
== 0 )
1232 --nFieldEnd
; // first field shortened?
1234 nStart
[nPos
] = nFieldStart
;
1235 nEnd
[nPos
] = nFieldEnd
;
1236 nFieldStart
= nFieldEnd
+ 1;
1242 if (!bIso
&& nFound
>= 3)
1244 using namespace ::com::sun::star
;
1245 bool bSecondCal
= false;
1246 sal_uInt16 nDay
= static_cast<sal_uInt16
>(o3tl::toInt32(rStr
.subView( nStart
[nDP
], nEnd
[nDP
]+1-nStart
[nDP
] )));
1247 sal_uInt16 nYear
= static_cast<sal_uInt16
>(o3tl::toInt32(rStr
.subView( nStart
[nYP
], nEnd
[nYP
]+1-nStart
[nYP
] )));
1248 OUString aMStr
= rStr
.copy( nStart
[nMP
], nEnd
[nMP
]+1-nStart
[nMP
] );
1249 sal_Int16 nMonth
= static_cast<sal_Int16
>(aMStr
.toInt32());
1252 static constexpr OUString aSepShortened
= u
"SEP"_ustr
;
1253 uno::Sequence
< i18n::CalendarItem2
> xMonths
;
1254 sal_Int32 i
, nMonthCount
;
1255 // first test all month names from local international
1256 xMonths
= rCalendar
.getMonths();
1257 nMonthCount
= xMonths
.getLength();
1258 for (i
=0; i
<nMonthCount
&& !nMonth
; i
++)
1260 if ( rTransliteration
.isEqual( aMStr
, xMonths
[i
].FullName
) ||
1261 rTransliteration
.isEqual( aMStr
, xMonths
[i
].AbbrevName
) )
1262 nMonth
= sal::static_int_cast
<sal_Int16
>( i
+1 );
1263 else if ( i
== 8 && rTransliteration
.isEqual( u
"SEPT"_ustr
,
1264 xMonths
[i
].AbbrevName
) &&
1265 rTransliteration
.isEqual( aMStr
, aSepShortened
) )
1266 { // correct English abbreviation is SEPT,
1267 // but data mostly contains SEP only
1268 nMonth
= sal::static_int_cast
<sal_Int16
>( i
+1 );
1271 // if none found, then test english month names
1272 if ( !nMonth
&& pSecondCalendar
&& pSecondTransliteration
)
1274 xMonths
= pSecondCalendar
->getMonths();
1275 nMonthCount
= xMonths
.getLength();
1276 for (i
=0; i
<nMonthCount
&& !nMonth
; i
++)
1278 if ( pSecondTransliteration
->isEqual( aMStr
, xMonths
[i
].FullName
) ||
1279 pSecondTransliteration
->isEqual( aMStr
, xMonths
[i
].AbbrevName
) )
1281 nMonth
= sal::static_int_cast
<sal_Int16
>( i
+1 );
1284 else if ( i
== 8 && pSecondTransliteration
->isEqual(
1285 aMStr
, aSepShortened
) )
1286 { // correct English abbreviation is SEPT,
1287 // but data mostly contains SEP only
1288 nMonth
= sal::static_int_cast
<sal_Int16
>( i
+1 );
1295 SvNumberFormatter
* pDocFormatter
= rDoc
.GetFormatTable();
1297 nYear
= pDocFormatter
->ExpandTwoDigitYear( nYear
);
1299 CalendarWrapper
* pCalendar
= (bSecondCal
? pSecondCalendar
: &rCalendar
);
1300 sal_Int16 nNumMonths
= pCalendar
->getNumberOfMonthsInYear();
1301 if ( nDay
&& nMonth
&& nDay
<=31 && nMonth
<=nNumMonths
)
1304 pCalendar
->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH
, nDay
);
1305 pCalendar
->setValue( i18n::CalendarFieldIndex::MONTH
, nMonth
);
1306 pCalendar
->setValue( i18n::CalendarFieldIndex::YEAR
, nYear
);
1307 sal_Int16 nHour
, nMinute
, nSecond
;
1308 // #i14974# The imported value should have no fractional value, so set the
1309 // time fields to zero (ICU calendar instance defaults to current date/time)
1310 nHour
= nMinute
= nSecond
= 0;
1312 nHour
= static_cast<sal_Int16
>(o3tl::toInt32(rStr
.subView( nStart
[3], nEnd
[3]+1-nStart
[3])));
1314 nMinute
= static_cast<sal_Int16
>(o3tl::toInt32(rStr
.subView( nStart
[4], nEnd
[4]+1-nStart
[4])));
1316 nSecond
= static_cast<sal_Int16
>(o3tl::toInt32(rStr
.subView( nStart
[5], nEnd
[5]+1-nStart
[5])));
1317 // do not use calendar's milliseconds, to avoid fractional part truncation
1321 sal_Unicode cDec
= '.';
1322 OUString aT
= OUStringChar(cDec
) + rStr
.subView( nStart
[6], nEnd
[6]+1-nStart
[6]);
1323 rtl_math_ConversionStatus eStatus
;
1324 double fV
= rtl::math::stringToDouble( aT
, cDec
, 0, &eStatus
);
1325 if (eStatus
== rtl_math_ConversionStatus_Ok
)
1326 fFrac
= fV
/ 86400.0;
1329 if (nFound
> 3 && 1 <= nHour
&& nHour
<= 12 // nHour 0 and >=13 can't be AM/PM
1330 && (nPos
= nEnd
[nFound
-1] + 1) < nLen
)
1332 // Dreaded AM/PM may be following.
1333 while (nPos
< nLen
&& rStr
[nPos
] == ' ')
1337 sal_Int32 nStop
= nPos
;
1338 while (nStop
< nLen
&& rStr
[nStop
] != ' ')
1340 OUString aAmPm
= rStr
.copy( nPos
, nStop
- nPos
);
1341 // For AM only 12 needs to be treated, whereas for PM
1342 // it must not. Check both, locale and second/English
1345 (rTransliteration
.isEqual( aAmPm
, pFormatter
->GetLocaleData()->getTimeAM()) ||
1346 (pSecondTransliteration
&& pSecondTransliteration
->isEqual( aAmPm
, u
"AM"_ustr
))))
1350 else if (nHour
< 12 &&
1351 (rTransliteration
.isEqual( aAmPm
, pFormatter
->GetLocaleData()->getTimePM()) ||
1352 (pSecondTransliteration
&& pSecondTransliteration
->isEqual( aAmPm
, u
"PM"_ustr
))))
1358 pCalendar
->setValue( i18n::CalendarFieldIndex::HOUR
, nHour
);
1359 pCalendar
->setValue( i18n::CalendarFieldIndex::MINUTE
, nMinute
);
1360 pCalendar
->setValue( i18n::CalendarFieldIndex::SECOND
, nSecond
);
1361 pCalendar
->setValue( i18n::CalendarFieldIndex::MILLISECOND
, 0 );
1362 if ( pCalendar
->isValid() )
1365 double fDiff
= DateTime::Sub( DateTime(pDocFormatter
->GetNullDate()),
1366 pCalendar
->getEpochStart());
1367 // #i14974# must use getLocalDateTime to get the same
1368 // date values as set above
1369 double fDays
= pCalendar
->getLocalDateTime() + fFrac
;
1372 LanguageType eLatin
, eCjk
, eCtl
;
1373 rDoc
.GetLanguage( eLatin
, eCjk
, eCtl
);
1374 LanguageType eDocLang
= eLatin
; //! which language for date formats?
1376 SvNumFormatType nType
= (nFound
> 3 ? SvNumFormatType::DATETIME
: SvNumFormatType::DATE
);
1377 sal_uLong nFormat
= pDocFormatter
->GetStandardFormat( nType
, eDocLang
);
1378 // maybe there is a special format including seconds or milliseconds
1380 nFormat
= pDocFormatter
->GetStandardFormat( fDays
, nFormat
, nType
, eDocLang
);
1382 ScAddress
aPos(nCol
,nRow
,nTab
);
1383 if ( bUseDocImport
)
1384 rDocImport
.setNumericCell(aPos
, fDays
);
1386 rDoc
.SetValue( aPos
, fDays
);
1387 rDoc
.SetNumberFormat(aPos
, nFormat
);
1389 return bMultiLine
; // success
1395 // Standard or date not determined -> SetString / EditCell
1396 if( rStr
.indexOf( '\n' ) == -1 )
1398 if (!bDetectNumFormat
&& nColFormat
== SC_COL_STANDARD
)
1400 // Import a strict ISO 8601 date(+time) string even without
1401 // "Detect special numbers" or "Date (YMD)".
1404 // Simple pre-check before calling more expensive parser.
1405 // ([+-])(Y)YYYY-MM-DD
1406 if (rStr
.getLength() < 10)
1408 const sal_Int32 n1
= rStr
.indexOf('-', 1);
1411 const sal_Int32 n2
= rStr
.indexOf('-', n1
+ 1);
1412 if (n2
< 7 || n1
+ 3 < n2
)
1415 css::util::DateTime aDateTime
;
1416 if (!sax::Converter::parseDateTime( aDateTime
, rStr
))
1419 sal_uInt32 nFormat
= 0;
1421 SvNumberFormatter
* pDocFormatter
= rDoc
.GetFormatTable();
1422 if (pDocFormatter
->IsNumberFormat( rStr
, nFormat
, fVal
))
1424 if (pDocFormatter
->GetType(nFormat
) & SvNumFormatType::DATE
)
1426 ScAddress
aPos(nCol
,nRow
,nTab
);
1428 rDocImport
.setNumericCell(aPos
, fVal
);
1430 rDoc
.SetValue(aPos
, fVal
);
1431 rDoc
.SetNumberFormat(aPos
, nFormat
);
1433 return bMultiLine
; // success
1440 ScSetStringParam aParam
;
1441 aParam
.mpNumFormatter
= pFormatter
;
1442 aParam
.mbDetectNumberFormat
= bDetectNumFormat
;
1443 aParam
.mbDetectScientificNumberFormat
= bDetectSciNumFormat
;
1444 aParam
.meSetTextNumFormat
= ScSetStringParam::SpecialNumberOnly
;
1445 aParam
.mbHandleApostrophe
= false;
1446 aParam
.mbCheckLinkFormula
= true;
1447 if ( bUseDocImport
)
1448 rDocImport
.setAutoInput(ScAddress(nCol
, nRow
, nTab
), rStr
, &aParam
);
1450 rDoc
.SetString( nCol
, nRow
, nTab
, rStr
, &aParam
);
1455 ScFieldEditEngine
& rEngine
= rDoc
.GetEditEngine();
1456 rEngine
.SetTextCurrentDefaults(rStr
);
1457 if ( bUseDocImport
)
1458 rDocImport
.setEditCell(ScAddress(nCol
, nRow
, nTab
), rEngine
.CreateTextObject());
1460 rDoc
.SetEditText( ScAddress( nCol
, nRow
, nTab
), rEngine
.CreateTextObject() );
1465 static OUString
lcl_GetFixed( const OUString
& rLine
, sal_Int32 nStart
, sal_Int32 nNext
,
1466 bool& rbIsQuoted
, bool& rbOverflowCell
)
1468 sal_Int32 nLen
= rLine
.getLength();
1471 if ( nNext
<= nStart
)
1474 const sal_Unicode
* pStr
= rLine
.getStr();
1476 sal_Int32 nSpace
= nNext
;
1477 while ( nSpace
> nStart
&& pStr
[nSpace
-1] == ' ' )
1480 rbIsQuoted
= (pStr
[nStart
] == '"' && pStr
[nSpace
-1] == '"');
1483 bool bFits
= (nSpace
- nStart
- 3 <= nArbitraryCellLengthLimit
);
1485 return rLine
.copy(nStart
+1, std::max
< sal_Int32
>(0, nSpace
-nStart
-2));
1488 SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
1489 rbOverflowCell
= true;
1490 return rLine
.copy(nStart
+1, nArbitraryCellLengthLimit
);
1495 bool bFits
= (nSpace
- nStart
<= nArbitraryCellLengthLimit
);
1497 return rLine
.copy(nStart
, nSpace
-nStart
);
1500 SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
1501 rbOverflowCell
= true;
1502 return rLine
.copy(nStart
, nArbitraryCellLengthLimit
);
1507 bool ScImportExport::ExtText2Doc( SvStream
& rStrm
)
1510 return Text2Doc( rStrm
);
1512 sal_uInt64
const nOldPos
= rStrm
.Tell();
1513 sal_uInt64
const nRemaining
= rStrm
.remainingSize();
1514 std::unique_ptr
<ScProgress
> xProgress( new ScProgress( pDocSh
,
1515 ScResId( STR_LOAD_DOC
), nRemaining
, true ));
1516 rStrm
.StartReadingUnicodeText( rStrm
.GetStreamCharSet() );
1517 // tdf#82254 - check whether to include a byte-order-mark in the output
1518 if (nOldPos
!= rStrm
.Tell())
1519 mbIncludeBOM
= true;
1521 SCCOL nStartCol
= aRange
.aStart
.Col();
1522 SCCOL nEndCol
= aRange
.aEnd
.Col();
1523 SCROW nStartRow
= aRange
.aStart
.Row();
1524 const SCTAB nTab
= aRange
.aStart
.Tab();
1526 bool bFixed
= pExtOptions
->IsFixedLen();
1527 OUString aSeps
= pExtOptions
->GetFieldSeps(); // Need non-const for ReadCsvLine(),
1528 const sal_Unicode
* pSeps
= aSeps
.getStr(); // but it will be const anyway (asserted below).
1529 bool bMerge
= pExtOptions
->IsMergeSeps();
1530 bool bRemoveSpace
= pExtOptions
->IsRemoveSpace();
1531 sal_uInt16 nInfoCount
= pExtOptions
->GetInfoCount();
1532 const sal_Int32
* pColStart
= pExtOptions
->GetColStart();
1533 const sal_uInt8
* pColFormat
= pExtOptions
->GetColFormat();
1534 tools::Long nSkipLines
= pExtOptions
->GetStartRow();
1536 LanguageType eDocLang
= pExtOptions
->GetLanguage();
1537 SvNumberFormatter
aNumFormatter( comphelper::getProcessComponentContext(), eDocLang
);
1538 bool bDetectNumFormat
= pExtOptions
->IsDetectSpecialNumber();
1539 bool bDetectSciNumFormat
= pExtOptions
->IsDetectScientificNumber();
1540 bool bEvaluateFormulas
= pExtOptions
->IsEvaluateFormulas();
1541 bool bSkipEmptyCells
= pExtOptions
->IsSkipEmptyCells();
1543 // For date recognition
1544 ::utl::TransliterationWrapper
aTransliteration(
1545 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE
);
1546 aTransliteration
.loadModuleIfNeeded( eDocLang
);
1547 CalendarWrapper
aCalendar( comphelper::getProcessComponentContext() );
1548 aCalendar
.loadDefaultCalendar(
1549 LanguageTag::convertToLocale( eDocLang
) );
1550 std::unique_ptr
< ::utl::TransliterationWrapper
> pEnglishTransliteration
;
1551 std::unique_ptr
< CalendarWrapper
> pEnglishCalendar
;
1552 if ( eDocLang
!= LANGUAGE_ENGLISH_US
)
1554 pEnglishTransliteration
.reset(new ::utl::TransliterationWrapper (
1555 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE
));
1556 aTransliteration
.loadModuleIfNeeded( LANGUAGE_ENGLISH_US
);
1557 pEnglishCalendar
.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() ));
1558 pEnglishCalendar
->loadDefaultCalendar(
1559 LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US
) );
1565 SCROW nRow
= nStartRow
;
1566 sal_Unicode cDetectSep
= 0xffff; // No separator detection here.
1568 while(--nSkipLines
>0)
1570 aLine
= ReadCsvLine(rStrm
, !bFixed
, aSeps
, cStr
, cDetectSep
); // content is ignored
1575 // Determine range for Undo.
1576 // We don't need this during import of a file to a new sheet or document...
1577 bool bDetermineRange
= bUndo
;
1578 bool bColumnsAreDetermined
= false;
1580 // Row heights don't need to be adjusted on the fly if EndPaste() is called
1581 // afterwards, which happens only if bDetermineRange. This variable also
1582 // survives the toggle of bDetermineRange down at the end of the do{} loop.
1583 bool bRangeIsDetermined
= bDetermineRange
;
1585 bool bQuotedAsText
= pExtOptions
&& pExtOptions
->IsQuotedAsText();
1587 sal_uInt64 nOriginalStreamPos
= rStrm
.Tell();
1589 SCROW nFirstUpdateRowHeight
= SCROW_MAX
;
1590 SCROW nLastUpdateRowHeight
= -1;
1592 ScDocumentImport
aDocImport(rDoc
);
1595 const SCCOL nLastCol
= nEndCol
; // tdf#129701 preserve value of nEndCol
1598 aLine
= ReadCsvLine(rStrm
, !bFixed
, aSeps
, cStr
, cDetectSep
);
1599 if ( rStrm
.eof() && aLine
.isEmpty() )
1602 assert(pSeps
== aSeps
.getStr());
1604 if ( nRow
> rDoc
.MaxRow() )
1606 bOverflowRow
= true; // display warning on import
1610 if (!bDetermineRange
)
1611 EmbeddedNullTreatment( aLine
);
1613 sal_Int32 nLineLen
= aLine
.getLength();
1614 SCCOL nCol
= nStartCol
;
1615 bool bMultiLine
= false;
1616 if ( bFixed
) // Fixed line length
1618 if (bDetermineRange
)
1620 if (!bColumnsAreDetermined
)
1622 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it
1623 // is only an overflow if there is really data following to
1624 // be put behind the last column, which doesn't happen if
1625 // info is SC_COL_SKIP.
1626 for (i
=0; i
< nInfoCount
&& nCol
<= rDoc
.MaxCol()+1; ++i
)
1628 const sal_uInt8 nFmt
= pColFormat
[i
];
1629 if (nFmt
!= SC_COL_SKIP
) // otherwise don't increment nCol either
1631 if (nCol
> rDoc
.MaxCol())
1632 bOverflowCol
= true; // display warning on import
1636 bColumnsAreDetermined
= true;
1641 sal_Int32 nStartIdx
= 0;
1642 // Same maxcol+1 check reason as above.
1643 for (i
=0; i
< nInfoCount
&& nCol
<= rDoc
.MaxCol()+1; ++i
)
1645 sal_Int32 nNextIdx
= nStartIdx
;
1646 if (i
+ 1 < nInfoCount
)
1647 CountVisualWidth( aLine
, nNextIdx
, pColStart
[i
+1] - pColStart
[i
] );
1649 nNextIdx
= nLineLen
;
1650 sal_uInt8 nFmt
= pColFormat
[i
];
1651 if (nFmt
!= SC_COL_SKIP
) // otherwise don't increment nCol either
1653 if (nCol
> rDoc
.MaxCol())
1654 bOverflowCol
= true; // display warning on import
1657 bool bIsQuoted
= false;
1658 aCell
= lcl_GetFixed( aLine
, nStartIdx
, nNextIdx
, bIsQuoted
, bOverflowCell
);
1659 if (bIsQuoted
&& bQuotedAsText
)
1662 bMultiLine
|= lcl_PutString(
1663 aDocImport
, !mbOverwriting
, nCol
, nRow
, nTab
, aCell
, nFmt
,
1664 &aNumFormatter
, bDetectNumFormat
, bDetectSciNumFormat
, bEvaluateFormulas
, bSkipEmptyCells
,
1665 aTransliteration
, aCalendar
,
1666 pEnglishTransliteration
.get(), pEnglishCalendar
.get());
1670 nStartIdx
= nNextIdx
;
1674 else // Search for the separator
1676 SCCOL nSourceCol
= 0;
1677 sal_uInt16 nInfoStart
= 0;
1678 const sal_Unicode
* p
= aLine
.getStr();
1679 // tdf#129701 if there is only one column, and user wants to treat empty cells,
1680 // we need to detect *p = null
1681 bool bIsLastColEmpty
= !(*p
) && !bSkipEmptyCells
&& !bDetermineRange
;
1682 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1683 // overflow if there is really data following to be put behind
1684 // the last column, which doesn't happen if info is
1686 while ( (*p
|| bIsLastColEmpty
) && nCol
<= rDoc
.MaxCol()+1)
1688 bool bIsQuoted
= false;
1689 p
= ScImportExport::ScanNextFieldFromString( p
, aCell
,
1690 cStr
, pSeps
, bMerge
, bIsQuoted
, bOverflowCell
, bRemoveSpace
);
1692 sal_uInt8 nFmt
= SC_COL_STANDARD
;
1693 for ( i
=nInfoStart
; i
<nInfoCount
; i
++ )
1695 if ( pColStart
[i
] == nSourceCol
+ 1 ) // pColStart is 1-based
1697 nFmt
= pColFormat
[i
];
1698 nInfoStart
= i
+ 1; // ColInfos are in succession
1702 if ( nFmt
!= SC_COL_SKIP
)
1704 if (nCol
> rDoc
.MaxCol())
1705 bOverflowCol
= true; // display warning on import
1706 else if (!bDetermineRange
)
1708 if (bIsQuoted
&& bQuotedAsText
)
1711 bMultiLine
|= lcl_PutString(
1712 aDocImport
, !mbOverwriting
, nCol
, nRow
, nTab
, aCell
, nFmt
,
1713 &aNumFormatter
, bDetectNumFormat
, bDetectSciNumFormat
, bEvaluateFormulas
, bSkipEmptyCells
,
1714 aTransliteration
, aCalendar
,
1715 pEnglishTransliteration
.get(), pEnglishCalendar
.get());
1718 if (bIsLastColEmpty
)
1720 bIsLastColEmpty
= false; // toggle to stop
1724 // tdf#129701 detect if there is a last empty column when we need it
1725 bIsLastColEmpty
= (nCol
== nLastCol
) && !(*p
) && !bSkipEmptyCells
&& !bDetermineRange
;
1733 nEndCol
= nCol
; //! points to the next free or even rDoc.MaxCol()+2
1735 if (!bDetermineRange
)
1737 if (bMultiLine
&& !bRangeIsDetermined
&& pDocSh
)
1738 { // Adjust just once at the end for a whole range.
1739 nFirstUpdateRowHeight
= std::min( nFirstUpdateRowHeight
, nRow
);
1740 nLastUpdateRowHeight
= std::max( nLastUpdateRowHeight
, nRow
);
1742 xProgress
->SetStateOnPercent( rStrm
.Tell() - nOldPos
);
1746 // so far nRow/nEndCol pointed to the next free
1747 if (nRow
> nStartRow
)
1749 if (nEndCol
> nStartCol
)
1750 nEndCol
= ::std::min( static_cast<SCCOL
>(nEndCol
- 1), rDoc
.MaxCol());
1752 if (bDetermineRange
)
1754 aRange
.aEnd
.SetCol( nEndCol
);
1755 aRange
.aEnd
.SetRow( nRow
);
1757 if ( !mbApi
&& nStartCol
!= nEndCol
&&
1758 !rDoc
.IsBlockEmpty( nStartCol
+ 1, nStartRow
, nEndCol
, nRow
, nTab
) )
1760 ScReplaceWarnBox
aBox(ScDocShell::GetActiveDialogParent());
1761 if (aBox
.run() != RET_YES
)
1767 rStrm
.Seek( nOriginalStreamPos
);
1776 bDetermineRange
= !bDetermineRange
; // toggle
1777 } while (!bDetermineRange
);
1779 if ( !mbOverwriting
)
1780 aDocImport
.finalize();
1782 xProgress
.reset(); // make room for AdjustRowHeight progress
1784 if( nFirstUpdateRowHeight
< nLastUpdateRowHeight
&& pDocSh
)
1785 pDocSh
->AdjustRowHeight( nFirstUpdateRowHeight
, nLastUpdateRowHeight
, nTab
);
1787 if (bRangeIsDetermined
)
1790 if (mbImportBroadcast
&& !mbOverwriting
)
1792 rDoc
.BroadcastCells(aRange
, SfxHintId::ScDataChanged
);
1793 pDocSh
->PostDataChanged();
1798 void ScImportExport::EmbeddedNullTreatment( OUString
& rStr
)
1800 // A nasty workaround for data with embedded NULL characters. As long as we
1801 // can't handle them properly as cell content (things assume 0-terminated
1802 // strings at too many places) simply strip all NULL characters from raw
1803 // data. Excel does the same. See fdo#57841 for sample data.
1805 // The normal case is no embedded NULL, check first before de-/allocating
1807 sal_Unicode cNull
= 0;
1808 if (sal_Int32 pos
= rStr
.indexOf(cNull
); pos
>= 0)
1810 rStr
= rStr
.replaceAll(std::u16string_view(&cNull
, 1), u
"", pos
);
1814 const sal_Unicode
* ScImportExport::ScanNextFieldFromString( const sal_Unicode
* p
,
1815 OUString
& rField
, sal_Unicode cStr
, const sal_Unicode
* pSeps
, bool bMergeSeps
, bool& rbIsQuoted
,
1816 bool& rbOverflowCell
, bool bRemoveSpace
)
1820 const sal_Unicode cBlank
= ' ';
1821 if (cStr
&& !ScGlobal::UnicodeStrChr(pSeps
, cBlank
))
1823 // Cope with broken generators that put leading blanks before a quoted
1824 // field, like "field1", "field2", "..."
1825 // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180
1826 const sal_Unicode
* pb
= p
;
1827 while (*pb
== cBlank
)
1832 if (cStr
&& *p
== cStr
) // String in quotes
1835 const sal_Unicode
* p1
;
1836 p1
= p
= lcl_ScanString( p
, rField
, pSeps
, cStr
, DoubledQuoteMode::ESCAPE
, rbOverflowCell
);
1837 while (!lcl_isFieldEnd( *p
, pSeps
))
1839 // Append remaining unquoted and undelimited data (dirty, dirty) to
1843 const sal_Unicode
* ptrim_f
= p
;
1846 while ( ptrim_f
> p1
&& ( *(ptrim_f
- 1) == cBlank
) )
1849 if (!lcl_appendLineData( rField
, p1
, ptrim_f
))
1850 rbOverflowCell
= true;
1855 else // up to delimiter
1857 const sal_Unicode
* p0
= p
;
1858 while (!lcl_isFieldEnd( *p
, pSeps
))
1860 const sal_Unicode
* ptrim_i
= p0
;
1861 const sal_Unicode
* ptrim_f
= p
; // [ptrim_i,ptrim_f) is cell data after trimming
1864 while ( ptrim_i
< ptrim_f
&& *ptrim_i
== cBlank
)
1866 while ( ptrim_f
> ptrim_i
&& ( *(ptrim_f
- 1) == cBlank
) )
1869 if (!lcl_appendLineData( rField
, ptrim_i
, ptrim_f
))
1870 rbOverflowCell
= true;
1874 if ( bMergeSeps
) // skip following delimiters
1876 while (*p
&& ScGlobal::UnicodeStrChr( pSeps
, *p
))
1885 * Check if a given string has any line break characters or separators.
1887 * @param rStr string to inspect.
1888 * @param cSep separator character.
1890 bool hasLineBreaksOrSeps( const OUString
& rStr
, sal_Unicode cSep
)
1892 const sal_Unicode
* p
= rStr
.getStr();
1893 for (sal_Int32 i
= 0, n
= rStr
.getLength(); i
< n
; ++i
, ++p
)
1904 // line break found.
1915 bool ScImportExport::Doc2Text( SvStream
& rStrm
)
1919 SCCOL nStartCol
= aRange
.aStart
.Col();
1920 SCROW nStartRow
= aRange
.aStart
.Row();
1921 SCTAB nStartTab
= aRange
.aStart
.Tab();
1922 SCCOL nEndCol
= aRange
.aEnd
.Col();
1923 SCROW nEndRow
= aRange
.aEnd
.Row();
1924 SCTAB nEndTab
= aRange
.aEnd
.Tab();
1926 if (!rDoc
.GetClipParam().isMultiRange() && nStartTab
== nEndTab
)
1927 if (!rDoc
.ShrinkToDataArea( nStartTab
, nStartCol
, nStartRow
, nEndCol
, nEndRow
))
1932 bool bConvertLF
= (GetSystemLineEnd() != LINEEND_LF
);
1934 // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab.
1935 std::vector
< sc::ColumnBlockPosition
> blockPos( nEndCol
- nStartCol
+ 1 );
1936 for( SCCOL i
= nStartCol
; i
<= nEndCol
; ++i
)
1937 rDoc
.InitColumnBlockPosition( blockPos
[ i
- nStartCol
], nStartTab
, i
);
1938 for (nRow
= nStartRow
; nRow
<= nEndRow
; nRow
++)
1940 if (bIncludeFiltered
|| !rDoc
.RowFiltered( nRow
, nStartTab
))
1942 for (nCol
= nStartCol
; nCol
<= nEndCol
; nCol
++)
1944 ScAddress
aPos(nCol
, nRow
, nStartTab
);
1945 sal_uInt32 nNumFmt
= rDoc
.GetNumberFormat(ScRange(aPos
));
1946 ScInterpreterContext
& rContext
= rDoc
.GetNonThreadedContext();
1948 ScRefCellValue
aCell(rDoc
, aPos
, blockPos
[ nCol
- nStartCol
]);
1949 switch (aCell
.getType())
1951 case CELLTYPE_FORMULA
:
1955 aCellStr
= aCell
.getFormula()->GetFormula();
1956 if( aCellStr
.indexOf( cSep
) != -1 )
1957 lcl_WriteString( rStrm
, aCellStr
, cStr
, cStr
);
1959 rStrm
.WriteUnicodeOrByteText(aCellStr
);
1963 const Color
* pColor
;
1964 aCellStr
= ScCellFormat::GetString(aCell
, nNumFmt
, &pColor
, &rContext
, rDoc
);
1966 bool bMultiLineText
= ( aCellStr
.indexOf( '\n' ) != -1 );
1967 if( bMultiLineText
)
1969 if( mExportTextOptions
.meNewlineConversion
== ScExportTextOptions::ToSpace
)
1970 aCellStr
= aCellStr
.replaceAll( "\n", " " );
1971 else if ( mExportTextOptions
.meNewlineConversion
== ScExportTextOptions::ToSystem
&& bConvertLF
)
1972 aCellStr
= convertLineEnd(aCellStr
, GetSystemLineEnd());
1975 if( mExportTextOptions
.mcSeparatorConvertTo
&& cSep
)
1976 aCellStr
= aCellStr
.replaceAll( OUStringChar(cSep
), OUStringChar(mExportTextOptions
.mcSeparatorConvertTo
) );
1978 if( mExportTextOptions
.mbAddQuotes
&& ( aCellStr
.indexOf( cSep
) != -1 ) )
1979 lcl_WriteString( rStrm
, aCellStr
, cStr
, cStr
);
1981 rStrm
.WriteUnicodeOrByteText(aCellStr
);
1985 case CELLTYPE_VALUE
:
1987 const Color
* pColor
;
1988 aCellStr
= ScCellFormat::GetString(aCell
, nNumFmt
, &pColor
, &rContext
, rDoc
);
1989 rStrm
.WriteUnicodeOrByteText(aCellStr
);
1996 const Color
* pColor
;
1997 aCellStr
= ScCellFormat::GetString(aCell
, nNumFmt
, &pColor
, &rContext
, rDoc
);
1999 bool bMultiLineText
= ( aCellStr
.indexOf( '\n' ) != -1 );
2000 if( bMultiLineText
)
2002 if( mExportTextOptions
.meNewlineConversion
== ScExportTextOptions::ToSpace
)
2003 aCellStr
= aCellStr
.replaceAll( "\n", " " );
2004 else if ( mExportTextOptions
.meNewlineConversion
== ScExportTextOptions::ToSystem
&& bConvertLF
)
2005 aCellStr
= convertLineEnd(aCellStr
, GetSystemLineEnd());
2008 if( mExportTextOptions
.mcSeparatorConvertTo
&& cSep
)
2009 aCellStr
= aCellStr
.replaceAll( OUStringChar(cSep
), OUStringChar(mExportTextOptions
.mcSeparatorConvertTo
) );
2011 if( mExportTextOptions
.mbAddQuotes
&& hasLineBreaksOrSeps(aCellStr
, cSep
) )
2012 lcl_WriteString( rStrm
, aCellStr
, cStr
, cStr
);
2014 rStrm
.WriteUnicodeOrByteText(aCellStr
);
2017 if( nCol
< nEndCol
)
2018 rStrm
.WriteUnicodeOrByteText(rtl::OUStringChar(cSep
));
2020 // Do not append a line feed for one single cell.
2021 // NOTE: this Doc2Text() is only called for clipboard via
2022 // ScImportExport::ExportStream().
2023 if (nStartRow
!= nEndRow
|| nStartCol
!= nEndCol
)
2025 if( rStrm
.GetError() != ERRCODE_NONE
)
2027 if( nSizeLimit
&& rStrm
.Tell() > nSizeLimit
)
2032 return rStrm
.GetError() == ERRCODE_NONE
;
2035 bool ScImportExport::Sylk2Doc( SvStream
& rStrm
)
2038 bool bMyDoc
= false;
2039 SylkVersion eVersion
= SylkVersion::OTHER
;
2041 // US-English separators for StringToDouble
2042 sal_Unicode
const cDecSep
= '.';
2043 sal_Unicode
const cGrpSep
= ',';
2045 SCCOL nStartCol
= aRange
.aStart
.Col();
2046 SCROW nStartRow
= aRange
.aStart
.Row();
2047 SCCOL nEndCol
= aRange
.aEnd
.Col();
2048 SCROW nEndRow
= aRange
.aEnd
.Row();
2049 sal_uInt64 nOldPos
= rStrm
.Tell();
2050 bool bData
= !bSingle
;
2051 ::std::vector
< sal_uInt32
> aFormats
;
2060 OStringBuffer aByteLine
;
2061 SCCOL nCol
= nStartCol
;
2062 SCROW nRow
= nStartRow
;
2063 SCCOL nRefCol
= nCol
;
2064 SCROW nRefRow
= nRow
;
2065 rStrm
.Seek( nOldPos
);
2069 (void)rStrm
.ReadLine( aByteLine
);
2070 aLine
= OStringToOUString(aByteLine
, rStrm
.GetStreamCharSet());
2073 bool bInvalidCol
= false;
2074 bool bInvalidRow
= false;
2075 const sal_Unicode
* p
= aLine
.getStr();
2076 sal_Unicode cTag
= *p
++;
2077 if( cTag
== 'C' ) // Content
2082 bool bInvalidRefCol
= false;
2083 bool bInvalidRefRow
= false;
2086 sal_Unicode ch
= *p
++;
2087 ch
= ScGlobal::ToUpperAlpha( ch
);
2092 bInvalidCol
= false;
2093 bool bFail
= o3tl::checked_add
<SCCOL
>(o3tl::toInt32(std::u16string_view(p
)), nStartCol
- 1, nCol
);
2094 if (bFail
|| nCol
< 0 || rDoc
.MaxCol() < nCol
)
2096 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol
);
2097 nCol
= std::clamp
<SCCOL
>(nCol
, 0, rDoc
.MaxCol());
2098 bInvalidCol
= bOverflowCol
= true;
2104 bInvalidRow
= false;
2105 bool bFail
= o3tl::checked_add(o3tl::toInt32(std::u16string_view(p
)), nStartRow
- 1, nRow
);
2106 if (bFail
|| nRow
< 0 || nMaxImportRow
< nRow
)
2108 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow
);
2109 nRow
= std::clamp
<SCROW
>(nRow
, 0, nMaxImportRow
);
2110 bInvalidRow
= bOverflowRow
= true;
2116 bInvalidRefCol
= false;
2117 bool bFail
= o3tl::checked_add
<SCCOL
>(o3tl::toInt32(std::u16string_view(p
)), nStartCol
- 1, nRefCol
);
2118 if (bFail
|| nRefCol
< 0 || rDoc
.MaxCol() < nRefCol
)
2120 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol
);
2121 nRefCol
= std::clamp
<SCCOL
>(nRefCol
, 0, rDoc
.MaxCol());
2122 bInvalidRefCol
= bOverflowCol
= true;
2128 bInvalidRefRow
= false;
2129 bool bFail
= o3tl::checked_add(o3tl::toInt32(std::u16string_view(p
)), nStartRow
- 1, nRefRow
);
2130 if (bFail
|| nRefRow
< 0 || nMaxImportRow
< nRefRow
)
2132 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow
);
2133 nRefRow
= std::clamp
<SCROW
>(nRefRow
, 0, nMaxImportRow
);
2134 bInvalidRefRow
= bOverflowRow
= true;
2141 ( nCol
< nStartCol
|| nCol
> nEndCol
2142 || nRow
< nStartRow
|| nRow
> nEndRow
2143 || nCol
> rDoc
.MaxCol() || nRow
> nMaxImportRow
2144 || bInvalidCol
|| bInvalidRow
) )
2148 if( nRow
> nEndRow
)
2150 if( nCol
> nEndCol
)
2159 p
= lcl_ScanSylkString( p
, aText
, eVersion
);
2163 const sal_Unicode
* q
= p
;
2164 while( *q
&& *q
!= ';' )
2166 if ( (*q
!= ';' || *(q
+1) != 'I') && !bInvalidCol
&& !bInvalidRow
)
2167 { // don't ignore value
2170 rDoc
.EnsureTable(aRange
.aStart
.Tab());
2172 ScAddress(nCol
, nRow
, aRange
.aStart
.Tab()), aText
);
2176 double fVal
= rtl_math_uStringToDouble( p
,
2177 aLine
.getStr() + aLine
.getLength(),
2178 cDecSep
, cGrpSep
, nullptr, nullptr );
2179 rDoc
.SetValue( nCol
, nRow
, aRange
.aStart
.Tab(), fVal
);
2189 if ( nRefCol
< nCol
)
2191 if ( nRefRow
< nRow
)
2195 if( nRefRow
> nEndRow
)
2197 if( nRefCol
> nEndCol
)
2201 if( !bMyDoc
|| !bData
)
2204 p
= lcl_ScanSylkFormula( p
, aText
, eVersion
);
2206 if (bInvalidCol
|| bInvalidRow
|| (ch
== 'M' && (bInvalidRefCol
|| bInvalidRefRow
)))
2209 ScAddress
aPos( nCol
, nRow
, aRange
.aStart
.Tab() );
2210 /* FIXME: do we want GRAM_ODFF_A1 instead? At the
2211 * end it probably should be GRAM_ODFF_R1C1, since
2212 * R1C1 is what Excel writes in SYLK, or even
2213 * better GRAM_ENGLISH_XL_R1C1. */
2214 const formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_PODF_A1
;
2215 ScCompiler
aComp(rDoc
, aPos
, eGrammar
);
2216 std::unique_ptr
<ScTokenArray
> xCode(aComp
.CompileString(aText
)); // ctor/InsertMatrixFormula did copy TokenArray
2217 rDoc
.CheckLinkFormulaNeedingCheck(*xCode
);
2220 ScMarkData
aMark(rDoc
.GetSheetLimits());
2221 aMark
.SelectTable( aPos
.Tab(), true );
2222 rDoc
.InsertMatrixFormula( nCol
, nRow
, nRefCol
,
2223 nRefRow
, aMark
, OUString(), xCode
.get() );
2227 ScFormulaCell
* pFCell
= new ScFormulaCell(
2228 rDoc
, aPos
, *xCode
, eGrammar
, ScMatrixMode::NONE
);
2229 rDoc
.SetFormulaCell(aPos
, pFCell
);
2234 while( *p
&& *p
!= ';' )
2240 else if( cTag
== 'F' ) // Format
2244 sal_Int32 nFormat
= -1;
2247 sal_Unicode ch
= *p
++;
2248 ch
= ScGlobal::ToUpperAlpha( ch
);
2253 bInvalidCol
= false;
2254 bool bFail
= o3tl::checked_add
<SCCOL
>(o3tl::toInt32(std::u16string_view(p
)), nStartCol
- 1, nCol
);
2255 if (bFail
|| nCol
< 0 || rDoc
.MaxCol() < nCol
)
2257 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol
);
2258 nCol
= std::clamp
<SCCOL
>(nCol
, 0, rDoc
.MaxCol());
2259 bInvalidCol
= bOverflowCol
= true;
2265 bInvalidRow
= false;
2266 bool bFail
= o3tl::checked_add(o3tl::toInt32(std::u16string_view(p
)), nStartRow
- 1, nRow
);
2267 if (bFail
|| nRow
< 0 || nMaxImportRow
< nRow
)
2269 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow
);
2270 nRow
= std::clamp
<SCROW
>(nRow
, 0, nMaxImportRow
);
2271 bInvalidRow
= bOverflowRow
= true;
2278 // F;P<n> sets format code of P;P<code> at
2279 // current position, or at ;X;Y if specified.
2280 // Note that ;X;Y may appear after ;P
2281 const sal_Unicode
* p0
= p
;
2282 while( *p
&& *p
!= ';' )
2284 OUString
aNumber(p0
, p
- p0
);
2285 nFormat
= aNumber
.toInt32();
2289 while( *p
&& *p
!= ';' )
2296 if( nRow
> nEndRow
)
2298 if( nCol
> nEndCol
)
2301 if ( 0 <= nFormat
&& o3tl::make_unsigned(nFormat
) < aFormats
.size() && !bInvalidCol
&& !bInvalidRow
)
2303 sal_uInt32 nKey
= aFormats
[nFormat
];
2304 rDoc
.ApplyAttr( nCol
, nRow
, aRange
.aStart
.Tab(),
2305 SfxUInt32Item( ATTR_VALUE_FORMAT
, nKey
) );
2308 else if( cTag
== 'P' )
2310 if ( bData
&& *p
== ';' && *(p
+1) == 'P' )
2312 OUString
aCode( p
+2 );
2315 sal_Int32 nCheckPos
;
2317 if (aCode
.getLength() > 2048 && comphelper::IsFuzzing())
2319 // consider an excessive length as a failure when fuzzing
2324 // unescape doubled semicolons
2325 aCode
= aCode
.replaceAll(";;", ";");
2326 // get rid of Xcl escape characters
2327 aCode
= aCode
.replaceAll("\x1b", "");
2328 SvNumFormatType nType
;
2329 rDoc
.GetFormatTable()->PutandConvertEntry( aCode
, nCheckPos
, nType
, nKey
,
2330 LANGUAGE_ENGLISH_US
, ScGlobal::eLnge
, false);
2336 aFormats
.push_back( nKey
);
2339 else if (cTag
== 'I' && *p
== 'D' && aLine
.getLength() > 4)
2341 aLine
= aLine
.copy(4);
2342 if (aLine
== "CALCOOO32")
2343 eVersion
= SylkVersion::OOO32
;
2344 else if (aLine
== "SCALC3")
2345 eVersion
= SylkVersion::SCALC3
;
2346 bMyDoc
= (eVersion
<= SylkVersion::OWN
);
2348 else if( cTag
== 'E' ) // End
2353 aRange
.aEnd
.SetCol( nEndCol
);
2354 aRange
.aEnd
.SetRow( nEndRow
);
// Export the cells of aRange to rStrm in SYLK format.
// Writes the "ID;PCALCOOO32" header first, then walks the range row by row
// and column by column, and finishes with an "E" record.
// Returns true if rStrm reports ERRCODE_NONE once everything is written.
2366 bool ScImportExport::Doc2Sylk( SvStream
& rStrm
)
// Corner coordinates of the exported range.
2370 SCCOL nStartCol
= aRange
.aStart
.Col();
2371 SCROW nStartRow
= aRange
.aStart
.Row();
2372 SCCOL nEndCol
= aRange
.aEnd
.Col();
2373 SCROW nEndRow
= aRange
.aEnd
.Row();
// Header record; the SYLK importer in this file compares the text after
// the first four characters of an ID line against "CALCOOO32".
2376 rStrm
.WriteUnicodeOrByteText(u
"ID;PCALCOOO32");
2379 for (nRow
= nStartRow
; nRow
<= nEndRow
; nRow
++)
2381 for (nCol
= nStartCol
; nCol
<= nEndCol
; nCol
++)
// r and c are 1-based offsets of the cell from the start of the range.
2386 SCROW r
= nRow
- nStartRow
+ 1;
2387 SCCOL c
= nCol
- nStartCol
+ 1;
2388 ScRefCellValue
aCell(rDoc
, ScAddress(nCol
, nRow
, aRange
.aStart
.Tab()));
2389 CellType eType
= aCell
.getType();
2392 case CELLTYPE_FORMULA
:
// A formula cell is classified by whether it holds value data.
// NOTE(review): presumably this selects the value or the string branch - confirm.
2394 if( rDoc
.HasValueData( nCol
, nRow
, aRange
.aStart
.Tab()) )
2399 case CELLTYPE_VALUE
:
2401 nVal
= rDoc
.GetValue( nCol
, nRow
, aRange
.aStart
.Tab() );
// The number is converted with automatic string format, maximum decimal
// places and '.' as the decimal separator.
2403 aValStr
= ::rtl::math::doubleToUString( nVal
,
2404 rtl_math_StringFormat_Automatic
,
2405 rtl_math_DecimalPlaces_Max
, '.', true );
// Column offset c is concatenated before row offset r.
2408 + OUString::number( c
)
2410 + OUString::number( r
)
2413 rStrm
.WriteUnicodeOrByteText(aBufStr
);
2416 case CELLTYPE_STRING
:
2419 aCellStr
= rDoc
.GetString(nCol
, nRow
, aRange
.aStart
.Tab());
// Line feeds inside the cell text are replaced by SYLK_LF.
2420 aCellStr
= aCellStr
.replaceAll("\n", SYLK_LF
);
2423 + OUString::number( c
)
2425 + OUString::number( r
)
2427 rStrm
.WriteUnicodeOrByteText(aBufStr
);
// NOTE(review): '"' and ';' are presumably the quote character and the
// character to escape - confirm against lcl_WriteString.
2428 lcl_WriteString( rStrm
, aCellStr
, '"', ';' );
// Formula handling: fetch the formula cell and branch on its matrix flag.
2433 const ScFormulaCell
* pFCell
= aCell
.getFormula();
2434 switch ( pFCell
->GetMatrixFlag() )
2436 case ScMatrixMode::Reference
:
// The formula text is requested in GRAM_PODF_A1 grammar.
2440 aCellStr
= pFCell
->GetFormula( formula::FormulaGrammar::GRAM_PODF_A1
);
2441 /* FIXME: do we want GRAM_ODFF_A1 instead? At
2442 * the end it probably should be
2443 * GRAM_ODFF_R1C1, since R1C1 is what Excel
2444 * writes in SYLK, or even better
2445 * GRAM_ENGLISH_XL_R1C1. */
// Only formulas flagged as matrix and wrapped in braces are unwrapped.
2447 if ( pFCell
->GetMatrixFlag() != ScMatrixMode::NONE
&&
2448 aCellStr
.startsWith("{") &&
2449 aCellStr
.endsWith("}") )
2450 { // cut off matrix {} characters
2451 aCellStr
= aCellStr
.copy(1, aCellStr
.getLength()-2);
// Strip a leading '=' from the formula text.
2453 if ( aCellStr
[0] == '=' )
2454 aCellStr
= aCellStr
.copy(1);
2456 switch ( pFCell
->GetMatrixFlag() )
2458 case ScMatrixMode::Formula
:
2459 { // diff expression with 'M' M$-extension
// Matrix dimensions; the row count nR is concatenated before the column count nC.
2462 pFCell
->GetMatColsRows( nC
, nR
);
2466 + OUString::number( nR
)
2468 + OUString::number( nC
)
2472 case ScMatrixMode::Reference
:
2473 { // diff expression with 'I' M$-extension
// Matrix origin, written as 1-based row and column offsets from the
// start of the range; the return value of GetMatrixOrigin is ignored.
2475 (void)pFCell
->GetMatrixOrigin( rDoc
, aPos
);
2477 + OUString::number( aPos
.Row() - nStartRow
+ 1 )
2479 + OUString::number( aPos
.Col() - nStartCol
+ 1 );
2483 // formula Expression
2486 rStrm
.WriteUnicodeOrByteText(aPrefix
);
// An empty formula string is not passed to lcl_WriteString.
2487 if ( !aCellStr
.isEmpty() )
2488 lcl_WriteString( rStrm
, aCellStr
, 0, ';' );
2495 // added to avoid warnings
// Closing "E" record; the SYLK importer in this file treats tag 'E' as the end.
2500 rStrm
.WriteUnicodeOrByteText(u
"E");
// Success is defined purely by the stream's error state.
2502 return rStrm
.GetError() == ERRCODE_NONE
;
// Export aRange of rDoc as HTML via the format filter plugin.
// If the environment variable SC_DEBUG_HTML_COPY_TO is set, the output is
// written to a file stream opened on that path instead of rStream.
// Returns true if the stream that was written to reports ERRCODE_NONE.
2505 bool ScImportExport::Doc2HTML( SvStream
& rStream
, const OUString
& rBaseURL
)
// Holds the debug file stream; it is only emplaced below.
2507 std::optional
<SvFileStream
> oStream
;
2508 char* pEnv
= getenv("SC_DEBUG_HTML_COPY_TO");
// Convert the system path from the environment into a file URL and open it for writing.
2512 osl::FileBase::getFileURLFromSystemPath(OUString::fromUtf8(pEnv
), aURL
);
2513 oStream
.emplace(aURL
, StreamMode::WRITE
);
// Pick the debug stream when the environment variable is set, else the caller's stream.
2515 SvStream
& rStrm
= pEnv
? *oStream
: rStream
;
2516 // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options
2517 ScFormatFilter::Get().ScExportHTML( rStrm
, rBaseURL
, &rDoc
, aRange
, RTL_TEXTENCODING_DONTKNOW
, bAll
,
2518 aStreamPath
, aNonConvertibleChars
, maFilterOptions
);
2519 return rStrm
.GetError() == ERRCODE_NONE
;
// Export aRange of rDoc as RTF to rStrm via the format filter plugin.
// Returns true if rStrm reports ERRCODE_NONE afterwards.
2522 bool ScImportExport::Doc2RTF( SvStream
& rStrm
)
2524 // rtl_TextEncoding is ignored in ScExportRTF
2525 ScFormatFilter::Get().ScExportRTF( rStrm
, &rDoc
, aRange
, RTL_TEXTENCODING_DONTKNOW
);
2526 return rStrm
.GetError() == ERRCODE_NONE
;
// Export aRange of rDoc as DIF to rStrm via the format filter plugin,
// always using the IBM_850 text encoding.
2529 bool ScImportExport::Doc2Dif( SvStream
& rStrm
)
2531 // for DIF in the clipboard, IBM_850 is always used
2532 ScFormatFilter::Get().ScExportDif( rStrm
, &rDoc
, aRange
, RTL_TEXTENCODING_IBM_850
);
// Import DIF data from rStrm, starting at aRange.aStart.
// The data is first read into a temporary document; aRange.aEnd is then set
// from the imported cell area and the content is copied into rDoc.
2536 bool ScImportExport::Dif2Doc( SvStream
& rStrm
)
2538 SCTAB nTab
= aRange
.aStart
.Tab();
// Temporary document in undo mode, initialized from rDoc for sheet nTab only.
2539 ScDocumentUniquePtr
pImportDoc( new ScDocument( SCDOCMODE_UNDO
) );
2540 pImportDoc
->InitUndo( rDoc
, nTab
, nTab
);
2542 // for DIF in the clipboard, IBM_850 is always used
2543 ScFormatFilter::Get().ScImportDif( rStrm
, pImportDoc
.get(), aRange
.aStart
, RTL_TEXTENCODING_IBM_850
);
// Ask the temporary document for the extent of the imported cells.
2547 pImportDoc
->GetCellArea( nTab
, nEndCol
, nEndRow
);
2548 // if there are no cells in the imported content, nEndCol/nEndRow may be before the start
2549 if ( nEndCol
< aRange
.aStart
.Col() )
2550 nEndCol
= aRange
.aStart
.Col();
2551 if ( nEndRow
< aRange
.aStart
.Row() )
2552 nEndRow
= aRange
.aStart
.Row();
// The target range now ends at the (clamped) imported extent on sheet nTab.
2553 aRange
.aEnd
= ScAddress( nEndCol
, nEndRow
, nTab
);
2555 bool bOk
= StartPaste();
// All content kinds except styles are deleted from the target range and
// then copied over from the temporary document.
2558 InsertDeleteFlags nFlags
= InsertDeleteFlags::ALL
& ~InsertDeleteFlags::STYLES
;
2559 rDoc
.DeleteAreaTab( aRange
, nFlags
);
2560 pImportDoc
->CopyToDocument(aRange
, nFlags
, false, rDoc
);
// Import RTF from rStrm into rDoc using an importer created by the format
// filter plugin; rBaseURL is passed through to the importer's Read().
2567 bool ScImportExport::RTF2Doc( SvStream
& rStrm
, const OUString
& rBaseURL
)
2569 std::unique_ptr
<ScEEAbsImport
> pImp
= ScFormatFilter::Get().CreateRTFImport( &rDoc
, aRange
);
// Parse the stream, then adopt the range reported by the importer.
2572 pImp
->Read( rStrm
, rBaseURL
);
2573 aRange
= pImp
->GetRange();
2575 bool bOk
= StartPaste();
// All content kinds except styles are deleted from the target range before
// the importer writes its data into the document.
2578 InsertDeleteFlags
const nFlags
= InsertDeleteFlags::ALL
& ~InsertDeleteFlags::STYLES
;
2579 rDoc
.DeleteAreaTab( aRange
, nFlags
);
2580 pImp
->WriteToDocument();
// Import HTML into rDoc using an importer created by the format filter plugin.
// The input is read either from rStrm or, for debugging, from the file named
// by the SC_DEBUG_HTML_PASTE_FROM environment variable.
2586 bool ScImportExport::HTML2Doc( SvStream
& rStrm
, const OUString
& rBaseURL
)
2588 std::unique_ptr
<ScEEAbsImport
> pImp
= ScFormatFilter::Get().CreateHTMLImport( &rDoc
, rBaseURL
, aRange
);
2592 // If this is set, read from this file, instead of the real clipboard during paste.
2593 char* pEnv
= getenv("SC_DEBUG_HTML_PASTE_FROM");
// Debug path: convert the system path to a file URL and read from that file.
2597 osl::FileBase::getFileURLFromSystemPath(OUString::fromUtf8(pEnv
), aURL
);
2598 SvFileStream
aStream(aURL
, StreamMode::READ
);
2599 pImp
->Read( aStream
, rBaseURL
);
// Regular path: read from the stream supplied by the caller.
2603 pImp
->Read( rStrm
, rBaseURL
);
// Adopt the range reported by the importer.
2606 aRange
= pImp
->GetRange();
2608 bool bOk
= StartPaste();
2611 // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in
2612 // a Draw Layer but no Draw View -> create Draw Layer and View here
2614 pDocSh
->MakeDrawLayer();
// All content kinds except styles are deleted from the target range.
2616 InsertDeleteFlags
const nFlags
= InsertDeleteFlags::ALL
& ~InsertDeleteFlags::STYLES
;
2617 rDoc
.DeleteAreaTab( aRange
, nFlags
);
2621 // Pick up import options if available.
2622 LanguageType eLang
= pExtOptions
->GetLanguage();
// A local number formatter for the language taken from the import options.
2623 SvNumberFormatter
aNumFormatter( comphelper::getProcessComponentContext(), eLang
);
2624 bool bSpecialNumber
= pExtOptions
->IsDetectSpecialNumber();
2625 bool bScientificNumber
= pExtOptions
->IsDetectScientificNumber();
2626 pImp
->WriteToDocument(false, 1.0, &aNumFormatter
, bSpecialNumber
, bScientificNumber
);
2629 // Regular import, with no options.
2630 pImp
->WriteToDocument();
2637 #ifndef DISABLE_DYNLOADING
// Empty function; ScFormatFilter::Get() passes its address to
// osl::Module::loadRelative().
2639 extern "C" { static void thisModule() {} }
// Factory for the filter plugin; ScFormatFilter::Get() calls it directly in
// one of its branches.
2644 ScFormatFilterPlugin
* ScFilterCreate();
// Function pointer type used to call the "ScFilterCreate" symbol looked up
// in the loaded library.
2649 typedef ScFormatFilterPlugin
* (*FilterFn
)();
// Returns the filter plugin.
// The plugin pointer is a function-local static that is initialized by the
// lambda below.
2650 ScFormatFilterPlugin
&ScFormatFilter::Get()
2652 static ScFormatFilterPlugin
*plugin
= []()
2654 #ifndef DISABLE_DYNLOADING
// Name of the "scfilt" library as produced by the SVLIBRARY macro.
2655 OUString
sFilterLib(SVLIBRARY("scfilt"));
// aModule is static, so it is not destroyed when the lambda returns.
2656 static ::osl::Module aModule
;
2657 bool bLoaded
= aModule
.is();
// First attempt: loadRelative() with the address of thisModule as anchor.
2659 bLoaded
= aModule
.loadRelative(&thisModule
, sFilterLib
);
// NOTE(review): presumably the fallback when the relative load fails - confirm.
2661 bLoaded
= aModule
.load(sFilterLib
);
// Look up the factory function by name and call it through FilterFn.
2664 oslGenericFunction fn
= aModule
.getFunctionSymbol( "ScFilterCreate" );
2666 return reinterpret_cast<FilterFn
>(fn
)();
// No plugin could be obtained on this path.
2669 return static_cast<ScFormatFilterPlugin
*>(nullptr);
// NOTE(review): presumably the DISABLE_DYNLOADING branch, calling the factory directly.
2671 return ScFilterCreate();
// NOTE(review): ReadCsvLine() below calls this with the separator list and
// the current character and compares the result against nullptr; presumably
// it returns a pointer to the first match in pStr - confirm against the body.
2678 // Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
2680 static const sal_Unicode
* lcl_UnicodeStrChr( const sal_Unicode
* pStr
,
// Constructs a read-only memory stream directly on the character buffer of
// rStr. The stream is marked as RTL_TEXTENCODING_UNICODE and its endianness
// is set according to OSL_BIGENDIAN.
2692 ScImportStringStream::ScImportStringStream( const OUString
& rStr
)
// The const_cast hands the string's buffer to SvMemoryStream; the stream is
// opened with StreamMode::READ. The size is the length in sal_Unicode units
// multiplied by sizeof(sal_Unicode).
2693 : SvMemoryStream( const_cast<sal_Unicode
*>(rStr
.getStr()),
2694 rStr
.getLength() * sizeof(sal_Unicode
), StreamMode::READ
)
2696 SetStreamCharSet( RTL_TEXTENCODING_UNICODE
);
2697 #ifdef OSL_BIGENDIAN
2698 SetEndian(SvStreamEndian::BIG
);
2700 SetEndian(SvStreamEndian::LITTLE
);
// Reads a CSV line from rStream and returns it as a string.
// One physical line is always read. If bEmbeddedLineBreak is set, the line
// is scanned for quotes and further physical lines may be appended, joined
// with "\n".
//
// rFieldSeparators: the separator characters; a detected separator
//                   (rcDetectSep) is appended to it.
// cFieldQuote:      the quote character.
// rcDetectSep:      if it is 0 on entry and bEmbeddedLineBreak is set, the
//                   line may be re-read once from its start after a
//                   separator has been detected.
// nMaxSourceLines:  the line limit is only checked when this is > 0.
2704 OUString
ReadCsvLine( SvStream
&rStream
, bool bEmbeddedLineBreak
,
2705 OUString
& rFieldSeparators
, sal_Unicode cFieldQuote
, sal_Unicode
& rcDetectSep
, sal_uInt32 nMaxSourceLines
)
// A retry is only allowed with embedded line breaks and no separator detected yet.
2713 } eRetryState
= (bEmbeddedLineBreak
&& rcDetectSep
== 0 ? RetryState::ALLOW
: RetryState::FORBID
);
// The stream position is only remembered when a retry is allowed.
2715 sal_uInt64 nStreamPos
= (eRetryState
== RetryState::ALLOW
? rStream
.Tell() : 0);
2717 Label_RetryWithNewSep
:
// Second pass: rewind to the remembered position. The state becomes RETRIED,
// so the ALLOW check further down cannot trigger again.
2719 if (eRetryState
== RetryState::RETRY
)
2721 eRetryState
= RetryState::RETRIED
;
2722 rStream
.Seek( nStreamPos
);
// Read the first physical line, limited to nArbitraryLineLengthLimit.
2726 rStream
.ReadUniOrByteStringLine(aStr
, rStream
.GetStreamCharSet(), nArbitraryLineLengthLimit
);
2728 if (bEmbeddedLineBreak
)
// Length and stream position used at the end of the function to cut the
// result back and reposition the stream.
2730 sal_Int32 nFirstLineLength
= aStr
.getLength();
2731 sal_uInt64 nFirstLineStreamPos
= rStream
.Tell();
2732 sal_uInt32 nLine
= 0;
2734 const sal_Unicode
* pSeps
= rFieldSeparators
.getStr();
2736 QuoteType eQuoteState
= FIELDEND_QUOTE
;
2737 bool bFieldStart
= true;
2739 sal_Int32 nLastOffset
= 0;
2740 sal_Int32 nQuotes
= 0;
// Loop while the stream has data and the accumulated line stays below the length limit.
2741 while (!rStream
.eof() && aStr
.getLength() < nArbitraryLineLengthLimit
)
// Scanning resumes at nLastOffset, the length aStr had before the last append.
2743 const sal_Unicode
* p
= aStr
.getStr() + nLastOffset
;
2744 const sal_Unicode
* const pStop
= aStr
.getStr() + aStr
.getLength();
2749 // Skip embedded null-characters. They don't change
2750 // anything and are handled at a higher level.
2757 if (*p
== cFieldQuote
)
// A quote starting a field: remember the current length and stream position.
2762 bFieldStart
= false;
2763 eQuoteState
= FIELDSTART_QUOTE
;
2764 nFirstLineLength
= aStr
.getLength();
2765 nFirstLineStreamPos
= rStream
.Tell();
2767 // Do not detect a FIELDSTART_QUOTE if not in
2768 // bFieldStart mode, in which case for unquoted content
2769 // we are in FIELDEND_QUOTE state.
2770 else if (eQuoteState
!= FIELDEND_QUOTE
)
2772 eQuoteState
= lcl_isEscapedOrFieldEndQuote( nQuotes
, p
, pSeps
, cFieldQuote
, rcDetectSep
);
// A separator was detected while a retry is still allowed: add it to the
// separator list and start over from the label above.
2774 if (eRetryState
== RetryState::ALLOW
&& rcDetectSep
)
2776 eRetryState
= RetryState::RETRY
;
2777 rFieldSeparators
+= OUStringChar(rcDetectSep
);
2778 pSeps
= rFieldSeparators
.getStr();
2779 goto Label_RetryWithNewSep
;
2782 // DONTKNOW_QUOTE is an embedded unescaped quote we
2783 // don't count for pairing.
2784 if (eQuoteState
!= DONTKNOW_QUOTE
)
2788 else if (eQuoteState
== FIELDEND_QUOTE
)
2791 // If blank is a separator it starts a field, if it
2792 // is not and thus maybe leading before quote we
2793 // are still at start of field regarding quotes.
2794 bFieldStart
= (*p
== ' ' || lcl_UnicodeStrChr( pSeps
, *p
) != nullptr);
2796 bFieldStart
= (lcl_UnicodeStrChr( pSeps
, *p
) != nullptr);
2801 if (*p
== cFieldQuote
&& bFieldStart
)
// Same bookkeeping as for the field-start quote above.
2804 eQuoteState
= FIELDSTART_QUOTE
;
2805 bFieldStart
= false;
2806 nFirstLineLength
= aStr
.getLength();
2807 nFirstLineStreamPos
= rStream
.Tell();
2809 else if (eQuoteState
== FIELDEND_QUOTE
)
2811 // This also skips leading blanks at beginning of line
2812 // if followed by a quote. It's debatable whether we
2813 // actually want that or not, but congruent with what
2814 // ScanNextFieldFromString() does.
2816 bFieldStart
= (*p
== ' ' || lcl_UnicodeStrChr( pSeps
, *p
) != nullptr);
2818 bFieldStart
= (lcl_UnicodeStrChr( pSeps
, *p
) != nullptr);
2821 // A quote character inside a field content does not start
// Test whether the number of counted quotes is even.
2826 if ((nQuotes
& 1) == 0)
2827 // We still have a (theoretical?) problem here if due to
2828 // nArbitraryLineLengthLimit (or nMaxSourceLines below) we
2829 // split a string right between a doubled quote pair.
2831 else if (eQuoteState
== DONTKNOW_QUOTE
)
2832 // A single unescaped quote somewhere in a quote started
2833 // field, most likely that was not meant to have embedded
2834 // linefeeds either.
2836 else if (++nLine
>= nMaxSourceLines
&& nMaxSourceLines
> 0)
2837 // Unconditionally increment nLine even if nMaxSourceLines==0
2838 // so it can be observed in debugger.
// Remember the current length, read the next physical line and append it
// after a line feed.
2842 nLastOffset
= aStr
.getLength();
2844 rStream
.ReadUniOrByteStringLine(aNext
, rStream
.GetStreamCharSet(), nArbitraryLineLengthLimit
);
2846 aStr
+= "\n" + aNext
;
2851 // No closing quote at all. A single quote at field start => no
2852 // embedded linefeeds for that field, take only first logical line.
2853 aStr
= aStr
.copy( 0, nFirstLineLength
);
2854 rStream
.Seek( nFirstLineStreamPos
);
2860 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */