/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
22 #include <o3tl/deleter.hxx>
23 #include <sot/formats.hxx>
24 #include <address.hxx>
25 #include <tools/stream.hxx>
27 #include <com/sun/star/uno/Any.hxx>
34 * These options control how multi-line cells are converted during export in
35 * certain lossy formats (such as csv).
37 struct ScExportTextOptions
39 enum NewlineConversion
{ ToSystem
, ToSpace
, None
};
40 ScExportTextOptions( NewlineConversion eNewlineConversion
= ToSystem
, sal_Unicode cSeparatorConvertTo
= 0, bool bAddQuotes
= false ) :
41 meNewlineConversion( eNewlineConversion
), mcSeparatorConvertTo( cSeparatorConvertTo
), mbAddQuotes( bAddQuotes
) {}
43 NewlineConversion meNewlineConversion
;
44 sal_Unicode mcSeparatorConvertTo
; // Convert separator to this character
48 class SAL_DLLPUBLIC_RTTI ScImportExport
52 std::unique_ptr
<ScDocument
, o3tl::default_delete
<ScDocument
>> pUndoDoc
;
55 OUString aNonConvertibleChars
;
56 OUString maFilterOptions
;
57 sal_uInt32 nSizeLimit
;
59 sal_Unicode cSep
; // Separator
60 sal_Unicode cStr
; // String Delimiter
61 bool bFormulas
; // Formula in Text?
62 bool bIncludeFiltered
; // include filtered rows? (default true)
63 bool bAll
; // no selection
64 bool bSingle
; // Single selection
65 bool bUndo
; // with Undo?
66 bool bOverflowRow
; // too many rows
67 bool bOverflowCol
; // too many columns
68 bool bOverflowCell
; // too much data for a cell
70 bool mbImportBroadcast
; // whether or not to broadcast after data import.
71 bool mbOverwriting
; // Whether we could be overwriting existing values (paste).
72 // In this case we cannot use the insert optimization, but we
73 // do not need to broadcast after the import.
74 bool mbIncludeBOM
; // Whether to include a byte-order-mark in the output.
75 ScExportTextOptions mExportTextOptions
;
77 std::unique_ptr
<ScAsciiOptions
> pExtOptions
; // extended options
79 bool StartPaste(); // Protect check, set up Undo
80 void EndPaste(bool bAutoRowHeight
= true); // Undo/Redo actions, Repaint
81 bool Doc2Text( SvStream
& );
82 bool Text2Doc( SvStream
& );
83 bool Doc2Sylk( SvStream
& );
84 bool Sylk2Doc( SvStream
& );
85 bool Doc2HTML( SvStream
&, const OUString
& );
86 bool Doc2RTF( SvStream
& );
87 bool Doc2Dif( SvStream
& );
88 bool Dif2Doc( SvStream
& );
89 bool ExtText2Doc( SvStream
& ); // with pExtOptions
90 bool RTF2Doc( SvStream
&, const OUString
& rBaseURL
);
91 bool HTML2Doc( SvStream
&, const OUString
& rBaseURL
);
94 ScImportExport( ScDocument
& ); // the whole document
95 ScImportExport( ScDocument
&, const OUString
& ); // Range/cell input
96 SC_DLLPUBLIC
ScImportExport( ScDocument
&, const ScAddress
& );
97 SC_DLLPUBLIC
ScImportExport( ScDocument
&, const ScRange
& );
98 SC_DLLPUBLIC
~ScImportExport() COVERITY_NOEXCEPT_FALSE
;
100 void SetExtOptions( const ScAsciiOptions
& rOpt
);
101 void SetFilterOptions( const OUString
& rFilterOptions
);
102 bool IsRef() const { return !bAll
; }
104 const ScRange
& GetRange() const { return aRange
; }
106 SC_DLLPUBLIC
static void EmbeddedNullTreatment( OUString
& rStr
);
108 static bool IsFormatSupported( SotClipboardFormatId nFormat
);
109 static const sal_Unicode
* ScanNextFieldFromString( const sal_Unicode
* p
,
110 OUString
& rField
, sal_Unicode cStr
, const sal_Unicode
* pSeps
,
111 bool bMergeSeps
, bool& rbIsQuoted
, bool& rbOverflowCell
, bool bRemoveSpace
);
113 /** ScImportExport::CountVisualWidth
114 Count the width of string visually ( in multiple of western characters), considering CJK
115 ideographs and CJK symbols (U+3000-U+303F) as twice the width of western characters.
116 @param rStr the string.
117 @param nIdx the starting index, index is incremented for each counted character.
118 @param nMaxWidth the maximum width to count.
119 @return the sum of the width of counted characters.
121 static sal_Int32
CountVisualWidth(std::u16string_view rStr
, sal_Int32
& nIdx
, sal_Int32 nMaxWidth
);
123 /** ScImportExport::CountVisualWidth
124 @return the sum of the visual width of the whole string.
126 static sal_Int32
CountVisualWidth(std::u16string_view rStr
);
128 //! only if stream is only used in own (!) memory
129 static void SetNoEndianSwap( SvStream
& rStrm
);
131 void SetSeparator( sal_Unicode c
) { cSep
= c
; }
132 void SetDelimiter( sal_Unicode c
) { cStr
= c
; }
133 void SetFormulas( bool b
) { bFormulas
= b
; }
134 void SetIncludeFiltered( bool b
) { bIncludeFiltered
= b
; }
136 void SetStreamPath( const OUString
& rPath
) { aStreamPath
= rPath
; }
138 bool ImportString( const OUString
&, SotClipboardFormatId
);
139 bool ExportString( OUString
&, SotClipboardFormatId
);
140 bool ExportByteString( OString
&, rtl_TextEncoding
, SotClipboardFormatId
);
142 SC_DLLPUBLIC
bool ImportStream( SvStream
&, const OUString
& rBaseURL
, SotClipboardFormatId
);
143 SC_DLLPUBLIC
bool ExportStream( SvStream
&, const OUString
& rBaseURL
, SotClipboardFormatId
);
145 bool ExportData( std::u16string_view rMimeType
,
146 css::uno::Any
& rValue
);
149 bool IsOverflowRow() const { return bOverflowRow
; }
150 bool IsOverflowCol() const { return bOverflowCol
; }
151 bool IsOverflowCell() const { return bOverflowCell
; }
152 bool IsOverflow() const { return bOverflowRow
|| bOverflowCol
|| bOverflowCell
; }
154 const OUString
& GetNonConvertibleChars() const { return aNonConvertibleChars
; }
156 void SetApi( bool bApi
) { mbApi
= bApi
; }
157 void SetImportBroadcast( bool b
) { mbImportBroadcast
= b
; }
158 void SetOverwriting( const bool bOverwriting
) { mbOverwriting
= bOverwriting
; }
159 void SetExportTextOptions( const ScExportTextOptions
& options
) { mExportTextOptions
= options
; }
161 bool GetIncludeBOM() const { return mbIncludeBOM
; }
164 // Helper class for importing clipboard strings as streams.
165 class ScImportStringStream
: public SvMemoryStream
168 ScImportStringStream(const OUString
& rStr
);
171 /** Read a CSV (comma separated values) data line using
172 ReadUniOrByteStringLine().
174 @param bEmbeddedLineBreak
175 If TRUE and a line-break occurs inside a field of data,
176 a line feed LF '\n' and the next line are appended. Repeats
177 until a line-break is not in a field. A field is determined
178 by delimiting rFieldSeparators and optionally surrounded by
179 a pair of cFieldQuote characters. For a line-break to be
180 within a field, the field content MUST be surrounded by
181 cFieldQuote characters, and the opening cFieldQuote MUST be
182 at the very start of a line or follow right behind a field
183 separator with no extra characters in between, with the
184 exception of blanks contradictory to RFC 4180. Anything,
185 including field separators and escaped quotes (by doubling
186 them) may appear in a quoted field.
188 If bEmbeddedLineBreak==FALSE, nothing is parsed and the
189 string returned is simply one ReadUniOrByteStringLine().
191 @param rFieldSeparators
192 A list of characters that each may act as a field separator.
193 If rcDetectSep was 0 and a separator is detected then it is appended to
197 The quote character used.
200 If 0 then attempt to detect a possible separator if
201 rFieldSeparators doesn't include it already. This can be necessary because
202 of the "accept broken misquoted CSV fields" feature that tries to ignore
203 trailing blanks after a quoted field and if no separator follows continues
204 to add content to the field assuming the single double quote was in error.
205 It is also necessary if the only possible separator was not selected and
206 not included in rFieldSeparators and a line starts with a quoted field, in
207 which case appending lines is tried until end of file.
208 If a separator is detected it is added to rFieldSeparators and the
209 line is reread with the new separators
211 @param nMaxSourceLines
212 Maximum source lines to read and combine into one logical line for embedded
213 new line purpose. Should be limited for the preview dialog because only
214 non-matching separators selected otherwise would lead to trying to
215 concatenate lines until file end.
216 If 0 no limit other than the internal arbitrary resulting line length
219 check Stream::good() to detect IO problems during read
222 Note that the string returned may be truncated even inside
223 a quoted field if some (arbitrary) maximum length was reached.
224 There currently is no way to exactly determine the conditions,
225 whether this was at a line end, or whether open quotes
226 would have closed the field before the line end, as even a
227 ReadUniOrByteStringLine() may return prematurely but the
228 stream was positioned ahead until the real end of line.
229 Additionally, due to character encoding conversions, string
230 length and bytes read don't necessarily match, and
231 resyncing to a previous position matching the string's
232 length isn't always possible. As a result, a logical line
233 with embedded line breaks and more than the maximum length
234 characters will be spoiled, and a subsequent ReadCsvLine()
235 may start under false preconditions.
238 SC_DLLPUBLIC OUString
ReadCsvLine( SvStream
&rStream
, bool bEmbeddedLineBreak
,
239 OUString
& rFieldSeparators
, sal_Unicode cFieldQuote
, sal_Unicode
& rcDetectSep
,
240 sal_uInt32 nMaxSourceLines
= 0 );
242 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */