1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <svl/numformat.hxx>
21 #include <tools/stream.hxx>
22 #include <osl/diagnose.h>
24 #include <docpool.hxx>
25 #include <document.hxx>
27 #include <fprogressbar.hxx>
29 #include <patattr.hxx>
30 #include <scerrors.hxx>
31 #include <scitems.hxx>
32 #include <stringutil.hxx>
// Keyword strings of the DIF file format, held as UTF-16 string views.
36 const std::u16string_view pKeyTABLE
= u
"TABLE";
37 const std::u16string_view pKeyVECTORS
= u
"VECTORS";
38 const std::u16string_view pKeyTUPLES
= u
"TUPLES";
39 const std::u16string_view pKeyDATA
= u
"DATA";
// BOT and EOD are the two special-dataset keywords.
40 const std::u16string_view pKeyBOT
= u
"BOT";
41 const std::u16string_view pKeyEOD
= u
"EOD";
// Imports a DIF stream into a spreadsheet document.
//
// rIn      stream to read; it also feeds the progress bar
// pDoc     target document; it is dereferenced unconditionally, so it must not be null
// rInsPos  insert position: Tab(), Col() and Row() are read as the base sheet,
//          the base column and the initial row counter
// eVon     text encoding handed to the DifParser
//
// Returns an ErrCode; the return statements in this function yield
// SCERR_IMPORT_FORMAT and SCWARN_IMPORT_RANGE_OVERFLOW.
43 ErrCode
ScFormatFilterPluginImpl::ScImportDif(SvStream
& rIn
, ScDocument
* pDoc
, const ScAddress
& rInsPos
,
44 const rtl_TextEncoding eVon
)
// The parser reads from rIn using the requested encoding.
46 DifParser
aDifParser( rIn
, *pDoc
, eVon
);
48 SCTAB nBaseTab
= rInsPos
.Tab();
50 TOPIC eTopic
= T_UNKNOWN
;
// Warning flags collected during the import.
51 bool bSyntErrWarn
= false;
52 bool bOverflowWarn
= false;
// rData is an alias for the parser's data buffer, not a copy.
54 OUStringBuffer
& rData
= aDifParser
.m_aData
;
58 ScfStreamProgressBar
aPrgrsBar( rIn
, pDoc
->GetDocumentShell() );
// Header phase: fetch topics until the data section (T_DATA) or the end (T_END) is reached.
60 while( eTopic
!= T_DATA
&& eTopic
!= T_END
)
62 eTopic
= aDifParser
.GetNextTopic();
// Remember whether the topic delivered a non-empty string.
66 const bool bData
= !rData
.isEmpty();
// Detects a header whose vector number is not 0 or whose value is not 1.
72 if( aDifParser
.nVector
!= 0 || aDifParser
.nVal
!= 1 )
// Name the base sheet after the string carried by the topic.
75 pDoc
->RenameTab(nBaseTab
, rData
.toString());
// Detects a non-zero vector number.
81 if( aDifParser
.nVector
!= 0 )
// Detects a vector number or value other than 0.
87 if( aDifParser
.nVector
!= 0 || aDifParser
.nVal
!= 0 )
104 OSL_FAIL( "ScImportDif - missing enum" );
109 if( eTopic
== T_DATA
)
110 { // data starts here
111 SCCOL nBaseCol
= rInsPos
.Col();
// The column counter starts at SCCOL_MAX; the comparisons against SCCOL_MAX
// below therefore tell whether it has been changed since.
113 SCCOL nColCnt
= SCCOL_MAX
;
114 SCROW nRowCnt
= rInsPos
.Row();
// Collects number formats per column; they are applied in one go after the loop.
115 DifAttrCache aAttrCache
;
117 DATASET eCurrent
= D_UNKNOWN
;
119 ScSetStringParam aStrParam
; // used to set string value without number detection.
120 aStrParam
.setTextInput();
// Data phase: process datasets until the end-of-data marker (D_EOD).
122 while( eCurrent
!= D_EOD
)
124 eCurrent
= aDifParser
.GetNextDataset();
126 aPrgrsBar
.Progress();
// Cell address built from the current column/row counters on the base sheet.
127 ScAddress
aPos(nColCnt
, nRowCnt
, nBaseTab
);
// Take the dataset's text out of the shared buffer, which is cleared by this call.
128 const OUString aData
= rData
.makeStringAndClear();
// False while nColCnt still holds its initial SCCOL_MAX value.
133 if( nColCnt
< SCCOL_MAX
)
139 case D_NUMERIC
: // Number cell
// Column counter still at its initial SCCOL_MAX value.
140 if( nColCnt
== SCCOL_MAX
)
// Only write when the position lies inside the document's valid column/row range.
143 if( pDoc
->ValidCol(nColCnt
) && pDoc
->ValidRow(nRowCnt
) )
145 pDoc
->EnsureTable(nBaseTab
);
// IsV() presumably tests for the DIF value indicator "V" — confirm in dif.hxx.
// In that case the parsed number and its number format are stored.
147 if( DifParser::IsV( aData
.getStr() ) )
149 pDoc
->SetValue(aPos
, aDifParser
.fVal
);
150 aAttrCache
.SetNumFormat( pDoc
, nColCnt
, nRowCnt
,
151 aDifParser
.nNumFormat
);
// TRUE/FALSE indicators are stored like numbers: parsed value plus number format.
153 else if( aData
== "TRUE" || aData
== "FALSE" )
155 pDoc
->SetValue(aPos
, aDifParser
.fVal
);
156 aAttrCache
.SetNumFormat( pDoc
, nColCnt
, nRowCnt
,
157 aDifParser
.nNumFormat
);
// NA/ERROR indicators are written as plain text (aStrParam suppresses number detection).
159 else if( aData
== "NA" || aData
== "ERROR" )
161 pDoc
->SetString(aPos
, aData
, &aStrParam
);
// Any other indicator is written as the text "#IND:<indicator>?".
165 OUString aTmp
= "#IND:" + aData
+ "?";
166 pDoc
->SetString(aPos
, aTmp
, &aStrParam
);
// Flag that data did not fit into the sheet.
170 bOverflowWarn
= true;
174 case D_STRING
: // Text cell
175 if( nColCnt
== SCCOL_MAX
)
178 if( pDoc
->ValidCol(nColCnt
) && pDoc
->ValidRow(nRowCnt
) )
// Empty strings do not create a cell.
180 if (!aData
.isEmpty())
182 pDoc
->EnsureTable(nBaseTab
);
183 pDoc
->SetTextCell(aPos
, aData
);
187 bOverflowWarn
= true;
196 OSL_FAIL( "ScImportDif - missing enum" );
// Write the number formats collected during the loop to the base sheet.
200 aAttrCache
.Apply( *pDoc
, nBaseTab
);
203 return SCERR_IMPORT_FORMAT
;
207 // FIXME: Add proper warning!
208 return SCWARN_IMPORT_RANGE_OVERFLOW
;
210 else if( bOverflowWarn
)
211 return SCWARN_IMPORT_RANGE_OVERFLOW
;
// Constructs a parser reading from rNewIn.
//
// rNewIn    input stream
// rDoc      document whose number formatter (GetFormatTable()) is used for number detection
// eCharSet  character set to read the stream with
216 DifParser::DifParser( SvStream
& rNewIn
, const ScDocument
& rDoc
, rtl_TextEncoding eCharSet
)
221 , pNumFormatter(rDoc
.GetFormatTable())
// A stream whose character set differs from the requested one is reported via
// OSL_FAIL and then switched to eCharSet.
224 if ( rIn
.GetStreamCharSet() != eCharSet
)
226 OSL_FAIL( "CharSet passed overrides and modifies StreamCharSet" );
227 rIn
.SetStreamCharSet( eCharSet
);
229 rIn
.StartReadingUnicodeText( eCharSet
);
// Reads the next header item from the stream and returns its topic.
// Along the way nVector and nVal are filled through ScanIntVal(), and the
// item's string line, minus its first and last character, is appended to m_aData.
// The result starts out as T_UNKNOWN.
232 TOPIC
DifParser::GetNextTopic()
// States of the line-by-line header parser.
234 enum STATE
{ S_VectorVal
, S_Data
, S_END
, S_START
, S_UNKNOWN
, S_ERROR_L2
};
// Keyword table and the parallel table of topics; both are indexed with the same nCnt below.
236 static const std::u16string_view ppKeys
[] =
254 static const TOPIC pTopics
[] =
277 TOPIC eRet
= T_UNKNOWN
;
281 if( !ReadNextLine( aLine
) )
// pRef walks through the keyword table.
291 const std::u16string_view
* pRef
;
295 pRef
= &ppKeys
[ nCnt
];
// The topic is taken from the same index as the keyword.
301 eRet
= pTopics
[ nCnt
];
307 pRef
= &ppKeys
[ nCnt
];
// Parse the "<vector>,<value>" line: first number into nVector ...
321 const sal_Unicode
* pCur
= aLine
.getStr();
323 pCur
= ScanIntVal( pCur
, nVector
);
// ... then, if the scan succeeded and a comma follows, the second into nVal.
325 if( pCur
&& *pCur
== ',' )
328 ScanIntVal( pCur
, nVal
);
// String line: drop its first and last character (presumably the enclosing
// quotes) and keep the rest; a line of two characters or fewer contributes nothing.
336 OSL_ENSURE( aLine
.getLength() >= 2,
337 "+GetNextTopic(): <String> is too short!" );
338 if( aLine
.getLength() > 2 )
339 m_aData
.append(aLine
.subView(1, aLine
.getLength() - 2));
345 OSL_FAIL( "DifParser::GetNextTopic - unexpected state" );
349 ReadNextLine( aLine
);
351 case S_ERROR_L2
: // error happened in line 2
353 ReadNextLine( aLine
);
357 OSL_FAIL( "DifParser::GetNextTopic - missing enum" );
// Replaces every doubled quote ("") in rString with a single quote; the buffer
// is emptied by makeStringAndClear() and then assigned the converted text.
364 static void lcl_DeEscapeQuotesDif(OUStringBuffer
& rString
)
366 // Special handling for DIF import: Escaped (duplicated) quotes are resolved.
367 // Single quote characters are left in place because older versions didn't
368 // escape quotes in strings (and Excel doesn't when using the clipboard).
369 // The quotes around the string are removed before this function is called.
371 rString
= rString
.makeStringAndClear().replaceAll("\"\"", "\"");
374 // Determine if passed in string is numeric data and set fVal/nNumFormat if so
// pPossibleNumericData  null-terminated text to test
// The result starts out as D_SYNT_ERROR.
375 DATASET
DifParser::GetNumberDataset( const sal_Unicode
* pPossibleNumericData
)
377 DATASET eRet
= D_SYNT_ERROR
;
// The number formatter is required; pNumFormatter is dereferenced below.
379 OSL_ENSURE( pNumFormatter
, "-DifParser::GetNumberDataset(): No Formatter, more fun!" );
380 OUString
aTestVal( pPossibleNumericData
);
381 sal_uInt32 nFormat
= 0;
// Let the formatter decide whether the text is a number; it delivers the
// detected format in nFormat and the value in fTmpVal.
383 if( pNumFormatter
->IsNumberFormat( aTestVal
, nFormat
, fTmpVal
) )
// Remember the detected number format on the parser.
386 nNumFormat
= nFormat
;
// Delivers the next input line in rStr.
// A line buffered in aLookAheadLine (filled by LookAhead()) takes precedence
// over reading from the stream.
395 bool DifParser::ReadNextLine( OUString
& rStr
)
// Nothing buffered: read from the stream and pass on its success flag.
397 if( aLookAheadLine
.isEmpty() )
399 return rIn
.ReadUniOrByteStringLine( rStr
, rIn
.GetStreamCharSet() );
// Otherwise hand out the buffered line and empty the buffer.
403 rStr
= aLookAheadLine
;
404 aLookAheadLine
.clear();
409 // Look ahead in the stream to determine if the next line is the first line of
410 // a valid data record structure
// The line read here is kept in aLookAheadLine, from where ReadNextLine()
// serves it on its next call. Returns bValidStructure, which is false unless
// one of the checks below succeeds.
411 bool DifParser::LookAhead()
413 const sal_Unicode
* pCurrentBuffer
;
414 bool bValidStructure
= false;
// The look-ahead buffer holds a single line, so it must be empty on entry.
416 OSL_ENSURE( aLookAheadLine
.isEmpty(), "*DifParser::LookAhead(): LookAhead called twice in a row" );
417 rIn
.ReadUniOrByteStringLine( aLookAheadLine
, rIn
.GetStreamCharSet() );
419 pCurrentBuffer
= aLookAheadLine
.getStr();
// The first character selects the kind of record to validate.
421 switch( *pCurrentBuffer
)
423 case '-': // Special Datatype
426 if( Is1_0( pCurrentBuffer
) )
428 bValidStructure
= true;
431 case '0': // Numeric Data
// A numeric record needs a comma, and the text passed to GetNumberDataset()
// must be recognised as a number.
433 if( *pCurrentBuffer
== ',' )
436 bValidStructure
= ( GetNumberDataset(pCurrentBuffer
) != D_SYNT_ERROR
);
439 case '1': // String Data
440 if( Is1_0( aLookAheadLine
.getStr() ) )
442 bValidStructure
= true;
446 return bValidStructure
;
// Reads the next data record from the stream and returns its kind.
// Record text is stored in m_aData. The result starts out as D_UNKNOWN.
449 DATASET
DifParser::GetNextDataset()
451 DATASET eRet
= D_UNKNOWN
;
453 const sal_Unicode
* pCurrentBuffer
;
455 ReadNextLine( aLine
);
457 pCurrentBuffer
= aLine
.getStr();
// The first character of the record's first line selects its kind.
459 switch( *pCurrentBuffer
)
461 case '-': // Special Datatype
464 if( Is1_0( pCurrentBuffer
) )
// The second line of a special record carries the keyword (BOT or EOD).
466 ReadNextLine( aLine
);
467 if( IsBOT( aLine
.getStr() ) )
469 else if( IsEOD( aLine
.getStr() ) )
473 case '0': // Numeric Data
474 pCurrentBuffer
++; // value in fVal, 2. line in m_aData
475 if( *pCurrentBuffer
== ',' )
478 eRet
= GetNumberDataset(pCurrentBuffer
);
// The second line of the record is read in either case.
480 ReadNextLine( aTmpLine
);
481 if ( eRet
== D_SYNT_ERROR
)
482 { // for broken records write "#ERR: data" to cell
483 m_aData
= OUString::Concat("#ERR: ") + pCurrentBuffer
+ " (" + aTmpLine
+ ")";
492 case '1': // String Data
493 if( Is1_0( aLine
.getStr() ) )
495 ReadNextLine( aLine
);
496 sal_Int32 nLineLength
= aLine
.getLength();
497 const sal_Unicode
* pLine
= aLine
.getStr();
// A string value has to start with a quote character.
499 if( nLineLength
>= 1 && *pLine
== '"' )
501 // Quotes are not always escaped (duplicated), see lcl_DeEscapeQuotesDif
502 // A look ahead into the next line is needed in order to deal with
503 // multiline strings containing quotes
506 // Single line string
507 if( nLineLength
>= 2 && pLine
[nLineLength
- 1] == '"' )
// Drop the first and last character (the quotes), then resolve doubled quotes.
509 m_aData
= aLine
.subView( 1, nLineLength
- 2 );
510 lcl_DeEscapeQuotesDif(m_aData
);
// Multi-line string: keep everything after the opening quote and continue
// with the following lines.
517 m_aData
= aLine
.subView( 1 );
518 bool bContinue
= true;
// Lines are joined with a newline character.
521 m_aData
.append("\n");
522 bContinue
= !rIn
.eof() && ReadNextLine( aLine
);
525 nLineLength
= aLine
.getLength();
526 if( nLineLength
>= 1 )
528 pLine
= aLine
.getStr();
// Continue as long as the next line does not start a valid data record.
529 bContinue
= !LookAhead();
532 m_aData
.append(aLine
);
// A line ending in a quote closes the string: append it without that quote
// and resolve doubled quotes.
534 else if( pLine
[nLineLength
- 1] == '"' )
536 m_aData
.append(aLine
.subView(0, nLineLength
-1));
537 lcl_DeEscapeQuotesDif(m_aData
);
// For an unrecognised record one more line is read.
549 if( eRet
== D_UNKNOWN
)
550 ReadNextLine( aLine
);
// Scans an unsigned decimal number at pStart into rRet.
//
// pStart  text to scan
// rRet    receives the scanned value
// The returned pointer can be null (GetNextTopic() tests for that before
// dereferencing it); presumably it otherwise points behind the scanned digits — TODO confirm.
558 const sal_Unicode
* DifParser::ScanIntVal( const sal_Unicode
* pStart
, sal_uInt32
& rRet
)
560 // eat leading whitespace, not specified, but seen in the wild
561 while (*pStart
== ' ' || *pStart
== '\t')
564 sal_Unicode cCurrent
= *pStart
;
// The first digit initialises the result.
566 if( IsNumber( cCurrent
) )
567 rRet
= static_cast<sal_uInt32
>( cCurrent
- '0' );
// Further digits are consumed only while rRet is below 0xFFFFFFFF / 10 —
// presumably so that scaling rRet by ten cannot overflow 32 bits.
574 while( IsNumber( cCurrent
) && rRet
< ( 0xFFFFFFFF / 10 ) )
577 rRet
+= static_cast<sal_uInt32
>( cCurrent
- '0' );
// Default constructor of DifColumn.
586 DifColumn::DifColumn ()
// Records that row nRow of this column uses number format nNumFormat.
// Consecutive rows with the same format are merged into one entry.
//
// pDoc        document, used only to validate nRow in the assertion below
// nRow        row to record
// nNumFormat  number format of that row
591 void DifColumn::SetNumFormat( const ScDocument
* pDoc
, SCROW nRow
, const sal_uInt32 nNumFormat
)
593 OSL_ENSURE( pDoc
->ValidRow(nRow
), "*DifColumn::SetNumFormat(): Row too big!" );
599 OSL_ENSURE( nRow
> 0,
600 "*DifColumn::SetNumFormat(): more cannot be zero!" );
// Rows are expected in ascending order.
601 OSL_ENSURE( nRow
> mpCurrent
->nEnd
,
602 "*DifColumn::SetNumFormat(): start from scratch?" );
// Same format directly below the current entry: extend that entry by one row.
604 if( mpCurrent
->nNumFormat
== nNumFormat
&& mpCurrent
->nEnd
== nRow
- 1 )
605 mpCurrent
->nEnd
= nRow
;
// Otherwise a new entry is started.
607 NewEntry( nRow
, nNumFormat
);
// NOTE(review): presumably the branch taken when no current entry exists yet — confirm.
610 NewEntry(nRow
,nNumFormat
);
// Appends a new entry that covers the single row nPos with number format
// nNumFormat, and makes it the current entry.
616 void DifColumn::NewEntry( const SCROW nPos
, const sal_uInt32 nNumFormat
)
618 maEntries
.emplace_back();
// mpCurrent points at the entry that was just appended.
619 mpCurrent
= &maEntries
.back();
620 mpCurrent
->nStart
= mpCurrent
->nEnd
= nPos
;
621 mpCurrent
->nNumFormat
= nNumFormat
;
// Applies the recorded number formats of this column to the document.
//
// rDoc  document to modify
// nCol  column the entries belong to
// nTab  sheet to apply them on
625 void DifColumn::Apply( ScDocument
& rDoc
, const SCCOL nCol
, const SCTAB nTab
)
// One pattern object is used for all entries; only its value-format item changes.
627 ScPatternAttr
aAttr(rDoc
.getCellAttributeHelper());
628 SfxItemSet
&rItemSet
= aAttr
.GetItemSet();
630 for (const auto& rEntry
: maEntries
)
632 OSL_ENSURE( rEntry
.nNumFormat
> 0,
633 "+DifColumn::Apply(): Number format must not be 0!" );
635 rItemSet
.Put( SfxUInt32Item( ATTR_VALUE_FORMAT
, rEntry
.nNumFormat
) );
// Apply the pattern to rows nStart..nEnd of column nCol.
637 rDoc
.ApplyPatternAreaTab( nCol
, rEntry
.nStart
, nCol
, rEntry
.nEnd
, nTab
, aAttr
);
// Clear the item set again after the pattern has been applied.
639 rItemSet
.ClearItem();
// Default constructor of DifAttrCache.
643 DifAttrCache::DifAttrCache()
// Destructor of DifAttrCache.
647 DifAttrCache::~DifAttrCache()
// Records number format nNumFormat for the cell at (nCol, nRow).
//
// pDoc        document, used for the column check and passed on to the column object
// nCol        column of the cell
// nRow        row of the cell
// nNumFormat  number format to record
651 void DifAttrCache::SetNumFormat( const ScDocument
* pDoc
, const SCCOL nCol
, const SCROW nRow
, const sal_uInt32 nNumFormat
)
653 OSL_ENSURE( pDoc
->ValidCol(nCol
), "-DifAttrCache::SetNumFormat(): Col too big!" );
// Column objects are created on first use.
655 if( !maColMap
.count(nCol
) )
656 maColMap
[ nCol
].reset( new DifColumn
);
658 maColMap
[ nCol
]->SetNumFormat( pDoc
, nRow
, nNumFormat
);
// Applies all recorded number formats to sheet nTab of rDoc.
//
// rDoc  document to modify
// nTab  sheet to apply the formats on
661 void DifAttrCache::Apply( ScDocument
& rDoc
, SCTAB nTab
)
// Walk the sheet's writable columns from 0 to MaxCol().
663 for( SCCOL nCol
: rDoc
.GetWritableColumnsRange(nTab
, 0, rDoc
.MaxCol()) )
// Only columns with a recorded entry are applied; the count() check keeps
// operator[] from inserting empty map entries.
665 if( maColMap
.count(nCol
) )
666 maColMap
[ nCol
]->Apply( rDoc
, nCol
, nTab
);
670 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */