1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
22 #include <svl/zforlist.hxx>
23 #include <osl/diagnose.h>
26 #include "docpool.hxx"
27 #include "document.hxx"
29 #include "fprogressbar.hxx"
31 #include "patattr.hxx"
32 #include "scerrors.hxx"
33 #include "scitems.hxx"
34 #include "stringutil.hxx"
35 #include <boost/scoped_ptr.hpp>
// Keyword literals used by the DIF parser, each stored as a zero-terminated
// sal_Unicode array so it can be compared against OUString / sal_Unicode* data.
// pKey1_0 holds the three characters "1,0".
37 const sal_Unicode pKeyTABLE
[] = { 'T', 'A', 'B', 'L', 'E', 0 };
38 const sal_Unicode pKeyVECTORS
[] = { 'V', 'E', 'C', 'T', 'O', 'R', 'S', 0 };
39 const sal_Unicode pKeyTUPLES
[] = { 'T', 'U', 'P', 'L', 'E', 'S', 0 };
40 const sal_Unicode pKeyDATA
[] = { 'D', 'A', 'T', 'A', 0 };
41 const sal_Unicode pKeyBOT
[] = { 'B', 'O', 'T', 0 };
42 const sal_Unicode pKeyEOD
[] = { 'E', 'O', 'D', 0 };
43 const sal_Unicode pKeyERROR
[] = { 'E', 'R', 'R', 'O', 'R', 0 };
44 const sal_Unicode pKeyTRUE
[] = { 'T', 'R', 'U', 'E', 0 };
45 const sal_Unicode pKeyFALSE
[] = { 'F', 'A', 'L', 'S', 'E', 0 };
46 const sal_Unicode pKeyNA
[] = { 'N', 'A', 0 };
47 const sal_Unicode pKeyV
[] = { 'V', 0 };
48 const sal_Unicode pKey1_0
[] = { '1', ',', '0', 0 };
// Imports DIF data from the stream rIn into pDoc.
//
// Parameters:
//   rIn        - input stream; also drives the progress bar.
//   pDoc       - target document; cells are written on sheet rInsPos.Tab().
//   rInsPos    - insert position; its Tab/Col/Row seed nBaseTab/nBaseCol/nRowCnt.
//   eVon       - text encoding handed to the DifParser.
//   nDifOption - option value handed to the DifParser (compared against
//                SC_DIFOPT_PLAIN in the DifParser constructor).
//
// Visible flow: (1) read header topics with GetNextTopic() until T_DATA or
// T_END; (2) if T_DATA was reached, read datasets with GetNextDataset() until
// D_EOD, writing numeric and string cells; (3) apply the cached cell attributes.
// eERR_RNGOVRFLW is returned on the two visible return paths.
//
// NOTE(review): this listing is missing lines (braces, case labels, else
// branches, some returns). Comments below describe only the visible statements.
50 FltError
ScFormatFilterPluginImpl::ScImportDif( SvStream
& rIn
, ScDocument
* pDoc
, const ScAddress
& rInsPos
,
51 const rtl_TextEncoding eVon
, sal_uInt32 nDifOption
)
53 DifParser
aDifParser( rIn
, nDifOption
, *pDoc
, eVon
);
55 const bool bPlain
= aDifParser
.IsPlain();
57 SCTAB nBaseTab
= rInsPos
.Tab();
59 TOPIC eTopic
= T_UNKNOWN
;
60 bool bSyntErrWarn
= false;
61 bool bOverflowWarn
= false;
// aData aliases the parser's current data string; it changes as the parser advances.
63 OUString
& aData
= aDifParser
.aData
;
67 ScfStreamProgressBar
aPrgrsBar( rIn
, pDoc
->GetDocumentShell() );
// Header phase: consume topics until the data section or the end is reached.
69 while( eTopic
!= T_DATA
&& eTopic
!= T_END
)
71 eTopic
= aDifParser
.GetNextTopic();
75 const bool bData
= !aData
.isEmpty();
// Per-topic checks on the parsed nVector / nVal values. The case labels that
// select between these checks are not visible in this listing.
81 if( aDifParser
.nVector
!= 0 || aDifParser
.nVal
!= 1 )
// Renames sheet nBaseTab to the topic's string value (guarding condition not visible).
84 pDoc
->RenameTab( nBaseTab
, aData
);
89 if( aDifParser
.nVector
!= 0 )
95 if( aDifParser
.nVector
!= 0 )
101 if( aDifParser
.nVector
!= 0 || aDifParser
.nVal
!= 0 )
118 OSL_FAIL( "ScImportDif - missing enum" );
123 if( eTopic
== T_DATA
)
124 { // data starts here
125 SCCOL nBaseCol
= rInsPos
.Col();
// nColCnt starts at SCCOL_MAX and is tested against it below; presumably this
// marks "no row started yet" - confirm against the missing D_BOT handling.
127 SCCOL nColCnt
= SCCOL_MAX
;
128 SCROW nRowCnt
= rInsPos
.Row();
129 DifAttrCache
aAttrCache( bPlain
);
131 DATASET eAkt
= D_UNKNOWN
;
133 ScSetStringParam aStrParam
; // used to set string value without number detection.
134 aStrParam
.setTextInput();
// Data phase: one dataset per iteration until the end-of-data record.
136 while( eAkt
!= D_EOD
)
138 eAkt
= aDifParser
.GetNextDataset();
140 aPrgrsBar
.Progress();
// Target cell address for the current dataset.
141 ScAddress
aPos(nColCnt
, nRowCnt
, nBaseTab
);
146 if( nColCnt
< SCCOL_MAX
)
152 case D_NUMERIC
: // Number cell
153 if( nColCnt
== SCCOL_MAX
)
156 if( ValidCol(nColCnt
) && ValidRow(nRowCnt
) )
158 pDoc
->EnsureTable(nBaseTab
);
// Data string "V": store the parsed value and remember its number format.
160 if( DifParser::IsV( aData
.getStr() ) )
162 pDoc
->SetValue(aPos
, aDifParser
.fVal
);
164 aAttrCache
.SetNumFormat( nColCnt
, nRowCnt
,
165 aDifParser
.nNumFormat
);
// Data string "TRUE"/"FALSE": store the value and record either a logical
// attribute or a number format (the condition choosing between them is not visible).
167 else if( aData
== pKeyTRUE
|| aData
== pKeyFALSE
)
169 pDoc
->SetValue(aPos
, aDifParser
.fVal
);
171 aAttrCache
.SetLogical( nColCnt
, nRowCnt
);
173 aAttrCache
.SetNumFormat( nColCnt
, nRowCnt
,
174 aDifParser
.nNumFormat
);
// Data string "NA"/"ERROR": written verbatim as text.
176 else if( aData
== pKeyNA
|| aData
== pKeyERROR
)
178 pDoc
->SetString(aPos
, aData
, &aStrParam
);
// Any other data string is written as "#IND:<data>?" (presumably the final else branch).
182 OUString aTmp
= "#IND:" + aData
+ "?";
183 pDoc
->SetString(aPos
, aTmp
, &aStrParam
);
187 bOverflowWarn
= true;
191 case D_STRING
: // Text cell
192 if( nColCnt
== SCCOL_MAX
)
195 if( ValidCol(nColCnt
) && ValidRow(nRowCnt
) )
// Empty strings are not written to the document.
197 if (!aData
.isEmpty())
199 pDoc
->EnsureTable(nBaseTab
);
200 pDoc
->SetTextCell(aPos
, aData
);
204 bOverflowWarn
= true;
213 OSL_FAIL( "ScImportDif - missing enum" );
// Flush the collected per-column attribute ranges into the document.
217 aAttrCache
.Apply( *pDoc
, nBaseTab
);
224 // FIXME: Add proper warning!
225 return eERR_RNGOVRFLW
;
227 else if( bOverflowWarn
)
228 return eERR_RNGOVRFLW
;
// Constructs a DIF parser over the stream rNewIn.
//   nOption - compared with SC_DIFOPT_PLAIN to set bPlain.
//   rDoc    - document whose format table may be used as number formatter.
//   e       - text encoding (presumably stored as eCharSet in the initializer
//             list, which is not visible here).
// If the stream's charset differs from eCharSet, the stream is switched to
// eCharSet, then Unicode text reading is started on the stream.
// NOTE(review): the condition selecting pNumFormatter = NULL versus
// rDoc.GetFormatTable() is not visible in this listing.
233 DifParser::DifParser( SvStream
& rNewIn
, const sal_uInt32 nOption
, ScDocument
& rDoc
, rtl_TextEncoding e
)
241 if ( rIn
.GetStreamCharSet() != eCharSet
)
243 OSL_FAIL( "CharSet passed overrides and modifies StreamCharSet" );
244 rIn
.SetStreamCharSet( eCharSet
);
246 rIn
.StartReadingUnicodeText( eCharSet
);
248 bPlain
= ( nOption
== SC_DIFOPT_PLAIN
);
251 pNumFormatter
= NULL
;
253 pNumFormatter
= rDoc
.GetFormatTable();
// Reads the next header topic from the stream and returns its TOPIC value
// (T_UNKNOWN is the initial value of the result).
//
// Visible behaviour: a local state machine (enum STATE) reads lines via
// ReadNextLine(); the topic name is matched against the ppKeys table and the
// result taken from pTopics at the same index; the "vector,value" line is
// parsed into nVector and nVal with ScanIntVal(); the string line has its
// first and last character removed and is stored in aData.
//
// NOTE(review): this listing is missing lines (loop, switch/case labels, table
// contents). Comments describe only the visible statements.
256 TOPIC
DifParser::GetNextTopic()
258 enum STATE
{ S_VectorVal
, S_Data
, S_END
, S_START
, S_UNKNOWN
, S_ERROR_L2
};
260 static const sal_Unicode pKeyLABEL
[] = { 'L', 'A', 'B', 'E', 'L', 0 };
261 static const sal_Unicode pKeyCOMMENT
[] = { 'C', 'O', 'M', 'M', 'E', 'N', 'T', 0 };
262 static const sal_Unicode pKeySIZE
[] = { 'S', 'I', 'Z', 'E', 0 };
263 static const sal_Unicode pKeyPERIODICITY
[] = { 'P', 'E', 'R', 'I', 'O', 'D', 'I', 'C', 'I', 'T', 'Y', 0 };
264 static const sal_Unicode pKeyMAJORSTART
[] = { 'M', 'A', 'J', 'O', 'R', 'S', 'T', 'A', 'R', 'T', 0 };
265 static const sal_Unicode pKeyMINORSTART
[] = { 'M', 'I', 'N', 'O', 'R', 'S', 'T', 'A', 'R', 'T', 0 };
266 static const sal_Unicode pKeyTRUELENGTH
[] = { 'T', 'R', 'U', 'E', 'L', 'E', 'N', 'G', 'T', 'H', 0 };
// NOTE(review): this literal spells "UINITS"; the DIF header item is presumably
// "UNITS" - confirm before relying on this keyword ever matching.
267 static const sal_Unicode pKeyUINITS
[] = { 'U', 'I', 'N', 'I', 'T', 'S', 0 };
268 static const sal_Unicode pKeyDISPLAYUNITS
[] = { 'D', 'I', 'S', 'P', 'L', 'A', 'Y', 'U', 'N', 'I', 'T', 'S', 0 };
// Empty string; presumably the table terminator / fallback entry - confirm.
269 static const sal_Unicode pKeyUNKNOWN
[] = { 0 };
// Keyword table (most entries are not visible in this listing).
271 static const sal_Unicode
* ppKeys
[] =
283 pKeyTRUELENGTH
, // 10
// Topic values; indexed with the same nCnt as ppKeys below.
289 static const TOPIC pTopics
[] =
312 TOPIC eRet
= T_UNKNOWN
;
316 if( !ReadNextLine( aLine
) )
326 const sal_Unicode
* pRef
;
330 pRef
= ppKeys
[ nCnt
];
336 eRet
= pTopics
[ nCnt
];
342 pRef
= ppKeys
[ nCnt
];
// Parse "<vector>,<value>" from the current line.
356 const sal_Unicode
* pCur
= aLine
.getStr();
358 pCur
= ScanIntVal( pCur
, nVector
);
360 if( pCur
&& *pCur
== ',' )
363 ScanIntVal( pCur
, nVal
);
// String line: drop the first and last character (presumably the enclosing
// quotes) when more than two characters are present.
371 OSL_ENSURE( aLine
.getLength() >= 2,
372 "+GetNextTopic(): <String> is too short!" );
373 if( aLine
.getLength() > 2 )
374 aData
= aLine
.copy( 1, aLine
.getLength() - 2 );
380 OSL_FAIL( "DifParser::GetNextTopic - unexpected state" );
384 ReadNextLine( aLine
);
386 case S_ERROR_L2
: // error happened in line 2
388 ReadNextLine( aLine
);
392 OSL_FAIL( "DifParser::GetNextTopic - missing enum" );
// Replaces every doubled quote ("") in rString with a single quote ("),
// modifying rString in place.
399 static void lcl_DeEscapeQuotesDif( OUString
& rString
)
401 // Special handling for DIF import: Escaped (duplicated) quotes are resolved.
402 // Single quote characters are left in place because older versions didn't
403 // escape quotes in strings (and Excel doesn't when using the clipboard).
404 // The quotes around the string are removed before this function is called.
406 rString
= rString
.replaceAll("\"\"", "\"");
409 // Determine if passed in string is numeric data and set fVal/nNumFormat if so
// The result starts as D_SYNT_ERROR. The text is first tried with
// ScanFloatVal(); the number formatter's IsNumberFormat() is also consulted,
// and on success its detected format index is stored in nNumFormat.
// NOTE(review): the branch structure between these two checks and the
// assignments to fVal / eRet are not visible in this listing.
410 DATASET
DifParser::GetNumberDataset( const sal_Unicode
* pPossibleNumericData
)
412 DATASET eRet
= D_SYNT_ERROR
;
415 if( ScanFloatVal( pPossibleNumericData
) )
421 { // ...and for punishment, with number formatting...
422 OSL_ENSURE( pNumFormatter
, "-DifParser::GetNextDataset(): No Formatter, more fun!" );
423 OUString
aTestVal( pPossibleNumericData
);
424 sal_uInt32 nFormat
= 0;
426 if( pNumFormatter
->IsNumberFormat( aTestVal
, nFormat
, fTmpVal
) )
429 nNumFormat
= nFormat
;
// Delivers the next input line in rStr.
// If no look-ahead line is buffered, a line is read from the stream and the
// stream's read result is returned. Otherwise the buffered look-ahead line is
// handed out and the buffer is cleared (the return value of that path is not
// visible in this listing).
438 bool DifParser::ReadNextLine( OUString
& rStr
)
440 if( aLookAheadLine
.isEmpty() )
442 return rIn
.ReadUniOrByteStringLine( rStr
, rIn
.GetStreamCharSet() );
446 rStr
= aLookAheadLine
;
447 aLookAheadLine
.clear();
452 // Look ahead in the stream to determine if the next line is the first line of
453 // a valid data record structure
// Reads one line into aLookAheadLine (expected to be empty on entry) and
// inspects its first character: '-' and '1' lines are checked with Is1_0(),
// '0' lines must continue with ',' and parse via GetNumberDataset().
// Returns bValidStructure, which is false unless one of those checks succeeds.
// NOTE(review): the pointer increments between the checks are not visible here.
454 bool DifParser::LookAhead()
456 const sal_Unicode
* pAktBuffer
;
457 bool bValidStructure
= false;
459 OSL_ENSURE( aLookAheadLine
.isEmpty(), "*DifParser::LookAhead(): LookAhead called twice in a row" );
460 rIn
.ReadUniOrByteStringLine( aLookAheadLine
, rIn
.GetStreamCharSet() );
462 pAktBuffer
= aLookAheadLine
.getStr();
464 switch( *pAktBuffer
)
466 case '-': // Special Datatype
469 if( Is1_0( pAktBuffer
) )
471 bValidStructure
= true;
474 case '0': // Numeric Data
476 if( *pAktBuffer
== ',' )
479 bValidStructure
= ( GetNumberDataset(pAktBuffer
) != D_SYNT_ERROR
);
482 case '1': // String Data
483 if( Is1_0( aLookAheadLine
.getStr() ) )
485 bValidStructure
= true;
489 return bValidStructure
;
// Reads the next data record (two or more lines) and classifies it.
// The first character of the first line selects the record type:
//   '-' : special record; the following line is tested with IsBOT() / IsEOD().
//   '0' : numeric record; the value text is parsed with GetNumberDataset() and
//         the second line is read into aTmpLine.
//   '1' : string record; the quoted text, possibly spanning several lines, is
//         collected in aData.
// The result starts as D_UNKNOWN; if it is still D_UNKNOWN at the end, one
// more line is consumed.
// NOTE(review): this listing is missing lines (assignments to eRet, loop
// headers, else branches, the final return). Comments describe only what is visible.
492 DATASET
DifParser::GetNextDataset()
494 DATASET eRet
= D_UNKNOWN
;
496 const sal_Unicode
* pAktBuffer
;
498 ReadNextLine( aLine
);
500 pAktBuffer
= aLine
.getStr();
502 switch( *pAktBuffer
)
504 case '-': // Special Datatype
507 if( Is1_0( pAktBuffer
) )
509 ReadNextLine( aLine
);
510 if( IsBOT( aLine
.getStr() ) )
512 else if( IsEOD( aLine
.getStr() ) )
516 case '0': // Numeric Data
517 pAktBuffer
++; // value in fVal, 2. line in aData
518 if( *pAktBuffer
== ',' )
521 eRet
= GetNumberDataset(pAktBuffer
);
523 ReadNextLine( aTmpLine
);
524 if ( eRet
== D_SYNT_ERROR
)
525 { // for broken records write "#ERR: data" to cell
// Resulting text has the form "#ERR: <value text> (<second line>)".
526 OUStringBuffer
aTmp("#ERR: ");
527 aTmp
.append(pAktBuffer
).append(" (");
528 aTmp
.append(aTmpLine
).append(')');
529 aData
= aTmp
.makeStringAndClear();
538 case '1': // String Data
539 if( Is1_0( aLine
.getStr() ) )
541 ReadNextLine( aLine
);
542 sal_Int32 nLineLength
= aLine
.getLength();
543 const sal_Unicode
* pLine
= aLine
.getStr();
545 if( nLineLength
>= 1 && *pLine
== '"' )
547 // Quotes are not always escaped (duplicated), see lcl_DeEscapeQuotesDif
548 // A look ahead into the next line is needed in order to deal with
549 // multiline strings containing quotes
552 // Single line string
553 if( nLineLength
>= 2 && pLine
[nLineLength
- 1] == '"' )
// Strip the opening and closing quote, then resolve doubled quotes.
555 aData
= aLine
.copy( 1, nLineLength
- 2 );
556 lcl_DeEscapeQuotesDif( aData
);
// Multi-line string: start with everything after the opening quote and keep
// appending lines, separated by "\n".
563 aData
= aLine
.copy( 1 );
564 bool bContinue
= true;
567 aData
= aData
+ "\n";
568 bContinue
= !rIn
.IsEof() && ReadNextLine( aLine
);
571 nLineLength
= aLine
.getLength();
572 if( nLineLength
>= 1 )
574 pLine
= aLine
.getStr();
// Continue collecting unless the following line starts a valid data record.
575 bContinue
= !LookAhead();
578 aData
= aData
+ aLine
;
// Line ends with a quote: append it without the closing quote and resolve
// doubled quotes in the collected text.
580 else if( pLine
[nLineLength
- 1] == '"' )
582 aData
= aData
+ aLine
.copy(0, nLineLength
-1 );
583 lcl_DeEscapeQuotesDif( aData
);
595 if( eRet
== D_UNKNOWN
)
596 ReadNextLine( aLine
);
// Parses an unsigned decimal integer starting at pStart into rRet.
// Leading spaces and tabs are skipped. Digits are accumulated while the
// current character is a digit and rRet is below 0xFFFFFFFF / 10.
// Returns a const sal_Unicode*; GetNextTopic() tests it for non-null and then
// for ',' (the return statements themselves are not visible in this listing).
// NOTE(review): the pointer advances and the multiply-by-ten step are not
// visible here.
604 const sal_Unicode
* DifParser::ScanIntVal( const sal_Unicode
* pStart
, sal_uInt32
& rRet
)
606 // eat leading whitespace, not specified, but seen in the wild
607 while (*pStart
== ' ' || *pStart
== '\t')
610 sal_Unicode cAkt
= *pStart
;
// First digit initialises the result.
612 if( IsNumber( cAkt
) )
613 rRet
= ( sal_uInt32
) ( cAkt
- '0' );
// The bound on rRet stops accumulation before the 32-bit range is exceeded.
620 while( IsNumber( cAkt
) && rRet
< ( 0xFFFFFFFF / 10 ) )
623 rRet
+= ( sal_uInt32
) ( cAkt
- '0' );
// Scans a floating-point number starting at pStart using a hand-written state
// machine (enum STATE). Digits are accumulated into fNewVal, fractional digits
// are weighted with fFracPos, and exponent digits are accumulated into nExp,
// which is finally applied as a power of ten.
// Returns bool; one visible path returns false (see the FIXME near the end).
// NOTE(review): this listing is missing lines (the loop, most case labels,
// sign handling, the store of the result). Which member receives fNewVal is
// not visible here.
632 bool DifParser::ScanFloatVal( const sal_Unicode
* pStart
)
635 double fFracPos
= 1.0;
637 bool bExpNeg
= false;
638 bool bExpOverflow
= false;
639 static const sal_uInt16 nExpLimit
= 4096; // FIXME: has to be set more accurately!
644 enum STATE
{ S_FIRST
, S_PRE
, S_POST
, S_EXP_FIRST
, S_EXP
, S_END
, S_FINDEND
};
648 double fNewVal
= 0.0;
656 if( IsNumber( cAkt
) )
659 fNewVal
+= cAkt
- '0';
684 if( IsNumber( cAkt
) )
687 fNewVal
+= cAkt
- '0';
702 case 0x00: // IsNumberEnding( cAkt )
// Fractional digit: contribution scaled by the current fractional weight.
710 if( IsNumber( cAkt
) )
712 fNewVal
+= fFracPos
* ( cAkt
- '0' );
723 case 0x00: // IsNumberEnding( cAkt )
// Exponent digits are only accumulated while nExp is below nExpLimit.
731 if( IsNumber( cAkt
) )
733 if( nExp
< nExpLimit
)
736 nExp
+= ( sal_uInt16
) ( cAkt
- '0' );
755 if( IsNumber( cAkt
) )
// Bound keeps nExp within the sal_uInt16 range before another digit is added.
757 if( nExp
< ( 0xFFFF / 10 ) )
760 nExp
+= ( sal_uInt16
) ( cAkt
- '0' );
770 bRet
= IsNumberEnding( cAkt
);
775 if( IsNumberEnding( cAkt
) )
777 bRet
= true; // to continue parsing
782 OSL_FAIL( "DifParser::ScanFloatVal - unexpected state" );
785 OSL_FAIL( "DifParser::ScanFloatVal - missing enum" );
793 return false; // FIXME: add special cases here
// Apply the decimal exponent.
802 fNewVal
*= pow( 10.0, ( double ) nExp
);
// Default constructor of DifColumn.
// NOTE(review): the initializer list and body are not visible in this listing.
809 DifColumn::DifColumn ()
// Records row nRow in this column's entry list.
// Visible behaviour: the current entry's nEnd is compared with nRow; a
// (presumably newly created) entry gets nStart = nEnd = nRow and is appended
// to aEntries.
// NOTE(review): the branch bodies and the allocation of the new entry are not
// visible in this listing.
814 void DifColumn::SetLogical( SCROW nRow
)
816 OSL_ENSURE( ValidRow(nRow
), "*DifColumn::SetLogical(): Row too big!" );
820 OSL_ENSURE( nRow
> 0, "*DifColumn::SetLogical(): more cannot be zero!" );
824 if( pAkt
->nEnd
== nRow
)
832 pAkt
->nStart
= pAkt
->nEnd
= nRow
;
834 aEntries
.push_back(pAkt
);
// Records number format nNumFormat for row nRow in this column.
// Visible behaviour: when the current entry has the same format and ends at
// nRow - 1, that case is tested first (its body, presumably extending the
// entry, is not visible); NewEntry(nRow, nNumFormat) is called on the two
// other visible paths.
// Debug assertions require a valid row, nRow > 0 and nRow > pAkt->nEnd.
838 void DifColumn::SetNumFormat( SCROW nRow
, const sal_uInt32 nNumFormat
)
840 OSL_ENSURE( ValidRow(nRow
), "*DifColumn::SetNumFormat(): Row too big!" );
846 OSL_ENSURE( nRow
> 0,
847 "*DifColumn::SetNumFormat(): more cannot be zero!" );
848 OSL_ENSURE( nRow
> pAkt
->nEnd
,
849 "*DifColumn::SetNumFormat(): start from scratch?" );
851 if( pAkt
->nNumFormat
== nNumFormat
&& pAkt
->nEnd
== nRow
- 1 )
854 NewEntry( nRow
, nNumFormat
);
857 NewEntry(nRow
,nNumFormat
);
// Starts a new single-row entry: sets nStart = nEnd = nPos and the given
// number format on pAkt, then appends pAkt to aEntries.
// NOTE(review): the allocation that makes pAkt point at a fresh entry is not
// visible in this listing.
863 void DifColumn::NewEntry( const SCROW nPos
, const sal_uInt32 nNumFormat
)
866 pAkt
->nStart
= pAkt
->nEnd
= nPos
;
867 pAkt
->nNumFormat
= nNumFormat
;
869 aEntries
.push_back(pAkt
);
// Applies the same pattern rPattAttr to every recorded row range of this
// column: for each entry, rows nStart..nEnd of column nCol on sheet nTab.
872 void DifColumn::Apply( ScDocument
& rDoc
, const SCCOL nCol
, const SCTAB nTab
, const ScPatternAttr
& rPattAttr
)
874 for (boost::ptr_vector
<ENTRY
>::const_iterator it
= aEntries
.begin(); it
!= aEntries
.end(); ++it
)
875 rDoc
.ApplyPatternAreaTab( nCol
, it
->nStart
, nCol
, it
->nEnd
, nTab
, rPattAttr
);
// Applies each entry's own number format to its row range: for every entry the
// format is put into a reusable pattern as ATTR_VALUE_FORMAT, the pattern is
// applied to rows nStart..nEnd of column nCol on sheet nTab, and the item set
// is cleared again for the next entry.
878 void DifColumn::Apply( ScDocument
& rDoc
, const SCCOL nCol
, const SCTAB nTab
)
880 ScPatternAttr
aAttr( rDoc
.GetPool() );
881 SfxItemSet
&rItemSet
= aAttr
.GetItemSet();
883 for (boost::ptr_vector
<ENTRY
>::const_iterator it
= aEntries
.begin(); it
!= aEntries
.end(); ++it
)
885 OSL_ENSURE( it
->nNumFormat
> 0,
886 "+DifColumn::Apply(): Number format must not be 0!" );
888 rItemSet
.Put( SfxUInt32Item( ATTR_VALUE_FORMAT
, it
->nNumFormat
) );
890 rDoc
.ApplyPatternAreaTab( nCol
, it
->nStart
, nCol
, it
->nEnd
, nTab
, aAttr
);
892 rItemSet
.ClearItem();
// Allocates the per-column table ppCols with MAXCOL + 1 slots and sets every
// slot to NULL; columns are created on demand by SetLogical / SetNumFormat.
// NOTE(review): how bNewPlain is stored (presumably into bPlain) is not
// visible in this listing.
896 DifAttrCache::DifAttrCache( const bool bNewPlain
)
899 ppCols
= new DifColumn
*[ MAXCOL
+ 1 ];
900 for( SCCOL nCnt
= 0 ; nCnt
<= MAXCOL
; nCnt
++ )
901 ppCols
[ nCnt
] = NULL
;
// Deletes the DifColumn object held in each of the MAXCOL + 1 slots of ppCols.
// NOTE(review): the release of the ppCols array itself is not visible in this
// listing - confirm it is freed with delete[].
904 DifAttrCache::~DifAttrCache()
906 for( SCCOL nCnt
= 0 ; nCnt
<= MAXCOL
; nCnt
++ )
909 delete ppCols
[ nCnt
];
// Marks cell (nCol, nRow) as logical, creating the column's DifColumn on first
// use. Debug assertions expect a valid column and bPlain to be true.
// nCol indexes ppCols directly; the column check is a debug-only assertion.
915 void DifAttrCache::SetLogical( const SCCOL nCol
, const SCROW nRow
)
917 OSL_ENSURE( ValidCol(nCol
), "-DifAttrCache::SetLogical(): Col too big!" );
918 OSL_ENSURE( bPlain
, "*DifAttrCache::SetLogical(): has to be Plain!" );
920 if( !ppCols
[ nCol
] )
921 ppCols
[ nCol
] = new DifColumn
;
923 ppCols
[ nCol
]->SetLogical( nRow
);
// Records number format nNumFormat for cell (nCol, nRow), creating the
// column's DifColumn on first use. Debug assertions expect a valid column and
// bPlain to be false.
// nCol indexes ppCols directly; the column check is a debug-only assertion.
926 void DifAttrCache::SetNumFormat( const SCCOL nCol
, const SCROW nRow
, const sal_uInt32 nNumFormat
)
928 OSL_ENSURE( ValidCol(nCol
), "-DifAttrCache::SetNumFormat(): Col too big!" );
929 OSL_ENSURE( !bPlain
, "*DifAttrCache::SetNumFormat(): should not be Plain!" );
931 if( !ppCols
[ nCol
] )
932 ppCols
[ nCol
] = new DifColumn
;
934 ppCols
[ nCol
]->SetNumFormat( nRow
, nNumFormat
);
// Writes the cached attributes of all columns to sheet nTab of rDoc.
// Two loops over columns 0..MAXCOL are visible:
//   - the first builds a pattern carrying the standard LOGICAL number format
//     and applies it to the column's recorded ranges;
//   - the second lets each column apply its own per-entry number formats.
// NOTE(review): the condition choosing between the two loops (presumably
// bPlain) and the null checks on ppCols[nCol] are not visible in this listing.
937 void DifAttrCache::Apply( ScDocument
& rDoc
, SCTAB nTab
)
941 boost::scoped_ptr
<ScPatternAttr
> pPatt
;
943 for( SCCOL nCol
= 0 ; nCol
<= MAXCOL
; nCol
++ )
949 pPatt
.reset(new ScPatternAttr( rDoc
.GetPool() ));
950 pPatt
->GetItemSet().Put( SfxUInt32Item( ATTR_VALUE_FORMAT
,
951 rDoc
.GetFormatTable()->GetStandardFormat( css::util::NumberFormat::LOGICAL
) ) );
954 ppCols
[ nCol
]->Apply( rDoc
, nCol
, nTab
, *pPatt
);
960 for( SCCOL nCol
= 0 ; nCol
<= MAXCOL
; nCol
++ )
963 ppCols
[ nCol
]->Apply( rDoc
, nCol
, nTab
);
968 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */