1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: difimp.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_sc.hxx"
34 //------------------------------------------------------------------------
36 #include "scitems.hxx"
37 #include <svtools/zforlist.hxx>
41 #include "fprogressbar.hxx"
42 #include "scerrors.hxx"
43 #include "document.hxx"
45 #include "patattr.hxx"
46 #include "docpool.hxx"
// Zero-terminated sal_Unicode keyword strings for the DIF import.
// pKeyTRUE, pKeyFALSE, pKeyNA and pKeyERROR are compared against the record
// data in ScImportDif. Where the remaining keywords are used is not visible in
// this excerpt (presumably the IsBOT/IsEOD/Is1_0/IsV helpers -- TODO confirm).
52 const sal_Unicode pKeyTABLE
[] = { 'T', 'A', 'B', 'L', 'E', 0 };
53 const sal_Unicode pKeyVECTORS
[] = { 'V', 'E', 'C', 'T', 'O', 'R', 'S', 0 };
54 const sal_Unicode pKeyTUPLES
[] = { 'T', 'U', 'P', 'L', 'E', 'S', 0 };
55 const sal_Unicode pKeyDATA
[] = { 'D', 'A', 'T', 'A', 0 };
56 const sal_Unicode pKeyBOT
[] = { 'B', 'O', 'T', 0 };
57 const sal_Unicode pKeyEOD
[] = { 'E', 'O', 'D', 0 };
58 const sal_Unicode pKeyERROR
[] = { 'E', 'R', 'R', 'O', 'R', 0 };
59 const sal_Unicode pKeyTRUE
[] = { 'T', 'R', 'U', 'E', 0 };
60 const sal_Unicode pKeyFALSE
[] = { 'F', 'A', 'L', 'S', 'E', 0 };
61 const sal_Unicode pKeyNA
[] = { 'N', 'A', 0 };
62 const sal_Unicode pKeyV
[] = { 'V', 0 };
// Note: the middle character is a comma, i.e. the string is "1,0".
63 const sal_Unicode pKey1_0
[] = { '1', ',', '0', 0 };
// Imports DIF data from the stream rIn into pDoc, using the sheet given by
// rInsPos.Tab() and starting at the column/row of rInsPos.
//
// Header phase: topics are fetched through DifParser::GetNextTopic() until
// T_DATA or T_END is seen; the visible code renames the sheet from the header
// data and derives column/row counts from the parser's nVal.
// Data phase (only for T_DATA): datasets are fetched through
// DifParser::GetNextDataset() until D_EOD; numeric and string records are put
// into the document as cells, and number formats are collected in a
// DifAttrCache which is applied to the sheet afterwards.
//
// eVon is handed to the parser as the character set, nDifOption as the parser
// option. Returns eERR_RNGOVRFLW on the two visible return paths; any other
// return path is not visible in this excerpt.
66 FltError
ScFormatFilterPluginImpl::ScImportDif( SvStream
& rIn
, ScDocument
* pDoc
, const ScAddress
& rInsPos
,
67 const CharSet eVon
, UINT32 nDifOption
)
69 DifParser
aDifParser( rIn
, nDifOption
, *pDoc
, eVon
);
71 const BOOL bPlain
= aDifParser
.IsPlain();
73 SCTAB nBaseTab
= rInsPos
.Tab();
75 TOPIC eTopic
= T_UNKNOWN
;
76 BOOL bSyntErrWarn
= FALSE
;
77 BOOL bOverflowWarn
= FALSE
;
// rData aliases the parser's data string, so it always reflects the most
// recently parsed topic/dataset.
79 String
& rData
= aDifParser
.aData
;
87 ScfStreamProgressBar
aPrgrsBar( rIn
, pDoc
->GetDocumentShell() );
// Header phase: keep fetching topics until the data section or the end.
89 while( eTopic
!= T_DATA
&& eTopic
!= T_END
)
91 eTopic
= aDifParser
.GetNextTopic();
95 bData
= rData
.Len() > 0;
// NOTE(review): the case labels that select the following per-topic checks
// are not visible in this excerpt.
101 if( aDifParser
.nVector
!= 0 || aDifParser
.nVal
!= 1 )
104 pDoc
->RenameTab( nBaseTab
, rData
);
109 if( aDifParser
.nVector
!= 0 )
// Column count: SCCOL_MAX when the header value exceeds MAXCOL + 1,
// otherwise (presumably in an else branch not shown) the header value itself.
111 if( aDifParser
.nVal
> MAXCOL
+ 1 )
112 nNumCols
= SCCOL_MAX
;
114 nNumCols
= static_cast<SCCOL
>(aDifParser
.nVal
);
119 if( aDifParser
.nVector
!= 0 )
// Row count: same scheme as for the columns, using MAXROW / SCROW_MAX.
121 if( aDifParser
.nVal
> MAXROW
+ 1 )
122 nNumRows
= SCROW_MAX
;
124 nNumRows
= static_cast<SCROW
>(aDifParser
.nVal
);
129 if( aDifParser
.nVector
!= 0 || aDifParser
.nVal
!= 0 )
146 DBG_ERRORFILE( "ScImportDif - missing enum" );
152 if( eTopic
== T_DATA
)
153 { // the data starts here
154 SCCOL nBaseCol
= rInsPos
.Col();
// nColCnt starts at SCCOL_MAX; the checks against SCCOL_MAX below suggest it
// serves as a "no column yet" marker -- TODO confirm against the full source.
156 SCCOL nColCnt
= SCCOL_MAX
;
157 SCROW nRowCnt
= rInsPos
.Row();
158 DifAttrCache
aAttrCache( bPlain
);
160 DATASET eAkt
= D_UNKNOWN
;
// Data phase: fetch datasets until the end-of-data marker.
162 while( eAkt
!= D_EOD
)
164 eAkt
= aDifParser
.GetNextDataset();
166 aPrgrsBar
.Progress();
171 if( nColCnt
< SCCOL_MAX
)
177 case D_NUMERIC
: // numeric cell
178 if( nColCnt
== SCCOL_MAX
)
181 if( ValidCol(nColCnt
) && ValidRow(nRowCnt
) )
// The second line of a numeric record (rData) selects the cell content:
// "V" -> value cell, TRUE/FALSE -> value cell flagged as logical,
// NA/ERROR -> string cell holding the keyword, anything else -> "#IND: ..." text.
184 if( DifParser::IsV( rData
.GetBuffer() ) )
186 pCell
= new ScValueCell( aDifParser
.fVal
);
188 aAttrCache
.SetNumFormat( nColCnt
, nRowCnt
,
189 aDifParser
.nNumFormat
);
191 else if( rData
== pKeyTRUE
|| rData
== pKeyFALSE
)
193 pCell
= new ScValueCell( aDifParser
.fVal
);
195 aAttrCache
.SetLogical( nColCnt
, nRowCnt
);
197 aAttrCache
.SetNumFormat( nColCnt
, nRowCnt
,
198 aDifParser
.nNumFormat
);
200 else if( rData
== pKeyNA
|| rData
== pKeyERROR
)
201 pCell
= new ScStringCell( rData
);
204 String
aTmp( RTL_CONSTASCII_USTRINGPARAM( "#IND: " ));
206 aTmp
+= sal_Unicode('?');
207 pCell
= new ScStringCell( aTmp
);
210 pDoc
->PutCell( nColCnt
, nRowCnt
, nBaseTab
, pCell
, ( BOOL
) TRUE
);
// Presumably reached when the cell position is outside the valid range
// (else branch of the ValidCol/ValidRow check) -- TODO confirm.
213 bOverflowWarn
= TRUE
;
217 case D_STRING
: // text cell
218 if( nColCnt
== SCCOL_MAX
)
221 if( ValidCol(nColCnt
) && ValidRow(nRowCnt
) )
// Empty strings do not produce a cell.
223 if( rData
.Len() > 0 )
225 pDoc
->PutCell( nColCnt
, nRowCnt
, nBaseTab
,
226 ScBaseCell::CreateTextCell( rData
, pDoc
), ( BOOL
) TRUE
);
230 bOverflowWarn
= TRUE
;
239 DBG_ERROR( "ScImportDif - missing enum" );
// Apply the attributes collected in the cache to the imported sheet.
243 aAttrCache
.Apply( *pDoc
, nBaseTab
);
249 //###############################################
250 // CAUTION: the proper warning is still missing here!
251 return eERR_RNGOVRFLW
;
252 //###############################################
253 else if( bOverflowWarn
)
254 return eERR_RNGOVRFLW
;
// Constructs a parser on the stream rNewIn.
// Visible behaviour: if the stream's character set differs from eCharSet, the
// stream is switched to eCharSet (a debug error is reported); for
// RTL_TEXTENCODING_UNICODE the stream is prepared via StartReadingUnicodeText();
// bPlain is set when nOption equals SC_DIFOPT_PLAIN; pNumFormatter is set either
// to NULL or to rDoc's format table (the selecting condition is not visible in
// this excerpt -- presumably bPlain, TODO confirm).
// NOTE(review): the member initializer list following ':' is not visible here.
260 DifParser::DifParser( SvStream
& rNewIn
, const UINT32 nOption
, ScDocument
& rDoc
, CharSet e
) :
264 if ( rIn
.GetStreamCharSet() != eCharSet
)
266 DBG_ERRORFILE( "CharSet passed overrides and modifies StreamCharSet" );
267 rIn
.SetStreamCharSet( eCharSet
);
269 if ( eCharSet
== RTL_TEXTENCODING_UNICODE
)
270 rIn
.StartReadingUnicodeText();
272 bPlain
= ( nOption
== SC_DIFOPT_PLAIN
);
275 pNumFormatter
= NULL
;
277 pNumFormatter
= rDoc
.GetFormatTable();
// Reads the next header topic from the stream and returns its TOPIC id.
// Visible behaviour: a line is read and matched against the keyword table
// ppKeys, the matching entry of pTopics becomes the result; a following line is
// scanned with ScanIntVal() into nVector and, after a ',', into nVal; a further
// line has its first and last character removed and is stored in aData.
// On errors the remaining line(s) of the topic are skipped.
// NOTE(review): the state machine's switch/case skeleton is not visible in this
// excerpt; the description above is limited to the statements shown.
281 TOPIC
DifParser::GetNextTopic( void )
283 enum STATE
{ S_VectorVal
, S_Data
, S_END
, S_START
, S_UNKNOWN
, S_ERROR_L2
};
285 static const sal_Unicode pKeyLABEL
[] = { 'L', 'A', 'B', 'E', 'L', 0 };
286 static const sal_Unicode pKeyCOMMENT
[] = { 'C', 'O', 'M', 'M', 'E', 'N', 'T', 0 };
287 static const sal_Unicode pKeySIZE
[] = { 'S', 'I', 'Z', 'E', 0 };
288 static const sal_Unicode pKeyPERIODICITY
[] = { 'P', 'E', 'R', 'I', 'O', 'D', 'I', 'C', 'I', 'T', 'Y', 0 };
289 static const sal_Unicode pKeyMAJORSTART
[] = { 'M', 'A', 'J', 'O', 'R', 'S', 'T', 'A', 'R', 'T', 0 };
290 static const sal_Unicode pKeyMINORSTART
[] = { 'M', 'I', 'N', 'O', 'R', 'S', 'T', 'A', 'R', 'T', 0 };
291 static const sal_Unicode pKeyTRUELENGTH
[] = { 'T', 'R', 'U', 'E', 'L', 'E', 'N', 'G', 'T', 'H', 0 };
// NOTE(review): this spells "UINITS"; the DIF header topic is presumably
// "UNITS", so this looks like a typo that prevents the keyword from ever
// matching -- confirm against the DIF specification before changing it.
292 static const sal_Unicode pKeyUINITS
[] = { 'U', 'I', 'N', 'I', 'T', 'S', 0 };
293 static const sal_Unicode pKeyDISPLAYUNITS
[] = { 'D', 'I', 'S', 'P', 'L', 'A', 'Y', 'U', 'N', 'I', 'T', 'S', 0 };
// Empty string; presumably the terminating entry of ppKeys -- TODO confirm.
294 static const sal_Unicode pKeyUNKNOWN
[] = { 0 };
// Keyword table; pTopics below is indexed with the same counter (nCnt).
296 static const sal_Unicode
* ppKeys
[] =
308 pKeyTRUELENGTH
, // 10
314 static const TOPIC pTopics
[] =
337 TOPIC eRet
= T_UNKNOWN
;
341 if( !ReadNextLine( aLine
) )
351 const sal_Unicode
* pRef
;
355 pRef
= ppKeys
[ nCnt
];
361 eRet
= pTopics
[ nCnt
];
367 pRef
= ppKeys
[ nCnt
];
// Second line of a topic: "<vector>,<value>".
381 const sal_Unicode
* pCur
= aLine
.GetBuffer();
383 pCur
= ScanIntVal( pCur
, nVector
);
385 if( pCur
&& *pCur
== ',' )
388 ScanIntVal( pCur
, nVal
);
// Third line of a topic: the string value; Copy( 1, Len - 2 ) drops the first
// and the last character (presumably the surrounding quotes).
396 DBG_ASSERT( aLine
.Len() >= 2,
397 "+GetNextTopic(): <String> ist zu kurz!" );
398 if( aLine
.Len() > 2 )
399 aData
= aLine
.Copy( 1, aLine
.Len() - 2 );
405 DBG_ERRORFILE( "DifParser::GetNextTopic - unexpected state" );
408 // skip 2 lines
409 ReadNextLine( aLine
);
410 case S_ERROR_L2
: // an error occurred in line 2
411 // skip one line
412 ReadNextLine( aLine
);
416 DBG_ERRORFILE( "DifParser::GetNextTopic - missing enum" );
// Resolves escaped (doubled) quote characters in rString in place: rString is
// searched repeatedly for the two-character sequence in aDQ and one character
// is erased at every position found.
// NOTE(review): the declaration of nPos and how it advances after each hit are
// not visible in this excerpt.
424 static void lcl_DeEscapeQuotesDif( String
& rString
)
426 // Special handling for DIF import: Escaped (duplicated) quotes are resolved.
427 // Single quote characters are left in place because older versions didn't
428 // escape quotes in strings (and Excel doesn't when using the clipboard).
429 // The quotes around the string are removed before this function is called.
431 static const sal_Unicode aDQ
[] = { '"', '"', 0 };
433 while ( (nPos
= rString
.Search( aDQ
, nPos
)) != STRING_NOTFOUND
)
435 rString
.Erase( nPos
, 1 );
440 // Determine if passed in string is numeric data and set fVal/nNumFormat if so
// The result starts out as D_SYNT_ERROR. The text is first tried with
// ScanFloatVal(); the number formatter's IsNumberFormat() is used as a second
// attempt, and on success its detected format is stored in nNumFormat.
// NOTE(review): the assignments to eRet and fVal and the exact branch structure
// are not visible in this excerpt.
441 DATASET
DifParser::GetNumberDataset( const sal_Unicode
* pPossibleNumericData
)
443 DATASET eRet
= D_SYNT_ERROR
;
446 if( ScanFloatVal( pPossibleNumericData
) )
452 { // ...and as a penalty, try again with the number formatter...
453 DBG_ASSERT( pNumFormatter
, "-DifParser::GetNextDataset(): No Formatter, more fun!" );
454 String
aTestVal( pPossibleNumericData
);
455 sal_uInt32 nFormat
= 0;
457 if( pNumFormatter
->IsNumberFormat( aTestVal
, nFormat
, fTmpVal
) )
460 nNumFormat
= nFormat
;
// Delivers the next input line in rStr.
// If no look-ahead line is buffered (aLookAheadLine is empty) the line is read
// from the stream and the stream's read result is returned. Otherwise the
// buffered look-ahead line is handed out and the buffer is cleared.
// NOTE(review): the return value of the buffered path is not visible here.
469 bool DifParser::ReadNextLine( String
& rStr
)
471 if( aLookAheadLine
.Len() == 0 )
473 return rIn
.ReadUniOrByteStringLine( rStr
);
477 rStr
= aLookAheadLine
;
478 aLookAheadLine
.Erase();
483 // Look ahead in the stream to determine if the next line is the first line of
484 // a valid data record structure
// The line is read into aLookAheadLine, from where the next ReadNextLine() call
// will deliver it. The first character selects the check: '-' (special record)
// and '1' (string record) are tested with Is1_0(), '0' (numeric record) must be
// followed by ',' and a value accepted by GetNumberDataset().
// Returns true when the line passed its check.
485 bool DifParser::LookAhead()
487 const sal_Unicode
* pAktBuffer
;
488 bool bValidStructure
= false;
490 DBG_ASSERT( aLookAheadLine
.Len() == 0, "*DifParser::LookAhead(): LookAhead called twice in a row" );
491 rIn
.ReadUniOrByteStringLine( aLookAheadLine
);
493 pAktBuffer
= aLookAheadLine
.GetBuffer();
495 switch( *pAktBuffer
)
497 case '-': // Special Datatype
500 if( Is1_0( pAktBuffer
) )
502 bValidStructure
= true;
505 case '0': // Numeric Data
// NOTE(review): the pointer advance(s) between the case label and these checks
// are not visible in this excerpt.
507 if( *pAktBuffer
== ',' )
510 bValidStructure
= ( GetNumberDataset(pAktBuffer
) != D_SYNT_ERROR
);
513 case '1': // String Data
514 if( Is1_0( aLookAheadLine
.GetBuffer() ) )
516 bValidStructure
= true;
520 return bValidStructure
;
// Reads the next data record (two lines) and returns its DATASET kind.
// The first character of the first line selects the record type:
//   '-'  special record; the second line is tested with IsBOT() / IsEOD()
//   '0'  numeric record; the value is parsed by GetNumberDataset(), the second
//        line goes to aData; for broken records an "#ERR: ..." text is built
//   '1'  string record; the quoted string (possibly spanning several lines) is
//        collected in aData with escaped quotes resolved
// If the record type stays D_UNKNOWN one more line is consumed.
// NOTE(review): the assignments to eRet for most branches and the loop/else
// skeleton are not visible in this excerpt.
523 DATASET
DifParser::GetNextDataset( void )
525 DATASET eRet
= D_UNKNOWN
;
527 const sal_Unicode
* pAktBuffer
;
529 ReadNextLine( aLine
);
531 pAktBuffer
= aLine
.GetBuffer();
533 switch( *pAktBuffer
)
535 case '-': // Special Datatype
538 if( Is1_0( pAktBuffer
) )
540 ReadNextLine( aLine
);
541 if( IsBOT( aLine
.GetBuffer() ) )
543 else if( IsEOD( aLine
.GetBuffer() ) )
547 case '0': // Numeric Data
548 pAktBuffer
++; // value goes to fVal, 2nd line goes to aData
549 if( *pAktBuffer
== ',' )
552 eRet
= GetNumberDataset(pAktBuffer
);
553 ReadNextLine( aData
);
554 if ( eRet
== D_SYNT_ERROR
)
555 { // for broken records write "#ERR: data" to cell
556 String
aTmp( RTL_CONSTASCII_USTRINGPARAM( "#ERR: " ));
558 aTmp
.AppendAscii( " (" );
560 aTmp
+= sal_Unicode(')');
566 case '1': // String Data
567 if( Is1_0( aLine
.GetBuffer() ) )
569 ReadNextLine( aLine
);
570 xub_StrLen nLineLength
= aLine
.Len();
571 const sal_Unicode
* pLine
= aLine
.GetBuffer();
573 if( nLineLength
>= 1 && *pLine
== '"' )
575 // Quotes are not always escaped (duplicated), see lcl_DeEscapeQuotesDif
576 // A look ahead into the next line is needed in order to deal with
577 // multiline strings containing quotes
580 // Single line string
581 if( nLineLength
>= 2 && pLine
[nLineLength
- 1] == '"' )
// Drop the opening and the closing quote, then resolve doubled quotes.
583 aData
= aLine
.Copy( 1, nLineLength
- 2 );
584 lcl_DeEscapeQuotesDif( aData
);
// Multi-line string: start with everything after the opening quote and keep
// appending lines, separated by '\n'.
591 aData
= aLine
.Copy( 1 );
592 bool bContinue
= true;
595 aData
.Append( '\n' );
596 bContinue
= !rIn
.IsEof() && ReadNextLine( aLine
);
599 nLineLength
= aLine
.Len();
600 if( nLineLength
>= 1 )
602 pLine
= aLine
.GetBuffer();
// Continue only while the following line does not start a valid record.
603 bContinue
= !LookAhead();
606 aData
.Append( aLine
);
608 else if( pLine
[nLineLength
- 1] == '"' )
// Last line of the string: append it without the closing quote.
610 aData
.Append( pLine
, nLineLength
- 1 );
611 lcl_DeEscapeQuotesDif( aData
);
623 if( eRet
== D_UNKNOWN
)
624 ReadNextLine( aLine
);
// Scans an unsigned decimal integer starting at pStart into rRet.
// Leading blanks and tabs are skipped. Digits are accumulated while rRet stays
// below 0xFFFFFFFF / 10, which guards the accumulator against overflow.
// NOTE(review): the returned pointer, the multiplication step and the pointer
// advances are not visible in this excerpt; GetNextTopic() checks the result
// for NULL and for a following ','.
633 const sal_Unicode
* DifParser::ScanIntVal( const sal_Unicode
* pStart
, UINT32
& rRet
)
635 // eat leading whitespace, not specified, but seen in the wild
636 while (*pStart
== ' ' || *pStart
== '\t')
639 sal_Unicode cAkt
= *pStart
;
641 if( IsNumber( cAkt
) )
642 rRet
= ( UINT32
) ( cAkt
- '0' );
649 while( IsNumber( cAkt
) && rRet
< ( 0xFFFFFFFF / 10 ) )
652 rRet
+= ( UINT32
) ( cAkt
- '0' );
// Scans a floating point number starting at pStart using a hand-written state
// machine (states: S_FIRST, S_PRE, S_POST, S_EXP_FIRST, S_EXP, S_END,
// S_FINDEND). Digits before the decimal separator are added to fNewVal, digits
// after it are weighted with fFracPos, exponent digits are collected in nExp
// and finally applied through pow( 10.0, nExp ).
// NOTE(review): the switch/case skeleton, the state transitions, the handling
// of bExpNeg/bExpOverflow and the final assignment of the result are not
// visible in this excerpt; the success return value is therefore not described.
662 BOOL
DifParser::ScanFloatVal( const sal_Unicode
* pStart
)
664 double fNewVal
= 0.0;
666 double fFracPos
= 1.0;
668 BOOL bExpNeg
= FALSE
;
669 BOOL bExpOverflow
= FALSE
;
670 static const UINT16 nExpLimit
= 4096; // CAUTION: needs to be determined more precisely!
675 enum STATE
{ S_FIRST
, S_PRE
, S_POST
, S_EXP_FIRST
, S_EXP
, S_END
, S_FINDEND
};
687 if( IsNumber( cAkt
) )
690 fNewVal
+= cAkt
- '0';
715 if( IsNumber( cAkt
) )
718 fNewVal
+= cAkt
- '0';
733 case 0x00: // IsNumberEnding( cAkt )
741 if( IsNumber( cAkt
) )
743 fNewVal
+= fFracPos
* ( cAkt
- '0' );
754 case 0x00: // IsNumberEnding( cAkt )
762 if( IsNumber( cAkt
) )
// Exponent digits are only accumulated while nExp is below nExpLimit.
764 if( nExp
< nExpLimit
)
767 nExp
+= ( UINT16
) ( cAkt
- '0' );
786 if( IsNumber( cAkt
) )
// Guard the 16-bit exponent accumulator against overflow.
788 if( nExp
< ( 0xFFFF / 10 ) )
791 nExp
+= ( UINT16
) ( cAkt
- '0' );
801 bRet
= IsNumberEnding( cAkt
);
806 if( IsNumberEnding( cAkt
) )
808 bRet
= TRUE
; // so that parsing can continue sensibly
813 DBG_ERRORFILE( "DifParser::ScanFloatVal - unexpected state" );
816 DBG_ERRORFILE( "DifParser::ScanFloatVal - missing enum" );
824 return sal_False
; // CAUTION: this still needs to be differentiated
833 fNewVal
*= pow( 10.0, ( double ) nExp
);
// Walks all ENTRY elements of the underlying List from First() via Next().
// NOTE(review): the loop body is not visible in this excerpt; presumably each
// entry is deleted -- TODO confirm.
841 DifColumn::~DifColumn( void )
843 ENTRY
* pEntry
= ( ENTRY
* ) List::First();
848 pEntry
= ( ENTRY
* ) List::Next();
// Records row nRow as holding a logical value.
// Visible behaviour: the current entry's end row is compared with nRow; on
// another path an entry with nStart == nEnd == nRow is appended to the list.
// NOTE(review): the branch structure and the allocation of the entry assigned
// to pAkt are not visible in this excerpt.
853 void DifColumn::SetLogical( SCROW nRow
)
855 DBG_ASSERT( ValidRow(nRow
), "*DifColumn::SetLogical(): Row zu gross!" );
859 DBG_ASSERT( nRow
> 0, "*DifColumn::SetLogical(): weitere koennen nicht 0 sein!" );
861 if( pAkt
->nEnd
== nRow
)
869 pAkt
->nStart
= pAkt
->nEnd
= nRow
;
870 List::Insert( pAkt
, LIST_APPEND
);
// Records number format nNumFormat for row nRow.
// Visible behaviour: when the current entry has the same format and ends at the
// row directly above (nRow - 1) a separate path is taken (presumably the entry
// is extended -- TODO confirm); otherwise NewEntry() starts a new range.
// Rows are expected in ascending order (see the debug assertions).
// NOTE(review): the if/else skeleton is not visible in this excerpt.
875 void DifColumn::SetNumFormat( SCROW nRow
, const UINT32 nNumFormat
)
877 DBG_ASSERT( ValidRow(nRow
), "*DifColumn::SetNumFormat(): Row zu gross!" );
883 DBG_ASSERT( nRow
> 0,
884 "*DifColumn::SetNumFormat(): weitere koennen nicht 0 sein!" );
885 DBG_ASSERT( nRow
> pAkt
->nEnd
,
886 "*DifColumn::SetNumFormat(): Noch 'mal von vorne?" );
888 if( pAkt
->nNumFormat
== nNumFormat
&& pAkt
->nEnd
== nRow
- 1 )
891 NewEntry( nRow
, nNumFormat
);
894 NewEntry( nRow
, nNumFormat
);
// Starts a new single-row range: the entry pAkt gets nStart == nEnd == nPos and
// the given number format, and is appended to the list.
// NOTE(review): the allocation of the entry assigned to pAkt is not visible in
// this excerpt.
901 void DifColumn::NewEntry( const SCROW nPos
, const UINT32 nNumFormat
)
904 pAkt
->nStart
= pAkt
->nEnd
= nPos
;
905 pAkt
->nNumFormat
= nNumFormat
;
906 List::Insert( pAkt
, LIST_APPEND
);
// Walks all stored row ranges of this column and calls
// rDoc.ApplyPatternAreaTab() for column nCol, rows nStart..nEnd of each entry.
// NOTE(review): the trailing arguments of the call are not visible in this
// excerpt; presumably nTab and rPattAttr -- TODO confirm.
910 void DifColumn::Apply( ScDocument
& rDoc
, const SCCOL nCol
, const SCTAB nTab
, const ScPatternAttr
& rPattAttr
)
912 ENTRY
* pEntry
= ( ENTRY
* ) List::First();
916 rDoc
.ApplyPatternAreaTab( nCol
, pEntry
->nStart
, nCol
, pEntry
->nEnd
,
918 pEntry
= ( ENTRY
* ) List::Next();
// Walks all stored row ranges of this column and applies each entry's own
// number format: the format is put into a scratch pattern as ATTR_VALUE_FORMAT,
// the pattern is applied to column nCol, rows nStart..nEnd on sheet nTab, and
// the item set is cleared again before the next entry.
923 void DifColumn::Apply( ScDocument
& rDoc
, const SCCOL nCol
, const SCTAB nTab
)
925 ScPatternAttr
aAttr( rDoc
.GetPool() );
926 SfxItemSet
& rItemSet
= aAttr
.GetItemSet();
928 ENTRY
* pEntry
= ( ENTRY
* ) List::First();
932 DBG_ASSERT( pEntry
->nNumFormat
> 0,
933 "+DifColumn::Apply(): Numberformat darf hier nicht 0 sein!" );
934 rItemSet
.Put( SfxUInt32Item( ATTR_VALUE_FORMAT
, pEntry
->nNumFormat
) );
936 rDoc
.ApplyPatternAreaTab( nCol
, pEntry
->nStart
, nCol
, pEntry
->nEnd
, nTab
, aAttr
);
// Reset the scratch pattern so the next entry starts clean.
938 rItemSet
.ClearItem();
940 pEntry
= ( ENTRY
* ) List::Next();
// Allocates the per-column table (MAXCOL + 1 DifColumn pointers) and sets every
// slot to NULL; columns are created on demand in SetNumFormat().
// NOTE(review): the use of bNewPlain is not visible in this excerpt; presumably
// it initializes bPlain -- TODO confirm.
945 DifAttrCache::DifAttrCache( const BOOL bNewPlain
)
948 ppCols
= new DifColumn
*[ MAXCOL
+ 1 ];
949 for( SCCOL nCnt
= 0 ; nCnt
<= MAXCOL
; nCnt
++ )
950 ppCols
[ nCnt
] = NULL
;
// Deletes the DifColumn objects stored in the per-column table.
// NOTE(review): the release of the ppCols array itself is not visible in this
// excerpt -- confirm it is freed with delete[].
954 DifAttrCache::~DifAttrCache()
956 for( SCCOL nCnt
= 0 ; nCnt
<= MAXCOL
; nCnt
++ )
959 delete ppCols
[ nCnt
];
// Records number format nNumFormat for the cell (nCol, nRow).
// The DifColumn for nCol is created on first use, then the call is forwarded to
// DifColumn::SetNumFormat(). Not meant to be called in plain mode (asserted).
// NOTE(review): nCol is only checked by a debug assertion before it is used as
// an index into ppCols.
964 void DifAttrCache::SetNumFormat( const SCCOL nCol
, const SCROW nRow
, const UINT32 nNumFormat
)
966 DBG_ASSERT( ValidCol(nCol
), "-DifAttrCache::SetNumFormat(): Col zu gross!" );
967 DBG_ASSERT( !bPlain
, "*DifAttrCache::SetNumFormat(): sollte nicht Plain sein!" );
969 if( !ppCols
[ nCol
] )
970 ppCols
[ nCol
] = new DifColumn
;
972 ppCols
[ nCol
]->SetNumFormat( nRow
, nNumFormat
);
// Applies the cached column attributes to sheet nTab of rDoc.
// Two loops over all columns are visible: the first one creates a pattern
// carrying the standard logical number format and applies it through
// DifColumn::Apply( rDoc, nCol, nTab, pattern ); the second one calls
// DifColumn::Apply( rDoc, nCol, nTab ) for every existing column, which applies
// the per-entry number formats.
// NOTE(review): the condition selecting between the two loops (presumably
// bPlain), the null checks in the first loop and the release of pPatt are not
// visible in this excerpt.
976 void DifAttrCache::Apply( ScDocument
& rDoc
, SCTAB nTab
)
980 ScPatternAttr
* pPatt
= NULL
;
982 for( SCCOL nCol
= 0 ; nCol
<= MAXCOL
; nCol
++ )
988 pPatt
= new ScPatternAttr( rDoc
.GetPool() );
989 pPatt
->GetItemSet().Put( SfxUInt32Item( ATTR_VALUE_FORMAT
,
990 rDoc
.GetFormatTable()->GetStandardFormat( NUMBERFORMAT_LOGICAL
) ) );
993 ppCols
[ nCol
]->Apply( rDoc
, nCol
, nTab
, *pPatt
);
1002 for( SCCOL nCol
= 0 ; nCol
<= MAXCOL
; nCol
++ )
1004 if( ppCols
[ nCol
] )
1005 ppCols
[ nCol
]->Apply( rDoc
, nCol
, nTab
);