Version 6.4.0.3, tag libreoffice-6.4.0.3
[LibreOffice.git] / sc / source / filter / dif / difimp.cxx
blob1d92ed5682044fc080f7a8e6a21de458b072a969
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <svl/zforlist.hxx>
21 #include <tools/stream.hxx>
22 #include <osl/diagnose.h>
23 #include <dif.hxx>
24 #include <docpool.hxx>
25 #include <document.hxx>
26 #include <fprogressbar.hxx>
27 #include <ftools.hxx>
28 #include <patattr.hxx>
29 #include <scerrors.hxx>
30 #include <scitems.hxx>
31 #include <stringutil.hxx>
32 #include <table.hxx>
33 #include <memory>
35 const sal_Unicode pKeyTABLE[] = { 'T', 'A', 'B', 'L', 'E', 0 };
36 const sal_Unicode pKeyVECTORS[] = { 'V', 'E', 'C', 'T', 'O', 'R', 'S', 0 };
37 const sal_Unicode pKeyTUPLES[] = { 'T', 'U', 'P', 'L', 'E', 'S', 0 };
38 const sal_Unicode pKeyDATA[] = { 'D', 'A', 'T', 'A', 0 };
39 const sal_Unicode pKeyBOT[] = { 'B', 'O', 'T', 0 };
40 const sal_Unicode pKeyEOD[] = { 'E', 'O', 'D', 0 };
41 const sal_Unicode pKeyERROR[] = { 'E', 'R', 'R', 'O', 'R', 0 };
42 const sal_Unicode pKeyTRUE[] = { 'T', 'R', 'U', 'E', 0 };
43 const sal_Unicode pKeyFALSE[] = { 'F', 'A', 'L', 'S', 'E', 0 };
44 const sal_Unicode pKeyNA[] = { 'N', 'A', 0 };
45 const sal_Unicode pKeyV[] = { 'V', 0 };
46 const sal_Unicode pKey1_0[] = { '1', ',', '0', 0 };
48 ErrCode ScFormatFilterPluginImpl::ScImportDif(SvStream& rIn, ScDocument* pDoc, const ScAddress& rInsPos,
49 const rtl_TextEncoding eVon )
51 DifParser aDifParser( rIn, *pDoc, eVon );
53 SCTAB nBaseTab = rInsPos.Tab();
55 TOPIC eTopic = T_UNKNOWN;
56 bool bSyntErrWarn = false;
57 bool bOverflowWarn = false;
59 OUStringBuffer& rData = aDifParser.m_aData;
61 rIn.Seek( 0 );
63 ScfStreamProgressBar aPrgrsBar( rIn, pDoc->GetDocumentShell() );
65 while( eTopic != T_DATA && eTopic != T_END )
67 eTopic = aDifParser.GetNextTopic();
69 aPrgrsBar.Progress();
71 const bool bData = !rData.isEmpty();
73 switch( eTopic )
75 case T_TABLE:
77 if( aDifParser.nVector != 0 || aDifParser.nVal != 1 )
78 bSyntErrWarn = true;
79 if( bData )
80 pDoc->RenameTab(nBaseTab, rData.toString());
82 break;
83 case T_VECTORS:
85 if( aDifParser.nVector != 0 )
86 bSyntErrWarn = true;
88 break;
89 case T_TUPLES:
91 if( aDifParser.nVector != 0 )
92 bSyntErrWarn = true;
94 break;
95 case T_DATA:
97 if( aDifParser.nVector != 0 || aDifParser.nVal != 0 )
98 bSyntErrWarn = true;
100 break;
101 case T_LABEL:
102 case T_COMMENT:
103 case T_SIZE:
104 case T_PERIODICITY:
105 case T_MAJORSTART:
106 case T_MINORSTART:
107 case T_TRUELENGTH:
108 case T_UINITS:
109 case T_DISPLAYUNITS:
110 case T_END:
111 case T_UNKNOWN:
112 break;
113 default:
114 OSL_FAIL( "ScImportDif - missing enum" );
119 if( eTopic == T_DATA )
120 { // data starts here
121 SCCOL nBaseCol = rInsPos.Col();
123 SCCOL nColCnt = SCCOL_MAX;
124 SCROW nRowCnt = rInsPos.Row();
125 DifAttrCache aAttrCache;
127 DATASET eCurrent = D_UNKNOWN;
129 ScSetStringParam aStrParam; // used to set string value without number detection.
130 aStrParam.setTextInput();
132 while( eCurrent != D_EOD )
134 eCurrent = aDifParser.GetNextDataset();
136 aPrgrsBar.Progress();
137 ScAddress aPos(nColCnt, nRowCnt, nBaseTab);
139 OUString aData = rData.toString();
141 switch( eCurrent )
143 case D_BOT:
144 if( nColCnt < SCCOL_MAX )
145 nRowCnt++;
146 nColCnt = nBaseCol;
147 break;
148 case D_EOD:
149 break;
150 case D_NUMERIC: // Number cell
151 if( nColCnt == SCCOL_MAX )
152 nColCnt = nBaseCol;
154 if( ValidCol(nColCnt) && ValidRow(nRowCnt) )
156 pDoc->EnsureTable(nBaseTab);
158 if( DifParser::IsV( aData.getStr() ) )
160 pDoc->SetValue(aPos, aDifParser.fVal);
161 aAttrCache.SetNumFormat( nColCnt, nRowCnt,
162 aDifParser.nNumFormat );
164 else if( aData == pKeyTRUE || aData == pKeyFALSE )
166 pDoc->SetValue(aPos, aDifParser.fVal);
167 aAttrCache.SetNumFormat( nColCnt, nRowCnt,
168 aDifParser.nNumFormat );
170 else if( aData == pKeyNA || aData == pKeyERROR )
172 pDoc->SetString(aPos, aData, &aStrParam);
174 else
176 OUString aTmp = "#IND:" + aData + "?";
177 pDoc->SetString(aPos, aTmp, &aStrParam);
180 else
181 bOverflowWarn = true;
183 nColCnt++;
184 break;
185 case D_STRING: // Text cell
186 if( nColCnt == SCCOL_MAX )
187 nColCnt = nBaseCol;
189 if( ValidCol(nColCnt) && ValidRow(nRowCnt) )
191 if (!aData.isEmpty())
193 pDoc->EnsureTable(nBaseTab);
194 pDoc->SetTextCell(aPos, aData);
197 else
198 bOverflowWarn = true;
200 nColCnt++;
201 break;
202 case D_UNKNOWN:
203 break;
204 case D_SYNT_ERROR:
205 break;
206 default:
207 OSL_FAIL( "ScImportDif - missing enum" );
211 aAttrCache.Apply( *pDoc, nBaseTab );
213 else
214 return SCERR_IMPORT_FORMAT;
216 if( bSyntErrWarn )
218 // FIXME: Add proper warning!
219 return SCWARN_IMPORT_RANGE_OVERFLOW;
221 else if( bOverflowWarn )
222 return SCWARN_IMPORT_RANGE_OVERFLOW;
223 else
224 return ERRCODE_NONE;
227 DifParser::DifParser( SvStream& rNewIn, const ScDocument& rDoc, rtl_TextEncoding eCharSet )
228 : fVal(0.0)
229 , nVector(0)
230 , nVal(0)
231 , nNumFormat(0)
232 , pNumFormatter(rDoc.GetFormatTable())
233 , rIn(rNewIn)
235 if ( rIn.GetStreamCharSet() != eCharSet )
237 OSL_FAIL( "CharSet passed overrides and modifies StreamCharSet" );
238 rIn.SetStreamCharSet( eCharSet );
240 rIn.StartReadingUnicodeText( eCharSet );
243 TOPIC DifParser::GetNextTopic()
245 enum STATE { S_VectorVal, S_Data, S_END, S_START, S_UNKNOWN, S_ERROR_L2 };
247 static const sal_Unicode pKeyLABEL[] = { 'L', 'A', 'B', 'E', 'L', 0 };
248 static const sal_Unicode pKeyCOMMENT[] = { 'C', 'O', 'M', 'M', 'E', 'N', 'T', 0 };
249 static const sal_Unicode pKeySIZE[] = { 'S', 'I', 'Z', 'E', 0 };
250 static const sal_Unicode pKeyPERIODICITY[] = { 'P', 'E', 'R', 'I', 'O', 'D', 'I', 'C', 'I', 'T', 'Y', 0 };
251 static const sal_Unicode pKeyMAJORSTART[] = { 'M', 'A', 'J', 'O', 'R', 'S', 'T', 'A', 'R', 'T', 0 };
252 static const sal_Unicode pKeyMINORSTART[] = { 'M', 'I', 'N', 'O', 'R', 'S', 'T', 'A', 'R', 'T', 0 };
253 static const sal_Unicode pKeyTRUELENGTH[] = { 'T', 'R', 'U', 'E', 'L', 'E', 'N', 'G', 'T', 'H', 0 };
254 static const sal_Unicode pKeyUINITS[] = { 'U', 'I', 'N', 'I', 'T', 'S', 0 };
255 static const sal_Unicode pKeyDISPLAYUNITS[] = { 'D', 'I', 'S', 'P', 'L', 'A', 'Y', 'U', 'N', 'I', 'T', 'S', 0 };
256 static const sal_Unicode pKeyUNKNOWN[] = { 0 };
258 static const sal_Unicode* ppKeys[] =
260 pKeyTABLE, // 0
261 pKeyVECTORS,
262 pKeyTUPLES,
263 pKeyDATA,
264 pKeyLABEL,
265 pKeyCOMMENT, // 5
266 pKeySIZE,
267 pKeyPERIODICITY,
268 pKeyMAJORSTART,
269 pKeyMINORSTART,
270 pKeyTRUELENGTH, // 10
271 pKeyUINITS,
272 pKeyDISPLAYUNITS,
273 pKeyUNKNOWN // 13
276 static const TOPIC pTopics[] =
278 T_TABLE, // 0
279 T_VECTORS,
280 T_TUPLES,
281 T_DATA,
282 T_LABEL,
283 T_COMMENT, // 5
284 T_SIZE,
285 T_PERIODICITY,
286 T_MAJORSTART,
287 T_MINORSTART,
288 T_TRUELENGTH, // 10
289 T_UINITS,
290 T_DISPLAYUNITS,
291 T_UNKNOWN // 13
294 STATE eS = S_START;
295 OUString aLine;
297 nVector = 0;
298 nVal = 0;
299 TOPIC eRet = T_UNKNOWN;
301 while( eS != S_END )
303 if( !ReadNextLine( aLine ) )
305 eS = S_END;
306 eRet = T_END;
309 switch( eS )
311 case S_START:
313 const sal_Unicode* pRef;
314 sal_uInt16 nCnt = 0;
315 bool bSearch = true;
317 pRef = ppKeys[ nCnt ];
319 while( bSearch )
321 if( aLine == pRef )
323 eRet = pTopics[ nCnt ];
324 bSearch = false;
326 else
328 nCnt++;
329 pRef = ppKeys[ nCnt ];
330 if( !*pRef )
331 bSearch = false;
335 if( *pRef )
336 eS = S_VectorVal;
337 else
338 eS = S_UNKNOWN;
340 break;
341 case S_VectorVal:
343 const sal_Unicode* pCur = aLine.getStr();
345 pCur = ScanIntVal( pCur, nVector );
347 if( pCur && *pCur == ',' )
349 pCur++;
350 ScanIntVal( pCur, nVal );
351 eS = S_Data;
353 else
354 eS = S_ERROR_L2;
356 break;
357 case S_Data:
358 OSL_ENSURE( aLine.getLength() >= 2,
359 "+GetNextTopic(): <String> is too short!" );
360 if( aLine.getLength() > 2 )
361 m_aData.append(aLine.copy(1, aLine.getLength() - 2));
362 else
363 m_aData.truncate();
364 eS = S_END;
365 break;
366 case S_END:
367 OSL_FAIL( "DifParser::GetNextTopic - unexpected state" );
368 break;
369 case S_UNKNOWN:
370 // skip 2 lines
371 ReadNextLine( aLine );
372 [[fallthrough]];
373 case S_ERROR_L2: // error happened in line 2
374 // skip 1 line
375 ReadNextLine( aLine );
376 eS = S_END;
377 break;
378 default:
379 OSL_FAIL( "DifParser::GetNextTopic - missing enum" );
383 return eRet;
386 static void lcl_DeEscapeQuotesDif(OUStringBuffer& rString)
388 // Special handling for DIF import: Escaped (duplicated) quotes are resolved.
389 // Single quote characters are left in place because older versions didn't
390 // escape quotes in strings (and Excel doesn't when using the clipboard).
391 // The quotes around the string are removed before this function is called.
393 rString = rString.toString().replaceAll("\"\"", "\"");
396 // Determine if passed in string is numeric data and set fVal/nNumFormat if so
397 DATASET DifParser::GetNumberDataset( const sal_Unicode* pPossibleNumericData )
399 DATASET eRet = D_SYNT_ERROR;
401 OSL_ENSURE( pNumFormatter, "-DifParser::GetNumberDataset(): No Formatter, more fun!" );
402 OUString aTestVal( pPossibleNumericData );
403 sal_uInt32 nFormat = 0;
404 double fTmpVal;
405 if( pNumFormatter->IsNumberFormat( aTestVal, nFormat, fTmpVal ) )
407 fVal = fTmpVal;
408 nNumFormat = nFormat;
409 eRet = D_NUMERIC;
411 else
412 eRet = D_SYNT_ERROR;
414 return eRet;
417 bool DifParser::ReadNextLine( OUString& rStr )
419 if( aLookAheadLine.isEmpty() )
421 return rIn.ReadUniOrByteStringLine( rStr, rIn.GetStreamCharSet() );
423 else
425 rStr = aLookAheadLine;
426 aLookAheadLine.clear();
427 return true;
431 // Look ahead in the stream to determine if the next line is the first line of
432 // a valid data record structure
433 bool DifParser::LookAhead()
435 const sal_Unicode* pCurrentBuffer;
436 bool bValidStructure = false;
438 OSL_ENSURE( aLookAheadLine.isEmpty(), "*DifParser::LookAhead(): LookAhead called twice in a row" );
439 rIn.ReadUniOrByteStringLine( aLookAheadLine, rIn.GetStreamCharSet() );
441 pCurrentBuffer = aLookAheadLine.getStr();
443 switch( *pCurrentBuffer )
445 case '-': // Special Datatype
446 pCurrentBuffer++;
448 if( Is1_0( pCurrentBuffer ) )
450 bValidStructure = true;
452 break;
453 case '0': // Numeric Data
454 pCurrentBuffer++;
455 if( *pCurrentBuffer == ',' )
457 pCurrentBuffer++;
458 bValidStructure = ( GetNumberDataset(pCurrentBuffer) != D_SYNT_ERROR );
460 break;
461 case '1': // String Data
462 if( Is1_0( aLookAheadLine.getStr() ) )
464 bValidStructure = true;
466 break;
468 return bValidStructure;
471 DATASET DifParser::GetNextDataset()
473 DATASET eRet = D_UNKNOWN;
474 OUString aLine;
475 const sal_Unicode* pCurrentBuffer;
477 ReadNextLine( aLine );
479 pCurrentBuffer = aLine.getStr();
481 switch( *pCurrentBuffer )
483 case '-': // Special Datatype
484 pCurrentBuffer++;
486 if( Is1_0( pCurrentBuffer ) )
488 ReadNextLine( aLine );
489 if( IsBOT( aLine.getStr() ) )
490 eRet = D_BOT;
491 else if( IsEOD( aLine.getStr() ) )
492 eRet = D_EOD;
494 break;
495 case '0': // Numeric Data
496 pCurrentBuffer++; // value in fVal, 2. line in m_aData
497 if( *pCurrentBuffer == ',' )
499 pCurrentBuffer++;
500 eRet = GetNumberDataset(pCurrentBuffer);
501 OUString aTmpLine;
502 ReadNextLine( aTmpLine );
503 if ( eRet == D_SYNT_ERROR )
504 { // for broken records write "#ERR: data" to cell
505 m_aData = "#ERR: ";
506 m_aData.append(pCurrentBuffer).append(" (");
507 m_aData.append(aTmpLine).append(')');
508 eRet = D_STRING;
510 else
512 m_aData = aTmpLine;
515 break;
516 case '1': // String Data
517 if( Is1_0( aLine.getStr() ) )
519 ReadNextLine( aLine );
520 sal_Int32 nLineLength = aLine.getLength();
521 const sal_Unicode* pLine = aLine.getStr();
523 if( nLineLength >= 1 && *pLine == '"' )
525 // Quotes are not always escaped (duplicated), see lcl_DeEscapeQuotesDif
526 // A look ahead into the next line is needed in order to deal with
527 // multiline strings containing quotes
528 if( LookAhead() )
530 // Single line string
531 if( nLineLength >= 2 && pLine[nLineLength - 1] == '"' )
533 m_aData = aLine.copy( 1, nLineLength - 2 );
534 lcl_DeEscapeQuotesDif(m_aData);
535 eRet = D_STRING;
538 else
540 // Multiline string
541 m_aData = aLine.copy( 1 );
542 bool bContinue = true;
543 while ( bContinue )
545 m_aData.append("\n");
546 bContinue = !rIn.eof() && ReadNextLine( aLine );
547 if( bContinue )
549 nLineLength = aLine.getLength();
550 if( nLineLength >= 1 )
552 pLine = aLine.getStr();
553 bContinue = !LookAhead();
554 if( bContinue )
556 m_aData.append(aLine);
558 else if( pLine[nLineLength - 1] == '"' )
560 m_aData.append(aLine.copy(0, nLineLength -1));
561 lcl_DeEscapeQuotesDif(m_aData);
562 eRet = D_STRING;
570 break;
573 if( eRet == D_UNKNOWN )
574 ReadNextLine( aLine );
576 if( rIn.eof() )
577 eRet = D_EOD;
579 return eRet;
582 const sal_Unicode* DifParser::ScanIntVal( const sal_Unicode* pStart, sal_uInt32& rRet )
584 // eat leading whitespace, not specified, but seen in the wild
585 while (*pStart == ' ' || *pStart == '\t')
586 ++pStart;
588 sal_Unicode cCurrent = *pStart;
590 if( IsNumber( cCurrent ) )
591 rRet = static_cast<sal_uInt32>( cCurrent - '0' );
592 else
593 return nullptr;
595 pStart++;
596 cCurrent = *pStart;
598 while( IsNumber( cCurrent ) && rRet < ( 0xFFFFFFFF / 10 ) )
600 rRet *= 10;
601 rRet += static_cast<sal_uInt32>( cCurrent - '0' );
603 pStart++;
604 cCurrent = *pStart;
607 return pStart;
610 DifColumn::DifColumn ()
611 : mpCurrent(nullptr)
615 void DifColumn::SetNumFormat( SCROW nRow, const sal_uInt32 nNumFormat )
617 OSL_ENSURE( ValidRow(nRow), "*DifColumn::SetNumFormat(): Row too big!" );
619 if( nNumFormat > 0 )
621 if(mpCurrent)
623 OSL_ENSURE( nRow > 0,
624 "*DifColumn::SetNumFormat(): more cannot be zero!" );
625 OSL_ENSURE( nRow > mpCurrent->nEnd,
626 "*DifColumn::SetNumFormat(): start from scratch?" );
628 if( mpCurrent->nNumFormat == nNumFormat && mpCurrent->nEnd == nRow - 1 )
629 mpCurrent->nEnd = nRow;
630 else
631 NewEntry( nRow, nNumFormat );
633 else
634 NewEntry(nRow,nNumFormat );
636 else
637 mpCurrent = nullptr;
640 void DifColumn::NewEntry( const SCROW nPos, const sal_uInt32 nNumFormat )
642 maEntries.emplace_back();
643 mpCurrent = &maEntries.back();
644 mpCurrent->nStart = mpCurrent->nEnd = nPos;
645 mpCurrent->nNumFormat = nNumFormat;
649 void DifColumn::Apply( ScDocument& rDoc, const SCCOL nCol, const SCTAB nTab )
651 ScPatternAttr aAttr( rDoc.GetPool() );
652 SfxItemSet &rItemSet = aAttr.GetItemSet();
654 for (const auto& rEntry : maEntries)
656 OSL_ENSURE( rEntry.nNumFormat > 0,
657 "+DifColumn::Apply(): Number format must not be 0!" );
659 rItemSet.Put( SfxUInt32Item( ATTR_VALUE_FORMAT, rEntry.nNumFormat ) );
661 rDoc.ApplyPatternAreaTab( nCol, rEntry.nStart, nCol, rEntry.nEnd, nTab, aAttr );
663 rItemSet.ClearItem();
667 DifAttrCache::DifAttrCache()
671 DifAttrCache::~DifAttrCache()
675 void DifAttrCache::SetNumFormat( const SCCOL nCol, const SCROW nRow, const sal_uInt32 nNumFormat )
677 OSL_ENSURE( ValidCol(nCol), "-DifAttrCache::SetNumFormat(): Col too big!" );
679 if( !maColMap.count(nCol) )
680 maColMap[ nCol ].reset( new DifColumn );
682 maColMap[ nCol ]->SetNumFormat( nRow, nNumFormat );
685 void DifAttrCache::Apply( ScDocument& rDoc, SCTAB nTab )
687 for( SCCOL nCol : rDoc.GetColumnsRange(nTab, 0, rDoc.MaxCol()) )
689 if( maColMap.count(nCol) )
690 maColMap[ nCol ]->Apply( rDoc, nCol, nTab );
694 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */