LanguageTool: don't crash if REST protocol isn't set
[LibreOffice.git] / sc / source / filter / dif / difimp.cxx
blobcf5953c81c99429b145021c4bd3dbdf44898bca0
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <svl/numformat.hxx>
21 #include <svl/zforlist.hxx>
22 #include <tools/stream.hxx>
23 #include <osl/diagnose.h>
24 #include <dif.hxx>
25 #include <docpool.hxx>
26 #include <document.hxx>
27 #include <fprogressbar.hxx>
28 #include <ftools.hxx>
29 #include <patattr.hxx>
30 #include <scerrors.hxx>
31 #include <scitems.hxx>
32 #include <stringutil.hxx>
33 #include <table.hxx>
34 #include <memory>
36 const sal_Unicode pKeyTABLE[] = { 'T', 'A', 'B', 'L', 'E', 0 };
37 const sal_Unicode pKeyVECTORS[] = { 'V', 'E', 'C', 'T', 'O', 'R', 'S', 0 };
38 const sal_Unicode pKeyTUPLES[] = { 'T', 'U', 'P', 'L', 'E', 'S', 0 };
39 const sal_Unicode pKeyDATA[] = { 'D', 'A', 'T', 'A', 0 };
40 const sal_Unicode pKeyBOT[] = { 'B', 'O', 'T', 0 };
41 const sal_Unicode pKeyEOD[] = { 'E', 'O', 'D', 0 };
42 const sal_Unicode pKeyERROR[] = { 'E', 'R', 'R', 'O', 'R', 0 };
43 const sal_Unicode pKeyTRUE[] = { 'T', 'R', 'U', 'E', 0 };
44 const sal_Unicode pKeyFALSE[] = { 'F', 'A', 'L', 'S', 'E', 0 };
45 const sal_Unicode pKeyNA[] = { 'N', 'A', 0 };
46 const sal_Unicode pKeyV[] = { 'V', 0 };
47 const sal_Unicode pKey1_0[] = { '1', ',', '0', 0 };
49 ErrCode ScFormatFilterPluginImpl::ScImportDif(SvStream& rIn, ScDocument* pDoc, const ScAddress& rInsPos,
50 const rtl_TextEncoding eVon )
52 DifParser aDifParser( rIn, *pDoc, eVon );
54 SCTAB nBaseTab = rInsPos.Tab();
56 TOPIC eTopic = T_UNKNOWN;
57 bool bSyntErrWarn = false;
58 bool bOverflowWarn = false;
60 OUStringBuffer& rData = aDifParser.m_aData;
62 rIn.Seek( 0 );
64 ScfStreamProgressBar aPrgrsBar( rIn, pDoc->GetDocumentShell() );
66 while( eTopic != T_DATA && eTopic != T_END )
68 eTopic = aDifParser.GetNextTopic();
70 aPrgrsBar.Progress();
72 const bool bData = !rData.isEmpty();
74 switch( eTopic )
76 case T_TABLE:
78 if( aDifParser.nVector != 0 || aDifParser.nVal != 1 )
79 bSyntErrWarn = true;
80 if( bData )
81 pDoc->RenameTab(nBaseTab, rData.toString());
83 break;
84 case T_VECTORS:
86 if( aDifParser.nVector != 0 )
87 bSyntErrWarn = true;
89 break;
90 case T_TUPLES:
92 if( aDifParser.nVector != 0 )
93 bSyntErrWarn = true;
95 break;
96 case T_DATA:
98 if( aDifParser.nVector != 0 || aDifParser.nVal != 0 )
99 bSyntErrWarn = true;
101 break;
102 case T_LABEL:
103 case T_COMMENT:
104 case T_SIZE:
105 case T_PERIODICITY:
106 case T_MAJORSTART:
107 case T_MINORSTART:
108 case T_TRUELENGTH:
109 case T_UINITS:
110 case T_DISPLAYUNITS:
111 case T_END:
112 case T_UNKNOWN:
113 break;
114 default:
115 OSL_FAIL( "ScImportDif - missing enum" );
120 if( eTopic == T_DATA )
121 { // data starts here
122 SCCOL nBaseCol = rInsPos.Col();
124 SCCOL nColCnt = SCCOL_MAX;
125 SCROW nRowCnt = rInsPos.Row();
126 DifAttrCache aAttrCache;
128 DATASET eCurrent = D_UNKNOWN;
130 ScSetStringParam aStrParam; // used to set string value without number detection.
131 aStrParam.setTextInput();
133 while( eCurrent != D_EOD )
135 eCurrent = aDifParser.GetNextDataset();
137 aPrgrsBar.Progress();
138 ScAddress aPos(nColCnt, nRowCnt, nBaseTab);
140 OUString aData = rData.toString();
142 switch( eCurrent )
144 case D_BOT:
145 if( nColCnt < SCCOL_MAX )
146 nRowCnt++;
147 nColCnt = nBaseCol;
148 break;
149 case D_EOD:
150 break;
151 case D_NUMERIC: // Number cell
152 if( nColCnt == SCCOL_MAX )
153 nColCnt = nBaseCol;
155 if( pDoc->ValidCol(nColCnt) && pDoc->ValidRow(nRowCnt) )
157 pDoc->EnsureTable(nBaseTab);
159 if( DifParser::IsV( aData.getStr() ) )
161 pDoc->SetValue(aPos, aDifParser.fVal);
162 aAttrCache.SetNumFormat( pDoc, nColCnt, nRowCnt,
163 aDifParser.nNumFormat );
165 else if( aData == pKeyTRUE || aData == pKeyFALSE )
167 pDoc->SetValue(aPos, aDifParser.fVal);
168 aAttrCache.SetNumFormat( pDoc, nColCnt, nRowCnt,
169 aDifParser.nNumFormat );
171 else if( aData == pKeyNA || aData == pKeyERROR )
173 pDoc->SetString(aPos, aData, &aStrParam);
175 else
177 OUString aTmp = "#IND:" + aData + "?";
178 pDoc->SetString(aPos, aTmp, &aStrParam);
181 else
182 bOverflowWarn = true;
184 nColCnt++;
185 break;
186 case D_STRING: // Text cell
187 if( nColCnt == SCCOL_MAX )
188 nColCnt = nBaseCol;
190 if( pDoc->ValidCol(nColCnt) && pDoc->ValidRow(nRowCnt) )
192 if (!aData.isEmpty())
194 pDoc->EnsureTable(nBaseTab);
195 pDoc->SetTextCell(aPos, aData);
198 else
199 bOverflowWarn = true;
201 nColCnt++;
202 break;
203 case D_UNKNOWN:
204 break;
205 case D_SYNT_ERROR:
206 break;
207 default:
208 OSL_FAIL( "ScImportDif - missing enum" );
212 aAttrCache.Apply( *pDoc, nBaseTab );
214 else
215 return SCERR_IMPORT_FORMAT;
217 if( bSyntErrWarn )
219 // FIXME: Add proper warning!
220 return SCWARN_IMPORT_RANGE_OVERFLOW;
222 else if( bOverflowWarn )
223 return SCWARN_IMPORT_RANGE_OVERFLOW;
224 else
225 return ERRCODE_NONE;
228 DifParser::DifParser( SvStream& rNewIn, const ScDocument& rDoc, rtl_TextEncoding eCharSet )
229 : fVal(0.0)
230 , nVector(0)
231 , nVal(0)
232 , nNumFormat(0)
233 , pNumFormatter(rDoc.GetFormatTable())
234 , rIn(rNewIn)
236 if ( rIn.GetStreamCharSet() != eCharSet )
238 OSL_FAIL( "CharSet passed overrides and modifies StreamCharSet" );
239 rIn.SetStreamCharSet( eCharSet );
241 rIn.StartReadingUnicodeText( eCharSet );
244 TOPIC DifParser::GetNextTopic()
246 enum STATE { S_VectorVal, S_Data, S_END, S_START, S_UNKNOWN, S_ERROR_L2 };
248 static const sal_Unicode pKeyLABEL[] = { 'L', 'A', 'B', 'E', 'L', 0 };
249 static const sal_Unicode pKeyCOMMENT[] = { 'C', 'O', 'M', 'M', 'E', 'N', 'T', 0 };
250 static const sal_Unicode pKeySIZE[] = { 'S', 'I', 'Z', 'E', 0 };
251 static const sal_Unicode pKeyPERIODICITY[] = { 'P', 'E', 'R', 'I', 'O', 'D', 'I', 'C', 'I', 'T', 'Y', 0 };
252 static const sal_Unicode pKeyMAJORSTART[] = { 'M', 'A', 'J', 'O', 'R', 'S', 'T', 'A', 'R', 'T', 0 };
253 static const sal_Unicode pKeyMINORSTART[] = { 'M', 'I', 'N', 'O', 'R', 'S', 'T', 'A', 'R', 'T', 0 };
254 static const sal_Unicode pKeyTRUELENGTH[] = { 'T', 'R', 'U', 'E', 'L', 'E', 'N', 'G', 'T', 'H', 0 };
255 static const sal_Unicode pKeyUINITS[] = { 'U', 'I', 'N', 'I', 'T', 'S', 0 };
256 static const sal_Unicode pKeyDISPLAYUNITS[] = { 'D', 'I', 'S', 'P', 'L', 'A', 'Y', 'U', 'N', 'I', 'T', 'S', 0 };
257 static const sal_Unicode pKeyUNKNOWN[] = { 0 };
259 static const sal_Unicode* ppKeys[] =
261 pKeyTABLE, // 0
262 pKeyVECTORS,
263 pKeyTUPLES,
264 pKeyDATA,
265 pKeyLABEL,
266 pKeyCOMMENT, // 5
267 pKeySIZE,
268 pKeyPERIODICITY,
269 pKeyMAJORSTART,
270 pKeyMINORSTART,
271 pKeyTRUELENGTH, // 10
272 pKeyUINITS,
273 pKeyDISPLAYUNITS,
274 pKeyUNKNOWN // 13
277 static const TOPIC pTopics[] =
279 T_TABLE, // 0
280 T_VECTORS,
281 T_TUPLES,
282 T_DATA,
283 T_LABEL,
284 T_COMMENT, // 5
285 T_SIZE,
286 T_PERIODICITY,
287 T_MAJORSTART,
288 T_MINORSTART,
289 T_TRUELENGTH, // 10
290 T_UINITS,
291 T_DISPLAYUNITS,
292 T_UNKNOWN // 13
295 STATE eS = S_START;
296 OUString aLine;
298 nVector = 0;
299 nVal = 0;
300 TOPIC eRet = T_UNKNOWN;
302 while( eS != S_END )
304 if( !ReadNextLine( aLine ) )
306 eS = S_END;
307 eRet = T_END;
310 switch( eS )
312 case S_START:
314 const sal_Unicode* pRef;
315 sal_uInt16 nCnt = 0;
316 bool bSearch = true;
318 pRef = ppKeys[ nCnt ];
320 while( bSearch )
322 if( aLine == pRef )
324 eRet = pTopics[ nCnt ];
325 bSearch = false;
327 else
329 nCnt++;
330 pRef = ppKeys[ nCnt ];
331 if( !*pRef )
332 bSearch = false;
336 if( *pRef )
337 eS = S_VectorVal;
338 else
339 eS = S_UNKNOWN;
341 break;
342 case S_VectorVal:
344 const sal_Unicode* pCur = aLine.getStr();
346 pCur = ScanIntVal( pCur, nVector );
348 if( pCur && *pCur == ',' )
350 pCur++;
351 ScanIntVal( pCur, nVal );
352 eS = S_Data;
354 else
355 eS = S_ERROR_L2;
357 break;
358 case S_Data:
359 OSL_ENSURE( aLine.getLength() >= 2,
360 "+GetNextTopic(): <String> is too short!" );
361 if( aLine.getLength() > 2 )
362 m_aData.append(aLine.subView(1, aLine.getLength() - 2));
363 else
364 m_aData.truncate();
365 eS = S_END;
366 break;
367 case S_END:
368 OSL_FAIL( "DifParser::GetNextTopic - unexpected state" );
369 break;
370 case S_UNKNOWN:
371 // skip 2 lines
372 ReadNextLine( aLine );
373 [[fallthrough]];
374 case S_ERROR_L2: // error happened in line 2
375 // skip 1 line
376 ReadNextLine( aLine );
377 eS = S_END;
378 break;
379 default:
380 OSL_FAIL( "DifParser::GetNextTopic - missing enum" );
384 return eRet;
387 static void lcl_DeEscapeQuotesDif(OUStringBuffer& rString)
389 // Special handling for DIF import: Escaped (duplicated) quotes are resolved.
390 // Single quote characters are left in place because older versions didn't
391 // escape quotes in strings (and Excel doesn't when using the clipboard).
392 // The quotes around the string are removed before this function is called.
394 rString = rString.toString().replaceAll("\"\"", "\"");
397 // Determine if passed in string is numeric data and set fVal/nNumFormat if so
398 DATASET DifParser::GetNumberDataset( const sal_Unicode* pPossibleNumericData )
400 DATASET eRet = D_SYNT_ERROR;
402 OSL_ENSURE( pNumFormatter, "-DifParser::GetNumberDataset(): No Formatter, more fun!" );
403 OUString aTestVal( pPossibleNumericData );
404 sal_uInt32 nFormat = 0;
405 double fTmpVal;
406 if( pNumFormatter->IsNumberFormat( aTestVal, nFormat, fTmpVal ) )
408 fVal = fTmpVal;
409 nNumFormat = nFormat;
410 eRet = D_NUMERIC;
412 else
413 eRet = D_SYNT_ERROR;
415 return eRet;
418 bool DifParser::ReadNextLine( OUString& rStr )
420 if( aLookAheadLine.isEmpty() )
422 return rIn.ReadUniOrByteStringLine( rStr, rIn.GetStreamCharSet() );
424 else
426 rStr = aLookAheadLine;
427 aLookAheadLine.clear();
428 return true;
432 // Look ahead in the stream to determine if the next line is the first line of
433 // a valid data record structure
434 bool DifParser::LookAhead()
436 const sal_Unicode* pCurrentBuffer;
437 bool bValidStructure = false;
439 OSL_ENSURE( aLookAheadLine.isEmpty(), "*DifParser::LookAhead(): LookAhead called twice in a row" );
440 rIn.ReadUniOrByteStringLine( aLookAheadLine, rIn.GetStreamCharSet() );
442 pCurrentBuffer = aLookAheadLine.getStr();
444 switch( *pCurrentBuffer )
446 case '-': // Special Datatype
447 pCurrentBuffer++;
449 if( Is1_0( pCurrentBuffer ) )
451 bValidStructure = true;
453 break;
454 case '0': // Numeric Data
455 pCurrentBuffer++;
456 if( *pCurrentBuffer == ',' )
458 pCurrentBuffer++;
459 bValidStructure = ( GetNumberDataset(pCurrentBuffer) != D_SYNT_ERROR );
461 break;
462 case '1': // String Data
463 if( Is1_0( aLookAheadLine.getStr() ) )
465 bValidStructure = true;
467 break;
469 return bValidStructure;
472 DATASET DifParser::GetNextDataset()
474 DATASET eRet = D_UNKNOWN;
475 OUString aLine;
476 const sal_Unicode* pCurrentBuffer;
478 ReadNextLine( aLine );
480 pCurrentBuffer = aLine.getStr();
482 switch( *pCurrentBuffer )
484 case '-': // Special Datatype
485 pCurrentBuffer++;
487 if( Is1_0( pCurrentBuffer ) )
489 ReadNextLine( aLine );
490 if( IsBOT( aLine.getStr() ) )
491 eRet = D_BOT;
492 else if( IsEOD( aLine.getStr() ) )
493 eRet = D_EOD;
495 break;
496 case '0': // Numeric Data
497 pCurrentBuffer++; // value in fVal, 2. line in m_aData
498 if( *pCurrentBuffer == ',' )
500 pCurrentBuffer++;
501 eRet = GetNumberDataset(pCurrentBuffer);
502 OUString aTmpLine;
503 ReadNextLine( aTmpLine );
504 if ( eRet == D_SYNT_ERROR )
505 { // for broken records write "#ERR: data" to cell
506 m_aData = OUString::Concat("#ERR: ") + pCurrentBuffer + " (" + aTmpLine + ")";
507 eRet = D_STRING;
509 else
511 m_aData = aTmpLine;
514 break;
515 case '1': // String Data
516 if( Is1_0( aLine.getStr() ) )
518 ReadNextLine( aLine );
519 sal_Int32 nLineLength = aLine.getLength();
520 const sal_Unicode* pLine = aLine.getStr();
522 if( nLineLength >= 1 && *pLine == '"' )
524 // Quotes are not always escaped (duplicated), see lcl_DeEscapeQuotesDif
525 // A look ahead into the next line is needed in order to deal with
526 // multiline strings containing quotes
527 if( LookAhead() )
529 // Single line string
530 if( nLineLength >= 2 && pLine[nLineLength - 1] == '"' )
532 m_aData = aLine.subView( 1, nLineLength - 2 );
533 lcl_DeEscapeQuotesDif(m_aData);
534 eRet = D_STRING;
537 else
539 // Multiline string
540 m_aData = aLine.subView( 1 );
541 bool bContinue = true;
542 while ( bContinue )
544 m_aData.append("\n");
545 bContinue = !rIn.eof() && ReadNextLine( aLine );
546 if( bContinue )
548 nLineLength = aLine.getLength();
549 if( nLineLength >= 1 )
551 pLine = aLine.getStr();
552 bContinue = !LookAhead();
553 if( bContinue )
555 m_aData.append(aLine);
557 else if( pLine[nLineLength - 1] == '"' )
559 m_aData.append(aLine.subView(0, nLineLength -1));
560 lcl_DeEscapeQuotesDif(m_aData);
561 eRet = D_STRING;
569 break;
572 if( eRet == D_UNKNOWN )
573 ReadNextLine( aLine );
575 if( rIn.eof() )
576 eRet = D_EOD;
578 return eRet;
581 const sal_Unicode* DifParser::ScanIntVal( const sal_Unicode* pStart, sal_uInt32& rRet )
583 // eat leading whitespace, not specified, but seen in the wild
584 while (*pStart == ' ' || *pStart == '\t')
585 ++pStart;
587 sal_Unicode cCurrent = *pStart;
589 if( IsNumber( cCurrent ) )
590 rRet = static_cast<sal_uInt32>( cCurrent - '0' );
591 else
592 return nullptr;
594 pStart++;
595 cCurrent = *pStart;
597 while( IsNumber( cCurrent ) && rRet < ( 0xFFFFFFFF / 10 ) )
599 rRet *= 10;
600 rRet += static_cast<sal_uInt32>( cCurrent - '0' );
602 pStart++;
603 cCurrent = *pStart;
606 return pStart;
609 DifColumn::DifColumn ()
610 : mpCurrent(nullptr)
614 void DifColumn::SetNumFormat( const ScDocument* pDoc, SCROW nRow, const sal_uInt32 nNumFormat )
616 OSL_ENSURE( pDoc->ValidRow(nRow), "*DifColumn::SetNumFormat(): Row too big!" );
618 if( nNumFormat > 0 )
620 if(mpCurrent)
622 OSL_ENSURE( nRow > 0,
623 "*DifColumn::SetNumFormat(): more cannot be zero!" );
624 OSL_ENSURE( nRow > mpCurrent->nEnd,
625 "*DifColumn::SetNumFormat(): start from scratch?" );
627 if( mpCurrent->nNumFormat == nNumFormat && mpCurrent->nEnd == nRow - 1 )
628 mpCurrent->nEnd = nRow;
629 else
630 NewEntry( nRow, nNumFormat );
632 else
633 NewEntry(nRow,nNumFormat );
635 else
636 mpCurrent = nullptr;
639 void DifColumn::NewEntry( const SCROW nPos, const sal_uInt32 nNumFormat )
641 maEntries.emplace_back();
642 mpCurrent = &maEntries.back();
643 mpCurrent->nStart = mpCurrent->nEnd = nPos;
644 mpCurrent->nNumFormat = nNumFormat;
648 void DifColumn::Apply( ScDocument& rDoc, const SCCOL nCol, const SCTAB nTab )
650 ScPatternAttr aAttr( rDoc.GetPool() );
651 SfxItemSet &rItemSet = aAttr.GetItemSet();
653 for (const auto& rEntry : maEntries)
655 OSL_ENSURE( rEntry.nNumFormat > 0,
656 "+DifColumn::Apply(): Number format must not be 0!" );
658 rItemSet.Put( SfxUInt32Item( ATTR_VALUE_FORMAT, rEntry.nNumFormat ) );
660 rDoc.ApplyPatternAreaTab( nCol, rEntry.nStart, nCol, rEntry.nEnd, nTab, aAttr );
662 rItemSet.ClearItem();
666 DifAttrCache::DifAttrCache()
670 DifAttrCache::~DifAttrCache()
674 void DifAttrCache::SetNumFormat( const ScDocument* pDoc, const SCCOL nCol, const SCROW nRow, const sal_uInt32 nNumFormat )
676 OSL_ENSURE( pDoc->ValidCol(nCol), "-DifAttrCache::SetNumFormat(): Col too big!" );
678 if( !maColMap.count(nCol) )
679 maColMap[ nCol ].reset( new DifColumn );
681 maColMap[ nCol ]->SetNumFormat( pDoc, nRow, nNumFormat );
684 void DifAttrCache::Apply( ScDocument& rDoc, SCTAB nTab )
686 for( SCCOL nCol : rDoc.GetWritableColumnsRange(nTab, 0, rDoc.MaxCol()) )
688 if( maColMap.count(nCol) )
689 maColMap[ nCol ]->Apply( rDoc, nCol, nTab );
693 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */