editeng/source/misc/svxacorr.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <memory>
  21 #include <string_view>
  22 #include <sal/config.h>
  23
  24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
  25 #include <com/sun/star/embed/XStorage.hpp>
  26 #include <com/sun/star/io/IOException.hpp>
  27 #include <com/sun/star/io/XStream.hpp>
  28 #include <tools/urlobj.hxx>
  29 #include <i18nlangtag/mslangid.hxx>
  30 #include <i18nutil/transliteration.hxx>
  31 #include <sal/log.hxx>
  32 #include <osl/diagnose.h>
  33 #include <vcl/svapp.hxx>
  34 #include <vcl/settings.hxx>
  35 #include <svl/fstathelper.hxx>
  36 #include <svl/urihelper.hxx>
  37 #include <unotools/charclass.hxx>
  38 #include <com/sun/star/i18n/UnicodeType.hpp>
  39 #include <unotools/collatorwrapper.hxx>
  40 #include <com/sun/star/i18n/UnicodeScript.hpp>
  41 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
  42 #include <unotools/localedatawrapper.hxx>
  43 #include <unotools/transliterationwrapper.hxx>
  44 #include <comphelper/processfactory.hxx>
  45 #include <comphelper/storagehelper.hxx>
  46 #include <comphelper/string.hxx>
  47 #include <editeng/editids.hrc>
  48 #include <sot/storage.hxx>
  49 #include <editeng/udlnitem.hxx>
  50 #include <editeng/wghtitem.hxx>
  51 #include <editeng/postitem.hxx>
  52 #include <editeng/crossedoutitem.hxx>
  53 #include <editeng/escapementitem.hxx>
  54 #include <editeng/svxacorr.hxx>
  55 #include <editeng/unolingu.hxx>
  56 #include <vcl/window.hxx>
  57 #include <com/sun/star/xml/sax/InputSource.hpp>
  58 #include <com/sun/star/xml/sax/FastParser.hpp>
  59 #include <com/sun/star/xml/sax/Writer.hpp>
  60 #include <com/sun/star/xml/sax/SAXParseException.hpp>
  61 #include <unotools/streamwrap.hxx>
  62 #include "SvXMLAutoCorrectImport.hxx"
  63 #include "SvXMLAutoCorrectExport.hxx"
  64 #include "SvXMLAutoCorrectTokenHandler.hxx"
  65 #include <ucbhelper/content.hxx>
  66 #include <com/sun/star/ucb/ContentCreationException.hpp>
  67 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
  68 #include <com/sun/star/ucb/TransferInfo.hpp>
  69 #include <com/sun/star/ucb/NameClash.hpp>
  70 #include <tools/diagnose_ex.h>
  71 #include <xmloff/xmltoken.hxx>
  72 #include <unordered_map>
  73 #include <rtl/character.hxx>
  74
  75 using namespace ::com::sun::star::ucb;
  76 using namespace ::com::sun::star::uno;
  77 using namespace ::com::sun::star::xml::sax;
  78 using namespace ::com::sun::star;
  79 using namespace ::xmloff::token;
  80 using namespace ::utl;
  81
  82 namespace {
  83
  84 enum class Flags {
  85     NONE            = 0x00,
  86     FullStop        = 0x01,
  87     ExclamationMark = 0x02,
  88     QuestionMark    = 0x04,
  89 };
  90
  91 }
  92
  93 namespace o3tl {
  94     template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
  95 }
  96 const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space
  97
  98 const char pXMLImplWrdStt_ExcptLstStr[] = "WordExceptList.xml";
  99 const char pXMLImplCplStt_ExcptLstStr[] = "SentenceExceptList.xml";
 100 const char pXMLImplAutocorr_ListStr[]   = "DocumentList.xml";
 101
 102 const char
 103     /* also at these beginnings - Brackets and all kinds of begin characters */
 104     sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
 105     /* also at these ends - Brackets and all kinds of begin characters */
 106     sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
 107
 108 static OUString EncryptBlockName_Imp(const OUString& rName);
 109
 110 static bool NonFieldWordDelim( const sal_Unicode c )
 111 {
 112     return ' ' == c || '\t' == c || 0x0a == c ||
 113             cNonBreakingSpace == c || 0x2011 == c;
 114 }
 115
 116 static bool IsWordDelim( const sal_Unicode c )
 117 {
 118     return c == 0x1 || NonFieldWordDelim(c);
 119 }
 120
 121
 122 static bool IsLowerLetter( sal_Int32 nCharType )
 123 {
 124     return CharClass::isLetterType( nCharType ) &&
 125            ( css::i18n::KCharacterType::LOWER & nCharType);
 126 }
 127
 128 static bool IsUpperLetter( sal_Int32 nCharType )
 129 {
 130     return CharClass::isLetterType( nCharType ) &&
 131             ( css::i18n::KCharacterType::UPPER & nCharType);
 132 }
 133
 134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
 135                                    sal_Int32 nStt, sal_Int32 nEnd )
 136 {
 137     for( ; nStt < nEnd; ++nStt )
 138     {
 139         css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
 140         switch( nScript )
 141         {
 142             case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
 143             case css::i18n::UnicodeScript_kHangulJamo:
 144             case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
 145             case css::i18n::UnicodeScript_kHiragana:
 146             case css::i18n::UnicodeScript_kKatakana:
 147             case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
 148             case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
 149             case css::i18n::UnicodeScript_kCJKCompatibility:
 150             case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA:
 151             case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
 152             case css::i18n::UnicodeScript_kHangulSyllable:
 153             case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
 154             case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
 155                 return true;
 156             default: ; //do nothing
 157         }
 158     }
 159     return false;
 160 }
 161
 162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
 163                                   sal_Int32 nStt, sal_Int32 nEnd )
 164 {
 165     for( ; nStt < nEnd; ++nStt )
 166     {
 167         if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
 168             return true;
 169     }
 170     return false;
 171 }
 172
 173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c )
 174 {
 175     // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
 176     if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) )
 177         return true;
 178
 179     bool bRet = false;
 180     for( ; *pArr; ++pArr )
 181         if( *pArr == c )
 182         {
 183             bRet = true;
 184             break;
 185         }
 186     return bRet;
 187 }
 188
 189 SvxAutoCorrDoc::~SvxAutoCorrDoc()
 190 {
 191 }
 192
 193 // Called by the functions:
 194 //  - FnCapitalStartWord
 195 //  - FnCapitalStartSentence
 196 // after the exchange of characters. Then the words, if necessary, can be inserted
 197 // into the exception list.
 198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
 199                                         sal_Unicode )
 200 {
 201 }
 202
 203 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
 204 {
 205     return LANGUAGE_SYSTEM;
 206 }
 207
 208 static const LanguageTag& GetAppLang()
 209 {
 210     return Application::GetSettings().GetLanguageTag();
 211 }
 212
 213 /// Never use an unresolved LANGUAGE_SYSTEM.
 214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
 215 {
 216     LanguageType eLang = rDoc.GetLanguage( nPos );
 217     if (eLang == LANGUAGE_SYSTEM)
 218         eLang = GetAppLang().getLanguageType();     // the current work locale
 219     return eLang;
 220 }
 221
 222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
 223 {
 224     static LocaleDataWrapper aLclDtWrp( GetAppLang() );
 225     LanguageTag aLcl( nLang );
 226     const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag();
 227     if( aLcl != rLcl )
 228         aLclDtWrp.setLanguageTag( aLcl );
 229     return aLclDtWrp;
 230 }
 231 static TransliterationWrapper& GetIgnoreTranslWrapper()
 232 {
 233     static int bIsInit = 0;
 234     static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
 235                 TransliterationFlags::IGNORE_KANA |
 236                 TransliterationFlags::IGNORE_WIDTH );
 237     if( !bIsInit )
 238     {
 239         aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
 240         bIsInit = 1;
 241     }
 242     return aWrp;
 243 }
 244 static CollatorWrapper& GetCollatorWrapper()
 245 {
 246     static CollatorWrapper aCollWrp = [&]()
 247     {
 248         CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
 249         tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
 250         return tmp;
 251     }();
 252     return aCollWrp;
 253 }
 254
 255 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
 256 {
 257     return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
 258             cChar == ' '  || cChar == '\'' || cChar == '\"' ||
 259             cChar == '*'  || cChar == '_'  || cChar == '%' ||
 260             cChar == '.'  || cChar == ','  || cChar == ';' ||
 261             cChar == ':'  || cChar == '?' || cChar == '!' ||
 262             cChar == '<'  || cChar == '>' ||
 263             cChar == '/'  || cChar == '-';
 264 }
 265
 266 namespace
 267 {
 268     bool IsCompoundWordDelimChar(sal_Unicode cChar)
 269     {
 270         return  cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
 271     }
 272 }
 273
 274 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
 275 {
 276     return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
 277         cChar == '/' /*case for the urls exception*/;
 278 }
 279
 280 ACFlags SvxAutoCorrect::GetDefaultFlags()
 281 {
 282     ACFlags nRet = ACFlags::Autocorrect
 283                     | ACFlags::CapitalStartSentence
 284                     | ACFlags::CapitalStartWord
 285                     | ACFlags::ChgOrdinalNumber
 286                     | ACFlags::ChgToEnEmDash
 287                     | ACFlags::AddNonBrkSpace
 288                     | ACFlags::TransliterateRTL
 289                     | ACFlags::ChgAngleQuotes
 290                     | ACFlags::ChgWeightUnderl
 291                     | ACFlags::SetINetAttr
 292                     | ACFlags::ChgQuotes
 293                     | ACFlags::SaveWordCplSttLst
 294                     | ACFlags::SaveWordWrdSttLst
 295                     | ACFlags::CorrectCapsLock;
 296     LanguageType eLang = GetAppLang().getLanguageType();
 297     if( eLang.anyOf(
 298         LANGUAGE_ENGLISH,
 299         LANGUAGE_ENGLISH_US,
 300         LANGUAGE_ENGLISH_UK,
 301         LANGUAGE_ENGLISH_AUS,
 302         LANGUAGE_ENGLISH_CAN,
 303         LANGUAGE_ENGLISH_NZ,
 304         LANGUAGE_ENGLISH_EIRE,
 305         LANGUAGE_ENGLISH_SAFRICA,
 306         LANGUAGE_ENGLISH_JAMAICA,
 307         LANGUAGE_ENGLISH_CARIBBEAN))
 308         nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
 309     return nRet;
 310 }
 311
 312 constexpr sal_Unicode cEmDash = 0x2014;
 313 constexpr sal_Unicode cEnDash = 0x2013;
 314 constexpr sal_Unicode cApostrophe = 0x2019;
 315 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
 316 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
 317 constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
 318 constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
 319 // stop characters for searching preceding quotes
 320 // (the first character is also the opening quote we are looking for)
 321 const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
 322 const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
 323 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
 324 const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
 325 const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
 326 const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };
 327
 328 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile,
 329                                 const OUString& rUserAutocorrFile )
 330     : sShareAutoCorrFile( rShareAutocorrFile )
 331     , sUserAutoCorrFile( rUserAutocorrFile )
 332     , eCharClassLang( LANGUAGE_DONTKNOW )
 333     , nFlags(SvxAutoCorrect::GetDefaultFlags())
 334     , cStartDQuote( 0 )
 335     , cEndDQuote( 0 )
 336     , cStartSQuote( 0 )
 337     , cEndSQuote( 0 )
 338 {
 339 }
 340
 341 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
 342     : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
 343     , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
 344     , aSwFlags( rCpy.aSwFlags )
 345     , eCharClassLang(rCpy.eCharClassLang)
 346     , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad))
 347     , cStartDQuote( rCpy.cStartDQuote )
 348     , cEndDQuote( rCpy.cEndDQuote )
 349     , cStartSQuote( rCpy.cStartSQuote )
 350     , cEndSQuote( rCpy.cEndSQuote )
 351 {
 352 }
 353
 354
 355 SvxAutoCorrect::~SvxAutoCorrect()
 356 {
 357 }
 358
 359 void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
 360 {
 361     pCharClass.reset( new CharClass( LanguageTag( eLang)) );
 362     eCharClassLang = eLang;
 363 }
 364
 365 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
 366 {
 367     ACFlags nOld = nFlags;
 368     nFlags = bOn ? nFlags | nFlag
 369                  : nFlags & ~nFlag;
 370
 371     if( !bOn )
 372     {
 373         if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
 374             nFlags &= ~ACFlags::CplSttLstLoad;
 375         if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
 376             nFlags &= ~ACFlags::WrdSttLstLoad;
 377         if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
 378             nFlags &= ~ACFlags::ChgWordLstLoad;
 379     }
 380 }
 381
 382
 383 // Correct TWo INitial CApitals
 384 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 385                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
 386                                     LanguageType eLang )
 387 {
 388     CharClass& rCC = GetCharClass( eLang );
 389
 390     // Delete all non alphanumeric. Test the characters at the beginning/end of
 391     // the word ( recognizes: "(min.", "/min.", and so on.)
 392     for( ; nSttPos < nEndPos; ++nSttPos )
 393         if( rCC.isLetterNumeric( rTxt, nSttPos ))
 394             break;
 395     for( ; nSttPos < nEndPos; --nEndPos )
 396         if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
 397             break;
 398
 399     // Is the word a compounded word separated by delimiters?
 400     // If so, keep track of all delimiters so each constituent
 401     // word can be checked for two initial capital letters.
 402     std::deque<sal_Int32> aDelimiters;
 403
 404     // Always check for two capitals at the beginning
 405     // of the entire word, so start at nSttPos.
 406     aDelimiters.push_back(nSttPos);
 407
 408     // Find all compound word delimiters
 409     for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
 410     {
 411         if (IsCompoundWordDelimChar(rTxt[ n ]))
 412         {
 413             aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
 414         }
 415     }
 416
 417     // Decide where to put the terminating delimiter.
 418     // If the last AutoCorrect char was a newline, then the AutoCorrect
 419     // char will not be included in rTxt.
 420     // If the last AutoCorrect char was not a newline, then the AutoCorrect
 421     // character will be the last character in rTxt.
 422     if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
 423         aDelimiters.push_back(nEndPos);
 424
 425     // Iterate through the word and all words that compose it.
 426     // Two capital letters at the beginning of word?
 427     for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
 428     {
 429         nSttPos = aDelimiters[nI];
 430         nEndPos = aDelimiters[nI + 1];
 431
 432         if( nSttPos+2 < nEndPos &&
 433             IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
 434             IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
 435             // Is the third character a lower case
 436             IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
 437             // Do not replace special attributes
 438             0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
 439         {
 440             // test if the word is in an exception list
 441             OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
 442             if( !FindInWrdSttExceptList(eLang, sWord) )
 443             {
 444                 // Check that word isn't correctly spelt before correcting:
 445                 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
 446                     LinguMgr::GetSpellChecker();
 447                 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
 448                 {
 449                     Sequence< css::beans::PropertyValue > aEmptySeq;
 450                     if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
 451                     {
 452                         return;
 453                     }
 454                 }
 455                 sal_Unicode cSave = rTxt[ nSttPos ];
 456                 OUString sChar = rCC.lowercase( OUString(cSave) );
 457                 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
 458                 {
 459                     if( ACFlags::SaveWordWrdSttLst & nFlags )
 460                         rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
 461                 }
 462             }
 463         }
 464     }
 465 }
 466
 467 // Format ordinal numbers suffixes (1st -> 1^st)
 468 bool SvxAutoCorrect::FnChgOrdinalNumber(
 469     SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 470     sal_Int32 nSttPos, sal_Int32 nEndPos,
 471     LanguageType eLang)
 472 {
 473     // 1st, 2nd, 3rd, 4 - 0th
 474     // 201th or 201st
 475     // 12th or 12nd
 476     bool bChg = false;
 477
 478     // In some languages ordinal suffixes should never be
 479     // changed to superscript. Let's break for those languages.
 480     if (!eLang.anyOf(
 481          LANGUAGE_SWEDISH,
 482          LANGUAGE_SWEDISH_FINLAND))
 483     {
 484         CharClass& rCC = GetCharClass(eLang);
 485
 486         for (; nSttPos < nEndPos; ++nSttPos)
 487             if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
 488                 break;
 489         for (; nSttPos < nEndPos; --nEndPos)
 490             if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
 491                 break;
 492
 493
 494         // Get the last number in the string to check
 495         sal_Int32 nNumEnd = nEndPos;
 496         bool bFoundEnd = false;
 497         bool isValidNumber = true;
 498         sal_Int32 i = nEndPos;
 499         while (i > nSttPos)
 500         {
 501             i--;
 502             bool isDigit = rCC.isDigit(rTxt, i);
 503             if (bFoundEnd)
 504                 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
 505
 506             if (isDigit && !bFoundEnd)
 507             {
 508                 bFoundEnd = true;
 509                 nNumEnd = i;
 510             }
 511         }
 512
 513         if (bFoundEnd && isValidNumber) {
 514             sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32();
 515
 516             // Check if the characters after that number correspond to the ordinal suffix
 517             uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
 518                 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
 519
 520             const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
 521             for (OUString const & sSuffix : aSuffixes)
 522             {
 523                 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1);
 524
 525                 if (sSuffix == sEnd)
 526                 {
 527                     // Check if the ordinal suffix has to be set as super script
 528                     if (rCC.isLetter(sSuffix))
 529                     {
 530                         // Do the change
 531                         SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
 532                             DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
 533                         rDoc.SetAttr(nNumEnd + 1, nEndPos,
 534                             SID_ATTR_CHAR_ESCAPEMENT,
 535                             aSvxEscapementItem);
 536                         bChg = true;
 537                     }
 538                 }
 539             }
 540         }
 541     }
 542     return bChg;
 543 }
 544
 545 // Replace dashes
 546 bool SvxAutoCorrect::FnChgToEnEmDash(
 547                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 548                                 sal_Int32 nSttPos, sal_Int32 nEndPos,
 549                                 LanguageType eLang )
 550 {
 551     bool bRet = false;
 552     CharClass& rCC = GetCharClass( eLang );
 553     if (eLang == LANGUAGE_SYSTEM)
 554         eLang = GetAppLang().getLanguageType();
 555     bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);
 556
 557     // replace " - " or " --" with "enDash"
 558     if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
 559     {
 560         sal_Unicode cCh = rTxt[ nSttPos ];
 561         if( '-' == cCh )
 562         {
 563             if( 1 < nEndPos - nSttPos &&
 564                 ' ' == rTxt[ nSttPos-1 ] &&
 565                 '-' == rTxt[ nSttPos+1 ])
 566             {
 567                 sal_Int32 n;
 568                 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
 569                             sImplSttSkipChars,(cCh = rTxt[ n ]));
 570                         ++n )
 571                     ;
 572
 573                 // found: " --[<AnySttChars>][A-z0-9]
 574                 if( rCC.isLetterNumeric( OUString(cCh) ) )
 575                 {
 576                     for( n = nSttPos-1; n && lcl_IsInAsciiArr(
 577                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
 578                         ;
 579
 580                     // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
 581                     if( rCC.isLetterNumeric( OUString(cCh) ))
 582                     {
 583                         rDoc.Delete( nSttPos, nSttPos + 2 );
 584                         rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
 585                         bRet = true;
 586                     }
 587                 }
 588             }
 589         }
 590         else if( 3 < nSttPos &&
 591                  ' ' == rTxt[ nSttPos-1 ] &&
 592                  '-' == rTxt[ nSttPos-2 ])
 593         {
 594             sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
 595             if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
 596             {
 597                 --nTmpPos;
 598                 ++nLen;
 599                 cCh = rTxt[ nTmpPos-1 ];
 600             }
 601             if( ' ' == cCh )
 602             {
 603                 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
 604                             sImplSttSkipChars,(cCh = rTxt[ n ]));
 605                         ++n )
 606                     ;
 607
 608                 // found: " - [<AnySttChars>][A-z0-9]
 609                 if( rCC.isLetterNumeric( OUString(cCh) ) )
 610                 {
 611                     cCh = ' ';
 612                     for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
 613                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
 614                             ;
 615                     // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
 616                     if( rCC.isLetterNumeric( OUString(cCh) ))
 617                     {
 618                         rDoc.Delete( nTmpPos, nTmpPos + nLen );
 619                         rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
 620                         bRet = true;
 621                     }
 622                 }
 623             }
 624         }
 625     }
 626
 627     // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
 628     // [0-9]--[0-9] double dash always replaced with "enDash"
 629     // Finnish and Hungarian use enDash instead of emDash.
 630     bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
 631     if( 4 <= nEndPos - nSttPos )
 632     {
 633         OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) );
 634         sal_Int32 nFndPos = sTmp.indexOf("--");
 635         if( nFndPos != -1 && nFndPos &&
 636             nFndPos + 2 < sTmp.getLength() &&
 637             ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
 638               lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) &&
 639             ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
 640             lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] )))
 641         {
 642             nSttPos = nSttPos + nFndPos;
 643             rDoc.Delete( nSttPos, nSttPos + 2 );
 644             rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) &&
 645                 rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) );
 646             bRet = true;
 647         }
 648     }
 649     return bRet;
 650 }
 651
 652 // Add non-breaking space before specific punctuation marks in French text
 653 bool SvxAutoCorrect::FnAddNonBrkSpace(
 654                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 655                                 sal_Int32 nEndPos,
 656                                 LanguageType eLang, bool& io_bNbspRunNext )
 657 {
 658     bool bRet = false;
 659
 660     CharClass& rCC = GetCharClass( eLang );
 661
 662     if ( rCC.getLanguageTag().getLanguage() == "fr" )
 663     {
 664         bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
 665         OUString allChars = ":;?!%";
 666         OUString chars( allChars );
 667         if ( bFrCA )
 668             chars = ":";
 669
 670         sal_Unicode cChar = rTxt[ nEndPos ];
 671         bool bHasSpace = chars.indexOf( cChar ) != -1;
 672         bool bIsSpecial = allChars.indexOf( cChar ) != -1;
 673         if ( bIsSpecial )
 674         {
 675             // Get the last word delimiter position
 676             sal_Int32 nSttWdPos = nEndPos;
 677             bool bWasWordDelim = false;
 678             while( nSttWdPos )
 679             {
 680                 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
 681                 if (bWasWordDelim)
 682                     break;
 683             }
 684
 685             //See if the text is the start of a protocol string, e.g. have text of
 686             //"http" see if it is the start of "http:" and if so leave it alone
 687             sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
 688             sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1;
 689             if (nIndex + nProtocolLen <= rTxt.getLength())
 690             {
 691                 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
 692                     return false;
 693             }
 694
 695             // Check the presence of "://" in the word
 696             sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 );
 697             if ( nStrPos == -1 && nEndPos > 0 )
 698             {
 699                 // Check the previous char
 700                 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
 701                 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
 702                 {
 703                     // Remove any previous normal space
 704                     sal_Int32 nPos = nEndPos - 1;
 705                     while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
 706                     {
 707                         if ( nPos == 0 ) break;
 708                         nPos--;
 709                         cPrevChar = rTxt[ nPos ];
 710                     }
 711
 712                     nPos++;
 713                     if ( nEndPos - nPos > 0 )
 714                         rDoc.Delete( nPos, nEndPos );
 715
 716                     // Add the non-breaking space at the end pos
 717                     if ( bHasSpace )
 718                         rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
 719                     io_bNbspRunNext = true;
 720                     bRet = true;
 721                 }
 722                 else if ( chars.indexOf( cPrevChar ) != -1 )
 723                     io_bNbspRunNext = true;
 724             }
 725         }
 726         else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) )
 727         {
 728             // Remove the hardspace right before to avoid formatting URLs
 729             sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
 730             sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
 731             if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
 732             {
 733                 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
 734                 bRet = true;
 735             }
 736         }
 737     }
 738
 739     return bRet;
 740 }
 741
 742 // URL recognition
 743 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 744                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
 745                                     LanguageType eLang )
 746 {
 747     OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
 748                                                 GetCharClass( eLang ) ));
 749     bool bRet = !sURL.isEmpty();
 750     if( bRet )          // so, set attribute:
 751         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
 752     return bRet;
 753 }
 754
 755 // Automatic *bold*, /italic/, -strikeout- and _underline_
 756 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 757                                         sal_Int32 nEndPos )
 758 {
 759     // Condition:
 760     //  at the beginning:   _, *, / or ~ after Space with the following !Space
 761     //  at the end:         _, *, / or ~ before Space (word delimiter?)
 762
 763     sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
 764     if( ++nEndPos != rTxt.getLength() &&
 765         !IsWordDelim( rTxt[ nEndPos ] ) )
 766         return false;
 767
 768     --nEndPos;
 769
 770     bool bAlphaNum = false;
 771     sal_Int32 nPos = nEndPos;
 772     sal_Int32  nFndPos = -1;
 773     CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
 774
 775     while( nPos )
 776     {
 777         switch( sal_Unicode c = rTxt[ --nPos ] )
 778         {
 779         case '_':
 780         case '-':
 781         case '/':
 782         case '*':
 783             if( c == cInsChar )
 784             {
 785                 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
 786                     IsWordDelim( rTxt[ nPos-1 ])) &&
 787                     !IsWordDelim( rTxt[ nPos+1 ]))
 788                         nFndPos = nPos;
 789                 else
 790                     // Condition is not satisfied, so cancel
 791                     nFndPos = -1;
 792                 nPos = 0;
 793             }
 794             break;
 795         default:
 796             if( !bAlphaNum )
 797                 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
 798         }
 799     }
 800
 801     if( -1 != nFndPos )
 802     {
 803         // first delete the Character at the end - this allows insertion
 804         // of an empty hint in SetAttr which would be removed by Delete
 805         // (fdo#62536, AUTOFMT in Writer)
 806         rDoc.Delete( nEndPos, nEndPos + 1 );
 807         rDoc.Delete( nFndPos, nFndPos + 1 );
 808         // Span the Attribute over the area
 809         // the end.
 810         if( '*' == cInsChar )           // Bold
 811         {
 812             SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
 813             rDoc.SetAttr( nFndPos, nEndPos - 1,
 814                           SID_ATTR_CHAR_WEIGHT,
 815                           aSvxWeightItem);
 816         }
 817         else if( '/' == cInsChar )           // Italic
 818         {
 819             SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
 820             rDoc.SetAttr( nFndPos, nEndPos - 1,
 821                           SID_ATTR_CHAR_POSTURE,
 822                           aSvxPostureItem);
 823         }
 824         else if( '-' == cInsChar )           // Strikeout
 825         {
 826             SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
 827             rDoc.SetAttr( nFndPos, nEndPos - 1,
 828                           SID_ATTR_CHAR_STRIKEOUT,
 829                           aSvxCrossedOutItem);
 830         }
 831         else                            // Underline
 832         {
 833             SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
 834             rDoc.SetAttr( nFndPos, nEndPos - 1,
 835                           SID_ATTR_CHAR_UNDERLINE,
 836                           aSvxUnderlineItem);
 837         }
 838       }
 839
 840     return -1 != nFndPos;
 841 }
 842
 843 // Capitalize first letter of every sentence
 844 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
 845                                     const OUString& rTxt, bool bNormalPos,
 846                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
 847                                     LanguageType eLang )
 848 {
 849
 850     if( rTxt.isEmpty() || nEndPos <= nSttPos )
 851         return;
 852
 853     CharClass& rCC = GetCharClass( eLang );
 854     OUString aText( rTxt );
 855     const sal_Unicode *pStart = aText.getStr(),
 856                       *pStr = pStart + nEndPos,
 857                       *pWordStt = nullptr,
 858                       *pDelim = nullptr;
 859
 860     bool bAtStart = false;
 861     do {
 862         --pStr;
 863         if (rCC.isLetter(aText, pStr - pStart))
 864         {
 865             if( !pWordStt )
 866                 pDelim = pStr+1;
 867             pWordStt = pStr;
 868         }
 869         else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
 870         {
 871             if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
 872                 pWordStt - 1 == pStr &&
 873                 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
 874                 (pStart + 1) <= pStr &&
 875                 rCC.isLetter(aText, pStr-1 - pStart))
 876                 pWordStt = --pStr;
 877             else
 878                 break;
 879         }
 880         bAtStart = (pStart == pStr);
 881     } while( !bAtStart );
 882
 883     if (!pWordStt)
 884         return;    // no character to be replaced
 885
 886
 887     if (rCC.isDigit(aText, pStr - pStart))
 888         return; // already ok
 889
 890     if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
 891         return; // already ok
 892
 893     //See if the text is the start of a protocol string, e.g. have text of
 894     //"http" see if it is the start of "http:" and if so leave it alone
 895     sal_Int32 nIndex = pWordStt - pStart;
 896     sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
 897     if (nIndex + nProtocolLen <= rTxt.getLength())
 898     {
 899         if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
 900             return; // already ok
 901     }
 902
 903     if (0x1 == *pWordStt || 0x2 == *pWordStt)
 904         return; // already ok
 905
 906     // Only capitalize, if string before specified characters is long enough
 907     if( *pDelim && 2 >= pDelim - pWordStt &&
 908         lcl_IsInAsciiArr( ".-)>", *pDelim ) )
 909         return;
 910
 911     // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
 912     if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
 913         return;
 914
 915     if( !bAtStart ) // Still no beginning of a paragraph?
 916     {
 917         if (NonFieldWordDelim(*pStr))
 918         {
 919             for (;;)
 920             {
 921                 bAtStart = (pStart == pStr--);
 922                 if (bAtStart || !NonFieldWordDelim(*pStr))
 923                     break;
 924             }
 925         }
 926         // Asian full stop, full width full stop, full width exclamation mark
 927         // and full width question marks are treated as word delimiters
 928         else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
 929                   0xFF1F != *pStr )
 930             return; // no valid separator -> no replacement
 931     }
 932
 933     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
 934     if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
 935         return;
 936
 937     if( bAtStart )  // at the beginning of a paragraph?
 938     {
 939         // Check out the previous paragraph, if it exists.
 940         // If so, then check to paragraph separator at the end.
 941         OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
 942         if (!pPrevPara)
 943         {
 944             // valid separator -> replace
 945             OUString sChar( *pWordStt );
 946             sChar = rCC.titlecase(sChar); //see fdo#56740
 947             if (!comphelper::string::equals(sChar, *pWordStt))
 948                rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
 949             return;
 950         }
 951
 952         aText = *pPrevPara;
 953         bAtStart = false;
 954         pStart = aText.getStr();
 955         pStr = pStart + aText.getLength();
 956
 957         do {            // overwrite all blanks
 958             --pStr;
 959             if (!NonFieldWordDelim(*pStr))
 960                 break;
 961             bAtStart = (pStart == pStr);
 962         } while( !bAtStart );
 963
 964         if( bAtStart )
 965             return;  // no valid separator -> no replacement
 966     }
 967
 968     // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
 969     // all three can happen, but not more than once!
 970     const sal_Unicode* pExceptStt = nullptr;
 971     bool bContinue = true;
 972     Flags nFlag = Flags::NONE;
 973     do
 974     {
 975         switch (*pStr)
 976         {
 977             // Western and Asian full stop
 978             case '.':
 979             case 0x3002:
 980             case 0xFF0E:
 981             {
 982                 if (pStr >= pStart + 2 && *(pStr - 2) == '.')
 983                 {
 984                     //e.g. text "f.o.o. word": Now currently considering
 985                     //capitalizing word but second last character of
 986                     //previous word is a .  So probably last word is an
 987                     //anagram that ends in . and not truly the end of a
 988                     //previous sentence, so don't autocapitalize this word
 989                     return;
 990                 }
 991                 if (nFlag & Flags::FullStop)
 992                     return; // no valid separator -> no replacement
 993                 nFlag |= Flags::FullStop;
 994                 pExceptStt = pStr;
 995             }
 996             break;
 997             case '!':
 998             case 0xFF01:
 999             {
1000                 if (nFlag & Flags::ExclamationMark)
1001                     return; // no valid separator -> no replacement
1002                 nFlag |= Flags::ExclamationMark;
1003             }
1004             break;
1005             case '?':
1006             case 0xFF1F:
1007             {
1008                 if (nFlag & Flags::QuestionMark)
1009                     return; // no valid separator -> no replacement
1010                 nFlag |= Flags::QuestionMark;
1011             }
1012             break;
1013             default:
1014                 if (nFlag == Flags::NONE)
1015                     return; // no valid separator -> no replacement
1016                 else
1017                     bContinue = false;
1018                 break;
1019         }
1020
1021         if (bContinue && pStr-- == pStart)
1022         {
1023             return; // no valid separator -> no replacement
1024         }
1025     } while (bContinue);
1026     if (Flags::FullStop != nFlag)
1027         pExceptStt = nullptr;
1028
1029     // Only capitalize, if string is long enough
1030     if( 2 > ( pStr - pStart ) )
1031         return;
1032
1033     if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
1034     {
1035         bool bValid = false, bAlphaFnd = false;
1036         const sal_Unicode* pTmpStr = pStr;
1037         while( !bValid )
1038         {
1039             if( rCC.isDigit( aText, pTmpStr - pStart ) )
1040             {
1041                 bValid = true;
1042                 pStr = pTmpStr - 1;
1043             }
1044             else if( rCC.isLetter( aText, pTmpStr - pStart ) )
1045             {
1046                 if( bAlphaFnd )
1047                 {
1048                     bValid = true;
1049                     pStr = pTmpStr;
1050                 }
1051                 else
1052                     bAlphaFnd = true;
1053             }
1054             else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
1055                 break;
1056
1057             if( pTmpStr == pStart )
1058                 break;
1059
1060             --pTmpStr;
1061         }
1062
1063         if( !bValid )
1064             return;       // no valid separator -> no replacement
1065     }
1066
1067     bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
1068
1069     // Search for the beginning of the word
1070     while (!NonFieldWordDelim(*pStr))
1071     {
1072         if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
1073             bNumericOnly = false;
1074
1075         if( pStart == pStr )
1076             break;
1077
1078         --pStr;
1079     }
1080
1081     if( bNumericOnly )      // consists of only numbers, then not
1082         return;
1083
1084     if (NonFieldWordDelim(*pStr))
1085         ++pStr;
1086
1087     OUString sWord;
1088
1089     // check on the basis of the exception list
1090     if( pExceptStt )
1091     {
1092         sWord = OUString(pStr, pExceptStt - pStr + 1);
1093         if( FindInCplSttExceptList(eLang, sWord) )
1094             return;
1095
1096         // Delete all non alphanumeric. Test the characters at the
1097         // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1098         OUString sTmp( sWord );
1099         while( !sTmp.isEmpty() &&
1100                 !rCC.isLetterNumeric( sTmp, 0 ) )
1101             sTmp = sTmp.copy(1);
1102
1103         // Remove all non alphanumeric characters towards the end up until
1104         // the last one.
1105         sal_Int32 nLen = sTmp.getLength();
1106         while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
1107             --nLen;
1108         if( nLen + 1 < sTmp.getLength() )
1109             sTmp = sTmp.copy( 0, nLen + 1 );
1110
1111         if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
1112             FindInCplSttExceptList(eLang, sTmp))
1113             return;
1114
1115         if(FindInCplSttExceptList(eLang, sWord, true))
1116             return;
1117     }
1118
1119     // Ok, then replace
1120     sal_Unicode cSave = *pWordStt;
1121     nSttPos = pWordStt - rTxt.getStr();
1122     OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
1123     bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
1124
1125     // Perhaps someone wants to have the word
1126     if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
1127         rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
1128 }
1129
1130 // Correct accidental use of cAPS LOCK key
1131 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1132                                         sal_Int32 nSttPos, sal_Int32 nEndPos,
1133                                         LanguageType eLang )
1134 {
1135     if (nEndPos - nSttPos < 2)
1136         // string must be at least 2-character long.
1137         return false;
1138
1139     CharClass& rCC = GetCharClass( eLang );
1140
1141     // Check the first 2 letters.
1142     if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1143         return false;
1144
1145     if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1146         return false;
1147
1148     OUStringBuffer aConverted;
1149     aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1150     aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1151
1152     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1153     if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1154         return false;
1155
1156     for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1157     {
1158         if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1159             // A lowercase letter disqualifies the whole text.
1160             return false;
1161
1162         if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1163             // Another uppercase letter.  Convert it.
1164             aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1165         else
1166             // This is not an alphabetic letter.  Leave it as-is.
1167             aConverted.append( rTxt[i] );
1168     }
1169
1170     // Replace the word.
1171     rDoc.Delete(nSttPos, nEndPos);
1172     rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1173
1174     return true;
1175 }
1176
1177
1178 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1179                                         LanguageType eLang ) const
1180 {
1181     sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1182                                     ? GetStartDoubleQuote()
1183                                     : GetStartSingleQuote() )
1184                                    : ( '\"' == cInsChar
1185                                     ? GetEndDoubleQuote()
1186                                     : GetEndSingleQuote() );
1187     if( !cRet )
1188     {
1189         // then through the Language find the right character
1190         if( LANGUAGE_NONE == eLang )
1191             cRet = cInsChar;
1192         else
1193         {
1194             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1195             OUString sRet( bSttQuote
1196                             ? ( '\"' == cInsChar
1197                                 ? rLcl.getDoubleQuotationMarkStart()
1198                                 : rLcl.getQuotationMarkStart() )
1199                             : ( '\"' == cInsChar
1200                                 ? rLcl.getDoubleQuotationMarkEnd()
1201                                 : rLcl.getQuotationMarkEnd() ));
1202             cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1203         }
1204     }
1205     return cRet;
1206 }
1207
1208 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1209                                     sal_Unicode cInsChar, bool bSttQuote,
1210                                     bool bIns, LanguageType eLang, ACQuotes eType ) const
1211 {
1212     sal_Unicode cRet;
1213
1214     if ( eType == ACQuotes::DoubleAngleQuote )
1215     {
1216         bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
1217         // pressing " inside a quotation -> use second level angle quotes
1218         bool bLeftQuote = '\"' == cInsChar &&
1219                 // start position and Romanian OR
1220                 // not start position and Hungarian
1221                 bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
1222         cRet = ( '<' == cInsChar || bLeftQuote )
1223                 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
1224                 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
1225     }
1226     else if ( eType == ACQuotes::UseApostrophe )
1227         cRet = cApostrophe;
1228     else
1229         cRet = GetQuote( cInsChar, bSttQuote, eLang );
1230
1231     OUString sChg( cInsChar );
1232     if( bIns )
1233         rDoc.Insert( nInsPos, sChg );
1234     else
1235         rDoc.Replace( nInsPos, sChg );
1236
1237     sChg = OUString(cRet);
1238
1239     if( eType == ACQuotes::NonBreakingSpace )
1240     {
1241         if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
1242         {
1243             if( !bSttQuote )
1244                 ++nInsPos;
1245         }
1246     }
1247     else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
1248     {
1249         rDoc.Delete( nInsPos-1, nInsPos);
1250         --nInsPos;
1251     }
1252
1253     rDoc.Replace( nInsPos, sChg );
1254
1255     // i' -> I' in English (last step for the Undo)
1256     if( eType == ACQuotes::CapitalizeIAm )
1257         rDoc.Replace( nInsPos-1, "I" );
1258 }
1259
1260 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1261                                 sal_Unicode cInsChar, bool bSttQuote )
1262 {
1263     const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1264     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1265
1266     OUString sRet(cRet);
1267
1268     if( '\"' == cInsChar )
1269     {
1270         if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1271         {
1272             if( bSttQuote )
1273                 sRet += " ";
1274             else
1275                 sRet = " " + sRet;
1276         }
1277     }
1278     return sRet;
1279 }
1280
// search preceding opening quote in the paragraph before the insert position
//
// Scans rTxt backwards, starting with the character at nPos - 1.
// Returns true as soon as sPrecedingChar is found. Returns false if one of the
// characters of the 0-terminated array aStopChars is met first, if the start
// of the text is reached, or if nPos does not denote a position with at least
// one character in front of it (nPos <= 0 or nPos > length of rTxt).
static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars )
{
    // Nothing precedes position 0, and positions behind the end of the text
    // are invalid; without this check the loop below would index out of range.
    if ( nPos <= 0 || static_cast<std::u16string_view::size_type>(nPos) > rTxt.size() )
        return false;

    do {
        const sal_Unicode cTmpChar = rTxt[ --nPos ];
        if ( cTmpChar == sPrecedingChar )
            return true;

        // a stop character ends the search unsuccessfully
        for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
            if ( cTmpChar == *pCh )
                return false;

    } while ( nPos > 0 );

    return false;
}
1300
// WARNING: rTxt may become invalid, see comment below
1302 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1303                                     sal_Int32 nInsPos, sal_Unicode cChar,
1304                                     bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1305 {
1306     bool bIsNextRun = io_bNbspRunNext;
1307     io_bNbspRunNext = false;  // if it was set, then it has to be turned off
1308
1309     do{                                 // only for middle check loop !!
1310         if( cChar )
1311         {
1312             // Prevent double space
1313             if( nInsPos && ' ' == cChar &&
1314                 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1315                 ' ' == rTxt[ nInsPos - 1 ])
1316             {
1317                 break;
1318             }
1319
1320             bool bSingle = '\'' == cChar;
1321             bool bIsReplaceQuote =
1322                         (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1323                         (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1324             if( bIsReplaceQuote )
1325             {
1326                 bool bSttQuote = !nInsPos;
1327                 ACQuotes eType = ACQuotes::NONE;
1328                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1329                 if (!bSttQuote)
1330                 {
1331                     sal_Unicode cPrev = rTxt[ nInsPos-1 ];
1332                     bSttQuote = NonFieldWordDelim(cPrev) ||
1333                         lcl_IsInAsciiArr( "([{", cPrev ) ||
1334                         ( cEmDash == cPrev ) ||
1335                         ( cEnDash == cPrev );
1336                     // tdf#38394 use opening quotation mark << in French l'<<word>>
1337                     if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1338                         primary(eLang) == primary(LANGUAGE_FRENCH) &&
1339                         ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
1340                                // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1341                                OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
1342                           ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
1343                                // abbreviated form of que
1344                                ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
1345                                ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
1346                     {
1347                         bSttQuote = true;
1348                     }
1349                     // tdf#108423 for capitalization of English i'm
1350                     else if ( bSingle && ( cPrev == 'i' ) &&
1351                         primary(eLang) == primary(LANGUAGE_ENGLISH) &&
1352                         ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
1353                     {
1354                         eType = ACQuotes::CapitalizeIAm;
1355                     }
1356                     // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1357                     else if ( !bSingle && nInsPos &&
1358                         ( ( eLang == LANGUAGE_HUNGARIAN &&
1359                             lcl_HasPrecedingChar( rTxt, nInsPos,
1360                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
1361                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) ||
1362                           ( eLang.anyOf(
1363                                 LANGUAGE_ROMANIAN,
1364                                 LANGUAGE_ROMANIAN_MOLDOVA ) &&
1365                             lcl_HasPrecedingChar( rTxt, nInsPos,
1366                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
1367                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) )
1368                     {
1369                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1370                         // only if the opening double quotation mark is the default one
1371                         if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
1372                             eType = ACQuotes::DoubleAngleQuote;
1373                     }
1374                     else if ( bSingle && nInsPos && !bSttQuote &&
1375                         // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1376                         // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1377                         // tdf#123786 the same for Russian and Ukrainian
1378                         ( ( eLang.anyOf (
1379                                  LANGUAGE_CZECH,
1380                                  LANGUAGE_GERMAN,
1381                                  LANGUAGE_GERMAN_SWISS,
1382                                  LANGUAGE_GERMAN_AUSTRIAN,
1383                                  LANGUAGE_GERMAN_LUXEMBOURG,
1384                                  LANGUAGE_GERMAN_LIECHTENSTEIN,
1385                                  LANGUAGE_ICELANDIC,
1386                                  LANGUAGE_SLOVAK,
1387                                  LANGUAGE_SLOVENIAN ) &&
1388                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0],  aStopSingleQuoteEnd + 1 ) ) ||
1389                           ( eLang.anyOf (
1390                                  LANGUAGE_RUSSIAN,
1391                                  LANGUAGE_UKRAINIAN ) &&
1392                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0],  aStopSingleQuoteEndRuUa + 1 ) ) ) )
1393                     {
1394                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1395                         CharClass& rCC = GetCharClass( eLang );
1396                         if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) ||
1397                              rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) &&
1398                              // use apostrophe only after letters, not after digits or punctuation
1399                              rCC.isLetter(rTxt, nInsPos-1) )
1400                         {
1401                             eType = ACQuotes::UseApostrophe;
1402                         }
1403                     }
1404                 }
1405
1406                 if ( eType == ACQuotes::NONE && !bSingle &&
1407                     ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
1408                     eType = ACQuotes::NonBreakingSpace;
1409
1410                 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
1411                 break;
1412             }
1413             // tdf#133524 change "<<" and ">>" to double angle quotation marks
1414             else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
1415                 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
1416                 ('<' == cChar || '>' == cChar) &&
1417                 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
1418             {
1419                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1420                 if ( eLang.anyOf(
1421                         LANGUAGE_CATALAN,              // primary level
1422                         LANGUAGE_CATALAN_VALENCIAN,    // primary level
1423                         LANGUAGE_FINNISH,              // alternative primary level
1424                         LANGUAGE_FRENCH_SWISS,         // second level
1425                         LANGUAGE_GALICIAN,             // primary level
1426                         LANGUAGE_HUNGARIAN,            // second level
1427                         LANGUAGE_POLISH,               // second level
1428                         LANGUAGE_PORTUGUESE,           // primary level
1429                         LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
1430                         LANGUAGE_ROMANIAN,             // second level
1431                         LANGUAGE_ROMANIAN_MOLDOVA,     // second level
1432                         LANGUAGE_SWEDISH,              // alternative primary level
1433                         LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
1434                         LANGUAGE_UKRAINIAN,            // primary level
1435                         LANGUAGE_USER_ARAGONESE,       // primary level
1436                         LANGUAGE_USER_ASTURIAN ) ||    // primary level
1437                     primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
1438                     primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
1439                 {
1440                     InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
1441                     break;
1442                 }
1443             }
1444
1445             if( bInsert )
1446                 rDoc.Insert( nInsPos, OUString(cChar) );
1447             else
1448                 rDoc.Replace( nInsPos, OUString(cChar) );
1449
1450             // Hardspaces autocorrection
1451             if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1452             {
1453                 if ( NeedsHardspaceAutocorr( cChar ) &&
1454                     FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) )
1455                 {
1456                     ;
1457                 }
1458                 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1459                 {
1460                     // Remove the NBSP if it wasn't an autocorrection
1461                     if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1462                             cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1463                     {
1464                         // Look for the last HARD_SPACE
1465                         sal_Int32 nPos = nInsPos - 1;
1466                         bool bContinue = true;
1467                         while ( bContinue )
1468                         {
1469                             const sal_Unicode cTmpChar = rTxt[ nPos ];
1470                             if ( cTmpChar == cNonBreakingSpace )
1471                             {
1472                                 rDoc.Delete( nPos, nPos + 1 );
1473                                 bContinue = false;
1474                             }
1475                             else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1476                                 bContinue = false;
1477                             nPos--;
1478                         }
1479                     }
1480                 }
1481             }
1482         }
1483
1484         if( !nInsPos )
1485             break;
1486
1487         sal_Int32 nPos = nInsPos - 1;
1488
1489         if( IsWordDelim( rTxt[ nPos ]))
1490             break;
1491
1492         // Set bold or underline automatically?
1493         if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1494         {
1495             if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1496             {
1497                 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1498             }
1499             break;
1500         }
1501
1502         while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1503             ;
1504
1505         // Found a Paragraph-start or a Blank, search for the word shortcut in
1506         // auto.
1507         sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1508         if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1509             --nCapLttrPos;          // begin of paragraph and no blank
1510
1511         const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1512         CharClass& rCC = GetCharClass( eLang );
1513
1514         // no symbol characters
1515         if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1516             break;
1517
1518         if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
1519             // tdf#134940 fix regression of arrow "-->" resulted by premature
1520             // replacement of "--" since '>' was added to IsAutoCorrectChar()
1521             '>' != cChar )
1522         {
1523             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1524             // and becomes INVALID if ChgAutoCorrWord returns true!
1525             // => use aPara/pPara to create a valid copy of the string!
1526             OUString aPara;
1527             OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1528
1529             bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1530                                                     *this, pPara );
1531             if( !bChgWord )
1532             {
1533                 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1534                 while( nCapLttrPos1 < nInsPos &&
1535                         lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1536                         )
1537                         ++nCapLttrPos1;
1538                 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1539                         lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1540                         )
1541                         --nInsPos1;
1542
1543                 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1544                     nCapLttrPos1 < nInsPos1 &&
1545                     rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1546                 {
1547                     bChgWord = true;
1548                     nCapLttrPos = nCapLttrPos1;
1549                 }
1550             }
1551
1552             if( bChgWord )
1553             {
1554                 if( !aPara.isEmpty() )
1555                 {
1556                     sal_Int32 nEnd = nCapLttrPos;
1557                     while( nEnd < aPara.getLength() &&
1558                             !IsWordDelim( aPara[ nEnd ]))
1559                         ++nEnd;
1560
1561                     // Capital letter at beginning of paragraph?
1562                     if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1563                     {
1564                         FnCapitalStartSentence( rDoc, aPara, false,
1565                                                 nCapLttrPos, nEnd, eLang );
1566                     }
1567
1568                     if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1569                     {
1570                         FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1571                     }
1572                 }
1573                 break;
1574             }
1575         }
1576
1577         if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
1578         {
1579             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1580             // and becomes INVALID if TransliterateRTLWord returns true!
1581             if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
1582                 break;
1583         }
1584
1585         if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1586                 (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
1587                 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1588                 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1589             ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1590                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1591                 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1592             ;
1593         else
1594         {
1595             bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1596             bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1597
1598             if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1599                  FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1600             {
1601                 // Correct accidental use of cAPS LOCK key (do this only when
1602                 // the caps or shift lock key is pressed). Turn off the caps
1603                 // lock afterwards.
1604                 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1605             }
1606
1607             // Capital letter at beginning of paragraph ?
1608             if( !bUnsupported &&
1609                 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1610             {
1611                 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1612             }
1613
1614             // Two capital letters at beginning of word ??
1615             if( !bUnsupported &&
1616                 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1617             {
1618                 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1619             }
1620
1621             if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1622             {
1623                 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1624             }
1625         }
1626
1627     } while( false );
1628 }
1629
1630 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1631                                                         LanguageType eLang )
1632 {
1633     LanguageTag aLanguageTag( eLang);
1634     if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1635         (void)CreateLanguageFile(aLanguageTag);
1636     return *(m_aLangTable.find(aLanguageTag)->second);
1637 }
1638
1639 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1640 {
1641     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1642     if (iter != m_aLangTable.end() && iter->second)
1643         iter->second->SaveCplSttExceptList();
1644     else
1645     {
1646         SAL_WARN("editeng", "Save an empty list? ");
1647     }
1648 }
1649
1650 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang)
1651 {
1652     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1653     if (iter != m_aLangTable.end() && iter->second)
1654         iter->second->SaveWrdSttExceptList();
1655     else
1656     {
1657         SAL_WARN("editeng", "Save an empty list? ");
1658     }
1659 }
1660
1661 // Adds a single word. The list will immediately be written to the file!
1662 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1663                                         LanguageType eLang )
1664 {
1665     SvxAutoCorrectLanguageLists* pLists = nullptr;
1666     // either the right language is present or it will be this in the general list
1667     auto iter = m_aLangTable.find(LanguageTag(eLang));
1668     if (iter != m_aLangTable.end())
1669         pLists = iter->second.get();
1670     else
1671     {
1672         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1673         iter = m_aLangTable.find(aLangTagUndetermined);
1674         if (iter != m_aLangTable.end())
1675             pLists = iter->second.get();
1676         else if(CreateLanguageFile(aLangTagUndetermined))
1677             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1678     }
1679     OSL_ENSURE(pLists, "No auto correction data");
1680     return pLists && pLists->AddToCplSttExceptList(rNew);
1681 }
1682
1683 // Adds a single word. The list will immediately be written to the file!
1684 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew,
1685                                          LanguageType eLang )
1686 {
1687     SvxAutoCorrectLanguageLists* pLists = nullptr;
1688     //either the right language is present or it is set in the general list
1689     auto iter = m_aLangTable.find(LanguageTag(eLang));
1690     if (iter != m_aLangTable.end())
1691         pLists = iter->second.get();
1692     else
1693     {
1694         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1695         iter = m_aLangTable.find(aLangTagUndetermined);
1696         if (iter != m_aLangTable.end())
1697             pLists = iter->second.get();
1698         else if(CreateLanguageFile(aLangTagUndetermined))
1699             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1700     }
1701     OSL_ENSURE(pLists, "No auto correction file!");
1702     return pLists && pLists->AddToWrdSttExceptList(rNew);
1703 }
1704
1705 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1706                                              sal_Int32 nPos)
1707 {
1708     OUString sRet;
1709     if( !nPos )
1710         return sRet;
1711
1712     sal_Int32 nEnd = nPos;
1713
1714     // it must be followed by a blank or tab!
1715     if( ( nPos < rTxt.getLength() &&
1716         !IsWordDelim( rTxt[ nPos ])) ||
1717         IsWordDelim( rTxt[ --nPos ]))
1718         return sRet;
1719
1720     while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1721         ;
1722
1723     // Found a Paragraph-start or a Blank, search for the word shortcut in
1724     // auto.
1725     sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1726     if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1727         --nCapLttrPos;          // Beginning of paragraph and no Blank!
1728
1729     while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1730         if( ++nCapLttrPos >= nEnd )
1731             return sRet;
1732
1733     if( 3 > nEnd - nCapLttrPos )
1734         return sRet;
1735
1736     const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1737
1738     CharClass& rCC = GetCharClass(eLang);
1739
1740     if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1741         return sRet;
1742
1743     sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1744     return sRet;
1745 }
1746
1747 // static
1748 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt,
1749                                                           const sal_Int32 nPos)
1750 {
1751     constexpr sal_Int32 nMinLen = 3;
1752     constexpr sal_Int32 nMaxLen = 9;
1753     std::vector<OUString> aRes;
1754     if (nPos >= nMinLen)
1755     {
1756         sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1757         // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1758         if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1759         {
1760             while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1761                 ++nBegin;
1762         }
1763         if (nBegin + nMinLen <= nPos)
1764         {
1765             OUString sRes = rTxt.copy(nBegin, nPos - nBegin);
1766             aRes.push_back(sRes);
1767             bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1768             for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1769             {
1770                 bool bAdd = bLastStartedWithDelim;
1771                 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1772                 bAdd = bAdd || bLastStartedWithDelim;
1773                 if (bAdd)
1774                     aRes.push_back(sRes.copy(i));
1775             }
1776         }
1777     }
1778     return aRes;
1779 }
1780
1781 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1782 {
1783     OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1784
1785     OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1786     OUString sShareDirFile( sUserDirFile );
1787
1788     SvxAutoCorrectLanguageLists* pLists = nullptr;
1789
1790     tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1791
1792     auto nFndPos = aLastFileTable.find(rLanguageTag);
1793     if(nFndPos != aLastFileTable.end() &&
1794        (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1795        nAktTime - nLastCheckTime < nMinTime)
1796     {
1797         // no need to test the file, because the last check is not older then
1798         // 2 minutes.
1799         if( bNewFile )
1800         {
1801             sShareDirFile = sUserDirFile;
1802             pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1803             LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1804             m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1805             aLastFileTable.erase(nFndPos);
1806         }
1807     }
1808     else if(
1809              ( FStatHelper::IsDocument( sUserDirFile ) ||
1810                FStatHelper::IsDocument( sShareDirFile =
1811                    GetAutoCorrFileName( rLanguageTag ) ) ||
1812                FStatHelper::IsDocument( sShareDirFile =
1813                    GetAutoCorrFileName( rLanguageTag, false, false, true) )
1814              ) ||
1815         ( sShareDirFile = sUserDirFile, bNewFile )
1816           )
1817     {
1818         pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1819         LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1820         m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1821         if (nFndPos != aLastFileTable.end())
1822             aLastFileTable.erase(nFndPos);
1823     }
1824     else if( !bNewFile )
1825     {
1826         aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1827     }
1828     return pLists != nullptr;
1829 }
1830
1831 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1832                                 LanguageType eLang )
1833 {
1834     LanguageTag aLanguageTag( eLang);
1835     auto const iter = m_aLangTable.find(aLanguageTag);
1836     if (iter != m_aLangTable.end())
1837         return iter->second->PutText(rShort, rLong);
1838     if(CreateLanguageFile(aLanguageTag))
1839         return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong);
1840     return false;
1841 }
1842
1843 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1844                                               std::vector<SvxAutocorrWord>& aDeleteEntries,
1845                                               LanguageType eLang )
1846 {
1847     LanguageTag aLanguageTag( eLang);
1848     auto const iter = m_aLangTable.find(aLanguageTag);
1849     if (iter != m_aLangTable.end())
1850     {
1851         iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1852     }
1853     else if(CreateLanguageFile( aLanguageTag ))
1854     {
1855         m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1856     }
1857 }
1858
1859 //  - return the replacement text (only for SWG-Format, all other
1860 //    can be taken from the word list!)
1861 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1862 {
1863     return false;
1864 }
1865
1866 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1867 {
1868 }
1869
1870 // Text with attribution (only the SWG - SWG format!)
1871 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1872                               const OUString&, const OUString&, SfxObjectShell&, OUString& )
1873 {
1874     return false;
1875 }
1876
1877 OUString EncryptBlockName_Imp(const OUString& rName)
1878 {
1879     OUStringBuffer aName;
1880     aName.append('#').append(rName);
1881     for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos)
1882     {
1883         if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
1884             aName[nPos] &= 0x0f;
1885     }
1886     return aName.makeStringAndClear();
1887 }
1888
1889 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1890 static void GeneratePackageName ( const OUString& rShort, OUString& rPackageName )
1891 {
1892     OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1893     OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1894
1895     for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1896     {
1897         switch (aBuf[nPos])
1898         {
1899             case '!':
1900             case '/':
1901             case ':':
1902             case '.':
1903             case '\\':
1904                 aBuf[nPos] = '_';
1905                 break;
1906             default:
1907                 break;
1908         }
1909     }
1910
1911     rPackageName = aBuf.makeStringAndClear();
1912 }
1913
1914 static const SvxAutocorrWord* lcl_SearchWordsInList(
1915                 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt,
1916                 sal_Int32& rStt, sal_Int32 nEndPos)
1917 {
1918     const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1919     return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1920 }
1921
1922 // the search for the words in the substitution table
1923 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
1924                 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
1925                 SvxAutoCorrDoc&, LanguageTag& rLang )
1926 {
1927     const SvxAutocorrWord* pRet = nullptr;
1928     LanguageTag aLanguageTag( rLang);
1929     if( aLanguageTag.isSystemLocale() )
1930         aLanguageTag.reset( MsLangId::getSystemLanguage());
1931
1932     /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
1933      * list instead? */
1934
1935     // First search for eLang, then US-English -> English
1936     // and last in LANGUAGE_UNDETERMINED
1937     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1938     {
1939         //the language is available - so bring it on
1940         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1941         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1942         if( pRet )
1943         {
1944             rLang = aLanguageTag;
1945             return pRet;
1946         }
1947         else
1948             return nullptr;
1949     }
1950
1951     // If it still could not be found here, then keep on searching
1952     LanguageType eLang = aLanguageTag.getLanguageType();
1953     // the primary language for example EN
1954     aLanguageTag.reset(aLanguageTag.getLanguage());
1955     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
1956     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
1957                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
1958                  CreateLanguageFile(aLanguageTag, false)))
1959     {
1960         //the language is available - so bring it on
1961         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1962         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1963         if( pRet )
1964         {
1965             rLang = aLanguageTag;
1966             return pRet;
1967         }
1968     }
1969
1970     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
1971             CreateLanguageFile(aLanguageTag, false))
1972     {
1973         //the language is available - so bring it on
1974         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1975         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1976         if( pRet )
1977         {
1978             rLang = aLanguageTag;
1979             return pRet;
1980         }
1981     }
1982     return nullptr;
1983 }
1984
1985 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
1986                                              const OUString& sWord )
1987 {
1988     LanguageTag aLanguageTag( eLang);
1989
1990     /* TODO-BCP47: again horrible ugliness */
1991
1992     // First search for eLang, then primary language of eLang
1993     // and last in LANGUAGE_UNDETERMINED
1994
1995     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1996     {
1997         //the language is available - so bring it on
1998         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
1999         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2000             return true;
2001     }
2002
2003     // If it still could not be found here, then keep on searching
2004     // the primary language for example EN
2005     aLanguageTag.reset(aLanguageTag.getLanguage());
2006     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2007     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2008                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2009                  CreateLanguageFile(aLanguageTag, false)))
2010     {
2011         //the language is available - so bring it on
2012         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2013         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2014             return true;
2015     }
2016
2017     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2018             CreateLanguageFile(aLanguageTag, false))
2019     {
2020         //the language is available - so bring it on
2021         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2022         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2023             return true;
2024     }
2025     return false;
2026 }
2027
2028 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
2029 {
2030     SvStringsISortDtor::const_iterator it = pList->find( "~" );
2031     SvStringsISortDtor::size_type nPos = it - pList->begin();
2032     if( nPos < pList->size() )
2033     {
2034         OUString sLowerWord(sWord.toAsciiLowerCase());
2035         OUString sAbr;
2036         for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
2037         {
2038             sAbr = (*pList)[ n ];
2039             if (sAbr[0] != '~')
2040                 break;
2041             // ~ and ~. are not allowed!
2042             if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
2043             {
2044                 OUString sLowerAbk(sAbr.toAsciiLowerCase());
2045                 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
2046                 {
2047                     if( !--i )      // agrees
2048                         return true;
2049
2050                     if( sLowerAbk[i] != sLowerWord[--ii])
2051                         break;
2052                 }
2053             }
2054         }
2055     }
2056     OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
2057             "Wrongly sorted exception list?" );
2058     return false;
2059 }
2060
2061 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
2062                                 const OUString& sWord, bool bAbbreviation)
2063 {
2064     LanguageTag aLanguageTag( eLang);
2065
2066     /* TODO-BCP47: did I mention terrible horrible ugliness? */
2067
2068     // First search for eLang, then primary language of eLang
2069     // and last in LANGUAGE_UNDETERMINED
2070
2071     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2072     {
2073         //the language is available - so bring it on
2074         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2075         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2076             return true;
2077     }
2078
2079     // If it still could not be found here, then keep on searching
2080     // the primary language for example EN
2081     aLanguageTag.reset(aLanguageTag.getLanguage());
2082     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2083     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2084                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2085                  CreateLanguageFile(aLanguageTag, false)))
2086     {
2087         //the language is available - so bring it on
2088         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2089         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2090             return true;
2091     }
2092
2093     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2094             CreateLanguageFile(aLanguageTag, false))
2095     {
2096         //the language is available - so bring it on
2097         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2098         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2099             return true;
2100     }
2101     return false;
2102 }
2103
2104 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
2105                                             bool bNewFile, bool bTst, bool bUnlocalized ) const
2106 {
2107     OUString sRet, sExt( rLanguageTag.getBcp47() );
2108     if (bUnlocalized)
2109     {
2110         // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2111         std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
2112         if (!vecFallBackStrings.empty())
2113            sExt = vecFallBackStrings[0];
2114     }
2115
2116     sExt = "_" + sExt + ".dat";
2117     if( bNewFile )
2118         sRet = sUserAutoCorrFile + sExt;
2119     else if( !bTst )
2120         sRet = sShareAutoCorrFile + sExt;
2121     else
2122     {
2123         // test first in the user directory - if not exist, then
2124         sRet = sUserAutoCorrFile + sExt;
2125         if( !FStatHelper::IsDocument( sRet ))
2126             sRet = sShareAutoCorrFile + sExt;
2127     }
2128     return sRet;
2129 }
2130
2131 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2132                 SvxAutoCorrect& rParent,
2133                 const OUString& rShareAutoCorrectFile,
2134                 const OUString& rUserAutoCorrectFile)
2135 :   sShareAutoCorrFile( rShareAutoCorrectFile ),
2136     sUserAutoCorrFile( rUserAutoCorrectFile ),
2137     aModifiedDate( Date::EMPTY ),
2138     aModifiedTime( tools::Time::EMPTY ),
2139     aLastCheckTime( tools::Time::EMPTY ),
2140     rAutoCorrect(rParent),
2141     nFlags(ACFlags::NONE)
2142 {
2143 }
2144
2145 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2146 {
2147 }
2148
2149 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2150 {
2151     // Access the file system only every 2 minutes to check the date stamp
2152     bool bRet = false;
2153
2154     tools::Time nMinTime( 0, 2 );
2155     tools::Time nAktTime( tools::Time::SYSTEM );
2156     if( aLastCheckTime <= nAktTime) // overflow?
2157         return false;
2158     nAktTime -= aLastCheckTime;
2159     if( nAktTime > nMinTime )     // min time past
2160     {
2161         Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2162         if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2163                                             &aTstDate, &aTstTime ) &&
2164             ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2165         {
2166             bRet = true;
2167             // then remove all the lists fast!
2168             if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2169             {
2170                 pCplStt_ExcptLst.reset();
2171             }
2172             if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst )
2173             {
2174                 pWrdStt_ExcptLst.reset();
2175             }
2176             if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2177             {
2178                 pAutocorr_List.reset();
2179             }
2180             nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad );
2181         }
2182         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2183     }
2184     return bRet;
2185 }
2186
2187 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2188                                         std::unique_ptr<SvStringsISortDtor>& rpLst,
2189                                         const char* pStrmName,
2190                                         tools::SvRef<SotStorage>& rStg)
2191 {
2192     if( rpLst )
2193         rpLst->clear();
2194     else
2195         rpLst.reset( new SvStringsISortDtor );
2196
2197     {
2198         const OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
2199
2200         if( rStg.is() && rStg->IsStream( sStrmName ) )
2201         {
2202             tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2203                 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2204             if( ERRCODE_NONE != xStrm->GetError())
2205             {
2206                 xStrm.clear();
2207                 rStg.clear();
2208                 RemoveStream_Imp( sStrmName );
2209             }
2210             else
2211             {
2212                 uno::Reference< uno::XComponentContext > xContext =
2213                     comphelper::getProcessComponentContext();
2214
2215                 xml::sax::InputSource aParserInput;
2216                 aParserInput.sSystemId = sStrmName;
2217
2218                 xStrm->Seek( 0 );
2219                 xStrm->SetBufferSize( 8 * 1024 );
2220                 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2221
2222                 // get filter
2223                 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2224
2225                 // connect parser and filter
2226                 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2227                 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2228                 xParser->setFastDocumentHandler( xFilter );
2229                 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2230                 xParser->setTokenHandler( xTokenHandler );
2231
2232                 // parse
2233                 try
2234                 {
2235                     xParser->parseStream( aParserInput );
2236                 }
2237                 catch( const xml::sax::SAXParseException& )
2238                 {
2239                     // re throw ?
2240                 }
2241                 catch( const xml::sax::SAXException& )
2242                 {
2243                     // re throw ?
2244                 }
2245                 catch( const io::IOException& )
2246                 {
2247                     // re throw ?
2248                 }
2249             }
2250         }
2251
2252         // Set time stamp
2253         FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2254                                         &aModifiedDate, &aModifiedTime );
2255         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2256     }
2257
2258 }
2259
2260 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2261                             const SvStringsISortDtor& rLst,
2262                             const char* pStrmName,
2263                             tools::SvRef<SotStorage> const &rStg,
2264                             bool bConvert )
2265 {
2266     if( !rStg.is() )
2267         return;
2268
2269     OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
2270     if( rLst.empty() )
2271     {
2272         rStg->Remove( sStrmName );
2273         rStg->Commit();
2274     }
2275     else
2276     {
2277         tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2278                 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2279         if( xStrm.is() )
2280         {
2281             xStrm->SetSize( 0 );
2282             xStrm->SetBufferSize( 8192 );
2283             xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2284
2285
2286             uno::Reference< uno::XComponentContext > xContext =
2287                 comphelper::getProcessComponentContext();
2288
2289             uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
2290             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2291             xWriter->setOutputStream(xOut);
2292
2293             uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2294             rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2295
2296             xExp->exportDoc( XML_BLOCK_LIST );
2297
2298             xStrm->Commit();
2299             if( xStrm->GetError() == ERRCODE_NONE )
2300             {
2301                 xStrm.clear();
2302                 if (!bConvert)
2303                 {
2304                     rStg->Commit();
2305                     if( ERRCODE_NONE != rStg->GetError() )
2306                     {
2307                         rStg->Remove( sStrmName );
2308                         rStg->Commit();
2309                     }
2310                 }
2311             }
2312         }
2313     }
2314 }
2315
2316 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2317 {
2318     if( pAutocorr_List )
2319         pAutocorr_List->DeleteAndDestroyAll();
2320     else
2321         pAutocorr_List.reset( new SvxAutocorrWordList() );
2322
2323     try
2324     {
2325         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2326         uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2327         uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2328
2329         xml::sax::InputSource aParserInput;
2330         aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2331         aParserInput.aInputStream = xStrm->getInputStream();
2332
2333         // get parser
2334         uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2335         SAL_INFO("editeng", "AutoCorrect Import" );
2336         uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2337         uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2338
2339         // connect parser and filter
2340         xParser->setFastDocumentHandler( xFilter );
2341         xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2342         xParser->setTokenHandler(xTokenHandler);
2343
2344         // parse
2345         xParser->parseStream( aParserInput );
2346     }
2347     catch ( const uno::Exception& )
2348     {
2349         TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2350     }
2351
2352     // Set time stamp
2353     FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2354                                     &aModifiedDate, &aModifiedTime );
2355     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2356
2357     return pAutocorr_List.get();
2358 }
2359
2360 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2361 {
2362     if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2363     {
2364         LoadAutocorrWordList();
2365         if( !pAutocorr_List )
2366         {
2367             OSL_ENSURE( false, "No valid list" );
2368             pAutocorr_List.reset( new SvxAutocorrWordList() );
2369         }
2370         nFlags |= ACFlags::ChgWordLstLoad;
2371     }
2372     return pAutocorr_List.get();
2373 }
2374
2375 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2376 {
2377     if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2378     {
2379         LoadCplSttExceptList();
2380         if( !pCplStt_ExcptLst )
2381         {
2382             OSL_ENSURE( false, "No valid list" );
2383             pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2384         }
2385         nFlags |= ACFlags::CplSttLstLoad;
2386     }
2387     return pCplStt_ExcptLst.get();
2388 }
2389
2390 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2391 {
2392     bool bRet = false;
2393     if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2394     {
2395         MakeUserStorage_Impl();
2396         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2397
2398         SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2399
2400         xStg = nullptr;
2401         // Set time stamp
2402         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2403                                             &aModifiedDate, &aModifiedTime );
2404         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2405         bRet = true;
2406     }
2407     return bRet;
2408 }
2409
2410 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew)
2411 {
2412     bool bRet = false;
2413     SvStringsISortDtor* pExceptList = LoadWrdSttExceptList();
2414     if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second )
2415     {
2416         MakeUserStorage_Impl();
2417         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2418
2419         SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2420
2421         xStg = nullptr;
2422         // Set time stamp
2423         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2424                                             &aModifiedDate, &aModifiedTime );
2425         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2426         bRet = true;
2427     }
2428     return bRet;
2429 }
2430
2431 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2432 {
2433     try
2434     {
2435         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2436         if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2437             LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2438     }
2439     catch (const css::ucb::ContentCreationException&)
2440     {
2441     }
2442     return pCplStt_ExcptLst.get();
2443 }
2444
2445 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2446 {
2447     MakeUserStorage_Impl();
2448     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2449
2450     SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2451
2452     xStg = nullptr;
2453
2454     // Set time stamp
2455     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2456                                             &aModifiedDate, &aModifiedTime );
2457     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2458 }
2459
2460 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList()
2461 {
2462     try
2463     {
2464         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2465         if( xStg.is() && xStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
2466             LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2467     }
2468     catch (const css::ucb::ContentCreationException &)
2469     {
2470         TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList");
2471     }
2472     return pWrdStt_ExcptLst.get();
2473 }
2474
2475 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList()
2476 {
2477     MakeUserStorage_Impl();
2478     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2479
2480     SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2481
2482     xStg = nullptr;
2483     // Set time stamp
2484     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2485                                             &aModifiedDate, &aModifiedTime );
2486     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2487 }
2488
2489 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList()
2490 {
2491     if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2492     {
2493         LoadWrdSttExceptList();
2494         if( !pWrdStt_ExcptLst )
2495         {
2496             OSL_ENSURE( false, "No valid list" );
2497             pWrdStt_ExcptLst.reset( new SvStringsISortDtor );
2498         }
2499         nFlags |= ACFlags::WrdSttLstLoad;
2500     }
2501     return pWrdStt_ExcptLst.get();
2502 }
2503
2504 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2505 {
2506     if( sShareAutoCorrFile != sUserAutoCorrFile )
2507     {
2508         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2509         if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2510             xStg->IsStream( rName ) )
2511         {
2512             xStg->Remove( rName );
2513             xStg->Commit();
2514
2515             xStg = nullptr;
2516         }
2517     }
2518 }
2519
// Prepares the user's autocorrect file for writing.
//
// Depending on the current state this
//  - copies the share file to the user location (when both paths differ),
//  - and/or converts an OLE storage into the current format: the old file
//    is first copied to a ".bak" file, both exception lists and the word
//    list are then written from there into sUserAutoCorrFile, and the
//    ".bak" file is finally deleted.
// After a successful copy or conversion sShareAutoCorrFile is set to
// sUserAutoCorrFile, so later loads read the user file.
// Failures of the copy are recorded in bError and suppress the following
// steps; a failure to delete the ".bak" file is ignored.
void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
{
    // The conversion needs to happen if the file is already in the user
    // directory and is in the old format. Additionally it needs to
    // happen when the file is being copied from share to user.

    bool bError = false, bConvert = false, bCopy = false;
    INetURLObject aDest;
    INetURLObject aSource;

    if (sUserAutoCorrFile != sShareAutoCorrFile )
    {
        // share -> user copy; an OLE share file is copied to "<user>.bak"
        // instead and converted from there
        aSource = INetURLObject ( sShareAutoCorrFile );
        aDest = INetURLObject ( sUserAutoCorrFile );
        if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
        {
            aDest.SetExtension ( "bak" );
            bConvert = true;
        }
        bCopy = true;
    }
    else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
    {
        // the user file itself is in the old format: back it up as ".bak"
        // and convert it in place
        aSource = INetURLObject ( sUserAutoCorrFile );
        aDest = INetURLObject ( sUserAutoCorrFile );
        aDest.SetExtension ( "bak" );
        bCopy = bConvert = true;
    }
    if (bCopy)
    {
        try
        {
            // the "transfer" command is executed on the folder that is to
            // contain the copy, i.e. the destination URL up to its last '/'
            OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
            sal_Int32 nSlashPos = sMain.lastIndexOf('/');
            sMain = sMain.copy(0, nSlashPos);
            ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
            TransferInfo aInfo;
            aInfo.NameClash = NameClash::OVERWRITE;
            aInfo.NewTitle = aDest.GetLastName();
            aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
            aInfo.MoveData  = false; // copy, do not move
            aNewContent.executeCommand( "transfer", Any(aInfo));
        }
        catch (...)
        {
            // any failure of the copy cancels the steps below
            bError = true;
        }
    }
    if (bConvert && !bError)
    {
        // read from the ".bak" copy, write into the user file
        tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
        tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );

        if( xSrcStg.is() && xDstStg.is() )
        {
            std::unique_ptr<SvStringsISortDtor> pTmpWordList;

            if (xSrcStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
                LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg );

            if (pTmpWordList)
            {
                // bConvert == true: the destination storage is not committed
                // by SaveExceptList_Imp
                SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true );
                // drop the list so the check below only sees a freshly
                // loaded second list
                pTmpWordList.reset();
            }


            if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
                LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );

            if (pTmpWordList)
            {
                SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
                pTmpWordList->clear();
            }

            // make sure the word list is loaded, then write it into the
            // destination storage as well
            GetAutocorrWordList();
            MakeBlocklist_Imp( *xDstStg );
            // from now on the user file is the one to read from
            sShareAutoCorrFile = sUserAutoCorrFile;
            xDstStg = nullptr;
            try
            {
                // remove the ".bak" copy
                ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
                aContent.executeCommand ( "delete", makeAny ( true ) );
            }
            catch (...)
            {
                // a failure to delete the ".bak" file is deliberately ignored
            }
        }
    }
    else if( bCopy && !bError )
        // plain copy without conversion succeeded
        sShareAutoCorrFile = sUserAutoCorrFile;
}
2613
2614 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2615 {
2616     bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2617     if( !bRemove )
2618     {
2619         tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2620                     ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2621         if( refList.is() )
2622         {
2623             refList->SetSize( 0 );
2624             refList->SetBufferSize( 8192 );
2625             refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2626
2627             uno::Reference< uno::XComponentContext > xContext =
2628                 comphelper::getProcessComponentContext();
2629
2630             uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2631             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2632             xWriter->setOutputStream(xOut);
2633
2634             rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2635
2636             xExp->exportDoc( XML_BLOCK_LIST );
2637
2638             refList->Commit();
2639             bRet = ERRCODE_NONE == refList->GetError();
2640             if( bRet )
2641             {
2642                 refList.clear();
2643                 rStg.Commit();
2644                 if( ERRCODE_NONE != rStg.GetError() )
2645                 {
2646                     bRemove = true;
2647                     bRet = false;
2648                 }
2649             }
2650         }
2651         else
2652             bRet = false;
2653     }
2654
2655     if( bRemove )
2656     {
2657         rStg.Remove( pXMLImplAutocorr_ListStr );
2658         rStg.Commit();
2659     }
2660
2661     return bRet;
2662 }
2663
2664 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2665 {
2666     // First get the current list!
2667     GetAutocorrWordList();
2668
2669     MakeUserStorage_Impl();
2670     tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2671
2672     bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2673
2674     if( bRet )
2675     {
2676         for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2677         {
2678             std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2679             if( xFoundEntry )
2680             {
2681                 if( !xFoundEntry->IsTextOnly() )
2682                 {
2683                     OUString aName( aWordToDelete.GetShort() );
2684                     if (xStorage->IsOLEStorage())
2685                         aName = EncryptBlockName_Imp(aName);
2686                     else
2687                         GeneratePackageName ( aWordToDelete.GetShort(), aName );
2688
2689                     if( xStorage->IsContained( aName ) )
2690                     {
2691                         xStorage->Remove( aName );
2692                         bRet = xStorage->Commit();
2693                     }
2694                 }
2695             }
2696         }
2697
2698         for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
2699         {
2700             SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
2701             std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2702             if( xRemoved )
2703             {
2704                 if( !xRemoved->IsTextOnly() )
2705                 {
2706                     // Still have to remove the Storage
2707                     OUString sStorageName( aWordToAdd.GetShort() );
2708                     if (xStorage->IsOLEStorage())
2709                         sStorageName = EncryptBlockName_Imp(sStorageName);
2710                     else
2711                         GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2712
2713                     if( xStorage->IsContained( sStorageName ) )
2714                         xStorage->Remove( sStorageName );
2715                 }
2716             }
2717             bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2718
2719             if ( !bRet )
2720             {
2721                 break;
2722             }
2723         }
2724
2725         if ( bRet )
2726         {
2727             bRet = MakeBlocklist_Imp( *xStorage );
2728         }
2729     }
2730     return bRet;
2731 }
2732
2733 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2734 {
2735     // First get the current list!
2736     GetAutocorrWordList();
2737
2738     MakeUserStorage_Impl();
2739     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2740
2741     bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2742
2743     // Update the word list
2744     if( bRet )
2745     {
2746         SvxAutocorrWord aNew(rShort, rLong, true );
2747         std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2748         if( xRemove )
2749         {
2750             if( !xRemove->IsTextOnly() )
2751             {
2752                 // Still have to remove the Storage
2753                 OUString sStgNm( rShort );
2754                 if (xStg->IsOLEStorage())
2755                     sStgNm = EncryptBlockName_Imp(sStgNm);
2756                 else
2757                     GeneratePackageName ( rShort, sStgNm);
2758
2759                 if( xStg->IsContained( sStgNm ) )
2760                     xStg->Remove( sStgNm );
2761             }
2762         }
2763
2764         if( pAutocorr_List->Insert( std::move(aNew) ) )
2765         {
2766             bRet = MakeBlocklist_Imp( *xStg );
2767             xStg = nullptr;
2768         }
2769         else
2770         {
2771             bRet = false;
2772         }
2773     }
2774     return bRet;
2775 }
2776
2777 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2778                                                SfxObjectShell& rShell )
2779 {
2780     // First get the current list!
2781     GetAutocorrWordList();
2782
2783     MakeUserStorage_Impl();
2784
2785     try
2786     {
2787         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2788         OUString sLong;
2789         bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2790         xStg = nullptr;
2791
2792         // Update the word list
2793         if( bRet )
2794         {
2795             if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2796             {
2797                 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2798                 MakeBlocklist_Imp( *xStor );
2799             }
2800         }
2801     }
2802     catch ( const uno::Exception& )
2803     {
2804     }
2805 }
2806
2807 // Keep the list sorted ...
2808 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2809 {
2810     bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2811     {
2812         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2813         return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2814     }
2815 };
2816
2817 namespace {
2818
2819 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
2820
2821 }
2822
2823 struct SvxAutocorrWordList::Impl
2824 {
2825
2826     // only one of these contains the data
2827     // maSortedVector is manually sorted so we can optimise data movement
2828     mutable AutocorrWordSetType maSortedVector;
2829     mutable AutocorrWordHashType maHash; // key is 'Short'
2830
2831     void DeleteAndDestroyAll()
2832     {
2833         maHash.clear();
2834         maSortedVector.clear();
2835     }
2836 };
2837
2838 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2839
2840 SvxAutocorrWordList::~SvxAutocorrWordList()
2841 {
2842 }
2843
2844 void SvxAutocorrWordList::DeleteAndDestroyAll()
2845 {
2846     mpImpl->DeleteAndDestroyAll();
2847 }
2848
2849 // returns true if inserted
2850 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
2851 {
2852     if ( mpImpl->maSortedVector.empty() ) // use the hash
2853     {
2854         OUString aShort = aWord.GetShort();
2855         auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
2856         if (inserted)
2857             return &(it->second);
2858         return nullptr;
2859     }
2860     else
2861     {
2862         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
2863         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2864         if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
2865         {
2866             it = mpImpl->maSortedVector.insert(it, std::move(aWord));
2867             return &*it;
2868         }
2869         return nullptr;
2870     }
2871 }
2872
2873 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2874 {
2875     (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
2876 }
2877
2878 bool SvxAutocorrWordList::empty() const
2879 {
2880     return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
2881 }
2882
2883 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
2884 {
2885
2886     if ( mpImpl->maSortedVector.empty() ) // use the hash
2887     {
2888         AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2889         if( it != mpImpl->maHash.end() )
2890         {
2891             SvxAutocorrWord pMatch = std::move(it->second);
2892             mpImpl->maHash.erase (it);
2893             return pMatch;
2894         }
2895     }
2896     else
2897     {
2898         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
2899         if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
2900         {
2901             SvxAutocorrWord pMatch = std::move(*it);
2902             mpImpl->maSortedVector.erase (it);
2903             return pMatch;
2904         }
2905     }
2906     return std::optional<SvxAutocorrWord>();
2907 }
2908
2909 // return the sorted contents - defer sorting until we have to.
2910 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
2911 {
2912     // convert from hash to set permanently
2913     if ( mpImpl->maSortedVector.empty() )
2914     {
2915         std::vector<SvxAutocorrWord> tmp;
2916         tmp.reserve(mpImpl->maHash.size());
2917         for (auto & rPair : mpImpl->maHash)
2918             tmp.emplace_back(std::move(rPair.second));
2919         mpImpl->maHash.clear();
2920         // sort twice - this gets the list into mostly-sorted order, which
2921         // reduces the number of times we need to invoke the expensive ICU collate fn.
2922         std::sort(tmp.begin(), tmp.end(),
2923             [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
2924             {
2925                 return lhs.GetShort() < rhs.GetShort();
2926             });
2927         // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
2928         // stable_sort is twice as fast as sort in this situation because it does
2929         // fewer comparison operations.
2930         std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
2931         mpImpl->maSortedVector = std::move(tmp);
2932     }
2933     return mpImpl->maSortedVector;
2934 }
2935
// Checks whether the list entry *pFnd applies to the text that ends at
// nEndPos in rTxt.
//
// The short form of an entry may start and/or end with ".*", which is
// treated as a wildcard (".*word", "word.*", ".*word.*"). Short forms
// enclosed in colons (":name:") are matched when the character at nEndPos
// is the closing colon.
//
// pFnd    - the entry to test
// rTxt    - the text to look at
// rStt    - in: start position of the candidate word; out: on a match, the
//           position in rTxt where the text to be replaced begins (it may
//           also be modified on some paths that end up returning nullptr)
// nEndPos - position in rTxt just behind the candidate text
//
// Returns pFnd for a match without wildcards; for wildcard patterns a
// pointer to a new entry (concrete text -> concrete replacement) that has
// been inserted into this list; nullptr if there is no match.
//
// NOTE(review): the wildcard branches call Insert() on this list, and the
// "word.*" branch may insert and still return nullptr. SearchWordsInList()
// calls this function while iterating over the very same containers, so
// it looks as if that iteration could continue after the container was
// modified - to be confirmed.
const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
                                      const OUString &rTxt,
                                      sal_Int32 &rStt,
                                      sal_Int32 nEndPos) const
{
    const OUString& rChk = pFnd->GetShort();

    // length of the wildcard marker at either end of the pattern (0 or 2)
    sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
    sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
    sal_Int32 nSttWdPos = nEndPos;

    // direct replacement of keywords surrounded by colons (for example, ":name:")
    bool bColonNameColon = rTxt.getLength() > nEndPos &&
        rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
    // is there enough text in front of nEndPos to hold the pattern
    // (without its wildcard markers) at all?
    if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard )
    {

        bool bWasWordDelim = false;
        // position at which the pattern has to start in order to end at
        // nEndPos (or one behind it, to include the closing colon)
        sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
        if (bColonNameColon)
            nCalcStt++;
        // Patterns without right wildcard: the calculated start has to be
        // the beginning of the text, the given start position, or follow
        // a word delimiter - unless a left wildcard or the colon form
        // lifts that restriction.
        if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
              ( nCalcStt < rStt &&
                IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
        {
            // compare using the transliteration wrapper returned by
            // GetIgnoreTranslWrapper()
            TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
            OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
            if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
            {
                rStt = nCalcStt;
                if (!left_wildcard)
                {
                    // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
                    if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
                        return nullptr;
                    return pFnd;
                }
                // get the first word delimiter position before the matching ".*word" pattern
                while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
                    ;
                if (bWasWordDelim) rStt++;
                // the text between the word start and the matched pattern is
                // kept and followed by the replacement
                OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
                // avoid double spaces before simple "word" replacement
                left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong();
                // store the concrete text and its replacement as a new entry;
                // if such an entry exists already, nullptr is returned below
                if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) )
                    return pNew;
            }
        } else
        // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
        if ( right_wildcard )
        {

            // the pattern without its wildcard markers
            OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
            // Get the last word delimiter position
            bool not_suffix;

            while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
                ;
            // search the first occurrence (with a left word delimitation, if needed)
            sal_Int32 nFndPos = -1;
            do {
                nFndPos = rTxt.indexOf( sTmp, nFndPos + 1);
                if (nFndPos == -1)
                    break;
                // an occurrence that ends at or before the last word
                // delimiter does not belong to the current word
                not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
            } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );

            if ( nFndPos != -1 )
            {
                sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"

                if ( left_wildcard )
                {
                    // get the first word delimiter position before the matching ".*word.*" pattern
                    while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
                        ;
                    if (bWasWordDelim) nFndPos++;
                }
                // the occurrence starts at or behind the end of the text to
                // check: no match
                if (nEndPos + extra_repl <= nFndPos)
                {
                    return nullptr;
                }
                // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
                OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl);

                OUString aLong;
                rStt = nFndPos;
                if ( !left_wildcard )
                {
                    // replacement followed by the text that came after the
                    // pattern
                    sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
                    aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : "");
                } else {
                    // replace every occurrence of the pattern from nFndPos
                    // on, stopping at the first occurrence that is preceded
                    // by a word delimiter
                    OUStringBuffer buf;
                    do {
                        nSttWdPos = rTxt.indexOf( sTmp, nFndPos);
                        if (nSttWdPos != -1)
                        {
                            sal_Int32 nTmp(nFndPos);
                            while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp]))
                                nTmp++;
                            if (nTmp < nSttWdPos)
                                break; // word delimiter found
                            buf.append(std::u16string_view(rTxt).substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
                            nFndPos = nSttWdPos + sTmp.getLength();
                        }
                    } while (nSttWdPos != -1);
                    // append what is left of the text up to nEndPos
                    if (nEndPos - nFndPos > extra_repl)
                        buf.append(std::u16string_view(rTxt).substr(nFndPos, nEndPos - nFndPos));
                    aLong = buf.makeStringAndClear();
                }
                if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
                {
                    // the new entry is only reported as a match if the text
                    // ends at nEndPos or a word delimiter follows there
                    if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos )
                        return pNew;
                }
            }
        }
    }
    return nullptr;
}
3056
3057 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt,
3058                                                               sal_Int32 nEndPos) const
3059 {
3060     for (auto const& elem : mpImpl->maHash)
3061     {
3062         if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) )
3063             return pTmp;
3064     }
3065
3066     for (auto const& elem : mpImpl->maSortedVector)
3067     {
3068         if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) )
3069             return pTmp;
3070     }
3071     return nullptr;
3072 }
3073
3074 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */