editeng/source/misc/svxacorr.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <memory>
  21 #include <utility>
  22 #include <algorithm>
  23 #include <string_view>
  24 #include <sal/config.h>
  25
  26 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
  27 #include <com/sun/star/embed/XStorage.hpp>
  28 #include <com/sun/star/io/IOException.hpp>
  29 #include <com/sun/star/io/XStream.hpp>
  30 #include <tools/urlobj.hxx>
  31 #include <i18nlangtag/mslangid.hxx>
  32 #include <i18nutil/transliteration.hxx>
  33 #include <sal/log.hxx>
  34 #include <osl/diagnose.h>
  35 #include <vcl/svapp.hxx>
  36 #include <vcl/settings.hxx>
  37 #include <svl/fstathelper.hxx>
  38 #include <svl/urihelper.hxx>
  39 #include <unotools/charclass.hxx>
  40 #include <com/sun/star/i18n/UnicodeType.hpp>
  41 #include <unotools/collatorwrapper.hxx>
  42 #include <com/sun/star/i18n/UnicodeScript.hpp>
  43 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
  44 #include <unotools/localedatawrapper.hxx>
  45 #include <unotools/transliterationwrapper.hxx>
  46 #include <comphelper/processfactory.hxx>
  47 #include <comphelper/storagehelper.hxx>
  48 #include <o3tl/string_view.hxx>
  49 #include <editeng/editids.hrc>
  50 #include <sot/storage.hxx>
  51 #include <editeng/udlnitem.hxx>
  52 #include <editeng/wghtitem.hxx>
  53 #include <editeng/postitem.hxx>
  54 #include <editeng/crossedoutitem.hxx>
  55 #include <editeng/escapementitem.hxx>
  56 #include <editeng/svxacorr.hxx>
  57 #include <editeng/unolingu.hxx>
  58 #include <vcl/window.hxx>
  59 #include <com/sun/star/xml/sax/InputSource.hpp>
  60 #include <com/sun/star/xml/sax/FastParser.hpp>
  61 #include <com/sun/star/xml/sax/Writer.hpp>
  62 #include <com/sun/star/xml/sax/SAXParseException.hpp>
  63 #include <unotools/streamwrap.hxx>
  64 #include "SvXMLAutoCorrectImport.hxx"
  65 #include "SvXMLAutoCorrectExport.hxx"
  66 #include "SvXMLAutoCorrectTokenHandler.hxx"
  67 #include <ucbhelper/content.hxx>
  68 #include <com/sun/star/ucb/ContentCreationException.hpp>
  69 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
  70 #include <com/sun/star/ucb/TransferInfo.hpp>
  71 #include <com/sun/star/ucb/NameClash.hpp>
  72 #include <comphelper/diagnose_ex.hxx>
  73 #include <xmloff/xmltoken.hxx>
  74 #include <unordered_map>
  75 #include <rtl/character.hxx>
  76
  77 using namespace ::com::sun::star::ucb;
  78 using namespace ::com::sun::star::uno;
  79 using namespace ::com::sun::star::xml::sax;
  80 using namespace ::com::sun::star;
  81 using namespace ::xmloff::token;
  82 using namespace ::utl;
  83
  84 namespace {
  85
  86 enum class Flags {
  87     NONE            = 0x00,
  88     FullStop        = 0x01,
  89     ExclamationMark = 0x02,
  90     QuestionMark    = 0x04,
  91 };
  92
  93 }
  94
// Registers Flags with o3tl::typed_flags. The mask 0x07 is the union of the
// three declared bits (FullStop | ExclamationMark | QuestionMark).
namespace o3tl {
    template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
}
const sal_Unicode cNonBreakingSpace = 0xA0; // Unicode code point of the no-break space (U+00A0)

// Names of three XML files; judging by the identifiers they presumably hold the
// word-start exception list, the sentence-start exception list and the
// replacement (autocorrect) list -- confirm against the code that uses them.
constexpr OUString pXMLImplWordStart_ExcptLstStr = u"WordExceptList.xml"_ustr;
constexpr OUString pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"_ustr;
constexpr OUString pXMLImplAutocorr_ListStr = u"DocumentList.xml"_ustr;

// tdf#54409: also check typographical quotation marks in case ASCII quotation marks are skipped.
// NOTE(review): it is unclear why \u0083\u0084\u0089\u0091\u0092\u0093\u0094 are
// treated as skip characters in both lists.
constexpr std::u16string_view
    /* characters skipped at the beginning of a word - opening brackets and all kinds of quote characters */
    sImplSttSkipChars = u"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094",
    /* characters skipped at the end of a word - closing brackets and all kinds of quote characters */
    sImplEndSkipChars = u"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";

// Forward declaration only; the definition is not part of this section of the file.
static OUString EncryptBlockName_Imp(std::u16string_view rName);
 113
 114 static bool NonFieldWordDelim( const sal_Unicode c )
 115 {
 116     return ' ' == c || '\t' == c || 0x0a == c ||
 117             cNonBreakingSpace == c || 0x2011 == c;
 118 }
 119
 120 static bool IsWordDelim( const sal_Unicode c )
 121 {
 122     return c == 0x1 || NonFieldWordDelim(c);
 123 }
 124
 125
 126 static bool IsLowerLetter( sal_Int32 nCharType )
 127 {
 128     return CharClass::isLetterType( nCharType ) &&
 129            ( css::i18n::KCharacterType::LOWER & nCharType);
 130 }
 131
 132 static bool IsUpperLetter( sal_Int32 nCharType )
 133 {
 134     return CharClass::isLetterType( nCharType ) &&
 135             ( css::i18n::KCharacterType::UPPER & nCharType);
 136 }
 137
 138 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
 139                                    sal_Int32 nStt, sal_Int32 nEnd )
 140 {
 141     for( ; nStt < nEnd; ++nStt )
 142     {
 143         css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
 144         switch( nScript )
 145         {
 146             case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
 147             case css::i18n::UnicodeScript_kHangulJamo:
 148             case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
 149             case css::i18n::UnicodeScript_kHiragana:
 150             case css::i18n::UnicodeScript_kKatakana:
 151             case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
 152             case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
 153             case css::i18n::UnicodeScript_kCJKCompatibility:
 154             case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA:
 155             case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
 156             case css::i18n::UnicodeScript_kHangulSyllable:
 157             case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
 158             case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
 159                 return true;
 160             default: ; //do nothing
 161         }
 162     }
 163     return false;
 164 }
 165
 166 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
 167                                   sal_Int32 nStt, sal_Int32 nEnd )
 168 {
 169     for( ; nStt < nEnd; ++nStt )
 170     {
 171         if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
 172             return true;
 173     }
 174     return false;
 175 }
 176
 177 static bool lcl_IsInArr(std::u16string_view arr, const sal_uInt32 c)
 178 {
 179     return std::any_of(arr.begin(), arr.end(), [c](const auto c1) { return c1 == c; });
 180 }
 181
 182 SvxAutoCorrDoc::~SvxAutoCorrDoc()
 183 {
 184 }
 185
// Called by the functions:
//  - FnCapitalStartWord
//  - FnCapitalStartSentence
// after the exchange of characters. Then the words, if necessary, can be inserted
// into the exception list.
//
// This implementation deliberately does nothing and ignores all of its
// (unnamed) parameters.
void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
                                        sal_Unicode )
{
}
 195
// Returns LANGUAGE_SYSTEM for every position; the position argument is ignored.
// GetDocLanguage() resolves LANGUAGE_SYSTEM to the application language.
LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
{
    return LANGUAGE_SYSTEM;
}
 200
 201 static const LanguageTag& GetAppLang()
 202 {
 203     return Application::GetSettings().GetLanguageTag();
 204 }
 205
 206 /// Never use an unresolved LANGUAGE_SYSTEM.
 207 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
 208 {
 209     LanguageType eLang = rDoc.GetLanguage( nPos );
 210     if (eLang == LANGUAGE_SYSTEM)
 211         eLang = GetAppLang().getLanguageType();     // the current work locale
 212     return eLang;
 213 }
 214
 215 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
 216 {
 217     static std::unique_ptr<LocaleDataWrapper> xLclDtWrp;
 218     LanguageTag aLcl( nLang );
 219     if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl)
 220         xLclDtWrp.reset(new LocaleDataWrapper(std::move(aLcl)));
 221     return *xLclDtWrp;
 222 }
 223 static TransliterationWrapper& GetIgnoreTranslWrapper()
 224 {
 225     static int bIsInit = 0;
 226     static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
 227                 TransliterationFlags::IGNORE_KANA |
 228                 TransliterationFlags::IGNORE_WIDTH );
 229     if( !bIsInit )
 230     {
 231         aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
 232         bIsInit = 1;
 233     }
 234     return aWrp;
 235 }
 236 static CollatorWrapper& GetCollatorWrapper()
 237 {
 238     static CollatorWrapper aCollWrp = []()
 239     {
 240         CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
 241         tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
 242         return tmp;
 243     }();
 244     return aCollWrp;
 245 }
 246
 247 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
 248 {
 249     return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
 250             cChar == ' '  || cChar == '\'' || cChar == '\"' ||
 251             cChar == '*'  || cChar == '_'  || cChar == '%' ||
 252             cChar == '.'  || cChar == ','  || cChar == ';' ||
 253             cChar == ':'  || cChar == '?' || cChar == '!' ||
 254             cChar == '<'  || cChar == '>' ||
 255             cChar == '/'  || cChar == '-';
 256 }
 257
 258 namespace
 259 {
 260     bool IsCompoundWordDelimChar(sal_Unicode cChar)
 261     {
 262         return  cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
 263     }
 264 }
 265
 266 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
 267 {
 268     return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
 269         cChar == '/' /*case for the urls exception*/;
 270 }
 271
 272 ACFlags SvxAutoCorrect::GetDefaultFlags()
 273 {
 274     ACFlags nRet = ACFlags::Autocorrect
 275                     | ACFlags::CapitalStartSentence
 276                     | ACFlags::CapitalStartWord
 277                     | ACFlags::ChgOrdinalNumber
 278                     | ACFlags::ChgToEnEmDash
 279                     | ACFlags::AddNonBrkSpace
 280                     | ACFlags::TransliterateRTL
 281                     | ACFlags::ChgAngleQuotes
 282                     | ACFlags::ChgWeightUnderl
 283                     | ACFlags::SetINetAttr
 284                     | ACFlags::SetDOIAttr
 285                     | ACFlags::ChgQuotes
 286                     | ACFlags::SaveWordCplSttLst
 287                     | ACFlags::SaveWordWordStartLst
 288                     | ACFlags::CorrectCapsLock;
 289     LanguageType eLang = GetAppLang().getLanguageType();
 290     if( eLang.anyOf(
 291         LANGUAGE_ENGLISH,
 292         LANGUAGE_ENGLISH_US,
 293         LANGUAGE_ENGLISH_UK,
 294         LANGUAGE_ENGLISH_AUS,
 295         LANGUAGE_ENGLISH_CAN,
 296         LANGUAGE_ENGLISH_NZ,
 297         LANGUAGE_ENGLISH_EIRE,
 298         LANGUAGE_ENGLISH_SAFRICA,
 299         LANGUAGE_ENGLISH_JAMAICA,
 300         LANGUAGE_ENGLISH_CARIBBEAN))
 301         nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
 302     return nRet;
 303 }
 304
// Dash characters, both as single code units and as one-character strings.
constexpr sal_Unicode cEmDash = 0x2014;
constexpr sal_Unicode cEnDash = 0x2013;
constexpr OUString sEmDash(u"\u2014"_ustr);
constexpr OUString sEnDash(u"\u2013"_ustr);
// Typographic apostrophe and angle quotation marks.
constexpr sal_Unicode cApostrophe = 0x2019;
constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
// stop characters for searching preceding quotes
// (the first character is also the opening quote we are looking for)
// Each array is terminated by a 0 entry.
const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
// preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
// NOTE(review): the RuUa suffix suggests this list is for Russian/Ukrainian; confirm at the point of use.
const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };
 322
// Constructs an autocorrect object for the given share and user autocorrect
// file names (both are taken by value and moved into the members).
// The flags start as GetDefaultFlags(), the character-class language is
// LANGUAGE_DONTKNOW and all four quote characters are 0.
SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile,
                                OUString aUserAutocorrFile )
    : sShareAutoCorrFile(std::move( aShareAutocorrFile ))
    , sUserAutoCorrFile(std::move( aUserAutocorrFile ))
    , eCharClassLang( LANGUAGE_DONTKNOW )
    , nFlags(SvxAutoCorrect::GetDefaultFlags())
    , cStartDQuote( 0 )
    , cEndDQuote( 0 )
    , cStartSQuote( 0 )
    , cEndSQuote( 0 )
{
}
 335
// Copy constructor. Copies the file names, aSwFlags, the character-class
// language and the quote characters. The flags are copied with
// ChgWordLstLoad, CplSttLstLoad and WordStartLstLoad cleared (presumably so
// that the copy loads its own lists -- confirm with the list-loading code).
// Only the members named in the initializer list are copied from rCpy.
SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
    : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
    , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
    , aSwFlags( rCpy.aSwFlags )
    , eCharClassLang(rCpy.eCharClassLang)
    , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WordStartLstLoad))
    , cStartDQuote( rCpy.cStartDQuote )
    , cEndDQuote( rCpy.cEndDQuote )
    , cStartSQuote( rCpy.cStartSQuote )
    , cEndSQuote( rCpy.cEndSQuote )
{
}
 348
 349
 350 SvxAutoCorrect::~SvxAutoCorrect()
 351 {
 352 }
 353
// (Re)creates the cached CharClass in moCharClass for eLang and records eLang
// as the language the cache was built for.
void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
{
    moCharClass.emplace( LanguageTag( eLang) );
    // Updated only after the new CharClass has been constructed.
    eCharClassLang = eLang;
}
 359
 360 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
 361 {
 362     ACFlags nOld = nFlags;
 363     nFlags = bOn ? nFlags | nFlag
 364                  : nFlags & ~nFlag;
 365
 366     if( !bOn )
 367     {
 368         if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
 369             nFlags &= ~ACFlags::CplSttLstLoad;
 370         if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
 371             nFlags &= ~ACFlags::WordStartLstLoad;
 372         if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
 373             nFlags &= ~ACFlags::ChgWordLstLoad;
 374     }
 375 }
 376
 377
 378 // Correct TWo INitial CApitals
 379 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 380                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
 381                                     LanguageType eLang )
 382 {
 383     CharClass& rCC = GetCharClass( eLang );
 384
 385     // Delete all non alphanumeric. Test the characters at the beginning/end of
 386     // the word ( recognizes: "(min.", "/min.", and so on.)
 387     for( ; nSttPos < nEndPos; ++nSttPos )
 388         if( rCC.isLetterNumeric( rTxt, nSttPos ))
 389             break;
 390     for( ; nSttPos < nEndPos; --nEndPos )
 391         if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
 392             break;
 393
 394     // Is the word a compounded word separated by delimiters?
 395     // If so, keep track of all delimiters so each constituent
 396     // word can be checked for two initial capital letters.
 397     std::deque<sal_Int32> aDelimiters;
 398
 399     // Always check for two capitals at the beginning
 400     // of the entire word, so start at nSttPos.
 401     aDelimiters.push_back(nSttPos);
 402
 403     // Find all compound word delimiters
 404     for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
 405     {
 406         if (IsCompoundWordDelimChar(rTxt[ n ]))
 407         {
 408             aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
 409         }
 410     }
 411
 412     // Decide where to put the terminating delimiter.
 413     // If the last AutoCorrect char was a newline, then the AutoCorrect
 414     // char will not be included in rTxt.
 415     // If the last AutoCorrect char was not a newline, then the AutoCorrect
 416     // character will be the last character in rTxt.
 417     if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
 418         aDelimiters.push_back(nEndPos);
 419
 420     // Iterate through the word and all words that compose it.
 421     // Two capital letters at the beginning of word?
 422     for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
 423     {
 424         nSttPos = aDelimiters[nI];
 425         nEndPos = aDelimiters[nI + 1];
 426
 427         if( nSttPos+2 < nEndPos &&
 428             IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
 429             IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
 430             // Is the third character a lower case
 431             IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
 432             // Do not replace special attributes
 433             0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
 434         {
 435             // test if the word is in an exception list
 436             OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
 437             if( !FindInWordStartExceptList(eLang, sWord) )
 438             {
 439                 // Check that word isn't correctly spelt before correcting:
 440                 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
 441                     LinguMgr::GetSpellChecker();
 442                 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
 443                 {
 444                     Sequence< css::beans::PropertyValue > aEmptySeq;
 445                     if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
 446                     {
 447                         return;
 448                     }
 449                 }
 450                 sal_Unicode cSave = rTxt[ nSttPos ];
 451                 OUString sChar = rCC.lowercase( OUString(cSave) );
 452                 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
 453                 {
 454                     if( ACFlags::SaveWordWordStartLst & nFlags )
 455                         rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
 456                 }
 457             }
 458         }
 459     }
 460 }
 461
 462 // Format ordinal numbers suffixes (1st -> 1^st)
 463 bool SvxAutoCorrect::FnChgOrdinalNumber(
 464     SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 465     sal_Int32 nSttPos, sal_Int32 nEndPos,
 466     LanguageType eLang)
 467 {
 468     // 1st, 2nd, 3rd, 4 - 0th
 469     // 201th or 201st
 470     // 12th or 12nd
 471     bool bChg = false;
 472
 473     // In some languages ordinal suffixes should never be
 474     // changed to superscript. Let's break for those languages.
 475     if (!eLang.anyOf(
 476          LANGUAGE_SWEDISH,
 477          LANGUAGE_SWEDISH_FINLAND))
 478     {
 479         CharClass& rCC = GetCharClass(eLang);
 480
 481         for (; nSttPos < nEndPos; ++nSttPos)
 482             if (!lcl_IsInArr(sImplSttSkipChars, rTxt[nSttPos]))
 483                 break;
 484         for (; nSttPos < nEndPos; --nEndPos)
 485             if (!lcl_IsInArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
 486                 break;
 487
 488
 489         // Get the last number in the string to check
 490         sal_Int32 nNumEnd = nEndPos;
 491         bool bFoundEnd = false;
 492         bool isValidNumber = true;
 493         sal_Int32 i = nEndPos;
 494         while (i > nSttPos)
 495         {
 496             i--;
 497             bool isDigit = rCC.isDigit(rTxt, i);
 498             if (bFoundEnd)
 499                 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
 500
 501             if (isDigit && !bFoundEnd)
 502             {
 503                 bFoundEnd = true;
 504                 nNumEnd = i;
 505             }
 506         }
 507
 508         if (bFoundEnd && isValidNumber) {
 509             sal_Int32 nNum = o3tl::toInt32(rTxt.subView(nSttPos, nNumEnd - nSttPos + 1));
 510
 511             // Check if the characters after that number correspond to the ordinal suffix
 512             uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
 513                 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
 514
 515             const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
 516             for (OUString const & sSuffix : aSuffixes)
 517             {
 518                 std::u16string_view sEnd = rTxt.subView(nNumEnd + 1, nEndPos - nNumEnd - 1);
 519
 520                 if (sSuffix == sEnd)
 521                 {
 522                     // Check if the ordinal suffix has to be set as super script
 523                     if (rCC.isLetter(sSuffix))
 524                     {
 525                         // Do the change
 526                         SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
 527                             DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
 528                         rDoc.SetAttr(nNumEnd + 1, nEndPos,
 529                             SID_ATTR_CHAR_ESCAPEMENT,
 530                             aSvxEscapementItem);
 531                         bChg = true;
 532                     }
 533                 }
 534             }
 535         }
 536     }
 537     return bChg;
 538 }
 539
// Replace dashes
//
// Works in two stages on rTxt[nSttPos, nEndPos):
//  1. a single or double hyphen surrounded by spaces and enclosed by
//     letters/digits (" - " or " --") becomes one dash character;
//  2. a "--" inside the range, enclosed by letters/digits (or skip
//     characters), becomes an em dash or an en dash.
// Returns true if at least one replacement was made in rDoc.
bool SvxAutoCorrect::FnChgToEnEmDash(
                                SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                sal_Int32 nSttPos, sal_Int32 nEndPos,
                                LanguageType eLang )
{
    bool bRet = false;
    CharClass& rCC = GetCharClass( eLang );
    if (eLang == LANGUAGE_SYSTEM)
        eLang = GetAppLang().getLanguageType();
    // Russian and Ukrainian get an em dash in stage 1, all others an en dash.
    bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);

    // rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert;
    // keep a local copy for later use
    OUString aOrigTxt = rTxt;
    // Length difference caused by stage 1; used to adjust the position in stage 2.
    sal_Int32 nFirstReplacementTextLengthChange = 0;

    // replace " - " or " --" with "enDash"
    if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
    {
        sal_Unicode cCh = rTxt[ nSttPos ];
        if( '-' == cCh )
        {
            // Case " --word": the range itself starts with the double hyphen.
            if( 1 < nEndPos - nSttPos &&
                ' ' == rTxt[ nSttPos-1 ] &&
                '-' == rTxt[ nSttPos+1 ])
            {
                sal_Int32 n;
                // Skip start characters after the hyphens; cCh ends up as the last character read.
                for( n = nSttPos+2; n < nEndPos && lcl_IsInArr(
                            sImplSttSkipChars,(cCh = rTxt[ n ]));
                        ++n )
                    ;

                // found: " --[<AnySttChars>][A-z0-9]
                if( rCC.isLetterNumeric( OUString(cCh) ) )
                {
                    // Scan backwards from before the space, skipping end characters.
                    for( n = nSttPos-1; n && lcl_IsInArr(
                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                        ;

                    // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
                    if( rCC.isLetterNumeric( OUString(cCh) ))
                    {
                        rDoc.Delete( nSttPos, nSttPos + 2 );
                        rDoc.Insert( nSttPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
                        nFirstReplacementTextLengthChange = -1; // 2 ch -> 1 ch
                        bRet = true;
                    }
                }
            }
        }
        else if( 3 < nSttPos &&
                 ' ' == rTxt[ nSttPos-1 ] &&
                 '-' == rTxt[ nSttPos-2 ])
        {
            // Case "- word" / "-- word": the hyphen(s) and a space precede the range.
            // nTmpPos is the position of the first hyphen, nLen the number of hyphens (1 or 2).
            sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
            if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
            {
                --nTmpPos;
                ++nLen;
                cCh = rTxt[ nTmpPos-1 ];
            }
            // The hyphen(s) must themselves be preceded by a space.
            if( ' ' == cCh )
            {
                for( n = nSttPos; n < nEndPos && lcl_IsInArr(
                            sImplSttSkipChars,(cCh = rTxt[ n ]));
                        ++n )
                    ;

                // found: " - [<AnySttChars>][A-z0-9]
                if( rCC.isLetterNumeric( OUString(cCh) ) )
                {
                    cCh = ' ';
                    for( n = nTmpPos-1; n && lcl_IsInArr(
                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                            ;
                    // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
                    if( rCC.isLetterNumeric( OUString(cCh) ))
                    {
                        rDoc.Delete( nTmpPos, nTmpPos + nLen );
                        rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
                        nFirstReplacementTextLengthChange = 1 - nLen; // nLen ch -> 1 ch
                        bRet = true;
                    }
                }
            }
        }
    }

    // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
    // [0-9]--[0-9] double dash always replaced with "enDash"
    // Finnish and Hungarian use enDash instead of emDash.
    bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
    if( 4 <= nEndPos - nSttPos )
    {
        // Search in the copy taken before any modification of the document.
        std::u16string_view sTmpView( aOrigTxt.subView( nSttPos, nEndPos - nSttPos ) );
        size_t nFndPos = sTmpView.find(u"--");
        // "--" must be found, must not be at the very start and must be followed by at least one character.
        if (nFndPos > 0 && nFndPos < sTmpView.size() - 2)
        {
            // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it
            // uses the index *both* as code unit index (when checking it as ASCII), *and*
            // as code point index (when passes to css::i18n::XCharacterClassification).
            // Oh well... Anyway, single-codepoint strings will workaround it.
            sal_Int32 nStart = nSttPos + nFndPos;
            // Code point directly before the "--".
            sal_uInt32 chStart = aOrigTxt.iterateCodePoints(&nStart, -1);
            OUString sStart(&chStart, 1);
            // No idea why sImplEndSkipChars is checked at start
            if (rCC.isLetterNumeric(sStart, 0) || lcl_IsInArr(sImplEndSkipChars, chStart))
            {
                sal_Int32 nEnd = nSttPos + nFndPos + 2;
                // Code point directly after the "--".
                sal_uInt32 chEnd = aOrigTxt.iterateCodePoints(&nEnd, 1);
                OUString sEnd(&chEnd, 1);
                // No idea why sImplSttSkipChars is checked at end
                if (rCC.isLetterNumeric(sEnd, 0) || lcl_IsInArr(sImplSttSkipChars, chEnd))
                {
                    // Position of "--" in the document, shifted by the length change of stage 1.
                    nSttPos = nSttPos + nFndPos + nFirstReplacementTextLengthChange;
                    rDoc.Delete(nSttPos, nSttPos + 2);
                    rDoc.Insert(nSttPos,
                                (bEnDash || (rCC.isDigit(sStart, 0) && rCC.isDigit(sEnd, 0))
                                     ? sEnDash
                                     : sEmDash));
                    bRet = true;
                }
            }
        }
    }
    return bRet;
}
 668
// Add non-breaking space before specific punctuation marks in French text
//
// Only acts when the language of the CharClass for eLang is "fr".
// rTxt[nEndPos] is the character that was just typed.
// Returns the position at which spaces were removed / the no-break space was
// inserted, nEndPos - 1 when a no-break space before ":/" was removed, or -1
// if nothing was changed. io_bNbspRunNext is set to true in the cases marked
// below and is otherwise left untouched.
sal_Int32 SvxAutoCorrect::FnAddNonBrkSpace(
                                SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
                                sal_Int32 nEndPos,
                                LanguageType eLang, bool& io_bNbspRunNext )
{
    sal_Int32 nRet = -1;

    CharClass& rCC = GetCharClass( eLang );

    if ( rCC.getLanguageTag().getLanguage() == "fr" )
    {
        bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
        // allChars: every punctuation mark handled here.
        // chars: the marks that actually get a no-break space (only ':' for fr-CA).
        OUString allChars = ":;?!%";
        OUString chars( allChars );
        if ( bFrCA )
            chars = ":";

        sal_Unicode cChar = rTxt[ nEndPos ];
        bool bHasSpace = chars.indexOf( cChar ) != -1;
        bool bIsSpecial = allChars.indexOf( cChar ) != -1;
        if ( bIsSpecial )
        {
            // Get the last word delimiter position
            sal_Int32 nSttWdPos = nEndPos;
            bool bWasWordDelim = false;
            while( nSttWdPos )
            {
                bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
                if (bWasWordDelim)
                    break;
            }

            //See if the text is the start of a protocol string, e.g. have text of
            //"http" see if it is the start of "http:" and if so leave it alone
            size_t nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
            size_t nProtocolLen = nEndPos - nSttWdPos + 1;
            if (nIndex + nProtocolLen <= rTxt.size())
            {
                if (INetURLObject::CompareProtocolScheme(rTxt.substr(nIndex, nProtocolLen)) != INetProtocol::NotValid)
                    return -1;
            }

            // Check the presence of "://" in the word
            // (the search starts at nSttWdPos + 1 and runs to the end of rTxt)
            size_t nStrPos = rTxt.find( u"://", nSttWdPos + 1 );
            if ( nStrPos == std::u16string_view::npos && nEndPos > 0 )
            {
                // Check the previous char
                sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
                if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
                {
                    // Remove any previous normal space
                    // (walk back over spaces and no-break spaces; nPos ends on the first of them)
                    sal_Int32 nPos = nEndPos - 1;
                    while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
                    {
                        if ( nPos == 0 ) break;
                        nPos--;
                        cPrevChar = rTxt[ nPos ];
                    }

                    nPos++;
                    if ( nEndPos - nPos > 0 )
                        rDoc.Delete( nPos, nEndPos );

                    // Add the non-breaking space at the end pos
                    // (skipped for marks that are in allChars but not in chars, e.g. ';' for fr-CA)
                    if ( bHasSpace )
                        rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
                    io_bNbspRunNext = true;
                    nRet = nPos;
                }
                else if ( chars.indexOf( cPrevChar ) != -1 )
                    // Previous character is itself one of the handled marks.
                    io_bNbspRunNext = true;
            }
        }
        else if ( cChar == '/' && nEndPos > 1 && static_cast<sal_Int32>(rTxt.size()) > (nEndPos - 1) )
        {
            // Remove the hardspace right before to avoid formatting URLs
            sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
            sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
            if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
            {
                rDoc.Delete( nEndPos - 2, nEndPos - 1 );
                nRet = nEndPos - 1;
            }
        }
    }

    return nRet;
}
 758
 759 // URL recognition
 760 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 761                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
 762                                     LanguageType eLang )
 763 {
 764     OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
 765                                                 GetCharClass( eLang ) ));
 766     bool bRet = !sURL.isEmpty();
 767     if( bRet )          // so, set attribute:
 768         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
 769     return bRet;
 770 }
 771
 772 // DOI citation recognition
 773 bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 774                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
 775                                     LanguageType eLang )
 776 {
 777     OUString sURL( URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) ));
 778     bool bRet = !sURL.isEmpty();
 779     if( bRet )          // so, set attribute:
 780         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
 781     return bRet;
 782 }
 783
 784 // Automatic *bold*, /italic/, -strikeout- and _underline_
 785 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 786                                         sal_Int32 nEndPos )
 787 {
 788     // Condition:
 789     //  at the beginning:   _, *, / or ~ after Space with the following !Space
 790     //  at the end:         _, *, / or ~ before Space (word delimiter?)
 791
 792     sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
 793     if( ++nEndPos != rTxt.getLength() &&
 794         !IsWordDelim( rTxt[ nEndPos ] ) )
 795         return false;
 796
 797     --nEndPos;
 798
 799     bool bAlphaNum = false;
 800     sal_Int32 nPos = nEndPos;
 801     sal_Int32  nFndPos = -1;
 802     CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
 803
 804     while( nPos )
 805     {
 806         switch( sal_Unicode c = rTxt[ --nPos ] )
 807         {
 808         case '_':
 809         case '-':
 810         case '/':
 811         case '*':
 812             if( c == cInsChar )
 813             {
 814                 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
 815                     IsWordDelim( rTxt[ nPos-1 ])) &&
 816                     !IsWordDelim( rTxt[ nPos+1 ]))
 817                         nFndPos = nPos;
 818                 else
 819                     // Condition is not satisfied, so cancel
 820                     nFndPos = -1;
 821                 nPos = 0;
 822             }
 823             break;
 824         default:
 825             if( !bAlphaNum )
 826                 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
 827         }
 828     }
 829
 830     if( -1 != nFndPos )
 831     {
 832         // first delete the Character at the end - this allows insertion
 833         // of an empty hint in SetAttr which would be removed by Delete
 834         // (fdo#62536, AUTOFMT in Writer)
 835         rDoc.Delete( nEndPos, nEndPos + 1 );
 836
 837         // Span the Attribute over the area
 838         // the end.
 839         if( '*' == cInsChar )           // Bold
 840         {
 841             SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
 842             rDoc.SetAttr( nFndPos + 1, nEndPos,
 843                           SID_ATTR_CHAR_WEIGHT,
 844                           aSvxWeightItem);
 845         }
 846         else if( '/' == cInsChar )           // Italic
 847         {
 848             SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
 849             rDoc.SetAttr( nFndPos + 1, nEndPos,
 850                           SID_ATTR_CHAR_POSTURE,
 851                           aSvxPostureItem);
 852         }
 853         else if( '-' == cInsChar )           // Strikeout
 854         {
 855             SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
 856             rDoc.SetAttr( nFndPos + 1, nEndPos,
 857                           SID_ATTR_CHAR_STRIKEOUT,
 858                           aSvxCrossedOutItem);
 859         }
 860         else                            // Underline
 861         {
 862             SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
 863             rDoc.SetAttr( nFndPos + 1, nEndPos,
 864                           SID_ATTR_CHAR_UNDERLINE,
 865                           aSvxUnderlineItem);
 866         }
 867         rDoc.Delete( nFndPos, nFndPos + 1 );
 868     }
 869
 870     return -1 != nFndPos;
 871 }
 872
// Capitalize first letter of every sentence
//
// Scans rTxt backwards from nEndPos to find the last word, then checks whether
// the text in front of it (in this paragraph, or at the end of the previous
// paragraph obtained through rDoc.GetPrevPara) ends a sentence with '.', '!',
// '?' or their Asian / full-width forms. If so, and none of the exceptions
// below applies, the first letter of the word is replaced by its title-case
// form through rDoc.ReplaceRange.
//
// rDoc        document abstraction used for the replacement and for access
//             to the previous paragraph
// rTxt        text of the current paragraph
// bNormalPos  only forwarded to rDoc.GetPrevPara
// nSttPos     only used for the initial range check (nothing is done when
//             nEndPos <= nSttPos); it is reused as a local further down
// nEndPos     position one past the last character to look at
// eLang       language selecting the CharClass and the exception lists
void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
                                    const OUString& rTxt, bool bNormalPos,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{

    if( rTxt.isEmpty() || nEndPos <= nSttPos )
        return;

    CharClass& rCC = GetCharClass( eLang );
    // aText is the text currently being scanned: first this paragraph, later
    // possibly the previous one. pWordStt / pDelim always refer to the word in
    // the current paragraph.
    // NOTE(review): the final "pWordStt - rTxt.getStr()" relies on this copy
    // sharing its buffer with rTxt - confirm.
    OUString aText( rTxt );
    const sal_Unicode *pStart = aText.getStr(),
                      *pStr = pStart + nEndPos,
                      *pWordStt = nullptr,
                      *pDelim = nullptr;

    // Walk backwards to find the last word: pWordStt ends up on its first
    // letter, pDelim one past the first letter found (i.e. past the word).
    // Digits inside the word are skipped; the loop stops at the first other
    // non-letter in front of the word or at the start of the text.
    bool bAtStart = false;
    do {
        --pStr;
        if (rCC.isLetter(aText, pStr - pStart))
        {
            if( !pWordStt )
                pDelim = pStr+1;
            pWordStt = pStr;
        }
        else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
        {
            if( (lcl_IsInArr( u"-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
                pWordStt - 1 == pStr &&
                // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
                (pStart + 1) <= pStr &&
                rCC.isLetter(aText, pStr-1 - pStart))
                pWordStt = --pStr;
            else
                break;
        }
        bAtStart = (pStart == pStr);
    } while( !bAtStart );

    if (!pWordStt)
        return;    // no character to be replaced


    // the scan stopped on a digit (only possible at the start of the text)
    if (rCC.isDigit(aText, pStr - pStart))
        return; // already ok

    if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
        return; // already ok

    //See if the text is the start of a protocol string, e.g. have text of
    //"http" see if it is the start of "http:" and if so leave it alone
    sal_Int32 nIndex = pWordStt - pStart;
    sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
    if (nIndex + nProtocolLen <= rTxt.getLength())
    {
        if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid)
            return; // already ok
    }

    // NOTE(review): 0x1 / 0x2 are presumably placeholder characters for
    // fields or footnotes - confirm.
    if (0x1 == *pWordStt || 0x2 == *pWordStt)
        return; // already ok

    // Only capitalize, if string before specified characters is long enough
    // (words of at most two characters followed by one of ".-)>" are skipped)
    if( *pDelim && 2 >= pDelim - pWordStt &&
        lcl_IsInArr( u".-)>", *pDelim ) )
        return;

    // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
    if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
        return;

    if( !bAtStart ) // Still no beginning of a paragraph?
    {
        if (NonFieldWordDelim(*pStr))
        {
            // skip all word delimiters in front of the word; bAtStart becomes
            // true when only delimiters precede it
            for (;;)
            {
                bAtStart = (pStart == pStr--);
                if (bAtStart || !NonFieldWordDelim(*pStr))
                    break;
            }
        }
        // Asian full stop, full width full stop, full width exclamation mark
        // and full width question marks are treated as word delimiters
        else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
                  0xFF1F != *pStr )
            return; // no valid separator -> no replacement
    }

    // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
    if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
        return;

    if( bAtStart )  // at the beginning of a paragraph?
    {
        // Check out the previous paragraph, if it exists.
        // If so, then check to paragraph separator at the end.
        OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
        if (!pPrevPara)
        {
            // valid separator -> replace
            OUString sChar( *pWordStt );
            sChar = rCC.titlecase(sChar); //see fdo#56740
            if (sChar != OUStringChar(*pWordStt))
               rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
            return;
        }

        // From here on pStart / pStr refer to the previous paragraph, while
        // pWordStt / pDelim still refer to the word in the current one.
        aText = *pPrevPara;
        bAtStart = false;
        pStart = aText.getStr();
        pStr = pStart + aText.getLength();

        do {            // skip all trailing blanks
            --pStr;
            if (!NonFieldWordDelim(*pStr))
                break;
            bAtStart = (pStart == pStr);
        } while( !bAtStart );

        if( bAtStart )
            return;  // no valid separator -> no replacement
    }

    // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
    // all three can happen, but not more than once!
    // pExceptStt remembers the position of a full stop so that the word in
    // front of it can be checked against the abbreviation exception list.
    const sal_Unicode* pExceptStt = nullptr;
    bool bContinue = true;
    Flags nFlag = Flags::NONE;
    do
    {
        switch (*pStr)
        {
            // Western and Asian full stop
            case '.':
            case 0x3002:
            case 0xFF0E:
            {
                if (pStr >= pStart + 2 && *(pStr - 2) == '.')
                {
                    //e.g. text "f.o.o. word": Now currently considering
                    //capitalizing word but second last character of
                    //previous word is a .  So probably last word is an
                    //acronym that ends in . and not truly the end of a
                    //previous sentence, so don't autocapitalize this word
                    return;
                }
                if (nFlag & Flags::FullStop)
                    return; // no valid separator -> no replacement
                nFlag |= Flags::FullStop;
                pExceptStt = pStr;
            }
            break;
            case '!':
            case 0xFF01:
            {
                if (nFlag & Flags::ExclamationMark)
                    return; // no valid separator -> no replacement
                nFlag |= Flags::ExclamationMark;
            }
            break;
            case '?':
            case 0xFF1F:
            {
                if (nFlag & Flags::QuestionMark)
                    return; // no valid separator -> no replacement
                nFlag |= Flags::QuestionMark;
            }
            break;
            default:
                // first character that is no sentence terminator: fine if at
                // least one terminator was seen, otherwise give up
                if (nFlag == Flags::NONE)
                    return; // no valid separator -> no replacement
                else
                    bContinue = false;
                break;
        }

        // step to the preceding character; a text consisting only of
        // terminators up to its start is no valid sentence end
        if (bContinue && pStr-- == pStart)
        {
            return; // no valid separator -> no replacement
        }
    } while (bContinue);
    // the exception list is only consulted when a full stop was the only
    // terminator found
    if (Flags::FullStop != nFlag)
        pExceptStt = nullptr;

    // Only capitalize, if string is long enough
    if( 2 > ( pStr - pStart ) )
        return;

    // The character in front of the terminator(s) is no letter or digit:
    // search further back for a digit, or for a second letter, which then
    // marks the word that ended the sentence.
    if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
    {
        bool bValid = false, bAlphaFnd = false;
        const sal_Unicode* pTmpStr = pStr;
        while( !bValid )
        {
            if( rCC.isDigit( aText, pTmpStr - pStart ) )
            {
                bValid = true;
                pStr = pTmpStr - 1;
            }
            else if( rCC.isLetter( aText, pTmpStr - pStart ) )
            {
                if( bAlphaFnd )
                {
                    bValid = true;
                    pStr = pTmpStr;
                }
                else
                    bAlphaFnd = true;
            }
            else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
                break;

            if( pTmpStr == pStart )
                break;

            --pTmpStr;
        }

        if( !bValid )
            return;       // no valid separator -> no replacement
    }

    bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';

    // Search for the beginning of the word
    while (!NonFieldWordDelim(*pStr))
    {
        if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
            bNumericOnly = false;

        if( pStart == pStr )
            break;

        --pStr;
    }

    if( bNumericOnly )      // consists of only numbers, then not
        return;

    // step back onto the first character of the word
    if (NonFieldWordDelim(*pStr))
        ++pStr;

    OUString sWord;

    // check on the basis of the exception list
    if( pExceptStt )
    {
        // the word in front of the full stop, including the full stop itself
        sWord = OUString(pStr, pExceptStt - pStr + 1);
        if( FindInCplSttExceptList(eLang, sWord) )
            return;

        // Delete all non alphanumeric. Test the characters at the
        // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
        OUString sTmp( sWord );
        while( !sTmp.isEmpty() &&
                !rCC.isLetterNumeric( sTmp, 0 ) )
            sTmp = sTmp.copy(1);

        // Remove all non alphanumeric characters towards the end up until
        // the last one.
        sal_Int32 nLen = sTmp.getLength();
        while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
            --nLen;
        if( nLen + 1 < sTmp.getLength() )
            sTmp = sTmp.copy( 0, nLen + 1 );

        if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
            FindInCplSttExceptList(eLang, sTmp))
            return;

        if(FindInCplSttExceptList(eLang, sWord, true))
            return;
    }

    // Ok, then replace
    sal_Unicode cSave = *pWordStt;
    nSttPos = pWordStt - rTxt.getStr();
    OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
    bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );

    // Perhaps someone wants to have the word
    // (the document is told about the replacement when the
    // SaveWordCplSttLst flag is set)
    if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
        rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
}
1159
1160 // Correct accidental use of cAPS LOCK key
1161 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1162                                         sal_Int32 nSttPos, sal_Int32 nEndPos,
1163                                         LanguageType eLang )
1164 {
1165     if (nEndPos - nSttPos < 2)
1166         // string must be at least 2-character long.
1167         return false;
1168
1169     CharClass& rCC = GetCharClass( eLang );
1170
1171     // Check the first 2 letters.
1172     if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1173         return false;
1174
1175     if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1176         return false;
1177
1178     OUStringBuffer aConverted;
1179     aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1180     aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1181
1182     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1183     if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1184         return false;
1185
1186     for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1187     {
1188         if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1189             // A lowercase letter disqualifies the whole text.
1190             return false;
1191
1192         if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1193             // Another uppercase letter.  Convert it.
1194             aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1195         else
1196             // This is not an alphabetic letter.  Leave it as-is.
1197             aConverted.append( rTxt[i] );
1198     }
1199
1200     // Replace the word.
1201     rDoc.Delete(nSttPos, nEndPos);
1202     rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1203
1204     return true;
1205 }
1206
1207
1208 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1209                                         LanguageType eLang ) const
1210 {
1211     sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1212                                     ? GetStartDoubleQuote()
1213                                     : GetStartSingleQuote() )
1214                                    : ( '\"' == cInsChar
1215                                     ? GetEndDoubleQuote()
1216                                     : GetEndSingleQuote() );
1217     if( !cRet )
1218     {
1219         // then through the Language find the right character
1220         if( LANGUAGE_NONE == eLang )
1221             cRet = cInsChar;
1222         else
1223         {
1224             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1225             OUString sRet( bSttQuote
1226                             ? ( '\"' == cInsChar
1227                                 ? rLcl.getDoubleQuotationMarkStart()
1228                                 : rLcl.getQuotationMarkStart() )
1229                             : ( '\"' == cInsChar
1230                                 ? rLcl.getDoubleQuotationMarkEnd()
1231                                 : rLcl.getQuotationMarkEnd() ));
1232             cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1233         }
1234     }
1235     return cRet;
1236 }
1237
1238 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1239                                     sal_Unicode cInsChar, bool bSttQuote,
1240                                     bool bIns, LanguageType eLang, ACQuotes eType ) const
1241 {
1242     sal_Unicode cRet;
1243
1244     if ( eType == ACQuotes::DoubleAngleQuote )
1245     {
1246         bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
1247         // pressing " inside a quotation -> use second level angle quotes
1248         bool bLeftQuote = '\"' == cInsChar &&
1249                 // start position and Romanian OR
1250                 // not start position and Hungarian
1251                 bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
1252         cRet = ( '<' == cInsChar || bLeftQuote )
1253                 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
1254                 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
1255     }
1256     else if ( eType == ACQuotes::UseApostrophe )
1257         cRet = cApostrophe;
1258     else
1259         cRet = GetQuote( cInsChar, bSttQuote, eLang );
1260
1261     OUString sChg( cInsChar );
1262     if( bIns )
1263         rDoc.Insert( nInsPos, sChg );
1264     else
1265         rDoc.Replace( nInsPos, sChg );
1266
1267     sChg = OUString(cRet);
1268
1269     if( eType == ACQuotes::NonBreakingSpace )
1270     {
1271         if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
1272         {
1273             if( !bSttQuote )
1274                 ++nInsPos;
1275         }
1276     }
1277     else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
1278     {
1279         rDoc.Delete( nInsPos-1, nInsPos);
1280         --nInsPos;
1281     }
1282
1283     rDoc.Replace( nInsPos, sChg );
1284
1285     // i' -> I' in English (last step for the Undo)
1286     if( eType == ACQuotes::CapitalizeIAm )
1287         rDoc.Replace( nInsPos-1, "I" );
1288 }
1289
1290 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1291                                 sal_Unicode cInsChar, bool bSttQuote )
1292 {
1293     const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1294     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1295
1296     OUString sRet(cRet);
1297
1298     if( '\"' == cInsChar )
1299     {
1300         if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1301         {
1302             if( bSttQuote )
1303                 sRet += " ";
1304             else
1305                 sRet = " " + sRet;
1306         }
1307     }
1308     return sRet;
1309 }
1310
// search preceding opening quote in the paragraph before the insert position
//
// Scans rTxt backwards, starting with the character directly in front of nPos.
//
// rTxt            text to search
// nPos            position in rTxt; only characters in front of it are examined
// sPrecedingChar  character to look for
// sStopChar       character that ends the search unsuccessfully
// aStopChars      0-terminated array of further characters that end the
//                 search unsuccessfully
//
// Returns true if sPrecedingChar is found before any stop character, false if
// a stop character comes first or the start of the text is reached. An nPos
// that has no preceding character (<= 0) or lies beyond the end of rTxt also
// yields false instead of reading outside of the text.
static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode sStopChar, const sal_Unicode* aStopChars )
{
    // Guard against out-of-range positions: the scan below indexes rTxt with
    // nPos - 1 first, which must be a valid index.
    if ( nPos <= 0 || static_cast<size_t>(nPos) > rTxt.size() )
        return false;

    while ( nPos > 0 )
    {
        const sal_Unicode cTmpChar = rTxt[ --nPos ];
        if ( cTmpChar == sPrecedingChar )
            return true;

        if ( cTmpChar == sStopChar )
            return false;

        for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
            if ( cTmpChar == *pCh )
                return false;
    }

    return false;
}
1333
// WARNING: rTxt may become invalid, see comment below
1335 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1336                                     sal_Int32 nInsPos, sal_Unicode cChar,
1337                                     bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1338 {
1339     bool bIsNextRun = io_bNbspRunNext;
1340     io_bNbspRunNext = false;  // if it was set, then it has to be turned off
1341
1342     do{                                 // only for middle check loop !!
1343         if( cChar )
1344         {
1345             // Prevent double space
1346             if( nInsPos && ' ' == cChar &&
1347                 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1348                 ' ' == rTxt[ nInsPos - 1 ])
1349             {
1350                 break;
1351             }
1352
1353             bool bSingle = '\'' == cChar;
1354             bool bIsReplaceQuote =
1355                         (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1356                         (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1357             if( bIsReplaceQuote )
1358             {
1359                 bool bSttQuote = !nInsPos;
1360                 ACQuotes eType = ACQuotes::NONE;
1361                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1362                 if (!bSttQuote)
1363                 {
1364                     sal_Unicode cPrev = rTxt[ nInsPos-1 ];
1365                     bSttQuote = NonFieldWordDelim(cPrev) ||
1366                         lcl_IsInArr( u"([{", cPrev ) ||
1367                         ( cEmDash == cPrev ) ||
1368                         ( cEnDash == cPrev );
1369                     // tdf#38394 use opening quotation mark << in French l'<<word>>
1370                     if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1371                         primary(eLang) == primary(LANGUAGE_FRENCH) &&
1372                         ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
1373                                // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1374                                OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
1375                           ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
1376                                // abbreviated form of que
1377                                ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
1378                                ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
1379                     {
1380                         bSttQuote = true;
1381                     }
1382                     // tdf#108423 for capitalization of English i'm
1383                     else if ( bSingle && ( cPrev == 'i' ) &&
1384                         primary(eLang) == primary(LANGUAGE_ENGLISH) &&
1385                         ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
1386                     {
1387                         eType = ACQuotes::CapitalizeIAm;
1388                     }
1389                     // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1390                     else if ( !bSingle && nInsPos &&
1391                         ( ( eLang == LANGUAGE_HUNGARIAN &&
1392                             lcl_HasPrecedingChar( rTxt, nInsPos,
1393                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
1394                                 bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEnd[1],
1395                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 2 ) ) ||
1396                           ( eLang.anyOf(
1397                                 LANGUAGE_ROMANIAN,
1398                                 LANGUAGE_ROMANIAN_MOLDOVA ) &&
1399                             lcl_HasPrecedingChar( rTxt, nInsPos,
1400                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
1401                                 bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEndRo[1],
1402                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 2 ) ) ) )
1403                     {
1404                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1405                         // only if the opening double quotation mark is the default one
1406                         if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
1407                             eType = ACQuotes::DoubleAngleQuote;
1408                     }
1409                     else if ( bSingle && nInsPos && !bSttQuote &&
1410                         // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1411                         // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1412                         // tdf#123786 the same for Russian and Ukrainian
1413                         ( eLang.anyOf (
1414                                  LANGUAGE_CZECH,
1415                                  LANGUAGE_GERMAN,
1416                                  LANGUAGE_GERMAN_SWISS,
1417                                  LANGUAGE_GERMAN_AUSTRIAN,
1418                                  LANGUAGE_GERMAN_LUXEMBOURG,
1419                                  LANGUAGE_GERMAN_LIECHTENSTEIN,
1420                                  LANGUAGE_ICELANDIC,
1421                                  LANGUAGE_SLOVAK,
1422                                  LANGUAGE_SLOVENIAN ) ) )
1423                     {
1424                         sal_Unicode sStartChar = GetStartSingleQuote();
1425                         sal_Unicode sEndChar = GetEndSingleQuote();
1426                         if ( !sStartChar || !sEndChar ) {
1427                             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1428                             if ( !sStartChar ) sStartChar = rLcl.getQuotationMarkStart()[0];
1429                             if ( !sEndChar ) sEndChar = rLcl.getQuotationMarkStart()[0];
1430                         }
1431                         if ( !lcl_HasPrecedingChar( rTxt, nInsPos, sStartChar, sEndChar, aStopSingleQuoteEnd + 1 ) )
1432                         {
1433                             CharClass& rCC = GetCharClass( eLang );
1434                             if ( rCC.isLetter(rTxt, nInsPos-1) )
1435                             {
1436                                 eType = ACQuotes::UseApostrophe;
1437                             }
1438                         }
1439                     }
1440                     else if ( bSingle && nInsPos && !bSttQuote &&
1441                           ( eLang.anyOf (
1442                                  LANGUAGE_RUSSIAN,
1443                                  LANGUAGE_UKRAINIAN ) &&
1444                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa[1],  aStopSingleQuoteEndRuUa + 2 ) ) )
1445                     {
1446                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1447                         CharClass& rCC = GetCharClass( eLang );
1448                         if ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) &&
1449                              // use apostrophe only after letters, not after digits or punctuation
1450                              rCC.isLetter(rTxt, nInsPos-1) )
1451                         {
1452                             eType = ACQuotes::UseApostrophe;
1453                         }
1454                     }
1455                 }
1456
1457                 if ( eType == ACQuotes::NONE && !bSingle &&
1458                     ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
1459                     eType = ACQuotes::NonBreakingSpace;
1460
1461                 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
1462                 break;
1463             }
1464             // tdf#133524 change "<<" and ">>" to double angle quotation marks
1465             else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
1466                 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
1467                 ('<' == cChar || '>' == cChar) &&
1468                 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
1469             {
1470                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1471                 if ( eLang.anyOf(
1472                         LANGUAGE_CATALAN,              // primary level
1473                         LANGUAGE_CATALAN_VALENCIAN,    // primary level
1474                         LANGUAGE_FINNISH,              // alternative primary level
1475                         LANGUAGE_FRENCH_SWISS,         // second level
1476                         LANGUAGE_GALICIAN,             // primary level
1477                         LANGUAGE_HUNGARIAN,            // second level
1478                         LANGUAGE_POLISH,               // second level
1479                         LANGUAGE_PORTUGUESE,           // primary level
1480                         LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
1481                         LANGUAGE_ROMANIAN,             // second level
1482                         LANGUAGE_ROMANIAN_MOLDOVA,     // second level
1483                         LANGUAGE_SWEDISH,              // alternative primary level
1484                         LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
1485                         LANGUAGE_UKRAINIAN,            // primary level
1486                         LANGUAGE_USER_ARAGONESE,       // primary level
1487                         LANGUAGE_USER_ASTURIAN ) ||    // primary level
1488                     primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
1489                     primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
1490                 {
1491                     InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
1492                     break;
1493                 }
1494             }
1495
1496             if( bInsert )
1497                 rDoc.Insert( nInsPos, OUString(cChar) );
1498             else
1499                 rDoc.Replace( nInsPos, OUString(cChar) );
1500
1501             // Hardspaces autocorrection
1502             if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1503             {
1504                 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1505                 // and its length may change (even become shorter) if FnAddNonBrkSpace succeeds!
1506                 sal_Int32 nUpdatedPos = -1;
1507                 if (NeedsHardspaceAutocorr(cChar))
1508                     nUpdatedPos = FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext );
1509                 if (nUpdatedPos >= 0)
1510                 {
1511                     nInsPos = nUpdatedPos;
1512                 }
1513                 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1514                 {
1515                     // Remove the NBSP if it wasn't an autocorrection
1516                     if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1517                             cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1518                     {
1519                         // Look for the last HARD_SPACE
1520                         sal_Int32 nPos = nInsPos - 1;
1521                         bool bContinue = true;
1522                         while ( bContinue )
1523                         {
1524                             const sal_Unicode cTmpChar = rTxt[ nPos ];
1525                             if ( cTmpChar == cNonBreakingSpace )
1526                             {
1527                                 rDoc.Delete( nPos, nPos + 1 );
1528                                 bContinue = false;
1529                             }
1530                             else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1531                                 bContinue = false;
1532                             nPos--;
1533                         }
1534                     }
1535                 }
1536             }
1537         }
1538
1539         if( !nInsPos )
1540             break;
1541
1542         sal_Int32 nPos = nInsPos - 1;
1543
1544         if( IsWordDelim( rTxt[ nPos ]))
1545             break;
1546
1547         // Set bold or underline automatically?
1548         if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1549         {
1550             if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1551             {
1552                 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1553             }
1554             break;
1555         }
1556
1557         while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1558             ;
1559
1560         // Found a Paragraph-start or a Blank, search for the word shortcut in
1561         // auto.
1562         sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1563         if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1564             --nCapLttrPos;          // begin of paragraph and no blank
1565
1566         const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1567         CharClass& rCC = GetCharClass( eLang );
1568
1569         // no symbol characters
1570         if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1571             break;
1572
1573         if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
1574             // tdf#134940 fix regression of arrow "-->" resulted by premature
1575             // replacement of "--" since '>' was added to IsAutoCorrectChar()
1576             '>' != cChar )
1577         {
1578             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1579             // and becomes INVALID if ChgAutoCorrWord returns true!
1580             // => use aPara/pPara to create a valid copy of the string!
1581             OUString aPara;
1582             OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1583
1584             bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1585                                                     *this, pPara );
1586             if( !bChgWord )
1587             {
1588                 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1589                 while( nCapLttrPos1 < nInsPos &&
1590                         lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1591                         )
1592                         ++nCapLttrPos1;
1593                 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1594                         lcl_IsInArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1595                         )
1596                         --nInsPos1;
1597
1598                 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1599                     nCapLttrPos1 < nInsPos1 &&
1600                     rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1601                 {
1602                     bChgWord = true;
1603                     nCapLttrPos = nCapLttrPos1;
1604                 }
1605             }
1606
1607             if( bChgWord )
1608             {
1609                 if( !aPara.isEmpty() )
1610                 {
1611                     sal_Int32 nEnd = nCapLttrPos;
1612                     while( nEnd < aPara.getLength() &&
1613                             !IsWordDelim( aPara[ nEnd ]))
1614                         ++nEnd;
1615
1616                     // Capital letter at beginning of paragraph?
1617                     if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1618                     {
1619                         FnCapitalStartSentence( rDoc, aPara, false,
1620                                                 nCapLttrPos, nEnd, eLang );
1621                     }
1622
1623                     if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1624                     {
1625                         FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1626                     }
1627                 }
1628                 break;
1629             }
1630         }
1631
1632         if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
1633         {
1634             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1635             // and becomes INVALID if TransliterateRTLWord returns true!
1636             if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
1637                 break;
1638         }
1639
1640         if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1641                 (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
1642                 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1643                 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1644             ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1645                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1646                 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1647             ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) &&
1648                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1649                 FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1650             ;
1651         else
1652         {
1653             bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1654             bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1655
1656             if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1657                  FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1658             {
1659                 // Correct accidental use of cAPS LOCK key (do this only when
1660                 // the caps or shift lock key is pressed). Turn off the caps
1661                 // lock afterwards.
1662                 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1663             }
1664
1665             // Capital letter at beginning of paragraph ?
1666             if( !bUnsupported &&
1667                 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1668             {
1669                 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1670             }
1671
1672             // Two capital letters at beginning of word ??
1673             if( !bUnsupported &&
1674                 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1675             {
1676                 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1677             }
1678
1679             if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1680             {
1681                 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1682             }
1683         }
1684
1685     } while( false );
1686 }
1687
1688 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1689                                                         LanguageType eLang )
1690 {
1691     LanguageTag aLanguageTag( eLang);
1692     if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1693         (void)CreateLanguageFile(aLanguageTag);
1694     const auto iter = m_aLangTable.find(aLanguageTag);
1695     assert(iter != m_aLangTable.end());
1696     return iter->second;
1697 }
1698
1699 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1700 {
1701     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1702     if (iter != m_aLangTable.end())
1703         iter->second.SaveCplSttExceptList();
1704     else
1705     {
1706         SAL_WARN("editeng", "Save an empty list? ");
1707     }
1708 }
1709
1710 void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang)
1711 {
1712     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1713     if (iter != m_aLangTable.end())
1714         iter->second.SaveWordStartExceptList();
1715     else
1716     {
1717         SAL_WARN("editeng", "Save an empty list? ");
1718     }
1719 }
1720
1721 // Adds a single word. The list will immediately be written to the file!
1722 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1723                                         LanguageType eLang )
1724 {
1725     SvxAutoCorrectLanguageLists* pLists = nullptr;
1726     // either the right language is present or it will be this in the general list
1727     auto iter = m_aLangTable.find(LanguageTag(eLang));
1728     if (iter != m_aLangTable.end())
1729         pLists = &iter->second;
1730     else
1731     {
1732         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1733         iter = m_aLangTable.find(aLangTagUndetermined);
1734         if (iter != m_aLangTable.end())
1735             pLists = &iter->second;
1736         else if(CreateLanguageFile(aLangTagUndetermined))
1737         {
1738             iter = m_aLangTable.find(aLangTagUndetermined);
1739             assert(iter != m_aLangTable.end());
1740             pLists = &iter->second;
1741         }
1742     }
1743     OSL_ENSURE(pLists, "No auto correction data");
1744     return pLists && pLists->AddToCplSttExceptList(rNew);
1745 }
1746
1747 // Adds a single word. The list will immediately be written to the file!
1748 bool SvxAutoCorrect::AddWordStartException( const OUString& rNew,
1749                                          LanguageType eLang )
1750 {
1751     SvxAutoCorrectLanguageLists* pLists = nullptr;
1752     //either the right language is present or it is set in the general list
1753     auto iter = m_aLangTable.find(LanguageTag(eLang));
1754     if (iter != m_aLangTable.end())
1755         pLists = &iter->second;
1756     else
1757     {
1758         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1759         iter = m_aLangTable.find(aLangTagUndetermined);
1760         if (iter != m_aLangTable.end())
1761             pLists = &iter->second;
1762         else if(CreateLanguageFile(aLangTagUndetermined))
1763         {
1764             iter = m_aLangTable.find(aLangTagUndetermined);
1765             assert(iter != m_aLangTable.end());
1766             pLists = &iter->second;
1767         }
1768     }
1769     OSL_ENSURE(pLists, "No auto correction file!");
1770     return pLists && pLists->AddToWordStartExceptList(rNew);
1771 }
1772
1773 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1774                                              sal_Int32 nPos)
1775 {
1776     OUString sRet;
1777     if( !nPos )
1778         return sRet;
1779
1780     sal_Int32 nEnd = nPos;
1781
1782     // it must be followed by a blank or tab!
1783     if( ( nPos < rTxt.getLength() &&
1784         !IsWordDelim( rTxt[ nPos ])) ||
1785         IsWordDelim( rTxt[ --nPos ]))
1786         return sRet;
1787
1788     while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1789         ;
1790
1791     // Found a Paragraph-start or a Blank, search for the word shortcut in
1792     // auto.
1793     sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1794     if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1795         --nCapLttrPos;          // Beginning of paragraph and no Blank!
1796
1797     while( lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1798         if( ++nCapLttrPos >= nEnd )
1799             return sRet;
1800
1801     if( 3 > nEnd - nCapLttrPos )
1802         return sRet;
1803
1804     const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1805
1806     CharClass& rCC = GetCharClass(eLang);
1807
1808     if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1809         return sRet;
1810
1811     sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1812     return sRet;
1813 }
1814
1815 // static
1816 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt,
1817                                                           const sal_Int32 nPos)
1818 {
1819     constexpr sal_Int32 nMinLen = 3;
1820     constexpr sal_Int32 nMaxLen = 9;
1821     std::vector<OUString> aRes;
1822     if (nPos >= nMinLen)
1823     {
1824         sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1825         // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1826         if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1827         {
1828             while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1829                 ++nBegin;
1830         }
1831         if (nBegin + nMinLen <= nPos)
1832         {
1833             OUString sRes( rTxt.substr(nBegin, nPos - nBegin) );
1834             aRes.push_back(sRes);
1835             bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1836             for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1837             {
1838                 bool bAdd = bLastStartedWithDelim;
1839                 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1840                 bAdd = bAdd || bLastStartedWithDelim;
1841                 if (bAdd)
1842                     aRes.push_back(sRes.copy(i));
1843             }
1844         }
1845     }
1846     return aRes;
1847 }
1848
1849 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1850 {
         // Tries to register a list set for rLanguageTag in m_aLangTable.
         // Returns true when an entry was emplaced (pLists got set), false
         // otherwise. With bNewFile set, an entry is registered even if none of
         // the candidate files exists; both file names then name the user file.
         // aLastFileTable remembers, per language tag, the time of the last
         // unsuccessful file check made with bNewFile == false.
1851     OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1852
         // file name built from sUserAutoCorrFile; also the initial value of
         // the share file name
1853     OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1854     OUString sShareDirFile( sUserDirFile );
1855
1856     SvxAutoCorrectLanguageLists* pLists = nullptr;
1857
1858     tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1859
1860     auto nFndPos = aLastFileTable.find(rLanguageTag);
         // the comma expression loads the remembered check time into
         // nLastCheckTime before it is compared with the current time
1861     if(nFndPos != aLastFileTable.end() &&
1862        (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1863        nAktTime - nLastCheckTime < nMinTime)
1864     {
1865         // no need to test the file, because the last check is not older than
1866         // 2 minutes.
1867         if( bNewFile )
1868         {
1869             sShareDirFile = sUserDirFile;
1870             auto itBool = m_aLangTable.emplace(std::piecewise_construct,
1871                             std::forward_as_tuple(rLanguageTag),
1872                             std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
1873             pLists = &itBool.first->second;
1874             aLastFileTable.erase(nFndPos);
1875         }
1876     }
         // Otherwise probe the candidate files in order; sShareDirFile is
         // reassigned as a side effect of evaluating the condition:
         //  1. the user file (sShareDirFile keeps its initial value),
         //  2. GetAutoCorrFileName( rLanguageTag ) - presumably the shared file
         //     for the full tag; the default arguments are not visible here,
         //  3. the shared file for the first fallback string of the tag.
         // If none exists, sShareDirFile is reset to the user file and
         // bNewFile decides whether an entry is registered nevertheless.
1877     else if(
1878              ( FStatHelper::IsDocument( sUserDirFile ) ||
1879                FStatHelper::IsDocument( sShareDirFile =
1880                    GetAutoCorrFileName( rLanguageTag ) ) ||
1881                FStatHelper::IsDocument( sShareDirFile =
1882                    GetAutoCorrFileName( rLanguageTag, false, false, true) )
1883              ) ||
1884         ( sShareDirFile = sUserDirFile, bNewFile )
1885           )
1886     {
1887         auto itBool = m_aLangTable.emplace(std::piecewise_construct,
1888                         std::forward_as_tuple(rLanguageTag),
1889                         std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
1890         pLists = &itBool.first->second;
             // a remembered failed check is obsolete now
1891         if (nFndPos != aLastFileTable.end())
1892             aLastFileTable.erase(nFndPos);
1893     }
1894     else if( !bNewFile )
1895     {
             // nothing found: remember the time of this check
1896         aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1897     }
1898     return pLists != nullptr;
1899 }
1900
1901 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1902                                 LanguageType eLang )
1903 {
1904     LanguageTag aLanguageTag( eLang);
1905     if (auto const iter = m_aLangTable.find(aLanguageTag); iter != m_aLangTable.end())
1906         return iter->second.PutText(rShort, rLong);
1907     if (CreateLanguageFile(aLanguageTag))
1908     {
1909         auto const iter = m_aLangTable.find(aLanguageTag);
1910         assert (iter != m_aLangTable.end());
1911         return iter->second.PutText(rShort, rLong);
1912     }
1913     return false;
1914 }
1915
1916 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1917                                               std::vector<SvxAutocorrWord>& aDeleteEntries,
1918                                               LanguageType eLang )
1919 {
1920     LanguageTag aLanguageTag( eLang);
1921     auto iter = m_aLangTable.find(aLanguageTag);
1922     if (iter != m_aLangTable.end())
1923     {
1924         iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
1925     }
1926     else if(CreateLanguageFile( aLanguageTag ))
1927     {
1928         iter = m_aLangTable.find(aLanguageTag);
1929         assert(iter != m_aLangTable.end());
1930         iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
1931     }
1932 }
1933
1934 //  - return the replacement text (only for SWG-Format, all others
1935 //    can be taken from the word list!)
     //    This implementation ignores both arguments and always returns false,
     //    i.e. it never supplies a replacement text.
1936 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1937 {
1938     return false;
1939 }
1940
1941 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1942 {
         // Empty body: this implementation ignores the storage and does nothing.
1943 }
1944
1945 // Text with attribution (only the SWG - SWG format!)
     // This implementation ignores all of its arguments and always returns false.
1946 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1947                               const OUString&, const OUString&, SfxObjectShell&, OUString& )
1948 {
1949     return false;
1950 }
1951
1952 OUString EncryptBlockName_Imp(std::u16string_view rName)
1953 {
1954     OUStringBuffer aName;
1955     aName.append('#').append(rName);
1956     for (size_t nLen = rName.size(), nPos = 1; nPos < nLen; ++nPos)
1957     {
1958         if (lcl_IsInArr( u"!/:.\\", aName[nPos]))
1959             aName[nPos] &= 0x0f;
1960     }
1961     return aName.makeStringAndClear();
1962 }
1963
1964 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1965 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
1966 {
1967     OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1968     OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1969
1970     for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1971     {
1972         switch (aBuf[nPos])
1973         {
1974             case '!':
1975             case '/':
1976             case ':':
1977             case '.':
1978             case '\\':
1979             // tdf#156769 - escape the question mark in the storage name
1980             case '?':
1981                 aBuf[nPos] = '_';
1982                 break;
1983             default:
1984                 break;
1985         }
1986     }
1987
1988     rPackageName = aBuf.makeStringAndClear();
1989 }
1990
1991 static const SvxAutocorrWord* lcl_SearchWordsInList(
1992                 SvxAutoCorrectLanguageLists* pList, std::u16string_view rTxt,
1993                 sal_Int32& rStt, sal_Int32 nEndPos)
1994 {
1995     const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1996     return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1997 }
1998
1999 // the search for the words in the substitution table
2000 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
2001                 std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
2002                 SvxAutoCorrDoc&, LanguageTag& rLang )
2003 {
2004     const SvxAutocorrWord* pRet = nullptr;
2005     LanguageTag aLanguageTag( rLang);
2006     if( aLanguageTag.isSystemLocale() )
2007         aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage());
2008
2009     /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
2010      * list instead? */
2011
2012     // First search for eLang, then US-English -> English
2013     // and last in LANGUAGE_UNDETERMINED
2014     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2015     {
2016         //the language is available - so bring it on
2017         const auto iter = m_aLangTable.find(aLanguageTag);
2018         assert(iter != m_aLangTable.end());
2019         SvxAutoCorrectLanguageLists & rList = iter->second;
2020         pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2021         if( pRet )
2022         {
2023             rLang = aLanguageTag;
2024             return pRet;
2025         }
2026         else
2027             return nullptr;
2028     }
2029
2030     // If it still could not be found here, then keep on searching
2031     LanguageType eLang = aLanguageTag.getLanguageType();
2032     // the primary language for example EN
2033     aLanguageTag.reset(aLanguageTag.getLanguage());
2034     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2035     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2036                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2037                  CreateLanguageFile(aLanguageTag, false)))
2038     {
2039         //the language is available - so bring it on
2040         SvxAutoCorrectLanguageLists& rList = m_aLangTable.find(aLanguageTag)->second;
2041         pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2042         if( pRet )
2043         {
2044             rLang = aLanguageTag;
2045             return pRet;
2046         }
2047     }
2048
2049     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2050             CreateLanguageFile(aLanguageTag, false))
2051     {
2052         //the language is available - so bring it on
2053         const auto iter = m_aLangTable.find(aLanguageTag);
2054         assert(iter != m_aLangTable.end());
2055         SvxAutoCorrectLanguageLists& rList = iter->second;
2056         pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2057         if( pRet )
2058         {
2059             rLang = aLanguageTag;
2060             return pRet;
2061         }
2062     }
2063     return nullptr;
2064 }
2065
2066 bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang,
2067                                              const OUString& sWord )
2068 {
2069     LanguageTag aLanguageTag( eLang);
2070
2071     /* TODO-BCP47: again horrible ugliness */
2072
2073     // First search for eLang, then primary language of eLang
2074     // and last in LANGUAGE_UNDETERMINED
2075
2076     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2077     {
2078         //the language is available - so bring it on
2079         const auto iter = m_aLangTable.find(aLanguageTag);
2080         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2081         auto& rList = iter->second;
2082         if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2083             return true;
2084     }
2085
2086     // If it still could not be found here, then keep on searching
2087     // the primary language for example EN
2088     aLanguageTag.reset(aLanguageTag.getLanguage());
2089     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2090     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2091                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2092                  CreateLanguageFile(aLanguageTag, false)))
2093     {
2094         //the language is available - so bring it on
2095         const auto iter = m_aLangTable.find(aLanguageTag);
2096         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2097         auto& rList = iter->second;
2098         if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2099             return true;
2100     }
2101
2102     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2103             CreateLanguageFile(aLanguageTag, false))
2104     {
2105         //the language is available - so bring it on
2106         const auto iter = m_aLangTable.find(aLanguageTag);
2107         assert(iter != m_aLangTable.end());
2108         auto& rList = iter->second;
2109         if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2110             return true;
2111     }
2112     return false;
2113 }
2114
2115 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
2116 {
2117     SvStringsISortDtor::const_iterator it = pList->find( "~" );
2118     SvStringsISortDtor::size_type nPos = it - pList->begin();
2119     if( nPos < pList->size() )
2120     {
2121         OUString sLowerWord(sWord.toAsciiLowerCase());
2122         OUString sAbr;
2123         for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
2124         {
2125             sAbr = (*pList)[ n ];
2126             if (sAbr[0] != '~')
2127                 break;
2128             // ~ and ~. are not allowed!
2129             if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
2130             {
2131                 OUString sLowerAbk(sAbr.toAsciiLowerCase());
2132                 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
2133                 {
2134                     if( !--i )      // agrees
2135                         return true;
2136
2137                     if( sLowerAbk[i] != sLowerWord[--ii])
2138                         break;
2139                 }
2140             }
2141         }
2142     }
2143     OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
2144             "Wrongly sorted exception list?" );
2145     return false;
2146 }
2147
2148 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
2149                                 const OUString& sWord, bool bAbbreviation)
2150 {
2151     LanguageTag aLanguageTag( eLang);
2152
2153     /* TODO-BCP47: did I mention terrible horrible ugliness? */
2154
2155     // First search for eLang, then primary language of eLang
2156     // and last in LANGUAGE_UNDETERMINED
2157
2158     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2159     {
2160         //the language is available - so bring it on
2161         const auto iter = m_aLangTable.find(aLanguageTag);
2162         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2163         const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2164         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2165             return true;
2166     }
2167
2168     // If it still could not be found here, then keep on searching
2169     // the primary language for example EN
2170     aLanguageTag.reset(aLanguageTag.getLanguage());
2171     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2172     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2173                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2174                  CreateLanguageFile(aLanguageTag, false)))
2175     {
2176         //the language is available - so bring it on
2177         const auto iter = m_aLangTable.find(aLanguageTag);
2178         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2179         const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2180         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2181             return true;
2182     }
2183
2184     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2185             CreateLanguageFile(aLanguageTag, false))
2186     {
2187         //the language is available - so bring it on
2188         const auto iter = m_aLangTable.find(aLanguageTag);
2189         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2190         const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2191         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2192             return true;
2193     }
2194     return false;
2195 }
2196
2197 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
2198                                             bool bNewFile, bool bTst, bool bUnlocalized ) const
2199 {
2200     OUString sRet, sExt( rLanguageTag.getBcp47() );
2201     if (bUnlocalized)
2202     {
2203         // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2204         std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
2205         if (!vecFallBackStrings.empty())
2206            sExt = vecFallBackStrings[0];
2207     }
2208
2209     sExt = "_" + sExt + ".dat";
2210     if( bNewFile )
2211         sRet = sUserAutoCorrFile + sExt;
2212     else if( !bTst )
2213         sRet = sShareAutoCorrFile + sExt;
2214     else
2215     {
2216         // test first in the user directory - if not exist, then
2217         sRet = sUserAutoCorrFile + sExt;
2218         if( !FStatHelper::IsDocument( sRet ))
2219             sRet = sShareAutoCorrFile + sExt;
2220     }
2221     return sRet;
2222 }
2223
2224 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2225                 SvxAutoCorrect& rParent,
2226                 OUString aShareAutoCorrectFile,
2227                 OUString aUserAutoCorrectFile)
2228 :   sShareAutoCorrFile(std::move( aShareAutoCorrectFile )),
2229     sUserAutoCorrFile(std::move( aUserAutoCorrectFile )),
2230     aModifiedDate( Date::EMPTY ),
2231     aModifiedTime( tools::Time::EMPTY ),
2232     aLastCheckTime( tools::Time::EMPTY ),
2233     rAutoCorrect(rParent),
2234     nFlags(ACFlags::NONE)
2235 {
2236 }
2237
2238 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2239 {
2240 }
2241
2242 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2243 {
2244     // Access the file system only every 2 minutes to check the date stamp
2245     bool bRet = false;
2246
2247     tools::Time nMinTime( 0, 2 );
2248     tools::Time nAktTime( tools::Time::SYSTEM );
2249     if( aLastCheckTime <= nAktTime) // overflow?
2250         return false;
2251     nAktTime -= aLastCheckTime;
2252     if( nAktTime > nMinTime )     // min time past
2253     {
2254         Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2255         if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2256                                             &aTstDate, &aTstTime ) &&
2257             ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2258         {
2259             bRet = true;
2260             // then remove all the lists fast!
2261             if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2262             {
2263                 pCplStt_ExcptLst.reset();
2264             }
2265             if( (ACFlags::WordStartLstLoad & nFlags) && pWordStart_ExcptLst )
2266             {
2267                 pWordStart_ExcptLst.reset();
2268             }
2269             if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2270             {
2271                 pAutocorr_List.reset();
2272             }
2273             nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WordStartLstLoad | ACFlags::ChgWordLstLoad );
2274         }
2275         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2276     }
2277     return bRet;
2278 }
2279
2280 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2281                                         std::unique_ptr<SvStringsISortDtor>& rpLst,
2282                                         const OUString& sStrmName,
2283                                         tools::SvRef<SotStorage>& rStg)
2284 {
2285     if( rpLst )
2286         rpLst->clear();
2287     else
2288         rpLst.reset( new SvStringsISortDtor );
2289
2290     {
2291         if( rStg.is() && rStg->IsStream( sStrmName ) )
2292         {
2293             tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2294                 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2295             if( ERRCODE_NONE != xStrm->GetError())
2296             {
2297                 xStrm.clear();
2298                 rStg.clear();
2299                 RemoveStream_Imp( sStrmName );
2300             }
2301             else
2302             {
2303                 uno::Reference< uno::XComponentContext > xContext =
2304                     comphelper::getProcessComponentContext();
2305
2306                 xml::sax::InputSource aParserInput;
2307                 aParserInput.sSystemId = sStrmName;
2308
2309                 xStrm->Seek( 0 );
2310                 xStrm->SetBufferSize( 8 * 1024 );
2311                 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2312
2313                 // get filter
2314                 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2315
2316                 // connect parser and filter
2317                 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2318                 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2319                 xParser->setFastDocumentHandler( xFilter );
2320                 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2321                 xParser->setTokenHandler( xTokenHandler );
2322
2323                 // parse
2324                 try
2325                 {
2326                     xParser->parseStream( aParserInput );
2327                 }
2328                 catch( const xml::sax::SAXParseException& )
2329                 {
2330                     // re throw ?
2331                 }
2332                 catch( const xml::sax::SAXException& )
2333                 {
2334                     // re throw ?
2335                 }
2336                 catch( const io::IOException& )
2337                 {
2338                     // re throw ?
2339                 }
2340             }
2341         }
2342
2343         // Set time stamp
2344         FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2345                                         &aModifiedDate, &aModifiedTime );
2346         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2347     }
2348
2349 }
2350
2351 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2352                             const SvStringsISortDtor& rLst,
2353                             const OUString& sStrmName,
2354                             tools::SvRef<SotStorage> const &rStg,
2355                             bool bConvert )
2356 {
2357     if( !rStg.is() )
2358         return;
2359
2360     if( rLst.empty() )
2361     {
2362         rStg->Remove( sStrmName );
2363         rStg->Commit();
2364     }
2365     else
2366     {
2367         tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2368                 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2369         if( xStrm.is() )
2370         {
2371             xStrm->SetSize( 0 );
2372             xStrm->SetBufferSize( 8192 );
2373             xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2374
2375
2376             uno::Reference< uno::XComponentContext > xContext =
2377                 comphelper::getProcessComponentContext();
2378
2379             uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
2380             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2381             xWriter->setOutputStream(xOut);
2382
2383             uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2384             rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2385
2386             xExp->exportDoc( XML_BLOCK_LIST );
2387
2388             xStrm->Commit();
2389             if( xStrm->GetError() == ERRCODE_NONE )
2390             {
2391                 xStrm.clear();
2392                 if (!bConvert)
2393                 {
2394                     rStg->Commit();
2395                     if( ERRCODE_NONE != rStg->GetError() )
2396                     {
2397                         rStg->Remove( sStrmName );
2398                         rStg->Commit();
2399                     }
2400                 }
2401             }
2402         }
2403     }
2404 }
2405
2406 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2407 {
2408     if( pAutocorr_List )
2409         pAutocorr_List->DeleteAndDestroyAll();
2410     else
2411         pAutocorr_List.reset( new SvxAutocorrWordList() );
2412
2413     try
2414     {
2415         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2416         uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2417         uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2418
2419         xml::sax::InputSource aParserInput;
2420         aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2421         aParserInput.aInputStream = xStrm->getInputStream();
2422
2423         // get parser
2424         uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2425         SAL_INFO("editeng", "AutoCorrect Import" );
2426         uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2427         uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2428
2429         // connect parser and filter
2430         xParser->setFastDocumentHandler( xFilter );
2431         xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2432         xParser->setTokenHandler(xTokenHandler);
2433
2434         // parse
2435         xParser->parseStream( aParserInput );
2436     }
2437     catch ( const uno::Exception& )
2438     {
2439         TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2440     }
2441
2442     // Set time stamp
2443     FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2444                                     &aModifiedDate, &aModifiedTime );
2445     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2446
2447     return pAutocorr_List.get();
2448 }
2449
2450 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2451 {
2452     if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2453     {
2454         LoadAutocorrWordList();
2455         if( !pAutocorr_List )
2456         {
2457             OSL_ENSURE( false, "No valid list" );
2458             pAutocorr_List.reset( new SvxAutocorrWordList() );
2459         }
2460         nFlags |= ACFlags::ChgWordLstLoad;
2461     }
2462     return pAutocorr_List.get();
2463 }
2464
2465 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2466 {
2467     if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2468     {
2469         LoadCplSttExceptList();
2470         if( !pCplStt_ExcptLst )
2471         {
2472             OSL_ENSURE( false, "No valid list" );
2473             pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2474         }
2475         nFlags |= ACFlags::CplSttLstLoad;
2476     }
2477     return pCplStt_ExcptLst.get();
2478 }
2479
2480 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2481 {
2482     bool bRet = false;
2483     if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2484     {
2485         MakeUserStorage_Impl();
2486         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2487
2488         SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2489
2490         xStg = nullptr;
2491         // Set time stamp
2492         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2493                                             &aModifiedDate, &aModifiedTime );
2494         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2495         bRet = true;
2496     }
2497     return bRet;
2498 }
2499
2500 bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString& rNew)
2501 {
2502     bool bRet = false;
2503     if( !rNew.isEmpty() && GetWordStartExceptList()->insert( rNew ).second )
2504     {
2505         MakeUserStorage_Impl();
2506         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2507
2508         SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2509
2510         xStg = nullptr;
2511         // Set time stamp
2512         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2513                                             &aModifiedDate, &aModifiedTime );
2514         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2515         bRet = true;
2516     }
2517     return bRet;
2518 }
2519
2520 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2521 {
2522     try
2523     {
2524         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2525         if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2526             LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2527     }
2528     catch (const css::ucb::ContentCreationException&)
2529     {
2530     }
2531     return pCplStt_ExcptLst.get();
2532 }
2533
2534 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2535 {
2536     MakeUserStorage_Impl();
2537     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2538
2539     SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2540
2541     xStg = nullptr;
2542
2543     // Set time stamp
2544     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2545                                             &aModifiedDate, &aModifiedTime );
2546     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2547 }
2548
2549 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWordStartExceptList()
2550 {
2551     try
2552     {
2553         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2554         if( xStg.is() && xStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
2555             LoadXMLExceptList_Imp( pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2556     }
2557     catch (const css::ucb::ContentCreationException &)
2558     {
2559         TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList");
2560     }
2561     return pWordStart_ExcptLst.get();
2562 }
2563
2564 void SvxAutoCorrectLanguageLists::SaveWordStartExceptList()
2565 {
2566     MakeUserStorage_Impl();
2567     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2568
2569     SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2570
2571     xStg = nullptr;
2572     // Set time stamp
2573     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2574                                             &aModifiedDate, &aModifiedTime );
2575     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2576 }
2577
2578 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWordStartExceptList()
2579 {
2580     if( !( ACFlags::WordStartLstLoad & nFlags ) || IsFileChanged_Imp() )
2581     {
2582         LoadWordStartExceptList();
2583         if( !pWordStart_ExcptLst )
2584         {
2585             OSL_ENSURE( false, "No valid list" );
2586             pWordStart_ExcptLst.reset( new SvStringsISortDtor );
2587         }
2588         nFlags |= ACFlags::WordStartLstLoad;
2589     }
2590     return pWordStart_ExcptLst.get();
2591 }
2592
2593 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2594 {
2595     if( sShareAutoCorrFile != sUserAutoCorrFile )
2596     {
2597         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2598         if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2599             xStg->IsStream( rName ) )
2600         {
2601             xStg->Remove( rName );
2602             xStg->Commit();
2603
2604             xStg = nullptr;
2605         }
2606     }
2607 }
2608
2609 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2610 {
2611     // The conversion needs to happen if the file is already in the user
2612     // directory and is in the old format. Additionally it needs to
2613     // happen when the file is being copied from share to user.
2614
2615     bool bError = false, bConvert = false, bCopy = false;
2616     INetURLObject aDest;
2617     INetURLObject aSource;
2618
2619     if (sUserAutoCorrFile != sShareAutoCorrFile )
2620     {
2621         aSource = INetURLObject ( sShareAutoCorrFile );
2622         aDest = INetURLObject ( sUserAutoCorrFile );
2623         if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2624         {
2625             aDest.SetExtension ( u"bak" );
2626             bConvert = true;
2627         }
2628         bCopy = true;
2629     }
2630     else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2631     {
2632         aSource = INetURLObject ( sUserAutoCorrFile );
2633         aDest = INetURLObject ( sUserAutoCorrFile );
2634         aDest.SetExtension ( u"bak" );
2635         bCopy = bConvert = true;
2636     }
2637     if (bCopy)
2638     {
2639         try
2640         {
2641             OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
2642             sal_Int32 nSlashPos = sMain.lastIndexOf('/');
2643             sMain = sMain.copy(0, nSlashPos);
2644             ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2645             TransferInfo aInfo;
2646             aInfo.NameClash = NameClash::OVERWRITE;
2647             aInfo.NewTitle = aDest.GetLastName();
2648             aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
2649             aInfo.MoveData  = false;
2650             aNewContent.executeCommand( "transfer", Any(aInfo));
2651         }
2652         catch (...)
2653         {
2654             bError = true;
2655         }
2656     }
2657     if (bConvert && !bError)
2658     {
2659         tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
2660         tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
2661
2662         if( xSrcStg.is() && xDstStg.is() )
2663         {
2664             std::unique_ptr<SvStringsISortDtor> pTmpWordList;
2665
2666             if (xSrcStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
2667                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWordStart_ExcptLstStr, xSrcStg );
2668
2669             if (pTmpWordList)
2670             {
2671                 SaveExceptList_Imp( *pTmpWordList, pXMLImplWordStart_ExcptLstStr, xDstStg, true );
2672                 pTmpWordList.reset();
2673             }
2674
2675
2676             if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2677                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2678
2679             if (pTmpWordList)
2680             {
2681                 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2682                 pTmpWordList->clear();
2683             }
2684
2685             GetAutocorrWordList();
2686             MakeBlocklist_Imp( *xDstStg );
2687             sShareAutoCorrFile = sUserAutoCorrFile;
2688             xDstStg = nullptr;
2689             try
2690             {
2691                 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2692                 aContent.executeCommand ( "delete", Any ( true ) );
2693             }
2694             catch (...)
2695             {
2696             }
2697         }
2698     }
2699     else if( bCopy && !bError )
2700         sShareAutoCorrFile = sUserAutoCorrFile;
2701 }
2702
2703 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2704 {
2705     bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2706     if( !bRemove )
2707     {
2708         tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2709                     ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2710         if( refList.is() )
2711         {
2712             refList->SetSize( 0 );
2713             refList->SetBufferSize( 8192 );
2714             refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2715
2716             uno::Reference< uno::XComponentContext > xContext =
2717                 comphelper::getProcessComponentContext();
2718
2719             uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2720             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2721             xWriter->setOutputStream(xOut);
2722
2723             rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2724
2725             xExp->exportDoc( XML_BLOCK_LIST );
2726
2727             refList->Commit();
2728             bRet = ERRCODE_NONE == refList->GetError();
2729             if( bRet )
2730             {
2731                 refList.clear();
2732                 rStg.Commit();
2733                 if( ERRCODE_NONE != rStg.GetError() )
2734                 {
2735                     bRemove = true;
2736                     bRet = false;
2737                 }
2738             }
2739         }
2740         else
2741             bRet = false;
2742     }
2743
2744     if( bRemove )
2745     {
2746         rStg.Remove( pXMLImplAutocorr_ListStr );
2747         rStg.Commit();
2748     }
2749
2750     return bRet;
2751 }
2752
2753 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2754 {
2755     // First get the current list!
2756     GetAutocorrWordList();
2757
2758     MakeUserStorage_Impl();
2759     tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2760
2761     bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2762
2763     if( bRet )
2764     {
2765         for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2766         {
2767             std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2768             if( xFoundEntry )
2769             {
2770                 if( !xFoundEntry->IsTextOnly() )
2771                 {
2772                     OUString aName( aWordToDelete.GetShort() );
2773                     if (xStorage->IsOLEStorage())
2774                         aName = EncryptBlockName_Imp(aName);
2775                     else
2776                         GeneratePackageName ( aWordToDelete.GetShort(), aName );
2777
2778                     if( xStorage->IsContained( aName ) )
2779                     {
2780                         xStorage->Remove( aName );
2781                         bRet = xStorage->Commit();
2782                     }
2783                 }
2784             }
2785         }
2786
2787         for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
2788         {
2789             SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
2790             std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2791             if( xRemoved )
2792             {
2793                 if( !xRemoved->IsTextOnly() )
2794                 {
2795                     // Still have to remove the Storage
2796                     OUString sStorageName( aWordToAdd.GetShort() );
2797                     if (xStorage->IsOLEStorage())
2798                         sStorageName = EncryptBlockName_Imp(sStorageName);
2799                     else
2800                         GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2801
2802                     if( xStorage->IsContained( sStorageName ) )
2803                         xStorage->Remove( sStorageName );
2804                 }
2805             }
2806             bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2807
2808             if ( !bRet )
2809             {
2810                 break;
2811             }
2812         }
2813
2814         if ( bRet )
2815         {
2816             bRet = MakeBlocklist_Imp( *xStorage );
2817         }
2818     }
2819     return bRet;
2820 }
2821
2822 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2823 {
2824     // First get the current list!
2825     GetAutocorrWordList();
2826
2827     MakeUserStorage_Impl();
2828     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2829
2830     bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2831
2832     // Update the word list
2833     if( bRet )
2834     {
2835         SvxAutocorrWord aNew(rShort, rLong, true );
2836         std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2837         if( xRemove )
2838         {
2839             if( !xRemove->IsTextOnly() )
2840             {
2841                 // Still have to remove the Storage
2842                 OUString sStgNm( rShort );
2843                 if (xStg->IsOLEStorage())
2844                     sStgNm = EncryptBlockName_Imp(sStgNm);
2845                 else
2846                     GeneratePackageName ( rShort, sStgNm);
2847
2848                 if( xStg->IsContained( sStgNm ) )
2849                     xStg->Remove( sStgNm );
2850             }
2851         }
2852
2853         if( pAutocorr_List->Insert( std::move(aNew) ) )
2854         {
2855             bRet = MakeBlocklist_Imp( *xStg );
2856             xStg = nullptr;
2857         }
2858         else
2859         {
2860             bRet = false;
2861         }
2862     }
2863     return bRet;
2864 }
2865
2866 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2867                                                SfxObjectShell& rShell )
2868 {
2869     // First get the current list!
2870     GetAutocorrWordList();
2871
2872     MakeUserStorage_Impl();
2873
2874     try
2875     {
2876         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2877         OUString sLong;
2878         bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2879         xStg = nullptr;
2880
2881         // Update the word list
2882         if( bRet )
2883         {
2884             if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2885             {
2886                 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2887                 MakeBlocklist_Imp( *xStor );
2888             }
2889         }
2890     }
2891     catch ( const uno::Exception& )
2892     {
2893     }
2894 }
2895
2896 // Keep the list sorted ...
2897 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2898 {
2899     bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2900     {
2901         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2902         return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2903     }
2904 };
2905
2906 namespace {
2907
2908 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
2909
2910 }
2911
2912 struct SvxAutocorrWordList::Impl
2913 {
2914
2915     // only one of these contains the data
2916     // maSortedVector is manually sorted so we can optimise data movement
2917     mutable AutocorrWordSetType maSortedVector;
2918     mutable AutocorrWordHashType maHash; // key is 'Short'
2919
2920     void DeleteAndDestroyAll()
2921     {
2922         maHash.clear();
2923         maSortedVector.clear();
2924     }
2925 };
2926
2927 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2928
2929 SvxAutocorrWordList::~SvxAutocorrWordList()
2930 {
2931 }
2932
2933 void SvxAutocorrWordList::DeleteAndDestroyAll()
2934 {
2935     mpImpl->DeleteAndDestroyAll();
2936 }
2937
2938 // returns true if inserted
2939 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
2940 {
2941     if ( mpImpl->maSortedVector.empty() ) // use the hash
2942     {
2943         OUString aShort = aWord.GetShort();
2944         auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
2945         if (inserted)
2946             return &(it->second);
2947         return nullptr;
2948     }
2949     else
2950     {
2951         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
2952         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2953         if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
2954         {
2955             it = mpImpl->maSortedVector.insert(it, std::move(aWord));
2956             return &*it;
2957         }
2958         return nullptr;
2959     }
2960 }
2961
2962 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2963 {
2964     (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
2965 }
2966
2967 bool SvxAutocorrWordList::empty() const
2968 {
2969     return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
2970 }
2971
2972 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
2973 {
2974
2975     if ( mpImpl->maSortedVector.empty() ) // use the hash
2976     {
2977         AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2978         if( it != mpImpl->maHash.end() )
2979         {
2980             SvxAutocorrWord pMatch = std::move(it->second);
2981             mpImpl->maHash.erase (it);
2982             return pMatch;
2983         }
2984     }
2985     else
2986     {
2987         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
2988         if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
2989         {
2990             SvxAutocorrWord pMatch = std::move(*it);
2991             mpImpl->maSortedVector.erase (it);
2992             return pMatch;
2993         }
2994     }
2995     return std::optional<SvxAutocorrWord>();
2996 }
2997
2998 // return the sorted contents - defer sorting until we have to.
2999 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
3000 {
3001     // convert from hash to set permanently
3002     if ( mpImpl->maSortedVector.empty() )
3003     {
3004         std::vector<SvxAutocorrWord> tmp;
3005         tmp.reserve(mpImpl->maHash.size());
3006         for (auto & rPair : mpImpl->maHash)
3007             tmp.emplace_back(std::move(rPair.second));
3008         mpImpl->maHash.clear();
3009         // sort twice - this gets the list into mostly-sorted order, which
3010         // reduces the number of times we need to invoke the expensive ICU collate fn.
3011         std::sort(tmp.begin(), tmp.end(),
3012             [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
3013             {
3014                 return lhs.GetShort() < rhs.GetShort();
3015             });
3016         // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
3017         // stable_sort is twice as fast as sort in this situation because it does
3018         // fewer comparison operations.
3019         std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
3020         mpImpl->maSortedVector = std::move(tmp);
3021     }
3022     return mpImpl->maSortedVector;
3023 }
3024
3025 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
3026                                       std::u16string_view rTxt,
3027                                       sal_Int32 &rStt,
3028                                       sal_Int32 nEndPos) const
3029 {
3030     const OUString& rChk = pFnd->GetShort();
3031
3032     sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
3033     sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
3034     assert(nEndPos >= 0);
3035     size_t nSttWdPos = nEndPos;
3036
3037     // direct replacement of keywords surrounded by colons (for example, ":name:")
3038     bool bColonNameColon = static_cast<sal_Int32>(rTxt.size()) > nEndPos &&
3039         rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
3040     if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard )
3041         return nullptr;
3042
3043     bool bWasWordDelim = false;
3044     sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
3045     if (bColonNameColon)
3046         nCalcStt++;
3047     if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
3048           ( nCalcStt < rStt &&
3049             IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
3050     {
3051         TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
3052         OUString sWord( rTxt.substr(nCalcStt, rChk.getLength() - left_wildcard) );
3053         if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
3054         {
3055             rStt = nCalcStt;
3056             if (!left_wildcard)
3057             {
3058                 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
3059                 if (static_cast<sal_Int32>(rTxt.size()) > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
3060                     return nullptr;
3061                 return pFnd;
3062             }
3063             // get the first word delimiter position before the matching ".*word" pattern
3064             while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
3065                 ;
3066             if (bWasWordDelim) rStt++;
3067             OUString left_pattern( rTxt.substr(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard) );
3068             // avoid double spaces before simple "word" replacement
3069             left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong();
3070             if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(OUString(rTxt.substr(rStt, nEndPos - rStt)), left_pattern) ) )
3071                 return pNew;
3072         }
3073     } else
3074     // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
3075     if ( right_wildcard )
3076     {
3077
3078         OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
3079         // Get the last word delimiter position
3080         bool not_suffix;
3081
3082         while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
3083             ;
3084         // search the first occurrence (with a left word delimitation, if needed)
3085         size_t nFndPos = std::u16string_view::npos;
3086         do {
3087             nFndPos = rTxt.find( sTmp, nFndPos + 1);
3088             if (nFndPos == std::u16string_view::npos)
3089                 break;
3090             not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
3091         } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );
3092
3093         if ( nFndPos != std::u16string_view::npos )
3094         {
3095             sal_Int32 extra_repl = static_cast<sal_Int32>(nFndPos) + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
3096
3097             if ( left_wildcard )
3098             {
3099                 // get the first word delimiter position before the matching ".*word.*" pattern
3100                 while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
3101                     ;
3102                 if (bWasWordDelim) nFndPos++;
3103             }
3104             if (nEndPos + extra_repl <= static_cast<sal_Int32>(nFndPos))
3105             {
3106                 return nullptr;
3107             }
3108             // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
3109             OUString aShort( rTxt.substr(nFndPos, nEndPos - nFndPos + extra_repl) );
3110
3111             OUString aLong;
3112             rStt = nFndPos;
3113             if ( !left_wildcard )
3114             {
3115                 sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
3116                 aLong = pFnd->GetLong() + (siz > 0 ? rTxt.substr(nFndPos + sTmp.getLength(), siz) : u"");
3117             } else {
3118                 OUStringBuffer buf;
3119                 do {
3120                     nSttWdPos = rTxt.find( sTmp, nFndPos);
3121                     if (nSttWdPos != std::u16string_view::npos)
3122                     {
3123                         sal_Int32 nTmp(nFndPos);
3124                         while (nTmp < static_cast<sal_Int32>(nSttWdPos) && !IsWordDelim(rTxt[nTmp]))
3125                             nTmp++;
3126                         if (nTmp < static_cast<sal_Int32>(nSttWdPos))
3127                             break; // word delimiter found
3128                         buf.append(rTxt.substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
3129                         nFndPos = nSttWdPos + sTmp.getLength();
3130                     }
3131                 } while (nSttWdPos != std::u16string_view::npos);
3132                 if (static_cast<sal_Int32>(nEndPos - nFndPos) > extra_repl)
3133                     buf.append(rTxt.substr(nFndPos, nEndPos - nFndPos));
3134                 aLong = buf.makeStringAndClear();
3135             }
3136             if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
3137             {
3138                 if ( (static_cast<sal_Int32>(rTxt.size()) > nEndPos && IsWordDelim(rTxt[nEndPos])) || static_cast<sal_Int32>(rTxt.size()) == nEndPos )
3139                     return pNew;
3140             }
3141         }
3142     }
3143     return nullptr;
3144 }
3145
3146 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt, sal_Int32& rStt,
3147                                                               sal_Int32 nEndPos) const
3148 {
3149     for (auto const& elem : mpImpl->maHash)
3150     {
3151         if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) )
3152             return pTmp;
3153     }
3154
3155     for (auto const& elem : mpImpl->maSortedVector)
3156     {
3157         if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) )
3158             return pTmp;
3159     }
3160     return nullptr;
3161 }
3162
3163 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */