editeng/source/misc/svxacorr.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <com/sun/star/io/XStream.hpp>
  21 #include <com/sun/star/lang/Locale.hpp>
  22 #include <tools/urlobj.hxx>
  23 #include <i18nlangtag/mslangid.hxx>
  24 #include <vcl/svapp.hxx>
  25 #include <vcl/settings.hxx>
  26 #include <sot/storinfo.hxx>
  27 #include <svl/fstathelper.hxx>
  28 #include <svtools/helpopt.hxx>
  29 #include <svl/urihelper.hxx>
  30 #include <unotools/charclass.hxx>
  31 #include <com/sun/star/i18n/UnicodeType.hpp>
  32 #include <unotools/collatorwrapper.hxx>
  33 #include <com/sun/star/i18n/CollatorOptions.hpp>
  34 #include <com/sun/star/i18n/UnicodeScript.hpp>
  35 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
  36 #include <unotools/localedatawrapper.hxx>
  37 #include <unotools/transliterationwrapper.hxx>
  38 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
  39 #include <com/sun/star/io/XActiveDataSource.hpp>
  40 #include <comphelper/processfactory.hxx>
  41 #include <comphelper/storagehelper.hxx>
  42 #include <comphelper/string.hxx>
  43 #include <editeng/editids.hrc>
  44 #include <sot/storage.hxx>
  45 #include <editeng/udlnitem.hxx>
  46 #include <editeng/wghtitem.hxx>
  47 #include <editeng/escapementitem.hxx>
  48 #include <editeng/svxacorr.hxx>
  49 #include <editeng/unolingu.hxx>
  50 #include "vcl/window.hxx"
  51 #include <helpid.hrc>
  52 #include <com/sun/star/xml/sax/InputSource.hpp>
  53 #include <com/sun/star/xml/sax/FastParser.hpp>
  54 #include <com/sun/star/xml/sax/FastToken.hpp>
  55 #include <com/sun/star/xml/sax/Writer.hpp>
  56 #include <com/sun/star/xml/sax/FastTokenHandler.hpp>
  57 #include <unotools/streamwrap.hxx>
  58 #include <SvXMLAutoCorrectImport.hxx>
  59 #include <SvXMLAutoCorrectExport.hxx>
  60 #include <SvXMLAutoCorrectTokenHandler.hxx>
  61 #include <ucbhelper/content.hxx>
  62 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
  63 #include <com/sun/star/ucb/TransferInfo.hpp>
  64 #include <com/sun/star/ucb/NameClash.hpp>
  65 #include <xmloff/xmltoken.hxx>
  66 #include <vcl/help.hxx>
  67 #include <set>
  68 #include <unordered_map>
  69
  70 using namespace ::com::sun::star::ucb;
  71 using namespace ::com::sun::star::uno;
  72 using namespace ::com::sun::star::xml::sax;
  73 using namespace ::com::sun::star;
  74 using namespace ::xmloff::token;
  75 using namespace ::utl;
  76
  77 static const int C_NONE             = 0x00;
  78 static const int C_FULL_STOP        = 0x01;
  79 static const int C_EXCLAMATION_MARK = 0x02;
  80 static const int C_QUESTION_MARK    = 0x04;
  81 static const sal_Unicode cNonBreakingSpace = 0xA0;
  82
  83 static const sal_Char pXMLImplWrdStt_ExcptLstStr[] = "WordExceptList.xml";
  84 static const sal_Char pXMLImplCplStt_ExcptLstStr[] = "SentenceExceptList.xml";
  85 static const sal_Char pXMLImplAutocorr_ListStr[]   = "DocumentList.xml";
  86
  87 static const sal_Char
  88     /* also at these beginnings - Brackets and all kinds of begin characters */
  89     sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
    /* also at these ends - Brackets and all kinds of end characters */
  91     sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
  92
  93 // These characters are allowed in words: (for FnCptlSttSntnc)
  94 static const sal_Char sImplWordChars[] = "-'";
  95
  96 OUString EncryptBlockName_Imp(const OUString& rName);
  97
  98 TYPEINIT0(SvxAutoCorrect)
  99
 100 typedef SvxAutoCorrectLanguageLists* SvxAutoCorrectLanguageListsPtr;
 101
 102 static inline bool IsWordDelim( const sal_Unicode c )
 103 {
 104     return ' ' == c || '\t' == c || 0x0a == c ||
 105             cNonBreakingSpace == c || 0x2011 == c || 0x1 == c;
 106 }
 107
 108 static inline bool IsLowerLetter( sal_Int32 nCharType )
 109 {
 110     return CharClass::isLetterType( nCharType ) &&
 111             0 == ( ::com::sun::star::i18n::KCharacterType::UPPER & nCharType);
 112 }
 113
 114 static inline bool IsUpperLetter( sal_Int32 nCharType )
 115 {
 116     return CharClass::isLetterType( nCharType ) &&
 117             0 == ( ::com::sun::star::i18n::KCharacterType::LOWER & nCharType);
 118 }
 119
 120 bool lcl_IsUnsupportedUnicodeChar( CharClass& rCC, const OUString& rTxt,
 121                                    sal_Int32 nStt, sal_Int32 nEnd )
 122 {
 123     for( ; nStt < nEnd; ++nStt )
 124     {
 125         short nScript = rCC.getScript( rTxt, nStt );
 126         switch( nScript )
 127         {
 128             case ::com::sun::star::i18n::UnicodeScript_kCJKRadicalsSupplement:
 129             case ::com::sun::star::i18n::UnicodeScript_kHangulJamo:
 130             case ::com::sun::star::i18n::UnicodeScript_kCJKSymbolPunctuation:
 131             case ::com::sun::star::i18n::UnicodeScript_kHiragana:
 132             case ::com::sun::star::i18n::UnicodeScript_kKatakana:
 133             case ::com::sun::star::i18n::UnicodeScript_kHangulCompatibilityJamo:
 134             case ::com::sun::star::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
 135             case ::com::sun::star::i18n::UnicodeScript_kCJKCompatibility:
 136             case ::com::sun::star::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA:
 137             case ::com::sun::star::i18n::UnicodeScript_kCJKUnifiedIdeograph:
 138             case ::com::sun::star::i18n::UnicodeScript_kHangulSyllable:
 139             case ::com::sun::star::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
 140             case ::com::sun::star::i18n::UnicodeScript_kHalfwidthFullwidthForm:
 141                 return true;
 142             default: ; //do nothing
 143         }
 144     }
 145     return false;
 146 }
 147
 148 static bool lcl_IsSymbolChar( CharClass& rCC, const OUString& rTxt,
 149                                   sal_Int32 nStt, sal_Int32 nEnd )
 150 {
 151     for( ; nStt < nEnd; ++nStt )
 152     {
 153         if( ::com::sun::star::i18n::UnicodeType::PRIVATE_USE ==
 154                 rCC.getType( rTxt, nStt ))
 155             return true;
 156     }
 157     return false;
 158 }
 159
 160 static bool lcl_IsInAsciiArr( const sal_Char* pArr, const sal_Unicode c )
 161 {
 162     bool bRet = false;
 163     for( ; *pArr; ++pArr )
 164         if( *pArr == c )
 165         {
 166             bRet = true;
 167             break;
 168         }
 169     return bRet;
 170 }
 171
 172 SvxAutoCorrDoc::~SvxAutoCorrDoc()
 173 {
 174 }
 175
 176 // Called by the functions:
 177 //  - FnCptlSttWrd
 178 //  - FnCptlSttSntnc
 179 // after the exchange of characters. Then the words, if necessary, can be inserted
 180 // into the exception list.
 181 void SvxAutoCorrDoc::SaveCpltSttWord( sal_uLong, sal_Int32, const OUString&,
 182                                         sal_Unicode )
 183 {
 184 }
 185
 186 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32, bool ) const
 187 {
 188     return LANGUAGE_SYSTEM;
 189 }
 190
 191 static const LanguageTag& GetAppLang()
 192 {
 193     return Application::GetSettings().GetLanguageTag();
 194 }
 195 static LocaleDataWrapper& GetLocaleDataWrapper( sal_uInt16 nLang )
 196 {
 197     static LocaleDataWrapper aLclDtWrp( GetAppLang() );
 198     LanguageTag aLcl( nLang );
 199     const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag();
 200     if( aLcl != rLcl )
 201         aLclDtWrp.setLanguageTag( aLcl );
 202     return aLclDtWrp;
 203 }
 204 static TransliterationWrapper& GetIgnoreTranslWrapper()
 205 {
 206     static int bIsInit = 0;
 207     static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
 208                 ::com::sun::star::i18n::TransliterationModules_IGNORE_KANA |
 209                 ::com::sun::star::i18n::TransliterationModules_IGNORE_WIDTH );
 210     if( !bIsInit )
 211     {
 212         aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
 213         bIsInit = 1;
 214     }
 215     return aWrp;
 216 }
 217 static CollatorWrapper& GetCollatorWrapper()
 218 {
 219     static int bIsInit = 0;
 220     static CollatorWrapper aCollWrp( ::comphelper::getProcessComponentContext() );
 221     if( !bIsInit )
 222     {
 223         aCollWrp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
 224         bIsInit = 1;
 225     }
 226     return aCollWrp;
 227 }
 228
 229 static void lcl_ClearTable(boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>& rLangTable)
 230 {
 231     rLangTable.clear();
 232 }
 233
 234 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
 235 {
 236     return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
 237             cChar == ' '  || cChar == '\'' || cChar == '\"' ||
 238             cChar == '*'  || cChar == '_'  || cChar == '%' ||
 239             cChar == '.'  || cChar == ','  || cChar == ';' ||
 240             cChar == ':'  || cChar == '?' || cChar == '!' ||
 241             cChar == '/'  || cChar == '-';
 242 }
 243
 244 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
 245 {
 246     return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
 247         cChar == '/' /*case for the urls exception*/;
 248 }
 249
 250 long SvxAutoCorrect::GetDefaultFlags()
 251 {
 252     long nRet = Autocorrect
 253                     | CptlSttSntnc
 254                     | CptlSttWrd
 255                     | ChgOrdinalNumber
 256                     | ChgToEnEmDash
 257                     | AddNonBrkSpace
 258                     | ChgWeightUnderl
 259                     | SetINetAttr
 260                     | ChgQuotes
 261                     | SaveWordCplSttLst
 262                     | SaveWordWrdSttLst
 263                     | CorrectCapsLock;
 264     LanguageType eLang = GetAppLang().getLanguageType();
 265     switch( eLang )
 266     {
 267     case LANGUAGE_ENGLISH:
 268     case LANGUAGE_ENGLISH_US:
 269     case LANGUAGE_ENGLISH_UK:
 270     case LANGUAGE_ENGLISH_AUS:
 271     case LANGUAGE_ENGLISH_CAN:
 272     case LANGUAGE_ENGLISH_NZ:
 273     case LANGUAGE_ENGLISH_EIRE:
 274     case LANGUAGE_ENGLISH_SAFRICA:
 275     case LANGUAGE_ENGLISH_JAMAICA:
 276     case LANGUAGE_ENGLISH_CARRIBEAN:
 277         nRet &= ~(ChgQuotes|ChgSglQuotes);
 278         break;
 279     }
 280     return nRet;
 281 }
 282
 283
 284 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile,
 285                                 const OUString& rUserAutocorrFile )
 286     : sShareAutoCorrFile( rShareAutocorrFile )
 287     , sUserAutoCorrFile( rUserAutocorrFile )
 288     , pLangTable( new boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists> )
 289     , pCharClass( 0 )
 290     , bRunNext( false )
 291     , eCharClassLang( LANGUAGE_DONTKNOW )
 292     , nFlags(SvxAutoCorrect::GetDefaultFlags())
 293     , cStartDQuote( 0 )
 294     , cEndDQuote( 0 )
 295     , cStartSQuote( 0 )
 296     , cEndSQuote( 0 )
 297     , cEmDash( 0x2014 )
 298     , cEnDash( 0x2013)
 299 {
 300 }
 301
 302 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
 303     : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
 304     , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
 305     , aSwFlags( rCpy.aSwFlags )
 306     , pLangTable( new boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists> )
 307     , pCharClass( 0 )
 308     , bRunNext( false )
 309     , eCharClassLang(rCpy.eCharClassLang)
 310     , nFlags( rCpy.nFlags & ~(ChgWordLstLoad|CplSttLstLoad|WrdSttLstLoad))
 311     , cStartDQuote( rCpy.cStartDQuote )
 312     , cEndDQuote( rCpy.cEndDQuote )
 313     , cStartSQuote( rCpy.cStartSQuote )
 314     , cEndSQuote( rCpy.cEndSQuote )
 315     , cEmDash( rCpy.cEmDash )
 316     , cEnDash( rCpy.cEnDash )
 317 {
 318 }
 319
 320
 321 SvxAutoCorrect::~SvxAutoCorrect()
 322 {
 323     lcl_ClearTable(*pLangTable);
 324     delete pLangTable;
 325     delete pCharClass;
 326 }
 327
 328 void SvxAutoCorrect::_GetCharClass( LanguageType eLang )
 329 {
 330     delete pCharClass;
 331     pCharClass = new CharClass( LanguageTag( eLang));
 332     eCharClassLang = eLang;
 333 }
 334
 335 void SvxAutoCorrect::SetAutoCorrFlag( long nFlag, bool bOn )
 336 {
 337     long nOld = nFlags;
 338     nFlags = bOn ? nFlags | nFlag
 339                  : nFlags & ~nFlag;
 340
 341     if( !bOn )
 342     {
 343         if( (nOld & CptlSttSntnc) != (nFlags & CptlSttSntnc) )
 344             nFlags &= ~CplSttLstLoad;
 345         if( (nOld & CptlSttWrd) != (nFlags & CptlSttWrd) )
 346             nFlags &= ~WrdSttLstLoad;
 347         if( (nOld & Autocorrect) != (nFlags & Autocorrect) )
 348             nFlags &= ~ChgWordLstLoad;
 349     }
 350 }
 351
 352
 353     // Two capital letters at the beginning of word?
 354 bool SvxAutoCorrect::FnCptlSttWrd( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 355                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
 356                                     LanguageType eLang )
 357 {
 358     bool bRet = false;
 359     CharClass& rCC = GetCharClass( eLang );
 360
 361     // Delete all non alphanumeric. Test the characters at the beginning/end of
 362     // the word ( recognizes: "(min.", "/min.", and so on.)
 363     for( ; nSttPos < nEndPos; ++nSttPos )
 364         if( rCC.isLetterNumeric( rTxt, nSttPos ))
 365             break;
 366     for( ; nSttPos < nEndPos; --nEndPos )
 367         if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
 368             break;
 369
 370     // Is the word a compounded word separated by delimiters?
 371     // If so, keep track of all delimiters so each constituent
 372     // word can be checked for two initial capital letters.
 373     std::deque<sal_Int32> aDelimiters;
 374
 375     // Always check for two capitals at the beginning
 376     // of the entire word, so start at nSttPos.
 377     aDelimiters.push_back(nSttPos);
 378
 379     // Find all compound word delimiters
 380     for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
 381     {
 382         if (IsAutoCorrectChar(rTxt[ n ]))
 383         {
 384             aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
 385         }
 386     }
 387
 388     // Decide where to put the terminating delimiter.
 389     // If the last AutoCorrect char was a newline, then the AutoCorrect
 390     // char will not be included in rTxt.
 391     // If the last AutoCorrect char was not a newline, then the AutoCorrect
 392     // character will be the last character in rTxt.
 393     if (!IsAutoCorrectChar(rTxt[nEndPos-1]))
 394         aDelimiters.push_back(nEndPos);
 395
 396     // Iterate through the word and all words that compose it.
 397     // Two capital letters at the beginning of word?
 398     for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
 399     {
 400         nSttPos = aDelimiters[nI];
 401         nEndPos = aDelimiters[nI + 1];
 402
 403         if( nSttPos+2 < nEndPos &&
 404             IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
 405             IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
 406             // Is the third character a lower case
 407             IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
 408             // Do not replace special attributes
 409             0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
 410         {
 411             // test if the word is in an exception list
 412             OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
 413             if( !FindInWrdSttExceptList(eLang, sWord) )
 414             {
 415                 // Check that word isn't correctly spelled before correcting:
 416                 ::com::sun::star::uno::Reference<
 417                     ::com::sun::star::linguistic2::XSpellChecker1 > xSpeller =
 418                     SvxGetSpellChecker();
 419                 if( xSpeller->hasLanguage(eLang) )
 420                 {
 421                     Sequence< ::com::sun::star::beans::PropertyValue > aEmptySeq;
 422                     if (!xSpeller->spell(sWord, eLang, aEmptySeq).is())
 423                     {
 424                         return false;
 425                     }
 426                 }
 427                 sal_Unicode cSave = rTxt[ nSttPos ];
 428                 OUString sChar( cSave );
 429                 sChar = rCC.lowercase( sChar );
 430                 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
 431                 {
 432                     if( SaveWordWrdSttLst & nFlags )
 433                         rDoc.SaveCpltSttWord( CptlSttWrd, nSttPos, sWord, cSave );
 434                     bRet = true;
 435                 }
 436             }
 437         }
 438     }
 439     return bRet;
 440 }
 441
 442
 443 bool SvxAutoCorrect::FnChgOrdinalNumber(
 444     SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 445     sal_Int32 nSttPos, sal_Int32 nEndPos,
 446     LanguageType eLang)
 447 {
 448     // 1st, 2nd, 3rd, 4 - 0th
 449     // 201th or 201st
 450     // 12th or 12nd
 451     bool bChg = false;
 452
 453     // In some languages ordinal suffixes should never be
 454     // changed to superscript. Let's break for those languages.
 455     switch (eLang)
 456     {
 457     case LANGUAGE_SWEDISH:
 458     case LANGUAGE_SWEDISH_FINLAND:
 459         break;
 460     default:
 461         CharClass& rCC = GetCharClass(eLang);
 462
 463         for (; nSttPos < nEndPos; ++nSttPos)
 464             if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
 465                 break;
 466         for (; nSttPos < nEndPos; --nEndPos)
 467             if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
 468                 break;
 469
 470
 471         // Get the last number in the string to check
 472         sal_Int32 nNumEnd = nEndPos;
 473         bool foundEnd = false;
 474         bool validNumber = true;
 475         sal_Int32 i = nEndPos;
 476
 477         while (i > nSttPos)
 478         {
 479             i--;
 480             bool isDigit = rCC.isDigit(rTxt, i);
 481             if (foundEnd)
 482                 validNumber |= isDigit;
 483
 484             if (isDigit && !foundEnd)
 485             {
 486                 foundEnd = true;
 487                 nNumEnd = i;
 488             }
 489         }
 490
 491         if (foundEnd && validNumber) {
 492             sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32();
 493
 494             // Check if the characters after that number correspond to the ordinal suffix
 495             uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
 496                 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
 497
 498             uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
 499             for (sal_Int32 nSuff = 0; nSuff < aSuffixes.getLength(); nSuff++)
 500             {
 501                 OUString sSuffix(aSuffixes[nSuff]);
 502                 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1);
 503
 504                 if (sSuffix == sEnd)
 505                 {
 506                     // Check if the ordinal suffix has to be set as super script
 507                     if (rCC.isLetter(sSuffix))
 508                     {
 509                         // Do the change
 510                         SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
 511                             DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
 512                         rDoc.SetAttr(nNumEnd + 1, nEndPos,
 513                             SID_ATTR_CHAR_ESCAPEMENT,
 514                             aSvxEscapementItem);
 515                         bChg = true;
 516                     }
 517                 }
 518             }
 519         }
 520     }
 521     return bChg;
 522 }
 523
 524
 525 bool SvxAutoCorrect::FnChgToEnEmDash(
 526                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 527                                 sal_Int32 nSttPos, sal_Int32 nEndPos,
 528                                 LanguageType eLang )
 529 {
 530     bool bRet = false;
 531     CharClass& rCC = GetCharClass( eLang );
 532     if (eLang == LANGUAGE_SYSTEM)
 533         eLang = GetAppLang().getLanguageType();
 534     bool bAlwaysUseEmDash = (cEmDash && (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN));
 535
 536     // replace " - " or " --" with "enDash"
 537     if( cEnDash && 1 < nSttPos && 1 <= nEndPos - nSttPos )
 538     {
 539         sal_Unicode cCh = rTxt[ nSttPos ];
 540         if( '-' == cCh )
 541         {
 542             if( ' ' == rTxt[ nSttPos-1 ] &&
 543                 '-' == rTxt[ nSttPos+1 ])
 544             {
 545                 sal_Int32 n;
 546                 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
 547                             sImplSttSkipChars,(cCh = rTxt[ n ]));
 548                         ++n )
 549                     ;
 550
 551                 // found: " --[<AnySttChars>][A-z0-9]
 552                 if( rCC.isLetterNumeric( OUString(cCh) ) )
 553                 {
 554                     for( n = nSttPos-1; n && lcl_IsInAsciiArr(
 555                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
 556                         ;
 557
 558                     // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
 559                     if( rCC.isLetterNumeric( OUString(cCh) ))
 560                     {
 561                         rDoc.Delete( nSttPos, nSttPos + 2 );
 562                         rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
 563                         bRet = true;
 564                     }
 565                 }
 566             }
 567         }
 568         else if( 3 < nSttPos &&
 569                  ' ' == rTxt[ nSttPos-1 ] &&
 570                  '-' == rTxt[ nSttPos-2 ])
 571         {
 572             sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
 573             if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
 574             {
 575                 --nTmpPos;
 576                 ++nLen;
 577                 cCh = rTxt[ nTmpPos-1 ];
 578             }
 579             if( ' ' == cCh )
 580             {
 581                 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
 582                             sImplSttSkipChars,(cCh = rTxt[ n ]));
 583                         ++n )
 584                     ;
 585
 586                 // found: " - [<AnySttChars>][A-z0-9]
 587                 if( rCC.isLetterNumeric( OUString(cCh) ) )
 588                 {
 589                     cCh = ' ';
 590                     for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
 591                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
 592                             ;
 593                     // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
 594                     if( rCC.isLetterNumeric( OUString(cCh) ))
 595                     {
 596                         rDoc.Delete( nTmpPos, nTmpPos + nLen );
 597                         rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
 598                         bRet = true;
 599                     }
 600                 }
 601             }
 602         }
 603     }
 604
 605     // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
 606     // [0-9]--[0-9] double dash always replaced with "enDash"
 607     // Finnish and Hungarian use enDash instead of emDash.
 608     bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
 609     if( ((cEmDash && !bEnDash) || (cEnDash && bEnDash)) && 4 <= nEndPos - nSttPos )
 610     {
 611         OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) );
 612         sal_Int32 nFndPos = sTmp.indexOf("--");
 613         if( nFndPos != -1 && nFndPos &&
 614             nFndPos + 2 < sTmp.getLength() &&
 615             ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
 616               lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) &&
 617             ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
 618             lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] )))
 619         {
 620             nSttPos = nSttPos + nFndPos;
 621             rDoc.Delete( nSttPos, nSttPos + 2 );
 622             rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) &&
 623                 rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) );
 624             bRet = true;
 625         }
 626     }
 627     return bRet;
 628 }
 629
 630
 631 bool SvxAutoCorrect::FnAddNonBrkSpace(
 632                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 633                                 sal_Int32, sal_Int32 nEndPos,
 634                                 LanguageType eLang )
 635 {
 636     bool bRet = false;
 637
 638     CharClass& rCC = GetCharClass( eLang );
 639
 640     if ( rCC.getLanguageTag().getLanguage() == "fr" )
 641     {
 642         bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
 643         OUString allChars = ":;?!%";
 644         OUString chars( allChars );
 645         if ( bFrCA )
 646             chars = ":";
 647
 648         sal_Unicode cChar = rTxt[ nEndPos ];
 649         bool bHasSpace = chars.indexOf( cChar ) != -1;
 650         bool bIsSpecial = allChars.indexOf( cChar ) != -1;
 651         if ( bIsSpecial )
 652         {
 653             // Get the last word delimiter position
 654             sal_Int32 nSttWdPos = nEndPos;
 655             bool bWasWordDelim = false;
 656             while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
 657                 ;
 658
 659             //See if the text is the start of a protocol string, e.g. have text of
 660             //"http" see if it is the start of "http:" and if so leave it alone
 661             sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
 662             sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1;
 663             if (nIndex + nProtocolLen <= rTxt.getLength())
 664             {
 665                 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
 666                     return false;
 667             }
 668
 669             // Check the presence of "://" in the word
 670             sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 );
 671             if ( nStrPos == -1 && nEndPos > 0 )
 672             {
 673                 // Check the previous char
 674                 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
 675                 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
 676                 {
 677                     // Remove any previous normal space
 678                     sal_Int32 nPos = nEndPos - 1;
 679                     while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
 680                     {
 681                         if ( nPos == 0 ) break;
 682                         nPos--;
 683                         cPrevChar = rTxt[ nPos ];
 684                     }
 685
 686                     nPos++;
 687                     if ( nEndPos - nPos > 0 )
 688                         rDoc.Delete( nPos, nEndPos );
 689
 690                     // Add the non-breaking space at the end pos
 691                     if ( bHasSpace )
 692                         rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
 693                     bRunNext = true;
 694                     bRet = true;
 695                 }
 696                 else if ( chars.indexOf( cPrevChar ) != -1 )
 697                     bRunNext = true;
 698             }
 699         }
 700         else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) )
 701         {
 702             // Remove the hardspace right before to avoid formatting URLs
 703             sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
 704             sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
 705             if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
 706             {
 707                 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
 708                 bRet = true;
 709             }
 710         }
 711     }
 712
 713     return bRet;
 714 }
 715
 716
 717 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 718                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
 719                                     LanguageType eLang )
 720 {
 721     OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
 722                                                 GetCharClass( eLang ) ));
 723     bool bRet = !sURL.isEmpty();
 724     if( bRet )          // also Attribut setzen:
 725         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
 726     return bRet;
 727 }
 728
 729
 730 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
 731                                         sal_Int32 , sal_Int32 nEndPos,
 732                                         LanguageType eLang )
 733 {
 734     // Condition:
 735     //  at the beginning:   _ or * after Space with the folloeing !Space
 736     //  at the end:         _ or * before Space (word delimiter?)
 737
 738     sal_Unicode c, cInsChar = rTxt[ nEndPos ];  // underline or bold
 739     if( ++nEndPos != rTxt.getLength() &&
 740         !IsWordDelim( rTxt[ nEndPos ] ) )
 741         return false;
 742
 743     --nEndPos;
 744
 745     bool bAlphaNum = false;
 746     sal_Int32 nPos = nEndPos;
 747     sal_Int32  nFndPos = -1;
 748     CharClass& rCC = GetCharClass( eLang );
 749
 750     while( nPos )
 751     {
 752         switch( c = rTxt[ --nPos ] )
 753         {
 754         case '_':
 755         case '*':
 756             if( c == cInsChar )
 757             {
 758                 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
 759                     IsWordDelim( rTxt[ nPos-1 ])) &&
 760                     !IsWordDelim( rTxt[ nPos+1 ]))
 761                         nFndPos = nPos;
 762                 else
 763                     // Condition is not satisfied, so cancel
 764                     nFndPos = -1;
 765                 nPos = 0;
 766             }
 767             break;
 768         default:
 769             if( !bAlphaNum )
 770                 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
 771         }
 772     }
 773
 774     if( -1 != nFndPos )
 775     {
 776         // first delete the Character at the end - this allows insertion
 777         // of an empty hint in SetAttr which would be removed by Delete
 778         // (fdo#62536, AUTOFMT in Writer)
 779         rDoc.Delete( nEndPos, nEndPos + 1 );
 780         rDoc.Delete( nFndPos, nFndPos + 1 );
 781         // Span the Attribute over the area
 782         // the end.
 783         if( '*' == cInsChar )           // Bold
 784         {
 785             SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
 786             rDoc.SetAttr( nFndPos, nEndPos - 1,
 787                             SID_ATTR_CHAR_WEIGHT,
 788                             aSvxWeightItem);
 789         }
 790         else                            // underline
 791         {
 792             SvxUnderlineItem aSvxUnderlineItem( UNDERLINE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
 793             rDoc.SetAttr( nFndPos, nEndPos - 1,
 794                             SID_ATTR_CHAR_UNDERLINE,
 795                             aSvxUnderlineItem);
 796         }
 797     }
 798
 799     return -1 != nFndPos;
 800 }
 801
 802
// Capitalizes the first letter of the last word before nEndPos when that
// word begins a sentence.
//
// rDoc       document interface; used to fetch the previous paragraph
//            (GetPrevPara), to replace the letter (ReplaceRange) and to
//            record the change (SaveCpltSttWord).
// rTxt       text of the current paragraph.
// bNormalPos passed through unchanged to rDoc.GetPrevPara().
// nSttPos    only used for the initial range check (nEndPos must be greater);
//            afterwards reused as the index of the replaced letter.
// nEndPos    index one past the last character that is examined.
// eLang      language used to select the CharClass and the exception lists.
//
// Returns true only if the letter was actually changed, i.e. its titlecase
// form differs from the original and rDoc.ReplaceRange() succeeded.
bool SvxAutoCorrect::FnCptlSttSntnc( SvxAutoCorrDoc& rDoc,
                                    const OUString& rTxt, bool bNormalPos,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{

    if( rTxt.isEmpty() || nEndPos <= nSttPos )
        return false;

    CharClass& rCC = GetCharClass( eLang );
    // aText is the buffer currently being scanned: first the current
    // paragraph, later possibly the previous paragraph (see below).
    OUString aText( rTxt );
    const sal_Unicode *pStart = aText.getStr(),
                      *pStr = pStart + nEndPos,
                      *pWordStt = 0,    // first letter of the word to capitalize
                      *pDelim = 0;      // position right after the word's last letter

    // Walk backwards from nEndPos: skip trailing non-letters, then collect the
    // word. Digits inside the word are stepped over without moving pWordStt.
    bool bAtStart = false;
    do {
        --pStr;
        if (rCC.isLetter(aText, pStr - pStart))
        {
            if( !pWordStt )
                pDelim = pStr+1;
            pWordStt = pStr;
        }
        else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
        {
            // A character from sImplWordChars that sits directly before the
            // word and is itself preceded by a letter is treated as part of
            // the word; anything else ends the word.
            if( lcl_IsInAsciiArr( sImplWordChars, *pStr ) &&
                pWordStt - 1 == pStr &&
                // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
                (pStart + 1) <= pStr &&
                rCC.isLetter(aText, pStr-1 - pStart))
                pWordStt = --pStr;
            else
                break;
        }
    } while( ! ( bAtStart = (pStart == pStr) ) );

    if (!pWordStt)
        return false;    // no character to be replaced


    if (rCC.isDigit(aText, pStr - pStart))
        return false; // already ok

    if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
        return false; // already ok

    //See if the text is the start of a protocol string, e.g. have text of
    //"http" see if it is the start of "http:" and if so leave it alone
    sal_Int32 nIndex = pWordStt - pStart;
    sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
    if (nIndex + nProtocolLen <= rTxt.getLength())
    {
        if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
            return false; // already ok
    }

    // NOTE(review): 0x1 / 0x2 look like in-text placeholder characters that
    // must never be replaced - confirm against the callers.
    if (0x1 == *pWordStt || 0x2 == *pWordStt)
        return false; // already ok

    // A word of at most two characters that is directly followed by one of
    // . - ) > is left alone.
    if( *pDelim && 2 >= pDelim - pWordStt &&
        lcl_IsInAsciiArr( ".-)>", *pDelim ) )
        return false;

    if( !bAtStart ) // Still no beginning of a paragraph?
    {
        if ( IsWordDelim( *pStr ) )
        {
            // Skip all word delimiters in front of the word; bAtStart becomes
            // true if that reaches the beginning of the paragraph.
            while( ! ( bAtStart = (pStart == pStr--) ) && IsWordDelim( *pStr ) )
                ;
        }
        // Asian full stop, full width full stop, full width exclamation mark
        // and full width question marks are treated as word delimiters
        else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
                  0xFF1F != *pStr )
            return false; // no valid separator -> no replacement
    }

    if( bAtStart )  // at the beginning of a paragraph?
    {
        // Check out the previous paragraph, if it exists.
        // If so, then check to paragraph separator at the end.
        OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
        if (!pPrevPara)
        {
            // valid separator -> replace
            OUString sChar( *pWordStt );
            sChar = rCC.titlecase(sChar); //see fdo#56740
            return  !comphelper::string::equals(sChar, *pWordStt) &&
                    rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
        }

        // From here on pStart/pStr refer to the previous paragraph, while
        // pWordStt and pDelim still point into the current paragraph's text.
        aText = *pPrevPara;
        bAtStart = false;
        pStart = aText.getStr();
        pStr = pStart + aText.getLength();

        do {            // overwrite all blanks
            --pStr;
            if( !IsWordDelim( *pStr ))
                break;
        } while( ! ( bAtStart = (pStart == pStr) ) );

        if( bAtStart )
            return false;  // no valid separator -> no replacement
    }

    // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
    // all three can happen, but not more than once!
    // pExceptStt remembers the position of a full stop so that the word in
    // front of it can be looked up in the exception list further below.
    const sal_Unicode* pExceptStt = 0;
    if( !bAtStart )
    {
        bool bContinue = true;
        int nFlag = C_NONE;
        do {
            switch( *pStr )
            {
            // Western and Asian full stop
            case '.':
            case 0x3002 :
            case 0xFF0E :
                {
                    if (pStr >= pStart + 2 && *(pStr-2) == '.')
                    {
                        //e.g. text "f.o.o. word": Now currently considering
                        //capitalizing word but second last character of
                        //previous word is a .  So probably last word is an
                        //acronym that ends in . and not truly the end of a
                        //previous sentence, so don't autocapitalize this word
                        return false;
                    }
                    if( nFlag & C_FULL_STOP )
                        return false;  // no valid separator -> no replacement
                    nFlag |= C_FULL_STOP;
                    pExceptStt = pStr;
                }
                break;
            case '!':
            case 0xFF01 :
                {
                    if( nFlag & C_EXCLAMATION_MARK )
                        return false;   // no valid separator -> no replacement
                    nFlag |= C_EXCLAMATION_MARK;
                }
                break;
            case '?':
            case 0xFF1F :
                {
                    if( nFlag & C_QUESTION_MARK)
                        return false;   // no valid separator -> no replacement
                    nFlag |= C_QUESTION_MARK;
                }
                break;
            default:
                // first character that is not sentence punctuation: valid only
                // if at least one punctuation mark was seen before it
                if( !nFlag )
                    return false;       // no valid separator -> no replacement
                else
                    bContinue = false;
                break;
            }

            // punctuation reaching back to the very start of the text
            if( bContinue && pStr-- == pStart )
            {
                return false;       // no valid separator -> no replacement
            }
        } while( bContinue );
        // The exception list is consulted only when a full stop was the sole
        // punctuation mark found.
        if( C_FULL_STOP != nFlag )
            pExceptStt = 0;
    }

    // fewer than two characters in front of the punctuation
    if( 2 > ( pStr - pStart ) )
        return false;

    // If the character in front of the punctuation is not alphanumeric, look
    // further back: a digit, or a second letter, makes the position valid;
    // a word delimiter (or any non-alphanumeric after the first letter) does not.
    if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
    {
        bool bValid = false, bAlphaFnd = false;
        const sal_Unicode* pTmpStr = pStr;
        while( !bValid )
        {
            if( rCC.isDigit( aText, pTmpStr - pStart ) )
            {
                bValid = true;
                pStr = pTmpStr - 1;
            }
            else if( rCC.isLetter( aText, pTmpStr - pStart ) )
            {
                if( bAlphaFnd )
                {
                    bValid = true;
                    pStr = pTmpStr;
                }
                else
                    bAlphaFnd = true;
            }
            else if( bAlphaFnd || IsWordDelim( *pTmpStr ) )
                break;

            if( pTmpStr == pStart )
                break;

            --pTmpStr;
        }

        if( !bValid )
            return false;       // no valid separator -> no replacement
    }

    bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';

    // Search for the beginning of the word
    while( !IsWordDelim( *pStr ))
    {
        if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
            bNumericOnly = false;

        if( pStart == pStr )
            break;

        --pStr;
    }

    if( bNumericOnly )      // consists of only numbers, then not
        return false;

    // step back onto the first character of the word
    if( IsWordDelim( *pStr ))
        ++pStr;

    OUString sWord;

    // check on the basis of the exception list
    if( pExceptStt )
    {
        // the word including its trailing full stop
        sWord = OUString(pStr, pExceptStt - pStr + 1);
        if( FindInCplSttExceptList(eLang, sWord) )
            return false;

        // Delete all non alphanumeric. Test the characters at the
        // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
        OUString sTmp( sWord );
        while( !sTmp.isEmpty() &&
                !rCC.isLetterNumeric( sTmp, 0 ) )
            sTmp = sTmp.copy(1);

        // Remove all non alphanumeric characters towards the end up until
        // the last one.
        sal_Int32 nLen = sTmp.getLength();
        while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
            --nLen;
        if( nLen + 1 < sTmp.getLength() )
            sTmp = sTmp.copy( 0, nLen + 1 );

        if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
            FindInCplSttExceptList(eLang, sTmp))
            return false;

        if(FindInCplSttExceptList(eLang, sWord, true))
            return false;
    }

    // Ok, then replace
    sal_Unicode cSave = *pWordStt;
    // The index is computed against rTxt, not aText, because aText may by now
    // hold the previous paragraph.
    // NOTE(review): this relies on aText( rTxt ) having shared rTxt's buffer
    // when pWordStt was set - confirm.
    nSttPos = pWordStt - rTxt.getStr();
    OUString sChar( cSave );
    sChar = rCC.titlecase(sChar); //see fdo#56740
    bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );

    // Perhaps someone wants to have the word
    if( bRet && SaveWordCplSttLst & nFlags )
        rDoc.SaveCpltSttWord( CptlSttSntnc, nSttPos, sWord, cSave );

    return bRet;
}
1076
1077 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1078                                         sal_Int32 nSttPos, sal_Int32 nEndPos,
1079                                         LanguageType eLang )
1080 {
1081     if (nEndPos - nSttPos < 2)
1082         // string must be at least 2-character long.
1083         return false;
1084
1085     CharClass& rCC = GetCharClass( eLang );
1086
1087     // Check the first 2 letters.
1088     if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1089         return false;
1090
1091     if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1092         return false;
1093
1094     OUString aConverted;
1095     aConverted += rCC.uppercase(OUString(rTxt[nSttPos]));
1096     aConverted += rCC.lowercase(OUString(rTxt[nSttPos+1]));
1097
1098     for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1099     {
1100         if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1101             // A lowercase letter disqualifies the whole text.
1102             return false;
1103
1104         if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1105             // Another uppercase letter.  Convert it.
1106             aConverted += rCC.lowercase(OUString(rTxt[i]));
1107         else
1108             // This is not an alphabetic letter.  Leave it as-is.
1109             aConverted += OUString( rTxt[i] );
1110     }
1111
1112     // Replace the word.
1113     rDoc.Delete(nSttPos, nEndPos);
1114     rDoc.Insert(nSttPos, aConverted);
1115
1116     return true;
1117 }
1118
1119
1120 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1121                                         LanguageType eLang ) const
1122 {
1123     sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1124                                     ? GetStartDoubleQuote()
1125                                     : GetStartSingleQuote() )
1126                                    : ( '\"' == cInsChar
1127                                     ? GetEndDoubleQuote()
1128                                     : GetEndSingleQuote() );
1129     if( !cRet )
1130     {
1131         // then through the Language find the right character
1132         if( LANGUAGE_NONE == eLang )
1133             cRet = cInsChar;
1134         else
1135         {
1136             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1137             OUString sRet( bSttQuote
1138                             ? ( '\"' == cInsChar
1139                                 ? rLcl.getDoubleQuotationMarkStart()
1140                                 : rLcl.getQuotationMarkStart() )
1141                             : ( '\"' == cInsChar
1142                                 ? rLcl.getDoubleQuotationMarkEnd()
1143                                 : rLcl.getQuotationMarkEnd() ));
1144             cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1145         }
1146     }
1147     return cRet;
1148 }
1149
1150 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1151                                     sal_Unicode cInsChar, bool bSttQuote,
1152                                     bool bIns )
1153 {
1154     LanguageType eLang = rDoc.GetLanguage( nInsPos, false );
1155     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1156
1157     OUString sChg( cInsChar );
1158     if( bIns )
1159         rDoc.Insert( nInsPos, sChg );
1160     else
1161         rDoc.Replace( nInsPos, sChg );
1162
1163     sChg = OUString(cRet);
1164
1165     if( '\"' == cInsChar )
1166     {
1167         if( LANGUAGE_SYSTEM == eLang )
1168             eLang = GetAppLang().getLanguageType();
1169         switch( eLang )
1170         {
1171         case LANGUAGE_FRENCH:
1172         case LANGUAGE_FRENCH_BELGIAN:
1173         case LANGUAGE_FRENCH_CANADIAN:
1174         case LANGUAGE_FRENCH_SWISS:
1175         case LANGUAGE_FRENCH_LUXEMBOURG:
1176             {
1177                 OUString s( cNonBreakingSpace );
1178                     // UNICODE code for no break space
1179                 if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, s ))
1180                 {
1181                     if( !bSttQuote )
1182                         ++nInsPos;
1183                 }
1184             }
1185             break;
1186         }
1187     }
1188
1189     rDoc.Replace( nInsPos, sChg );
1190 }
1191
1192 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1193                                 sal_Unicode cInsChar, bool bSttQuote )
1194 {
1195     LanguageType eLang = rDoc.GetLanguage( nInsPos, false );
1196     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1197
1198     OUString sRet = OUString(cRet);
1199
1200     if( '\"' == cInsChar )
1201     {
1202         if( LANGUAGE_SYSTEM == eLang )
1203             eLang = GetAppLang().getLanguageType();
1204         switch( eLang )
1205         {
1206         case LANGUAGE_FRENCH:
1207         case LANGUAGE_FRENCH_BELGIAN:
1208         case LANGUAGE_FRENCH_CANADIAN:
1209         case LANGUAGE_FRENCH_SWISS:
1210         case LANGUAGE_FRENCH_LUXEMBOURG:
1211             if( bSttQuote )
1212                 sRet += " ";
1213             else
1214                 sRet = " " + sRet;
1215             break;
1216         }
1217     }
1218     return sRet;
1219 }
1220
1221 sal_uLong
1222 SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1223                                     sal_Int32 nInsPos, sal_Unicode cChar,
1224                                     bool bInsert, vcl::Window* pFrameWin )
1225 {
1226     sal_uLong nRet = 0;
1227     bool bIsNextRun = bRunNext;
1228     bRunNext = false;  // if it was set, then it has to be turned off
1229
1230     do{                                 // only for middle check loop !!
1231         if( cChar )
1232         {
1233             // Prevent double space
1234             if( nInsPos && ' ' == cChar &&
1235                 IsAutoCorrFlag( IgnoreDoubleSpace ) &&
1236                 ' ' == rTxt[ nInsPos - 1 ])
1237             {
1238                 nRet = IgnoreDoubleSpace;
1239                 break;
1240             }
1241
1242             bool bSingle = '\'' == cChar;
1243             bool bIsReplaceQuote =
1244                         (IsAutoCorrFlag( ChgQuotes ) && ('\"' == cChar )) ||
1245                         (IsAutoCorrFlag( ChgSglQuotes ) && bSingle );
1246             if( bIsReplaceQuote )
1247             {
1248                 sal_Unicode cPrev;
1249                 bool bSttQuote = !nInsPos ||
1250                         IsWordDelim( ( cPrev = rTxt[ nInsPos-1 ])) ||
1251                         lcl_IsInAsciiArr( "([{", cPrev ) ||
1252                         ( cEmDash && cEmDash == cPrev ) ||
1253                         ( cEnDash && cEnDash == cPrev );
1254
1255                 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert );
1256                 nRet = bSingle ? ChgSglQuotes : ChgQuotes;
1257                 break;
1258             }
1259
1260             if( bInsert )
1261                 rDoc.Insert( nInsPos, OUString(cChar) );
1262             else
1263                 rDoc.Replace( nInsPos, OUString(cChar) );
1264
1265             // Hardspaces autocorrection
1266             if ( IsAutoCorrFlag( AddNonBrkSpace ) )
1267             {
1268                 if ( NeedsHardspaceAutocorr( cChar ) &&
1269                     FnAddNonBrkSpace( rDoc, rTxt, 0, nInsPos, rDoc.GetLanguage( nInsPos, false ) ) )
1270                 {
1271                     nRet = AddNonBrkSpace;
1272                 }
1273                 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1274                 {
1275                     // Remove the NBSP if it wasn't an autocorrection
1276                     if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1277                             cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1278                     {
1279                         // Look for the last HARD_SPACE
1280                         sal_Int32 nPos = nInsPos - 1;
1281                         bool bContinue = true;
1282                         while ( bContinue )
1283                         {
1284                             const sal_Unicode cTmpChar = rTxt[ nPos ];
1285                             if ( cTmpChar == cNonBreakingSpace )
1286                             {
1287                                 rDoc.Delete( nPos, nPos + 1 );
1288                                 nRet = AddNonBrkSpace;
1289                                 bContinue = false;
1290                             }
1291                             else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1292                                 bContinue = false;
1293                             nPos--;
1294                         }
1295                     }
1296                 }
1297             }
1298         }
1299
1300         if( !nInsPos )
1301             break;
1302
1303         sal_Int32 nPos = nInsPos - 1;
1304
1305         if( IsWordDelim( rTxt[ nPos ]))
1306             break;
1307
1308         // Set bold or underline automatically?
1309         if (('*' == cChar || '_' == cChar) && (nPos+1 < rTxt.getLength()))
1310         {
1311             if( IsAutoCorrFlag( ChgWeightUnderl ) &&
1312                 FnChgWeightUnderl( rDoc, rTxt, 0, nPos+1 ) )
1313                 nRet = ChgWeightUnderl;
1314             break;
1315         }
1316
1317         while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1318             ;
1319
1320         // Found a Paragraph-start or a Blank, search for the word shortcut in
1321         // auto.
1322         sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1323         if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1324             --nCapLttrPos;          // Absatz Anfang und kein Blank !
1325
1326         LanguageType eLang = rDoc.GetLanguage( nCapLttrPos, false );
1327         if( LANGUAGE_SYSTEM == eLang )
1328             eLang = MsLangId::getSystemLanguage();
1329         CharClass& rCC = GetCharClass( eLang );
1330
1331         // no symbol characters
1332         if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1333             break;
1334
1335         if( IsAutoCorrFlag( Autocorrect ) )
1336         {
1337             OUString aPara;
1338             OUString* pPara = IsAutoCorrFlag(CptlSttSntnc) ? &aPara : 0;
1339
1340             // since LibO 4.1, '-' is a word separator
1341             // fdo#67742 avoid "--" to be replaced by "–" if next is "-"
1342             if( rTxt.endsWith( "---" ) )
1343                     break;
1344             bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1345                                                     *this, pPara );
1346             if( !bChgWord )
1347             {
1348                 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1349                 while( nCapLttrPos1 < nInsPos &&
1350                         lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1351                         )
1352                         ++nCapLttrPos1;
1353                 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1354                         lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1355                         )
1356                         --nInsPos1;
1357
1358                 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1359                     nCapLttrPos1 < nInsPos1 &&
1360                     rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1361                 {
1362                     bChgWord = true;
1363                     nCapLttrPos = nCapLttrPos1;
1364                 }
1365             }
1366
1367             if( bChgWord )
1368             {
1369                 nRet = Autocorrect;
1370                 if( !aPara.isEmpty() )
1371                 {
1372                     sal_Int32 nEnd = nCapLttrPos;
1373                     while( nEnd < aPara.getLength() &&
1374                             !IsWordDelim( aPara[ nEnd ]))
1375                         ++nEnd;
1376
1377                     // Capital letter at beginning of paragraph?
1378                     if( IsAutoCorrFlag( CptlSttSntnc ) &&
1379                         FnCptlSttSntnc( rDoc, aPara, false,
1380                                                 nCapLttrPos, nEnd, eLang ) )
1381                         nRet |= CptlSttSntnc;
1382
1383                     if( IsAutoCorrFlag( ChgToEnEmDash ) &&
1384                         FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nEnd, eLang ) )
1385                         nRet |= ChgToEnEmDash;
1386                 }
1387                 break;
1388             }
1389         }
1390
1391         if( ( IsAutoCorrFlag( nRet = ChgOrdinalNumber ) &&
1392                 (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
1393                 ( '-' != cChar || 'E' != toupper(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1394                 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1395             ( IsAutoCorrFlag( nRet = SetINetAttr ) &&
1396                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1397                 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1398             ;
1399         else
1400         {
1401             bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1402             bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1403
1404             nRet = 0;
1405             if ( bLockKeyOn && IsAutoCorrFlag( CorrectCapsLock ) &&
1406                  FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1407             {
1408                 // Correct accidental use of cAPS LOCK key (do this only when
1409                 // the caps or shift lock key is pressed).  Turn off the caps
1410                 // lock afterwords.
1411                 nRet |= CorrectCapsLock;
1412                 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1413             }
1414
1415             // Capital letter at beginning of paragraph ?
1416             if( !bUnsupported &&
1417                 IsAutoCorrFlag( CptlSttSntnc ) &&
1418                 FnCptlSttSntnc( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang ) )
1419                 nRet |= CptlSttSntnc;
1420
1421             // Two capital letters at beginning of word ??
1422             if( !bUnsupported &&
1423                 IsAutoCorrFlag( CptlSttWrd ) &&
1424                 FnCptlSttWrd( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1425                 nRet |= CptlSttWrd;
1426
1427             if( IsAutoCorrFlag( ChgToEnEmDash ) &&
1428                 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1429                 nRet |= ChgToEnEmDash;
1430         }
1431
1432     } while( false );
1433
1434     return nRet;
1435 }
1436
1437 SvxAutoCorrectLanguageLists& SvxAutoCorrect::_GetLanguageList(
1438                                                         LanguageType eLang )
1439 {
1440     LanguageTag aLanguageTag( eLang);
1441     if (pLangTable->find(aLanguageTag) == pLangTable->end())
1442         (void)CreateLanguageFile(aLanguageTag, true);
1443     return *(pLangTable->find(aLanguageTag)->second);
1444 }
1445
1446 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1447 {
1448     boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(LanguageTag(eLang));
1449     if(nTmpVal != pLangTable->end() && nTmpVal->second)
1450         nTmpVal->second->SaveCplSttExceptList();
1451 #ifdef DBG_UTIL
1452     else
1453     {
1454         SAL_WARN("editeng", "Save an empty list? ");
1455     }
1456 #endif
1457 }
1458
1459 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang)
1460 {
1461     boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(LanguageTag(eLang));
1462     if(nTmpVal != pLangTable->end() && nTmpVal->second)
1463         nTmpVal->second->SaveWrdSttExceptList();
1464 #ifdef DBG_UTIL
1465     else
1466     {
1467         SAL_WARN("editeng", "Save an empty list? ");
1468     }
1469 #endif
1470 }
1471
1472 // Adds a single word. The list will immediately be written to the file!
1473 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1474                                         LanguageType eLang )
1475 {
1476     SvxAutoCorrectLanguageLists* pLists = 0;
1477     // either the right language is present or it will be this in the general list
1478     boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(LanguageTag(eLang));
1479     if(nTmpVal != pLangTable->end())
1480         pLists = nTmpVal->second;
1481     else
1482     {
1483         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1484         nTmpVal = pLangTable->find(aLangTagUndetermined);
1485         if(nTmpVal != pLangTable->end())
1486             pLists = nTmpVal->second;
1487         else if(CreateLanguageFile(aLangTagUndetermined, true))
1488             pLists = pLangTable->find(aLangTagUndetermined)->second;
1489     }
1490     OSL_ENSURE(pLists, "No auto correction data");
1491     return pLists && pLists->AddToCplSttExceptList(rNew);
1492 }
1493
1494 // Adds a single word. The list will immediately be written to the file!
1495 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew,
1496                                          LanguageType eLang )
1497 {
1498     SvxAutoCorrectLanguageLists* pLists = 0;
1499     //either the right language is present or it is set in the general list
1500     boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(LanguageTag(eLang));
1501     if(nTmpVal != pLangTable->end())
1502         pLists = nTmpVal->second;
1503     else
1504     {
1505         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1506         nTmpVal = pLangTable->find(aLangTagUndetermined);
1507         if(nTmpVal != pLangTable->end())
1508             pLists = nTmpVal->second;
1509         else if(CreateLanguageFile(aLangTagUndetermined, true))
1510             pLists = pLangTable->find(aLangTagUndetermined)->second;
1511     }
1512     OSL_ENSURE(pLists, "No auto correction file!");
1513     return pLists && pLists->AddToWrdSttExceptList(rNew);
1514 }
1515
1516 bool SvxAutoCorrect::GetPrevAutoCorrWord( SvxAutoCorrDoc& rDoc,
1517                                         const OUString& rTxt, sal_Int32 nPos,
1518                                         OUString& rWord ) const
1519 {
1520     if( !nPos )
1521         return false;
1522
1523     sal_Int32 nEnde = nPos;
1524
1525     // it must be followed by a blank or tab!
1526     if( ( nPos < rTxt.getLength() &&
1527         !IsWordDelim( rTxt[ nPos ])) ||
1528         IsWordDelim( rTxt[ --nPos ]))
1529         return false;
1530
1531     while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1532         ;
1533
1534     // Found a Paragraph-start or a Blank, search for the word shortcut in
1535     // auto.
1536     sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1537     if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1538         --nCapLttrPos;          // Beginning of pargraph and no Blank!
1539
1540     while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1541         if( ++nCapLttrPos >= nEnde )
1542             return false;
1543
1544     if( 3 > nEnde - nCapLttrPos )
1545         return false;
1546
1547     LanguageType eLang = rDoc.GetLanguage( nCapLttrPos, false );
1548     if( LANGUAGE_SYSTEM == eLang )
1549         eLang = MsLangId::getSystemLanguage();
1550
1551     SvxAutoCorrect* pThis = const_cast<SvxAutoCorrect*>(this);
1552     CharClass& rCC = pThis->GetCharClass( eLang );
1553
1554     if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnde ))
1555         return false;
1556
1557     rWord = rTxt.copy( nCapLttrPos, nEnde - nCapLttrPos );
1558     return true;
1559 }
1560
1561 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1562 {
1563     OSL_ENSURE(pLangTable->find(rLanguageTag) == pLangTable->end(), "Language already exists ");
1564
1565     OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true, false, false ));
1566     OUString sShareDirFile( sUserDirFile );
1567
1568     SvxAutoCorrectLanguageListsPtr pLists = 0;
1569
1570     tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1571
1572     std::map<LanguageTag, long>::iterator nFndPos = aLastFileTable.find(rLanguageTag);
1573     if(nFndPos != aLastFileTable.end() &&
1574        (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1575        nAktTime - nLastCheckTime < nMinTime)
1576     {
1577         // no need to test the file, because the last check is not older then
1578         // 2 minutes.
1579         if( bNewFile )
1580         {
1581             sShareDirFile = sUserDirFile;
1582             pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1583             LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1584             pLangTable->insert(aTmp, pLists);
1585             aLastFileTable.erase(nFndPos);
1586         }
1587     }
1588     else if(
1589              ( FStatHelper::IsDocument( sUserDirFile ) ||
1590                FStatHelper::IsDocument( sShareDirFile =
1591                    GetAutoCorrFileName( rLanguageTag, false, false, false ) ) ||
1592                FStatHelper::IsDocument( sShareDirFile =
1593                    GetAutoCorrFileName( rLanguageTag, false, false, true) )
1594              ) ||
1595         ( sShareDirFile = sUserDirFile, bNewFile )
1596           )
1597     {
1598         pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1599         LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1600         pLangTable->insert(aTmp, pLists);
1601         if (nFndPos != aLastFileTable.end())
1602             aLastFileTable.erase(nFndPos);
1603     }
1604     else if( !bNewFile )
1605     {
1606         aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1607     }
1608     return pLists != 0;
1609 }
1610
1611 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1612                                 LanguageType eLang )
1613 {
1614     LanguageTag aLanguageTag( eLang);
1615     boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(aLanguageTag);
1616     if(nTmpVal != pLangTable->end())
1617         return nTmpVal->second->PutText(rShort, rLong);
1618     if(CreateLanguageFile(aLanguageTag))
1619         return pLangTable->find(aLanguageTag)->second->PutText(rShort, rLong);
1620     return false;
1621 }
1622
1623 bool SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1624                                               std::vector<SvxAutocorrWord>& aDeleteEntries,
1625                                               LanguageType eLang )
1626 {
1627     LanguageTag aLanguageTag( eLang);
1628     boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(aLanguageTag);
1629     if(nTmpVal != pLangTable->end())
1630     {
1631         return nTmpVal->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1632     }
1633     else if(CreateLanguageFile( aLanguageTag ))
1634     {
1635         return pLangTable->find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1636     }
1637     return false;
1638
1639 }
1640
1641 //  - return the replacement text (only for SWG-Format, all other
1642 //    can be taken from the word list!)
1643 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1644 {
1645     return false;
1646 }
1647
1648 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1649 {
1650 }
1651
1652 // Text with attribution (only the SWG - SWG format!)
1653 bool SvxAutoCorrect::PutText( const com::sun::star::uno::Reference < com::sun::star::embed::XStorage >&,
1654                               const OUString&, const OUString&, SfxObjectShell&, OUString& )
1655 {
1656     return false;
1657 }
1658
1659 OUString EncryptBlockName_Imp(const OUString& rName)
1660 {
1661     OUStringBuffer aName;
1662     aName.append('#').append(rName);
1663     for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos)
1664     {
1665         if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
1666             aName[nPos] &= 0x0f;
1667     }
1668     return aName.makeStringAndClear();
1669 }
1670
1671 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1672 static void GeneratePackageName ( const OUString& rShort, OUString& rPackageName )
1673 {
1674     OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1675     OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1676
1677     for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1678     {
1679         switch (aBuf[nPos])
1680         {
1681             case '!':
1682             case '/':
1683             case ':':
1684             case '.':
1685             case '\\':
1686                 aBuf[nPos] = '_';
1687                 break;
1688             default:
1689                 break;
1690         }
1691     }
1692
1693     rPackageName = aBuf.makeStringAndClear();
1694 }
1695
1696 static const SvxAutocorrWord* lcl_SearchWordsInList(
1697                 SvxAutoCorrectLanguageListsPtr pList, const OUString& rTxt,
1698                 sal_Int32& rStt, sal_Int32 nEndPos)
1699 {
1700     const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1701     return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1702 }
1703
1704 // the search for the words in the substitution table
1705 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
1706                 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
1707                 SvxAutoCorrDoc&, LanguageTag& rLang )
1708 {
1709     const SvxAutocorrWord* pRet = 0;
1710     LanguageTag aLanguageTag( rLang);
1711     if( aLanguageTag.isSystemLocale() )
1712         aLanguageTag.reset( MsLangId::getSystemLanguage());
1713
1714     /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
1715      * list instead? */
1716
1717     // First search for eLang, then US-English -> English
1718     // and last in LANGUAGE_UNDETERMINED
1719     if(pLangTable->find(aLanguageTag) != pLangTable->end() || CreateLanguageFile(aLanguageTag, false))
1720     {
1721         //the language is available - so bring it on
1722         SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1723         pRet = lcl_SearchWordsInList( pList, rTxt, rStt, nEndPos );
1724         if( pRet )
1725         {
1726             rLang = aLanguageTag;
1727             return pRet;
1728         }
1729     }
1730
1731     // If it still could not be found here, then keep on searching
1732     LanguageType eLang = aLanguageTag.getLanguageType();
1733     LanguageType nTmpKey1 = eLang & 0x7ff; // the main language in many cases DE
1734     if(nTmpKey1 != eLang && (pLangTable->find(aLanguageTag.reset(nTmpKey1)) != pLangTable->end() ||
1735                 CreateLanguageFile(aLanguageTag, false)))
1736     {
1737         //the language is available - so bring it on
1738         SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1739         pRet = lcl_SearchWordsInList( pList, rTxt, rStt, nEndPos );
1740         if( pRet )
1741         {
1742             rLang = aLanguageTag;
1743             return pRet;
1744         }
1745     }
1746
1747     // otherwise for example EN
1748     aLanguageTag.reset(aLanguageTag.getLanguage());
1749     LanguageType nTmpKey2 = aLanguageTag.getLanguageType(false);
1750     if (nTmpKey2 != eLang && nTmpKey2 != LANGUAGE_UNDETERMINED &&
1751                 (pLangTable->find(aLanguageTag) != pLangTable->end() ||
1752                  CreateLanguageFile(aLanguageTag, false)))
1753     {
1754         //the language is available - so bring it on
1755         SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1756         pRet = lcl_SearchWordsInList( pList, rTxt, rStt, nEndPos );
1757         if( pRet )
1758         {
1759             rLang = aLanguageTag;
1760             return pRet;
1761         }
1762     }
1763
1764     if(pLangTable->find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != pLangTable->end() ||
1765             CreateLanguageFile(aLanguageTag, false))
1766     {
1767         //the language is available - so bring it on
1768         SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1769         pRet = lcl_SearchWordsInList( pList, rTxt, rStt, nEndPos );
1770         if( pRet )
1771         {
1772             rLang = aLanguageTag;
1773             return pRet;
1774         }
1775     }
1776     return 0;
1777 }
1778
1779 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
1780                                              const OUString& sWord )
1781 {
1782     LanguageTag aLanguageTag( eLang);
1783
1784     /* TODO-BCP47: again horrible uglyness */
1785
1786     // First search for eLang, then US-English -> English
1787     // and last in LANGUAGE_UNDETERMINED
1788     LanguageType nTmpKey1 = eLang & 0x7ff; // the main language in many cases DE
1789     OUString sTemp(sWord);
1790
1791     if(pLangTable->find(aLanguageTag) != pLangTable->end() || CreateLanguageFile(aLanguageTag, false))
1792     {
1793         //the language is available - so bring it on
1794         SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1795         OUString _sTemp(sWord);
1796         if(pList->GetWrdSttExceptList()->find(_sTemp) != pList->GetWrdSttExceptList()->end() )
1797             return true;
1798     }
1799
1800     // If it still could not be found here, then keep on searching
1801     if(nTmpKey1 != eLang && (pLangTable->find(aLanguageTag.reset(nTmpKey1)) != pLangTable->end() ||
1802                 CreateLanguageFile(aLanguageTag, false)))
1803     {
1804         //the language is available - so bring it on
1805         SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1806         if(pList->GetWrdSttExceptList()->find(sTemp) != pList->GetWrdSttExceptList()->end() )
1807             return true;
1808     }
1809
1810     // otherwise for example EN
1811     aLanguageTag.reset(aLanguageTag.getLanguage());
1812     LanguageType nTmpKey2 = aLanguageTag.getLanguageType(false);
1813     if (nTmpKey2 != eLang && nTmpKey2 != LANGUAGE_UNDETERMINED &&
1814                 (pLangTable->find(aLanguageTag) != pLangTable->end() ||
1815                  CreateLanguageFile(aLanguageTag, false)))
1816     {
1817         //the language is available - so bring it on
1818         SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1819         if(pList->GetWrdSttExceptList()->find(sTemp) != pList->GetWrdSttExceptList()->end() )
1820             return true;
1821     }
1822
1823     if(pLangTable->find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != pLangTable->end() ||
1824             CreateLanguageFile(aLanguageTag, false))
1825     {
1826         //the language is available - so bring it on
1827         SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1828         if(pList->GetWrdSttExceptList()->find(sTemp) != pList->GetWrdSttExceptList()->end() )
1829             return true;
1830     }
1831     return false;
1832 }
1833
1834 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
1835 {
1836     OUString sAbk('~');
1837     SvStringsISortDtor::const_iterator it = pList->find( sAbk );
1838     sal_uInt16 nPos = it - pList->begin();
1839     if( nPos < pList->size() )
1840     {
1841         OUString sLowerWord(sWord.toAsciiLowerCase());
1842         OUString pAbk;
1843         for( sal_uInt16 n = nPos;
1844                 n < pList->size() &&
1845                 '~' == ( pAbk = (*pList)[ n ])[ 0 ];
1846             ++n )
1847         {
1848             // ~ and ~. are not allowed!
1849             if( 2 < pAbk.getLength() && pAbk.getLength() - 1 <= sWord.getLength() )
1850             {
1851                 OUString sLowerAbk(pAbk.toAsciiLowerCase());
1852                 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
1853                 {
1854                     if( !--i )      // agrees
1855                         return true;
1856
1857                     if( sLowerAbk[i] != sLowerWord[--ii])
1858                         break;
1859                 }
1860             }
1861         }
1862     }
1863     OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
1864             "Wrongly sorted exception list?" );
1865     return false;
1866 }
1867
1868 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
1869                                 const OUString& sWord, bool bAbbreviation)
1870 {
1871     LanguageTag aLanguageTag( eLang);
1872
1873     /* TODO-BCP47: did I mention terrible horrible uglyness? */
1874
1875     // First search for eLang, then US-English -> English
1876     // and last in LANGUAGE_UNDETERMINED
1877     LanguageType nTmpKey1 = eLang & 0x7ff; // the main language in many cases DE
1878     OUString sTemp( sWord );
1879
1880     if(pLangTable->find(aLanguageTag) != pLangTable->end() || CreateLanguageFile(aLanguageTag, false))
1881     {
1882         //the language is available - so bring it on
1883         const SvStringsISortDtor* pList = pLangTable->find(aLanguageTag)->second->GetCplSttExceptList();
1884         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sTemp) != pList->end() )
1885             return true;
1886     }
1887
1888     // If it still could not be found here, then keep on searching
1889     if(nTmpKey1 != eLang && (pLangTable->find(aLanguageTag.reset(nTmpKey1)) != pLangTable->end() ||
1890                 CreateLanguageFile(aLanguageTag, false)))
1891     {
1892         const SvStringsISortDtor* pList = pLangTable->find(aLanguageTag)->second->GetCplSttExceptList();
1893         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sTemp) != pList->end() )
1894             return true;
1895     }
1896
1897     // otherwise for example EN
1898     aLanguageTag.reset(aLanguageTag.getLanguage());
1899     LanguageType nTmpKey2 = aLanguageTag.getLanguageType(false);
1900     if (nTmpKey2 != eLang && nTmpKey2 != LANGUAGE_UNDETERMINED &&
1901                 (pLangTable->find(aLanguageTag) != pLangTable->end() ||
1902                  CreateLanguageFile(aLanguageTag, false)))
1903     {
1904         //the language is available - so bring it on
1905         const SvStringsISortDtor* pList = pLangTable->find(aLanguageTag)->second->GetCplSttExceptList();
1906         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sTemp) != pList->end() )
1907             return true;
1908     }
1909
1910     if(pLangTable->find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != pLangTable->end() ||
1911             CreateLanguageFile(aLanguageTag, false))
1912     {
1913         //the language is available - so bring it on
1914         const SvStringsISortDtor* pList = pLangTable->find(aLanguageTag)->second->GetCplSttExceptList();
1915         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sTemp) != pList->end() )
1916             return true;
1917     }
1918     return false;
1919 }
1920
1921 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
1922                                             bool bNewFile, bool bTst, bool bUnlocalized ) const
1923 {
1924     OUString sRet, sExt( rLanguageTag.getBcp47() );
1925     if (bUnlocalized)
1926     {
1927         // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
1928         ::std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
1929         if (!vecFallBackStrings.empty())
1930            sExt = vecFallBackStrings[0];
1931     }
1932
1933     sExt = "_" + sExt + ".dat";
1934     if( bNewFile )
1935         ( sRet = sUserAutoCorrFile )  += sExt;
1936     else if( !bTst )
1937         ( sRet = sShareAutoCorrFile )  += sExt;
1938     else
1939     {
1940         // test first in the user directory - if not exist, then
1941         ( sRet = sUserAutoCorrFile ) += sExt;
1942         if( !FStatHelper::IsDocument( sRet ))
1943             ( sRet = sShareAutoCorrFile ) += sExt;
1944     }
1945     return sRet;
1946 }
1947
1948 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
1949                 SvxAutoCorrect& rParent,
1950                 const OUString& rShareAutoCorrectFile,
1951                 const OUString& rUserAutoCorrectFile)
1952 :   sShareAutoCorrFile( rShareAutoCorrectFile ),
1953     sUserAutoCorrFile( rUserAutoCorrectFile ),
1954     aModifiedDate( Date::EMPTY ),
1955     aModifiedTime( tools::Time::EMPTY ),
1956     aLastCheckTime( tools::Time::EMPTY ),
1957     pCplStt_ExcptLst( 0 ),
1958     pWrdStt_ExcptLst( 0 ),
1959     pAutocorr_List( 0 ),
1960     rAutoCorrect(rParent),
1961     nFlags(0)
1962 {
1963 }
1964
1965 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
1966 {
1967     delete pCplStt_ExcptLst;
1968     delete pWrdStt_ExcptLst;
1969     delete pAutocorr_List;
1970 }
1971
1972 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
1973 {
1974     // Access the file system only every 2 minutes to check the date stamp
1975     bool bRet = false;
1976
1977     tools::Time nMinTime( 0, 2 );
1978     tools::Time nAktTime( tools::Time::SYSTEM );
1979     if( aLastCheckTime > nAktTime ||                    // overflow?
1980         ( nAktTime -= aLastCheckTime ) > nMinTime )     // min time past
1981     {
1982         Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
1983         if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
1984                                             &aTstDate, &aTstTime ) &&
1985             ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
1986         {
1987             bRet = true;
1988             // then remove all the lists fast!
1989             if( CplSttLstLoad & nFlags && pCplStt_ExcptLst )
1990                 delete pCplStt_ExcptLst, pCplStt_ExcptLst = 0;
1991             if( WrdSttLstLoad & nFlags && pWrdStt_ExcptLst )
1992                 delete pWrdStt_ExcptLst, pWrdStt_ExcptLst = 0;
1993             if( ChgWordLstLoad & nFlags && pAutocorr_List )
1994                 delete pAutocorr_List, pAutocorr_List = 0;
1995             nFlags &= ~(CplSttLstLoad | WrdSttLstLoad | ChgWordLstLoad );
1996         }
1997         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
1998     }
1999     return bRet;
2000 }
2001
2002 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2003                                         SvStringsISortDtor*& rpLst,
2004                                         const sal_Char* pStrmName,
2005                                         tools::SvRef<SotStorage>& rStg)
2006 {
2007     if( rpLst )
2008         rpLst->clear();
2009     else
2010         rpLst = new SvStringsISortDtor;
2011
2012     {
2013         OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
2014         OUString sTmp( sStrmName );
2015
2016         if( rStg.Is() && rStg->IsStream( sStrmName ) )
2017         {
2018             tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sTmp,
2019                 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2020             if( SVSTREAM_OK != xStrm->GetError())
2021             {
2022                 xStrm.Clear();
2023                 rStg.Clear();
2024                 RemoveStream_Imp( sStrmName );
2025             }
2026             else
2027             {
2028                 uno::Reference< uno::XComponentContext > xContext =
2029                     comphelper::getProcessComponentContext();
2030
2031                 xml::sax::InputSource aParserInput;
2032                 aParserInput.sSystemId = sStrmName;
2033
2034                 xStrm->Seek( 0L );
2035                 xStrm->SetBufferSize( 8 * 1024 );
2036                 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2037
2038                 // get filter
2039                 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2040
2041                 // connect parser and filter
2042                 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2043                 uno::Reference< xml::sax::XFastTokenHandler > xTokenHandler = static_cast< xml::sax::XFastTokenHandler* >( new SvXMLAutoCorrectTokenHandler );
2044                 xParser->setFastDocumentHandler( xFilter );
2045                 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2046                 xParser->setTokenHandler( xTokenHandler );
2047
2048                 // parse
2049                 try
2050                 {
2051                     xParser->parseStream( aParserInput );
2052                 }
2053                 catch( const xml::sax::SAXParseException& )
2054                 {
2055                     // re throw ?
2056                 }
2057                 catch( const xml::sax::SAXException& )
2058                 {
2059                     // re throw ?
2060                 }
2061                 catch( const io::IOException& )
2062                 {
2063                     // re throw ?
2064                 }
2065             }
2066         }
2067
2068         // Set time stamp
2069         FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2070                                         &aModifiedDate, &aModifiedTime );
2071         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2072     }
2073
2074 }
2075
2076 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2077                             const SvStringsISortDtor& rLst,
2078                             const sal_Char* pStrmName,
2079                             tools::SvRef<SotStorage> &rStg,
2080                             bool bConvert )
2081 {
2082     if( rStg.Is() )
2083     {
2084         OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
2085         if( rLst.empty() )
2086         {
2087             rStg->Remove( sStrmName );
2088             rStg->Commit();
2089         }
2090         else
2091         {
2092             tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2093                     ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2094             if( xStrm.Is() )
2095             {
2096                 xStrm->SetSize( 0 );
2097                 xStrm->SetBufferSize( 8192 );
2098                 OUString aMime( "text/xml" );
2099                 uno::Any aAny;
2100                 aAny <<= aMime;
2101                 xStrm->SetProperty( OUString("MediaType"), aAny );
2102
2103
2104                 uno::Reference< uno::XComponentContext > xContext =
2105                     comphelper::getProcessComponentContext();
2106
2107                 uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
2108                 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2109                 xWriter->setOutputStream(xOut);
2110
2111                 uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2112                 SvXMLExceptionListExport aExp( xContext, rLst, sStrmName, xHandler );
2113
2114                 aExp.exportDoc( XML_BLOCK_LIST );
2115
2116                 xStrm->Commit();
2117                 if( xStrm->GetError() == SVSTREAM_OK )
2118                 {
2119                     xStrm.Clear();
2120                     if (!bConvert)
2121                     {
2122                         rStg->Commit();
2123                         if( SVSTREAM_OK != rStg->GetError() )
2124                         {
2125                             rStg->Remove( sStrmName );
2126                             rStg->Commit();
2127                         }
2128                     }
2129                 }
2130             }
2131         }
2132     }
2133 }
2134
2135 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2136 {
2137     if( pAutocorr_List )
2138         pAutocorr_List->DeleteAndDestroyAll();
2139     else
2140         pAutocorr_List = new SvxAutocorrWordList();
2141
2142     try
2143     {
2144         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2145         OUString aXMLWordListName( pXMLImplAutocorr_ListStr, strlen(pXMLImplAutocorr_ListStr), RTL_TEXTENCODING_MS_1252 );
2146         uno::Reference < io::XStream > xStrm = xStg->openStreamElement( aXMLWordListName, embed::ElementModes::READ );
2147         uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2148
2149         xml::sax::InputSource aParserInput;
2150         aParserInput.sSystemId = aXMLWordListName;
2151         aParserInput.aInputStream = xStrm->getInputStream();
2152
2153         // get parser
2154         uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2155         SAL_INFO("editeng", "AutoCorrect Import" );
2156         uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List, rAutoCorrect, xStg );
2157         uno::Reference< xml::sax::XFastTokenHandler > xTokenHandler = static_cast< xml::sax::XFastTokenHandler* >( new SvXMLAutoCorrectTokenHandler );
2158
2159         // connect parser and filter
2160         xParser->setFastDocumentHandler( xFilter );
2161         xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2162         xParser->setTokenHandler(xTokenHandler);
2163
2164         // parse
2165         xParser->parseStream( aParserInput );
2166     }
2167     catch ( const uno::Exception& )
2168     {
2169     }
2170
2171     // Set time stamp
2172     FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2173                                     &aModifiedDate, &aModifiedTime );
2174     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2175
2176     return pAutocorr_List;
2177 }
2178
2179 void SvxAutoCorrectLanguageLists::SetAutocorrWordList( SvxAutocorrWordList* pList )
2180 {
2181     if( pAutocorr_List && pList != pAutocorr_List )
2182         delete pAutocorr_List;
2183     pAutocorr_List = pList;
2184     if( !pAutocorr_List )
2185     {
2186         OSL_ENSURE( false, "No valid list" );
2187         pAutocorr_List = new SvxAutocorrWordList();
2188     }
2189     nFlags |= ChgWordLstLoad;
2190 }
2191
2192 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2193 {
2194     if( !( ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2195         SetAutocorrWordList( LoadAutocorrWordList() );
2196     return pAutocorr_List;
2197 }
2198
2199 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2200 {
2201     if( !( CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2202         SetCplSttExceptList( LoadCplSttExceptList() );
2203     return pCplStt_ExcptLst;
2204 }
2205
2206 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2207 {
2208     bool aRet = false;
2209     if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2210     {
2211         MakeUserStorage_Impl();
2212         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2213
2214         SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2215
2216         xStg = 0;
2217         // Set time stamp
2218         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2219                                             &aModifiedDate, &aModifiedTime );
2220         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2221         aRet = true;
2222     }
2223     return aRet;
2224 }
2225
2226 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew)
2227 {
2228     bool aRet = false;
2229     SvStringsISortDtor* pExceptList = LoadWrdSttExceptList();
2230     if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second )
2231     {
2232         MakeUserStorage_Impl();
2233         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2234
2235         SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2236
2237         xStg = 0;
2238         // Set time stamp
2239         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2240                                             &aModifiedDate, &aModifiedTime );
2241         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2242         aRet = true;
2243     }
2244     return aRet;
2245 }
2246
2247 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2248 {
2249     try
2250     {
2251         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2252         OUString sTemp ( pXMLImplCplStt_ExcptLstStr );
2253         if( xStg.Is() && xStg->IsContained( sTemp ) )
2254             LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2255     }
2256     catch (const css::ucb::ContentCreationException&)
2257     {
2258     }
2259     return pCplStt_ExcptLst;
2260 }
2261
2262 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2263 {
2264     MakeUserStorage_Impl();
2265     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2266
2267     SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2268
2269     xStg = 0;
2270
2271     // Set time stamp
2272     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2273                                             &aModifiedDate, &aModifiedTime );
2274     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2275 }
2276
2277 void SvxAutoCorrectLanguageLists::SetCplSttExceptList( SvStringsISortDtor* pList )
2278 {
2279     if( pCplStt_ExcptLst && pList != pCplStt_ExcptLst )
2280         delete pCplStt_ExcptLst;
2281
2282     pCplStt_ExcptLst = pList;
2283     if( !pCplStt_ExcptLst )
2284     {
2285         OSL_ENSURE( false, "No valid list" );
2286         pCplStt_ExcptLst = new SvStringsISortDtor;
2287     }
2288     nFlags |= CplSttLstLoad;
2289 }
2290
2291 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList()
2292 {
2293     try
2294     {
2295         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2296         OUString sTemp ( pXMLImplWrdStt_ExcptLstStr );
2297         if( xStg.Is() && xStg->IsContained( sTemp ) )
2298             LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2299     }
2300     catch (const css::ucb::ContentCreationException &e)
2301     {
2302         SAL_WARN("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList: Caught exception: " << e.Message);
2303     }
2304     return pWrdStt_ExcptLst;
2305 }
2306
2307 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList()
2308 {
2309     MakeUserStorage_Impl();
2310     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2311
2312     SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2313
2314     xStg = 0;
2315     // Set time stamp
2316     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2317                                             &aModifiedDate, &aModifiedTime );
2318     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2319 }
2320
2321 void SvxAutoCorrectLanguageLists::SetWrdSttExceptList( SvStringsISortDtor* pList )
2322 {
2323     if( pWrdStt_ExcptLst && pList != pWrdStt_ExcptLst )
2324         delete pWrdStt_ExcptLst;
2325     pWrdStt_ExcptLst = pList;
2326     if( !pWrdStt_ExcptLst )
2327     {
2328         OSL_ENSURE( false, "No valid list" );
2329         pWrdStt_ExcptLst = new SvStringsISortDtor;
2330     }
2331     nFlags |= WrdSttLstLoad;
2332 }
2333
2334 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList()
2335 {
2336     if( !( WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2337         SetWrdSttExceptList( LoadWrdSttExceptList() );
2338     return pWrdStt_ExcptLst;
2339 }
2340
2341 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2342 {
2343     if( sShareAutoCorrFile != sUserAutoCorrFile )
2344     {
2345         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2346         if( xStg.Is() && SVSTREAM_OK == xStg->GetError() &&
2347             xStg->IsStream( rName ) )
2348         {
2349             xStg->Remove( rName );
2350             xStg->Commit();
2351
2352             xStg = 0;
2353         }
2354     }
2355 }
2356
2357 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2358 {
2359     // The conversion needs to happen if the file is already in the user
2360     // directory and is in the old format. Additionally it needs to
2361     // happen when the file is being copied from share to user.
2362
2363     bool bError = false, bConvert = false, bCopy = false;
2364     INetURLObject aDest;
2365     INetURLObject aSource;
2366
2367     if (sUserAutoCorrFile != sShareAutoCorrFile )
2368     {
2369         aSource = INetURLObject ( sShareAutoCorrFile );
2370         aDest = INetURLObject ( sUserAutoCorrFile );
2371         if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2372         {
2373             aDest.SetExtension ( OUString("bak") );
2374             bConvert = true;
2375         }
2376         bCopy = true;
2377     }
2378     else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2379     {
2380         aSource = INetURLObject ( sUserAutoCorrFile );
2381         aDest = INetURLObject ( sUserAutoCorrFile );
2382         aDest.SetExtension ( OUString("bak") );
2383         bCopy = bConvert = true;
2384     }
2385     if (bCopy)
2386     {
2387         try
2388         {
2389             OUString sMain(aDest.GetMainURL( INetURLObject::DECODE_TO_IURI ));
2390             sal_Unicode cSlash = '/';
2391             sal_Int32 nSlashPos = sMain.lastIndexOf(cSlash);
2392             sMain = sMain.copy(0, nSlashPos);
2393             ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2394             Any aAny;
2395             TransferInfo aInfo;
2396             aInfo.NameClash = NameClash::OVERWRITE;
2397             aInfo.NewTitle  = aDest.GetName();
2398             aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DECODE_TO_IURI );
2399             aInfo.MoveData  = sal_False;
2400             aAny <<= aInfo;
2401             aNewContent.executeCommand( OUString (  "transfer"  ), aAny);
2402         }
2403         catch (...)
2404         {
2405             bError = true;
2406         }
2407     }
2408     if (bConvert && !bError)
2409     {
2410         tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DECODE_TO_IURI ), StreamMode::READ );
2411         tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
2412
2413         if( xSrcStg.Is() && xDstStg.Is() )
2414         {
2415             OUString sXMLWord     ( pXMLImplWrdStt_ExcptLstStr );
2416             OUString sXMLSentence ( pXMLImplCplStt_ExcptLstStr );
2417             SvStringsISortDtor  *pTmpWordList = NULL;
2418
2419             if (xSrcStg->IsContained( sXMLWord ) )
2420                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg );
2421
2422             if (pTmpWordList)
2423             {
2424                 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true );
2425                 pTmpWordList->clear();
2426                 pTmpWordList = NULL;
2427             }
2428
2429
2430             if (xSrcStg->IsContained( sXMLSentence ) )
2431                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2432
2433             if (pTmpWordList)
2434             {
2435                 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2436                 pTmpWordList->clear();
2437             }
2438
2439             GetAutocorrWordList();
2440             MakeBlocklist_Imp( *xDstStg );
2441             sShareAutoCorrFile = sUserAutoCorrFile;
2442             xDstStg = 0;
2443             try
2444             {
2445                 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DECODE_TO_IURI ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2446                 aContent.executeCommand ( OUString( "delete" ), makeAny ( true ) );
2447             }
2448             catch (...)
2449             {
2450             }
2451         }
2452     }
2453     else if( bCopy && !bError )
2454         sShareAutoCorrFile = sUserAutoCorrFile;
2455 }
2456
2457 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2458 {
2459     OUString sStrmName( pXMLImplAutocorr_ListStr, strlen(pXMLImplAutocorr_ListStr), RTL_TEXTENCODING_MS_1252 );
2460     bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2461     if( !bRemove )
2462     {
2463         tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( sStrmName,
2464                     ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2465         if( refList.Is() )
2466         {
2467             refList->SetSize( 0 );
2468             refList->SetBufferSize( 8192 );
2469             OUString aPropName( "MediaType" );
2470             OUString aMime( "text/xml" );
2471             uno::Any aAny;
2472             aAny <<= aMime;
2473             refList->SetProperty( aPropName, aAny );
2474
2475             uno::Reference< uno::XComponentContext > xContext =
2476                 comphelper::getProcessComponentContext();
2477
2478             uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2479             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2480             xWriter->setOutputStream(xOut);
2481
2482             uno::Reference<xml::sax::XDocumentHandler> xHandler(xWriter, uno::UNO_QUERY);
2483             SvXMLAutoCorrectExport aExp( xContext, pAutocorr_List, sStrmName, xHandler );
2484
2485             aExp.exportDoc( XML_BLOCK_LIST );
2486
2487             refList->Commit();
2488             bRet = SVSTREAM_OK == refList->GetError();
2489             if( bRet )
2490             {
2491                 refList.Clear();
2492                 rStg.Commit();
2493                 if( SVSTREAM_OK != rStg.GetError() )
2494                 {
2495                     bRemove = true;
2496                     bRet = false;
2497                 }
2498             }
2499         }
2500         else
2501             bRet = false;
2502     }
2503
2504     if( bRemove )
2505     {
2506         rStg.Remove( sStrmName );
2507         rStg.Commit();
2508     }
2509
2510     return bRet;
2511 }
2512
2513 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2514 {
2515     // First get the current list!
2516     GetAutocorrWordList();
2517
2518     MakeUserStorage_Impl();
2519     tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2520
2521     bool bRet = xStorage.Is() && SVSTREAM_OK == xStorage->GetError();
2522
2523     if( bRet )
2524     {
2525         for ( sal_uInt32 i=0; i < aDeleteEntries.size(); i++ )
2526         {
2527             SvxAutocorrWord aWordToDelete = aDeleteEntries[i];
2528             SvxAutocorrWord *pFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2529             if( pFoundEntry )
2530             {
2531                 if( !pFoundEntry->IsTextOnly() )
2532                 {
2533                     OUString aName( aWordToDelete.GetShort() );
2534                     if (xStorage->IsOLEStorage())
2535                         aName = EncryptBlockName_Imp(aName);
2536                     else
2537                         GeneratePackageName ( aWordToDelete.GetShort(), aName );
2538
2539                     if( xStorage->IsContained( aName ) )
2540                     {
2541                         xStorage->Remove( aName );
2542                         bRet = xStorage->Commit();
2543                     }
2544                 }
2545                 delete pFoundEntry;
2546             }
2547         }
2548
2549         for ( sal_uInt32 i=0; i < aNewEntries.size(); i++ )
2550         {
2551             SvxAutocorrWord *pWordToAdd = new SvxAutocorrWord( aNewEntries[i].GetShort(), aNewEntries[i].GetLong(), true );
2552             SvxAutocorrWord *pRemoved = pAutocorr_List->FindAndRemove( pWordToAdd );
2553             if( pRemoved )
2554             {
2555                 if( !pRemoved->IsTextOnly() )
2556                 {
2557                     // Still have to remove the Storage
2558                     OUString sStorageName( pWordToAdd->GetShort() );
2559                     if (xStorage->IsOLEStorage())
2560                         sStorageName = EncryptBlockName_Imp(sStorageName);
2561                     else
2562                         GeneratePackageName ( pWordToAdd->GetShort(), sStorageName);
2563
2564                     if( xStorage->IsContained( sStorageName ) )
2565                         xStorage->Remove( sStorageName );
2566                 }
2567                 delete pRemoved;
2568             }
2569             bRet = pAutocorr_List->Insert( pWordToAdd );
2570
2571             if ( !bRet )
2572             {
2573                 delete pWordToAdd;
2574                 break;
2575             }
2576         }
2577
2578         if ( bRet )
2579         {
2580             bRet = MakeBlocklist_Imp( *xStorage );
2581         }
2582     }
2583     return bRet;
2584 }
2585
2586 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2587 {
2588     // First get the current list!
2589     GetAutocorrWordList();
2590
2591     MakeUserStorage_Impl();
2592     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2593
2594     bool bRet = xStg.Is() && SVSTREAM_OK == xStg->GetError();
2595
2596     // Update the word list
2597     if( bRet )
2598     {
2599         SvxAutocorrWord* pNew = new SvxAutocorrWord( rShort, rLong, true );
2600         SvxAutocorrWord *pRemove = pAutocorr_List->FindAndRemove( pNew );
2601         if( pRemove )
2602         {
2603             if( !pRemove->IsTextOnly() )
2604             {
2605                 // Still have to remove the Storage
2606                 OUString sStgNm( rShort );
2607                 if (xStg->IsOLEStorage())
2608                     sStgNm = EncryptBlockName_Imp(sStgNm);
2609                 else
2610                     GeneratePackageName ( rShort, sStgNm);
2611
2612                 if( xStg->IsContained( sStgNm ) )
2613                     xStg->Remove( sStgNm );
2614             }
2615             delete pRemove;
2616         }
2617
2618         if( pAutocorr_List->Insert( pNew ) )
2619         {
2620             bRet = MakeBlocklist_Imp( *xStg );
2621             xStg = 0;
2622         }
2623         else
2624         {
2625             delete pNew;
2626             bRet = false;
2627         }
2628     }
2629     return bRet;
2630 }
2631
2632 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2633                                                SfxObjectShell& rShell )
2634 {
2635     // First get the current list!
2636     GetAutocorrWordList();
2637
2638     MakeUserStorage_Impl();
2639
2640     bool bRet = false;
2641     OUString sLong;
2642     try
2643     {
2644         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2645         bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2646         xStg = 0;
2647
2648         // Update the word list
2649         if( bRet )
2650         {
2651             SvxAutocorrWord* pNew = new SvxAutocorrWord( rShort, sLong, false );
2652             if( pAutocorr_List->Insert( pNew ) )
2653             {
2654                 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2655                 MakeBlocklist_Imp( *xStor );
2656             }
2657             else
2658                 delete pNew;
2659         }
2660     }
2661     catch ( const uno::Exception& )
2662     {
2663     }
2664
2665     return bRet;
2666 }
2667
2668 // Delete an entry
2669 bool SvxAutoCorrectLanguageLists::DeleteText( const OUString& rShort )
2670 {
2671     // First get the current list!
2672     GetAutocorrWordList();
2673
2674     MakeUserStorage_Impl();
2675
2676     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2677     bool bRet = xStg.Is() && SVSTREAM_OK == xStg->GetError();
2678     if( bRet )
2679     {
2680         SvxAutocorrWord aTmp( rShort, rShort );
2681         SvxAutocorrWord *pFnd = pAutocorr_List->FindAndRemove( &aTmp );
2682         if( pFnd )
2683         {
2684             if( !pFnd->IsTextOnly() )
2685             {
2686                 OUString aName( rShort );
2687                 if (xStg->IsOLEStorage())
2688                     aName = EncryptBlockName_Imp(aName);
2689                 else
2690                     GeneratePackageName ( rShort, aName );
2691                 if( xStg->IsContained( aName ) )
2692                 {
2693                     xStg->Remove( aName );
2694                     bRet = xStg->Commit();
2695                 }
2696
2697             }
2698             delete pFnd;
2699             MakeBlocklist_Imp( *xStg );
2700             xStg = 0;
2701         }
2702         else
2703             bRet = false;
2704     }
2705     return bRet;
2706 }
2707
2708 // Keep the list sorted ...
2709 struct CompareSvxAutocorrWordList
2710 {
2711     bool operator()( SvxAutocorrWord* const& lhs, SvxAutocorrWord* const& rhs ) const
2712     {
2713         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2714         return rCmp.compareString( lhs->GetShort(), rhs->GetShort() ) < 0;
2715     }
2716 };
2717
namespace {

// Ordered container of entry pointers, sorted with CompareSvxAutocorrWordList.
typedef std::set<SvxAutocorrWord*, CompareSvxAutocorrWordList> AutocorrWordSetType;
// Unordered lookup from an OUString key to the entry pointer.
typedef std::unordered_map<OUString, SvxAutocorrWord*, OUStringHash> AutocorrWordHashType;

}
2724
2725 struct SvxAutocorrWordList::Impl
2726 {
2727
2728     // only one of these contains the data
2729     mutable AutocorrWordSetType maSet;
2730     mutable AutocorrWordHashType maHash; // key is 'Short'
2731
2732     void DeleteAndDestroyAll()
2733     {
2734         for (AutocorrWordHashType::const_iterator it = maHash.begin(); it != maHash.end(); ++it)
2735             delete it->second;
2736         maHash.clear();
2737
2738         for (AutocorrWordSetType::const_iterator it2 = maSet.begin(); it2 != maSet.end(); ++it2)
2739             delete *it2;
2740         maSet.clear();
2741     }
2742 };
2743
2744 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2745
2746 SvxAutocorrWordList::~SvxAutocorrWordList()
2747 {
2748     mpImpl->DeleteAndDestroyAll();
2749     delete mpImpl;
2750 }
2751
2752 void SvxAutocorrWordList::DeleteAndDestroyAll()
2753 {
2754     mpImpl->DeleteAndDestroyAll();
2755 }
2756
2757 // returns true if inserted
2758 bool SvxAutocorrWordList::Insert(SvxAutocorrWord *pWord) const
2759 {
2760     if ( mpImpl->maSet.empty() ) // use the hash
2761     {
2762         OUString aShort( pWord->GetShort() );
2763         return mpImpl->maHash.insert( std::pair<OUString, SvxAutocorrWord *>( aShort, pWord ) ).second;
2764     }
2765     else
2766         return mpImpl->maSet.insert( pWord ).second;
2767 }
2768
2769 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2770 {
2771     SvxAutocorrWord* pNew = new SvxAutocorrWord( sWrong, sRight, bOnlyTxt );
2772     if( !Insert( pNew ) )
2773         delete pNew;
2774 }
2775
2776 bool SvxAutocorrWordList::empty() const
2777 {
2778     return mpImpl->maHash.empty() && mpImpl->maSet.empty();
2779 }
2780
2781 SvxAutocorrWord *SvxAutocorrWordList::FindAndRemove(SvxAutocorrWord *pWord)
2782 {
2783     SvxAutocorrWord *pMatch = NULL;
2784
2785     if ( mpImpl->maSet.empty() ) // use the hash
2786     {
2787         AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2788         if( it != mpImpl->maHash.end() )
2789         {
2790             pMatch = it->second;
2791             mpImpl->maHash.erase (it);
2792         }
2793     }
2794     else
2795     {
2796         AutocorrWordSetType::iterator it = mpImpl->maSet.find( pWord );
2797         if( it != mpImpl->maSet.end() )
2798         {
2799             pMatch = *it;
2800             mpImpl->maSet.erase (it);
2801         }
2802     }
2803     return pMatch;
2804 }
2805
2806 // return the sorted contents - defer sorting until we have to.
2807 SvxAutocorrWordList::Content SvxAutocorrWordList::getSortedContent() const
2808 {
2809     Content aContent;
2810
2811     // convert from hash to set permanantly
2812     if ( mpImpl->maSet.empty() )
2813     {
2814         // This beasty has some O(N log(N)) in a terribly slow ICU collate fn.
2815         for (AutocorrWordHashType::const_iterator it = mpImpl->maHash.begin(); it != mpImpl->maHash.end(); ++it)
2816             mpImpl->maSet.insert( it->second );
2817         mpImpl->maHash.clear();
2818     }
2819     for (AutocorrWordSetType::const_iterator it = mpImpl->maSet.begin(); it != mpImpl->maSet.end(); ++it)
2820         aContent.push_back( *it );
2821
2822     return aContent;
2823 }
2824
2825 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
2826                                       const OUString &rTxt,
2827                                       sal_Int32 &rStt,
2828                                       sal_Int32 nEndPos) const
2829 {
2830     const OUString& rChk = pFnd->GetShort();
2831
2832     sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
2833     sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
2834     sal_Int32 nSttWdPos = nEndPos;
2835
2836     // direct replacement of keywords surrounded by colons (for example, ":name:")
2837     bool bColonNameColon = rTxt.getLength() > nEndPos &&
2838         rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
2839     if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard )
2840     {
2841
2842         bool bWasWordDelim = false;
2843         sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
2844         if (bColonNameColon)
2845             nCalcStt++;
2846         if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
2847               ( nCalcStt < rStt &&
2848                 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
2849         {
2850             TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
2851             OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
2852             if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
2853             {
2854                 rStt = nCalcStt;
2855                 if (!left_wildcard)
2856                 {
2857                     // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
2858                     if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
2859                         return NULL;
2860                     return pFnd;
2861                 }
2862                 // get the first word delimiter position before the matching ".*word" pattern
2863                 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
2864                     ;
2865                 if (bWasWordDelim) rStt++;
2866                 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
2867                 // avoid double spaces before simple "word" replacement
2868                 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong();
2869                 SvxAutocorrWord* pNew = new SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern);
2870                 if( Insert( pNew ) ) return pNew; else delete pNew;
2871             }
2872         } else
2873         // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
2874         if ( right_wildcard )
2875         {
2876
2877             OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
2878             // Get the last word delimiter position
2879             bool not_suffix;
2880
2881             while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
2882                 ;
2883             // search the first occurrence (with a left word delimitation, if needed)
2884             sal_Int32 nFndPos = -1;
2885             do {
2886                 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1);
2887                 not_suffix = (bWasWordDelim && (nSttWdPos >= nFndPos + sTmp.getLength()));
2888             } while ( nFndPos != -1 && (!(left_wildcard || (!left_wildcard && (!nFndPos || IsWordDelim( rTxt[ nFndPos - 1 ])))) || not_suffix));
2889
2890             if ( nFndPos != -1 )
2891             {
2892                 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
2893
2894                 if ( left_wildcard )
2895                 {
2896                     // get the first word delimiter position before the matching ".*word.*" pattern
2897                     while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
2898                         ;
2899                     if (bWasWordDelim) nFndPos++;
2900                 }
2901                 if (nEndPos + extra_repl <= nFndPos)
2902                 {
2903                     return 0;
2904                 }
2905                 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
2906                 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl);
2907
2908                 OUString aLong;
2909                 rStt = nFndPos;
2910                 if ( !left_wildcard )
2911                 {
2912                     sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
2913                     aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : "");
2914                 } else {
2915                     OUStringBuffer buf;
2916                     do {
2917                         nSttWdPos = rTxt.indexOf( sTmp, nFndPos);
2918                         if (nSttWdPos != -1)
2919                         {
2920                             buf.append(rTxt.copy(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
2921                             nFndPos = nSttWdPos + sTmp.getLength();
2922                         }
2923                     } while (nSttWdPos != -1);
2924                     if (nEndPos - nFndPos > extra_repl) buf.append(rTxt.copy(nFndPos, nEndPos - nFndPos));
2925                     aLong = buf.makeStringAndClear();
2926                 }
2927                 SvxAutocorrWord* pNew = new SvxAutocorrWord(aShort, aLong);
2928                 if ( Insert( pNew ) )
2929                 {
2930                     if ( IsWordDelim(rTxt[nEndPos]) ) return pNew;
2931                 } else delete pNew;
2932             }
2933         }
2934     }
2935     return NULL;
2936 }
2937
2938 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt,
2939                                                               sal_Int32 nEndPos) const
2940 {
2941     for (AutocorrWordHashType::const_iterator it = mpImpl->maHash.begin(); it != mpImpl->maHash.end(); ++it)
2942     {
2943         if( const SvxAutocorrWord *aTmp = WordMatches( it->second, rTxt, rStt, nEndPos ) )
2944             return aTmp;
2945     }
2946
2947     for (AutocorrWordSetType::const_iterator it2 = mpImpl->maSet.begin(); it2 != mpImpl->maSet.end(); ++it2)
2948     {
2949         if( const SvxAutocorrWord *aTmp = WordMatches( *it2, rTxt, rStt, nEndPos ) )
2950             return aTmp;
2951     }
2952     return 0;
2953 }
2954
2955 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */