nss: upgrade to release 3.73
[LibreOffice.git] / editeng / source / misc / svxacorr.cxx
blobe6b951e7b18706b18a934ec93829bc5116d8e2f1
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <memory>
21 #include <string_view>
22 #include <sal/config.h>
24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
25 #include <com/sun/star/embed/XStorage.hpp>
26 #include <com/sun/star/io/IOException.hpp>
27 #include <com/sun/star/io/XStream.hpp>
28 #include <tools/urlobj.hxx>
29 #include <i18nlangtag/mslangid.hxx>
30 #include <i18nutil/transliteration.hxx>
31 #include <sal/log.hxx>
32 #include <osl/diagnose.h>
33 #include <vcl/svapp.hxx>
34 #include <vcl/settings.hxx>
35 #include <svl/fstathelper.hxx>
36 #include <svl/urihelper.hxx>
37 #include <unotools/charclass.hxx>
38 #include <com/sun/star/i18n/UnicodeType.hpp>
39 #include <unotools/collatorwrapper.hxx>
40 #include <com/sun/star/i18n/UnicodeScript.hpp>
41 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
42 #include <unotools/localedatawrapper.hxx>
43 #include <unotools/transliterationwrapper.hxx>
44 #include <comphelper/processfactory.hxx>
45 #include <comphelper/storagehelper.hxx>
46 #include <comphelper/string.hxx>
47 #include <editeng/editids.hrc>
48 #include <sot/storage.hxx>
49 #include <editeng/udlnitem.hxx>
50 #include <editeng/wghtitem.hxx>
51 #include <editeng/postitem.hxx>
52 #include <editeng/crossedoutitem.hxx>
53 #include <editeng/escapementitem.hxx>
54 #include <editeng/svxacorr.hxx>
55 #include <editeng/unolingu.hxx>
56 #include <vcl/window.hxx>
57 #include <com/sun/star/xml/sax/InputSource.hpp>
58 #include <com/sun/star/xml/sax/FastParser.hpp>
59 #include <com/sun/star/xml/sax/Writer.hpp>
60 #include <com/sun/star/xml/sax/SAXParseException.hpp>
61 #include <unotools/streamwrap.hxx>
62 #include "SvXMLAutoCorrectImport.hxx"
63 #include "SvXMLAutoCorrectExport.hxx"
64 #include "SvXMLAutoCorrectTokenHandler.hxx"
65 #include <ucbhelper/content.hxx>
66 #include <com/sun/star/ucb/ContentCreationException.hpp>
67 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
68 #include <com/sun/star/ucb/TransferInfo.hpp>
69 #include <com/sun/star/ucb/NameClash.hpp>
70 #include <tools/diagnose_ex.h>
71 #include <xmloff/xmltoken.hxx>
72 #include <unordered_map>
73 #include <rtl/character.hxx>
75 using namespace ::com::sun::star::ucb;
76 using namespace ::com::sun::star::uno;
77 using namespace ::com::sun::star::xml::sax;
78 using namespace ::com::sun::star;
79 using namespace ::xmloff::token;
80 using namespace ::utl;
// File-local bit flags. FnCapitalStartSentence uses them to remember which
// kinds of sentence-ending punctuation it has already seen while scanning
// backwards, so that each kind is accepted at most once.
82 namespace {
84 enum class Flags {
85 NONE = 0x00,
86 FullStop = 0x01,
87 ExclamationMark = 0x02,
88 QuestionMark = 0x04,
// Registers Flags with o3tl::typed_flags; 0x07 is the OR of all three bits
// declared above.
93 namespace o3tl {
94 template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
96 const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space
// Names of the three XML lists used by this file.
// NOTE(review): presumably stream names inside the autocorrect storage -- confirm.
98 const char pXMLImplWrdStt_ExcptLstStr[] = "WordExceptList.xml";
99 const char pXMLImplCplStt_ExcptLstStr[] = "SentenceExceptList.xml";
100 const char pXMLImplAutocorr_ListStr[] = "DocumentList.xml";
// Characters that may be skipped at the start / end of a word before the
// autocorrect functions look at its actual content (see lcl_IsInAsciiArr and
// its callers).
// NOTE(review): the \x83..\x94 entries are stored in a plain char array; where
// char is signed they cannot compare equal to a sal_Unicode in
// lcl_IsInAsciiArr -- confirm whether they are still effective.
102 const char
103 /* also at these beginnings - Brackets and all kinds of begin characters */
104 sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
105 /* also at these ends - Brackets and all kinds of end characters */
106 sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
// Forward declaration; the definition is not part of this section of the file.
108 static OUString EncryptBlockName_Imp(const OUString& rName);
110 static bool NonFieldWordDelim( const sal_Unicode c )
112 return ' ' == c || '\t' == c || 0x0a == c ||
113 cNonBreakingSpace == c || 0x2011 == c;
116 static bool IsWordDelim( const sal_Unicode c )
118 return c == 0x1 || NonFieldWordDelim(c);
122 static bool IsLowerLetter( sal_Int32 nCharType )
124 return CharClass::isLetterType( nCharType ) &&
125 ( css::i18n::KCharacterType::LOWER & nCharType);
128 static bool IsUpperLetter( sal_Int32 nCharType )
130 return CharClass::isLetterType( nCharType ) &&
131 ( css::i18n::KCharacterType::UPPER & nCharType);
134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
135 sal_Int32 nStt, sal_Int32 nEnd )
137 for( ; nStt < nEnd; ++nStt )
139 css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
140 switch( nScript )
142 case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
143 case css::i18n::UnicodeScript_kHangulJamo:
144 case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
145 case css::i18n::UnicodeScript_kHiragana:
146 case css::i18n::UnicodeScript_kKatakana:
147 case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
148 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
149 case css::i18n::UnicodeScript_kCJKCompatibility:
150 case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA:
151 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
152 case css::i18n::UnicodeScript_kHangulSyllable:
153 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
154 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
155 return true;
156 default: ; //do nothing
159 return false;
162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
163 sal_Int32 nStt, sal_Int32 nEnd )
165 for( ; nStt < nEnd; ++nStt )
167 if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
168 return true;
170 return false;
173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c )
175 // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
176 if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) )
177 return true;
179 bool bRet = false;
180 for( ; *pArr; ++pArr )
181 if( *pArr == c )
183 bRet = true;
184 break;
186 return bRet;
// Out-of-line destructor of SvxAutoCorrDoc; no statements are visible in its body.
189 SvxAutoCorrDoc::~SvxAutoCorrDoc()
193 // Called by the functions:
194 // - FnCapitalStartWord
195 // - FnCapitalStartSentence
196 // after the exchange of characters. Then the words, if necessary, can be inserted
197 // into the exception list.
// In this file the callers pass: the ACFlags value of the correction that was
// applied, the position of the replaced character, the affected word and the
// original character. All four parameters are unnamed here, i.e. this
// implementation does not use them.
198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
199 sal_Unicode )
// Language at a text position. This implementation ignores the (unnamed)
// position argument and always answers LANGUAGE_SYSTEM; GetDocLanguage()
// resolves that value to the application language.
203 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
205 return LANGUAGE_SYSTEM;
208 static const LanguageTag& GetAppLang()
210 return Application::GetSettings().GetLanguageTag();
213 /// Never use an unresolved LANGUAGE_SYSTEM.
214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
216 LanguageType eLang = rDoc.GetLanguage( nPos );
217 if (eLang == LANGUAGE_SYSTEM)
218 eLang = GetAppLang().getLanguageType(); // the current work locale
219 return eLang;
222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
224 static LocaleDataWrapper aLclDtWrp( GetAppLang() );
225 LanguageTag aLcl( nLang );
226 const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag();
227 if( aLcl != rLcl )
228 aLclDtWrp.setLanguageTag( aLcl );
229 return aLclDtWrp;
231 static TransliterationWrapper& GetIgnoreTranslWrapper()
233 static int bIsInit = 0;
234 static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
235 TransliterationFlags::IGNORE_KANA |
236 TransliterationFlags::IGNORE_WIDTH );
237 if( !bIsInit )
239 aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
240 bIsInit = 1;
242 return aWrp;
244 static CollatorWrapper& GetCollatorWrapper()
246 static CollatorWrapper aCollWrp = [&]()
248 CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
249 tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
250 return tmp;
251 }();
252 return aCollWrp;
255 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
257 return cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
258 cChar == ' ' || cChar == '\'' || cChar == '\"' ||
259 cChar == '*' || cChar == '_' || cChar == '%' ||
260 cChar == '.' || cChar == ',' || cChar == ';' ||
261 cChar == ':' || cChar == '?' || cChar == '!' ||
262 cChar == '<' || cChar == '>' ||
263 cChar == '/' || cChar == '-';
266 namespace
268 bool IsCompoundWordDelimChar(sal_Unicode cChar)
270 return cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
274 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
276 return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' ||
277 cChar == '/' /*case for the urls exception*/;
280 ACFlags SvxAutoCorrect::GetDefaultFlags()
282 ACFlags nRet = ACFlags::Autocorrect
283 | ACFlags::CapitalStartSentence
284 | ACFlags::CapitalStartWord
285 | ACFlags::ChgOrdinalNumber
286 | ACFlags::ChgToEnEmDash
287 | ACFlags::AddNonBrkSpace
288 | ACFlags::TransliterateRTL
289 | ACFlags::ChgAngleQuotes
290 | ACFlags::ChgWeightUnderl
291 | ACFlags::SetINetAttr
292 | ACFlags::ChgQuotes
293 | ACFlags::SaveWordCplSttLst
294 | ACFlags::SaveWordWrdSttLst
295 | ACFlags::CorrectCapsLock;
296 LanguageType eLang = GetAppLang().getLanguageType();
297 if( eLang.anyOf(
298 LANGUAGE_ENGLISH,
299 LANGUAGE_ENGLISH_US,
300 LANGUAGE_ENGLISH_UK,
301 LANGUAGE_ENGLISH_AUS,
302 LANGUAGE_ENGLISH_CAN,
303 LANGUAGE_ENGLISH_NZ,
304 LANGUAGE_ENGLISH_EIRE,
305 LANGUAGE_ENGLISH_SAFRICA,
306 LANGUAGE_ENGLISH_JAMAICA,
307 LANGUAGE_ENGLISH_CARIBBEAN))
308 nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
309 return nRet;
// Code points of the dash, apostrophe and angle-quote characters used by the
// replacement functions in this file.
312 constexpr sal_Unicode cEmDash = 0x2014;
313 constexpr sal_Unicode cEnDash = 0x2013;
314 constexpr sal_Unicode cApostrophe = 0x2019;
315 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
316 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
317 constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
318 constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
// The arrays below are zero-terminated lists of code points.
319 // stop characters for searching preceding quotes
320 // (the first character is also the opening quote we are looking for)
321 const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
322 const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
323 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
324 const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
325 const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
326 const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };
// Constructs an autocorrect object from the two given strings, which are
// stored in sShareAutoCorrFile / sUserAutoCorrFile.
// NOTE(review): presumably the locations of the shared and the per-user
// autocorrect files -- confirm.
// The character-class language starts as LANGUAGE_DONTKNOW, the options start
// at GetDefaultFlags() and all four quote characters start at 0.
328 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile,
329 const OUString& rUserAutocorrFile )
330 : sShareAutoCorrFile( rShareAutocorrFile )
331 , sUserAutoCorrFile( rUserAutocorrFile )
332 , eCharClassLang( LANGUAGE_DONTKNOW )
333 , nFlags(SvxAutoCorrect::GetDefaultFlags())
334 , cStartDQuote( 0 )
335 , cEndDQuote( 0 )
336 , cStartSQuote( 0 )
337 , cEndSQuote( 0 )
// Copy constructor: takes over the file strings, aSwFlags, the character-class
// language and the quote characters of rCpy.
// The option flags are copied with ChgWordLstLoad, CplSttLstLoad and
// WrdSttLstLoad cleared.
// NOTE(review): presumably so that the copy loads its own lists -- confirm.
341 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
342 : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
343 , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
344 , aSwFlags( rCpy.aSwFlags )
345 , eCharClassLang(rCpy.eCharClassLang)
346 , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad))
347 , cStartDQuote( rCpy.cStartDQuote )
348 , cEndDQuote( rCpy.cEndDQuote )
349 , cStartSQuote( rCpy.cStartSQuote )
350 , cEndSQuote( rCpy.cEndSQuote )
// Out-of-line destructor; no statements are visible in its body.
355 SvxAutoCorrect::~SvxAutoCorrect()
359 void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
361 pCharClass.reset( new CharClass( LanguageTag( eLang)) );
362 eCharClassLang = eLang;
365 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
367 ACFlags nOld = nFlags;
368 nFlags = bOn ? nFlags | nFlag
369 : nFlags & ~nFlag;
371 if( !bOn )
373 if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
374 nFlags &= ~ACFlags::CplSttLstLoad;
375 if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
376 nFlags &= ~ACFlags::WrdSttLstLoad;
377 if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
378 nFlags &= ~ACFlags::ChgWordLstLoad;
383 // Correct TWo INitial CApitals
// Looks at the word in rTxt between nSttPos and nEndPos (and at each of its
// parts separated by compound-word delimiters). If a part starts with two
// upper-case letters followed by a lower-case letter, its second letter is
// replaced by the lower-case form, unless the part is found in the word
// exception list or the spell checker accepts it for eLang.
//   rDoc     document interface used for the replacement and for reporting
//            the corrected word (SaveCpltSttWord)
//   rTxt     text containing the word
//   nSttPos  first index of the word; nEndPos: index one past its end
//   eLang    language used for character classification and spell checking
384 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
385 sal_Int32 nSttPos, sal_Int32 nEndPos,
386 LanguageType eLang )
388 CharClass& rCC = GetCharClass( eLang );
390 // Skip all non alphanumeric characters at the beginning/end of
391 // the word ( recognizes: "(min.", "/min.", and so on.)
392 for( ; nSttPos < nEndPos; ++nSttPos )
393 if( rCC.isLetterNumeric( rTxt, nSttPos ))
394 break;
395 for( ; nSttPos < nEndPos; --nEndPos )
396 if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
397 break;
399 // Is the word a compounded word separated by delimiters?
400 // If so, keep track of all delimiters so each constituent
401 // word can be checked for two initial capital letters.
402 std::deque<sal_Int32> aDelimiters;
404 // Always check for two capitals at the beginning
405 // of the entire word, so start at nSttPos.
406 aDelimiters.push_back(nSttPos);
408 // Find all compound word delimiters
409 for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
411 if (IsCompoundWordDelimChar(rTxt[ n ]))
413 aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
417 // Decide where to put the terminating delimiter.
418 // If the last AutoCorrect char was a newline, then the AutoCorrect
419 // char will not be included in rTxt.
420 // If the last AutoCorrect char was not a newline, then the AutoCorrect
421 // character will be the last character in rTxt.
// NOTE(review): rTxt[nEndPos-1] is read without checking nEndPos > 0.
422 if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
423 aDelimiters.push_back(nEndPos);
425 // Iterate through the word and all words that compose it.
426 // Two capital letters at the beginning of word?
// Each pair of neighbouring entries in aDelimiters bounds one part.
427 for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
429 nSttPos = aDelimiters[nI];
430 nEndPos = aDelimiters[nI + 1];
// The condition below advances nSttPos (++nSttPos) while testing the second
// character; from then on nSttPos is the index of the second character.
432 if( nSttPos+2 < nEndPos &&
433 IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
434 IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
435 // Is the third character a lower case
436 IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
437 // Do not replace special attributes
438 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
440 // test if the word is in an exception list
// sWord starts at the first character of the part (nSttPos - 1).
441 OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
442 if( !FindInWrdSttExceptList(eLang, sWord) )
444 // Check that word isn't correctly spelt before correcting:
// NOTE(review): xSpeller is dereferenced without a check for an empty
// reference -- confirm GetSpellChecker() cannot return one here.
445 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
446 LinguMgr::GetSpellChecker();
447 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
449 Sequence< css::beans::PropertyValue > aEmptySeq;
450 if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
// A correctly spelt part ends the whole function, not only this iteration.
452 return;
// Replace the second character by its lower-case form if that differs.
455 sal_Unicode cSave = rTxt[ nSttPos ];
456 OUString sChar = rCC.lowercase( OUString(cSave) );
457 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
// Report the corrected word to the document if that option is enabled.
459 if( ACFlags::SaveWordWrdSttLst & nFlags )
460 rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
467 // Format ordinal numbers suffixes (1st -> 1^st)
// Examines the text between nSttPos and nEndPos. If it consists of a number
// followed by one of the ordinal suffixes that the OrdinalSuffix service
// returns for that number and the language of eLang, and the suffix consists
// of letters, the suffix gets the automatic superscript attribute.
// Returns true when an attribute was set.
//   rDoc     document interface used to set the attribute
//   rTxt     text containing the candidate
//   nSttPos  first index of the candidate; nEndPos: index one past its end
//   eLang    language of the text; nothing is done for the two Swedish
//            languages tested below
468 bool SvxAutoCorrect::FnChgOrdinalNumber(
469 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
470 sal_Int32 nSttPos, sal_Int32 nEndPos,
471 LanguageType eLang)
473 // 1st, 2nd, 3rd, 4 - 0th
474 // 201th or 201st
475 // 12th or 12nd
476 bool bChg = false;
478 // In some languages ordinal suffixes should never be
479 // changed to superscript. Let's break for those languages.
480 if (!eLang.anyOf(
481 LANGUAGE_SWEDISH,
482 LANGUAGE_SWEDISH_FINLAND))
484 CharClass& rCC = GetCharClass(eLang);
// Strip skip characters (quotes, brackets, ...) from both ends.
486 for (; nSttPos < nEndPos; ++nSttPos)
487 if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
488 break;
489 for (; nSttPos < nEndPos; --nEndPos)
490 if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
491 break;
494 // Get the last number in the string to check
// Scans backwards: nNumEnd becomes the index of the last digit. Any letter
// found before that digit makes the candidate invalid.
495 sal_Int32 nNumEnd = nEndPos;
496 bool bFoundEnd = false;
497 bool isValidNumber = true;
498 sal_Int32 i = nEndPos;
499 while (i > nSttPos)
501 i--;
502 bool isDigit = rCC.isDigit(rTxt, i);
503 if (bFoundEnd)
504 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
506 if (isDigit && !bFoundEnd)
508 bFoundEnd = true;
509 nNumEnd = i;
513 if (bFoundEnd && isValidNumber) {
// Numeric value of the text from nSttPos up to and including the last digit.
514 sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32();
516 // Check if the characters after that number correspond to the ordinal suffix
517 uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
518 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
520 const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
521 for (OUString const & sSuffix : aSuffixes)
// sEnd is everything between the last digit and nEndPos.
523 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1);
525 if (sSuffix == sEnd)
527 // Check if the ordinal suffix has to be set as super script
528 if (rCC.isLetter(sSuffix))
530 // Do the change
531 SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
532 DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
533 rDoc.SetAttr(nNumEnd + 1, nEndPos,
534 SID_ATTR_CHAR_ESCAPEMENT,
535 aSvxEscapementItem);
536 bChg = true;
542 return bChg;
545 // Replace dashes
546 bool SvxAutoCorrect::FnChgToEnEmDash(
547 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
548 sal_Int32 nSttPos, sal_Int32 nEndPos,
549 LanguageType eLang )
551 bool bRet = false;
552 CharClass& rCC = GetCharClass( eLang );
553 if (eLang == LANGUAGE_SYSTEM)
554 eLang = GetAppLang().getLanguageType();
555 bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);
557 // replace " - " or " --" with "enDash"
558 if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
560 sal_Unicode cCh = rTxt[ nSttPos ];
561 if( '-' == cCh )
563 if( 1 < nEndPos - nSttPos &&
564 ' ' == rTxt[ nSttPos-1 ] &&
565 '-' == rTxt[ nSttPos+1 ])
567 sal_Int32 n;
568 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
569 sImplSttSkipChars,(cCh = rTxt[ n ]));
570 ++n )
573 // found: " --[<AnySttChars>][A-z0-9]
574 if( rCC.isLetterNumeric( OUString(cCh) ) )
576 for( n = nSttPos-1; n && lcl_IsInAsciiArr(
577 sImplEndSkipChars,(cCh = rTxt[ --n ])); )
580 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
581 if( rCC.isLetterNumeric( OUString(cCh) ))
583 rDoc.Delete( nSttPos, nSttPos + 2 );
584 rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
585 bRet = true;
590 else if( 3 < nSttPos &&
591 ' ' == rTxt[ nSttPos-1 ] &&
592 '-' == rTxt[ nSttPos-2 ])
594 sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
595 if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
597 --nTmpPos;
598 ++nLen;
599 cCh = rTxt[ nTmpPos-1 ];
601 if( ' ' == cCh )
603 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
604 sImplSttSkipChars,(cCh = rTxt[ n ]));
605 ++n )
608 // found: " - [<AnySttChars>][A-z0-9]
609 if( rCC.isLetterNumeric( OUString(cCh) ) )
611 cCh = ' ';
612 for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
613 sImplEndSkipChars,(cCh = rTxt[ --n ])); )
615 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
616 if( rCC.isLetterNumeric( OUString(cCh) ))
618 rDoc.Delete( nTmpPos, nTmpPos + nLen );
619 rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
620 bRet = true;
627 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
628 // [0-9]--[0-9] double dash always replaced with "enDash"
629 // Finnish and Hungarian use enDash instead of emDash.
630 bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
631 if( 4 <= nEndPos - nSttPos )
633 OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) );
634 sal_Int32 nFndPos = sTmp.indexOf("--");
635 if( nFndPos != -1 && nFndPos &&
636 nFndPos + 2 < sTmp.getLength() &&
637 ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
638 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) &&
639 ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
640 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] )))
642 nSttPos = nSttPos + nFndPos;
643 rDoc.Delete( nSttPos, nSttPos + 2 );
644 rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) &&
645 rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) );
646 bRet = true;
649 return bRet;
652 // Add non-breaking space before specific punctuation marks in French text
// Only acts when the language of the CharClass for eLang is "fr".
// The character at rTxt[nEndPos] is examined:
//  - for one of ":;?!%" any spaces / no-break spaces directly before it are
//    deleted and, if the character is in the active set (all five, or only
//    ":" for country "CA"), a single no-break space is inserted instead;
//  - for '/' a no-break space that precedes a ':' directly before the '/' is
//    removed again (URL case).
//   rDoc             document interface used for Delete()/Insert()
//   rTxt             paragraph text
//   nEndPos          index of the character just typed
//   eLang            language of the text
//   io_bNbspRunNext  set to true in the two places marked below; never reset
//                    here
// Returns true when the document was changed.
653 bool SvxAutoCorrect::FnAddNonBrkSpace(
654 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
655 sal_Int32 nEndPos,
656 LanguageType eLang, bool& io_bNbspRunNext )
658 bool bRet = false;
660 CharClass& rCC = GetCharClass( eLang );
662 if ( rCC.getLanguageTag().getLanguage() == "fr" )
664 bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
// allChars: every character that triggers the space clean-up;
// chars: the subset that actually gets a no-break space.
665 OUString allChars = ":;?!%";
666 OUString chars( allChars );
667 if ( bFrCA )
668 chars = ":";
670 sal_Unicode cChar = rTxt[ nEndPos ];
671 bool bHasSpace = chars.indexOf( cChar ) != -1;
672 bool bIsSpecial = allChars.indexOf( cChar ) != -1;
673 if ( bIsSpecial )
675 // Get the last word delimiter position
676 sal_Int32 nSttWdPos = nEndPos;
677 bool bWasWordDelim = false;
678 while( nSttWdPos )
680 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
681 if (bWasWordDelim)
682 break;
685 //See if the text is the start of a protocol string, e.g. have text of
686 //"http" see if it is the start of "http:" and if so leave it alone
687 sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
688 sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1;
689 if (nIndex + nProtocolLen <= rTxt.getLength())
691 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
692 return false;
695 // Check the presence of "://" in the word
696 sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 );
697 if ( nStrPos == -1 && nEndPos > 0 )
699 // Check the previous char
700 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
701 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
703 // Remove any previous normal space
// Walk back over spaces / no-break spaces; afterwards nPos is the first
// index of that run.
704 sal_Int32 nPos = nEndPos - 1;
705 while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
707 if ( nPos == 0 ) break;
708 nPos--;
709 cPrevChar = rTxt[ nPos ];
712 nPos++;
713 if ( nEndPos - nPos > 0 )
714 rDoc.Delete( nPos, nEndPos );
716 // Add the non-breaking space at the end pos
717 if ( bHasSpace )
718 rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
719 io_bNbspRunNext = true;
720 bRet = true;
// Previous character is itself one of the active punctuation marks.
722 else if ( chars.indexOf( cPrevChar ) != -1 )
723 io_bNbspRunNext = true;
726 else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) )
728 // Remove the hardspace right before to avoid formatting URLs
729 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
730 sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
731 if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
733 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
734 bRet = true;
739 return bRet;
742 // URL recognition
743 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
744 sal_Int32 nSttPos, sal_Int32 nEndPos,
745 LanguageType eLang )
747 OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
748 GetCharClass( eLang ) ));
749 bool bRet = !sURL.isEmpty();
750 if( bRet ) // so, set attribute:
751 rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
752 return bRet;
755 // Automatic *bold*, /italic/, -strikeout- and _underline_
// rTxt[nEndPos] is the closing marker character. The function searches
// backwards for the matching opening marker, removes both markers and sets
// the corresponding character attribute on the text in between.
//   rDoc     document interface used for Delete()/SetAttr()
//   rTxt     paragraph text
//   nEndPos  index of the closing marker
// Returns true when a matching opening marker was found.
756 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
757 sal_Int32 nEndPos )
759 // Condition:
760 // at the beginning: _, *, / or - after Space with the following !Space
761 // at the end: _, *, / or - before Space (word delimiter?)
763 sal_Unicode cInsChar = rTxt[ nEndPos ]; // underline, bold, italic or strikeout
// The closing marker must be the last character or be followed by a word
// delimiter.
764 if( ++nEndPos != rTxt.getLength() &&
765 !IsWordDelim( rTxt[ nEndPos ] ) )
766 return false;
768 --nEndPos;
// bAlphaNum: a letter or digit was seen between the two markers.
// nFndPos: index of the opening marker, -1 while none is accepted.
770 bool bAlphaNum = false;
771 sal_Int32 nPos = nEndPos;
772 sal_Int32 nFndPos = -1;
773 CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
775 while( nPos )
777 switch( sal_Unicode c = rTxt[ --nPos ] )
779 case '_':
780 case '-':
781 case '/':
782 case '*':
// The first earlier occurrence of the same marker ends the search, whether
// it is accepted or not. Other marker characters are ignored.
783 if( c == cInsChar )
785 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
786 IsWordDelim( rTxt[ nPos-1 ])) &&
787 !IsWordDelim( rTxt[ nPos+1 ]))
788 nFndPos = nPos;
789 else
790 // Condition is not satisfied, so cancel
791 nFndPos = -1;
792 nPos = 0;
794 break;
795 default:
796 if( !bAlphaNum )
797 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
801 if( -1 != nFndPos )
803 // first delete the Character at the end - this allows insertion
804 // of an empty hint in SetAttr which would be removed by Delete
805 // (fdo#62536, AUTOFMT in Writer)
806 rDoc.Delete( nEndPos, nEndPos + 1 );
807 rDoc.Delete( nFndPos, nFndPos + 1 );
808 // Span the attribute over the area between the two removed markers;
809 // the end index is nEndPos - 1 because the opening marker is gone.
810 if( '*' == cInsChar ) // Bold
812 SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
813 rDoc.SetAttr( nFndPos, nEndPos - 1,
814 SID_ATTR_CHAR_WEIGHT,
815 aSvxWeightItem);
817 else if( '/' == cInsChar ) // Italic
819 SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
820 rDoc.SetAttr( nFndPos, nEndPos - 1,
821 SID_ATTR_CHAR_POSTURE,
822 aSvxPostureItem);
824 else if( '-' == cInsChar ) // Strikeout
826 SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
827 rDoc.SetAttr( nFndPos, nEndPos - 1,
828 SID_ATTR_CHAR_STRIKEOUT,
829 aSvxCrossedOutItem);
831 else // Underline
833 SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
834 rDoc.SetAttr( nFndPos, nEndPos - 1,
835 SID_ATTR_CHAR_UNDERLINE,
836 aSvxUnderlineItem);
840 return -1 != nFndPos;
843 // Capitalize first letter of every sentence
// Finds the last word before nEndPos in rTxt. If that word starts a sentence
// (it follows sentence-ending punctuation, possibly at the end of the
// previous paragraph, or it is the very first text), its first letter is
// replaced by the title-case form.
// The many early returns below are the cases in which nothing is changed.
//   rDoc        document interface (ReplaceRange, GetPrevPara, SaveCpltSttWord)
//   rTxt        text of the current paragraph
//   bNormalPos  passed through unchanged to rDoc.GetPrevPara()
//   nSttPos     must be smaller than nEndPos; later reused for the index of
//               the replaced character
//   nEndPos     index one past the end of the text to examine
//   eLang       language used for character classification and exception lists
844 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
845 const OUString& rTxt, bool bNormalPos,
846 sal_Int32 nSttPos, sal_Int32 nEndPos,
847 LanguageType eLang )
850 if( rTxt.isEmpty() || nEndPos <= nSttPos )
851 return;
853 CharClass& rCC = GetCharClass( eLang );
// pStart: start of the buffer being scanned; pStr: scan position;
// pWordStt: first letter of the last word; pDelim: position after its last letter.
854 OUString aText( rTxt );
855 const sal_Unicode *pStart = aText.getStr(),
856 *pStr = pStart + nEndPos,
857 *pWordStt = nullptr,
858 *pDelim = nullptr;
// Scan backwards from nEndPos to find the last word and its start.
860 bool bAtStart = false;
861 do {
862 --pStr;
863 if (rCC.isLetter(aText, pStr - pStart))
865 if( !pWordStt )
866 pDelim = pStr+1;
867 pWordStt = pStr;
869 else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
871 if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
872 pWordStt - 1 == pStr &&
873 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
874 (pStart + 1) <= pStr &&
875 rCC.isLetter(aText, pStr-1 - pStart))
876 pWordStt = --pStr;
877 else
878 break;
880 bAtStart = (pStart == pStr);
881 } while( !bAtStart );
883 if (!pWordStt)
884 return; // no character to be replaced
887 if (rCC.isDigit(aText, pStr - pStart))
888 return; // already ok
890 if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
891 return; // already ok
893 //See if the text is the start of a protocol string, e.g. have text of
894 //"http" see if it is the start of "http:" and if so leave it alone
895 sal_Int32 nIndex = pWordStt - pStart;
896 sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
897 if (nIndex + nProtocolLen <= rTxt.getLength())
899 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
900 return; // already ok
// 0x1 / 0x2 are the codes called "special attributes" in FnCapitalStartWord.
903 if (0x1 == *pWordStt || 0x2 == *pWordStt)
904 return; // already ok
906 // Only capitalize, if string before specified characters is long enough
// i.e. a word of at most two characters directly followed by one of ".-)>"
// is left alone.
907 if( *pDelim && 2 >= pDelim - pWordStt &&
908 lcl_IsInAsciiArr( ".-)>", *pDelim ) )
909 return;
911 // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
912 if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
913 return;
915 if( !bAtStart ) // Still no beginning of a paragraph?
// Skip the run of word delimiters in front of the word.
917 if (NonFieldWordDelim(*pStr))
919 for (;;)
921 bAtStart = (pStart == pStr--);
922 if (bAtStart || !NonFieldWordDelim(*pStr))
923 break;
926 // Asian full stop, full width full stop, full width exclamation mark
927 // and full width question marks are treated as word delimiters
928 else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
929 0xFF1F != *pStr )
930 return; // no valid separator -> no replacement
933 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
934 if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
935 return;
937 if( bAtStart ) // at the beginning of a paragraph?
939 // Check out the previous paragraph, if it exists.
940 // If so, then check to paragraph separator at the end.
941 OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
942 if (!pPrevPara)
944 // valid separator -> replace
945 OUString sChar( *pWordStt );
946 sChar = rCC.titlecase(sChar); //see fdo#56740
947 if (!comphelper::string::equals(sChar, *pWordStt))
948 rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
949 return;
// Continue the backward scan in the previous paragraph. From here on pStart /
// pStr refer to the previous paragraph's text, while pWordStt / pDelim still
// refer to the current paragraph.
952 aText = *pPrevPara;
953 bAtStart = false;
954 pStart = aText.getStr();
955 pStr = pStart + aText.getLength();
957 do { // overwrite all blanks
958 --pStr;
959 if (!NonFieldWordDelim(*pStr))
960 break;
961 bAtStart = (pStart == pStr);
962 } while( !bAtStart );
964 if( bAtStart )
965 return; // no valid separator -> no replacement
968 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
969 // all three can happen, but not more than once!
// pExceptStt remembers the position of a full stop, for the exception-list
// lookup further below.
970 const sal_Unicode* pExceptStt = nullptr;
971 bool bContinue = true;
972 Flags nFlag = Flags::NONE;
975 switch (*pStr)
977 // Western and Asian full stop
978 case '.':
979 case 0x3002:
980 case 0xFF0E:
982 if (pStr >= pStart + 2 && *(pStr - 2) == '.')
984 //e.g. text "f.o.o. word": Now currently considering
985 //capitalizing word but second last character of
986 //previous word is a . So probably last word is an
987 //abbreviation that ends in . and not truly the end of a
988 //previous sentence, so don't autocapitalize this word
989 return;
991 if (nFlag & Flags::FullStop)
992 return; // no valid separator -> no replacement
993 nFlag |= Flags::FullStop;
994 pExceptStt = pStr;
996 break;
997 case '!':
998 case 0xFF01:
1000 if (nFlag & Flags::ExclamationMark)
1001 return; // no valid separator -> no replacement
1002 nFlag |= Flags::ExclamationMark;
1004 break;
1005 case '?':
1006 case 0xFF1F:
1008 if (nFlag & Flags::QuestionMark)
1009 return; // no valid separator -> no replacement
1010 nFlag |= Flags::QuestionMark;
1012 break;
1013 default:
1014 if (nFlag == Flags::NONE)
1015 return; // no valid separator -> no replacement
1016 else
1017 bContinue = false;
1018 break;
1021 if (bContinue && pStr-- == pStart)
1023 return; // no valid separator -> no replacement
1025 } while (bContinue);
// The exception-list lookup is only done when a full stop was the only
// punctuation found.
1026 if (Flags::FullStop != nFlag)
1027 pExceptStt = nullptr;
1029 // Only capitalize, if string is long enough
1030 if( 2 > ( pStr - pStart ) )
1031 return;
// The character before the punctuation is not a letter or digit: search
// further back for a digit or for a second letter.
1033 if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
1035 bool bValid = false, bAlphaFnd = false;
1036 const sal_Unicode* pTmpStr = pStr;
1037 while( !bValid )
1039 if( rCC.isDigit( aText, pTmpStr - pStart ) )
1041 bValid = true;
1042 pStr = pTmpStr - 1;
1044 else if( rCC.isLetter( aText, pTmpStr - pStart ) )
1046 if( bAlphaFnd )
1048 bValid = true;
1049 pStr = pTmpStr;
1051 else
1052 bAlphaFnd = true;
1054 else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
1055 break;
1057 if( pTmpStr == pStart )
1058 break;
1060 --pTmpStr;
1063 if( !bValid )
1064 return; // no valid separator -> no replacement
1067 bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
1069 // Search for the beginning of the word
1070 while (!NonFieldWordDelim(*pStr))
1072 if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
1073 bNumericOnly = false;
1075 if( pStart == pStr )
1076 break;
1078 --pStr;
1081 if( bNumericOnly ) // consists of only numbers, then not
1082 return;
1084 if (NonFieldWordDelim(*pStr))
1085 ++pStr;
1087 OUString sWord;
1089 // check on the basis of the exception list
1090 if( pExceptStt )
// sWord: the word in front of the full stop, including the full stop.
1092 sWord = OUString(pStr, pExceptStt - pStr + 1);
1093 if( FindInCplSttExceptList(eLang, sWord) )
1094 return;
1096 // Delete all non alphanumeric. Test the characters at the
1097 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1098 OUString sTmp( sWord );
1099 while( !sTmp.isEmpty() &&
1100 !rCC.isLetterNumeric( sTmp, 0 ) )
1101 sTmp = sTmp.copy(1);
1103 // Remove all non alphanumeric characters towards the end up until
1104 // the last one.
1105 sal_Int32 nLen = sTmp.getLength();
1106 while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
1107 --nLen;
1108 if( nLen + 1 < sTmp.getLength() )
1109 sTmp = sTmp.copy( 0, nLen + 1 );
1111 if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
1112 FindInCplSttExceptList(eLang, sTmp))
1113 return;
1115 if(FindInCplSttExceptList(eLang, sWord, true))
1116 return;
1119 // Ok, then replace
// The index is computed against rTxt.getStr() because aText may meanwhile
// hold the previous paragraph.
// NOTE(review): this relies on the initial copy aText( rTxt ) having had the
// same getStr() as rTxt -- confirm.
1120 sal_Unicode cSave = *pWordStt;
1121 nSttPos = pWordStt - rTxt.getStr();
1122 OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
1123 bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
1125 // Perhaps someone wants to have the word
1126 if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
1127 rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
1130 // Correct accidental use of cAPS LOCK key
1131 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1132 sal_Int32 nSttPos, sal_Int32 nEndPos,
1133 LanguageType eLang )
1135 if (nEndPos - nSttPos < 2)
1136 // string must be at least 2-character long.
1137 return false;
1139 CharClass& rCC = GetCharClass( eLang );
1141 // Check the first 2 letters.
1142 if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1143 return false;
1145 if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1146 return false;
1148 OUStringBuffer aConverted;
1149 aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1150 aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1152 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1153 if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1154 return false;
1156 for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1158 if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1159 // A lowercase letter disqualifies the whole text.
1160 return false;
1162 if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1163 // Another uppercase letter. Convert it.
1164 aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1165 else
1166 // This is not an alphabetic letter. Leave it as-is.
1167 aConverted.append( rTxt[i] );
1170 // Replace the word.
1171 rDoc.Delete(nSttPos, nEndPos);
1172 rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1174 return true;
1178 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1179 LanguageType eLang ) const
1181 sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1182 ? GetStartDoubleQuote()
1183 : GetStartSingleQuote() )
1184 : ( '\"' == cInsChar
1185 ? GetEndDoubleQuote()
1186 : GetEndSingleQuote() );
1187 if( !cRet )
1189 // then through the Language find the right character
1190 if( LANGUAGE_NONE == eLang )
1191 cRet = cInsChar;
1192 else
1194 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1195 OUString sRet( bSttQuote
1196 ? ( '\"' == cInsChar
1197 ? rLcl.getDoubleQuotationMarkStart()
1198 : rLcl.getQuotationMarkStart() )
1199 : ( '\"' == cInsChar
1200 ? rLcl.getDoubleQuotationMarkEnd()
1201 : rLcl.getQuotationMarkEnd() ));
1202 cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1205 return cRet;
1208 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1209 sal_Unicode cInsChar, bool bSttQuote,
1210 bool bIns, LanguageType eLang, ACQuotes eType ) const
1212 sal_Unicode cRet;
1214 if ( eType == ACQuotes::DoubleAngleQuote )
1216 bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
1217 // pressing " inside a quotation -> use second level angle quotes
1218 bool bLeftQuote = '\"' == cInsChar &&
1219 // start position and Romanian OR
1220 // not start position and Hungarian
1221 bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
1222 cRet = ( '<' == cInsChar || bLeftQuote )
1223 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
1224 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
1226 else if ( eType == ACQuotes::UseApostrophe )
1227 cRet = cApostrophe;
1228 else
1229 cRet = GetQuote( cInsChar, bSttQuote, eLang );
1231 OUString sChg( cInsChar );
1232 if( bIns )
1233 rDoc.Insert( nInsPos, sChg );
1234 else
1235 rDoc.Replace( nInsPos, sChg );
1237 sChg = OUString(cRet);
1239 if( eType == ACQuotes::NonBreakingSpace )
1241 if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
1243 if( !bSttQuote )
1244 ++nInsPos;
1247 else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
1249 rDoc.Delete( nInsPos-1, nInsPos);
1250 --nInsPos;
1253 rDoc.Replace( nInsPos, sChg );
1255 // i' -> I' in English (last step for the Undo)
1256 if( eType == ACQuotes::CapitalizeIAm )
1257 rDoc.Replace( nInsPos-1, "I" );
1260 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1261 sal_Unicode cInsChar, bool bSttQuote )
1263 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1264 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1266 OUString sRet(cRet);
1268 if( '\"' == cInsChar )
1270 if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1272 if( bSttQuote )
1273 sRet += " ";
1274 else
1275 sRet = " " + sRet;
1278 return sRet;
1281 // search preceding opening quote in the paragraph before the insert position
1282 static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
1283 const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars )
1285 sal_Unicode cTmpChar;
1287 do {
1288 cTmpChar = rTxt[ --nPos ];
1289 if ( cTmpChar == sPrecedingChar )
1290 return true;
1292 for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
1293 if ( cTmpChar == *pCh )
1294 return false;
1296 } while ( nPos > 0 );
1298 return false;
// Run the auto-correction steps for the character cChar typed at nInsPos.
//
// Overview of what the body below does, in order:
//  * A space typed after a space is dropped when IgnoreDoubleSpace is set.
//  * '"' and '\'' (subject to ChgQuotes / ChgSglQuotes) are handed to
//    InsertQuote() with a quote type derived from language and context.
//  * A second '<' or '>' becomes a double angle quote for the listed languages.
//  * Otherwise cChar is inserted (bInsert) or replaces the character at
//    nInsPos, followed by the non-breaking-space handling.
//  * Finally the word ending just before nInsPos is located and passed to the
//    word-level corrections (replacement table, ordinal numbers, URL
//    attributes, caps lock, sentence/word capitalisation, dashes).
//
// io_bNbspRunNext is read on entry, reset to false, and handed by reference to
// FnAddNonBrkSpace(). pFrameWin is only dereferenced after a null check.
// The do { ... } while( false ) wrapper exists so that "break" can leave the
// whole sequence early.
1301 // WARNING: rText may become invalid, see comment below
1302 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1303 sal_Int32 nInsPos, sal_Unicode cChar,
1304 bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1306 bool bIsNextRun = io_bNbspRunNext;
1307 io_bNbspRunNext = false; // if it was set, then it has to be turned off
1309 do{ // only for middle check loop !!
1310 if( cChar )
1312 // Prevent double space
1313 if( nInsPos && ' ' == cChar &&
1314 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1315 ' ' == rTxt[ nInsPos - 1 ])
1317 break;
1320 bool bSingle = '\'' == cChar;
1321 bool bIsReplaceQuote =
1322 (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1323 (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1324 if( bIsReplaceQuote )
// At position 0 the quote is always an opening quote; otherwise the
// preceding character decides.
1326 bool bSttQuote = !nInsPos;
1327 ACQuotes eType = ACQuotes::NONE;
1328 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1329 if (!bSttQuote)
1331 sal_Unicode cPrev = rTxt[ nInsPos-1 ];
1332 bSttQuote = NonFieldWordDelim(cPrev) ||
1333 lcl_IsInAsciiArr( "([{", cPrev ) ||
1334 ( cEmDash == cPrev ) ||
1335 ( cEnDash == cPrev );
1336 // tdf#38394 use opening quotation mark << in French l'<<word>>
1337 if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1338 primary(eLang) == primary(LANGUAGE_FRENCH) &&
1339 ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
1340 // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1341 OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
1342 ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
1343 // abbreviated form of que
1344 ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
1345 ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
1347 bSttQuote = true;
1349 // tdf#108423 for capitalization of English i'm
1350 else if ( bSingle && ( cPrev == 'i' ) &&
1351 primary(eLang) == primary(LANGUAGE_ENGLISH) &&
1352 ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
1354 eType = ACQuotes::CapitalizeIAm;
1356 // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1357 else if ( !bSingle && nInsPos &&
1358 ( ( eLang == LANGUAGE_HUNGARIAN &&
1359 lcl_HasPrecedingChar( rTxt, nInsPos,
1360 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
1361 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) ||
1362 ( eLang.anyOf(
1363 LANGUAGE_ROMANIAN,
1364 LANGUAGE_ROMANIAN_MOLDOVA ) &&
1365 lcl_HasPrecedingChar( rTxt, nInsPos,
1366 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
1367 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) )
1369 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1370 // only if the opening double quotation mark is the default one
1371 if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
1372 eType = ACQuotes::DoubleAngleQuote;
1374 else if ( bSingle && nInsPos && !bSttQuote &&
1375 // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1376 // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1377 // tdf#123786 the same for Russian and Ukrainian
1378 ( ( eLang.anyOf (
1379 LANGUAGE_CZECH,
1380 LANGUAGE_GERMAN,
1381 LANGUAGE_GERMAN_SWISS,
1382 LANGUAGE_GERMAN_AUSTRIAN,
1383 LANGUAGE_GERMAN_LUXEMBOURG,
1384 LANGUAGE_GERMAN_LIECHTENSTEIN,
1385 LANGUAGE_ICELANDIC,
1386 LANGUAGE_SLOVAK,
1387 LANGUAGE_SLOVENIAN ) &&
1388 !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0], aStopSingleQuoteEnd + 1 ) ) ||
1389 ( eLang.anyOf (
1390 LANGUAGE_RUSSIAN,
1391 LANGUAGE_UKRAINIAN ) &&
1392 !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa + 1 ) ) ) )
1394 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1395 CharClass& rCC = GetCharClass( eLang );
1396 if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) ||
1397 rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) &&
1398 // use apostrophe only after letters, not after digits or punctuation
1399 rCC.isLetter(rTxt, nInsPos-1) )
1401 eType = ACQuotes::UseApostrophe;
// No special case matched: a double quote in French (other than Swiss
// French) gets the non-breaking-space treatment.
1406 if ( eType == ACQuotes::NONE && !bSingle &&
1407 ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
1408 eType = ACQuotes::NonBreakingSpace;
1410 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
1411 break;
1413 // tdf#133524 change "<<" and ">>" to double angle quotation marks
1414 else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
1415 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
1416 ('<' == cChar || '>' == cChar) &&
1417 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
1419 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1420 if ( eLang.anyOf(
1421 LANGUAGE_CATALAN, // primary level
1422 LANGUAGE_CATALAN_VALENCIAN, // primary level
1423 LANGUAGE_FINNISH, // alternative primary level
1424 LANGUAGE_FRENCH_SWISS, // second level
1425 LANGUAGE_GALICIAN, // primary level
1426 LANGUAGE_HUNGARIAN, // second level
1427 LANGUAGE_POLISH, // second level
1428 LANGUAGE_PORTUGUESE, // primary level
1429 LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
1430 LANGUAGE_ROMANIAN, // second level
1431 LANGUAGE_ROMANIAN_MOLDOVA, // second level
1432 LANGUAGE_SWEDISH, // alternative primary level
1433 LANGUAGE_SWEDISH_FINLAND, // alternative primary level
1434 LANGUAGE_UKRAINIAN, // primary level
1435 LANGUAGE_USER_ARAGONESE, // primary level
1436 LANGUAGE_USER_ASTURIAN ) || // primary level
1437 primary(eLang) == primary(LANGUAGE_GERMAN) || // alternative primary level
1438 primary(eLang) == primary(LANGUAGE_SPANISH) ) // primary level
1440 InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
1441 break;
// Ordinary character: put it into the document as typed.
1445 if( bInsert )
1446 rDoc.Insert( nInsPos, OUString(cChar) );
1447 else
1448 rDoc.Replace( nInsPos, OUString(cChar) );
1450 // Hardspaces autocorrection
1451 if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1453 if ( NeedsHardspaceAutocorr( cChar ) &&
1454 FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) )
1458 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1460 // Remove the NBSP if it wasn't an autocorrection
1461 if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1462 cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1464 // Look for the last HARD_SPACE
1465 sal_Int32 nPos = nInsPos - 1;
1466 bool bContinue = true;
1467 while ( bContinue )
1469 const sal_Unicode cTmpChar = rTxt[ nPos ];
1470 if ( cTmpChar == cNonBreakingSpace )
1472 rDoc.Delete( nPos, nPos + 1 );
1473 bContinue = false;
1475 else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1476 bContinue = false;
1477 nPos--;
// Word-level corrections need at least one character before nInsPos, and that
// character must not be a word delimiter.
1484 if( !nInsPos )
1485 break;
1487 sal_Int32 nPos = nInsPos - 1;
1489 if( IsWordDelim( rTxt[ nPos ]))
1490 break;
1492 // Set bold or underline automatically?
1493 if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1495 if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1497 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1499 break;
// Walk back to the previous word delimiter or to the start of the text.
1502 while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1505 // Found a Paragraph-start or a Blank, search for the word shortcut in
1506 // auto.
1507 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character
1508 if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1509 --nCapLttrPos; // begin of paragraph and no blank
1511 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1512 CharClass& rCC = GetCharClass( eLang );
1514 // no symbol characters
1515 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1516 break;
1518 if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
1519 // tdf#134940 fix regression of arrow "-->" resulted by premature
1520 // replacement of "--" since '>' was added to IsAutoCorrectChar()
1521 '>' != cChar )
1523 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1524 // and becomes INVALID if ChgAutoCorrWord returns true!
1525 // => use aPara/pPara to create a valid copy of the string!
1526 OUString aPara;
1527 OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1529 bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1530 *this, pPara );
1531 if( !bChgWord )
// Retry with leading/trailing skip characters trimmed off the word range.
1533 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1534 while( nCapLttrPos1 < nInsPos &&
1535 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1537 ++nCapLttrPos1;
1538 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1539 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1541 --nInsPos1;
1543 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1544 nCapLttrPos1 < nInsPos1 &&
1545 rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1547 bChgWord = true;
1548 nCapLttrPos = nCapLttrPos1;
1552 if( bChgWord )
// From here on only aPara is used, never rTxt (see the warning above).
1554 if( !aPara.isEmpty() )
1556 sal_Int32 nEnd = nCapLttrPos;
1557 while( nEnd < aPara.getLength() &&
1558 !IsWordDelim( aPara[ nEnd ]))
1559 ++nEnd;
1561 // Capital letter at beginning of paragraph?
1562 if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1564 FnCapitalStartSentence( rDoc, aPara, false,
1565 nCapLttrPos, nEnd, eLang );
1568 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1570 FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1573 break;
1577 if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
1579 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1580 // and becomes INVALID if TransliterateRTLWord returns true!
1581 if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
1582 break;
// Ordinal-number and URL-attribute handling; when either one applied, the
// remaining corrections in the else branch are not run.
1585 if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1586 (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3
1587 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1588 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1589 ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1590 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1591 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1593 else
1595 bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1596 bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1598 if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1599 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1601 // Correct accidental use of cAPS LOCK key (do this only when
1602 // the caps or shift lock key is pressed). Turn off the caps
1603 // lock afterwards.
1604 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1607 // Capital letter at beginning of paragraph ?
1608 if( !bUnsupported &&
1609 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1611 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1614 // Two capital letters at beginning of word ??
1615 if( !bUnsupported &&
1616 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1618 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1621 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1623 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1627 } while( false );
1630 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1631 LanguageType eLang )
1633 LanguageTag aLanguageTag( eLang);
1634 if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1635 (void)CreateLanguageFile(aLanguageTag);
1636 return *(m_aLangTable.find(aLanguageTag)->second);
1639 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1641 auto const iter = m_aLangTable.find(LanguageTag(eLang));
1642 if (iter != m_aLangTable.end() && iter->second)
1643 iter->second->SaveCplSttExceptList();
1644 else
1646 SAL_WARN("editeng", "Save an empty list? ");
1650 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang)
1652 auto const iter = m_aLangTable.find(LanguageTag(eLang));
1653 if (iter != m_aLangTable.end() && iter->second)
1654 iter->second->SaveWrdSttExceptList();
1655 else
1657 SAL_WARN("editeng", "Save an empty list? ");
1661 // Adds a single word. The list will immediately be written to the file!
1662 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1663 LanguageType eLang )
1665 SvxAutoCorrectLanguageLists* pLists = nullptr;
1666 // either the right language is present or it will be this in the general list
1667 auto iter = m_aLangTable.find(LanguageTag(eLang));
1668 if (iter != m_aLangTable.end())
1669 pLists = iter->second.get();
1670 else
1672 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1673 iter = m_aLangTable.find(aLangTagUndetermined);
1674 if (iter != m_aLangTable.end())
1675 pLists = iter->second.get();
1676 else if(CreateLanguageFile(aLangTagUndetermined))
1677 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1679 OSL_ENSURE(pLists, "No auto correction data");
1680 return pLists && pLists->AddToCplSttExceptList(rNew);
1683 // Adds a single word. The list will immediately be written to the file!
1684 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew,
1685 LanguageType eLang )
1687 SvxAutoCorrectLanguageLists* pLists = nullptr;
1688 //either the right language is present or it is set in the general list
1689 auto iter = m_aLangTable.find(LanguageTag(eLang));
1690 if (iter != m_aLangTable.end())
1691 pLists = iter->second.get();
1692 else
1694 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1695 iter = m_aLangTable.find(aLangTagUndetermined);
1696 if (iter != m_aLangTable.end())
1697 pLists = iter->second.get();
1698 else if(CreateLanguageFile(aLangTagUndetermined))
1699 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1701 OSL_ENSURE(pLists, "No auto correction file!");
1702 return pLists && pLists->AddToWrdSttExceptList(rNew);
1705 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1706 sal_Int32 nPos)
1708 OUString sRet;
1709 if( !nPos )
1710 return sRet;
1712 sal_Int32 nEnd = nPos;
1714 // it must be followed by a blank or tab!
1715 if( ( nPos < rTxt.getLength() &&
1716 !IsWordDelim( rTxt[ nPos ])) ||
1717 IsWordDelim( rTxt[ --nPos ]))
1718 return sRet;
1720 while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1723 // Found a Paragraph-start or a Blank, search for the word shortcut in
1724 // auto.
1725 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character
1726 if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1727 --nCapLttrPos; // Beginning of paragraph and no Blank!
1729 while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1730 if( ++nCapLttrPos >= nEnd )
1731 return sRet;
1733 if( 3 > nEnd - nCapLttrPos )
1734 return sRet;
1736 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1738 CharClass& rCC = GetCharClass(eLang);
1740 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1741 return sRet;
1743 sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1744 return sRet;
1747 // static
1748 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt,
1749 const sal_Int32 nPos)
1751 constexpr sal_Int32 nMinLen = 3;
1752 constexpr sal_Int32 nMaxLen = 9;
1753 std::vector<OUString> aRes;
1754 if (nPos >= nMinLen)
1756 sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1757 // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1758 if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1760 while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1761 ++nBegin;
1763 if (nBegin + nMinLen <= nPos)
1765 OUString sRes = rTxt.copy(nBegin, nPos - nBegin);
1766 aRes.push_back(sRes);
1767 bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1768 for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1770 bool bAdd = bLastStartedWithDelim;
1771 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1772 bAdd = bAdd || bLastStartedWithDelim;
1773 if (bAdd)
1774 aRes.push_back(sRes.copy(i));
1778 return aRes;
1781 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1783 OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1785 OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1786 OUString sShareDirFile( sUserDirFile );
1788 SvxAutoCorrectLanguageLists* pLists = nullptr;
1790 tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1792 auto nFndPos = aLastFileTable.find(rLanguageTag);
1793 if(nFndPos != aLastFileTable.end() &&
1794 (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1795 nAktTime - nLastCheckTime < nMinTime)
1797 // no need to test the file, because the last check is not older then
1798 // 2 minutes.
1799 if( bNewFile )
1801 sShareDirFile = sUserDirFile;
1802 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1803 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference
1804 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1805 aLastFileTable.erase(nFndPos);
1808 else if(
1809 ( FStatHelper::IsDocument( sUserDirFile ) ||
1810 FStatHelper::IsDocument( sShareDirFile =
1811 GetAutoCorrFileName( rLanguageTag ) ) ||
1812 FStatHelper::IsDocument( sShareDirFile =
1813 GetAutoCorrFileName( rLanguageTag, false, false, true) )
1814 ) ||
1815 ( sShareDirFile = sUserDirFile, bNewFile )
1818 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1819 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference
1820 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1821 if (nFndPos != aLastFileTable.end())
1822 aLastFileTable.erase(nFndPos);
1824 else if( !bNewFile )
1826 aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1828 return pLists != nullptr;
1831 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1832 LanguageType eLang )
1834 LanguageTag aLanguageTag( eLang);
1835 auto const iter = m_aLangTable.find(aLanguageTag);
1836 if (iter != m_aLangTable.end())
1837 return iter->second->PutText(rShort, rLong);
1838 if(CreateLanguageFile(aLanguageTag))
1839 return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong);
1840 return false;
1843 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1844 std::vector<SvxAutocorrWord>& aDeleteEntries,
1845 LanguageType eLang )
1847 LanguageTag aLanguageTag( eLang);
1848 auto const iter = m_aLangTable.find(aLanguageTag);
1849 if (iter != m_aLangTable.end())
1851 iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1853 else if(CreateLanguageFile( aLanguageTag ))
1855 m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1859 // - return the replacement text (only for SWG-Format, all other
1860 // can be taken from the word list!)
1861 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1863 return false;
1866 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1870 // Text with attribution (only the SWG - SWG format!)
1871 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1872 const OUString&, const OUString&, SfxObjectShell&, OUString& )
1874 return false;
1877 OUString EncryptBlockName_Imp(const OUString& rName)
1879 OUStringBuffer aName;
1880 aName.append('#').append(rName);
1881 for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos)
1883 if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
1884 aName[nPos] &= 0x0f;
1886 return aName.makeStringAndClear();
1889 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1890 static void GeneratePackageName ( const OUString& rShort, OUString& rPackageName )
1892 OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1893 OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1895 for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1897 switch (aBuf[nPos])
1899 case '!':
1900 case '/':
1901 case ':':
1902 case '.':
1903 case '\\':
1904 aBuf[nPos] = '_';
1905 break;
1906 default:
1907 break;
1911 rPackageName = aBuf.makeStringAndClear();
1914 static const SvxAutocorrWord* lcl_SearchWordsInList(
1915 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt,
1916 sal_Int32& rStt, sal_Int32 nEndPos)
1918 const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1919 return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1922 // the search for the words in the substitution table
1923 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
1924 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
1925 SvxAutoCorrDoc&, LanguageTag& rLang )
1927 const SvxAutocorrWord* pRet = nullptr;
1928 LanguageTag aLanguageTag( rLang);
1929 if( aLanguageTag.isSystemLocale() )
1930 aLanguageTag.reset( MsLangId::getSystemLanguage());
1932 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
1933 * list instead? */
1935 // First search for eLang, then US-English -> English
1936 // and last in LANGUAGE_UNDETERMINED
1937 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1939 //the language is available - so bring it on
1940 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1941 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1942 if( pRet )
1944 rLang = aLanguageTag;
1945 return pRet;
1947 else
1948 return nullptr;
1951 // If it still could not be found here, then keep on searching
1952 LanguageType eLang = aLanguageTag.getLanguageType();
1953 // the primary language for example EN
1954 aLanguageTag.reset(aLanguageTag.getLanguage());
1955 LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
1956 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
1957 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
1958 CreateLanguageFile(aLanguageTag, false)))
1960 //the language is available - so bring it on
1961 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1962 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1963 if( pRet )
1965 rLang = aLanguageTag;
1966 return pRet;
1970 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
1971 CreateLanguageFile(aLanguageTag, false))
1973 //the language is available - so bring it on
1974 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1975 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1976 if( pRet )
1978 rLang = aLanguageTag;
1979 return pRet;
1982 return nullptr;
1985 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
1986 const OUString& sWord )
1988 LanguageTag aLanguageTag( eLang);
1990 /* TODO-BCP47: again horrible ugliness */
1992 // First search for eLang, then primary language of eLang
1993 // and last in LANGUAGE_UNDETERMINED
1995 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1997 //the language is available - so bring it on
1998 auto const& pList = m_aLangTable.find(aLanguageTag)->second;
1999 if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2000 return true;
2003 // If it still could not be found here, then keep on searching
2004 // the primary language for example EN
2005 aLanguageTag.reset(aLanguageTag.getLanguage());
2006 LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2007 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2008 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2009 CreateLanguageFile(aLanguageTag, false)))
2011 //the language is available - so bring it on
2012 auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2013 if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2014 return true;
2017 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2018 CreateLanguageFile(aLanguageTag, false))
2020 //the language is available - so bring it on
2021 auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2022 if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2023 return true;
2025 return false;
2028 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
2030 SvStringsISortDtor::const_iterator it = pList->find( "~" );
2031 SvStringsISortDtor::size_type nPos = it - pList->begin();
2032 if( nPos < pList->size() )
2034 OUString sLowerWord(sWord.toAsciiLowerCase());
2035 OUString sAbr;
2036 for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
2038 sAbr = (*pList)[ n ];
2039 if (sAbr[0] != '~')
2040 break;
2041 // ~ and ~. are not allowed!
2042 if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
2044 OUString sLowerAbk(sAbr.toAsciiLowerCase());
2045 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
2047 if( !--i ) // agrees
2048 return true;
2050 if( sLowerAbk[i] != sLowerWord[--ii])
2051 break;
2056 OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
2057 "Wrongly sorted exception list?" );
2058 return false;
2061 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
2062 const OUString& sWord, bool bAbbreviation)
2064 LanguageTag aLanguageTag( eLang);
2066 /* TODO-BCP47: did I mention terrible horrible ugliness? */
2068 // First search for eLang, then primary language of eLang
2069 // and last in LANGUAGE_UNDETERMINED
2071 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2073 //the language is available - so bring it on
2074 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2075 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2076 return true;
2079 // If it still could not be found here, then keep on searching
2080 // the primary language for example EN
2081 aLanguageTag.reset(aLanguageTag.getLanguage());
2082 LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2083 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2084 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2085 CreateLanguageFile(aLanguageTag, false)))
2087 //the language is available - so bring it on
2088 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2089 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2090 return true;
2093 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2094 CreateLanguageFile(aLanguageTag, false))
2096 //the language is available - so bring it on
2097 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2098 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2099 return true;
2101 return false;
2104 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
2105 bool bNewFile, bool bTst, bool bUnlocalized ) const
2107 OUString sRet, sExt( rLanguageTag.getBcp47() );
2108 if (bUnlocalized)
2110 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2111 std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
2112 if (!vecFallBackStrings.empty())
2113 sExt = vecFallBackStrings[0];
2116 sExt = "_" + sExt + ".dat";
2117 if( bNewFile )
2118 sRet = sUserAutoCorrFile + sExt;
2119 else if( !bTst )
2120 sRet = sShareAutoCorrFile + sExt;
2121 else
2123 // test first in the user directory - if not exist, then
2124 sRet = sUserAutoCorrFile + sExt;
2125 if( !FStatHelper::IsDocument( sRet ))
2126 sRet = sShareAutoCorrFile + sExt;
2128 return sRet;
2131 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2132 SvxAutoCorrect& rParent,
2133 const OUString& rShareAutoCorrectFile,
2134 const OUString& rUserAutoCorrectFile)
2135 : sShareAutoCorrFile( rShareAutoCorrectFile ),
2136 sUserAutoCorrFile( rUserAutoCorrectFile ),
2137 aModifiedDate( Date::EMPTY ),
2138 aModifiedTime( tools::Time::EMPTY ),
2139 aLastCheckTime( tools::Time::EMPTY ),
2140 rAutoCorrect(rParent),
2141 nFlags(ACFlags::NONE)
2145 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2149 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2151 // Access the file system only every 2 minutes to check the date stamp
2152 bool bRet = false;
2154 tools::Time nMinTime( 0, 2 );
2155 tools::Time nAktTime( tools::Time::SYSTEM );
2156 if( aLastCheckTime <= nAktTime) // overflow?
2157 return false;
2158 nAktTime -= aLastCheckTime;
2159 if( nAktTime > nMinTime ) // min time past
2161 Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2162 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2163 &aTstDate, &aTstTime ) &&
2164 ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2166 bRet = true;
2167 // then remove all the lists fast!
2168 if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2170 pCplStt_ExcptLst.reset();
2172 if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst )
2174 pWrdStt_ExcptLst.reset();
2176 if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2178 pAutocorr_List.reset();
2180 nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad );
2182 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2184 return bRet;
// Reads the exception list stored in stream pStrmName of rStg into rpLst.
//
// rpLst is emptied if it already exists, otherwise a new list is allocated.
// If the storage contains the stream but opening it reports an error, the
// stream and storage references are released and RemoveStream_Imp() is
// called for that stream name.  Otherwise the stream is parsed with a
// FastParser feeding an SvXMLExceptionListImport that fills *rpLst.
// SAX and IO exceptions thrown while parsing are caught and ignored.
2187 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2188 std::unique_ptr<SvStringsISortDtor>& rpLst,
2189 const char* pStrmName,
2190 tools::SvRef<SotStorage>& rStg)
2192 if( rpLst )
2193 rpLst->clear();
2194 else
2195 rpLst.reset( new SvStringsISortDtor );
// the stream name is handed in as a narrow string; convert it to OUString
2198 const OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
2200 if( rStg.is() && rStg->IsStream( sStrmName ) )
2202 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2203 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
// stream could not be opened cleanly: release everything and remove it
2204 if( ERRCODE_NONE != xStrm->GetError())
2206 xStrm.clear();
2207 rStg.clear();
2208 RemoveStream_Imp( sStrmName );
2210 else
2212 uno::Reference< uno::XComponentContext > xContext =
2213 comphelper::getProcessComponentContext();
2215 xml::sax::InputSource aParserInput;
2216 aParserInput.sSystemId = sStrmName;
// read from the start of the stream through an 8 KiB buffer
2218 xStrm->Seek( 0 );
2219 xStrm->SetBufferSize( 8 * 1024 );
2220 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2222 // get filter
2223 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2225 // connect parser and filter
2226 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2227 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2228 xParser->setFastDocumentHandler( xFilter );
2229 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2230 xParser->setTokenHandler( xTokenHandler );
2232 // parse
2235 xParser->parseStream( aParserInput );
2237 catch( const xml::sax::SAXParseException& )
2239 // re throw ?
2241 catch( const xml::sax::SAXException& )
2243 // re throw ?
2245 catch( const io::IOException& )
2247 // re throw ?
2252 // Set time stamp
// remember the modification date/time of the share file and when we looked
2253 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2254 &aModifiedDate, &aModifiedTime );
2255 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
// Writes rLst as XML into the stream pStrmName of rStg.
//
// Does nothing if rStg is not set.  An empty list removes the stream from
// the storage and commits the storage.  Otherwise the stream is truncated,
// tagged with media type "text/xml" and filled via SvXMLExceptionListExport.
// If the stream commit reports no error and bConvert is false, the storage
// is committed as well; should that commit report an error, the stream is
// removed again and the storage committed once more.
2260 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2261 const SvStringsISortDtor& rLst,
2262 const char* pStrmName,
2263 tools::SvRef<SotStorage> const &rStg,
2264 bool bConvert )
2266 if( !rStg.is() )
2267 return;
2269 OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
// nothing to store: drop the stream instead of writing an empty list
2270 if( rLst.empty() )
2272 rStg->Remove( sStrmName );
2273 rStg->Commit();
2275 else
2277 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2278 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2279 if( xStrm.is() )
// start from an empty stream so no old content is left behind
2281 xStrm->SetSize( 0 );
2282 xStrm->SetBufferSize( 8192 );
2283 xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2286 uno::Reference< uno::XComponentContext > xContext =
2287 comphelper::getProcessComponentContext();
// XML writer that outputs into the storage stream
2289 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2290 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2291 xWriter->setOutputStream(xOut);
2293 uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2294 rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2296 xExp->exportDoc( XML_BLOCK_LIST );
2298 xStrm->Commit();
2299 if( xStrm->GetError() == ERRCODE_NONE )
// release the stream before committing the storage
2301 xStrm.clear();
2302 if (!bConvert)
2304 rStg->Commit();
// storage commit failed: take the freshly written stream out again
2305 if( ERRCODE_NONE != rStg->GetError() )
2307 rStg->Remove( sStrmName );
2308 rStg->Commit();
// (Re)loads the autocorrect word list from the share file.
//
// An existing list is emptied, otherwise a new one is created.  The stream
// pXMLImplAutocorr_ListStr of the storage at sShareAutoCorrFile is parsed
// with a FastParser feeding an SvXMLAutoCorrectImport that fills the list.
// A uno::Exception is reported via TOOLS_WARN_EXCEPTION.  Finally the
// modification stamp of the share file and the check time are recorded.
//
// Returns the (possibly empty) word list.
2316 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2318 if( pAutocorr_List )
2319 pAutocorr_List->DeleteAndDestroyAll();
2320 else
2321 pAutocorr_List.reset( new SvxAutocorrWordList() );
// open storage and stream read-only
2325 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2326 uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2327 uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2329 xml::sax::InputSource aParserInput;
2330 aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2331 aParserInput.aInputStream = xStrm->getInputStream();
2333 // get parser
2334 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2335 SAL_INFO("editeng", "AutoCorrect Import" );
2336 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2337 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2339 // connect parser and filter
2340 xParser->setFastDocumentHandler( xFilter );
2341 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2342 xParser->setTokenHandler(xTokenHandler);
2344 // parse
2345 xParser->parseStream( aParserInput );
2347 catch ( const uno::Exception& )
2349 TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2352 // Set time stamp
2353 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2354 &aModifiedDate, &aModifiedTime );
2355 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2357 return pAutocorr_List.get();
2360 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2362 if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2364 LoadAutocorrWordList();
2365 if( !pAutocorr_List )
2367 OSL_ENSURE( false, "No valid list" );
2368 pAutocorr_List.reset( new SvxAutocorrWordList() );
2370 nFlags |= ACFlags::ChgWordLstLoad;
2372 return pAutocorr_List.get();
2375 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2377 if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2379 LoadCplSttExceptList();
2380 if( !pCplStt_ExcptLst )
2382 OSL_ENSURE( false, "No valid list" );
2383 pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2385 nFlags |= ACFlags::CplSttLstLoad;
2387 return pCplStt_ExcptLst.get();
2390 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2392 bool bRet = false;
2393 if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2395 MakeUserStorage_Impl();
2396 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2398 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2400 xStg = nullptr;
2401 // Set time stamp
2402 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2403 &aModifiedDate, &aModifiedTime );
2404 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2405 bRet = true;
2407 return bRet;
2410 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew)
2412 bool bRet = false;
2413 SvStringsISortDtor* pExceptList = LoadWrdSttExceptList();
2414 if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second )
2416 MakeUserStorage_Impl();
2417 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2419 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2421 xStg = nullptr;
2422 // Set time stamp
2423 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2424 &aModifiedDate, &aModifiedTime );
2425 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2426 bRet = true;
2428 return bRet;
2431 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2435 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2436 if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2437 LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2439 catch (const css::ucb::ContentCreationException&)
2442 return pCplStt_ExcptLst.get();
2445 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2447 MakeUserStorage_Impl();
2448 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2450 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2452 xStg = nullptr;
2454 // Set time stamp
2455 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2456 &aModifiedDate, &aModifiedTime );
2457 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2460 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList()
2464 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2465 if( xStg.is() && xStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
2466 LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2468 catch (const css::ucb::ContentCreationException &)
2470 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList");
2472 return pWrdStt_ExcptLst.get();
2475 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList()
2477 MakeUserStorage_Impl();
2478 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2480 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2482 xStg = nullptr;
2483 // Set time stamp
2484 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2485 &aModifiedDate, &aModifiedTime );
2486 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2489 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList()
2491 if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2493 LoadWrdSttExceptList();
2494 if( !pWrdStt_ExcptLst )
2496 OSL_ENSURE( false, "No valid list" );
2497 pWrdStt_ExcptLst.reset( new SvStringsISortDtor );
2499 nFlags |= ACFlags::WrdSttLstLoad;
2501 return pWrdStt_ExcptLst.get();
2504 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2506 if( sShareAutoCorrFile != sUserAutoCorrFile )
2508 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2509 if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2510 xStg->IsStream( rName ) )
2512 xStg->Remove( rName );
2513 xStg->Commit();
2515 xStg = nullptr;
// Makes sure the user autocorrect file exists in the current format.
//
// If the user and share file names differ, the share file is copied to the
// user location (to a ".bak" name when the share file is an OLE storage,
// which also requests a conversion).  If the names are equal and the user
// file is an OLE storage, it is copied to ".bak" and converted.
// Conversion loads both exception lists from the copy and saves them to the
// user file, writes the word list via MakeBlocklist_Imp() and deletes the
// copy.  After a successful copy or conversion sShareAutoCorrFile is set to
// sUserAutoCorrFile.
2520 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2522 // The conversion needs to happen if the file is already in the user
2523 // directory and is in the old format. Additionally it needs to
2524 // happen when the file is being copied from share to user.
2526 bool bError = false, bConvert = false, bCopy = false;
2527 INetURLObject aDest;
2528 INetURLObject aSource;
2530 if (sUserAutoCorrFile != sShareAutoCorrFile )
2532 aSource = INetURLObject ( sShareAutoCorrFile );
2533 aDest = INetURLObject ( sUserAutoCorrFile );
// old-format share file: copy to a ".bak" name and convert from there
2534 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2536 aDest.SetExtension ( "bak" );
2537 bConvert = true;
2539 bCopy = true;
2541 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2543 aSource = INetURLObject ( sUserAutoCorrFile );
2544 aDest = INetURLObject ( sUserAutoCorrFile );
2545 aDest.SetExtension ( "bak" );
2546 bCopy = bConvert = true;
2548 if (bCopy)
// target folder = destination URL with its last path segment cut off
2552 OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
2553 sal_Int32 nSlashPos = sMain.lastIndexOf('/');
2554 sMain = sMain.copy(0, nSlashPos);
2555 ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
// copy (not move) the source into that folder, overwriting an existing file
2556 TransferInfo aInfo;
2557 aInfo.NameClash = NameClash::OVERWRITE;
2558 aInfo.NewTitle = aDest.GetLastName();
2559 aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
2560 aInfo.MoveData = false;
2561 aNewContent.executeCommand( "transfer", Any(aInfo));
2563 catch (...)
2565 bError = true;
2568 if (bConvert && !bError)
// read from the copy, write to the user file
2570 tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
2571 tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
2573 if( xSrcStg.is() && xDstStg.is() )
2575 std::unique_ptr<SvStringsISortDtor> pTmpWordList;
2577 if (xSrcStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
2578 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg );
2580 if (pTmpWordList)
// bConvert = true: the storage itself is not committed per list
2582 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true );
2583 pTmpWordList.reset();
2587 if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2588 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2590 if (pTmpWordList)
2592 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2593 pTmpWordList->clear();
// make sure the word list is loaded, then write it to the new storage
2596 GetAutocorrWordList();
2597 MakeBlocklist_Imp( *xDstStg );
2598 sShareAutoCorrFile = sUserAutoCorrFile;
2599 xDstStg = nullptr;
// remove the temporary copy; a failure to do so is ignored
2602 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2603 aContent.executeCommand ( "delete", makeAny ( true ) );
2605 catch (...)
2610 else if( bCopy && !bError )
2611 sShareAutoCorrFile = sUserAutoCorrFile;
// Writes the autocorrect word list as XML into the stream
// pXMLImplAutocorr_ListStr of rStg.
//
// If there is no list or it is empty, the stream is removed from the
// storage instead.  The stream is also removed when committing the storage
// after writing reports an error.
//
// Returns false if the stream could not be opened, if committing the stream
// reports an error, or if committing the storage reports an error;
// true otherwise.
2614 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2616 bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2617 if( !bRemove )
2619 tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2620 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2621 if( refList.is() )
// start from an empty stream so no old content is left behind
2623 refList->SetSize( 0 );
2624 refList->SetBufferSize( 8192 );
2625 refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2627 uno::Reference< uno::XComponentContext > xContext =
2628 comphelper::getProcessComponentContext();
// XML writer that outputs into the storage stream
2630 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2631 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2632 xWriter->setOutputStream(xOut);
2634 rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2636 xExp->exportDoc( XML_BLOCK_LIST );
2638 refList->Commit();
2639 bRet = ERRCODE_NONE == refList->GetError();
2640 if( bRet )
// release the stream before committing the storage
2642 refList.clear();
2643 rStg.Commit();
2644 if( ERRCODE_NONE != rStg.GetError() )
2646 bRemove = true;
2647 bRet = false;
2651 else
2652 bRet = false;
2655 if( bRemove )
2657 rStg.Remove( pXMLImplAutocorr_ListStr );
2658 rStg.Commit();
2661 return bRet;
// Applies a batch of deletions and additions to the word list and writes
// the result to the user file.
//
// aDeleteEntries are removed from the list first; for a removed entry that
// is not text-only, the storage element named after its short form is
// removed too.  Then each of aNewEntries is inserted as a text-only entry,
// replacing an existing entry with the same short form.  Processing of the
// new entries stops at the first failed insert.
//
// Returns false if the user storage could not be opened without error, if
// an insert failed, or if the last storage commit or MakeBlocklist_Imp()
// failed.
2664 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2666 // First get the current list!
2667 GetAutocorrWordList();
2669 MakeUserStorage_Impl();
2670 tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2672 bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2674 if( bRet )
2676 for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2678 std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2679 if( xFoundEntry )
2681 if( !xFoundEntry->IsTextOnly() )
// the element name is derived from the short form; how depends on the storage type
2683 OUString aName( aWordToDelete.GetShort() );
2684 if (xStorage->IsOLEStorage())
2685 aName = EncryptBlockName_Imp(aName);
2686 else
2687 GeneratePackageName ( aWordToDelete.GetShort(), aName );
2689 if( xStorage->IsContained( aName ) )
2691 xStorage->Remove( aName );
2692 bRet = xStorage->Commit();
2698 for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
// new entries are always stored as text-only
2700 SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
2701 std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2702 if( xRemoved )
2704 if( !xRemoved->IsTextOnly() )
2706 // Still have to remove the Storage
2707 OUString sStorageName( aWordToAdd.GetShort() );
2708 if (xStorage->IsOLEStorage())
2709 sStorageName = EncryptBlockName_Imp(sStorageName);
2710 else
2711 GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2713 if( xStorage->IsContained( sStorageName ) )
2714 xStorage->Remove( sStorageName );
// Insert() returns a pointer; it is used as "inserted or not" here
2717 bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2719 if ( !bRet )
2721 break;
2725 if ( bRet )
2727 bRet = MakeBlocklist_Imp( *xStorage );
2730 return bRet;
2733 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2735 // First get the current list!
2736 GetAutocorrWordList();
2738 MakeUserStorage_Impl();
2739 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2741 bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2743 // Update the word list
2744 if( bRet )
2746 SvxAutocorrWord aNew(rShort, rLong, true );
2747 std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2748 if( xRemove )
2750 if( !xRemove->IsTextOnly() )
2752 // Still have to remove the Storage
2753 OUString sStgNm( rShort );
2754 if (xStg->IsOLEStorage())
2755 sStgNm = EncryptBlockName_Imp(sStgNm);
2756 else
2757 GeneratePackageName ( rShort, sStgNm);
2759 if( xStg->IsContained( sStgNm ) )
2760 xStg->Remove( sStgNm );
2764 if( pAutocorr_List->Insert( std::move(aNew) ) )
2766 bRet = MakeBlocklist_Imp( *xStg );
2767 xStg = nullptr;
2769 else
2771 bRet = false;
2774 return bRet;
// Stores a replacement for rShort that is taken from rShell.
//
// The actual storing is delegated to rAutoCorrect.PutText(), which also
// delivers the long text.  If that succeeds, a non-text-only entry
// rShort -> sLong is inserted into the word list and, if the insert
// succeeds, the word list is written to the user file.
// A uno::Exception is caught and ignored.
2777 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2778 SfxObjectShell& rShell )
2780 // First get the current list!
2781 GetAutocorrWordList();
2783 MakeUserStorage_Impl();
2787 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2788 OUString sLong;
2789 bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
// release the storage before the user file is opened again below
2790 xStg = nullptr;
2792 // Update the word list
2793 if( bRet )
2795 if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2797 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2798 MakeBlocklist_Imp( *xStor );
2802 catch ( const uno::Exception& )
2807 // Keep the list sorted ...
2808 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2810 bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2812 CollatorWrapper& rCmp = ::GetCollatorWrapper();
2813 return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2817 namespace {
2819 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
2823 struct SvxAutocorrWordList::Impl
2826 // only one of these contains the data
2827 // maSortedVector is manually sorted so we can optimise data movement
2828 mutable AutocorrWordSetType maSortedVector;
2829 mutable AutocorrWordHashType maHash; // key is 'Short'
2831 void DeleteAndDestroyAll()
2833 maHash.clear();
2834 maSortedVector.clear();
2838 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2840 SvxAutocorrWordList::~SvxAutocorrWordList()
2844 void SvxAutocorrWordList::DeleteAndDestroyAll()
2846 mpImpl->DeleteAndDestroyAll();
2849 // returns true if inserted
2850 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
2852 if ( mpImpl->maSortedVector.empty() ) // use the hash
2854 OUString aShort = aWord.GetShort();
2855 auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
2856 if (inserted)
2857 return &(it->second);
2858 return nullptr;
2860 else
2862 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
2863 CollatorWrapper& rCmp = ::GetCollatorWrapper();
2864 if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
2866 it = mpImpl->maSortedVector.insert(it, std::move(aWord));
2867 return &*it;
2869 return nullptr;
2873 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2875 (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
2878 bool SvxAutocorrWordList::empty() const
2880 return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
2883 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
2886 if ( mpImpl->maSortedVector.empty() ) // use the hash
2888 AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2889 if( it != mpImpl->maHash.end() )
2891 SvxAutocorrWord pMatch = std::move(it->second);
2892 mpImpl->maHash.erase (it);
2893 return pMatch;
2896 else
2898 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
2899 if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
2901 SvxAutocorrWord pMatch = std::move(*it);
2902 mpImpl->maSortedVector.erase (it);
2903 return pMatch;
2906 return std::optional<SvxAutocorrWord>();
2909 // return the sorted contents - defer sorting until we have to.
2910 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
2912 // convert from hash to set permanently
2913 if ( mpImpl->maSortedVector.empty() )
2915 std::vector<SvxAutocorrWord> tmp;
2916 tmp.reserve(mpImpl->maHash.size());
2917 for (auto & rPair : mpImpl->maHash)
2918 tmp.emplace_back(std::move(rPair.second));
2919 mpImpl->maHash.clear();
2920 // sort twice - this gets the list into mostly-sorted order, which
2921 // reduces the number of times we need to invoke the expensive ICU collate fn.
2922 std::sort(tmp.begin(), tmp.end(),
2923 [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
2925 return lhs.GetShort() < rhs.GetShort();
2927 // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
2928 // stable_sort is twice as fast as sort in this situation because it does
2929 // fewer comparison operations.
2930 std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
2931 mpImpl->maSortedVector = std::move(tmp);
2933 return mpImpl->maSortedVector;
// Checks whether the list entry pFnd matches the text in rTxt that ends at
// nEndPos.
//
// The short form of pFnd may be a plain word, ":name:", or a pattern with
// ".*" at the start and/or the end.  On a match rStt is set to the start of
// the matched text.  For a plain word pFnd itself is returned; for wildcard
// patterns a new entry (matched text -> computed replacement) is inserted
// into the list and returned.  Returns nullptr if there is no match.
2936 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
2937 const OUString &rTxt,
2938 sal_Int32 &rStt,
2939 sal_Int32 nEndPos) const
2941 const OUString& rChk = pFnd->GetShort();
// length of the wildcard marker (2) at either end of the pattern, or 0
2943 sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
2944 sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
2945 sal_Int32 nSttWdPos = nEndPos;
2947 // direct replacement of keywords surrounded by colons (for example, ":name:")
2948 bool bColonNameColon = rTxt.getLength() > nEndPos &&
2949 rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
// only continue if the text up to nEndPos is long enough to hold the pattern
2950 if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard )
2953 bool bWasWordDelim = false;
// where the pattern (without a leading ".*") would have to start
2954 sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
2955 if (bColonNameColon)
2956 nCalcStt++;
2957 if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
2958 ( nCalcStt < rStt &&
2959 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
2961 TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
2962 OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
2963 if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
2965 rStt = nCalcStt;
2966 if (!left_wildcard)
2968 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
2969 if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
2970 return nullptr;
2971 return pFnd;
2973 // get the first word delimiter position before the matching ".*word" pattern
2974 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
2976 if (bWasWordDelim) rStt++;
// text between the word start and the matched pattern is kept in front of the replacement
2977 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
2978 // avoid double spaces before simple "word" replacement
2979 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong();
2980 if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) )
2981 return pNew;
2983 } else
2984 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
2985 if ( right_wildcard )
// the pattern with its wildcard markers stripped
2988 OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
2989 // Get the last word delimiter position
2990 bool not_suffix;
2992 while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
2994 // search the first occurrence (with a left word delimitation, if needed)
2995 sal_Int32 nFndPos = -1;
2996 do {
2997 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1);
2998 if (nFndPos == -1)
2999 break;
// the occurrence ends at or before the last word delimiter: keep searching
3000 not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
3001 } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );
3003 if ( nFndPos != -1 )
3005 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
3007 if ( left_wildcard )
3009 // get the first word delimiter position before the matching ".*word.*" pattern
3010 while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
3012 if (bWasWordDelim) nFndPos++;
3014 if (nEndPos + extra_repl <= nFndPos)
3016 return nullptr;
3018 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
3019 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl);
3021 OUString aLong;
3022 rStt = nFndPos;
3023 if ( !left_wildcard )
// "word.*": replacement followed by whatever text came after the pattern
3025 sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
3026 aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : "");
3027 } else {
// ".*word.*": replace each occurrence of the pattern until a word delimiter is met
3028 OUStringBuffer buf;
3029 do {
3030 nSttWdPos = rTxt.indexOf( sTmp, nFndPos);
3031 if (nSttWdPos != -1)
3033 sal_Int32 nTmp(nFndPos);
3034 while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp]))
3035 nTmp++;
3036 if (nTmp < nSttWdPos)
3037 break; // word delimiter found
3038 buf.append(std::u16string_view(rTxt).substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
3039 nFndPos = nSttWdPos + sTmp.getLength();
3041 } while (nSttWdPos != -1);
// append the remaining text up to nEndPos
3042 if (nEndPos - nFndPos > extra_repl)
3043 buf.append(std::u16string_view(rTxt).substr(nFndPos, nEndPos - nFndPos));
3044 aLong = buf.makeStringAndClear();
3046 if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
// the new entry is returned only if the match is followed by a word delimiter or ends the text
3048 if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos )
3049 return pNew;
3054 return nullptr;
3057 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt,
3058 sal_Int32 nEndPos) const
3060 for (auto const& elem : mpImpl->maHash)
3062 if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) )
3063 return pTmp;
3066 for (auto const& elem : mpImpl->maSortedVector)
3068 if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) )
3069 return pTmp;
3071 return nullptr;
3074 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */