tdf#156769 - Escape the question mark in the storage name
[LibreOffice.git] / editeng / source / misc / svxacorr.cxx
blobd278b582e8336db81344ff65b6263241b2da07f0
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <memory>
21 #include <utility>
22 #include <algorithm>
23 #include <string_view>
24 #include <sal/config.h>
26 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
27 #include <com/sun/star/embed/XStorage.hpp>
28 #include <com/sun/star/io/IOException.hpp>
29 #include <com/sun/star/io/XStream.hpp>
30 #include <tools/urlobj.hxx>
31 #include <i18nlangtag/mslangid.hxx>
32 #include <i18nutil/transliteration.hxx>
33 #include <sal/log.hxx>
34 #include <osl/diagnose.h>
35 #include <vcl/svapp.hxx>
36 #include <vcl/settings.hxx>
37 #include <svl/fstathelper.hxx>
38 #include <svl/urihelper.hxx>
39 #include <unotools/charclass.hxx>
40 #include <com/sun/star/i18n/UnicodeType.hpp>
41 #include <unotools/collatorwrapper.hxx>
42 #include <com/sun/star/i18n/UnicodeScript.hpp>
43 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
44 #include <unotools/localedatawrapper.hxx>
45 #include <unotools/transliterationwrapper.hxx>
46 #include <comphelper/processfactory.hxx>
47 #include <comphelper/storagehelper.hxx>
48 #include <o3tl/string_view.hxx>
49 #include <editeng/editids.hrc>
50 #include <sot/storage.hxx>
51 #include <editeng/udlnitem.hxx>
52 #include <editeng/wghtitem.hxx>
53 #include <editeng/postitem.hxx>
54 #include <editeng/crossedoutitem.hxx>
55 #include <editeng/escapementitem.hxx>
56 #include <editeng/svxacorr.hxx>
57 #include <editeng/unolingu.hxx>
58 #include <vcl/window.hxx>
59 #include <com/sun/star/xml/sax/InputSource.hpp>
60 #include <com/sun/star/xml/sax/FastParser.hpp>
61 #include <com/sun/star/xml/sax/Writer.hpp>
62 #include <com/sun/star/xml/sax/SAXParseException.hpp>
63 #include <unotools/streamwrap.hxx>
64 #include "SvXMLAutoCorrectImport.hxx"
65 #include "SvXMLAutoCorrectExport.hxx"
66 #include "SvXMLAutoCorrectTokenHandler.hxx"
67 #include <ucbhelper/content.hxx>
68 #include <com/sun/star/ucb/ContentCreationException.hpp>
69 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
70 #include <com/sun/star/ucb/TransferInfo.hpp>
71 #include <com/sun/star/ucb/NameClash.hpp>
72 #include <comphelper/diagnose_ex.hxx>
73 #include <xmloff/xmltoken.hxx>
74 #include <unordered_map>
75 #include <rtl/character.hxx>
77 using namespace ::com::sun::star::ucb;
78 using namespace ::com::sun::star::uno;
79 using namespace ::com::sun::star::xml::sax;
80 using namespace ::com::sun::star;
81 using namespace ::xmloff::token;
82 using namespace ::utl;
84 namespace {
86 enum class Flags {
87 NONE = 0x00,
88 FullStop = 0x01,
89 ExclamationMark = 0x02,
90 QuestionMark = 0x04,
95 namespace o3tl {
96 template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
98 const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space
100 constexpr OUString pXMLImplWordStart_ExcptLstStr = u"WordExceptList.xml"_ustr;
101 constexpr OUString pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"_ustr;
102 constexpr OUString pXMLImplAutocorr_ListStr = u"DocumentList.xml"_ustr;
// tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
// Curious, why these \u0083\u0084\u0089\u0091\u0092\u0093\u0094 are handled as "begin characters"?
constexpr std::u16string_view
    /* also at these beginnings - Brackets and all kinds of begin characters */
    sImplSttSkipChars = u"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094",
    /* also at these ends - Brackets and all kinds of end characters */
    sImplEndSkipChars = u"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";
112 static OUString EncryptBlockName_Imp(std::u16string_view rName);
114 static bool NonFieldWordDelim( const sal_Unicode c )
116 return ' ' == c || '\t' == c || 0x0a == c ||
117 cNonBreakingSpace == c || 0x2011 == c;
120 static bool IsWordDelim( const sal_Unicode c )
122 return c == 0x1 || NonFieldWordDelim(c);
126 static bool IsLowerLetter( sal_Int32 nCharType )
128 return CharClass::isLetterType( nCharType ) &&
129 ( css::i18n::KCharacterType::LOWER & nCharType);
132 static bool IsUpperLetter( sal_Int32 nCharType )
134 return CharClass::isLetterType( nCharType ) &&
135 ( css::i18n::KCharacterType::UPPER & nCharType);
138 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
139 sal_Int32 nStt, sal_Int32 nEnd )
141 for( ; nStt < nEnd; ++nStt )
143 css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
144 switch( nScript )
146 case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
147 case css::i18n::UnicodeScript_kHangulJamo:
148 case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
149 case css::i18n::UnicodeScript_kHiragana:
150 case css::i18n::UnicodeScript_kKatakana:
151 case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
152 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
153 case css::i18n::UnicodeScript_kCJKCompatibility:
154 case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA:
155 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
156 case css::i18n::UnicodeScript_kHangulSyllable:
157 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
158 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
159 return true;
160 default: ; //do nothing
163 return false;
166 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
167 sal_Int32 nStt, sal_Int32 nEnd )
169 for( ; nStt < nEnd; ++nStt )
171 if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
172 return true;
174 return false;
177 static bool lcl_IsInArr(std::u16string_view arr, const sal_uInt32 c)
179 return std::any_of(arr.begin(), arr.end(), [c](const auto c1) { return c1 == c; });
182 SvxAutoCorrDoc::~SvxAutoCorrDoc()
186 // Called by the functions:
187 // - FnCapitalStartWord
188 // - FnCapitalStartSentence
189 // after the exchange of characters. Then the words, if necessary, can be inserted
190 // into the exception list.
191 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
192 sal_Unicode )
196 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
198 return LANGUAGE_SYSTEM;
201 static const LanguageTag& GetAppLang()
203 return Application::GetSettings().GetLanguageTag();
206 /// Never use an unresolved LANGUAGE_SYSTEM.
207 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
209 LanguageType eLang = rDoc.GetLanguage( nPos );
210 if (eLang == LANGUAGE_SYSTEM)
211 eLang = GetAppLang().getLanguageType(); // the current work locale
212 return eLang;
215 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
217 static std::unique_ptr<LocaleDataWrapper> xLclDtWrp;
218 LanguageTag aLcl( nLang );
219 if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl)
220 xLclDtWrp.reset(new LocaleDataWrapper(std::move(aLcl)));
221 return *xLclDtWrp;
223 static TransliterationWrapper& GetIgnoreTranslWrapper()
225 static int bIsInit = 0;
226 static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
227 TransliterationFlags::IGNORE_KANA |
228 TransliterationFlags::IGNORE_WIDTH );
229 if( !bIsInit )
231 aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
232 bIsInit = 1;
234 return aWrp;
236 static CollatorWrapper& GetCollatorWrapper()
238 static CollatorWrapper aCollWrp = []()
240 CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
241 tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
242 return tmp;
243 }();
244 return aCollWrp;
247 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
249 return cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
250 cChar == ' ' || cChar == '\'' || cChar == '\"' ||
251 cChar == '*' || cChar == '_' || cChar == '%' ||
252 cChar == '.' || cChar == ',' || cChar == ';' ||
253 cChar == ':' || cChar == '?' || cChar == '!' ||
254 cChar == '<' || cChar == '>' ||
255 cChar == '/' || cChar == '-';
258 namespace
260 bool IsCompoundWordDelimChar(sal_Unicode cChar)
262 return cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
266 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
268 return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' ||
269 cChar == '/' /*case for the urls exception*/;
272 ACFlags SvxAutoCorrect::GetDefaultFlags()
274 ACFlags nRet = ACFlags::Autocorrect
275 | ACFlags::CapitalStartSentence
276 | ACFlags::CapitalStartWord
277 | ACFlags::ChgOrdinalNumber
278 | ACFlags::ChgToEnEmDash
279 | ACFlags::AddNonBrkSpace
280 | ACFlags::TransliterateRTL
281 | ACFlags::ChgAngleQuotes
282 | ACFlags::ChgWeightUnderl
283 | ACFlags::SetINetAttr
284 | ACFlags::SetDOIAttr
285 | ACFlags::ChgQuotes
286 | ACFlags::SaveWordCplSttLst
287 | ACFlags::SaveWordWordStartLst
288 | ACFlags::CorrectCapsLock;
289 LanguageType eLang = GetAppLang().getLanguageType();
290 if( eLang.anyOf(
291 LANGUAGE_ENGLISH,
292 LANGUAGE_ENGLISH_US,
293 LANGUAGE_ENGLISH_UK,
294 LANGUAGE_ENGLISH_AUS,
295 LANGUAGE_ENGLISH_CAN,
296 LANGUAGE_ENGLISH_NZ,
297 LANGUAGE_ENGLISH_EIRE,
298 LANGUAGE_ENGLISH_SAFRICA,
299 LANGUAGE_ENGLISH_JAMAICA,
300 LANGUAGE_ENGLISH_CARIBBEAN))
301 nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
302 return nRet;
305 constexpr sal_Unicode cEmDash = 0x2014;
306 constexpr sal_Unicode cEnDash = 0x2013;
307 constexpr OUString sEmDash(u"\u2014"_ustr);
308 constexpr OUString sEnDash(u"\u2013"_ustr);
309 constexpr sal_Unicode cApostrophe = 0x2019;
310 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
311 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
312 constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
313 constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
314 // stop characters for searching preceding quotes
315 // (the first character is also the opening quote we are looking for)
316 const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
317 const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
318 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
319 const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
320 const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
321 const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };
323 SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile,
324 OUString aUserAutocorrFile )
325 : sShareAutoCorrFile(std::move( aShareAutocorrFile ))
326 , sUserAutoCorrFile(std::move( aUserAutocorrFile ))
327 , eCharClassLang( LANGUAGE_DONTKNOW )
328 , nFlags(SvxAutoCorrect::GetDefaultFlags())
329 , cStartDQuote( 0 )
330 , cEndDQuote( 0 )
331 , cStartSQuote( 0 )
332 , cEndSQuote( 0 )
336 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
337 : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
338 , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
339 , aSwFlags( rCpy.aSwFlags )
340 , eCharClassLang(rCpy.eCharClassLang)
341 , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WordStartLstLoad))
342 , cStartDQuote( rCpy.cStartDQuote )
343 , cEndDQuote( rCpy.cEndDQuote )
344 , cStartSQuote( rCpy.cStartSQuote )
345 , cEndSQuote( rCpy.cEndSQuote )
350 SvxAutoCorrect::~SvxAutoCorrect()
354 void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
356 moCharClass.emplace( LanguageTag( eLang) );
357 eCharClassLang = eLang;
360 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
362 ACFlags nOld = nFlags;
363 nFlags = bOn ? nFlags | nFlag
364 : nFlags & ~nFlag;
366 if( !bOn )
368 if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
369 nFlags &= ~ACFlags::CplSttLstLoad;
370 if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
371 nFlags &= ~ACFlags::WordStartLstLoad;
372 if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
373 nFlags &= ~ACFlags::ChgWordLstLoad;
378 // Correct TWo INitial CApitals
379 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
380 sal_Int32 nSttPos, sal_Int32 nEndPos,
381 LanguageType eLang )
383 CharClass& rCC = GetCharClass( eLang );
385 // Delete all non alphanumeric. Test the characters at the beginning/end of
386 // the word ( recognizes: "(min.", "/min.", and so on.)
387 for( ; nSttPos < nEndPos; ++nSttPos )
388 if( rCC.isLetterNumeric( rTxt, nSttPos ))
389 break;
390 for( ; nSttPos < nEndPos; --nEndPos )
391 if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
392 break;
394 // Is the word a compounded word separated by delimiters?
395 // If so, keep track of all delimiters so each constituent
396 // word can be checked for two initial capital letters.
397 std::deque<sal_Int32> aDelimiters;
399 // Always check for two capitals at the beginning
400 // of the entire word, so start at nSttPos.
401 aDelimiters.push_back(nSttPos);
403 // Find all compound word delimiters
404 for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
406 if (IsCompoundWordDelimChar(rTxt[ n ]))
408 aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
412 // Decide where to put the terminating delimiter.
413 // If the last AutoCorrect char was a newline, then the AutoCorrect
414 // char will not be included in rTxt.
415 // If the last AutoCorrect char was not a newline, then the AutoCorrect
416 // character will be the last character in rTxt.
417 if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
418 aDelimiters.push_back(nEndPos);
420 // Iterate through the word and all words that compose it.
421 // Two capital letters at the beginning of word?
422 for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
424 nSttPos = aDelimiters[nI];
425 nEndPos = aDelimiters[nI + 1];
427 if( nSttPos+2 < nEndPos &&
428 IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
429 IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
430 // Is the third character a lower case
431 IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
432 // Do not replace special attributes
433 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
435 // test if the word is in an exception list
436 OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
437 if( !FindInWordStartExceptList(eLang, sWord) )
439 // Check that word isn't correctly spelt before correcting:
440 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
441 LinguMgr::GetSpellChecker();
442 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
444 Sequence< css::beans::PropertyValue > aEmptySeq;
445 if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
447 return;
450 sal_Unicode cSave = rTxt[ nSttPos ];
451 OUString sChar = rCC.lowercase( OUString(cSave) );
452 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
454 if( ACFlags::SaveWordWordStartLst & nFlags )
455 rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
462 // Format ordinal numbers suffixes (1st -> 1^st)
463 bool SvxAutoCorrect::FnChgOrdinalNumber(
464 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
465 sal_Int32 nSttPos, sal_Int32 nEndPos,
466 LanguageType eLang)
468 // 1st, 2nd, 3rd, 4 - 0th
469 // 201th or 201st
470 // 12th or 12nd
471 bool bChg = false;
473 // In some languages ordinal suffixes should never be
474 // changed to superscript. Let's break for those languages.
475 if (!eLang.anyOf(
476 LANGUAGE_SWEDISH,
477 LANGUAGE_SWEDISH_FINLAND))
479 CharClass& rCC = GetCharClass(eLang);
481 for (; nSttPos < nEndPos; ++nSttPos)
482 if (!lcl_IsInArr(sImplSttSkipChars, rTxt[nSttPos]))
483 break;
484 for (; nSttPos < nEndPos; --nEndPos)
485 if (!lcl_IsInArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
486 break;
489 // Get the last number in the string to check
490 sal_Int32 nNumEnd = nEndPos;
491 bool bFoundEnd = false;
492 bool isValidNumber = true;
493 sal_Int32 i = nEndPos;
494 while (i > nSttPos)
496 i--;
497 bool isDigit = rCC.isDigit(rTxt, i);
498 if (bFoundEnd)
499 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
501 if (isDigit && !bFoundEnd)
503 bFoundEnd = true;
504 nNumEnd = i;
508 if (bFoundEnd && isValidNumber) {
509 sal_Int32 nNum = o3tl::toInt32(rTxt.subView(nSttPos, nNumEnd - nSttPos + 1));
511 // Check if the characters after that number correspond to the ordinal suffix
512 uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
513 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
515 const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
516 for (OUString const & sSuffix : aSuffixes)
518 std::u16string_view sEnd = rTxt.subView(nNumEnd + 1, nEndPos - nNumEnd - 1);
520 if (sSuffix == sEnd)
522 // Check if the ordinal suffix has to be set as super script
523 if (rCC.isLetter(sSuffix))
525 // Do the change
526 SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
527 DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
528 rDoc.SetAttr(nNumEnd + 1, nEndPos,
529 SID_ATTR_CHAR_ESCAPEMENT,
530 aSvxEscapementItem);
531 bChg = true;
537 return bChg;
540 // Replace dashes
541 bool SvxAutoCorrect::FnChgToEnEmDash(
542 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
543 sal_Int32 nSttPos, sal_Int32 nEndPos,
544 LanguageType eLang )
546 bool bRet = false;
547 CharClass& rCC = GetCharClass( eLang );
548 if (eLang == LANGUAGE_SYSTEM)
549 eLang = GetAppLang().getLanguageType();
550 bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);
552 // rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert;
553 // keep a local copy for later use
554 OUString aOrigTxt = rTxt;
555 sal_Int32 nFirstReplacementTextLengthChange = 0;
557 // replace " - " or " --" with "enDash"
558 if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
560 sal_Unicode cCh = rTxt[ nSttPos ];
561 if( '-' == cCh )
563 if( 1 < nEndPos - nSttPos &&
564 ' ' == rTxt[ nSttPos-1 ] &&
565 '-' == rTxt[ nSttPos+1 ])
567 sal_Int32 n;
568 for( n = nSttPos+2; n < nEndPos && lcl_IsInArr(
569 sImplSttSkipChars,(cCh = rTxt[ n ]));
570 ++n )
573 // found: " --[<AnySttChars>][A-z0-9]
574 if( rCC.isLetterNumeric( OUString(cCh) ) )
576 for( n = nSttPos-1; n && lcl_IsInArr(
577 sImplEndSkipChars,(cCh = rTxt[ --n ])); )
580 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
581 if( rCC.isLetterNumeric( OUString(cCh) ))
583 rDoc.Delete( nSttPos, nSttPos + 2 );
584 rDoc.Insert( nSttPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
585 nFirstReplacementTextLengthChange = -1; // 2 ch -> 1 ch
586 bRet = true;
591 else if( 3 < nSttPos &&
592 ' ' == rTxt[ nSttPos-1 ] &&
593 '-' == rTxt[ nSttPos-2 ])
595 sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
596 if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
598 --nTmpPos;
599 ++nLen;
600 cCh = rTxt[ nTmpPos-1 ];
602 if( ' ' == cCh )
604 for( n = nSttPos; n < nEndPos && lcl_IsInArr(
605 sImplSttSkipChars,(cCh = rTxt[ n ]));
606 ++n )
609 // found: " - [<AnySttChars>][A-z0-9]
610 if( rCC.isLetterNumeric( OUString(cCh) ) )
612 cCh = ' ';
613 for( n = nTmpPos-1; n && lcl_IsInArr(
614 sImplEndSkipChars,(cCh = rTxt[ --n ])); )
616 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
617 if( rCC.isLetterNumeric( OUString(cCh) ))
619 rDoc.Delete( nTmpPos, nTmpPos + nLen );
620 rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
621 nFirstReplacementTextLengthChange = 1 - nLen; // nLen ch -> 1 ch
622 bRet = true;
629 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
630 // [0-9]--[0-9] double dash always replaced with "enDash"
631 // Finnish and Hungarian use enDash instead of emDash.
632 bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
633 if( 4 <= nEndPos - nSttPos )
635 std::u16string_view sTmpView( aOrigTxt.subView( nSttPos, nEndPos - nSttPos ) );
636 size_t nFndPos = sTmpView.find(u"--");
637 if (nFndPos > 0 && nFndPos < sTmpView.size() - 2)
639 // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it
640 // uses the index *both* as code unit index (when checking it as ASCII), *and*
641 // as code point index (when passes to css::i18n::XCharacterClassification).
642 // Oh well... Anyway, single-codepoint strings will workaround it.
643 sal_Int32 nStart = nSttPos + nFndPos;
644 sal_uInt32 chStart = aOrigTxt.iterateCodePoints(&nStart, -1);
645 OUString sStart(&chStart, 1);
646 // No idea why sImplEndSkipChars is checked at start
647 if (rCC.isLetterNumeric(sStart, 0) || lcl_IsInArr(sImplEndSkipChars, chStart))
649 sal_Int32 nEnd = nSttPos + nFndPos + 2;
650 sal_uInt32 chEnd = aOrigTxt.iterateCodePoints(&nEnd, 1);
651 OUString sEnd(&chEnd, 1);
652 // No idea why sImplSttSkipChars is checked at end
653 if (rCC.isLetterNumeric(sEnd, 0) || lcl_IsInArr(sImplSttSkipChars, chEnd))
655 nSttPos = nSttPos + nFndPos + nFirstReplacementTextLengthChange;
656 rDoc.Delete(nSttPos, nSttPos + 2);
657 rDoc.Insert(nSttPos,
658 (bEnDash || (rCC.isDigit(sStart, 0) && rCC.isDigit(sEnd, 0))
659 ? sEnDash
660 : sEmDash));
661 bRet = true;
666 return bRet;
669 // Add non-breaking space before specific punctuation marks in French text
670 sal_Int32 SvxAutoCorrect::FnAddNonBrkSpace(
671 SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
672 sal_Int32 nEndPos,
673 LanguageType eLang, bool& io_bNbspRunNext )
675 sal_Int32 nRet = -1;
677 CharClass& rCC = GetCharClass( eLang );
679 if ( rCC.getLanguageTag().getLanguage() == "fr" )
681 bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
682 OUString allChars = ":;?!%";
683 OUString chars( allChars );
684 if ( bFrCA )
685 chars = ":";
687 sal_Unicode cChar = rTxt[ nEndPos ];
688 bool bHasSpace = chars.indexOf( cChar ) != -1;
689 bool bIsSpecial = allChars.indexOf( cChar ) != -1;
690 if ( bIsSpecial )
692 // Get the last word delimiter position
693 sal_Int32 nSttWdPos = nEndPos;
694 bool bWasWordDelim = false;
695 while( nSttWdPos )
697 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
698 if (bWasWordDelim)
699 break;
702 //See if the text is the start of a protocol string, e.g. have text of
703 //"http" see if it is the start of "http:" and if so leave it alone
704 size_t nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
705 size_t nProtocolLen = nEndPos - nSttWdPos + 1;
706 if (nIndex + nProtocolLen <= rTxt.size())
708 if (INetURLObject::CompareProtocolScheme(rTxt.substr(nIndex, nProtocolLen)) != INetProtocol::NotValid)
709 return -1;
712 // Check the presence of "://" in the word
713 size_t nStrPos = rTxt.find( u"://", nSttWdPos + 1 );
714 if ( nStrPos == std::u16string_view::npos && nEndPos > 0 )
716 // Check the previous char
717 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
718 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
720 // Remove any previous normal space
721 sal_Int32 nPos = nEndPos - 1;
722 while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
724 if ( nPos == 0 ) break;
725 nPos--;
726 cPrevChar = rTxt[ nPos ];
729 nPos++;
730 if ( nEndPos - nPos > 0 )
731 rDoc.Delete( nPos, nEndPos );
733 // Add the non-breaking space at the end pos
734 if ( bHasSpace )
735 rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
736 io_bNbspRunNext = true;
737 nRet = nPos;
739 else if ( chars.indexOf( cPrevChar ) != -1 )
740 io_bNbspRunNext = true;
743 else if ( cChar == '/' && nEndPos > 1 && static_cast<sal_Int32>(rTxt.size()) > (nEndPos - 1) )
745 // Remove the hardspace right before to avoid formatting URLs
746 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
747 sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
748 if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
750 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
751 nRet = nEndPos - 1;
756 return nRet;
759 // URL recognition
760 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
761 sal_Int32 nSttPos, sal_Int32 nEndPos,
762 LanguageType eLang )
764 OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
765 GetCharClass( eLang ) ));
766 bool bRet = !sURL.isEmpty();
767 if( bRet ) // so, set attribute:
768 rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
769 return bRet;
772 // DOI citation recognition
773 bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
774 sal_Int32 nSttPos, sal_Int32 nEndPos,
775 LanguageType eLang )
777 OUString sURL( URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) ));
778 bool bRet = !sURL.isEmpty();
779 if( bRet ) // so, set attribute:
780 rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
781 return bRet;
784 // Automatic *bold*, /italic/, -strikeout- and _underline_
785 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
786 sal_Int32 nEndPos )
788 // Condition:
789 // at the beginning: _, *, / or ~ after Space with the following !Space
790 // at the end: _, *, / or ~ before Space (word delimiter?)
792 sal_Unicode cInsChar = rTxt[ nEndPos ]; // underline, bold, italic or strikeout
793 if( ++nEndPos != rTxt.getLength() &&
794 !IsWordDelim( rTxt[ nEndPos ] ) )
795 return false;
797 --nEndPos;
799 bool bAlphaNum = false;
800 sal_Int32 nPos = nEndPos;
801 sal_Int32 nFndPos = -1;
802 CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
804 while( nPos )
806 switch( sal_Unicode c = rTxt[ --nPos ] )
808 case '_':
809 case '-':
810 case '/':
811 case '*':
812 if( c == cInsChar )
814 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
815 IsWordDelim( rTxt[ nPos-1 ])) &&
816 !IsWordDelim( rTxt[ nPos+1 ]))
817 nFndPos = nPos;
818 else
819 // Condition is not satisfied, so cancel
820 nFndPos = -1;
821 nPos = 0;
823 break;
824 default:
825 if( !bAlphaNum )
826 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
830 if( -1 != nFndPos )
832 // first delete the Character at the end - this allows insertion
833 // of an empty hint in SetAttr which would be removed by Delete
834 // (fdo#62536, AUTOFMT in Writer)
835 rDoc.Delete( nEndPos, nEndPos + 1 );
837 // Span the Attribute over the area
838 // the end.
839 if( '*' == cInsChar ) // Bold
841 SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
842 rDoc.SetAttr( nFndPos + 1, nEndPos,
843 SID_ATTR_CHAR_WEIGHT,
844 aSvxWeightItem);
846 else if( '/' == cInsChar ) // Italic
848 SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
849 rDoc.SetAttr( nFndPos + 1, nEndPos,
850 SID_ATTR_CHAR_POSTURE,
851 aSvxPostureItem);
853 else if( '-' == cInsChar ) // Strikeout
855 SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
856 rDoc.SetAttr( nFndPos + 1, nEndPos,
857 SID_ATTR_CHAR_STRIKEOUT,
858 aSvxCrossedOutItem);
860 else // Underline
862 SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
863 rDoc.SetAttr( nFndPos + 1, nEndPos,
864 SID_ATTR_CHAR_UNDERLINE,
865 aSvxUnderlineItem);
867 rDoc.Delete( nFndPos, nFndPos + 1 );
870 return -1 != nFndPos;
873 // Capitalize first letter of every sentence
874 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
875 const OUString& rTxt, bool bNormalPos,
876 sal_Int32 nSttPos, sal_Int32 nEndPos,
877 LanguageType eLang )
880 if( rTxt.isEmpty() || nEndPos <= nSttPos )
881 return;
883 CharClass& rCC = GetCharClass( eLang );
884 OUString aText( rTxt );
885 const sal_Unicode *pStart = aText.getStr(),
886 *pStr = pStart + nEndPos,
887 *pWordStt = nullptr,
888 *pDelim = nullptr;
890 bool bAtStart = false;
891 do {
892 --pStr;
893 if (rCC.isLetter(aText, pStr - pStart))
895 if( !pWordStt )
896 pDelim = pStr+1;
897 pWordStt = pStr;
899 else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
901 if( (lcl_IsInArr( u"-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
902 pWordStt - 1 == pStr &&
903 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
904 (pStart + 1) <= pStr &&
905 rCC.isLetter(aText, pStr-1 - pStart))
906 pWordStt = --pStr;
907 else
908 break;
910 bAtStart = (pStart == pStr);
911 } while( !bAtStart );
913 if (!pWordStt)
914 return; // no character to be replaced
917 if (rCC.isDigit(aText, pStr - pStart))
918 return; // already ok
920 if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
921 return; // already ok
923 //See if the text is the start of a protocol string, e.g. have text of
924 //"http" see if it is the start of "http:" and if so leave it alone
925 sal_Int32 nIndex = pWordStt - pStart;
926 sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
927 if (nIndex + nProtocolLen <= rTxt.getLength())
929 if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid)
930 return; // already ok
933 if (0x1 == *pWordStt || 0x2 == *pWordStt)
934 return; // already ok
936 // Only capitalize, if string before specified characters is long enough
937 if( *pDelim && 2 >= pDelim - pWordStt &&
938 lcl_IsInArr( u".-)>", *pDelim ) )
939 return;
941 // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
942 if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
943 return;
945 if( !bAtStart ) // Still no beginning of a paragraph?
947 if (NonFieldWordDelim(*pStr))
949 for (;;)
951 bAtStart = (pStart == pStr--);
952 if (bAtStart || !NonFieldWordDelim(*pStr))
953 break;
956 // Asian full stop, full width full stop, full width exclamation mark
957 // and full width question marks are treated as word delimiters
958 else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
959 0xFF1F != *pStr )
960 return; // no valid separator -> no replacement
963 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
964 if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
965 return;
967 if( bAtStart ) // at the beginning of a paragraph?
969 // Check out the previous paragraph, if it exists.
970 // If so, then check to paragraph separator at the end.
971 OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
972 if (!pPrevPara)
974 // valid separator -> replace
975 OUString sChar( *pWordStt );
976 sChar = rCC.titlecase(sChar); //see fdo#56740
977 if (sChar != OUStringChar(*pWordStt))
978 rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
979 return;
982 aText = *pPrevPara;
983 bAtStart = false;
984 pStart = aText.getStr();
985 pStr = pStart + aText.getLength();
987 do { // overwrite all blanks
988 --pStr;
989 if (!NonFieldWordDelim(*pStr))
990 break;
991 bAtStart = (pStart == pStr);
992 } while( !bAtStart );
994 if( bAtStart )
995 return; // no valid separator -> no replacement
998 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
999 // all three can happen, but not more than once!
1000 const sal_Unicode* pExceptStt = nullptr;
1001 bool bContinue = true;
1002 Flags nFlag = Flags::NONE;
1005 switch (*pStr)
1007 // Western and Asian full stop
1008 case '.':
1009 case 0x3002:
1010 case 0xFF0E:
1012 if (pStr >= pStart + 2 && *(pStr - 2) == '.')
1014 //e.g. text "f.o.o. word": Now currently considering
1015 //capitalizing word but second last character of
1016 //previous word is a . So probably last word is an
1017 //anagram that ends in . and not truly the end of a
1018 //previous sentence, so don't autocapitalize this word
1019 return;
1021 if (nFlag & Flags::FullStop)
1022 return; // no valid separator -> no replacement
1023 nFlag |= Flags::FullStop;
1024 pExceptStt = pStr;
1026 break;
1027 case '!':
1028 case 0xFF01:
1030 if (nFlag & Flags::ExclamationMark)
1031 return; // no valid separator -> no replacement
1032 nFlag |= Flags::ExclamationMark;
1034 break;
1035 case '?':
1036 case 0xFF1F:
1038 if (nFlag & Flags::QuestionMark)
1039 return; // no valid separator -> no replacement
1040 nFlag |= Flags::QuestionMark;
1042 break;
1043 default:
1044 if (nFlag == Flags::NONE)
1045 return; // no valid separator -> no replacement
1046 else
1047 bContinue = false;
1048 break;
1051 if (bContinue && pStr-- == pStart)
1053 return; // no valid separator -> no replacement
1055 } while (bContinue);
1056 if (Flags::FullStop != nFlag)
1057 pExceptStt = nullptr;
1059 // Only capitalize, if string is long enough
1060 if( 2 > ( pStr - pStart ) )
1061 return;
1063 if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
1065 bool bValid = false, bAlphaFnd = false;
1066 const sal_Unicode* pTmpStr = pStr;
1067 while( !bValid )
1069 if( rCC.isDigit( aText, pTmpStr - pStart ) )
1071 bValid = true;
1072 pStr = pTmpStr - 1;
1074 else if( rCC.isLetter( aText, pTmpStr - pStart ) )
1076 if( bAlphaFnd )
1078 bValid = true;
1079 pStr = pTmpStr;
1081 else
1082 bAlphaFnd = true;
1084 else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
1085 break;
1087 if( pTmpStr == pStart )
1088 break;
1090 --pTmpStr;
1093 if( !bValid )
1094 return; // no valid separator -> no replacement
1097 bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
1099 // Search for the beginning of the word
1100 while (!NonFieldWordDelim(*pStr))
1102 if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
1103 bNumericOnly = false;
1105 if( pStart == pStr )
1106 break;
1108 --pStr;
1111 if( bNumericOnly ) // consists of only numbers, then not
1112 return;
1114 if (NonFieldWordDelim(*pStr))
1115 ++pStr;
1117 OUString sWord;
1119 // check on the basis of the exception list
1120 if( pExceptStt )
1122 sWord = OUString(pStr, pExceptStt - pStr + 1);
1123 if( FindInCplSttExceptList(eLang, sWord) )
1124 return;
1126 // Delete all non alphanumeric. Test the characters at the
1127 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1128 OUString sTmp( sWord );
1129 while( !sTmp.isEmpty() &&
1130 !rCC.isLetterNumeric( sTmp, 0 ) )
1131 sTmp = sTmp.copy(1);
1133 // Remove all non alphanumeric characters towards the end up until
1134 // the last one.
1135 sal_Int32 nLen = sTmp.getLength();
1136 while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
1137 --nLen;
1138 if( nLen + 1 < sTmp.getLength() )
1139 sTmp = sTmp.copy( 0, nLen + 1 );
1141 if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
1142 FindInCplSttExceptList(eLang, sTmp))
1143 return;
1145 if(FindInCplSttExceptList(eLang, sWord, true))
1146 return;
1149 // Ok, then replace
1150 sal_Unicode cSave = *pWordStt;
1151 nSttPos = pWordStt - rTxt.getStr();
1152 OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
1153 bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
1155 // Perhaps someone wants to have the word
1156 if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
1157 rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
1160 // Correct accidental use of cAPS LOCK key
1161 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1162 sal_Int32 nSttPos, sal_Int32 nEndPos,
1163 LanguageType eLang )
1165 if (nEndPos - nSttPos < 2)
1166 // string must be at least 2-character long.
1167 return false;
1169 CharClass& rCC = GetCharClass( eLang );
1171 // Check the first 2 letters.
1172 if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1173 return false;
1175 if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1176 return false;
1178 OUStringBuffer aConverted;
1179 aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1180 aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1182 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1183 if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1184 return false;
1186 for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1188 if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1189 // A lowercase letter disqualifies the whole text.
1190 return false;
1192 if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1193 // Another uppercase letter. Convert it.
1194 aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1195 else
1196 // This is not an alphabetic letter. Leave it as-is.
1197 aConverted.append( rTxt[i] );
1200 // Replace the word.
1201 rDoc.Delete(nSttPos, nEndPos);
1202 rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1204 return true;
1208 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1209 LanguageType eLang ) const
1211 sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1212 ? GetStartDoubleQuote()
1213 : GetStartSingleQuote() )
1214 : ( '\"' == cInsChar
1215 ? GetEndDoubleQuote()
1216 : GetEndSingleQuote() );
1217 if( !cRet )
1219 // then through the Language find the right character
1220 if( LANGUAGE_NONE == eLang )
1221 cRet = cInsChar;
1222 else
1224 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1225 OUString sRet( bSttQuote
1226 ? ( '\"' == cInsChar
1227 ? rLcl.getDoubleQuotationMarkStart()
1228 : rLcl.getQuotationMarkStart() )
1229 : ( '\"' == cInsChar
1230 ? rLcl.getDoubleQuotationMarkEnd()
1231 : rLcl.getQuotationMarkEnd() ));
1232 cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1235 return cRet;
1238 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1239 sal_Unicode cInsChar, bool bSttQuote,
1240 bool bIns, LanguageType eLang, ACQuotes eType ) const
1242 sal_Unicode cRet;
1244 if ( eType == ACQuotes::DoubleAngleQuote )
1246 bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
1247 // pressing " inside a quotation -> use second level angle quotes
1248 bool bLeftQuote = '\"' == cInsChar &&
1249 // start position and Romanian OR
1250 // not start position and Hungarian
1251 bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
1252 cRet = ( '<' == cInsChar || bLeftQuote )
1253 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
1254 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
1256 else if ( eType == ACQuotes::UseApostrophe )
1257 cRet = cApostrophe;
1258 else
1259 cRet = GetQuote( cInsChar, bSttQuote, eLang );
1261 OUString sChg( cInsChar );
1262 if( bIns )
1263 rDoc.Insert( nInsPos, sChg );
1264 else
1265 rDoc.Replace( nInsPos, sChg );
1267 sChg = OUString(cRet);
1269 if( eType == ACQuotes::NonBreakingSpace )
1271 if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
1273 if( !bSttQuote )
1274 ++nInsPos;
1277 else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
1279 rDoc.Delete( nInsPos-1, nInsPos);
1280 --nInsPos;
1283 rDoc.Replace( nInsPos, sChg );
1285 // i' -> I' in English (last step for the Undo)
1286 if( eType == ACQuotes::CapitalizeIAm )
1287 rDoc.Replace( nInsPos-1, "I" );
1290 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1291 sal_Unicode cInsChar, bool bSttQuote )
1293 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1294 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1296 OUString sRet(cRet);
1298 if( '\"' == cInsChar )
1300 if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1302 if( bSttQuote )
1303 sRet += " ";
1304 else
1305 sRet = " " + sRet;
1308 return sRet;
1311 // search preceding opening quote in the paragraph before the insert position
1312 static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
1313 const sal_Unicode sPrecedingChar, const sal_Unicode sStopChar, const sal_Unicode* aStopChars )
1315 sal_Unicode cTmpChar;
1317 do {
1318 cTmpChar = rTxt[ --nPos ];
1319 if ( cTmpChar == sPrecedingChar )
1320 return true;
1322 if ( cTmpChar == sStopChar )
1323 return false;
1325 for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
1326 if ( cTmpChar == *pCh )
1327 return false;
1329 } while ( nPos > 0 );
1331 return false;
// Central auto-correction routine for a character cChar handled at position
// nInsPos of the paragraph text rTxt.
//
// When cChar is non-zero the steps are, in this order: suppress a second
// consecutive space, replace typed quotes, replace doubled '<' / '>' by angle
// quotes, otherwise write cChar into rDoc and run the non-breaking-space
// handling. After that (also for cChar == 0) the word in front of nInsPos is
// examined: bold/underline markup, replacement-table lookup, RTL
// transliteration, ordinal number / URL / DOI recognition, caps-lock
// correction, capitalisation and dash replacement. Each step is gated by its
// ACFlags value.
//
// bInsert         - true: cChar is inserted via rDoc.Insert, false: it
//                   overwrites via rDoc.Replace
// io_bNbspRunNext - value on entry is kept in bIsNextRun, the flag is then
//                   reset to false and handed to FnAddNonBrkSpace
// pFrameWin       - may be null; used only to query the caps-lock indicator
//                   and to simulate a caps-lock key press
//
1334 // WARNING: rTxt may become invalid, see comment below
1335 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1336 sal_Int32 nInsPos, sal_Unicode cChar,
1337 bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1339 bool bIsNextRun = io_bNbspRunNext;
1340 io_bNbspRunNext = false; // if it was set, then it has to be turned off
// The do/while(false) construct runs exactly once; every "break" below leaves
// the function's processing early.
1342 do{ // only for middle check loop !!
1343 if( cChar )
1345 // Prevent double space
1346 if( nInsPos && ' ' == cChar &&
1347 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1348 ' ' == rTxt[ nInsPos - 1 ])
1350 break;
1353 bool bSingle = '\'' == cChar;
1354 bool bIsReplaceQuote =
1355 (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1356 (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1357 if( bIsReplaceQuote )
// A quote at position 0 is always an opening quote; otherwise the preceding
// character decides.
1359 bool bSttQuote = !nInsPos;
1360 ACQuotes eType = ACQuotes::NONE;
1361 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1362 if (!bSttQuote)
1364 sal_Unicode cPrev = rTxt[ nInsPos-1 ];
1365 bSttQuote = NonFieldWordDelim(cPrev) ||
1366 lcl_IsInArr( u"([{", cPrev ) ||
1367 ( cEmDash == cPrev ) ||
1368 ( cEnDash == cPrev );
1369 // tdf#38394 use opening quotation mark << in French l'<<word>>
1370 if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1371 primary(eLang) == primary(LANGUAGE_FRENCH) &&
1372 ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
1373 // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1374 OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
1375 ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
1376 // abbreviated form of que
1377 ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
1378 ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
1380 bSttQuote = true;
1382 // tdf#108423 for capitalization of English i'm
1383 else if ( bSingle && ( cPrev == 'i' ) &&
1384 primary(eLang) == primary(LANGUAGE_ENGLISH) &&
1385 ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
1387 eType = ACQuotes::CapitalizeIAm;
1389 // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1390 else if ( !bSingle && nInsPos &&
1391 ( ( eLang == LANGUAGE_HUNGARIAN &&
1392 lcl_HasPrecedingChar( rTxt, nInsPos,
1393 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
1394 bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEnd[1],
1395 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 2 ) ) ||
1396 ( eLang.anyOf(
1397 LANGUAGE_ROMANIAN,
1398 LANGUAGE_ROMANIAN_MOLDOVA ) &&
1399 lcl_HasPrecedingChar( rTxt, nInsPos,
1400 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
1401 bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEndRo[1],
1402 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 2 ) ) ) )
1404 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1405 // only if the opening double quotation mark is the default one
1406 if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
1407 eType = ACQuotes::DoubleAngleQuote;
1409 else if ( bSingle && nInsPos && !bSttQuote &&
1410 // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1411 // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1412 // tdf#123786 the same for Russian and Ukrainian
1413 ( eLang.anyOf (
1414 LANGUAGE_CZECH,
1415 LANGUAGE_GERMAN,
1416 LANGUAGE_GERMAN_SWISS,
1417 LANGUAGE_GERMAN_AUSTRIAN,
1418 LANGUAGE_GERMAN_LUXEMBOURG,
1419 LANGUAGE_GERMAN_LIECHTENSTEIN,
1420 LANGUAGE_ICELANDIC,
1421 LANGUAGE_SLOVAK,
1422 LANGUAGE_SLOVENIAN ) ) )
1424 sal_Unicode sStartChar = GetStartSingleQuote();
1425 sal_Unicode sEndChar = GetEndSingleQuote();
1426 if ( !sStartChar || !sEndChar ) {
1427 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1428 if ( !sStartChar ) sStartChar = rLcl.getQuotationMarkStart()[0];
// NOTE(review): the end character is taken from getQuotationMarkStart() as
// well; GetQuote() uses getQuotationMarkEnd() for the closing single quote, so
// this looks like a copy/paste slip - confirm before changing.
1429 if ( !sEndChar ) sEndChar = rLcl.getQuotationMarkStart()[0];
1431 if ( !lcl_HasPrecedingChar( rTxt, nInsPos, sStartChar, sEndChar, aStopSingleQuoteEnd + 1 ) )
1433 CharClass& rCC = GetCharClass( eLang );
1434 if ( rCC.isLetter(rTxt, nInsPos-1) )
1436 eType = ACQuotes::UseApostrophe;
1440 else if ( bSingle && nInsPos && !bSttQuote &&
1441 ( eLang.anyOf (
1442 LANGUAGE_RUSSIAN,
1443 LANGUAGE_UKRAINIAN ) &&
1444 !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa[1], aStopSingleQuoteEndRuUa + 2 ) ) )
1446 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1447 CharClass& rCC = GetCharClass( eLang );
1448 if ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) &&
1449 // use apostrophe only after letters, not after digits or punctuation
1450 rCC.isLetter(rTxt, nInsPos-1) )
1452 eType = ACQuotes::UseApostrophe;
// French (except Swiss French) double quotes get a non-breaking space unless
// a more specific type was chosen above.
1457 if ( eType == ACQuotes::NONE && !bSingle &&
1458 ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
1459 eType = ACQuotes::NonBreakingSpace;
1461 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
1462 break;
1464 // tdf#133524 change "<<" and ">>" to double angle quotation marks
1465 else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
1466 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
1467 ('<' == cChar || '>' == cChar) &&
1468 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
1470 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1471 if ( eLang.anyOf(
1472 LANGUAGE_CATALAN, // primary level
1473 LANGUAGE_CATALAN_VALENCIAN, // primary level
1474 LANGUAGE_FINNISH, // alternative primary level
1475 LANGUAGE_FRENCH_SWISS, // second level
1476 LANGUAGE_GALICIAN, // primary level
1477 LANGUAGE_HUNGARIAN, // second level
1478 LANGUAGE_POLISH, // second level
1479 LANGUAGE_PORTUGUESE, // primary level
1480 LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
1481 LANGUAGE_ROMANIAN, // second level
1482 LANGUAGE_ROMANIAN_MOLDOVA, // second level
1483 LANGUAGE_SWEDISH, // alternative primary level
1484 LANGUAGE_SWEDISH_FINLAND, // alternative primary level
1485 LANGUAGE_UKRAINIAN, // primary level
1486 LANGUAGE_USER_ARAGONESE, // primary level
1487 LANGUAGE_USER_ASTURIAN ) || // primary level
1488 primary(eLang) == primary(LANGUAGE_GERMAN) || // alternative primary level
1489 primary(eLang) == primary(LANGUAGE_SPANISH) ) // primary level
1491 InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
1492 break;
// No quote handling applied: write the character as typed.
1496 if( bInsert )
1497 rDoc.Insert( nInsPos, OUString(cChar) );
1498 else
1499 rDoc.Replace( nInsPos, OUString(cChar) );
1501 // Hardspaces autocorrection
1502 if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1504 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1505 // and its length may change (even become shorter) if FnAddNonBrkSpace succeeds!
1506 sal_Int32 nUpdatedPos = -1;
1507 if (NeedsHardspaceAutocorr(cChar))
1508 nUpdatedPos = FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext );
1509 if (nUpdatedPos >= 0)
1511 nInsPos = nUpdatedPos;
1513 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1515 // Remove the NBSP if it wasn't an autocorrection
1516 if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1517 cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1519 // Look for the last HARD_SPACE
1520 sal_Int32 nPos = nInsPos - 1;
1521 bool bContinue = true;
1522 while ( bContinue )
1524 const sal_Unicode cTmpChar = rTxt[ nPos ];
1525 if ( cTmpChar == cNonBreakingSpace )
1527 rDoc.Delete( nPos, nPos + 1 );
1528 bContinue = false;
1530 else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1531 bContinue = false;
1532 nPos--;
// From here on the word in front of nInsPos is examined; nothing to do at the
// very start of the text or directly after a word delimiter.
1539 if( !nInsPos )
1540 break;
1542 sal_Int32 nPos = nInsPos - 1;
1544 if( IsWordDelim( rTxt[ nPos ]))
1545 break;
1547 // Set bold or underline automatically?
1548 if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1550 if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1552 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1554 break;
// Walk back to the preceding word delimiter or to the start of the text.
1557 while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1560 // Found a Paragraph-start or a Blank, search for the word shortcut in
1561 // auto.
1562 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character
1563 if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1564 --nCapLttrPos; // begin of paragraph and no blank
1566 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1567 CharClass& rCC = GetCharClass( eLang );
1569 // no symbol characters
1570 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1571 break;
1573 if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
1574 // tdf#134940 fix regression of arrow "-->" resulted by premature
1575 // replacement of "--" since '>' was added to IsAutoCorrectChar()
1576 '>' != cChar )
1578 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1579 // and becomes INVALID if ChgAutoCorrWord returns true!
1580 // => use aPara/pPara to create a valid copy of the string!
1581 OUString aPara;
1582 OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1584 bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1585 *this, pPara );
1586 if( !bChgWord )
// Second attempt with the characters listed in sImplSttSkipChars /
// sImplEndSkipChars trimmed from the start / end of the word.
1588 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1589 while( nCapLttrPos1 < nInsPos &&
1590 lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1592 ++nCapLttrPos1;
1593 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1594 lcl_IsInArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1596 --nInsPos1;
1598 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1599 nCapLttrPos1 < nInsPos1 &&
1600 rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1602 bChgWord = true;
1603 nCapLttrPos = nCapLttrPos1;
1607 if( bChgWord )
1609 if( !aPara.isEmpty() )
1611 sal_Int32 nEnd = nCapLttrPos;
1612 while( nEnd < aPara.getLength() &&
1613 !IsWordDelim( aPara[ nEnd ]))
1614 ++nEnd;
1616 // Capital letter at beginning of paragraph?
1617 if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1619 FnCapitalStartSentence( rDoc, aPara, false,
1620 nCapLttrPos, nEnd, eLang );
1623 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1625 FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1628 break;
1632 if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
1634 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1635 // and becomes INVALID if TransliterateRTLWord returns true!
1636 if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
1637 break;
// If one of the ordinal number / URL / DOI handlers succeeds, the remaining
// corrections in the else branch are not attempted.
1640 if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1641 (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3
1642 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1643 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1644 ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1645 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1646 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1647 ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) &&
1648 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1649 FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1651 else
1653 bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1654 bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1656 if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1657 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1659 // Correct accidental use of cAPS LOCK key (do this only when
1660 // the caps or shift lock key is pressed). Turn off the caps
1661 // lock afterwards.
1662 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1665 // Capital letter at beginning of paragraph ?
1666 if( !bUnsupported &&
1667 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1669 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1672 // Two capital letters at beginning of word ??
1673 if( !bUnsupported &&
1674 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1676 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1679 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1681 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1685 } while( false );
1688 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1689 LanguageType eLang )
1691 LanguageTag aLanguageTag( eLang);
1692 if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1693 (void)CreateLanguageFile(aLanguageTag);
1694 const auto iter = m_aLangTable.find(aLanguageTag);
1695 assert(iter != m_aLangTable.end());
1696 return iter->second;
1699 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1701 auto const iter = m_aLangTable.find(LanguageTag(eLang));
1702 if (iter != m_aLangTable.end())
1703 iter->second.SaveCplSttExceptList();
1704 else
1706 SAL_WARN("editeng", "Save an empty list? ");
1710 void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang)
1712 auto const iter = m_aLangTable.find(LanguageTag(eLang));
1713 if (iter != m_aLangTable.end())
1714 iter->second.SaveWordStartExceptList();
1715 else
1717 SAL_WARN("editeng", "Save an empty list? ");
1721 // Adds a single word. The list will immediately be written to the file!
1722 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1723 LanguageType eLang )
1725 SvxAutoCorrectLanguageLists* pLists = nullptr;
1726 // either the right language is present or it will be this in the general list
1727 auto iter = m_aLangTable.find(LanguageTag(eLang));
1728 if (iter != m_aLangTable.end())
1729 pLists = &iter->second;
1730 else
1732 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1733 iter = m_aLangTable.find(aLangTagUndetermined);
1734 if (iter != m_aLangTable.end())
1735 pLists = &iter->second;
1736 else if(CreateLanguageFile(aLangTagUndetermined))
1738 iter = m_aLangTable.find(aLangTagUndetermined);
1739 assert(iter != m_aLangTable.end());
1740 pLists = &iter->second;
1743 OSL_ENSURE(pLists, "No auto correction data");
1744 return pLists && pLists->AddToCplSttExceptList(rNew);
1747 // Adds a single word. The list will immediately be written to the file!
1748 bool SvxAutoCorrect::AddWordStartException( const OUString& rNew,
1749 LanguageType eLang )
1751 SvxAutoCorrectLanguageLists* pLists = nullptr;
1752 //either the right language is present or it is set in the general list
1753 auto iter = m_aLangTable.find(LanguageTag(eLang));
1754 if (iter != m_aLangTable.end())
1755 pLists = &iter->second;
1756 else
1758 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1759 iter = m_aLangTable.find(aLangTagUndetermined);
1760 if (iter != m_aLangTable.end())
1761 pLists = &iter->second;
1762 else if(CreateLanguageFile(aLangTagUndetermined))
1764 iter = m_aLangTable.find(aLangTagUndetermined);
1765 assert(iter != m_aLangTable.end());
1766 pLists = &iter->second;
1769 OSL_ENSURE(pLists, "No auto correction file!");
1770 return pLists && pLists->AddToWordStartExceptList(rNew);
1773 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1774 sal_Int32 nPos)
1776 OUString sRet;
1777 if( !nPos )
1778 return sRet;
1780 sal_Int32 nEnd = nPos;
1782 // it must be followed by a blank or tab!
1783 if( ( nPos < rTxt.getLength() &&
1784 !IsWordDelim( rTxt[ nPos ])) ||
1785 IsWordDelim( rTxt[ --nPos ]))
1786 return sRet;
1788 while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1791 // Found a Paragraph-start or a Blank, search for the word shortcut in
1792 // auto.
1793 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character
1794 if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1795 --nCapLttrPos; // Beginning of paragraph and no Blank!
1797 while( lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1798 if( ++nCapLttrPos >= nEnd )
1799 return sRet;
1801 if( 3 > nEnd - nCapLttrPos )
1802 return sRet;
1804 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1806 CharClass& rCC = GetCharClass(eLang);
1808 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1809 return sRet;
1811 sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1812 return sRet;
1815 // static
1816 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt,
1817 const sal_Int32 nPos)
1819 constexpr sal_Int32 nMinLen = 3;
1820 constexpr sal_Int32 nMaxLen = 9;
1821 std::vector<OUString> aRes;
1822 if (nPos >= nMinLen)
1824 sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1825 // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1826 if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1828 while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1829 ++nBegin;
1831 if (nBegin + nMinLen <= nPos)
1833 OUString sRes( rTxt.substr(nBegin, nPos - nBegin) );
1834 aRes.push_back(sRes);
1835 bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1836 for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1838 bool bAdd = bLastStartedWithDelim;
1839 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1840 bAdd = bAdd || bLastStartedWithDelim;
1841 if (bAdd)
1842 aRes.push_back(sRes.copy(i));
1846 return aRes;
1849 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1851 OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1853 OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1854 OUString sShareDirFile( sUserDirFile );
1856 SvxAutoCorrectLanguageLists* pLists = nullptr;
1858 tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1860 auto nFndPos = aLastFileTable.find(rLanguageTag);
1861 if(nFndPos != aLastFileTable.end() &&
1862 (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1863 nAktTime - nLastCheckTime < nMinTime)
1865 // no need to test the file, because the last check is not older then
1866 // 2 minutes.
1867 if( bNewFile )
1869 sShareDirFile = sUserDirFile;
1870 auto itBool = m_aLangTable.emplace(std::piecewise_construct,
1871 std::forward_as_tuple(rLanguageTag),
1872 std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
1873 pLists = &itBool.first->second;
1874 aLastFileTable.erase(nFndPos);
1877 else if(
1878 ( FStatHelper::IsDocument( sUserDirFile ) ||
1879 FStatHelper::IsDocument( sShareDirFile =
1880 GetAutoCorrFileName( rLanguageTag ) ) ||
1881 FStatHelper::IsDocument( sShareDirFile =
1882 GetAutoCorrFileName( rLanguageTag, false, false, true) )
1883 ) ||
1884 ( sShareDirFile = sUserDirFile, bNewFile )
1887 auto itBool = m_aLangTable.emplace(std::piecewise_construct,
1888 std::forward_as_tuple(rLanguageTag),
1889 std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
1890 pLists = &itBool.first->second;
1891 if (nFndPos != aLastFileTable.end())
1892 aLastFileTable.erase(nFndPos);
1894 else if( !bNewFile )
1896 aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1898 return pLists != nullptr;
1901 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1902 LanguageType eLang )
1904 LanguageTag aLanguageTag( eLang);
1905 if (auto const iter = m_aLangTable.find(aLanguageTag); iter != m_aLangTable.end())
1906 return iter->second.PutText(rShort, rLong);
1907 if (CreateLanguageFile(aLanguageTag))
1909 auto const iter = m_aLangTable.find(aLanguageTag);
1910 assert (iter != m_aLangTable.end());
1911 return iter->second.PutText(rShort, rLong);
1913 return false;
1916 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1917 std::vector<SvxAutocorrWord>& aDeleteEntries,
1918 LanguageType eLang )
1920 LanguageTag aLanguageTag( eLang);
1921 auto iter = m_aLangTable.find(aLanguageTag);
1922 if (iter != m_aLangTable.end())
1924 iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
1926 else if(CreateLanguageFile( aLanguageTag ))
1928 iter = m_aLangTable.find(aLanguageTag);
1929 assert(iter != m_aLangTable.end());
1930 iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
1934 // - return the replacement text (only for SWG-Format, all other
1935 // can be taken from the word list!)
1936 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1938 return false;
1941 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1945 // Text with attribution (only the SWG - SWG format!)
1946 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1947 const OUString&, const OUString&, SfxObjectShell&, OUString& )
1949 return false;
1952 OUString EncryptBlockName_Imp(std::u16string_view rName)
1954 OUStringBuffer aName;
1955 aName.append('#').append(rName);
1956 for (size_t nLen = rName.size(), nPos = 1; nPos < nLen; ++nPos)
1958 if (lcl_IsInArr( u"!/:.\\", aName[nPos]))
1959 aName[nPos] &= 0x0f;
1961 return aName.makeStringAndClear();
1964 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1965 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
1967 OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1968 OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1970 for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1972 switch (aBuf[nPos])
1974 case '!':
1975 case '/':
1976 case ':':
1977 case '.':
1978 case '\\':
1979 // tdf#156769 - escape the question mark in the storage name
1980 case '?':
1981 aBuf[nPos] = '_';
1982 break;
1983 default:
1984 break;
1988 rPackageName = aBuf.makeStringAndClear();
1991 static const SvxAutocorrWord* lcl_SearchWordsInList(
1992 SvxAutoCorrectLanguageLists* pList, std::u16string_view rTxt,
1993 sal_Int32& rStt, sal_Int32 nEndPos)
1995 const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1996 return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1999 // the search for the words in the substitution table
// Searches the per-language replacement lists for a match of rTxt.
//   rTxt / rStt / nEndPos : forwarded unchanged to lcl_SearchWordsInList
//   rLang                 : in: language to start with; out: overwritten with
//                           the tag of the list that produced the match
// Returns the matching entry, or nullptr when nothing matched.
// A list is used when it is already in m_aLangTable or when
// CreateLanguageFile(tag, false) succeeds for it.
2000 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
2001 std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
2002 SvxAutoCorrDoc&, LanguageTag& rLang )
2004 const SvxAutocorrWord* pRet = nullptr;
// Work on a copy so rLang is only modified on a successful match.
2005 LanguageTag aLanguageTag( rLang);
// The system locale is replaced by the configured system language.
2006 if( aLanguageTag.isSystemLocale() )
2007 aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage());
2009 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
2010 * list instead? */
2012 // First search for eLang, then US-English -> English
2013 // and last in LANGUAGE_UNDETERMINED
2014 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2016 //the language is available - so bring it on
2017 const auto iter = m_aLangTable.find(aLanguageTag);
2018 assert(iter != m_aLangTable.end());
2019 SvxAutoCorrectLanguageLists & rList = iter->second;
2020 pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2021 if( pRet )
2023 rLang = aLanguageTag;
2024 return pRet;
// NOTE(review): when the list for the full tag exists but has no match, this
// returns immediately, so the fallbacks below look reachable only when no
// list for the full tag is available - confirm this is intended, given the
// "first ..., then ..., and last ..." comment above.
2026 else
2027 return nullptr;
2030 // If it still could not be found here, then keep on searching
// Remember the full language type to detect a no-op fallback below.
2031 LanguageType eLang = aLanguageTag.getLanguageType();
2032 // the primary language for example EN
2033 aLanguageTag.reset(aLanguageTag.getLanguage());
2034 LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
// Only try the primary language if it differs from the one already tried
// and is not LANGUAGE_UNDETERMINED (that one is handled last).
2035 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2036 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2037 CreateLanguageFile(aLanguageTag, false)))
2039 //the language is available - so bring it on
2040 SvxAutoCorrectLanguageLists& rList = m_aLangTable.find(aLanguageTag)->second;
2041 pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2042 if( pRet )
2044 rLang = aLanguageTag;
2045 return pRet;
// Last resort: the LANGUAGE_UNDETERMINED list (aLanguageTag is reset in place).
2049 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2050 CreateLanguageFile(aLanguageTag, false))
2052 //the language is available - so bring it on
2053 const auto iter = m_aLangTable.find(aLanguageTag);
2054 assert(iter != m_aLangTable.end());
2055 SvxAutoCorrectLanguageLists& rList = iter->second;
2056 pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2057 if( pRet )
2059 rLang = aLanguageTag;
2060 return pRet;
2063 return nullptr;
2066 bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang,
2067 const OUString& sWord )
2069 LanguageTag aLanguageTag( eLang);
2071 /* TODO-BCP47: again horrible ugliness */
2073 // First search for eLang, then primary language of eLang
2074 // and last in LANGUAGE_UNDETERMINED
2076 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2078 //the language is available - so bring it on
2079 const auto iter = m_aLangTable.find(aLanguageTag);
2080 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2081 auto& rList = iter->second;
2082 if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2083 return true;
2086 // If it still could not be found here, then keep on searching
2087 // the primary language for example EN
2088 aLanguageTag.reset(aLanguageTag.getLanguage());
2089 LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2090 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2091 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2092 CreateLanguageFile(aLanguageTag, false)))
2094 //the language is available - so bring it on
2095 const auto iter = m_aLangTable.find(aLanguageTag);
2096 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2097 auto& rList = iter->second;
2098 if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2099 return true;
2102 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2103 CreateLanguageFile(aLanguageTag, false))
2105 //the language is available - so bring it on
2106 const auto iter = m_aLangTable.find(aLanguageTag);
2107 assert(iter != m_aLangTable.end());
2108 auto& rList = iter->second;
2109 if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2110 return true;
2112 return false;
// Checks whether sWord ends with one of the "~"-prefixed entries of pList.
// Entries are compared after ASCII lower-casing both sides; the leading '~'
// of an entry is not compared. Returns true on the first entry whose
// remaining characters equal the tail of sWord, false otherwise.
2115 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
// Locate the entry "~" and turn the iterator into an index.
// presumably find() yields end() when "~" is absent, making nPos == size()
// and skipping the scan - verify against the container type.
2117 SvStringsISortDtor::const_iterator it = pList->find( "~" );
2118 SvStringsISortDtor::size_type nPos = it - pList->begin();
2119 if( nPos < pList->size() )
2121 OUString sLowerWord(sWord.toAsciiLowerCase());
2122 OUString sAbr;
// Walk forward from nPos while the entries still start with '~'.
2123 for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
2125 sAbr = (*pList)[ n ];
2126 if (sAbr[0] != '~')
2127 break;
2128 // ~ and ~. are not allowed!
// Needs more than two characters, and the part after '~' must fit into sWord.
2129 if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
2131 OUString sLowerAbk(sAbr.toAsciiLowerCase());
// Compare backwards from the end of both strings; i reaching 0 means every
// character after the '~' matched.
2132 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
2134 if( !--i ) // agrees
2135 return true;
2137 if( sLowerAbk[i] != sLowerWord[--ii])
2138 break;
// Sanity check: the entry just before the starting position should not begin
// with '~'. Note the expression decrements nPos as a side effect.
2143 OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
2144 "Wrongly sorted exception list?" );
2145 return false;
2148 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
2149 const OUString& sWord, bool bAbbreviation)
2151 LanguageTag aLanguageTag( eLang);
2153 /* TODO-BCP47: did I mention terrible horrible ugliness? */
2155 // First search for eLang, then primary language of eLang
2156 // and last in LANGUAGE_UNDETERMINED
2158 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2160 //the language is available - so bring it on
2161 const auto iter = m_aLangTable.find(aLanguageTag);
2162 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2163 const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2164 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2165 return true;
2168 // If it still could not be found here, then keep on searching
2169 // the primary language for example EN
2170 aLanguageTag.reset(aLanguageTag.getLanguage());
2171 LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2172 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2173 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2174 CreateLanguageFile(aLanguageTag, false)))
2176 //the language is available - so bring it on
2177 const auto iter = m_aLangTable.find(aLanguageTag);
2178 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2179 const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2180 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2181 return true;
2184 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2185 CreateLanguageFile(aLanguageTag, false))
2187 //the language is available - so bring it on
2188 const auto iter = m_aLangTable.find(aLanguageTag);
2189 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2190 const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2191 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2192 return true;
2194 return false;
2197 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
2198 bool bNewFile, bool bTst, bool bUnlocalized ) const
2200 OUString sRet, sExt( rLanguageTag.getBcp47() );
2201 if (bUnlocalized)
2203 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2204 std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
2205 if (!vecFallBackStrings.empty())
2206 sExt = vecFallBackStrings[0];
2209 sExt = "_" + sExt + ".dat";
2210 if( bNewFile )
2211 sRet = sUserAutoCorrFile + sExt;
2212 else if( !bTst )
2213 sRet = sShareAutoCorrFile + sExt;
2214 else
2216 // test first in the user directory - if not exist, then
2217 sRet = sUserAutoCorrFile + sExt;
2218 if( !FStatHelper::IsDocument( sRet ))
2219 sRet = sShareAutoCorrFile + sExt;
2221 return sRet;
// Constructs the list holder for one language.
//   rParent               : stored as rAutoCorrect
//   aShareAutoCorrectFile : moved into sShareAutoCorrFile
//   aUserAutoCorrectFile  : moved into sUserAutoCorrFile
// The modification date/time and the last-check time start out EMPTY and
// nFlags is NONE, i.e. no list is marked as loaded yet.
2224 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2225 SvxAutoCorrect& rParent,
2226 OUString aShareAutoCorrectFile,
2227 OUString aUserAutoCorrectFile)
2228 : sShareAutoCorrFile(std::move( aShareAutoCorrectFile )),
2229 sUserAutoCorrFile(std::move( aUserAutoCorrectFile )),
2230 aModifiedDate( Date::EMPTY ),
2231 aModifiedTime( tools::Time::EMPTY ),
2232 aLastCheckTime( tools::Time::EMPTY ),
2233 rAutoCorrect(rParent),
2234 nFlags(ACFlags::NONE)
// Destructor of the per-language list holder.
2238 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2242 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2244 // Access the file system only every 2 minutes to check the date stamp
2245 bool bRet = false;
2247 tools::Time nMinTime( 0, 2 );
2248 tools::Time nAktTime( tools::Time::SYSTEM );
2249 if( aLastCheckTime <= nAktTime) // overflow?
2250 return false;
2251 nAktTime -= aLastCheckTime;
2252 if( nAktTime > nMinTime ) // min time past
2254 Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2255 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2256 &aTstDate, &aTstTime ) &&
2257 ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2259 bRet = true;
2260 // then remove all the lists fast!
2261 if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2263 pCplStt_ExcptLst.reset();
2265 if( (ACFlags::WordStartLstLoad & nFlags) && pWordStart_ExcptLst )
2267 pWordStart_ExcptLst.reset();
2269 if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2271 pAutocorr_List.reset();
2273 nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WordStartLstLoad | ACFlags::ChgWordLstLoad );
2275 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2277 return bRet;
// Loads an exception list from stream sStrmName of storage rStg into rpLst.
//   rpLst     : target list; cleared if it exists, created otherwise
//   sStrmName : name of the stream inside the storage
//   rStg      : storage to read from; cleared here when the stream cannot be
//               opened without error
// Parse errors (SAXParseException, SAXException, IOException) are caught and
// ignored. The modification date/time of sShareAutoCorrFile and the
// last-check time are recorded at the end.
2280 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2281 std::unique_ptr<SvStringsISortDtor>& rpLst,
2282 const OUString& sStrmName,
2283 tools::SvRef<SotStorage>& rStg)
// Start from an empty list in either case.
2285 if( rpLst )
2286 rpLst->clear();
2287 else
2288 rpLst.reset( new SvStringsISortDtor );
2291 if( rStg.is() && rStg->IsStream( sStrmName ) )
// Open read-only and without creating the stream.
2293 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2294 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2295 if( ERRCODE_NONE != xStrm->GetError())
// Unreadable stream: release stream and storage, then let RemoveStream_Imp
// deal with the stream of that name.
2297 xStrm.clear();
2298 rStg.clear();
2299 RemoveStream_Imp( sStrmName );
2301 else
2303 uno::Reference< uno::XComponentContext > xContext =
2304 comphelper::getProcessComponentContext();
2306 xml::sax::InputSource aParserInput;
2307 aParserInput.sSystemId = sStrmName;
// Read from the beginning with an 8 KiB buffer.
2309 xStrm->Seek( 0 );
2310 xStrm->SetBufferSize( 8 * 1024 );
2311 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2313 // get filter
// The import handler receives *rpLst as its target list.
2314 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2316 // connect parser and filter
2317 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2318 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2319 xParser->setFastDocumentHandler( xFilter );
2320 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2321 xParser->setTokenHandler( xTokenHandler );
2323 // parse
2326 xParser->parseStream( aParserInput );
2328 catch( const xml::sax::SAXParseException& )
2330 // re throw ?
2332 catch( const xml::sax::SAXException& )
2334 // re throw ?
2336 catch( const io::IOException& )
2338 // re throw ?
2343 // Set time stamp
2344 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2345 &aModifiedDate, &aModifiedTime );
2346 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
// Writes the exception list rLst as XML into stream sStrmName of rStg.
//   rLst      : list to store; an empty list removes the stream instead
//   sStrmName : name of the stream inside the storage
//   rStg      : target storage; nothing happens when the reference is empty
//   bConvert  : when true the storage itself is not committed here
2351 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2352 const SvStringsISortDtor& rLst,
2353 const OUString& sStrmName,
2354 tools::SvRef<SotStorage> const &rStg,
2355 bool bConvert )
2357 if( !rStg.is() )
2358 return;
2360 if( rLst.empty() )
// Nothing to store: drop the stream and commit the removal.
2362 rStg->Remove( sStrmName );
2363 rStg->Commit();
2365 else
2367 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2368 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2369 if( xStrm.is() )
// Truncate any previous content before writing.
2371 xStrm->SetSize( 0 );
2372 xStrm->SetBufferSize( 8192 );
2373 xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2376 uno::Reference< uno::XComponentContext > xContext =
2377 comphelper::getProcessComponentContext();
// The SAX writer emits into the storage stream.
2379 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2380 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2381 xWriter->setOutputStream(xOut);
2383 uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2384 rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2386 xExp->exportDoc( XML_BLOCK_LIST );
2388 xStrm->Commit();
2389 if( xStrm->GetError() == ERRCODE_NONE )
// Release the stream before committing the storage.
2391 xStrm.clear();
2392 if (!bConvert)
2394 rStg->Commit();
// A failed storage commit removes the stream again.
2395 if( ERRCODE_NONE != rStg->GetError() )
2397 rStg->Remove( sStrmName );
2398 rStg->Commit();
// (Re)loads the replacement word list from sShareAutoCorrFile.
// An existing list is emptied, otherwise a new one is created; then the
// stream pXMLImplAutocorr_ListStr of the storage is parsed into it.
// Any uno::Exception is logged as a warning. The modification date/time of
// sShareAutoCorrFile and the last-check time are recorded afterwards.
// Returns the raw pointer held by pAutocorr_List.
2406 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2408 if( pAutocorr_List )
2409 pAutocorr_List->DeleteAndDestroyAll();
2410 else
2411 pAutocorr_List.reset( new SvxAutocorrWordList() );
// Open the storage and the list stream read-only.
2415 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2416 uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2417 uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2419 xml::sax::InputSource aParserInput;
2420 aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2421 aParserInput.aInputStream = xStrm->getInputStream();
2423 // get parser
2424 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2425 SAL_INFO("editeng", "AutoCorrect Import" );
// The import handler receives the list, the owning SvxAutoCorrect and the storage.
2426 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2427 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2429 // connect parser and filter
2430 xParser->setFastDocumentHandler( xFilter );
2431 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2432 xParser->setTokenHandler(xTokenHandler);
2434 // parse
2435 xParser->parseStream( aParserInput );
2437 catch ( const uno::Exception& )
2439 TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2442 // Set time stamp
2443 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2444 &aModifiedDate, &aModifiedTime );
2445 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2447 return pAutocorr_List.get();
2450 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2452 if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2454 LoadAutocorrWordList();
2455 if( !pAutocorr_List )
2457 OSL_ENSURE( false, "No valid list" );
2458 pAutocorr_List.reset( new SvxAutocorrWordList() );
2460 nFlags |= ACFlags::ChgWordLstLoad;
2462 return pAutocorr_List.get();
2465 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2467 if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2469 LoadCplSttExceptList();
2470 if( !pCplStt_ExcptLst )
2472 OSL_ENSURE( false, "No valid list" );
2473 pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2475 nFlags |= ACFlags::CplSttLstLoad;
2477 return pCplStt_ExcptLst.get();
2480 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2482 bool bRet = false;
2483 if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2485 MakeUserStorage_Impl();
2486 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2488 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2490 xStg = nullptr;
2491 // Set time stamp
2492 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2493 &aModifiedDate, &aModifiedTime );
2494 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2495 bRet = true;
2497 return bRet;
2500 bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString& rNew)
2502 bool bRet = false;
2503 if( !rNew.isEmpty() && GetWordStartExceptList()->insert( rNew ).second )
2505 MakeUserStorage_Impl();
2506 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2508 SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2510 xStg = nullptr;
2511 // Set time stamp
2512 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2513 &aModifiedDate, &aModifiedTime );
2514 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2515 bRet = true;
2517 return bRet;
2520 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2524 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2525 if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2526 LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2528 catch (const css::ucb::ContentCreationException&)
2531 return pCplStt_ExcptLst.get();
2534 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2536 MakeUserStorage_Impl();
2537 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2539 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2541 xStg = nullptr;
2543 // Set time stamp
2544 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2545 &aModifiedDate, &aModifiedTime );
2546 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
// Loads the word-start exception list from sShareAutoCorrFile if the storage
// contains the stream pXMLImplWordStart_ExcptLstStr.
// A ContentCreationException is logged as a warning.
// Returns the raw pointer held by pWordStart_ExcptLst.
2549 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWordStartExceptList()
// Open the share file read-only without denying access to others.
2553 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2554 if( xStg.is() && xStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
2555 LoadXMLExceptList_Imp( pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2557 catch (const css::ucb::ContentCreationException &)
2559 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList");
2561 return pWordStart_ExcptLst.get();
2564 void SvxAutoCorrectLanguageLists::SaveWordStartExceptList()
2566 MakeUserStorage_Impl();
2567 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2569 SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2571 xStg = nullptr;
2572 // Set time stamp
2573 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2574 &aModifiedDate, &aModifiedTime );
2575 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2578 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWordStartExceptList()
2580 if( !( ACFlags::WordStartLstLoad & nFlags ) || IsFileChanged_Imp() )
2582 LoadWordStartExceptList();
2583 if( !pWordStart_ExcptLst )
2585 OSL_ENSURE( false, "No valid list" );
2586 pWordStart_ExcptLst.reset( new SvStringsISortDtor );
2588 nFlags |= ACFlags::WordStartLstLoad;
2590 return pWordStart_ExcptLst.get();
2593 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2595 if( sShareAutoCorrFile != sUserAutoCorrFile )
2597 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2598 if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2599 xStg->IsStream( rName ) )
2601 xStg->Remove( rName );
2602 xStg->Commit();
2604 xStg = nullptr;
// Makes sure the user autocorrect file exists in the current storage format.
// Depending on the state this copies the share file to the user location
// and/or converts an OLE-storage file: the old file is copied to a ".bak"
// name, its exception lists and the word list are written into the user
// file, and the ".bak" copy is deleted afterwards.
// After a successful copy or conversion sShareAutoCorrFile is set to
// sUserAutoCorrFile.
2609 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2611 // The conversion needs to happen if the file is already in the user
2612 // directory and is in the old format. Additionally it needs to
2613 // happen when the file is being copied from share to user.
2615 bool bError = false, bConvert = false, bCopy = false;
2616 INetURLObject aDest;
2617 INetURLObject aSource;
// Case 1: share and user file differ -> copy share to user; an OLE-storage
// share file is copied to a ".bak" name and converted from there.
2619 if (sUserAutoCorrFile != sShareAutoCorrFile )
2621 aSource = INetURLObject ( sShareAutoCorrFile );
2622 aDest = INetURLObject ( sUserAutoCorrFile );
2623 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2625 aDest.SetExtension ( u"bak" );
2626 bConvert = true;
2628 bCopy = true;
// Case 2: same file, but in OLE-storage format -> copy to ".bak" and convert.
2630 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2632 aSource = INetURLObject ( sUserAutoCorrFile );
2633 aDest = INetURLObject ( sUserAutoCorrFile );
2634 aDest.SetExtension ( u"bak" );
2635 bCopy = bConvert = true;
2637 if (bCopy)
// Transfer aSource into the folder of aDest under aDest's file name,
// overwriting an existing file; any exception sets bError.
2641 OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
2642 sal_Int32 nSlashPos = sMain.lastIndexOf('/');
2643 sMain = sMain.copy(0, nSlashPos);
2644 ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2645 TransferInfo aInfo;
2646 aInfo.NameClash = NameClash::OVERWRITE;
2647 aInfo.NewTitle = aDest.GetLastName();
2648 aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
2649 aInfo.MoveData = false;
2650 aNewContent.executeCommand( "transfer", Any(aInfo));
2652 catch (...)
2654 bError = true;
2657 if (bConvert && !bError)
// Read from the ".bak" copy, write into the user file.
2659 tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
2660 tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
2662 if( xSrcStg.is() && xDstStg.is() )
2664 std::unique_ptr<SvStringsISortDtor> pTmpWordList;
// Word-start exception list; saved with bConvert == true.
2666 if (xSrcStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
2667 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWordStart_ExcptLstStr, xSrcStg );
2669 if (pTmpWordList)
2671 SaveExceptList_Imp( *pTmpWordList, pXMLImplWordStart_ExcptLstStr, xDstStg, true );
2672 pTmpWordList.reset();
// Capital-at-sentence-start exception list, handled the same way.
2676 if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2677 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2679 if (pTmpWordList)
2681 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2682 pTmpWordList->clear();
// Make sure the word list is loaded, then write it into the new storage.
2685 GetAutocorrWordList();
2686 MakeBlocklist_Imp( *xDstStg );
2687 sShareAutoCorrFile = sUserAutoCorrFile;
2688 xDstStg = nullptr;
// Delete the ".bak" copy; failures are ignored.
2691 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2692 aContent.executeCommand ( "delete", Any ( true ) );
2694 catch (...)
// Plain copy without conversion: from now on the user file is the source.
2699 else if( bCopy && !bError )
2700 sShareAutoCorrFile = sUserAutoCorrFile;
// Writes the replacement word list as XML into stream
// pXMLImplAutocorr_ListStr of rStg and commits the storage.
// When the list is missing or empty, or when committing the storage fails,
// the stream is removed from the storage instead.
// Returns false when the stream could not be opened or a commit reported an
// error, true otherwise.
2703 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2705 bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2706 if( !bRemove )
2708 tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2709 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2710 if( refList.is() )
// Truncate any previous content before writing.
2712 refList->SetSize( 0 );
2713 refList->SetBufferSize( 8192 );
2714 refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2716 uno::Reference< uno::XComponentContext > xContext =
2717 comphelper::getProcessComponentContext();
// The SAX writer emits into the storage stream.
2719 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2720 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2721 xWriter->setOutputStream(xOut);
2723 rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2725 xExp->exportDoc( XML_BLOCK_LIST );
2727 refList->Commit();
2728 bRet = ERRCODE_NONE == refList->GetError();
2729 if( bRet )
// Release the stream before committing the storage.
2731 refList.clear();
2732 rStg.Commit();
2733 if( ERRCODE_NONE != rStg.GetError() )
2735 bRemove = true;
2736 bRet = false;
2740 else
2741 bRet = false;
2744 if( bRemove )
2746 rStg.Remove( pXMLImplAutocorr_ListStr );
2747 rStg.Commit();
2750 return bRet;
// Applies a batch of deletions and insertions to the replacement word list
// and writes the result to the user autocorrect file.
//   aNewEntries    : entries to insert; each is added as a text-only entry,
//                    replacing an existing entry with the same short form
//   aDeleteEntries : entries to remove
// For removed entries that were not text-only, the sub-storage named after
// the short form is removed from the storage as well.
// Returns false when the storage cannot be opened without error, an insert
// fails, or the final MakeBlocklist_Imp() fails.
2753 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2755 // First get the current list!
2756 GetAutocorrWordList();
2758 MakeUserStorage_Impl();
2759 tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2761 bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2763 if( bRet )
// Pass 1: deletions
2765 for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2767 std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2768 if( xFoundEntry )
2770 if( !xFoundEntry->IsTextOnly() )
// The name inside the storage depends on the storage format.
2772 OUString aName( aWordToDelete.GetShort() );
2773 if (xStorage->IsOLEStorage())
2774 aName = EncryptBlockName_Imp(aName);
2775 else
2776 GeneratePackageName ( aWordToDelete.GetShort(), aName );
2778 if( xStorage->IsContained( aName ) )
2780 xStorage->Remove( aName );
2781 bRet = xStorage->Commit();
// Pass 2: insertions
2787 for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
2789 SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
// Remove an existing entry with the same short form first.
2790 std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2791 if( xRemoved )
2793 if( !xRemoved->IsTextOnly() )
2795 // Still have to remove the Storage
2796 OUString sStorageName( aWordToAdd.GetShort() );
2797 if (xStorage->IsOLEStorage())
2798 sStorageName = EncryptBlockName_Imp(sStorageName);
2799 else
2800 GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2802 if( xStorage->IsContained( sStorageName ) )
2803 xStorage->Remove( sStorageName );
// Insert() returns a pointer; it converts to false when nothing was inserted.
2806 bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2808 if ( !bRet )
2810 break;
2814 if ( bRet )
2816 bRet = MakeBlocklist_Imp( *xStorage );
2819 return bRet;
// Adds (or replaces) the text-only replacement rShort -> rLong and writes the
// word list to the user autocorrect file.
// If the replaced entry was not text-only, the sub-storage named after the
// short form is removed from the storage as well.
// Returns false when the storage cannot be opened without error, the insert
// fails, or MakeBlocklist_Imp() fails.
2822 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2824 // First get the current list!
2825 GetAutocorrWordList();
2827 MakeUserStorage_Impl();
2828 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2830 bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2832 // Update the word list
2833 if( bRet )
2835 SvxAutocorrWord aNew(rShort, rLong, true );
// Remove an existing entry with the same short form first.
2836 std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2837 if( xRemove )
2839 if( !xRemove->IsTextOnly() )
2841 // Still have to remove the Storage
// The name inside the storage depends on the storage format.
2842 OUString sStgNm( rShort );
2843 if (xStg->IsOLEStorage())
2844 sStgNm = EncryptBlockName_Imp(sStgNm);
2845 else
2846 GeneratePackageName ( rShort, sStgNm);
2848 if( xStg->IsContained( sStgNm ) )
2849 xStg->Remove( sStgNm );
2853 if( pAutocorr_List->Insert( std::move(aNew) ) )
2855 bRet = MakeBlocklist_Imp( *xStg );
2856 xStg = nullptr;
2858 else
2860 bRet = false;
2863 return bRet;
2866 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2867 SfxObjectShell& rShell )
2869 // First get the current list!
2870 GetAutocorrWordList();
2872 MakeUserStorage_Impl();
2876 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2877 OUString sLong;
2878 bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2879 xStg = nullptr;
2881 // Update the word list
2882 if( bRet )
2884 if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2886 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2887 MakeBlocklist_Imp( *xStor );
2891 catch ( const uno::Exception& )
2896 // Keep the list sorted ...
2897 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2899 bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2901 CollatorWrapper& rCmp = ::GetCollatorWrapper();
2902 return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2906 namespace {
2908 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
// Private data of SvxAutocorrWordList: the entries live either in the hash
// or in the sorted vector. Both members are mutable, so const member
// functions of the list can modify them.
2912 struct SvxAutocorrWordList::Impl
2915 // only one of these contains the data
2916 // maSortedVector is manually sorted so we can optimise data movement
2917 mutable AutocorrWordSetType maSortedVector;
2918 mutable AutocorrWordHashType maHash; // key is 'Short'
// Empties both containers.
2920 void DeleteAndDestroyAll()
2922 maHash.clear();
2923 maSortedVector.clear();
// Creates an empty word list with a freshly allocated Impl.
2927 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
// Destructor of the word list.
2929 SvxAutocorrWordList::~SvxAutocorrWordList()
2933 void SvxAutocorrWordList::DeleteAndDestroyAll()
2935 mpImpl->DeleteAndDestroyAll();
2938 // returns a pointer to the stored entry if inserted, nullptr if an entry with the same short form already exists
// The entry goes into the hash while the sorted vector is empty; otherwise it
// is inserted into the sorted vector at its ordered position.
// The function is const; it modifies the mutable containers of Impl.
2939 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
2941 if ( mpImpl->maSortedVector.empty() ) // use the hash
// Copy the key before aWord is moved into the container.
2943 OUString aShort = aWord.GetShort();
2944 auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
2945 if (inserted)
2946 return &(it->second);
2947 return nullptr;
2949 else
// Find the ordered position; insert only if no entry there compares equal
// under the collator.
2951 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
2952 CollatorWrapper& rCmp = ::GetCollatorWrapper();
2953 if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
2955 it = mpImpl->maSortedVector.insert(it, std::move(aWord));
2956 return &*it;
2958 return nullptr;
2962 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2964 (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
2967 bool SvxAutocorrWordList::empty() const
2969 return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
2972 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
2975 if ( mpImpl->maSortedVector.empty() ) // use the hash
2977 AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2978 if( it != mpImpl->maHash.end() )
2980 SvxAutocorrWord pMatch = std::move(it->second);
2981 mpImpl->maHash.erase (it);
2982 return pMatch;
2985 else
2987 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
2988 if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
2990 SvxAutocorrWord pMatch = std::move(*it);
2991 mpImpl->maSortedVector.erase (it);
2992 return pMatch;
2995 return std::optional<SvxAutocorrWord>();
2998 // return the sorted contents - defer sorting until we have to.
// While the sorted vector is empty, all entries are moved out of the hash,
// sorted, and stored in the sorted vector; the hash is emptied in the
// process. The function is const; it modifies the mutable containers of Impl.
2999 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
3001 // convert from hash to set permanently
3002 if ( mpImpl->maSortedVector.empty() )
3004 std::vector<SvxAutocorrWord> tmp;
3005 tmp.reserve(mpImpl->maHash.size());
// Move the entries out of the hash; the hash is cleared right after.
3006 for (auto & rPair : mpImpl->maHash)
3007 tmp.emplace_back(std::move(rPair.second));
3008 mpImpl->maHash.clear();
3009 // sort twice - this gets the list into mostly-sorted order, which
3010 // reduces the number of times we need to invoke the expensive ICU collate fn.
// First pass: plain operator< on the short forms.
3011 std::sort(tmp.begin(), tmp.end(),
3012 [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
3014 return lhs.GetShort() < rhs.GetShort();
3016 // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
3017 // stable_sort is twice as fast as sort in this situation because it does
3018 // fewer comparison operations.
// Second pass: the collator-based ordering that the list actually uses.
3019 std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
3020 mpImpl->maSortedVector = std::move(tmp);
3022 return mpImpl->maSortedVector;
3025 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
3026 std::u16string_view rTxt,
3027 sal_Int32 &rStt,
3028 sal_Int32 nEndPos) const
3030 const OUString& rChk = pFnd->GetShort();
3032 sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
3033 sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
3034 assert(nEndPos >= 0);
3035 size_t nSttWdPos = nEndPos;
3037 // direct replacement of keywords surrounded by colons (for example, ":name:")
3038 bool bColonNameColon = static_cast<sal_Int32>(rTxt.size()) > nEndPos &&
3039 rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
3040 if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard )
3041 return nullptr;
3043 bool bWasWordDelim = false;
3044 sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
3045 if (bColonNameColon)
3046 nCalcStt++;
3047 if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
3048 ( nCalcStt < rStt &&
3049 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
3051 TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
3052 OUString sWord( rTxt.substr(nCalcStt, rChk.getLength() - left_wildcard) );
3053 if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
3055 rStt = nCalcStt;
3056 if (!left_wildcard)
3058 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
3059 if (static_cast<sal_Int32>(rTxt.size()) > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
3060 return nullptr;
3061 return pFnd;
3063 // get the first word delimiter position before the matching ".*word" pattern
3064 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
3066 if (bWasWordDelim) rStt++;
3067 OUString left_pattern( rTxt.substr(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard) );
3068 // avoid double spaces before simple "word" replacement
3069 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong();
3070 if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(OUString(rTxt.substr(rStt, nEndPos - rStt)), left_pattern) ) )
3071 return pNew;
3073 } else
3074 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
3075 if ( right_wildcard )
3078 OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
3079 // Get the last word delimiter position
3080 bool not_suffix;
3082 while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
3084 // search the first occurrence (with a left word delimitation, if needed)
3085 size_t nFndPos = std::u16string_view::npos;
3086 do {
3087 nFndPos = rTxt.find( sTmp, nFndPos + 1);
3088 if (nFndPos == std::u16string_view::npos)
3089 break;
3090 not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
3091 } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );
3093 if ( nFndPos != std::u16string_view::npos )
3095 sal_Int32 extra_repl = static_cast<sal_Int32>(nFndPos) + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
3097 if ( left_wildcard )
3099 // get the first word delimiter position before the matching ".*word.*" pattern
3100 while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
3102 if (bWasWordDelim) nFndPos++;
3104 if (nEndPos + extra_repl <= static_cast<sal_Int32>(nFndPos))
3106 return nullptr;
3108 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
3109 OUString aShort( rTxt.substr(nFndPos, nEndPos - nFndPos + extra_repl) );
3111 OUString aLong;
3112 rStt = nFndPos;
3113 if ( !left_wildcard )
3115 sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
3116 aLong = pFnd->GetLong() + (siz > 0 ? rTxt.substr(nFndPos + sTmp.getLength(), siz) : u"");
3117 } else {
3118 OUStringBuffer buf;
3119 do {
3120 nSttWdPos = rTxt.find( sTmp, nFndPos);
3121 if (nSttWdPos != std::u16string_view::npos)
3123 sal_Int32 nTmp(nFndPos);
3124 while (nTmp < static_cast<sal_Int32>(nSttWdPos) && !IsWordDelim(rTxt[nTmp]))
3125 nTmp++;
3126 if (nTmp < static_cast<sal_Int32>(nSttWdPos))
3127 break; // word delimiter found
3128 buf.append(rTxt.substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
3129 nFndPos = nSttWdPos + sTmp.getLength();
3131 } while (nSttWdPos != std::u16string_view::npos);
3132 if (static_cast<sal_Int32>(nEndPos - nFndPos) > extra_repl)
3133 buf.append(rTxt.substr(nFndPos, nEndPos - nFndPos));
3134 aLong = buf.makeStringAndClear();
3136 if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
3138 if ( (static_cast<sal_Int32>(rTxt.size()) > nEndPos && IsWordDelim(rTxt[nEndPos])) || static_cast<sal_Int32>(rTxt.size()) == nEndPos )
3139 return pNew;
3143 return nullptr;
3146 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt, sal_Int32& rStt,
3147 sal_Int32 nEndPos) const
3149 for (auto const& elem : mpImpl->maHash)
3151 if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) )
3152 return pTmp;
3155 for (auto const& elem : mpImpl->maSortedVector)
3157 if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) )
3158 return pTmp;
3160 return nullptr;
3163 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */