1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
23 #include <string_view>
24 #include <sal/config.h>
26 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
27 #include <com/sun/star/embed/XStorage.hpp>
28 #include <com/sun/star/io/IOException.hpp>
29 #include <com/sun/star/io/XStream.hpp>
30 #include <tools/urlobj.hxx>
31 #include <i18nlangtag/mslangid.hxx>
32 #include <i18nutil/transliteration.hxx>
33 #include <sal/log.hxx>
34 #include <osl/diagnose.h>
35 #include <vcl/svapp.hxx>
36 #include <vcl/settings.hxx>
37 #include <svl/fstathelper.hxx>
38 #include <svl/urihelper.hxx>
39 #include <unotools/charclass.hxx>
40 #include <com/sun/star/i18n/UnicodeType.hpp>
41 #include <unotools/collatorwrapper.hxx>
42 #include <com/sun/star/i18n/UnicodeScript.hpp>
43 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
44 #include <unotools/localedatawrapper.hxx>
45 #include <unotools/transliterationwrapper.hxx>
46 #include <comphelper/processfactory.hxx>
47 #include <comphelper/storagehelper.hxx>
48 #include <o3tl/string_view.hxx>
49 #include <editeng/editids.hrc>
50 #include <sot/storage.hxx>
51 #include <editeng/udlnitem.hxx>
52 #include <editeng/wghtitem.hxx>
53 #include <editeng/postitem.hxx>
54 #include <editeng/crossedoutitem.hxx>
55 #include <editeng/escapementitem.hxx>
56 #include <editeng/svxacorr.hxx>
57 #include <editeng/unolingu.hxx>
58 #include <vcl/window.hxx>
59 #include <com/sun/star/xml/sax/InputSource.hpp>
60 #include <com/sun/star/xml/sax/FastParser.hpp>
61 #include <com/sun/star/xml/sax/Writer.hpp>
62 #include <com/sun/star/xml/sax/SAXParseException.hpp>
63 #include <unotools/streamwrap.hxx>
64 #include "SvXMLAutoCorrectImport.hxx"
65 #include "SvXMLAutoCorrectExport.hxx"
66 #include "SvXMLAutoCorrectTokenHandler.hxx"
67 #include <ucbhelper/content.hxx>
68 #include <com/sun/star/ucb/ContentCreationException.hpp>
69 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
70 #include <com/sun/star/ucb/TransferInfo.hpp>
71 #include <com/sun/star/ucb/NameClash.hpp>
72 #include <comphelper/diagnose_ex.hxx>
73 #include <xmloff/xmltoken.hxx>
74 #include <unordered_map>
75 #include <rtl/character.hxx>
77 using namespace ::com::sun::star::ucb
;
78 using namespace ::com::sun::star::uno
;
79 using namespace ::com::sun::star::xml::sax
;
80 using namespace ::com::sun::star
;
81 using namespace ::xmloff::token
;
82 using namespace ::utl
;
89 ExclamationMark
= 0x02,
96 template<> struct typed_flags
<Flags
> : is_typed_flags
<Flags
, 0x07> {};
98 const sal_Unicode cNonBreakingSpace
= 0xA0; // UNICODE code for no break space
100 constexpr OUString pXMLImplWordStart_ExcptLstStr
= u
"WordExceptList.xml"_ustr
;
101 constexpr OUString pXMLImplCplStt_ExcptLstStr
= u
"SentenceExceptList.xml"_ustr
;
102 constexpr OUString pXMLImplAutocorr_ListStr
= u
"DocumentList.xml"_ustr
;
104 // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
105 // Curious, why these \u0083\u0084\u0089\u0091\u0092\u0093\u0094 are handled as "begin characters"?
106 constexpr std::u16string_view
107 /* also at these beginnings - Brackets and all kinds of begin characters */
108 sImplSttSkipChars
= u
"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094",
109 /* also at these ends - Brackets and all kinds of end characters */
110 sImplEndSkipChars
= u
"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";
112 static OUString
EncryptBlockName_Imp(std::u16string_view rName
);
114 static bool NonFieldWordDelim( const sal_Unicode c
)
116 return ' ' == c
|| '\t' == c
|| 0x0a == c
||
117 cNonBreakingSpace
== c
|| 0x2011 == c
;
120 static bool IsWordDelim( const sal_Unicode c
)
122 return c
== 0x1 || NonFieldWordDelim(c
);
126 static bool IsLowerLetter( sal_Int32 nCharType
)
128 return CharClass::isLetterType( nCharType
) &&
129 ( css::i18n::KCharacterType::LOWER
& nCharType
);
132 static bool IsUpperLetter( sal_Int32 nCharType
)
134 return CharClass::isLetterType( nCharType
) &&
135 ( css::i18n::KCharacterType::UPPER
& nCharType
);
138 static bool lcl_IsUnsupportedUnicodeChar( CharClass
const & rCC
, const OUString
& rTxt
,
139 sal_Int32 nStt
, sal_Int32 nEnd
)
141 for( ; nStt
< nEnd
; ++nStt
)
143 css::i18n::UnicodeScript nScript
= rCC
.getScript( rTxt
, nStt
);
146 case css::i18n::UnicodeScript_kCJKRadicalsSupplement
:
147 case css::i18n::UnicodeScript_kHangulJamo
:
148 case css::i18n::UnicodeScript_kCJKSymbolPunctuation
:
149 case css::i18n::UnicodeScript_kHiragana
:
150 case css::i18n::UnicodeScript_kKatakana
:
151 case css::i18n::UnicodeScript_kHangulCompatibilityJamo
:
152 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth
:
153 case css::i18n::UnicodeScript_kCJKCompatibility
:
154 case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA
:
155 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph
:
156 case css::i18n::UnicodeScript_kHangulSyllable
:
157 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph
:
158 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm
:
160 default: ; //do nothing
166 static bool lcl_IsSymbolChar( CharClass
const & rCC
, const OUString
& rTxt
,
167 sal_Int32 nStt
, sal_Int32 nEnd
)
169 for( ; nStt
< nEnd
; ++nStt
)
171 if( css::i18n::UnicodeType::PRIVATE_USE
== rCC
.getType( rTxt
, nStt
))
177 static bool lcl_IsInArr(std::u16string_view arr
, const sal_uInt32 c
)
179 return std::any_of(arr
.begin(), arr
.end(), [c
](const auto c1
) { return c1
== c
; });
182 SvxAutoCorrDoc::~SvxAutoCorrDoc()
186 // Called by the functions:
187 // - FnCapitalStartWord
188 // - FnCapitalStartSentence
189 // after the exchange of characters. Then the words, if necessary, can be inserted
190 // into the exception list.
191 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags
, sal_Int32
, const OUString
&,
196 LanguageType
SvxAutoCorrDoc::GetLanguage( sal_Int32
) const
198 return LANGUAGE_SYSTEM
;
201 static const LanguageTag
& GetAppLang()
203 return Application::GetSettings().GetLanguageTag();
206 /// Never use an unresolved LANGUAGE_SYSTEM.
// Asks rDoc for the language at nPos and substitutes the application
// language (GetAppLang()) when the document reports LANGUAGE_SYSTEM.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably eLang is returned - confirm.
207 static LanguageType
GetDocLanguage( const SvxAutoCorrDoc
& rDoc
, sal_Int32 nPos
)
209 LanguageType eLang
= rDoc
.GetLanguage( nPos
);
// LANGUAGE_SYSTEM is only a placeholder, resolve it to a concrete language
210 if (eLang
== LANGUAGE_SYSTEM
)
211 eLang
= GetAppLang().getLanguageType(); // the current work locale
// Keeps a single function-local LocaleDataWrapper and replaces it whenever
// the requested language differs from the one the cached wrapper has loaded.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the cached wrapper is returned - confirm.
215 static LocaleDataWrapper
& GetLocaleDataWrapper( LanguageType nLang
)
// the cached instance, shared by all callers
217 static std::unique_ptr
<LocaleDataWrapper
> xLclDtWrp
;
218 LanguageTag
aLcl( nLang
);
// (re)create the wrapper on first use or when the language changed
219 if (!xLclDtWrp
|| xLclDtWrp
->getLoadedLanguageTag() != aLcl
)
220 xLclDtWrp
.reset(new LocaleDataWrapper(std::move(aLcl
)));
223 static TransliterationWrapper
& GetIgnoreTranslWrapper()
225 static int bIsInit
= 0;
226 static TransliterationWrapper
aWrp( ::comphelper::getProcessComponentContext(),
227 TransliterationFlags::IGNORE_KANA
|
228 TransliterationFlags::IGNORE_WIDTH
);
231 aWrp
.loadModuleIfNeeded( GetAppLang().getLanguageType() );
236 static CollatorWrapper
& GetCollatorWrapper()
238 static CollatorWrapper aCollWrp
= []()
240 CollatorWrapper
tmp( ::comphelper::getProcessComponentContext() );
241 tmp
.loadDefaultCollator( GetAppLang().getLocale(), 0 );
247 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar
)
249 return cChar
== '\0' || cChar
== '\t' || cChar
== 0x0a ||
250 cChar
== ' ' || cChar
== '\'' || cChar
== '\"' ||
251 cChar
== '*' || cChar
== '_' || cChar
== '%' ||
252 cChar
== '.' || cChar
== ',' || cChar
== ';' ||
253 cChar
== ':' || cChar
== '?' || cChar
== '!' ||
254 cChar
== '<' || cChar
== '>' ||
255 cChar
== '/' || cChar
== '-';
260 bool IsCompoundWordDelimChar(sal_Unicode cChar
)
262 return cChar
== '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar
);
266 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar
)
268 return cChar
== '%' || cChar
== ';' || cChar
== ':' || cChar
== '?' || cChar
== '!' ||
269 cChar
== '/' /*case for the urls exception*/;
272 ACFlags
SvxAutoCorrect::GetDefaultFlags()
274 ACFlags nRet
= ACFlags::Autocorrect
275 | ACFlags::CapitalStartSentence
276 | ACFlags::CapitalStartWord
277 | ACFlags::ChgOrdinalNumber
278 | ACFlags::ChgToEnEmDash
279 | ACFlags::AddNonBrkSpace
280 | ACFlags::TransliterateRTL
281 | ACFlags::ChgAngleQuotes
282 | ACFlags::ChgWeightUnderl
283 | ACFlags::SetINetAttr
284 | ACFlags::SetDOIAttr
286 | ACFlags::SaveWordCplSttLst
287 | ACFlags::SaveWordWordStartLst
288 | ACFlags::CorrectCapsLock
;
289 LanguageType eLang
= GetAppLang().getLanguageType();
294 LANGUAGE_ENGLISH_AUS
,
295 LANGUAGE_ENGLISH_CAN
,
297 LANGUAGE_ENGLISH_EIRE
,
298 LANGUAGE_ENGLISH_SAFRICA
,
299 LANGUAGE_ENGLISH_JAMAICA
,
300 LANGUAGE_ENGLISH_CARIBBEAN
))
301 nRet
&= ~ACFlags(ACFlags::ChgQuotes
|ACFlags::ChgSglQuotes
);
305 constexpr sal_Unicode cEmDash
= 0x2014;
306 constexpr sal_Unicode cEnDash
= 0x2013;
307 constexpr OUString
sEmDash(u
"\u2014"_ustr
);
308 constexpr OUString
sEnDash(u
"\u2013"_ustr
);
309 constexpr sal_Unicode cApostrophe
= 0x2019;
310 constexpr sal_Unicode cLeftDoubleAngleQuote
= 0xAB;
311 constexpr sal_Unicode cRightDoubleAngleQuote
= 0xBB;
312 constexpr sal_Unicode cLeftSingleAngleQuote
= 0x2039;
313 constexpr sal_Unicode cRightSingleAngleQuote
= 0x203A;
314 // stop characters for searching preceding quotes
315 // (the first character is also the opening quote we are looking for)
316 const sal_Unicode aStopDoubleAngleQuoteStart
[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
317 const sal_Unicode aStopDoubleAngleQuoteEnd
[] = { cRightDoubleAngleQuote
, cLeftDoubleAngleQuote
, 0x201D, 0x201E, 0 }; // preceding >>
318 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
319 const sal_Unicode aStopDoubleAngleQuoteEndRo
[] = { cLeftDoubleAngleQuote
, cRightDoubleAngleQuote
, 0x201D, 0x201E, 0x201C, 0 };
320 const sal_Unicode aStopSingleQuoteEnd
[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
321 const sal_Unicode aStopSingleQuoteEndRuUa
[] = { 0x201E, 0x201C, cRightDoubleAngleQuote
, cLeftDoubleAngleQuote
, 0 };
323 SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile
,
324 OUString aUserAutocorrFile
)
325 : sShareAutoCorrFile(std::move( aShareAutocorrFile
))
326 , sUserAutoCorrFile(std::move( aUserAutocorrFile
))
327 , eCharClassLang( LANGUAGE_DONTKNOW
)
328 , nFlags(SvxAutoCorrect::GetDefaultFlags())
336 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect
& rCpy
)
337 : sShareAutoCorrFile( rCpy
.sShareAutoCorrFile
)
338 , sUserAutoCorrFile( rCpy
.sUserAutoCorrFile
)
339 , aSwFlags( rCpy
.aSwFlags
)
340 , eCharClassLang(rCpy
.eCharClassLang
)
341 , nFlags( rCpy
.nFlags
& ~ACFlags(ACFlags::ChgWordLstLoad
|ACFlags::CplSttLstLoad
|ACFlags::WordStartLstLoad
))
342 , cStartDQuote( rCpy
.cStartDQuote
)
343 , cEndDQuote( rCpy
.cEndDQuote
)
344 , cStartSQuote( rCpy
.cStartSQuote
)
345 , cEndSQuote( rCpy
.cEndSQuote
)
350 SvxAutoCorrect::~SvxAutoCorrect()
354 void SvxAutoCorrect::GetCharClass_( LanguageType eLang
)
356 moCharClass
.emplace( LanguageTag( eLang
) );
357 eCharClassLang
= eLang
;
// Updates nFlags from nFlag/bOn and then, for each of the three feature
// bits below whose state differs from before, clears the matching
// "...LstLoad" bit.
// NOTE(review): the branch taken when bOn is false and any early return are
// not visible in this excerpt; the "...LstLoad" bits presumably mark the
// corresponding list as loaded - confirm.
360 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag
, bool bOn
)
// remember the previous state so that changes can be detected below
362 ACFlags nOld
= nFlags
;
// when bOn is set, nFlag is OR-ed into the current flags
363 nFlags
= bOn
? nFlags
| nFlag
// CapitalStartSentence changed -> clear CplSttLstLoad
368 if( (nOld
& ACFlags::CapitalStartSentence
) != (nFlags
& ACFlags::CapitalStartSentence
) )
369 nFlags
&= ~ACFlags::CplSttLstLoad
;
// CapitalStartWord changed -> clear WordStartLstLoad
370 if( (nOld
& ACFlags::CapitalStartWord
) != (nFlags
& ACFlags::CapitalStartWord
) )
371 nFlags
&= ~ACFlags::WordStartLstLoad
;
// Autocorrect changed -> clear ChgWordLstLoad
372 if( (nOld
& ACFlags::Autocorrect
) != (nFlags
& ACFlags::Autocorrect
) )
373 nFlags
&= ~ACFlags::ChgWordLstLoad
;
378 // Correct TWo INitial CApitals
379 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
380 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
383 CharClass
& rCC
= GetCharClass( eLang
);
385 // Delete all non alphanumeric. Test the characters at the beginning/end of
386 // the word ( recognizes: "(min.", "/min.", and so on.)
387 for( ; nSttPos
< nEndPos
; ++nSttPos
)
388 if( rCC
.isLetterNumeric( rTxt
, nSttPos
))
390 for( ; nSttPos
< nEndPos
; --nEndPos
)
391 if( rCC
.isLetterNumeric( rTxt
, nEndPos
- 1 ))
394 // Is the word a compounded word separated by delimiters?
395 // If so, keep track of all delimiters so each constituent
396 // word can be checked for two initial capital letters.
397 std::deque
<sal_Int32
> aDelimiters
;
399 // Always check for two capitals at the beginning
400 // of the entire word, so start at nSttPos.
401 aDelimiters
.push_back(nSttPos
);
403 // Find all compound word delimiters
404 for (sal_Int32 n
= nSttPos
; n
< nEndPos
; ++n
)
406 if (IsCompoundWordDelimChar(rTxt
[ n
]))
408 aDelimiters
.push_back( n
+ 1 ); // Get position of char after delimiter
412 // Decide where to put the terminating delimiter.
413 // If the last AutoCorrect char was a newline, then the AutoCorrect
414 // char will not be included in rTxt.
415 // If the last AutoCorrect char was not a newline, then the AutoCorrect
416 // character will be the last character in rTxt.
417 if (!IsCompoundWordDelimChar(rTxt
[nEndPos
-1]))
418 aDelimiters
.push_back(nEndPos
);
420 // Iterate through the word and all words that compose it.
421 // Two capital letters at the beginning of word?
422 for (size_t nI
= 0; nI
< aDelimiters
.size() - 1; ++nI
)
424 nSttPos
= aDelimiters
[nI
];
425 nEndPos
= aDelimiters
[nI
+ 1];
427 if( nSttPos
+2 < nEndPos
&&
428 IsUpperLetter( rCC
.getCharacterType( rTxt
, nSttPos
)) &&
429 IsUpperLetter( rCC
.getCharacterType( rTxt
, ++nSttPos
)) &&
430 // Is the third character a lower case
431 IsLowerLetter( rCC
.getCharacterType( rTxt
, nSttPos
+1 )) &&
432 // Do not replace special attributes
433 0x1 != rTxt
[ nSttPos
] && 0x2 != rTxt
[ nSttPos
])
435 // test if the word is in an exception list
436 OUString
sWord( rTxt
.copy( nSttPos
- 1, nEndPos
- nSttPos
+ 1 ));
437 if( !FindInWordStartExceptList(eLang
, sWord
) )
439 // Check that word isn't correctly spelt before correcting:
440 css::uno::Reference
< css::linguistic2::XSpellChecker1
> xSpeller
=
441 LinguMgr::GetSpellChecker();
442 if( xSpeller
->hasLanguage(static_cast<sal_uInt16
>(eLang
)) )
444 Sequence
< css::beans::PropertyValue
> aEmptySeq
;
445 if (xSpeller
->isValid(sWord
, static_cast<sal_uInt16
>(eLang
), aEmptySeq
))
450 sal_Unicode cSave
= rTxt
[ nSttPos
];
451 OUString sChar
= rCC
.lowercase( OUString(cSave
) );
452 if( sChar
[0] != cSave
&& rDoc
.ReplaceRange( nSttPos
, 1, sChar
))
454 if( ACFlags::SaveWordWordStartLst
& nFlags
)
455 rDoc
.SaveCpltSttWord( ACFlags::CapitalStartWord
, nSttPos
, sWord
, cSave
);
462 // Format ordinal numbers suffixes (1st -> 1^st)
463 bool SvxAutoCorrect::FnChgOrdinalNumber(
464 SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
465 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
468 // 1st, 2nd, 3rd, 4 - 0th
473 // In some languages ordinal suffixes should never be
474 // changed to superscript. Let's break for those languages.
477 LANGUAGE_SWEDISH_FINLAND
))
479 CharClass
& rCC
= GetCharClass(eLang
);
481 for (; nSttPos
< nEndPos
; ++nSttPos
)
482 if (!lcl_IsInArr(sImplSttSkipChars
, rTxt
[nSttPos
]))
484 for (; nSttPos
< nEndPos
; --nEndPos
)
485 if (!lcl_IsInArr(sImplEndSkipChars
, rTxt
[nEndPos
- 1]))
489 // Get the last number in the string to check
490 sal_Int32 nNumEnd
= nEndPos
;
491 bool bFoundEnd
= false;
492 bool isValidNumber
= true;
493 sal_Int32 i
= nEndPos
;
497 bool isDigit
= rCC
.isDigit(rTxt
, i
);
499 isValidNumber
&= (isDigit
|| !rCC
.isLetter(rTxt
, i
));
501 if (isDigit
&& !bFoundEnd
)
508 if (bFoundEnd
&& isValidNumber
) {
509 sal_Int32 nNum
= o3tl::toInt32(rTxt
.subView(nSttPos
, nNumEnd
- nSttPos
+ 1));
511 // Check if the characters after that number correspond to the ordinal suffix
512 uno::Reference
< i18n::XOrdinalSuffix
> xOrdSuffix
513 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
515 const uno::Sequence
< OUString
> aSuffixes
= xOrdSuffix
->getOrdinalSuffix(nNum
, rCC
.getLanguageTag().getLocale());
516 for (OUString
const & sSuffix
: aSuffixes
)
518 std::u16string_view sEnd
= rTxt
.subView(nNumEnd
+ 1, nEndPos
- nNumEnd
- 1);
522 // Check if the ordinal suffix has to be set as super script
523 if (rCC
.isLetter(sSuffix
))
526 SvxEscapementItem
aSvxEscapementItem(DFLT_ESC_AUTO_SUPER
,
527 DFLT_ESC_PROP
, SID_ATTR_CHAR_ESCAPEMENT
);
528 rDoc
.SetAttr(nNumEnd
+ 1, nEndPos
,
529 SID_ATTR_CHAR_ESCAPEMENT
,
541 bool SvxAutoCorrect::FnChgToEnEmDash(
542 SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
543 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
547 CharClass
& rCC
= GetCharClass( eLang
);
548 if (eLang
== LANGUAGE_SYSTEM
)
549 eLang
= GetAppLang().getLanguageType();
550 bool bAlwaysUseEmDash
= (eLang
== LANGUAGE_RUSSIAN
|| eLang
== LANGUAGE_UKRAINIAN
);
552 // rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert;
553 // keep a local copy for later use
554 OUString aOrigTxt
= rTxt
;
555 sal_Int32 nFirstReplacementTextLengthChange
= 0;
557 // replace " - " or " --" with "enDash"
558 if( 1 < nSttPos
&& 1 <= nEndPos
- nSttPos
)
560 sal_Unicode cCh
= rTxt
[ nSttPos
];
563 if( 1 < nEndPos
- nSttPos
&&
564 ' ' == rTxt
[ nSttPos
-1 ] &&
565 '-' == rTxt
[ nSttPos
+1 ])
568 for( n
= nSttPos
+2; n
< nEndPos
&& lcl_IsInArr(
569 sImplSttSkipChars
,(cCh
= rTxt
[ n
]));
573 // found: " --[<AnySttChars>][A-z0-9]
574 if( rCC
.isLetterNumeric( OUString(cCh
) ) )
576 for( n
= nSttPos
-1; n
&& lcl_IsInArr(
577 sImplEndSkipChars
,(cCh
= rTxt
[ --n
])); )
580 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
581 if( rCC
.isLetterNumeric( OUString(cCh
) ))
583 rDoc
.Delete( nSttPos
, nSttPos
+ 2 );
584 rDoc
.Insert( nSttPos
, bAlwaysUseEmDash
? sEmDash
: sEnDash
);
585 nFirstReplacementTextLengthChange
= -1; // 2 ch -> 1 ch
591 else if( 3 < nSttPos
&&
592 ' ' == rTxt
[ nSttPos
-1 ] &&
593 '-' == rTxt
[ nSttPos
-2 ])
595 sal_Int32 n
, nLen
= 1, nTmpPos
= nSttPos
- 2;
596 if( '-' == ( cCh
= rTxt
[ nTmpPos
-1 ]) )
600 cCh
= rTxt
[ nTmpPos
-1 ];
604 for( n
= nSttPos
; n
< nEndPos
&& lcl_IsInArr(
605 sImplSttSkipChars
,(cCh
= rTxt
[ n
]));
609 // found: " - [<AnySttChars>][A-z0-9]
610 if( rCC
.isLetterNumeric( OUString(cCh
) ) )
613 for( n
= nTmpPos
-1; n
&& lcl_IsInArr(
614 sImplEndSkipChars
,(cCh
= rTxt
[ --n
])); )
616 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
617 if( rCC
.isLetterNumeric( OUString(cCh
) ))
619 rDoc
.Delete( nTmpPos
, nTmpPos
+ nLen
);
620 rDoc
.Insert( nTmpPos
, bAlwaysUseEmDash
? sEmDash
: sEnDash
);
621 nFirstReplacementTextLengthChange
= 1 - nLen
; // nLen ch -> 1 ch
629 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
630 // [0-9]--[0-9] double dash always replaced with "enDash"
631 // Finnish and Hungarian use enDash instead of emDash.
632 bool bEnDash
= (eLang
== LANGUAGE_HUNGARIAN
|| eLang
== LANGUAGE_FINNISH
);
633 if( 4 <= nEndPos
- nSttPos
)
635 std::u16string_view
sTmpView( aOrigTxt
.subView( nSttPos
, nEndPos
- nSttPos
) );
636 size_t nFndPos
= sTmpView
.find(u
"--");
637 if (nFndPos
> 0 && nFndPos
< sTmpView
.size() - 2)
639 // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it
640 // uses the index *both* as code unit index (when checking it as ASCII), *and*
641 // as code point index (when passed to css::i18n::XCharacterClassification).
642 // Oh well... Anyway, single-codepoint strings will work around it.
643 sal_Int32 nStart
= nSttPos
+ nFndPos
;
644 sal_uInt32 chStart
= aOrigTxt
.iterateCodePoints(&nStart
, -1);
645 OUString
sStart(&chStart
, 1);
646 // No idea why sImplEndSkipChars is checked at start
647 if (rCC
.isLetterNumeric(sStart
, 0) || lcl_IsInArr(sImplEndSkipChars
, chStart
))
649 sal_Int32 nEnd
= nSttPos
+ nFndPos
+ 2;
650 sal_uInt32 chEnd
= aOrigTxt
.iterateCodePoints(&nEnd
, 1);
651 OUString
sEnd(&chEnd
, 1);
652 // No idea why sImplSttSkipChars is checked at end
653 if (rCC
.isLetterNumeric(sEnd
, 0) || lcl_IsInArr(sImplSttSkipChars
, chEnd
))
655 nSttPos
= nSttPos
+ nFndPos
+ nFirstReplacementTextLengthChange
;
656 rDoc
.Delete(nSttPos
, nSttPos
+ 2);
658 (bEnDash
|| (rCC
.isDigit(sStart
, 0) && rCC
.isDigit(sEnd
, 0))
669 // Add non-breaking space before specific punctuation marks in French text
670 sal_Int32
SvxAutoCorrect::FnAddNonBrkSpace(
671 SvxAutoCorrDoc
& rDoc
, std::u16string_view rTxt
,
673 LanguageType eLang
, bool& io_bNbspRunNext
)
677 CharClass
& rCC
= GetCharClass( eLang
);
679 if ( rCC
.getLanguageTag().getLanguage() == "fr" )
681 bool bFrCA
= (rCC
.getLanguageTag().getCountry() == "CA");
682 OUString allChars
= ":;?!%";
683 OUString
chars( allChars
);
687 sal_Unicode cChar
= rTxt
[ nEndPos
];
688 bool bHasSpace
= chars
.indexOf( cChar
) != -1;
689 bool bIsSpecial
= allChars
.indexOf( cChar
) != -1;
692 // Get the last word delimiter position
693 sal_Int32 nSttWdPos
= nEndPos
;
694 bool bWasWordDelim
= false;
697 bWasWordDelim
= IsWordDelim( rTxt
[ --nSttWdPos
]);
702 //See if the text is the start of a protocol string, e.g. have text of
703 //"http" see if it is the start of "http:" and if so leave it alone
704 size_t nIndex
= nSttWdPos
+ (bWasWordDelim
? 1 : 0);
705 size_t nProtocolLen
= nEndPos
- nSttWdPos
+ 1;
706 if (nIndex
+ nProtocolLen
<= rTxt
.size())
708 if (INetURLObject::CompareProtocolScheme(rTxt
.substr(nIndex
, nProtocolLen
)) != INetProtocol::NotValid
)
712 // Check the presence of "://" in the word
713 size_t nStrPos
= rTxt
.find( u
"://", nSttWdPos
+ 1 );
714 if ( nStrPos
== std::u16string_view::npos
&& nEndPos
> 0 )
716 // Check the previous char
717 sal_Unicode cPrevChar
= rTxt
[ nEndPos
- 1 ];
718 if ( ( chars
.indexOf( cPrevChar
) == -1 ) && cPrevChar
!= '\t' )
720 // Remove any previous normal space
721 sal_Int32 nPos
= nEndPos
- 1;
722 while ( cPrevChar
== ' ' || cPrevChar
== cNonBreakingSpace
)
724 if ( nPos
== 0 ) break;
726 cPrevChar
= rTxt
[ nPos
];
730 if ( nEndPos
- nPos
> 0 )
731 rDoc
.Delete( nPos
, nEndPos
);
733 // Add the non-breaking space at the end pos
735 rDoc
.Insert( nPos
, OUString(cNonBreakingSpace
) );
736 io_bNbspRunNext
= true;
739 else if ( chars
.indexOf( cPrevChar
) != -1 )
740 io_bNbspRunNext
= true;
743 else if ( cChar
== '/' && nEndPos
> 1 && static_cast<sal_Int32
>(rTxt
.size()) > (nEndPos
- 1) )
745 // Remove the hardspace right before to avoid formatting URLs
746 sal_Unicode cPrevChar
= rTxt
[ nEndPos
- 1 ];
747 sal_Unicode cMaybeSpaceChar
= rTxt
[ nEndPos
- 2 ];
748 if ( cPrevChar
== ':' && cMaybeSpaceChar
== cNonBreakingSpace
)
750 rDoc
.Delete( nEndPos
- 2, nEndPos
- 1 );
// URL recognition: searches rTxt between nSttPos and nEndPos with
// URIHelper::FindFirstURLInText and, when a non-empty URL comes back,
// sets it as INet attribute on rDoc for the range nSttPos..nEndPos.
// NOTE(review): the remaining parameter(s) and the return statement are not
// visible in this excerpt; presumably eLang is a parameter, bRet is returned
// and the helper may adjust nSttPos/nEndPos to the match - confirm.
760 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
761 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
764 OUString
sURL( URIHelper::FindFirstURLInText( rTxt
, nSttPos
, nEndPos
,
765 GetCharClass( eLang
) ));
// an empty result means that no URL was found
766 bool bRet
= !sURL
.isEmpty();
767 if( bRet
) // so, set attribute:
768 rDoc
.SetINetAttr( nSttPos
, nEndPos
, sURL
);
772 // DOI citation recognition
// Searches rTxt between nSttPos and nEndPos with
// URIHelper::FindFirstDOIInText and, when a non-empty URL comes back,
// sets it as INet attribute on rDoc for the range nSttPos..nEndPos.
// NOTE(review): the remaining parameter(s) and the return statement are not
// visible in this excerpt; presumably eLang is a parameter, bRet is returned
// and the helper may adjust nSttPos/nEndPos to the match - confirm.
773 bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
774 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
777 OUString
sURL( URIHelper::FindFirstDOIInText( rTxt
, nSttPos
, nEndPos
, GetCharClass( eLang
) ));
// an empty result means that no DOI was found
778 bool bRet
= !sURL
.isEmpty();
779 if( bRet
) // so, set attribute:
780 rDoc
.SetINetAttr( nSttPos
, nEndPos
, sURL
);
784 // Automatic *bold*, /italic/, -strikeout- and _underline_
785 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
789 // at the beginning: _, *, / or - after Space with the following !Space
790 // at the end: _, *, / or - before Space (word delimiter?)
792 sal_Unicode cInsChar
= rTxt
[ nEndPos
]; // underline, bold, italic or strikeout
793 if( ++nEndPos
!= rTxt
.getLength() &&
794 !IsWordDelim( rTxt
[ nEndPos
] ) )
799 bool bAlphaNum
= false;
800 sal_Int32 nPos
= nEndPos
;
801 sal_Int32 nFndPos
= -1;
802 CharClass
& rCC
= GetCharClass( LANGUAGE_SYSTEM
);
806 switch( sal_Unicode c
= rTxt
[ --nPos
] )
814 if( bAlphaNum
&& nPos
+1 < nEndPos
&& ( !nPos
||
815 IsWordDelim( rTxt
[ nPos
-1 ])) &&
816 !IsWordDelim( rTxt
[ nPos
+1 ]))
819 // Condition is not satisfied, so cancel
826 bAlphaNum
= rCC
.isLetterNumeric( rTxt
, nPos
);
832 // first delete the Character at the end - this allows insertion
833 // of an empty hint in SetAttr which would be removed by Delete
834 // (fdo#62536, AUTOFMT in Writer)
835 rDoc
.Delete( nEndPos
, nEndPos
+ 1 );
837 // Span the Attribute over the area
839 if( '*' == cInsChar
) // Bold
841 SvxWeightItem
aSvxWeightItem( WEIGHT_BOLD
, SID_ATTR_CHAR_WEIGHT
);
842 rDoc
.SetAttr( nFndPos
+ 1, nEndPos
,
843 SID_ATTR_CHAR_WEIGHT
,
846 else if( '/' == cInsChar
) // Italic
848 SvxPostureItem
aSvxPostureItem( ITALIC_NORMAL
, SID_ATTR_CHAR_POSTURE
);
849 rDoc
.SetAttr( nFndPos
+ 1, nEndPos
,
850 SID_ATTR_CHAR_POSTURE
,
853 else if( '-' == cInsChar
) // Strikeout
855 SvxCrossedOutItem
aSvxCrossedOutItem( STRIKEOUT_SINGLE
, SID_ATTR_CHAR_STRIKEOUT
);
856 rDoc
.SetAttr( nFndPos
+ 1, nEndPos
,
857 SID_ATTR_CHAR_STRIKEOUT
,
862 SvxUnderlineItem
aSvxUnderlineItem( LINESTYLE_SINGLE
, SID_ATTR_CHAR_UNDERLINE
);
863 rDoc
.SetAttr( nFndPos
+ 1, nEndPos
,
864 SID_ATTR_CHAR_UNDERLINE
,
867 rDoc
.Delete( nFndPos
, nFndPos
+ 1 );
870 return -1 != nFndPos
;
873 // Capitalize first letter of every sentence
874 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc
& rDoc
,
875 const OUString
& rTxt
, bool bNormalPos
,
876 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
880 if( rTxt
.isEmpty() || nEndPos
<= nSttPos
)
883 CharClass
& rCC
= GetCharClass( eLang
);
884 OUString
aText( rTxt
);
885 const sal_Unicode
*pStart
= aText
.getStr(),
886 *pStr
= pStart
+ nEndPos
,
890 bool bAtStart
= false;
893 if (rCC
.isLetter(aText
, pStr
- pStart
))
899 else if (pWordStt
&& !rCC
.isDigit(aText
, pStr
- pStart
))
901 if( (lcl_IsInArr( u
"-'", *pStr
) || *pStr
== cApostrophe
) && // These characters are allowed in words
902 pWordStt
- 1 == pStr
&&
903 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
904 (pStart
+ 1) <= pStr
&&
905 rCC
.isLetter(aText
, pStr
-1 - pStart
))
910 bAtStart
= (pStart
== pStr
);
911 } while( !bAtStart
);
914 return; // no character to be replaced
917 if (rCC
.isDigit(aText
, pStr
- pStart
))
918 return; // already ok
920 if (IsUpperLetter(rCC
.getCharacterType(aText
, pWordStt
- pStart
)))
921 return; // already ok
923 //See if the text is the start of a protocol string, e.g. have text of
924 //"http" see if it is the start of "http:" and if so leave it alone
925 sal_Int32 nIndex
= pWordStt
- pStart
;
926 sal_Int32 nProtocolLen
= pDelim
- pWordStt
+ 1;
927 if (nIndex
+ nProtocolLen
<= rTxt
.getLength())
929 if (INetURLObject::CompareProtocolScheme(rTxt
.subView(nIndex
, nProtocolLen
)) != INetProtocol::NotValid
)
930 return; // already ok
933 if (0x1 == *pWordStt
|| 0x2 == *pWordStt
)
934 return; // already ok
936 // Only capitalize, if string before specified characters is long enough
937 if( *pDelim
&& 2 >= pDelim
- pWordStt
&&
938 lcl_IsInArr( u
".-)>", *pDelim
) )
941 // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
942 if ( 1 == pDelim
- pWordStt
&& 0x03B1 <= *pWordStt
&& *pWordStt
<= 0x03C9 && eLang
!= LANGUAGE_GREEK
)
945 if( !bAtStart
) // Still no beginning of a paragraph?
947 if (NonFieldWordDelim(*pStr
))
951 bAtStart
= (pStart
== pStr
--);
952 if (bAtStart
|| !NonFieldWordDelim(*pStr
))
956 // Asian full stop, full width full stop, full width exclamation mark
957 // and full width question marks are treated as word delimiters
958 else if ( 0x3002 != *pStr
&& 0xFF0E != *pStr
&& 0xFF01 != *pStr
&&
960 return; // no valid separator -> no replacement
963 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
964 if (FindInWordStartExceptList(eLang
, OUString(pWordStt
, pDelim
- pWordStt
)))
967 if( bAtStart
) // at the beginning of a paragraph?
969 // Check out the previous paragraph, if it exists.
970 // If so, then check to paragraph separator at the end.
971 OUString
const*const pPrevPara
= rDoc
.GetPrevPara(bNormalPos
);
974 // valid separator -> replace
975 OUString
sChar( *pWordStt
);
976 sChar
= rCC
.titlecase(sChar
); //see fdo#56740
977 if (sChar
!= OUStringChar(*pWordStt
))
978 rDoc
.ReplaceRange( pWordStt
- pStart
, 1, sChar
);
984 pStart
= aText
.getStr();
985 pStr
= pStart
+ aText
.getLength();
987 do { // overwrite all blanks
989 if (!NonFieldWordDelim(*pStr
))
991 bAtStart
= (pStart
== pStr
);
992 } while( !bAtStart
);
995 return; // no valid separator -> no replacement
998 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
999 // all three can happen, but not more than once!
1000 const sal_Unicode
* pExceptStt
= nullptr;
1001 bool bContinue
= true;
1002 Flags nFlag
= Flags::NONE
;
1007 // Western and Asian full stop
1012 if (pStr
>= pStart
+ 2 && *(pStr
- 2) == '.')
1014 //e.g. text "f.o.o. word": we are currently considering
1015 //capitalizing "word", but the second-to-last character of the
1016 //previous word is a '.'. So the previous word is probably an
1017 //abbreviation that ends in '.' and not truly the end of a
1018 //previous sentence, so don't autocapitalize this word
1021 if (nFlag
& Flags::FullStop
)
1022 return; // no valid separator -> no replacement
1023 nFlag
|= Flags::FullStop
;
1030 if (nFlag
& Flags::ExclamationMark
)
1031 return; // no valid separator -> no replacement
1032 nFlag
|= Flags::ExclamationMark
;
1038 if (nFlag
& Flags::QuestionMark
)
1039 return; // no valid separator -> no replacement
1040 nFlag
|= Flags::QuestionMark
;
1044 if (nFlag
== Flags::NONE
)
1045 return; // no valid separator -> no replacement
1051 if (bContinue
&& pStr
-- == pStart
)
1053 return; // no valid separator -> no replacement
1055 } while (bContinue
);
1056 if (Flags::FullStop
!= nFlag
)
1057 pExceptStt
= nullptr;
1059 // Only capitalize, if string is long enough
1060 if( 2 > ( pStr
- pStart
) )
1063 if (!rCC
.isLetterNumeric(aText
, pStr
-- - pStart
))
1065 bool bValid
= false, bAlphaFnd
= false;
1066 const sal_Unicode
* pTmpStr
= pStr
;
1069 if( rCC
.isDigit( aText
, pTmpStr
- pStart
) )
1074 else if( rCC
.isLetter( aText
, pTmpStr
- pStart
) )
1084 else if (bAlphaFnd
|| NonFieldWordDelim(*pTmpStr
))
1087 if( pTmpStr
== pStart
)
1094 return; // no valid separator -> no replacement
1097 bool bNumericOnly
= '0' <= *(pStr
+1) && *(pStr
+1) <= '9';
1099 // Search for the beginning of the word
1100 while (!NonFieldWordDelim(*pStr
))
1102 if( bNumericOnly
&& rCC
.isLetter( aText
, pStr
- pStart
) )
1103 bNumericOnly
= false;
1105 if( pStart
== pStr
)
1111 if( bNumericOnly
) // consists of only numbers, then not
1114 if (NonFieldWordDelim(*pStr
))
1119 // check on the basis of the exception list
1122 sWord
= OUString(pStr
, pExceptStt
- pStr
+ 1);
1123 if( FindInCplSttExceptList(eLang
, sWord
) )
1126 // Delete all non alphanumeric. Test the characters at the
1127 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1128 OUString
sTmp( sWord
);
1129 while( !sTmp
.isEmpty() &&
1130 !rCC
.isLetterNumeric( sTmp
, 0 ) )
1131 sTmp
= sTmp
.copy(1);
1133 // Remove all non alphanumeric characters towards the end up until
1135 sal_Int32 nLen
= sTmp
.getLength();
1136 while( nLen
&& !rCC
.isLetterNumeric( sTmp
, nLen
-1 ) )
1138 if( nLen
+ 1 < sTmp
.getLength() )
1139 sTmp
= sTmp
.copy( 0, nLen
+ 1 );
1141 if( !sTmp
.isEmpty() && sTmp
.getLength() != sWord
.getLength() &&
1142 FindInCplSttExceptList(eLang
, sTmp
))
1145 if(FindInCplSttExceptList(eLang
, sWord
, true))
1150 sal_Unicode cSave
= *pWordStt
;
1151 nSttPos
= pWordStt
- rTxt
.getStr();
1152 OUString sChar
= rCC
.titlecase(OUString(cSave
)); //see fdo#56740
1153 bool bRet
= sChar
[0] != cSave
&& rDoc
.ReplaceRange( nSttPos
, 1, sChar
);
1155 // Perhaps someone wants to have the word
1156 if( bRet
&& ACFlags::SaveWordCplSttLst
& nFlags
)
1157 rDoc
.SaveCpltSttWord( ACFlags::CapitalStartSentence
, nSttPos
, sWord
, cSave
);
1160 // Correct accidental use of cAPS LOCK key
1161 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
1162 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
1163 LanguageType eLang
)
1165 if (nEndPos
- nSttPos
< 2)
1166 // string must be at least 2-character long.
1169 CharClass
& rCC
= GetCharClass( eLang
);
1171 // Check the first 2 letters.
1172 if ( !IsLowerLetter(rCC
.getCharacterType(rTxt
, nSttPos
)) )
1175 if ( !IsUpperLetter(rCC
.getCharacterType(rTxt
, nSttPos
+1)) )
1178 OUStringBuffer aConverted
;
1179 aConverted
.append( rCC
.uppercase(OUString(rTxt
[nSttPos
])) );
1180 aConverted
.append( rCC
.lowercase(OUString(rTxt
[nSttPos
+1])) );
1182 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1183 if (FindInWordStartExceptList(eLang
, rTxt
.copy(nSttPos
, nEndPos
- nSttPos
)))
1186 for( sal_Int32 i
= nSttPos
+2; i
< nEndPos
; ++i
)
1188 if ( IsLowerLetter(rCC
.getCharacterType(rTxt
, i
)) )
1189 // A lowercase letter disqualifies the whole text.
1192 if ( IsUpperLetter(rCC
.getCharacterType(rTxt
, i
)) )
1193 // Another uppercase letter. Convert it.
1194 aConverted
.append( rCC
.lowercase(OUString(rTxt
[i
])) );
1196 // This is not an alphabetic letter. Leave it as-is.
1197 aConverted
.append( rTxt
[i
] );
1200 // Replace the word.
1201 rDoc
.Delete(nSttPos
, nEndPos
);
1202 rDoc
.Insert(nSttPos
, aConverted
.makeStringAndClear());
1208 sal_Unicode
SvxAutoCorrect::GetQuote( sal_Unicode cInsChar
, bool bSttQuote
,
1209 LanguageType eLang
) const
1211 sal_Unicode cRet
= bSttQuote
? ( '\"' == cInsChar
1212 ? GetStartDoubleQuote()
1213 : GetStartSingleQuote() )
1214 : ( '\"' == cInsChar
1215 ? GetEndDoubleQuote()
1216 : GetEndSingleQuote() );
1219 // then through the Language find the right character
1220 if( LANGUAGE_NONE
== eLang
)
1224 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1225 OUString
sRet( bSttQuote
1226 ? ( '\"' == cInsChar
1227 ? rLcl
.getDoubleQuotationMarkStart()
1228 : rLcl
.getQuotationMarkStart() )
1229 : ( '\"' == cInsChar
1230 ? rLcl
.getDoubleQuotationMarkEnd()
1231 : rLcl
.getQuotationMarkEnd() ));
1232 cRet
= !sRet
.isEmpty() ? sRet
[0] : cInsChar
;
1238 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc
& rDoc
, sal_Int32 nInsPos
,
1239 sal_Unicode cInsChar
, bool bSttQuote
,
1240 bool bIns
, LanguageType eLang
, ACQuotes eType
) const
1244 if ( eType
== ACQuotes::DoubleAngleQuote
)
1246 bool bSwiss
= eLang
== LANGUAGE_FRENCH_SWISS
;
1247 // pressing " inside a quotation -> use second level angle quotes
1248 bool bLeftQuote
= '\"' == cInsChar
&&
1249 // start position and Romanian OR
1250 // not start position and Hungarian
1251 bSttQuote
== (eLang
!= LANGUAGE_HUNGARIAN
);
1252 cRet
= ( '<' == cInsChar
|| bLeftQuote
)
1253 ? ( bSwiss
? cLeftSingleAngleQuote
: cLeftDoubleAngleQuote
)
1254 : ( bSwiss
? cRightSingleAngleQuote
: cRightDoubleAngleQuote
);
1256 else if ( eType
== ACQuotes::UseApostrophe
)
1259 cRet
= GetQuote( cInsChar
, bSttQuote
, eLang
);
1261 OUString
sChg( cInsChar
);
1263 rDoc
.Insert( nInsPos
, sChg
);
1265 rDoc
.Replace( nInsPos
, sChg
);
1267 sChg
= OUString(cRet
);
1269 if( eType
== ACQuotes::NonBreakingSpace
)
1271 if( rDoc
.Insert( bSttQuote
? nInsPos
+1 : nInsPos
, OUStringChar(cNonBreakingSpace
) ))
1277 else if( eType
== ACQuotes::DoubleAngleQuote
&& cInsChar
!= '\"' )
1279 rDoc
.Delete( nInsPos
-1, nInsPos
);
1283 rDoc
.Replace( nInsPos
, sChg
);
1285 // i' -> I' in English (last step for the Undo)
1286 if( eType
== ACQuotes::CapitalizeIAm
)
1287 rDoc
.Replace( nInsPos
-1, "I" );
1290 OUString
SvxAutoCorrect::GetQuote( SvxAutoCorrDoc
const & rDoc
, sal_Int32 nInsPos
,
1291 sal_Unicode cInsChar
, bool bSttQuote
)
1293 const LanguageType eLang
= GetDocLanguage( rDoc
, nInsPos
);
1294 sal_Unicode cRet
= GetQuote( cInsChar
, bSttQuote
, eLang
);
1296 OUString
sRet(cRet
);
1298 if( '\"' == cInsChar
)
1300 if (primary(eLang
) == primary(LANGUAGE_FRENCH
) && eLang
!= LANGUAGE_FRENCH_SWISS
)
// search preceding opening quote in the paragraph before the insert position
//
// Scans rTxt backwards, starting with the character just before nPos.
// Returns true as soon as sPrecedingChar is found. Returns false when
// sStopChar or any character of the zero-terminated array aStopChars is met
// first, or when the start of the text is reached.
// An nPos of 0 (or less) means there is nothing to scan and yields false;
// the loop is tested up front so that rTxt is never indexed with -1.
static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode sStopChar, const sal_Unicode* aStopChars )
{
    while ( nPos > 0 )
    {
        const sal_Unicode cTmpChar = rTxt[ --nPos ];
        if ( cTmpChar == sPrecedingChar )
            return true;

        if ( cTmpChar == sStopChar )
            return false;

        for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
            if ( cTmpChar == *pCh )
                return false;
    }

    return false;
}

// Runs the automatic corrections that apply when cChar is typed at nInsPos.
// NOTE(review): this listing of the function is incomplete (lines are
// missing), so the comments here only describe statements that are visible.
// Visible steps, in order:
//  - a space typed directly after a space is checked against
//    ACFlags::IgnoreDoubleSpace;
//  - a typed " or ' is handed to InsertQuote() (ACFlags::ChgQuotes /
//    ACFlags::ChgSglQuotes), with language specific choices of the quote type;
//  - a doubled '<' or '>' is handed to InsertQuote() as a double angle quote
//    for a list of languages (ACFlags::ChgAngleQuotes);
//  - cChar is inserted into or replaced in rDoc, followed by the
//    non-breaking-space handling (ACFlags::AddNonBrkSpace);
//  - the start of the word before nInsPos is located (nCapLttrPos) and the
//    word-level functions are called: ChgAutoCorrWord, TransliterateRTLWord,
//    FnChgOrdinalNumber, FnSetINetAttr, FnSetDOIAttr, FnCorrectCapsLock,
//    FnCapitalStartSentence, FnCapitalStartWord and FnChgToEnEmDash.
// io_bNbspRunNext is copied to bIsNextRun and reset to false on entry.
1334 // WARNING: rText may become invalid, see comment below
1335 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
1336 sal_Int32 nInsPos
, sal_Unicode cChar
,
1337 bool bInsert
, bool& io_bNbspRunNext
, vcl::Window
const * pFrameWin
)
1339 bool bIsNextRun
= io_bNbspRunNext
;
1340 io_bNbspRunNext
= false; // if it was set, then it has to be turned off
1342 do{ // only for middle check loop !!
1345 // Prevent double space
1346 if( nInsPos
&& ' ' == cChar
&&
1347 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace
) &&
1348 ' ' == rTxt
[ nInsPos
- 1 ])
// Quote replacement: bSingle distinguishes a typed ' from a typed ".
1353 bool bSingle
= '\'' == cChar
;
1354 bool bIsReplaceQuote
=
1355 (IsAutoCorrFlag( ACFlags::ChgQuotes
) && ('\"' == cChar
)) ||
1356 (IsAutoCorrFlag( ACFlags::ChgSglQuotes
) && bSingle
);
1357 if( bIsReplaceQuote
)
// At position 0 the quote is an opening quote; otherwise this is decided
// from the preceding character cPrev below.
1359 bool bSttQuote
= !nInsPos
;
1360 ACQuotes eType
= ACQuotes::NONE
;
1361 const LanguageType eLang
= GetDocLanguage( rDoc
, nInsPos
);
1364 sal_Unicode cPrev
= rTxt
[ nInsPos
-1 ];
1365 bSttQuote
= NonFieldWordDelim(cPrev
) ||
1366 lcl_IsInArr( u
"([{", cPrev
) ||
1367 ( cEmDash
== cPrev
) ||
1368 ( cEnDash
== cPrev
);
1369 // tdf#38394 use opening quotation mark << in French l'<<word>>
1370 if ( !bSingle
&& !bSttQuote
&& cPrev
== cApostrophe
&&
1371 primary(eLang
) == primary(LANGUAGE_FRENCH
) &&
1372 ( ( ( nInsPos
== 2 || ( nInsPos
> 2 && IsWordDelim( rTxt
[ nInsPos
-3 ] ) ) ) &&
1373 // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1374 OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt
[ nInsPos
-2 ] ) > -1 ) ||
1375 ( ( nInsPos
== 3 || (nInsPos
> 3 && IsWordDelim( rTxt
[ nInsPos
-4 ] ) ) ) &&
1376 // abbreviated form of que
1377 ( rTxt
[ nInsPos
-2 ] == 'u' || rTxt
[ nInsPos
-2 ] == 'U' ) &&
1378 ( rTxt
[ nInsPos
-3 ] == 'q' || rTxt
[ nInsPos
-3 ] == 'Q' ) ) ) )
1382 // tdf#108423 for capitalization of English i'm
1383 else if ( bSingle
&& ( cPrev
== 'i' ) &&
1384 primary(eLang
) == primary(LANGUAGE_ENGLISH
) &&
1385 ( nInsPos
== 1 || IsWordDelim( rTxt
[ nInsPos
-2 ] ) ) )
1387 eType
= ACQuotes::CapitalizeIAm
;
1389 // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1390 else if ( !bSingle
&& nInsPos
&&
1391 ( ( eLang
== LANGUAGE_HUNGARIAN
&&
1392 lcl_HasPrecedingChar( rTxt
, nInsPos
,
1393 bSttQuote
? aStopDoubleAngleQuoteStart
[0] : aStopDoubleAngleQuoteEnd
[0],
1394 bSttQuote
? aStopDoubleAngleQuoteStart
[1] : aStopDoubleAngleQuoteEnd
[1],
1395 bSttQuote
? aStopDoubleAngleQuoteStart
+ 1 : aStopDoubleAngleQuoteEnd
+ 2 ) ) ||
1398 LANGUAGE_ROMANIAN_MOLDOVA
) &&
1399 lcl_HasPrecedingChar( rTxt
, nInsPos
,
1400 bSttQuote
? aStopDoubleAngleQuoteStart
[0] : aStopDoubleAngleQuoteEndRo
[0],
1401 bSttQuote
? aStopDoubleAngleQuoteStart
[1] : aStopDoubleAngleQuoteEndRo
[1],
1402 bSttQuote
? aStopDoubleAngleQuoteStart
+ 1 : aStopDoubleAngleQuoteEndRo
+ 2 ) ) ) )
1404 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1405 // only if the opening double quotation mark is the default one
1406 if ( rLcl
.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart
[0]) )
1407 eType
= ACQuotes::DoubleAngleQuote
;
1409 else if ( bSingle
&& nInsPos
&& !bSttQuote
&&
1410 // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1411 // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1412 // tdf#123786 the same for Russian and Ukrainian
1416 LANGUAGE_GERMAN_SWISS
,
1417 LANGUAGE_GERMAN_AUSTRIAN
,
1418 LANGUAGE_GERMAN_LUXEMBOURG
,
1419 LANGUAGE_GERMAN_LIECHTENSTEIN
,
1422 LANGUAGE_SLOVENIAN
) ) )
// Configured single quotes are used when set; a value of 0 falls back to the
// locale data of eLang.
1424 sal_Unicode sStartChar
= GetStartSingleQuote();
1425 sal_Unicode sEndChar
= GetEndSingleQuote();
1426 if ( !sStartChar
|| !sEndChar
) {
1427 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1428 if ( !sStartChar
) sStartChar
= rLcl
.getQuotationMarkStart()[0];
// NOTE(review): sEndChar also falls back to getQuotationMarkStart(); it may
// have been meant to use getQuotationMarkEnd() - confirm before changing.
1429 if ( !sEndChar
) sEndChar
= rLcl
.getQuotationMarkStart()[0];
1431 if ( !lcl_HasPrecedingChar( rTxt
, nInsPos
, sStartChar
, sEndChar
, aStopSingleQuoteEnd
+ 1 ) )
1433 CharClass
& rCC
= GetCharClass( eLang
);
1434 if ( rCC
.isLetter(rTxt
, nInsPos
-1) )
1436 eType
= ACQuotes::UseApostrophe
;
1440 else if ( bSingle
&& nInsPos
&& !bSttQuote
&&
1443 LANGUAGE_UKRAINIAN
) &&
1444 !lcl_HasPrecedingChar( rTxt
, nInsPos
, aStopSingleQuoteEndRuUa
[0], aStopSingleQuoteEndRuUa
[1], aStopSingleQuoteEndRuUa
+ 2 ) ) )
1446 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1447 CharClass
& rCC
= GetCharClass( eLang
);
1448 if ( rLcl
.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa
[0]) &&
1449 // use apostrophe only after letters, not after digits or punctuation
1450 rCC
.isLetter(rTxt
, nInsPos
-1) )
1452 eType
= ACQuotes::UseApostrophe
;
// French (except Swiss French) double quotes get a non-breaking space when no
// other quote type was selected above.
1457 if ( eType
== ACQuotes::NONE
&& !bSingle
&&
1458 ( primary(eLang
) == primary(LANGUAGE_FRENCH
) && eLang
!= LANGUAGE_FRENCH_SWISS
) )
1459 eType
= ACQuotes::NonBreakingSpace
;
1461 InsertQuote( rDoc
, nInsPos
, cChar
, bSttQuote
, bInsert
, eLang
, eType
);
1464 // tdf#133524 change "<<" and ">>" to double angle quotation marks
1465 else if ( IsAutoCorrFlag( ACFlags::ChgQuotes
) &&
1466 IsAutoCorrFlag( ACFlags::ChgAngleQuotes
) &&
1467 ('<' == cChar
|| '>' == cChar
) &&
1468 nInsPos
> 0 && cChar
== rTxt
[ nInsPos
-1 ] )
1470 const LanguageType eLang
= GetDocLanguage( rDoc
, nInsPos
);
1472 LANGUAGE_CATALAN
, // primary level
1473 LANGUAGE_CATALAN_VALENCIAN
, // primary level
1474 LANGUAGE_FINNISH
, // alternative primary level
1475 LANGUAGE_FRENCH_SWISS
, // second level
1476 LANGUAGE_GALICIAN
, // primary level
1477 LANGUAGE_HUNGARIAN
, // second level
1478 LANGUAGE_POLISH
, // second level
1479 LANGUAGE_PORTUGUESE
, // primary level
1480 LANGUAGE_PORTUGUESE_BRAZILIAN
, // primary level
1481 LANGUAGE_ROMANIAN
, // second level
1482 LANGUAGE_ROMANIAN_MOLDOVA
, // second level
1483 LANGUAGE_SWEDISH
, // alternative primary level
1484 LANGUAGE_SWEDISH_FINLAND
, // alternative primary level
1485 LANGUAGE_UKRAINIAN
, // primary level
1486 LANGUAGE_USER_ARAGONESE
, // primary level
1487 LANGUAGE_USER_ASTURIAN
) || // primary level
1488 primary(eLang
) == primary(LANGUAGE_GERMAN
) || // alternative primary level
1489 primary(eLang
) == primary(LANGUAGE_SPANISH
) ) // primary level
1491 InsertQuote( rDoc
, nInsPos
, cChar
, false, bInsert
, eLang
, ACQuotes::DoubleAngleQuote
);
// No quote handling applied: put cChar itself into the document.
1497 rDoc
.Insert( nInsPos
, OUString(cChar
) );
1499 rDoc
.Replace( nInsPos
, OUString(cChar
) );
1501 // Hardspaces autocorrection
1502 if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace
) )
1504 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1505 // and its length may change (even become shorter) if FnAddNonBrkSpace succeeds!
1506 sal_Int32 nUpdatedPos
= -1;
1507 if (NeedsHardspaceAutocorr(cChar
))
1508 nUpdatedPos
= FnAddNonBrkSpace( rDoc
, rTxt
, nInsPos
, GetDocLanguage( rDoc
, nInsPos
), io_bNbspRunNext
);
1509 if (nUpdatedPos
>= 0)
1511 nInsPos
= nUpdatedPos
;
1513 else if ( bIsNextRun
&& !IsAutoCorrectChar( cChar
) )
1515 // Remove the NBSP if it wasn't an autocorrection
1516 if ( nInsPos
!= 0 && NeedsHardspaceAutocorr( rTxt
[ nInsPos
- 1 ] ) &&
1517 cChar
!= ' ' && cChar
!= '\t' && cChar
!= cNonBreakingSpace
)
1519 // Look for the last HARD_SPACE
1520 sal_Int32 nPos
= nInsPos
- 1;
1521 bool bContinue
= true;
1524 const sal_Unicode cTmpChar
= rTxt
[ nPos
];
1525 if ( cTmpChar
== cNonBreakingSpace
)
1527 rDoc
.Delete( nPos
, nPos
+ 1 );
1530 else if ( !NeedsHardspaceAutocorr( cTmpChar
) || nPos
== 0 )
// Word-level corrections: nPos starts at the character before nInsPos.
1542 sal_Int32 nPos
= nInsPos
- 1;
1544 if( IsWordDelim( rTxt
[ nPos
]))
1547 // Set bold or underline automatically?
1548 if (('*' == cChar
|| '_' == cChar
|| '/' == cChar
|| '-' == cChar
) && (nPos
+1 < rTxt
.getLength()))
1550 if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl
) )
1552 FnChgWeightUnderl( rDoc
, rTxt
, nPos
+1 );
// Walk backwards to the word delimiter in front of the current word.
1557 while( nPos
&& !IsWordDelim( rTxt
[ --nPos
]))
1560 // Found a Paragraph-start or a Blank, search for the word shortcut in
1562 sal_Int32 nCapLttrPos
= nPos
+1; // on the 1st Character
1563 if( !nPos
&& !IsWordDelim( rTxt
[ 0 ]))
1564 --nCapLttrPos
; // begin of paragraph and no blank
1566 const LanguageType eLang
= GetDocLanguage( rDoc
, nCapLttrPos
);
1567 CharClass
& rCC
= GetCharClass( eLang
);
1569 // no symbol characters
1570 if( lcl_IsSymbolChar( rCC
, rTxt
, nCapLttrPos
, nInsPos
))
1573 if( IsAutoCorrFlag( ACFlags::Autocorrect
) &&
1574 // tdf#134940 fix regression of arrow "-->" resulted by premature
1575 // replacement of "--" since '>' was added to IsAutoCorrectChar()
1578 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1579 // and becomes INVALID if ChgAutoCorrWord returns true!
1580 // => use aPara/pPara to create a valid copy of the string!
1582 OUString
* pPara
= IsAutoCorrFlag(ACFlags::CapitalStartSentence
) ? &aPara
: nullptr;
1584 bool bChgWord
= rDoc
.ChgAutoCorrWord( nCapLttrPos
, nInsPos
,
// Second attempt on a narrowed range: characters from sImplSttSkipChars are
// skipped at the start and characters from sImplEndSkipChars at the end.
1588 sal_Int32 nCapLttrPos1
= nCapLttrPos
, nInsPos1
= nInsPos
;
1589 while( nCapLttrPos1
< nInsPos
&&
1590 lcl_IsInArr( sImplSttSkipChars
, rTxt
[ nCapLttrPos1
] )
1593 while( nCapLttrPos1
< nInsPos1
&& nInsPos1
&&
1594 lcl_IsInArr( sImplEndSkipChars
, rTxt
[ nInsPos1
-1 ] )
1598 if( (nCapLttrPos1
!= nCapLttrPos
|| nInsPos1
!= nInsPos
) &&
1599 nCapLttrPos1
< nInsPos1
&&
1600 rDoc
.ChgAutoCorrWord( nCapLttrPos1
, nInsPos1
, *this, pPara
))
1603 nCapLttrPos
= nCapLttrPos1
;
// aPara is the copy of the paragraph text requested through pPara; it is
// used instead of rTxt for the follow-up corrections below.
1609 if( !aPara
.isEmpty() )
1611 sal_Int32 nEnd
= nCapLttrPos
;
1612 while( nEnd
< aPara
.getLength() &&
1613 !IsWordDelim( aPara
[ nEnd
]))
1616 // Capital letter at beginning of paragraph?
1617 if( IsAutoCorrFlag( ACFlags::CapitalStartSentence
) )
1619 FnCapitalStartSentence( rDoc
, aPara
, false,
1620 nCapLttrPos
, nEnd
, eLang
);
1623 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash
) )
1625 FnChgToEnEmDash( rDoc
, aPara
, nCapLttrPos
, nEnd
, eLang
);
1632 if( IsAutoCorrFlag( ACFlags::TransliterateRTL
) && GetDocLanguage( rDoc
, nInsPos
) == LANGUAGE_HUNGARIAN
)
1634 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1635 // and becomes INVALID if TransliterateRTLWord returns true!
1636 if ( rDoc
.TransliterateRTLWord( nCapLttrPos
, nInsPos
) )
1640 if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber
) &&
1641 (nInsPos
>= 2 ) && // fdo#69762 avoid autocorrect for 2e-3
1642 ( '-' != cChar
|| 'E' != rtl::toAsciiUpperCase(rTxt
[nInsPos
-1]) || '0' > rTxt
[nInsPos
-2] || '9' < rTxt
[nInsPos
-2] ) &&
1643 FnChgOrdinalNumber( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) ) ||
1644 ( IsAutoCorrFlag( ACFlags::SetINetAttr
) &&
1645 ( ' ' == cChar
|| '\t' == cChar
|| 0x0a == cChar
|| !cChar
) &&
1646 FnSetINetAttr( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) ) ||
1647 ( IsAutoCorrFlag( ACFlags::SetDOIAttr
) &&
1648 ( ' ' == cChar
|| '\t' == cChar
|| 0x0a == cChar
|| !cChar
) &&
1649 FnSetDOIAttr( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) ) )
// pFrameWin may be null; the Caps Lock state is only queried when a window
// was passed in.
1653 bool bLockKeyOn
= pFrameWin
&& (pFrameWin
->GetIndicatorState() & KeyIndicatorState::CAPSLOCK
);
1654 bool bUnsupported
= lcl_IsUnsupportedUnicodeChar( rCC
, rTxt
, nCapLttrPos
, nInsPos
);
1656 if ( bLockKeyOn
&& IsAutoCorrFlag( ACFlags::CorrectCapsLock
) &&
1657 FnCorrectCapsLock( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) )
1659 // Correct accidental use of cAPS LOCK key (do this only when
1660 // the caps or shift lock key is pressed). Turn off the caps
1662 pFrameWin
->SimulateKeyPress( KEY_CAPSLOCK
);
1665 // Capital letter at beginning of paragraph ?
1666 if( !bUnsupported
&&
1667 IsAutoCorrFlag( ACFlags::CapitalStartSentence
) )
1669 FnCapitalStartSentence( rDoc
, rTxt
, true, nCapLttrPos
, nInsPos
, eLang
);
1672 // Two capital letters at beginning of word ??
1673 if( !bUnsupported
&&
1674 IsAutoCorrFlag( ACFlags::CapitalStartWord
) )
1676 FnCapitalStartWord( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
);
1679 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash
) )
1681 FnChgToEnEmDash( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
);
1688 SvxAutoCorrectLanguageLists
& SvxAutoCorrect::GetLanguageList_(
1689 LanguageType eLang
)
1691 LanguageTag
aLanguageTag( eLang
);
1692 if (m_aLangTable
.find(aLanguageTag
) == m_aLangTable
.end())
1693 (void)CreateLanguageFile(aLanguageTag
);
1694 const auto iter
= m_aLangTable
.find(aLanguageTag
);
1695 assert(iter
!= m_aLangTable
.end());
1696 return iter
->second
;
1699 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang
)
1701 auto const iter
= m_aLangTable
.find(LanguageTag(eLang
));
1702 if (iter
!= m_aLangTable
.end())
1703 iter
->second
.SaveCplSttExceptList();
1706 SAL_WARN("editeng", "Save an empty list? ");
1710 void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang
)
1712 auto const iter
= m_aLangTable
.find(LanguageTag(eLang
));
1713 if (iter
!= m_aLangTable
.end())
1714 iter
->second
.SaveWordStartExceptList();
1717 SAL_WARN("editeng", "Save an empty list? ");
1721 // Adds a single word. The list will immediately be written to the file!
1722 bool SvxAutoCorrect::AddCplSttException( const OUString
& rNew
,
1723 LanguageType eLang
)
1725 SvxAutoCorrectLanguageLists
* pLists
= nullptr;
1726 // either the right language is present or it will be this in the general list
1727 auto iter
= m_aLangTable
.find(LanguageTag(eLang
));
1728 if (iter
!= m_aLangTable
.end())
1729 pLists
= &iter
->second
;
1732 LanguageTag
aLangTagUndetermined( LANGUAGE_UNDETERMINED
);
1733 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1734 if (iter
!= m_aLangTable
.end())
1735 pLists
= &iter
->second
;
1736 else if(CreateLanguageFile(aLangTagUndetermined
))
1738 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1739 assert(iter
!= m_aLangTable
.end());
1740 pLists
= &iter
->second
;
1743 OSL_ENSURE(pLists
, "No auto correction data");
1744 return pLists
&& pLists
->AddToCplSttExceptList(rNew
);
1747 // Adds a single word. The list will immediately be written to the file!
1748 bool SvxAutoCorrect::AddWordStartException( const OUString
& rNew
,
1749 LanguageType eLang
)
1751 SvxAutoCorrectLanguageLists
* pLists
= nullptr;
1752 //either the right language is present or it is set in the general list
1753 auto iter
= m_aLangTable
.find(LanguageTag(eLang
));
1754 if (iter
!= m_aLangTable
.end())
1755 pLists
= &iter
->second
;
1758 LanguageTag
aLangTagUndetermined( LANGUAGE_UNDETERMINED
);
1759 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1760 if (iter
!= m_aLangTable
.end())
1761 pLists
= &iter
->second
;
1762 else if(CreateLanguageFile(aLangTagUndetermined
))
1764 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1765 assert(iter
!= m_aLangTable
.end());
1766 pLists
= &iter
->second
;
1769 OSL_ENSURE(pLists
, "No auto correction file!");
1770 return pLists
&& pLists
->AddToWordStartExceptList(rNew
);
1773 OUString
SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc
const& rDoc
, const OUString
& rTxt
,
1780 sal_Int32 nEnd
= nPos
;
1782 // it must be followed by a blank or tab!
1783 if( ( nPos
< rTxt
.getLength() &&
1784 !IsWordDelim( rTxt
[ nPos
])) ||
1785 IsWordDelim( rTxt
[ --nPos
]))
1788 while( nPos
&& !IsWordDelim( rTxt
[ --nPos
]))
1791 // Found a Paragraph-start or a Blank, search for the word shortcut in
1793 sal_Int32 nCapLttrPos
= nPos
+1; // on the 1st Character
1794 if( !nPos
&& !IsWordDelim( rTxt
[ 0 ]))
1795 --nCapLttrPos
; // Beginning of paragraph and no Blank!
1797 while( lcl_IsInArr( sImplSttSkipChars
, rTxt
[ nCapLttrPos
]) )
1798 if( ++nCapLttrPos
>= nEnd
)
1801 if( 3 > nEnd
- nCapLttrPos
)
1804 const LanguageType eLang
= GetDocLanguage( rDoc
, nCapLttrPos
);
1806 CharClass
& rCC
= GetCharClass(eLang
);
1808 if( lcl_IsSymbolChar( rCC
, rTxt
, nCapLttrPos
, nEnd
))
1811 sRet
= rTxt
.copy( nCapLttrPos
, nEnd
- nCapLttrPos
);
1816 std::vector
<OUString
> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt
,
1817 const sal_Int32 nPos
)
1819 constexpr sal_Int32 nMinLen
= 3;
1820 constexpr sal_Int32 nMaxLen
= 9;
1821 std::vector
<OUString
> aRes
;
1822 if (nPos
>= nMinLen
)
1824 sal_Int32 nBegin
= std::max
<sal_Int32
>(nPos
- nMaxLen
, 0);
1825 // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1826 if (nBegin
> 0 && !IsWordDelim(rTxt
[nBegin
-1]))
1828 while (nBegin
+ nMinLen
<= nPos
&& !IsWordDelim(rTxt
[nBegin
]))
1831 if (nBegin
+ nMinLen
<= nPos
)
1833 OUString
sRes( rTxt
.substr(nBegin
, nPos
- nBegin
) );
1834 aRes
.push_back(sRes
);
1835 bool bLastStartedWithDelim
= IsWordDelim(sRes
[0]);
1836 for (sal_Int32 i
= 1; i
<= sRes
.getLength() - nMinLen
; ++i
)
1838 bool bAdd
= bLastStartedWithDelim
;
1839 bLastStartedWithDelim
= IsWordDelim(sRes
[i
]);
1840 bAdd
= bAdd
|| bLastStartedWithDelim
;
1842 aRes
.push_back(sRes
.copy(i
));
1849 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag
& rLanguageTag
, bool bNewFile
)
1851 OSL_ENSURE(m_aLangTable
.find(rLanguageTag
) == m_aLangTable
.end(), "Language already exists ");
1853 OUString
sUserDirFile( GetAutoCorrFileName( rLanguageTag
, true ));
1854 OUString
sShareDirFile( sUserDirFile
);
1856 SvxAutoCorrectLanguageLists
* pLists
= nullptr;
1858 tools::Time
nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM
), nLastCheckTime( tools::Time::EMPTY
);
1860 auto nFndPos
= aLastFileTable
.find(rLanguageTag
);
1861 if(nFndPos
!= aLastFileTable
.end() &&
1862 (nLastCheckTime
.SetTime(nFndPos
->second
), nLastCheckTime
< nAktTime
) &&
1863 nAktTime
- nLastCheckTime
< nMinTime
)
1865 // no need to test the file, because the last check is not older then
1869 sShareDirFile
= sUserDirFile
;
1870 auto itBool
= m_aLangTable
.emplace(std::piecewise_construct
,
1871 std::forward_as_tuple(rLanguageTag
),
1872 std::forward_as_tuple(*this, sShareDirFile
, sUserDirFile
));
1873 pLists
= &itBool
.first
->second
;
1874 aLastFileTable
.erase(nFndPos
);
1878 ( FStatHelper::IsDocument( sUserDirFile
) ||
1879 FStatHelper::IsDocument( sShareDirFile
=
1880 GetAutoCorrFileName( rLanguageTag
) ) ||
1881 FStatHelper::IsDocument( sShareDirFile
=
1882 GetAutoCorrFileName( rLanguageTag
, false, false, true) )
1884 ( sShareDirFile
= sUserDirFile
, bNewFile
)
1887 auto itBool
= m_aLangTable
.emplace(std::piecewise_construct
,
1888 std::forward_as_tuple(rLanguageTag
),
1889 std::forward_as_tuple(*this, sShareDirFile
, sUserDirFile
));
1890 pLists
= &itBool
.first
->second
;
1891 if (nFndPos
!= aLastFileTable
.end())
1892 aLastFileTable
.erase(nFndPos
);
1894 else if( !bNewFile
)
1896 aLastFileTable
[rLanguageTag
] = nAktTime
.GetTime();
1898 return pLists
!= nullptr;
1901 bool SvxAutoCorrect::PutText( const OUString
& rShort
, const OUString
& rLong
,
1902 LanguageType eLang
)
1904 LanguageTag
aLanguageTag( eLang
);
1905 if (auto const iter
= m_aLangTable
.find(aLanguageTag
); iter
!= m_aLangTable
.end())
1906 return iter
->second
.PutText(rShort
, rLong
);
1907 if (CreateLanguageFile(aLanguageTag
))
1909 auto const iter
= m_aLangTable
.find(aLanguageTag
);
1910 assert (iter
!= m_aLangTable
.end());
1911 return iter
->second
.PutText(rShort
, rLong
);
1916 void SvxAutoCorrect::MakeCombinedChanges( std::vector
<SvxAutocorrWord
>& aNewEntries
,
1917 std::vector
<SvxAutocorrWord
>& aDeleteEntries
,
1918 LanguageType eLang
)
1920 LanguageTag
aLanguageTag( eLang
);
1921 auto iter
= m_aLangTable
.find(aLanguageTag
);
1922 if (iter
!= m_aLangTable
.end())
1924 iter
->second
.MakeCombinedChanges( aNewEntries
, aDeleteEntries
);
1926 else if(CreateLanguageFile( aLanguageTag
))
1928 iter
= m_aLangTable
.find(aLanguageTag
);
1929 assert(iter
!= m_aLangTable
.end());
1930 iter
->second
.MakeCombinedChanges( aNewEntries
, aDeleteEntries
);
1934 // - return the replacement text (only for SWG-Format, all other
1935 // can be taken from the word list!)
1936 bool SvxAutoCorrect::GetLongText( const OUString
&, OUString
& )
1941 void SvxAutoCorrect::refreshBlockList( const uno::Reference
< embed::XStorage
>& )
1945 // Text with attribution (only the SWG - SWG format!)
1946 bool SvxAutoCorrect::PutText( const css::uno::Reference
< css::embed::XStorage
>&,
1947 const OUString
&, const OUString
&, SfxObjectShell
&, OUString
& )
1952 OUString
EncryptBlockName_Imp(std::u16string_view rName
)
1954 OUStringBuffer aName
;
1955 aName
.append('#').append(rName
);
1956 for (size_t nLen
= rName
.size(), nPos
= 1; nPos
< nLen
; ++nPos
)
1958 if (lcl_IsInArr( u
"!/:.\\", aName
[nPos
]))
1959 aName
[nPos
] &= 0x0f;
1961 return aName
.makeStringAndClear();
1964 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
// Derives rPackageName from rShort. The visible statements convert rShort to
// a UTF-7 byte string, read those bytes back as ASCII into a buffer, walk the
// buffer character by character and finally store the buffer in rPackageName.
// NOTE(review): the loop body is not visible in this listing; judging by the
// tdf#156769 comment it presumably escapes characters such as '?' that are
// not allowed in storage names - confirm against the complete source.
1965 static void GeneratePackageName ( std::u16string_view rShort
, OUString
& rPackageName
)
// UTF-7 yields a pure ASCII representation of the short name.
1967 OString
sByte(OUStringToOString(rShort
, RTL_TEXTENCODING_UTF7
));
1968 OUStringBuffer
aBuf(OStringToOUString(sByte
, RTL_TEXTENCODING_ASCII_US
));
1970 for (sal_Int32 nPos
= 0; nPos
< aBuf
.getLength(); ++nPos
)
1979 // tdf#156769 - escape the question mark in the storage name
1988 rPackageName
= aBuf
.makeStringAndClear();
1991 static const SvxAutocorrWord
* lcl_SearchWordsInList(
1992 SvxAutoCorrectLanguageLists
* pList
, std::u16string_view rTxt
,
1993 sal_Int32
& rStt
, sal_Int32 nEndPos
)
1995 const SvxAutocorrWordList
* pAutoCorrWordList
= pList
->GetAutocorrWordList();
1996 return pAutoCorrWordList
->SearchWordsInList( rTxt
, rStt
, nEndPos
);
1999 // the search for the words in the substitution table
2000 const SvxAutocorrWord
* SvxAutoCorrect::SearchWordsInList(
2001 std::u16string_view rTxt
, sal_Int32
& rStt
, sal_Int32 nEndPos
,
2002 SvxAutoCorrDoc
&, LanguageTag
& rLang
)
2004 const SvxAutocorrWord
* pRet
= nullptr;
2005 LanguageTag
aLanguageTag( rLang
);
2006 if( aLanguageTag
.isSystemLocale() )
2007 aLanguageTag
.reset( MsLangId::getConfiguredSystemLanguage());
2009 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
2012 // First search for eLang, then US-English -> English
2013 // and last in LANGUAGE_UNDETERMINED
2014 if (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() || CreateLanguageFile(aLanguageTag
, false))
2016 //the language is available - so bring it on
2017 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2018 assert(iter
!= m_aLangTable
.end());
2019 SvxAutoCorrectLanguageLists
& rList
= iter
->second
;
2020 pRet
= lcl_SearchWordsInList( &rList
, rTxt
, rStt
, nEndPos
);
2023 rLang
= aLanguageTag
;
2030 // If it still could not be found here, then keep on searching
2031 LanguageType eLang
= aLanguageTag
.getLanguageType();
2032 // the primary language for example EN
2033 aLanguageTag
.reset(aLanguageTag
.getLanguage());
2034 LanguageType nTmpKey
= aLanguageTag
.getLanguageType(false);
2035 if (nTmpKey
!= eLang
&& nTmpKey
!= LANGUAGE_UNDETERMINED
&&
2036 (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() ||
2037 CreateLanguageFile(aLanguageTag
, false)))
2039 //the language is available - so bring it on
2040 SvxAutoCorrectLanguageLists
& rList
= m_aLangTable
.find(aLanguageTag
)->second
;
2041 pRet
= lcl_SearchWordsInList( &rList
, rTxt
, rStt
, nEndPos
);
2044 rLang
= aLanguageTag
;
2049 if (m_aLangTable
.find(aLanguageTag
.reset(LANGUAGE_UNDETERMINED
)) != m_aLangTable
.end() ||
2050 CreateLanguageFile(aLanguageTag
, false))
2052 //the language is available - so bring it on
2053 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2054 assert(iter
!= m_aLangTable
.end());
2055 SvxAutoCorrectLanguageLists
& rList
= iter
->second
;
2056 pRet
= lcl_SearchWordsInList( &rList
, rTxt
, rStt
, nEndPos
);
2059 rLang
= aLanguageTag
;
2066 bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang
,
2067 const OUString
& sWord
)
2069 LanguageTag
aLanguageTag( eLang
);
2071 /* TODO-BCP47: again horrible ugliness */
2073 // First search for eLang, then primary language of eLang
2074 // and last in LANGUAGE_UNDETERMINED
2076 if (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() || CreateLanguageFile(aLanguageTag
, false))
2078 //the language is available - so bring it on
2079 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2080 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2081 auto& rList
= iter
->second
;
2082 if(rList
.GetWordStartExceptList()->find(sWord
) != rList
.GetWordStartExceptList()->end() )
2086 // If it still could not be found here, then keep on searching
2087 // the primary language for example EN
2088 aLanguageTag
.reset(aLanguageTag
.getLanguage());
2089 LanguageType nTmpKey
= aLanguageTag
.getLanguageType(false);
2090 if (nTmpKey
!= eLang
&& nTmpKey
!= LANGUAGE_UNDETERMINED
&&
2091 (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() ||
2092 CreateLanguageFile(aLanguageTag
, false)))
2094 //the language is available - so bring it on
2095 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2096 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2097 auto& rList
= iter
->second
;
2098 if(rList
.GetWordStartExceptList()->find(sWord
) != rList
.GetWordStartExceptList()->end() )
2102 if (m_aLangTable
.find(aLanguageTag
.reset(LANGUAGE_UNDETERMINED
)) != m_aLangTable
.end() ||
2103 CreateLanguageFile(aLanguageTag
, false))
2105 //the language is available - so bring it on
2106 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2107 assert(iter
!= m_aLangTable
.end());
2108 auto& rList
= iter
->second
;
2109 if(rList
.GetWordStartExceptList()->find(sWord
) != rList
.GetWordStartExceptList()->end() )
2115 static bool lcl_FindAbbreviation(const SvStringsISortDtor
* pList
, const OUString
& sWord
)
2117 SvStringsISortDtor::const_iterator it
= pList
->find( "~" );
2118 SvStringsISortDtor::size_type nPos
= it
- pList
->begin();
2119 if( nPos
< pList
->size() )
2121 OUString
sLowerWord(sWord
.toAsciiLowerCase());
2123 for( SvStringsISortDtor::size_type n
= nPos
; n
< pList
->size(); ++n
)
2125 sAbr
= (*pList
)[ n
];
2128 // ~ and ~. are not allowed!
2129 if( 2 < sAbr
.getLength() && sAbr
.getLength() - 1 <= sWord
.getLength() )
2131 OUString
sLowerAbk(sAbr
.toAsciiLowerCase());
2132 for (sal_Int32 i
= sLowerAbk
.getLength(), ii
= sLowerWord
.getLength(); i
;)
2134 if( !--i
) // agrees
2137 if( sLowerAbk
[i
] != sLowerWord
[--ii
])
2143 OSL_ENSURE( !(nPos
&& '~' == (*pList
)[ --nPos
][ 0 ] ),
2144 "Wrongly sorted exception list?" );
2148 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang
,
2149 const OUString
& sWord
, bool bAbbreviation
)
2151 LanguageTag
aLanguageTag( eLang
);
2153 /* TODO-BCP47: did I mention terrible horrible ugliness? */
2155 // First search for eLang, then primary language of eLang
2156 // and last in LANGUAGE_UNDETERMINED
2158 if (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() || CreateLanguageFile(aLanguageTag
, false))
2160 //the language is available - so bring it on
2161 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2162 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2163 const SvStringsISortDtor
* pList
= iter
->second
.GetCplSttExceptList();
2164 if(bAbbreviation
? lcl_FindAbbreviation(pList
, sWord
) : pList
->find(sWord
) != pList
->end() )
2168 // If it still could not be found here, then keep on searching
2169 // the primary language for example EN
2170 aLanguageTag
.reset(aLanguageTag
.getLanguage());
2171 LanguageType nTmpKey
= aLanguageTag
.getLanguageType(false);
2172 if (nTmpKey
!= eLang
&& nTmpKey
!= LANGUAGE_UNDETERMINED
&&
2173 (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() ||
2174 CreateLanguageFile(aLanguageTag
, false)))
2176 //the language is available - so bring it on
2177 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2178 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2179 const SvStringsISortDtor
* pList
= iter
->second
.GetCplSttExceptList();
2180 if(bAbbreviation
? lcl_FindAbbreviation(pList
, sWord
) : pList
->find(sWord
) != pList
->end() )
2184 if (m_aLangTable
.find(aLanguageTag
.reset(LANGUAGE_UNDETERMINED
)) != m_aLangTable
.end() ||
2185 CreateLanguageFile(aLanguageTag
, false))
2187 //the language is available - so bring it on
2188 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2189 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2190 const SvStringsISortDtor
* pList
= iter
->second
.GetCplSttExceptList();
2191 if(bAbbreviation
? lcl_FindAbbreviation(pList
, sWord
) : pList
->find(sWord
) != pList
->end() )
2197 OUString
SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag
& rLanguageTag
,
2198 bool bNewFile
, bool bTst
, bool bUnlocalized
) const
2200 OUString sRet
, sExt( rLanguageTag
.getBcp47() );
2203 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2204 std::vector
< OUString
> vecFallBackStrings
= rLanguageTag
.getFallbackStrings(false);
2205 if (!vecFallBackStrings
.empty())
2206 sExt
= vecFallBackStrings
[0];
2209 sExt
= "_" + sExt
+ ".dat";
2211 sRet
= sUserAutoCorrFile
+ sExt
;
2213 sRet
= sShareAutoCorrFile
+ sExt
;
2216 // test first in the user directory - if not exist, then
2217 sRet
= sUserAutoCorrFile
+ sExt
;
2218 if( !FStatHelper::IsDocument( sRet
))
2219 sRet
= sShareAutoCorrFile
+ sExt
;
2224 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2225 SvxAutoCorrect
& rParent
,
2226 OUString aShareAutoCorrectFile
,
2227 OUString aUserAutoCorrectFile
)
2228 : sShareAutoCorrFile(std::move( aShareAutoCorrectFile
)),
2229 sUserAutoCorrFile(std::move( aUserAutoCorrectFile
)),
2230 aModifiedDate( Date::EMPTY
),
2231 aModifiedTime( tools::Time::EMPTY
),
2232 aLastCheckTime( tools::Time::EMPTY
),
2233 rAutoCorrect(rParent
),
2234 nFlags(ACFlags::NONE
)
2238 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2242 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2244 // Access the file system only every 2 minutes to check the date stamp
2247 tools::Time
nMinTime( 0, 2 );
2248 tools::Time
nAktTime( tools::Time::SYSTEM
);
2249 if( aLastCheckTime
<= nAktTime
) // overflow?
2251 nAktTime
-= aLastCheckTime
;
2252 if( nAktTime
> nMinTime
) // min time past
2254 Date
aTstDate( Date::EMPTY
); tools::Time
aTstTime( tools::Time::EMPTY
);
2255 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile
,
2256 &aTstDate
, &aTstTime
) &&
2257 ( aModifiedDate
!= aTstDate
|| aModifiedTime
!= aTstTime
))
2260 // then remove all the lists fast!
2261 if( (ACFlags::CplSttLstLoad
& nFlags
) && pCplStt_ExcptLst
)
2263 pCplStt_ExcptLst
.reset();
2265 if( (ACFlags::WordStartLstLoad
& nFlags
) && pWordStart_ExcptLst
)
2267 pWordStart_ExcptLst
.reset();
2269 if( (ACFlags::ChgWordLstLoad
& nFlags
) && pAutocorr_List
)
2271 pAutocorr_List
.reset();
2273 nFlags
&= ~ACFlags(ACFlags::CplSttLstLoad
| ACFlags::WordStartLstLoad
| ACFlags::ChgWordLstLoad
);
2275 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2280 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2281 std::unique_ptr
<SvStringsISortDtor
>& rpLst
,
2282 const OUString
& sStrmName
,
2283 tools::SvRef
<SotStorage
>& rStg
)
2288 rpLst
.reset( new SvStringsISortDtor
);
2291 if( rStg
.is() && rStg
->IsStream( sStrmName
) )
2293 tools::SvRef
<SotStorageStream
> xStrm
= rStg
->OpenSotStream( sStrmName
,
2294 ( StreamMode::READ
| StreamMode::SHARE_DENYWRITE
| StreamMode::NOCREATE
) );
2295 if( ERRCODE_NONE
!= xStrm
->GetError())
2299 RemoveStream_Imp( sStrmName
);
2303 uno::Reference
< uno::XComponentContext
> xContext
=
2304 comphelper::getProcessComponentContext();
2306 xml::sax::InputSource aParserInput
;
2307 aParserInput
.sSystemId
= sStrmName
;
2310 xStrm
->SetBufferSize( 8 * 1024 );
2311 aParserInput
.aInputStream
= new utl::OInputStreamWrapper( *xStrm
);
2314 uno::Reference
< xml::sax::XFastDocumentHandler
> xFilter
= new SvXMLExceptionListImport ( xContext
, *rpLst
);
2316 // connect parser and filter
2317 uno::Reference
< xml::sax::XFastParser
> xParser
= xml::sax::FastParser::create( xContext
);
2318 uno::Reference
<xml::sax::XFastTokenHandler
> xTokenHandler
= new SvXMLAutoCorrectTokenHandler
;
2319 xParser
->setFastDocumentHandler( xFilter
);
2320 xParser
->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE
);
2321 xParser
->setTokenHandler( xTokenHandler
);
2326 xParser
->parseStream( aParserInput
);
2328 catch( const xml::sax::SAXParseException
& )
2332 catch( const xml::sax::SAXException
& )
2336 catch( const io::IOException
& )
2344 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile
,
2345 &aModifiedDate
, &aModifiedTime
);
2346 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2351 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2352 const SvStringsISortDtor
& rLst
,
2353 const OUString
& sStrmName
,
2354 tools::SvRef
<SotStorage
> const &rStg
,
2362 rStg
->Remove( sStrmName
);
2367 tools::SvRef
<SotStorageStream
> xStrm
= rStg
->OpenSotStream( sStrmName
,
2368 ( StreamMode::READ
| StreamMode::WRITE
| StreamMode::SHARE_DENYWRITE
) );
2371 xStrm
->SetSize( 0 );
2372 xStrm
->SetBufferSize( 8192 );
2373 xStrm
->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2376 uno::Reference
< uno::XComponentContext
> xContext
=
2377 comphelper::getProcessComponentContext();
2379 uno::Reference
< xml::sax::XWriter
> xWriter
= xml::sax::Writer::create(xContext
);
2380 uno::Reference
< io::XOutputStream
> xOut
= new utl::OOutputStreamWrapper( *xStrm
);
2381 xWriter
->setOutputStream(xOut
);
2383 uno::Reference
< xml::sax::XDocumentHandler
> xHandler(xWriter
, UNO_QUERY_THROW
);
2384 rtl::Reference
< SvXMLExceptionListExport
> xExp( new SvXMLExceptionListExport( xContext
, rLst
, sStrmName
, xHandler
) );
2386 xExp
->exportDoc( XML_BLOCK_LIST
);
2389 if( xStrm
->GetError() == ERRCODE_NONE
)
2395 if( ERRCODE_NONE
!= rStg
->GetError() )
2397 rStg
->Remove( sStrmName
);
2406 SvxAutocorrWordList
* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2408 if( pAutocorr_List
)
2409 pAutocorr_List
->DeleteAndDestroyAll();
2411 pAutocorr_List
.reset( new SvxAutocorrWordList() );
2415 uno::Reference
< embed::XStorage
> xStg
= comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile
, embed::ElementModes::READ
);
2416 uno::Reference
< io::XStream
> xStrm
= xStg
->openStreamElement( pXMLImplAutocorr_ListStr
, embed::ElementModes::READ
);
2417 uno::Reference
< uno::XComponentContext
> xContext
= comphelper::getProcessComponentContext();
2419 xml::sax::InputSource aParserInput
;
2420 aParserInput
.sSystemId
= pXMLImplAutocorr_ListStr
;
2421 aParserInput
.aInputStream
= xStrm
->getInputStream();
2424 uno::Reference
< xml::sax::XFastParser
> xParser
= xml::sax::FastParser::create(xContext
);
2425 SAL_INFO("editeng", "AutoCorrect Import" );
2426 uno::Reference
< xml::sax::XFastDocumentHandler
> xFilter
= new SvXMLAutoCorrectImport( xContext
, pAutocorr_List
.get(), rAutoCorrect
, xStg
);
2427 uno::Reference
<xml::sax::XFastTokenHandler
> xTokenHandler
= new SvXMLAutoCorrectTokenHandler
;
2429 // connect parser and filter
2430 xParser
->setFastDocumentHandler( xFilter
);
2431 xParser
->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE
);
2432 xParser
->setTokenHandler(xTokenHandler
);
2435 xParser
->parseStream( aParserInput
);
2437 catch ( const uno::Exception
& )
2439 TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile
);
2443 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile
,
2444 &aModifiedDate
, &aModifiedTime
);
2445 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2447 return pAutocorr_List
.get();
2450 const SvxAutocorrWordList
* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2452 if( !( ACFlags::ChgWordLstLoad
& nFlags
) || IsFileChanged_Imp() )
2454 LoadAutocorrWordList();
2455 if( !pAutocorr_List
)
2457 OSL_ENSURE( false, "No valid list" );
2458 pAutocorr_List
.reset( new SvxAutocorrWordList() );
2460 nFlags
|= ACFlags::ChgWordLstLoad
;
2462 return pAutocorr_List
.get();
2465 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2467 if( !( ACFlags::CplSttLstLoad
& nFlags
) || IsFileChanged_Imp() )
2469 LoadCplSttExceptList();
2470 if( !pCplStt_ExcptLst
)
2472 OSL_ENSURE( false, "No valid list" );
2473 pCplStt_ExcptLst
.reset( new SvStringsISortDtor
);
2475 nFlags
|= ACFlags::CplSttLstLoad
;
2477 return pCplStt_ExcptLst
.get();
2480 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString
& rNew
)
2483 if( !rNew
.isEmpty() && GetCplSttExceptList()->insert( rNew
).second
)
2485 MakeUserStorage_Impl();
2486 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2488 SaveExceptList_Imp( *pCplStt_ExcptLst
, pXMLImplCplStt_ExcptLstStr
, xStg
);
2492 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2493 &aModifiedDate
, &aModifiedTime
);
2494 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2500 bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString
& rNew
)
2503 if( !rNew
.isEmpty() && GetWordStartExceptList()->insert( rNew
).second
)
2505 MakeUserStorage_Impl();
2506 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2508 SaveExceptList_Imp( *pWordStart_ExcptLst
, pXMLImplWordStart_ExcptLstStr
, xStg
);
2512 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2513 &aModifiedDate
, &aModifiedTime
);
2514 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2520 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2524 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sShareAutoCorrFile
, StreamMode::READ
| StreamMode::SHARE_DENYNONE
);
2525 if( xStg
.is() && xStg
->IsContained( pXMLImplCplStt_ExcptLstStr
) )
2526 LoadXMLExceptList_Imp( pCplStt_ExcptLst
, pXMLImplCplStt_ExcptLstStr
, xStg
);
2528 catch (const css::ucb::ContentCreationException
&)
2531 return pCplStt_ExcptLst
.get();
2534 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2536 MakeUserStorage_Impl();
2537 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2539 SaveExceptList_Imp( *pCplStt_ExcptLst
, pXMLImplCplStt_ExcptLstStr
, xStg
);
2544 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2545 &aModifiedDate
, &aModifiedTime
);
2546 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2549 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::LoadWordStartExceptList()
2553 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sShareAutoCorrFile
, StreamMode::READ
| StreamMode::SHARE_DENYNONE
);
2554 if( xStg
.is() && xStg
->IsContained( pXMLImplWordStart_ExcptLstStr
) )
2555 LoadXMLExceptList_Imp( pWordStart_ExcptLst
, pXMLImplWordStart_ExcptLstStr
, xStg
);
2557 catch (const css::ucb::ContentCreationException
&)
2559 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList");
2561 return pWordStart_ExcptLst
.get();
2564 void SvxAutoCorrectLanguageLists::SaveWordStartExceptList()
2566 MakeUserStorage_Impl();
2567 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2569 SaveExceptList_Imp( *pWordStart_ExcptLst
, pXMLImplWordStart_ExcptLstStr
, xStg
);
2573 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2574 &aModifiedDate
, &aModifiedTime
);
2575 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2578 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::GetWordStartExceptList()
2580 if( !( ACFlags::WordStartLstLoad
& nFlags
) || IsFileChanged_Imp() )
2582 LoadWordStartExceptList();
2583 if( !pWordStart_ExcptLst
)
2585 OSL_ENSURE( false, "No valid list" );
2586 pWordStart_ExcptLst
.reset( new SvStringsISortDtor
);
2588 nFlags
|= ACFlags::WordStartLstLoad
;
2590 return pWordStart_ExcptLst
.get();
2593 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString
& rName
)
2595 if( sShareAutoCorrFile
!= sUserAutoCorrFile
)
2597 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2598 if( xStg
.is() && ERRCODE_NONE
== xStg
->GetError() &&
2599 xStg
->IsStream( rName
) )
2601 xStg
->Remove( rName
);
2609 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2611 // The conversion needs to happen if the file is already in the user
2612 // directory and is in the old format. Additionally it needs to
2613 // happen when the file is being copied from share to user.
2615 bool bError
= false, bConvert
= false, bCopy
= false;
2616 INetURLObject aDest
;
2617 INetURLObject aSource
;
2619 if (sUserAutoCorrFile
!= sShareAutoCorrFile
)
2621 aSource
= INetURLObject ( sShareAutoCorrFile
);
2622 aDest
= INetURLObject ( sUserAutoCorrFile
);
2623 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile
) )
2625 aDest
.SetExtension ( u
"bak" );
2630 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile
) )
2632 aSource
= INetURLObject ( sUserAutoCorrFile
);
2633 aDest
= INetURLObject ( sUserAutoCorrFile
);
2634 aDest
.SetExtension ( u
"bak" );
2635 bCopy
= bConvert
= true;
2641 OUString
sMain(aDest
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
));
2642 sal_Int32 nSlashPos
= sMain
.lastIndexOf('/');
2643 sMain
= sMain
.copy(0, nSlashPos
);
2644 ::ucbhelper::Content
aNewContent( sMain
, uno::Reference
< XCommandEnvironment
>(), comphelper::getProcessComponentContext() );
2646 aInfo
.NameClash
= NameClash::OVERWRITE
;
2647 aInfo
.NewTitle
= aDest
.GetLastName();
2648 aInfo
.SourceURL
= aSource
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
);
2649 aInfo
.MoveData
= false;
2650 aNewContent
.executeCommand( "transfer", Any(aInfo
));
2657 if (bConvert
&& !bError
)
2659 tools::SvRef
<SotStorage
> xSrcStg
= new SotStorage( aDest
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
), StreamMode::READ
);
2660 tools::SvRef
<SotStorage
> xDstStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::WRITE
);
2662 if( xSrcStg
.is() && xDstStg
.is() )
2664 std::unique_ptr
<SvStringsISortDtor
> pTmpWordList
;
2666 if (xSrcStg
->IsContained( pXMLImplWordStart_ExcptLstStr
) )
2667 LoadXMLExceptList_Imp( pTmpWordList
, pXMLImplWordStart_ExcptLstStr
, xSrcStg
);
2671 SaveExceptList_Imp( *pTmpWordList
, pXMLImplWordStart_ExcptLstStr
, xDstStg
, true );
2672 pTmpWordList
.reset();
2676 if (xSrcStg
->IsContained( pXMLImplCplStt_ExcptLstStr
) )
2677 LoadXMLExceptList_Imp( pTmpWordList
, pXMLImplCplStt_ExcptLstStr
, xSrcStg
);
2681 SaveExceptList_Imp( *pTmpWordList
, pXMLImplCplStt_ExcptLstStr
, xDstStg
, true );
2682 pTmpWordList
->clear();
2685 GetAutocorrWordList();
2686 MakeBlocklist_Imp( *xDstStg
);
2687 sShareAutoCorrFile
= sUserAutoCorrFile
;
2691 ::ucbhelper::Content
aContent ( aDest
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
), uno::Reference
< XCommandEnvironment
>(), comphelper::getProcessComponentContext() );
2692 aContent
.executeCommand ( "delete", Any ( true ) );
2699 else if( bCopy
&& !bError
)
2700 sShareAutoCorrFile
= sUserAutoCorrFile
;
2703 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage
& rStg
)
2705 bool bRet
= true, bRemove
= !pAutocorr_List
|| pAutocorr_List
->empty();
2708 tools::SvRef
<SotStorageStream
> refList
= rStg
.OpenSotStream( pXMLImplAutocorr_ListStr
,
2709 ( StreamMode::READ
| StreamMode::WRITE
| StreamMode::SHARE_DENYWRITE
) );
2712 refList
->SetSize( 0 );
2713 refList
->SetBufferSize( 8192 );
2714 refList
->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2716 uno::Reference
< uno::XComponentContext
> xContext
=
2717 comphelper::getProcessComponentContext();
2719 uno::Reference
< xml::sax::XWriter
> xWriter
= xml::sax::Writer::create(xContext
);
2720 uno::Reference
< io::XOutputStream
> xOut
= new utl::OOutputStreamWrapper( *refList
);
2721 xWriter
->setOutputStream(xOut
);
2723 rtl::Reference
< SvXMLAutoCorrectExport
> xExp( new SvXMLAutoCorrectExport( xContext
, pAutocorr_List
.get(), pXMLImplAutocorr_ListStr
, xWriter
) );
2725 xExp
->exportDoc( XML_BLOCK_LIST
);
2728 bRet
= ERRCODE_NONE
== refList
->GetError();
2733 if( ERRCODE_NONE
!= rStg
.GetError() )
2746 rStg
.Remove( pXMLImplAutocorr_ListStr
);
2753 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector
<SvxAutocorrWord
>& aNewEntries
, std::vector
<SvxAutocorrWord
>& aDeleteEntries
)
2755 // First get the current list!
2756 GetAutocorrWordList();
2758 MakeUserStorage_Impl();
2759 tools::SvRef
<SotStorage
> xStorage
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2761 bool bRet
= xStorage
.is() && ERRCODE_NONE
== xStorage
->GetError();
2765 for (SvxAutocorrWord
& aWordToDelete
: aDeleteEntries
)
2767 std::optional
<SvxAutocorrWord
> xFoundEntry
= pAutocorr_List
->FindAndRemove( &aWordToDelete
);
2770 if( !xFoundEntry
->IsTextOnly() )
2772 OUString
aName( aWordToDelete
.GetShort() );
2773 if (xStorage
->IsOLEStorage())
2774 aName
= EncryptBlockName_Imp(aName
);
2776 GeneratePackageName ( aWordToDelete
.GetShort(), aName
);
2778 if( xStorage
->IsContained( aName
) )
2780 xStorage
->Remove( aName
);
2781 bRet
= xStorage
->Commit();
2787 for (const SvxAutocorrWord
& aNewEntrie
: aNewEntries
)
2789 SvxAutocorrWord
aWordToAdd(aNewEntrie
.GetShort(), aNewEntrie
.GetLong(), true );
2790 std::optional
<SvxAutocorrWord
> xRemoved
= pAutocorr_List
->FindAndRemove( &aWordToAdd
);
2793 if( !xRemoved
->IsTextOnly() )
2795 // Still have to remove the Storage
2796 OUString
sStorageName( aWordToAdd
.GetShort() );
2797 if (xStorage
->IsOLEStorage())
2798 sStorageName
= EncryptBlockName_Imp(sStorageName
);
2800 GeneratePackageName ( aWordToAdd
.GetShort(), sStorageName
);
2802 if( xStorage
->IsContained( sStorageName
) )
2803 xStorage
->Remove( sStorageName
);
2806 bRet
= pAutocorr_List
->Insert( std::move(aWordToAdd
) );
2816 bRet
= MakeBlocklist_Imp( *xStorage
);
2822 bool SvxAutoCorrectLanguageLists::PutText( const OUString
& rShort
, const OUString
& rLong
)
2824 // First get the current list!
2825 GetAutocorrWordList();
2827 MakeUserStorage_Impl();
2828 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2830 bool bRet
= xStg
.is() && ERRCODE_NONE
== xStg
->GetError();
2832 // Update the word list
2835 SvxAutocorrWord
aNew(rShort
, rLong
, true );
2836 std::optional
<SvxAutocorrWord
> xRemove
= pAutocorr_List
->FindAndRemove( &aNew
);
2839 if( !xRemove
->IsTextOnly() )
2841 // Still have to remove the Storage
2842 OUString
sStgNm( rShort
);
2843 if (xStg
->IsOLEStorage())
2844 sStgNm
= EncryptBlockName_Imp(sStgNm
);
2846 GeneratePackageName ( rShort
, sStgNm
);
2848 if( xStg
->IsContained( sStgNm
) )
2849 xStg
->Remove( sStgNm
);
2853 if( pAutocorr_List
->Insert( std::move(aNew
) ) )
2855 bRet
= MakeBlocklist_Imp( *xStg
);
2866 void SvxAutoCorrectLanguageLists::PutText( const OUString
& rShort
,
2867 SfxObjectShell
& rShell
)
2869 // First get the current list!
2870 GetAutocorrWordList();
2872 MakeUserStorage_Impl();
2876 uno::Reference
< embed::XStorage
> xStg
= comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile
, embed::ElementModes::READWRITE
);
2878 bool bRet
= rAutoCorrect
.PutText( xStg
, sUserAutoCorrFile
, rShort
, rShell
, sLong
);
2881 // Update the word list
2884 if( pAutocorr_List
->Insert( SvxAutocorrWord(rShort
, sLong
, false) ) )
2886 tools::SvRef
<SotStorage
> xStor
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2887 MakeBlocklist_Imp( *xStor
);
2891 catch ( const uno::Exception
& )
2896 // Keep the list sorted ...
2897 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2899 bool operator()( SvxAutocorrWord
const & lhs
, SvxAutocorrWord
const & rhs
) const
2901 CollatorWrapper
& rCmp
= ::GetCollatorWrapper();
2902 return rCmp
.compareString( lhs
.GetShort(), rhs
.GetShort() ) < 0;
2908 typedef std::unordered_map
<OUString
, SvxAutocorrWord
> AutocorrWordHashType
;
2912 struct SvxAutocorrWordList::Impl
2915 // only one of these contains the data
2916 // maSortedVector is manually sorted so we can optimise data movement
2917 mutable AutocorrWordSetType maSortedVector
;
2918 mutable AutocorrWordHashType maHash
; // key is 'Short'
2920 void DeleteAndDestroyAll()
2923 maSortedVector
.clear();
2927 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl
) {}
2929 SvxAutocorrWordList::~SvxAutocorrWordList()
2933 void SvxAutocorrWordList::DeleteAndDestroyAll()
2935 mpImpl
->DeleteAndDestroyAll();
2938 // returns true if inserted
2939 const SvxAutocorrWord
* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord
) const
2941 if ( mpImpl
->maSortedVector
.empty() ) // use the hash
2943 OUString aShort
= aWord
.GetShort();
2944 auto [it
,inserted
] = mpImpl
->maHash
.emplace( std::move(aShort
), std::move(aWord
) );
2946 return &(it
->second
);
2951 auto it
= std::lower_bound(mpImpl
->maSortedVector
.begin(), mpImpl
->maSortedVector
.end(), aWord
, CompareSvxAutocorrWordList());
2952 CollatorWrapper
& rCmp
= ::GetCollatorWrapper();
2953 if (it
== mpImpl
->maSortedVector
.end() || rCmp
.compareString( aWord
.GetShort(), it
->GetShort() ) != 0)
2955 it
= mpImpl
->maSortedVector
.insert(it
, std::move(aWord
));
2962 void SvxAutocorrWordList::LoadEntry(const OUString
& sWrong
, const OUString
& sRight
, bool bOnlyTxt
)
2964 (void)Insert(SvxAutocorrWord( sWrong
, sRight
, bOnlyTxt
));
2967 bool SvxAutocorrWordList::empty() const
2969 return mpImpl
->maHash
.empty() && mpImpl
->maSortedVector
.empty();
2972 std::optional
<SvxAutocorrWord
> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord
*pWord
)
2975 if ( mpImpl
->maSortedVector
.empty() ) // use the hash
2977 AutocorrWordHashType::iterator it
= mpImpl
->maHash
.find( pWord
->GetShort() );
2978 if( it
!= mpImpl
->maHash
.end() )
2980 SvxAutocorrWord pMatch
= std::move(it
->second
);
2981 mpImpl
->maHash
.erase (it
);
2987 auto it
= std::lower_bound(mpImpl
->maSortedVector
.begin(), mpImpl
->maSortedVector
.end(), *pWord
, CompareSvxAutocorrWordList());
2988 if (it
!= mpImpl
->maSortedVector
.end() && !CompareSvxAutocorrWordList()(*pWord
, *it
))
2990 SvxAutocorrWord pMatch
= std::move(*it
);
2991 mpImpl
->maSortedVector
.erase (it
);
2995 return std::optional
<SvxAutocorrWord
>();
2998 // return the sorted contents - defer sorting until we have to.
2999 const SvxAutocorrWordList::AutocorrWordSetType
& SvxAutocorrWordList::getSortedContent() const
3001 // convert from hash to set permanently
3002 if ( mpImpl
->maSortedVector
.empty() )
3004 std::vector
<SvxAutocorrWord
> tmp
;
3005 tmp
.reserve(mpImpl
->maHash
.size());
3006 for (auto & rPair
: mpImpl
->maHash
)
3007 tmp
.emplace_back(std::move(rPair
.second
));
3008 mpImpl
->maHash
.clear();
3009 // sort twice - this gets the list into mostly-sorted order, which
3010 // reduces the number of times we need to invoke the expensive ICU collate fn.
3011 std::sort(tmp
.begin(), tmp
.end(),
3012 [] ( SvxAutocorrWord
const & lhs
, SvxAutocorrWord
const & rhs
)
3014 return lhs
.GetShort() < rhs
.GetShort();
3016 // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
3017 // stable_sort is twice as fast as sort in this situation because it does
3018 // fewer comparison operations.
3019 std::stable_sort(tmp
.begin(), tmp
.end(), CompareSvxAutocorrWordList());
3020 mpImpl
->maSortedVector
= std::move(tmp
);
3022 return mpImpl
->maSortedVector
;
3025 const SvxAutocorrWord
* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord
*pFnd
,
3026 std::u16string_view rTxt
,
3028 sal_Int32 nEndPos
) const
3030 const OUString
& rChk
= pFnd
->GetShort();
3032 sal_Int32 left_wildcard
= rChk
.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
3033 sal_Int32 right_wildcard
= rChk
.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
3034 assert(nEndPos
>= 0);
3035 size_t nSttWdPos
= nEndPos
;
3037 // direct replacement of keywords surrounded by colons (for example, ":name:")
3038 bool bColonNameColon
= static_cast<sal_Int32
>(rTxt
.size()) > nEndPos
&&
3039 rTxt
[nEndPos
] == ':' && rChk
[0] == ':' && rChk
.endsWith(":");
3040 if ( nEndPos
+ (bColonNameColon
? 1 : 0) < rChk
.getLength() - left_wildcard
- right_wildcard
)
3043 bool bWasWordDelim
= false;
3044 sal_Int32 nCalcStt
= nEndPos
- rChk
.getLength() + left_wildcard
;
3045 if (bColonNameColon
)
3047 if( !right_wildcard
&& ( !nCalcStt
|| nCalcStt
== rStt
|| left_wildcard
|| bColonNameColon
||
3048 ( nCalcStt
< rStt
&&
3049 IsWordDelim( rTxt
[ nCalcStt
- 1 ] ))) )
3051 TransliterationWrapper
& rCmp
= GetIgnoreTranslWrapper();
3052 OUString
sWord( rTxt
.substr(nCalcStt
, rChk
.getLength() - left_wildcard
) );
3053 if( (!left_wildcard
&& rCmp
.isEqual( rChk
, sWord
)) || (left_wildcard
&& rCmp
.isEqual( rChk
.copy(left_wildcard
), sWord
) ))
3058 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
3059 if (static_cast<sal_Int32
>(rTxt
.size()) > nEndPos
&& rTxt
[nEndPos
] == '/' && rChk
.indexOf('/') != -1)
3063 // get the first word delimiter position before the matching ".*word" pattern
3064 while( rStt
&& !(bWasWordDelim
= IsWordDelim( rTxt
[ --rStt
])))
3066 if (bWasWordDelim
) rStt
++;
3067 OUString
left_pattern( rTxt
.substr(rStt
, nEndPos
- rStt
- rChk
.getLength() + left_wildcard
) );
3068 // avoid double spaces before simple "word" replacement
3069 left_pattern
+= (left_pattern
.getLength() == 0 && pFnd
->GetLong()[0] == 0x20) ? pFnd
->GetLong().subView(1) : pFnd
->GetLong();
3070 if( const SvxAutocorrWord
* pNew
= Insert( SvxAutocorrWord(OUString(rTxt
.substr(rStt
, nEndPos
- rStt
)), left_pattern
) ) )
3074 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
3075 if ( right_wildcard
)
3078 OUString
sTmp( rChk
.copy( left_wildcard
, rChk
.getLength() - left_wildcard
- right_wildcard
) );
3079 // Get the last word delimiter position
3082 while( nSttWdPos
&& !(bWasWordDelim
= IsWordDelim( rTxt
[ --nSttWdPos
])))
3084 // search the first occurrence (with a left word delimitation, if needed)
3085 size_t nFndPos
= std::u16string_view::npos
;
3087 nFndPos
= rTxt
.find( sTmp
, nFndPos
+ 1);
3088 if (nFndPos
== std::u16string_view::npos
)
3090 not_suffix
= bWasWordDelim
&& (nSttWdPos
>= (nFndPos
+ sTmp
.getLength()));
3091 } while ( (!left_wildcard
&& nFndPos
&& !IsWordDelim( rTxt
[ nFndPos
- 1 ])) || not_suffix
);
3093 if ( nFndPos
!= std::u16string_view::npos
)
3095 sal_Int32 extra_repl
= static_cast<sal_Int32
>(nFndPos
) + sTmp
.getLength() > nEndPos
? 1: 0; // for patterns with terminating characters, eg. "a:"
3097 if ( left_wildcard
)
3099 // get the first word delimiter position before the matching ".*word.*" pattern
3100 while( nFndPos
&& !(bWasWordDelim
= IsWordDelim( rTxt
[ --nFndPos
])))
3102 if (bWasWordDelim
) nFndPos
++;
3104 if (nEndPos
+ extra_repl
<= static_cast<sal_Int32
>(nFndPos
))
3108 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
3109 OUString
aShort( rTxt
.substr(nFndPos
, nEndPos
- nFndPos
+ extra_repl
) );
3113 if ( !left_wildcard
)
3115 sal_Int32 siz
= nEndPos
- nFndPos
- sTmp
.getLength();
3116 aLong
= pFnd
->GetLong() + (siz
> 0 ? rTxt
.substr(nFndPos
+ sTmp
.getLength(), siz
) : u
"");
3120 nSttWdPos
= rTxt
.find( sTmp
, nFndPos
);
3121 if (nSttWdPos
!= std::u16string_view::npos
)
3123 sal_Int32
nTmp(nFndPos
);
3124 while (nTmp
< static_cast<sal_Int32
>(nSttWdPos
) && !IsWordDelim(rTxt
[nTmp
]))
3126 if (nTmp
< static_cast<sal_Int32
>(nSttWdPos
))
3127 break; // word delimiter found
3128 buf
.append(rTxt
.substr(nFndPos
, nSttWdPos
- nFndPos
)).append(pFnd
->GetLong());
3129 nFndPos
= nSttWdPos
+ sTmp
.getLength();
3131 } while (nSttWdPos
!= std::u16string_view::npos
);
3132 if (static_cast<sal_Int32
>(nEndPos
- nFndPos
) > extra_repl
)
3133 buf
.append(rTxt
.substr(nFndPos
, nEndPos
- nFndPos
));
3134 aLong
= buf
.makeStringAndClear();
3136 if ( const SvxAutocorrWord
* pNew
= Insert( SvxAutocorrWord(aShort
, aLong
) ) )
3138 if ( (static_cast<sal_Int32
>(rTxt
.size()) > nEndPos
&& IsWordDelim(rTxt
[nEndPos
])) || static_cast<sal_Int32
>(rTxt
.size()) == nEndPos
)
3146 const SvxAutocorrWord
* SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt
, sal_Int32
& rStt
,
3147 sal_Int32 nEndPos
) const
3149 for (auto const& elem
: mpImpl
->maHash
)
3151 if( const SvxAutocorrWord
*pTmp
= WordMatches( &elem
.second
, rTxt
, rStt
, nEndPos
) )
3155 for (auto const& elem
: mpImpl
->maSortedVector
)
3157 if( const SvxAutocorrWord
*pTmp
= WordMatches( &elem
, rTxt
, rStt
, nEndPos
) )
3163 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */