1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
23 #include <string_view>
24 #include <sal/config.h>
26 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
27 #include <com/sun/star/embed/XStorage.hpp>
28 #include <com/sun/star/io/IOException.hpp>
29 #include <com/sun/star/io/XStream.hpp>
30 #include <tools/urlobj.hxx>
31 #include <i18nlangtag/mslangid.hxx>
32 #include <i18nutil/transliteration.hxx>
33 #include <sal/log.hxx>
34 #include <osl/diagnose.h>
35 #include <vcl/svapp.hxx>
36 #include <vcl/settings.hxx>
37 #include <svl/fstathelper.hxx>
38 #include <svl/urihelper.hxx>
39 #include <unotools/charclass.hxx>
40 #include <com/sun/star/i18n/UnicodeType.hpp>
41 #include <unotools/collatorwrapper.hxx>
42 #include <com/sun/star/i18n/UnicodeScript.hpp>
43 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
44 #include <unotools/localedatawrapper.hxx>
45 #include <unotools/transliterationwrapper.hxx>
46 #include <comphelper/processfactory.hxx>
47 #include <comphelper/sequence.hxx>
48 #include <comphelper/storagehelper.hxx>
49 #include <o3tl/string_view.hxx>
50 #include <editeng/editids.hrc>
51 #include <sot/storage.hxx>
52 #include <editeng/udlnitem.hxx>
53 #include <editeng/wghtitem.hxx>
54 #include <editeng/postitem.hxx>
55 #include <editeng/crossedoutitem.hxx>
56 #include <editeng/escapementitem.hxx>
57 #include <editeng/svxacorr.hxx>
58 #include <editeng/unolingu.hxx>
59 #include <vcl/window.hxx>
60 #include <com/sun/star/xml/sax/InputSource.hpp>
61 #include <com/sun/star/xml/sax/FastParser.hpp>
62 #include <com/sun/star/xml/sax/Writer.hpp>
63 #include <com/sun/star/xml/sax/SAXParseException.hpp>
64 #include <unotools/streamwrap.hxx>
65 #include "SvXMLAutoCorrectImport.hxx"
66 #include "SvXMLAutoCorrectExport.hxx"
67 #include "SvXMLAutoCorrectTokenHandler.hxx"
68 #include <ucbhelper/content.hxx>
69 #include <com/sun/star/ucb/ContentCreationException.hpp>
70 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
71 #include <com/sun/star/ucb/TransferInfo.hpp>
72 #include <com/sun/star/ucb/NameClash.hpp>
73 #include <comphelper/diagnose_ex.hxx>
74 #include <xmloff/xmltoken.hxx>
75 #include <unordered_map>
76 #include <rtl/character.hxx>
78 using namespace ::com::sun::star::ucb
;
79 using namespace ::com::sun::star::uno
;
80 using namespace ::com::sun::star::xml::sax
;
81 using namespace ::com::sun::star
;
82 using namespace ::xmloff::token
;
83 using namespace ::utl
;
90 ExclamationMark
= 0x02,
97 template<> struct typed_flags
<Flags
> : is_typed_flags
<Flags
, 0x07> {};
99 const sal_Unicode cNonBreakingSpace
= 0xA0; // UNICODE code for no break space
101 constexpr OUString pXMLImplWordStart_ExcptLstStr
= u
"WordExceptList.xml"_ustr
;
102 constexpr OUString pXMLImplCplStt_ExcptLstStr
= u
"SentenceExceptList.xml"_ustr
;
103 constexpr OUString pXMLImplAutocorr_ListStr
= u
"DocumentList.xml"_ustr
;
// tdf#54409: typographical quotation marks are checked as well, for the case
// that the ASCII quotation marks were skipped.
// NOTE(review): it is unclear why U+0083, U+0084, U+0089 and U+0091..U+0094
// are treated as "begin characters"; they are part of both sets.

/// Characters skipped at the beginning of a word: opening brackets and all
/// kinds of quotation marks.
constexpr std::u16string_view sImplSttSkipChars
    = u"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";

/// Characters skipped at the end of a word: closing brackets and the same
/// quotation marks as above.
constexpr std::u16string_view sImplEndSkipChars
    = u"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";
113 static OUString
EncryptBlockName_Imp(std::u16string_view rName
);
115 static bool NonFieldWordDelim( const sal_Unicode c
)
117 return ' ' == c
|| '\t' == c
|| 0x0a == c
||
118 cNonBreakingSpace
== c
|| 0x2011 == c
;
121 static bool IsWordDelim( const sal_Unicode c
)
123 return c
== 0x1 || NonFieldWordDelim(c
);
127 static bool IsLowerLetter( sal_Int32 nCharType
)
129 return CharClass::isLetterType( nCharType
) &&
130 ( css::i18n::KCharacterType::LOWER
& nCharType
);
133 static bool IsUpperLetter( sal_Int32 nCharType
)
135 return CharClass::isLetterType( nCharType
) &&
136 ( css::i18n::KCharacterType::UPPER
& nCharType
);
139 static bool lcl_IsUnsupportedUnicodeChar( CharClass
const & rCC
, const OUString
& rTxt
,
140 sal_Int32 nStt
, sal_Int32 nEnd
)
142 for( ; nStt
< nEnd
; ++nStt
)
144 css::i18n::UnicodeScript nScript
= rCC
.getScript( rTxt
, nStt
);
147 case css::i18n::UnicodeScript_kCJKRadicalsSupplement
:
148 case css::i18n::UnicodeScript_kHangulJamo
:
149 case css::i18n::UnicodeScript_kCJKSymbolPunctuation
:
150 case css::i18n::UnicodeScript_kHiragana
:
151 case css::i18n::UnicodeScript_kKatakana
:
152 case css::i18n::UnicodeScript_kHangulCompatibilityJamo
:
153 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth
:
154 case css::i18n::UnicodeScript_kCJKCompatibility
:
155 case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA
:
156 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph
:
157 case css::i18n::UnicodeScript_kHangulSyllable
:
158 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph
:
159 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm
:
161 default: ; //do nothing
167 static bool lcl_IsSymbolChar( CharClass
const & rCC
, const OUString
& rTxt
,
168 sal_Int32 nStt
, sal_Int32 nEnd
)
170 for( ; nStt
< nEnd
; ++nStt
)
172 if( css::i18n::UnicodeType::PRIVATE_USE
== rCC
.getType( rTxt
, nStt
))
178 static bool lcl_IsInArr(std::u16string_view arr
, const sal_uInt32 c
)
180 return std::any_of(arr
.begin(), arr
.end(), [c
](const auto c1
) { return c1
== c
; });
183 SvxAutoCorrDoc::~SvxAutoCorrDoc()
187 // Called by the functions:
188 // - FnCapitalStartWord
189 // - FnCapitalStartSentence
190 // after the exchange of characters. Then the words, if necessary, can be inserted
191 // into the exception list.
192 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags
, sal_Int32
, const OUString
&,
197 LanguageType
SvxAutoCorrDoc::GetLanguage( sal_Int32
) const
199 return LANGUAGE_SYSTEM
;
202 static const LanguageTag
& GetAppLang()
204 return Application::GetSettings().GetLanguageTag();
207 /// Never use an unresolved LANGUAGE_SYSTEM.
208 static LanguageType
GetDocLanguage( const SvxAutoCorrDoc
& rDoc
, sal_Int32 nPos
)
210 LanguageType eLang
= rDoc
.GetLanguage( nPos
);
211 if (eLang
== LANGUAGE_SYSTEM
)
212 eLang
= GetAppLang().getLanguageType(); // the current work locale
216 static LocaleDataWrapper
& GetLocaleDataWrapper( LanguageType nLang
)
218 static std::unique_ptr
<LocaleDataWrapper
> xLclDtWrp
;
219 LanguageTag
aLcl( nLang
);
220 if (!xLclDtWrp
|| xLclDtWrp
->getLoadedLanguageTag() != aLcl
)
221 xLclDtWrp
.reset(new LocaleDataWrapper(std::move(aLcl
)));
224 static TransliterationWrapper
& GetIgnoreTranslWrapper()
226 static int bIsInit
= 0;
227 static TransliterationWrapper
aWrp( ::comphelper::getProcessComponentContext(),
228 TransliterationFlags::IGNORE_KANA
|
229 TransliterationFlags::IGNORE_WIDTH
);
232 aWrp
.loadModuleIfNeeded( GetAppLang().getLanguageType() );
237 static CollatorWrapper
& GetCollatorWrapper()
239 static CollatorWrapper aCollWrp
= []()
241 CollatorWrapper
tmp( ::comphelper::getProcessComponentContext() );
242 tmp
.loadDefaultCollator( GetAppLang().getLocale(), 0 );
248 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar
)
250 return cChar
== '\0' || cChar
== '\t' || cChar
== 0x0a ||
251 cChar
== ' ' || cChar
== '\'' || cChar
== '\"' ||
252 cChar
== '*' || cChar
== '_' || cChar
== '%' ||
253 cChar
== '.' || cChar
== ',' || cChar
== ';' ||
254 cChar
== ':' || cChar
== '?' || cChar
== '!' ||
255 cChar
== '<' || cChar
== '>' ||
256 cChar
== '/' || cChar
== '-';
261 bool IsCompoundWordDelimChar(sal_Unicode cChar
)
263 return cChar
== '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar
);
267 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar
)
269 return cChar
== '%' || cChar
== ';' || cChar
== ':' || cChar
== '?' || cChar
== '!' ||
270 cChar
== '/' /*case for the urls exception*/;
273 ACFlags
SvxAutoCorrect::GetDefaultFlags()
275 ACFlags nRet
= ACFlags::Autocorrect
276 | ACFlags::CapitalStartSentence
277 | ACFlags::CapitalStartWord
278 | ACFlags::ChgOrdinalNumber
279 | ACFlags::ChgToEnEmDash
280 | ACFlags::AddNonBrkSpace
281 | ACFlags::TransliterateRTL
282 | ACFlags::ChgAngleQuotes
283 | ACFlags::ChgWeightUnderl
284 | ACFlags::SetINetAttr
285 | ACFlags::SetDOIAttr
287 | ACFlags::SaveWordCplSttLst
288 | ACFlags::SaveWordWordStartLst
289 | ACFlags::CorrectCapsLock
;
290 LanguageType eLang
= GetAppLang().getLanguageType();
295 LANGUAGE_ENGLISH_AUS
,
296 LANGUAGE_ENGLISH_CAN
,
298 LANGUAGE_ENGLISH_EIRE
,
299 LANGUAGE_ENGLISH_SAFRICA
,
300 LANGUAGE_ENGLISH_JAMAICA
,
301 LANGUAGE_ENGLISH_CARIBBEAN
))
302 nRet
&= ~ACFlags(ACFlags::ChgQuotes
|ACFlags::ChgSglQuotes
);
306 constexpr sal_Unicode cEmDash
= 0x2014;
307 constexpr sal_Unicode cEnDash
= 0x2013;
308 constexpr OUString
sEmDash(u
"\u2014"_ustr
);
309 constexpr OUString
sEnDash(u
"\u2013"_ustr
);
310 constexpr sal_Unicode cApostrophe
= 0x2019;
311 constexpr sal_Unicode cLeftDoubleAngleQuote
= 0xAB;
312 constexpr sal_Unicode cRightDoubleAngleQuote
= 0xBB;
313 constexpr sal_Unicode cLeftSingleAngleQuote
= 0x2039;
314 constexpr sal_Unicode cRightSingleAngleQuote
= 0x203A;
315 // stop characters for searching preceding quotes
316 // (the first character is also the opening quote we are looking for)
317 const sal_Unicode aStopDoubleAngleQuoteStart
[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
318 const sal_Unicode aStopDoubleAngleQuoteEnd
[] = { cRightDoubleAngleQuote
, cLeftDoubleAngleQuote
, 0x201D, 0x201E, 0 }; // preceding >>
319 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
320 const sal_Unicode aStopDoubleAngleQuoteEndRo
[] = { cLeftDoubleAngleQuote
, cRightDoubleAngleQuote
, 0x201D, 0x201E, 0x201C, 0 };
321 const sal_Unicode aStopSingleQuoteEnd
[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
322 const sal_Unicode aStopSingleQuoteEndRuUa
[] = { 0x201E, 0x201C, cRightDoubleAngleQuote
, cLeftDoubleAngleQuote
, 0 };
324 SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile
,
325 OUString aUserAutocorrFile
)
326 : sShareAutoCorrFile(std::move( aShareAutocorrFile
))
327 , sUserAutoCorrFile(std::move( aUserAutocorrFile
))
328 , eCharClassLang( LANGUAGE_DONTKNOW
)
329 , nFlags(SvxAutoCorrect::GetDefaultFlags())
337 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect
& rCpy
)
338 : sShareAutoCorrFile( rCpy
.sShareAutoCorrFile
)
339 , sUserAutoCorrFile( rCpy
.sUserAutoCorrFile
)
340 , aSwFlags( rCpy
.aSwFlags
)
341 , eCharClassLang(rCpy
.eCharClassLang
)
342 , nFlags( rCpy
.nFlags
& ~ACFlags(ACFlags::ChgWordLstLoad
|ACFlags::CplSttLstLoad
|ACFlags::WordStartLstLoad
))
343 , cStartDQuote( rCpy
.cStartDQuote
)
344 , cEndDQuote( rCpy
.cEndDQuote
)
345 , cStartSQuote( rCpy
.cStartSQuote
)
346 , cEndSQuote( rCpy
.cEndSQuote
)
351 SvxAutoCorrect::~SvxAutoCorrect()
355 void SvxAutoCorrect::GetCharClass_( LanguageType eLang
)
357 moCharClass
.emplace( LanguageTag( eLang
) );
358 eCharClassLang
= eLang
;
361 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag
, bool bOn
)
363 ACFlags nOld
= nFlags
;
364 nFlags
= bOn
? nFlags
| nFlag
369 if( (nOld
& ACFlags::CapitalStartSentence
) != (nFlags
& ACFlags::CapitalStartSentence
) )
370 nFlags
&= ~ACFlags::CplSttLstLoad
;
371 if( (nOld
& ACFlags::CapitalStartWord
) != (nFlags
& ACFlags::CapitalStartWord
) )
372 nFlags
&= ~ACFlags::WordStartLstLoad
;
373 if( (nOld
& ACFlags::Autocorrect
) != (nFlags
& ACFlags::Autocorrect
) )
374 nFlags
&= ~ACFlags::ChgWordLstLoad
;
379 // Correct TWo INitial CApitals
380 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
381 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
384 CharClass
& rCC
= GetCharClass( eLang
);
386 // Delete all non alphanumeric. Test the characters at the beginning/end of
387 // the word ( recognizes: "(min.", "/min.", and so on.)
388 for( ; nSttPos
< nEndPos
; ++nSttPos
)
389 if( rCC
.isLetterNumeric( rTxt
, nSttPos
))
391 for( ; nSttPos
< nEndPos
; --nEndPos
)
392 if( rCC
.isLetterNumeric( rTxt
, nEndPos
- 1 ))
395 // Is the word a compounded word separated by delimiters?
396 // If so, keep track of all delimiters so each constituent
397 // word can be checked for two initial capital letters.
398 std::deque
<sal_Int32
> aDelimiters
;
400 // Always check for two capitals at the beginning
401 // of the entire word, so start at nSttPos.
402 aDelimiters
.push_back(nSttPos
);
404 // Find all compound word delimiters
405 for (sal_Int32 n
= nSttPos
; n
< nEndPos
; ++n
)
407 if (IsCompoundWordDelimChar(rTxt
[ n
]))
409 aDelimiters
.push_back( n
+ 1 ); // Get position of char after delimiter
413 // Decide where to put the terminating delimiter.
414 // If the last AutoCorrect char was a newline, then the AutoCorrect
415 // char will not be included in rTxt.
416 // If the last AutoCorrect char was not a newline, then the AutoCorrect
417 // character will be the last character in rTxt.
418 if (!IsCompoundWordDelimChar(rTxt
[nEndPos
-1]))
419 aDelimiters
.push_back(nEndPos
);
421 // Iterate through the word and all words that compose it.
422 // Two capital letters at the beginning of word?
423 for (size_t nI
= 0; nI
< aDelimiters
.size() - 1; ++nI
)
425 nSttPos
= aDelimiters
[nI
];
426 nEndPos
= aDelimiters
[nI
+ 1];
428 if( nSttPos
+2 < nEndPos
&&
429 IsUpperLetter( rCC
.getCharacterType( rTxt
, nSttPos
)) &&
430 IsUpperLetter( rCC
.getCharacterType( rTxt
, ++nSttPos
)) &&
431 // Is the third character a lower case
432 IsLowerLetter( rCC
.getCharacterType( rTxt
, nSttPos
+1 )) &&
433 // Do not replace special attributes
434 0x1 != rTxt
[ nSttPos
] && 0x2 != rTxt
[ nSttPos
])
436 // test if the word is in an exception list
437 OUString
sWord( rTxt
.copy( nSttPos
- 1, nEndPos
- nSttPos
+ 1 ));
438 if( !FindInWordStartExceptList(eLang
, sWord
) )
440 // Check that word isn't correctly spelt before correcting:
441 css::uno::Reference
< css::linguistic2::XSpellChecker1
> xSpeller
=
442 LinguMgr::GetSpellChecker();
443 if( xSpeller
->hasLanguage(static_cast<sal_uInt16
>(eLang
)) )
445 Sequence
< css::beans::PropertyValue
> aEmptySeq
;
446 if (xSpeller
->isValid(sWord
, static_cast<sal_uInt16
>(eLang
), aEmptySeq
))
451 sal_Unicode cSave
= rTxt
[ nSttPos
];
452 OUString sChar
= rCC
.lowercase( OUString(cSave
) );
453 if( sChar
[0] != cSave
&& rDoc
.ReplaceRange( nSttPos
, 1, sChar
))
455 if( ACFlags::SaveWordWordStartLst
& nFlags
)
456 rDoc
.SaveCpltSttWord( ACFlags::CapitalStartWord
, nSttPos
, sWord
, cSave
);
463 // Format ordinal numbers suffixes (1st -> 1^st)
464 bool SvxAutoCorrect::FnChgOrdinalNumber(
465 SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
466 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
469 // 1st, 2nd, 3rd, 4 - 0th
474 // In some languages ordinal suffixes should never be
475 // changed to superscript. Let's break for those languages.
477 LANGUAGE_CATALAN
, // tdf#156792
478 LANGUAGE_CATALAN_VALENCIAN
,
480 LANGUAGE_SWEDISH_FINLAND
))
482 CharClass
& rCC
= GetCharClass(eLang
);
484 for (; nSttPos
< nEndPos
; ++nSttPos
)
485 if (!lcl_IsInArr(sImplSttSkipChars
, rTxt
[nSttPos
]))
487 for (; nSttPos
< nEndPos
; --nEndPos
)
488 if (!lcl_IsInArr(sImplEndSkipChars
, rTxt
[nEndPos
- 1]))
492 // Get the last number in the string to check
493 sal_Int32 nNumEnd
= nEndPos
;
494 bool bFoundEnd
= false;
495 bool isValidNumber
= true;
496 sal_Int32 i
= nEndPos
;
500 bool isDigit
= rCC
.isDigit(rTxt
, i
);
502 isValidNumber
&= (isDigit
|| !rCC
.isLetter(rTxt
, i
));
504 if (isDigit
&& !bFoundEnd
)
511 if (bFoundEnd
&& isValidNumber
) {
512 sal_Int32 nNum
= o3tl::toInt32(rTxt
.subView(nSttPos
, nNumEnd
- nSttPos
+ 1));
513 std::u16string_view sEnd
= rTxt
.subView(nNumEnd
+ 1, nEndPos
- nNumEnd
- 1);
515 // Check if the characters after that number correspond to the ordinal suffix
516 uno::Reference
< i18n::XOrdinalSuffix
> xOrdSuffix
517 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
519 uno::Sequence
< OUString
> aSuffixes
= xOrdSuffix
->getOrdinalSuffix(nNum
, rCC
.getLanguageTag().getLocale());
521 // add extra suffixes for languages not handled by i18npool/ICU
522 if ( primary(eLang
) == primary(LANGUAGE_PORTUGUESE
) &&
523 ( nEndPos
== nNumEnd
+ 3 || nEndPos
== nNumEnd
+ 4 ) &&
524 ( sEnd
[0] == 'a' || sEnd
[0] == 'o' || sEnd
[0] == 'r' ) )
526 auto aExtendedSuffixes
= comphelper::sequenceToContainer
< std::vector
<OUString
> >(aSuffixes
);
527 aExtendedSuffixes
.push_back(u
"as"_ustr
); // plural form of 'a'
528 aExtendedSuffixes
.push_back(u
"os"_ustr
); // plural form of 'o'
529 aExtendedSuffixes
.push_back(u
"ra"_ustr
); // alternative form of 'a'
530 aExtendedSuffixes
.push_back(u
"ro"_ustr
); // alternative form of 'o'
531 aExtendedSuffixes
.push_back(u
"ras"_ustr
); // alternative form of "as"
532 aExtendedSuffixes
.push_back(u
"ros"_ustr
); // alternative form of "os"
533 aSuffixes
= comphelper::containerToSequence(aExtendedSuffixes
);
536 for (OUString
const & sSuffix
: aSuffixes
)
540 // Check if the ordinal suffix has to be set as super script
541 if (rCC
.isLetter(sSuffix
))
543 sal_Int32 nNumberChanged
= 0;
544 sal_Int32 nSuffixChanged
= 0;
545 // exceptions for Portuguese
546 // add missing dot: 1a -> 1.ª
547 // and remove optional 'r': 1ro -> 1.º
548 if ( primary(eLang
) == primary(LANGUAGE_PORTUGUESE
) )
550 if ( sSuffix
.startsWith("r") )
552 rDoc
.Delete( nNumEnd
+ 1, nNumEnd
+ 2 );
555 rDoc
.Insert( nNumEnd
+ 1, u
"."_ustr
);
560 SvxEscapementItem
aSvxEscapementItem(DFLT_ESC_AUTO_SUPER
,
561 DFLT_ESC_PROP
, SID_ATTR_CHAR_ESCAPEMENT
);
562 rDoc
.SetAttr(nNumEnd
+ 1 + nNumberChanged
,
563 nEndPos
+ nNumberChanged
+ nSuffixChanged
,
564 SID_ATTR_CHAR_ESCAPEMENT
,
577 bool SvxAutoCorrect::FnChgToEnEmDash(
578 SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
579 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
583 CharClass
& rCC
= GetCharClass( eLang
);
584 if (eLang
== LANGUAGE_SYSTEM
)
585 eLang
= GetAppLang().getLanguageType();
586 bool bAlwaysUseEmDash
= (eLang
== LANGUAGE_RUSSIAN
|| eLang
== LANGUAGE_UKRAINIAN
);
588 // rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert;
589 // keep a local copy for later use
590 OUString aOrigTxt
= rTxt
;
591 sal_Int32 nFirstReplacementTextLengthChange
= 0;
593 // replace " - " or " --" with "enDash"
594 if( 1 < nSttPos
&& 1 <= nEndPos
- nSttPos
)
596 sal_Unicode cCh
= rTxt
[ nSttPos
];
599 if( 1 < nEndPos
- nSttPos
&&
600 ' ' == rTxt
[ nSttPos
-1 ] &&
601 '-' == rTxt
[ nSttPos
+1 ])
604 for( n
= nSttPos
+2; n
< nEndPos
&& lcl_IsInArr(
605 sImplSttSkipChars
,(cCh
= rTxt
[ n
]));
609 // found: " --[<AnySttChars>][A-z0-9]
610 if( rCC
.isLetterNumeric( OUString(cCh
) ) )
612 for( n
= nSttPos
-1; n
&& lcl_IsInArr(
613 sImplEndSkipChars
,(cCh
= rTxt
[ --n
])); )
616 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
617 if( rCC
.isLetterNumeric( OUString(cCh
) ))
619 rDoc
.Delete( nSttPos
, nSttPos
+ 2 );
620 rDoc
.Insert( nSttPos
, bAlwaysUseEmDash
? sEmDash
: sEnDash
);
621 nFirstReplacementTextLengthChange
= -1; // 2 ch -> 1 ch
627 else if( 3 < nSttPos
&&
628 ' ' == rTxt
[ nSttPos
-1 ] &&
629 '-' == rTxt
[ nSttPos
-2 ])
631 sal_Int32 n
, nLen
= 1, nTmpPos
= nSttPos
- 2;
632 if( '-' == ( cCh
= rTxt
[ nTmpPos
-1 ]) )
636 cCh
= rTxt
[ nTmpPos
-1 ];
640 for( n
= nSttPos
; n
< nEndPos
&& lcl_IsInArr(
641 sImplSttSkipChars
,(cCh
= rTxt
[ n
]));
645 // found: " - [<AnySttChars>][A-z0-9]
646 if( rCC
.isLetterNumeric( OUString(cCh
) ) )
649 for( n
= nTmpPos
-1; n
&& lcl_IsInArr(
650 sImplEndSkipChars
,(cCh
= rTxt
[ --n
])); )
652 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
653 if (rCC
.isLetterNumeric(OUString(cCh
)) || lcl_IsInArr(u
".!?", cCh
))
655 rDoc
.Delete( nTmpPos
, nTmpPos
+ nLen
);
656 rDoc
.Insert( nTmpPos
, bAlwaysUseEmDash
? sEmDash
: sEnDash
);
657 nFirstReplacementTextLengthChange
= 1 - nLen
; // nLen ch -> 1 ch
665 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
666 // [0-9]--[0-9] double dash always replaced with "enDash"
667 // Finnish and Hungarian use enDash instead of emDash.
668 bool bEnDash
= (eLang
== LANGUAGE_HUNGARIAN
|| eLang
== LANGUAGE_FINNISH
);
669 if( 4 <= nEndPos
- nSttPos
)
671 std::u16string_view
sTmpView( aOrigTxt
.subView( nSttPos
, nEndPos
- nSttPos
) );
672 size_t nFndPos
= sTmpView
.find(u
"--");
673 if (nFndPos
> 0 && nFndPos
< sTmpView
.size() - 2)
675 // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it
676 // uses the index *both* as code unit index (when checking it as ASCII), *and*
677 // as code point index (when passes to css::i18n::XCharacterClassification).
678 // Oh well... Anyway, single-codepoint strings will workaround it.
679 sal_Int32 nStart
= nSttPos
+ nFndPos
;
680 sal_uInt32 chStart
= aOrigTxt
.iterateCodePoints(&nStart
, -1);
681 OUString
sStart(&chStart
, 1);
682 // No idea why sImplEndSkipChars is checked at start
683 if (rCC
.isLetterNumeric(sStart
, 0) || lcl_IsInArr(sImplEndSkipChars
, chStart
))
685 sal_Int32 nEnd
= nSttPos
+ nFndPos
+ 2;
686 sal_uInt32 chEnd
= aOrigTxt
.iterateCodePoints(&nEnd
, 1);
687 OUString
sEnd(&chEnd
, 1);
688 // No idea why sImplSttSkipChars is checked at end
689 if (rCC
.isLetterNumeric(sEnd
, 0) || lcl_IsInArr(sImplSttSkipChars
, chEnd
))
691 nSttPos
= nSttPos
+ nFndPos
+ nFirstReplacementTextLengthChange
;
692 rDoc
.Delete(nSttPos
, nSttPos
+ 2);
694 (bEnDash
|| (rCC
.isDigit(sStart
, 0) && rCC
.isDigit(sEnd
, 0))
705 // Add non-breaking space before specific punctuation marks in French text
706 sal_Int32
SvxAutoCorrect::FnAddNonBrkSpace(
707 SvxAutoCorrDoc
& rDoc
, std::u16string_view rTxt
,
709 LanguageType eLang
, bool& io_bNbspRunNext
)
713 CharClass
& rCC
= GetCharClass( eLang
);
715 if ( rCC
.getLanguageTag().getLanguage() == "fr" )
717 bool bFrCA
= (rCC
.getLanguageTag().getCountry() == "CA");
718 OUString allChars
= u
":;?!%"_ustr
;
719 OUString
chars( allChars
);
723 sal_Unicode cChar
= rTxt
[ nEndPos
];
724 bool bHasSpace
= chars
.indexOf( cChar
) != -1;
725 bool bIsSpecial
= allChars
.indexOf( cChar
) != -1;
728 // Get the last word delimiter position
729 sal_Int32 nSttWdPos
= nEndPos
;
730 bool bWasWordDelim
= false;
733 bWasWordDelim
= IsWordDelim( rTxt
[ --nSttWdPos
]);
738 //See if the text is the start of a protocol string, e.g. have text of
739 //"http" see if it is the start of "http:" and if so leave it alone
740 size_t nIndex
= nSttWdPos
+ (bWasWordDelim
? 1 : 0);
741 size_t nProtocolLen
= nEndPos
- nSttWdPos
+ 1;
742 if (nIndex
+ nProtocolLen
<= rTxt
.size())
744 if (INetURLObject::CompareProtocolScheme(rTxt
.substr(nIndex
, nProtocolLen
)) != INetProtocol::NotValid
)
748 // Check the presence of "://" in the word
749 size_t nStrPos
= rTxt
.find( u
"://", nSttWdPos
+ 1 );
750 if ( nStrPos
== std::u16string_view::npos
&& nEndPos
> 0 )
752 // Check the previous char
753 sal_Unicode cPrevChar
= rTxt
[ nEndPos
- 1 ];
754 if ( ( chars
.indexOf( cPrevChar
) == -1 ) && cPrevChar
!= '\t' )
756 // Remove any previous normal space
757 sal_Int32 nPos
= nEndPos
- 1;
758 while ( cPrevChar
== ' ' || cPrevChar
== cNonBreakingSpace
)
760 if ( nPos
== 0 ) break;
762 cPrevChar
= rTxt
[ nPos
];
766 if ( nEndPos
- nPos
> 0 )
767 rDoc
.Delete( nPos
, nEndPos
);
769 // Add the non-breaking space at the end pos
771 rDoc
.Insert( nPos
, OUString(cNonBreakingSpace
) );
772 io_bNbspRunNext
= true;
775 else if ( chars
.indexOf( cPrevChar
) != -1 )
776 io_bNbspRunNext
= true;
779 else if ( cChar
== '/' && nEndPos
> 1 && static_cast<sal_Int32
>(rTxt
.size()) > (nEndPos
- 1) )
781 // Remove the hardspace right before to avoid formatting URLs
782 sal_Unicode cPrevChar
= rTxt
[ nEndPos
- 1 ];
783 sal_Unicode cMaybeSpaceChar
= rTxt
[ nEndPos
- 2 ];
784 if ( cPrevChar
== ':' && cMaybeSpaceChar
== cNonBreakingSpace
)
786 rDoc
.Delete( nEndPos
- 2, nEndPos
- 1 );
796 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
797 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
800 OUString
sURL( URIHelper::FindFirstURLInText( rTxt
, nSttPos
, nEndPos
,
801 GetCharClass( eLang
) ));
802 bool bRet
= !sURL
.isEmpty();
803 if( bRet
) // so, set attribute:
804 rDoc
.SetINetAttr( nSttPos
, nEndPos
, sURL
);
808 // DOI citation recognition
809 bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc
& rDoc
, std::u16string_view rTxt
,
810 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
813 OUString
sURL( URIHelper::FindFirstDOIInText( rTxt
, nSttPos
, nEndPos
, GetCharClass( eLang
) ));
814 bool bRet
= !sURL
.isEmpty();
815 if( bRet
) // so, set attribute:
816 rDoc
.SetINetAttr( nSttPos
, nEndPos
, sURL
);
820 // Automatic *bold*, /italic/, -strikeout- and _underline_
821 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
825 // at the beginning: _, *, / or ~ after Space with the following !Space
826 // at the end: _, *, / or ~ before Space (word delimiter?)
828 sal_Unicode cInsChar
= rTxt
[ nEndPos
]; // underline, bold, italic or strikeout
829 if( ++nEndPos
!= rTxt
.getLength() &&
830 !IsWordDelim( rTxt
[ nEndPos
] ) )
835 bool bAlphaNum
= false;
836 sal_Int32 nPos
= nEndPos
;
837 sal_Int32 nFndPos
= -1;
838 CharClass
& rCC
= GetCharClass( LANGUAGE_SYSTEM
);
842 switch( sal_Unicode c
= rTxt
[ --nPos
] )
850 if( bAlphaNum
&& nPos
+1 < nEndPos
&& ( !nPos
||
851 IsWordDelim( rTxt
[ nPos
-1 ])) &&
852 !IsWordDelim( rTxt
[ nPos
+1 ]))
855 // Condition is not satisfied, so cancel
862 bAlphaNum
= rCC
.isLetterNumeric( rTxt
, nPos
);
868 // first delete the Character at the end - this allows insertion
869 // of an empty hint in SetAttr which would be removed by Delete
870 // (fdo#62536, AUTOFMT in Writer)
871 rDoc
.Delete( nEndPos
, nEndPos
+ 1 );
873 // Span the Attribute over the area
875 if( '*' == cInsChar
) // Bold
877 SvxWeightItem
aSvxWeightItem( WEIGHT_BOLD
, SID_ATTR_CHAR_WEIGHT
);
878 rDoc
.SetAttr( nFndPos
+ 1, nEndPos
,
879 SID_ATTR_CHAR_WEIGHT
,
882 else if( '/' == cInsChar
) // Italic
884 SvxPostureItem
aSvxPostureItem( ITALIC_NORMAL
, SID_ATTR_CHAR_POSTURE
);
885 rDoc
.SetAttr( nFndPos
+ 1, nEndPos
,
886 SID_ATTR_CHAR_POSTURE
,
889 else if( '-' == cInsChar
) // Strikeout
891 SvxCrossedOutItem
aSvxCrossedOutItem( STRIKEOUT_SINGLE
, SID_ATTR_CHAR_STRIKEOUT
);
892 rDoc
.SetAttr( nFndPos
+ 1, nEndPos
,
893 SID_ATTR_CHAR_STRIKEOUT
,
898 SvxUnderlineItem
aSvxUnderlineItem( LINESTYLE_SINGLE
, SID_ATTR_CHAR_UNDERLINE
);
899 rDoc
.SetAttr( nFndPos
+ 1, nEndPos
,
900 SID_ATTR_CHAR_UNDERLINE
,
903 rDoc
.Delete( nFndPos
, nFndPos
+ 1 );
906 return -1 != nFndPos
;
909 // Capitalize first letter of every sentence
910 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc
& rDoc
,
911 const OUString
& rTxt
, bool bNormalPos
,
912 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
916 if( rTxt
.isEmpty() || nEndPos
<= nSttPos
)
919 CharClass
& rCC
= GetCharClass( eLang
);
920 OUString
aText( rTxt
);
921 const sal_Unicode
*pStart
= aText
.getStr(),
922 *pStr
= pStart
+ nEndPos
,
926 bool bAtStart
= false;
929 if (rCC
.isLetter(aText
, pStr
- pStart
))
935 else if (pWordStt
&& !rCC
.isDigit(aText
, pStr
- pStart
))
937 if( (lcl_IsInArr( u
"-'", *pStr
) || *pStr
== cApostrophe
) && // These characters are allowed in words
938 pWordStt
- 1 == pStr
&&
939 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
940 (pStart
+ 1) <= pStr
&&
941 rCC
.isLetter(aText
, pStr
-1 - pStart
))
946 bAtStart
= (pStart
== pStr
);
947 } while( !bAtStart
);
950 return; // no character to be replaced
953 if (rCC
.isDigit(aText
, pStr
- pStart
))
954 return; // already ok
956 if (IsUpperLetter(rCC
.getCharacterType(aText
, pWordStt
- pStart
)))
957 return; // already ok
959 //See if the text is the start of a protocol string, e.g. have text of
960 //"http" see if it is the start of "http:" and if so leave it alone
961 sal_Int32 nIndex
= pWordStt
- pStart
;
962 sal_Int32 nProtocolLen
= pDelim
- pWordStt
+ 1;
963 if (nIndex
+ nProtocolLen
<= rTxt
.getLength())
965 if (INetURLObject::CompareProtocolScheme(rTxt
.subView(nIndex
, nProtocolLen
)) != INetProtocol::NotValid
)
966 return; // already ok
969 if (0x1 == *pWordStt
|| 0x2 == *pWordStt
)
970 return; // already ok
972 // Only capitalize, if string before specified characters is long enough
973 if( *pDelim
&& 2 >= pDelim
- pWordStt
&&
974 lcl_IsInArr( u
".-)>", *pDelim
) )
977 // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
978 if ( 1 == pDelim
- pWordStt
&& 0x03B1 <= *pWordStt
&& *pWordStt
<= 0x03C9 && eLang
!= LANGUAGE_GREEK
)
981 if( !bAtStart
) // Still no beginning of a paragraph?
983 if (NonFieldWordDelim(*pStr
))
987 bAtStart
= (pStart
== pStr
--);
988 if (bAtStart
|| !NonFieldWordDelim(*pStr
))
992 // Asian full stop, full width full stop, full width exclamation mark
993 // and full width question marks are treated as word delimiters
994 else if ( 0x3002 != *pStr
&& 0xFF0E != *pStr
&& 0xFF01 != *pStr
&&
996 return; // no valid separator -> no replacement
999 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1000 if (FindInWordStartExceptList(eLang
, OUString(pWordStt
, pDelim
- pWordStt
)))
1003 if( bAtStart
) // at the beginning of a paragraph?
// Look at the previous paragraph, if it exists.
// If it does, check for the paragraph separator at its end.
1007 OUString
const*const pPrevPara
= rDoc
.GetPrevPara(bNormalPos
);
1010 // valid separator -> replace
1011 OUString
sChar( *pWordStt
);
1012 sChar
= rCC
.titlecase(sChar
); //see fdo#56740
1013 if (sChar
!= OUStringChar(*pWordStt
))
1014 rDoc
.ReplaceRange( pWordStt
- pStart
, 1, sChar
);
1020 pStart
= aText
.getStr();
1021 pStr
= pStart
+ aText
.getLength();
1023 do { // overwrite all blanks
1025 if (!NonFieldWordDelim(*pStr
))
1027 bAtStart
= (pStart
== pStr
);
1028 } while( !bAtStart
);
1031 return; // no valid separator -> no replacement
1034 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
1035 // all three can happen, but not more than once!
1036 const sal_Unicode
* pExceptStt
= nullptr;
1037 bool bContinue
= true;
1038 Flags nFlag
= Flags::NONE
;
1043 // Western and Asian full stop
1048 if (pStr
>= pStart
+ 2 && *(pStr
- 2) == '.')
// e.g. text "f.o.o. word": we are currently considering
// capitalizing "word", but the second-to-last character of
// the previous word is a '.'. So the previous word is probably an
// abbreviation that ends in '.' and not truly the end of the
// previous sentence, so don't autocapitalize this word.
1057 if (nFlag
& Flags::FullStop
)
1058 return; // no valid separator -> no replacement
1059 nFlag
|= Flags::FullStop
;
1066 if (nFlag
& Flags::ExclamationMark
)
1067 return; // no valid separator -> no replacement
1068 nFlag
|= Flags::ExclamationMark
;
1074 if (nFlag
& Flags::QuestionMark
)
1075 return; // no valid separator -> no replacement
1076 nFlag
|= Flags::QuestionMark
;
1080 if (nFlag
== Flags::NONE
)
1081 return; // no valid separator -> no replacement
1087 if (bContinue
&& pStr
-- == pStart
)
1089 return; // no valid separator -> no replacement
1091 } while (bContinue
);
1092 if (Flags::FullStop
!= nFlag
)
1093 pExceptStt
= nullptr;
1095 // Only capitalize, if string is long enough
1096 if( 2 > ( pStr
- pStart
) )
1099 if (!rCC
.isLetterNumeric(aText
, pStr
-- - pStart
))
1101 bool bValid
= false, bAlphaFnd
= false;
1102 const sal_Unicode
* pTmpStr
= pStr
;
1105 if( rCC
.isDigit( aText
, pTmpStr
- pStart
) )
1110 else if( rCC
.isLetter( aText
, pTmpStr
- pStart
) )
1120 else if (bAlphaFnd
|| NonFieldWordDelim(*pTmpStr
))
1123 if( pTmpStr
== pStart
)
1130 return; // no valid separator -> no replacement
1133 bool bNumericOnly
= '0' <= *(pStr
+1) && *(pStr
+1) <= '9';
1135 // Search for the beginning of the word
1136 while (!NonFieldWordDelim(*pStr
))
1138 if( bNumericOnly
&& rCC
.isLetter( aText
, pStr
- pStart
) )
1139 bNumericOnly
= false;
1141 if( pStart
== pStr
)
1147 if( bNumericOnly
) // consists of only numbers, then not
1150 if (NonFieldWordDelim(*pStr
))
1155 // check on the basis of the exception list
1158 sWord
= OUString(pStr
, pExceptStt
- pStr
+ 1);
1159 if( FindInCplSttExceptList(eLang
, sWord
) )
1162 // Delete all non alphanumeric. Test the characters at the
1163 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1164 OUString
sTmp( sWord
);
1165 while( !sTmp
.isEmpty() &&
1166 !rCC
.isLetterNumeric( sTmp
, 0 ) )
1167 sTmp
= sTmp
.copy(1);
1169 // Remove all non alphanumeric characters towards the end up until
1171 sal_Int32 nLen
= sTmp
.getLength();
1172 while( nLen
&& !rCC
.isLetterNumeric( sTmp
, nLen
-1 ) )
1174 if( nLen
+ 1 < sTmp
.getLength() )
1175 sTmp
= sTmp
.copy( 0, nLen
+ 1 );
1177 if( !sTmp
.isEmpty() && sTmp
.getLength() != sWord
.getLength() &&
1178 FindInCplSttExceptList(eLang
, sTmp
))
1181 if(FindInCplSttExceptList(eLang
, sWord
, true))
1186 sal_Unicode cSave
= *pWordStt
;
1187 nSttPos
= pWordStt
- rTxt
.getStr();
1188 OUString sChar
= rCC
.titlecase(OUString(cSave
)); //see fdo#56740
1189 bool bRet
= sChar
[0] != cSave
&& rDoc
.ReplaceRange( nSttPos
, 1, sChar
);
1191 // Perhaps someone wants to have the word
1192 if( bRet
&& ACFlags::SaveWordCplSttLst
& nFlags
)
1193 rDoc
.SaveCpltSttWord( ACFlags::CapitalStartSentence
, nSttPos
, sWord
, cSave
);
1196 // Correct accidental use of cAPS LOCK key
1197 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
1198 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
1199 LanguageType eLang
)
1201 if (nEndPos
- nSttPos
< 2)
1202 // string must be at least 2-character long.
1205 CharClass
& rCC
= GetCharClass( eLang
);
1207 // Check the first 2 letters.
1208 if ( !IsLowerLetter(rCC
.getCharacterType(rTxt
, nSttPos
)) )
1211 if ( !IsUpperLetter(rCC
.getCharacterType(rTxt
, nSttPos
+1)) )
1214 OUStringBuffer aConverted
;
1215 aConverted
.append( rCC
.uppercase(OUString(rTxt
[nSttPos
])) );
1216 aConverted
.append( rCC
.lowercase(OUString(rTxt
[nSttPos
+1])) );
1218 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1219 if (FindInWordStartExceptList(eLang
, rTxt
.copy(nSttPos
, nEndPos
- nSttPos
)))
1222 for( sal_Int32 i
= nSttPos
+2; i
< nEndPos
; ++i
)
1224 if ( IsLowerLetter(rCC
.getCharacterType(rTxt
, i
)) )
1225 // A lowercase letter disqualifies the whole text.
1228 if ( IsUpperLetter(rCC
.getCharacterType(rTxt
, i
)) )
1229 // Another uppercase letter. Convert it.
1230 aConverted
.append( rCC
.lowercase(OUString(rTxt
[i
])) );
1232 // This is not an alphabetic letter. Leave it as-is.
1233 aConverted
.append( rTxt
[i
] );
1236 // Replace the word.
1237 rDoc
.Delete(nSttPos
, nEndPos
);
1238 rDoc
.Insert(nSttPos
, aConverted
.makeStringAndClear());
// Picks the typographic quote character that replaces the typed cInsChar.
//   cInsChar  - the typed character; '"' selects the double-quote variants,
//               anything else the single-quote variants.
//   bSttQuote - true selects the opening (start) quote, false the closing one.
//   eLang     - language used for the locale lookup further down.
// The initial candidate comes from GetStart/EndDouble/SingleQuote(). The
// visible locale fallback uses the first character of the locale's quotation
// mark string, or cInsChar itself when that string is empty.
// NOTE(review): the condition guarding the locale fallback and the statement
// executed for LANGUAGE_NONE are not visible in this excerpt - confirm against
// the full source.
1244 sal_Unicode
SvxAutoCorrect::GetQuote( sal_Unicode cInsChar
, bool bSttQuote
,
1245 LanguageType eLang
) const
// Candidate taken from the SvxAutoCorrect getters.
1247 sal_Unicode cRet
= bSttQuote
? ( '\"' == cInsChar
1248 ? GetStartDoubleQuote()
1249 : GetStartSingleQuote() )
1250 : ( '\"' == cInsChar
1251 ? GetEndDoubleQuote()
1252 : GetEndSingleQuote() );
1255 // then through the Language find the right character
1256 if( LANGUAGE_NONE
== eLang
)
// Locale-based lookup: same start/end and double/single selection as above,
// but taken from the LocaleDataWrapper of eLang.
1260 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1261 OUString
sRet( bSttQuote
1262 ? ( '\"' == cInsChar
1263 ? rLcl
.getDoubleQuotationMarkStart()
1264 : rLcl
.getQuotationMarkStart() )
1265 : ( '\"' == cInsChar
1266 ? rLcl
.getDoubleQuotationMarkEnd()
1267 : rLcl
.getQuotationMarkEnd() ));
// An empty locale string falls back to the typed character.
1268 cRet
= !sRet
.isEmpty() ? sRet
[0] : cInsChar
;
// Inserts or replaces the typed quote character at nInsPos in rDoc with its
// typographic replacement.
//   rDoc      - document abstraction receiving the Insert/Replace/Delete calls.
//   nInsPos   - position of the typed character.
//   cInsChar  - the typed character ('"', '\'', '<' or '>' in the visible code).
//   bSttQuote - true when an opening quote is wanted.
//   bIns      - presumably selects between the rDoc.Insert and rDoc.Replace
//               calls below; the branching lines are not visible - confirm.
//   eLang     - language of the text at the insert position.
//   eType     - special handling selected by the caller (angle quotes,
//               apostrophe, non-breaking space, "i'" capitalisation).
// NOTE(review): several statements of this function (declaration of cRet, the
// UseApostrophe branch body, else/brace lines) are missing from this excerpt.
1274 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc
& rDoc
, sal_Int32 nInsPos
,
1275 sal_Unicode cInsChar
, bool bSttQuote
,
1276 bool bIns
, LanguageType eLang
, ACQuotes eType
) const
// Angle-quote mode: choose the left or right angle quote; Swiss French uses
// the single angle quotes, every other language the double ones.
1280 if ( eType
== ACQuotes::DoubleAngleQuote
)
1282 bool bSwiss
= eLang
== LANGUAGE_FRENCH_SWISS
;
1283 // pressing " inside a quotation -> use second level angle quotes
1284 bool bLeftQuote
= '\"' == cInsChar
&&
1285 // start position and Romanian OR
1286 // not start position and Hungarian
1287 bSttQuote
== (eLang
!= LANGUAGE_HUNGARIAN
);
1288 cRet
= ( '<' == cInsChar
|| bLeftQuote
)
1289 ? ( bSwiss
? cLeftSingleAngleQuote
: cLeftDoubleAngleQuote
)
1290 : ( bSwiss
? cRightSingleAngleQuote
: cRightDoubleAngleQuote
);
1292 else if ( eType
== ACQuotes::UseApostrophe
)
// Default: language dependent quote from GetQuote().
1295 cRet
= GetQuote( cInsChar
, bSttQuote
, eLang
);
// The typed character is written first, then replaced by cRet further down.
1297 OUString
sChg( cInsChar
);
1299 rDoc
.Insert( nInsPos
, sChg
);
1301 rDoc
.Replace( nInsPos
, sChg
);
1303 sChg
= OUString(cRet
);
// Non-breaking-space mode: the space goes after an opening quote
// (nInsPos+1) or before a closing quote (nInsPos).
1305 if( eType
== ACQuotes::NonBreakingSpace
)
1307 if( rDoc
.Insert( bSttQuote
? nInsPos
+1 : nInsPos
, OUStringChar(cNonBreakingSpace
) ))
// For typed "<<" / ">>" the character before nInsPos is deleted here.
1313 else if( eType
== ACQuotes::DoubleAngleQuote
&& cInsChar
!= '\"' )
1315 rDoc
.Delete( nInsPos
-1, nInsPos
);
1319 rDoc
.Replace( nInsPos
, sChg
);
1321 // i' -> I' in English (last step for the Undo)
1322 if( eType
== ACQuotes::CapitalizeIAm
)
1323 rDoc
.Replace( nInsPos
-1, u
"I"_ustr
);
// Returns, as a string, the quote replacement for cInsChar at nInsPos.
// The language is looked up from rDoc at nInsPos and the character comes from
// the three-argument GetQuote() overload.
// NOTE(review): what is done for French (other than Swiss French) double
// quotes, and the return statement, are not visible in this excerpt.
1326 OUString
SvxAutoCorrect::GetQuote( SvxAutoCorrDoc
const & rDoc
, sal_Int32 nInsPos
,
1327 sal_Unicode cInsChar
, bool bSttQuote
)
1329 const LanguageType eLang
= GetDocLanguage( rDoc
, nInsPos
);
1330 sal_Unicode cRet
= GetQuote( cInsChar
, bSttQuote
, eLang
);
1332 OUString
sRet(cRet
);
// Only typed double quotes get the extra French handling below.
1334 if( '\"' == cInsChar
)
1336 if (primary(eLang
) == primary(LANGUAGE_FRENCH
) && eLang
!= LANGUAGE_FRENCH_SWISS
)
1347 // search preceding opening quote in the paragraph before the insert position
1348 static bool lcl_HasPrecedingChar( std::u16string_view rTxt
, sal_Int32 nPos
,
1349 const sal_Unicode sPrecedingChar
, const sal_Unicode sStopChar
, const sal_Unicode
* aStopChars
)
1351 sal_Unicode cTmpChar
;
1354 cTmpChar
= rTxt
[ --nPos
];
1355 if ( cTmpChar
== sPrecedingChar
)
1358 if ( cTmpChar
== sStopChar
)
1361 for ( const sal_Unicode
* pCh
= aStopChars
; *pCh
; ++pCh
)
1362 if ( cTmpChar
== *pCh
)
1365 } while ( nPos
> 0 );
// Entry point that runs the enabled autocorrect steps for one typed character.
//   rDoc            - document abstraction receiving all Insert/Replace/Delete
//                     and word-replacement calls.
//   rTxt            - paragraph text; see the WARNING comments below about it
//                     changing or becoming invalid while this function runs.
//   nInsPos         - position at which cChar is being typed.
//   cChar           - the typed character.
//   bInsert         - forwarded to InsertQuote() as its bIns argument.
//   io_bNbspRunNext - in: value is saved in bIsNextRun; out: reset to false
//                     here and handed to FnAddNonBrkSpace() as in/out flag.
//   pFrameWin       - may be null; used to query the caps-lock indicator and
//                     to simulate a caps-lock key press.
// NOTE(review): many brace, else, break and return lines are missing from this
// excerpt, so the exact nesting of the steps below cannot be confirmed here.
1370 // WARNING: rText may become invalid, see comment below
1371 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
1372 sal_Int32 nInsPos
, sal_Unicode cChar
,
1373 bool bInsert
, bool& io_bNbspRunNext
, vcl::Window
const * pFrameWin
)
1375 bool bIsNextRun
= io_bNbspRunNext
;
1376 io_bNbspRunNext
= false; // if it was set, then it has to be turned off
1378 do{ // only for middle check loop !!
1381 // Prevent double space
1382 if( nInsPos
&& ' ' == cChar
&&
1383 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace
) &&
1384 ' ' == rTxt
[ nInsPos
- 1 ])
// Quote replacement: applies to '"' with ChgQuotes and to '\'' with
// ChgSglQuotes.
1389 bool bSingle
= '\'' == cChar
;
1390 bool bIsReplaceQuote
=
1391 (IsAutoCorrFlag( ACFlags::ChgQuotes
) && ('\"' == cChar
)) ||
1392 (IsAutoCorrFlag( ACFlags::ChgSglQuotes
) && bSingle
);
1393 if( bIsReplaceQuote
)
// At position 0 the quote is always an opening quote.
1395 bool bSttQuote
= !nInsPos
;
1396 ACQuotes eType
= ACQuotes::NONE
;
1397 const LanguageType eLang
= GetDocLanguage( rDoc
, nInsPos
);
// Otherwise it is an opening quote after a word delimiter, an opening
// bracket or an em/en dash.
1400 sal_Unicode cPrev
= rTxt
[ nInsPos
-1 ];
1401 bSttQuote
= NonFieldWordDelim(cPrev
) ||
1402 lcl_IsInArr( u
"([{", cPrev
) ||
1403 ( cEmDash
== cPrev
) ||
1404 ( cEnDash
== cPrev
);
1405 // tdf#38394 use opening quotation mark << in French l'<<word>>
1406 if ( !bSingle
&& !bSttQuote
&& cPrev
== cApostrophe
&&
1407 primary(eLang
) == primary(LANGUAGE_FRENCH
) &&
1408 ( ( ( nInsPos
== 2 || ( nInsPos
> 2 && IsWordDelim( rTxt
[ nInsPos
-3 ] ) ) ) &&
1409 // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1410 u
"cdjlnmtsCDJLNMTS"_ustr
.indexOf( rTxt
[ nInsPos
-2 ] ) > -1 ) ||
1411 ( ( nInsPos
== 3 || (nInsPos
> 3 && IsWordDelim( rTxt
[ nInsPos
-4 ] ) ) ) &&
1412 // abbreviated form of que
1413 ( rTxt
[ nInsPos
-2 ] == 'u' || rTxt
[ nInsPos
-2 ] == 'U' ) &&
1414 ( rTxt
[ nInsPos
-3 ] == 'q' || rTxt
[ nInsPos
-3 ] == 'Q' ) ) ) )
1418 // tdf#108423 for capitalization of English i'm
1419 else if ( bSingle
&& ( cPrev
== 'i' ) &&
1420 primary(eLang
) == primary(LANGUAGE_ENGLISH
) &&
1421 ( nInsPos
== 1 || IsWordDelim( rTxt
[ nInsPos
-2 ] ) ) )
1423 eType
= ACQuotes::CapitalizeIAm
;
1425 // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1426 else if ( !bSingle
&& nInsPos
&&
1427 ( ( eLang
== LANGUAGE_HUNGARIAN
&&
1428 lcl_HasPrecedingChar( rTxt
, nInsPos
,
1429 bSttQuote
? aStopDoubleAngleQuoteStart
[0] : aStopDoubleAngleQuoteEnd
[0],
1430 bSttQuote
? aStopDoubleAngleQuoteStart
[1] : aStopDoubleAngleQuoteEnd
[1],
1431 bSttQuote
? aStopDoubleAngleQuoteStart
+ 1 : aStopDoubleAngleQuoteEnd
+ 2 ) ) ||
1434 LANGUAGE_ROMANIAN_MOLDOVA
) &&
1435 lcl_HasPrecedingChar( rTxt
, nInsPos
,
1436 bSttQuote
? aStopDoubleAngleQuoteStart
[0] : aStopDoubleAngleQuoteEndRo
[0],
1437 bSttQuote
? aStopDoubleAngleQuoteStart
[1] : aStopDoubleAngleQuoteEndRo
[1],
1438 bSttQuote
? aStopDoubleAngleQuoteStart
+ 1 : aStopDoubleAngleQuoteEndRo
+ 2 ) ) ) )
1440 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1441 // only if the opening double quotation mark is the default one
1442 if ( rLcl
.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart
[0]) )
1443 eType
= ACQuotes::DoubleAngleQuote
;
1445 else if ( bSingle
&& nInsPos
&& !bSttQuote
&&
1446 // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1447 // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1448 // tdf#123786 the same for Russian and Ukrainian
1452 LANGUAGE_GERMAN_SWISS
,
1453 LANGUAGE_GERMAN_AUSTRIAN
,
1454 LANGUAGE_GERMAN_LUXEMBOURG
,
1455 LANGUAGE_GERMAN_LIECHTENSTEIN
,
1458 LANGUAGE_SLOVENIAN
) ) )
// Configured single quotes; a zero value is replaced from the locale data.
1460 sal_Unicode sStartChar
= GetStartSingleQuote();
1461 sal_Unicode sEndChar
= GetEndSingleQuote();
1462 if ( !sStartChar
|| !sEndChar
) {
1463 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1464 if ( !sStartChar
) sStartChar
= rLcl
.getQuotationMarkStart()[0];
// NOTE(review): the end-quote fallback below also reads
// getQuotationMarkStart(); getQuotationMarkEnd() looks intended - confirm.
1465 if ( !sEndChar
) sEndChar
= rLcl
.getQuotationMarkStart()[0];
1467 if ( !lcl_HasPrecedingChar( rTxt
, nInsPos
, sStartChar
, sEndChar
, aStopSingleQuoteEnd
+ 1 ) )
1469 CharClass
& rCC
= GetCharClass( eLang
);
1470 if ( rCC
.isLetter(rTxt
, nInsPos
-1) )
1472 eType
= ACQuotes::UseApostrophe
;
1476 else if ( bSingle
&& nInsPos
&& !bSttQuote
&&
1479 LANGUAGE_UKRAINIAN
) &&
1480 !lcl_HasPrecedingChar( rTxt
, nInsPos
, aStopSingleQuoteEndRuUa
[0], aStopSingleQuoteEndRuUa
[1], aStopSingleQuoteEndRuUa
+ 2 ) ) )
1482 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1483 CharClass
& rCC
= GetCharClass( eLang
);
1484 if ( rLcl
.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa
[0]) &&
1485 // use apostrophe only after letters, not after digits or punctuation
1486 rCC
.isLetter(rTxt
, nInsPos
-1) )
1488 eType
= ACQuotes::UseApostrophe
;
// French (other than Swiss French) double quotes with no special type chosen
// above get the non-breaking space handling.
1493 if ( eType
== ACQuotes::NONE
&& !bSingle
&&
1494 ( primary(eLang
) == primary(LANGUAGE_FRENCH
) && eLang
!= LANGUAGE_FRENCH_SWISS
) )
1495 eType
= ACQuotes::NonBreakingSpace
;
1497 InsertQuote( rDoc
, nInsPos
, cChar
, bSttQuote
, bInsert
, eLang
, eType
);
1500 // tdf#133524 change "<<" and ">>" to double angle quotation marks
1501 else if ( IsAutoCorrFlag( ACFlags::ChgQuotes
) &&
1502 IsAutoCorrFlag( ACFlags::ChgAngleQuotes
) &&
1503 ('<' == cChar
|| '>' == cChar
) &&
1504 nInsPos
> 0 && cChar
== rTxt
[ nInsPos
-1 ] )
1506 const LanguageType eLang
= GetDocLanguage( rDoc
, nInsPos
);
1508 LANGUAGE_CATALAN
, // primary level
1509 LANGUAGE_CATALAN_VALENCIAN
, // primary level
1510 LANGUAGE_FINNISH
, // alternative primary level
1511 LANGUAGE_FRENCH_SWISS
, // second level
1512 LANGUAGE_GALICIAN
, // primary level
1513 LANGUAGE_HUNGARIAN
, // second level
1514 LANGUAGE_POLISH
, // second level
1515 LANGUAGE_PORTUGUESE
, // primary level
1516 LANGUAGE_PORTUGUESE_BRAZILIAN
, // primary level
1517 LANGUAGE_ROMANIAN
, // second level
1518 LANGUAGE_ROMANIAN_MOLDOVA
, // second level
1519 LANGUAGE_SWEDISH
, // alternative primary level
1520 LANGUAGE_SWEDISH_FINLAND
, // alternative primary level
1521 LANGUAGE_UKRAINIAN
, // primary level
1522 LANGUAGE_USER_ARAGONESE
, // primary level
1523 LANGUAGE_USER_ASTURIAN
) || // primary level
1524 primary(eLang
) == primary(LANGUAGE_GERMAN
) || // alternative primary level
1525 primary(eLang
) == primary(LANGUAGE_SPANISH
) ) // primary level
1527 InsertQuote( rDoc
, nInsPos
, cChar
, false, bInsert
, eLang
, ACQuotes::DoubleAngleQuote
);
// Plain write of the typed character into the document.
1533 rDoc
.Insert( nInsPos
, OUString(cChar
) );
1535 rDoc
.Replace( nInsPos
, OUString(cChar
) );
1537 // Hardspaces autocorrection
1538 if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace
) )
1540 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1541 // and its length may change (even become shorter) if FnAddNonBrkSpace succeeds!
1542 sal_Int32 nUpdatedPos
= -1;
1543 if (NeedsHardspaceAutocorr(cChar
))
1544 nUpdatedPos
= FnAddNonBrkSpace( rDoc
, rTxt
, nInsPos
, GetDocLanguage( rDoc
, nInsPos
), io_bNbspRunNext
);
// A non-negative result is adopted as the new insert position.
1545 if (nUpdatedPos
>= 0)
1547 nInsPos
= nUpdatedPos
;
1549 else if ( bIsNextRun
&& !IsAutoCorrectChar( cChar
) )
1551 // Remove the NBSP if it wasn't an autocorrection
1552 if ( nInsPos
!= 0 && NeedsHardspaceAutocorr( rTxt
[ nInsPos
- 1 ] ) &&
1553 cChar
!= ' ' && cChar
!= '\t' && cChar
!= cNonBreakingSpace
)
1555 // Look for the last HARD_SPACE
1556 sal_Int32 nPos
= nInsPos
- 1;
1557 bool bContinue
= true;
1560 const sal_Unicode cTmpChar
= rTxt
[ nPos
];
1561 if ( cTmpChar
== cNonBreakingSpace
)
1563 rDoc
.Delete( nPos
, nPos
+ 1 );
1566 else if ( !NeedsHardspaceAutocorr( cTmpChar
) || nPos
== 0 )
// Word-based steps: nPos starts at the character before the insert position.
1578 sal_Int32 nPos
= nInsPos
- 1;
1580 if( IsWordDelim( rTxt
[ nPos
]))
1583 // Set bold or underline automatically?
1584 if (('*' == cChar
|| '_' == cChar
|| '/' == cChar
|| '-' == cChar
) && (nPos
+1 < rTxt
.getLength()))
1586 if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl
) )
1588 FnChgWeightUnderl( rDoc
, rTxt
, nPos
+1 );
// Walk back to the preceding word delimiter or to the paragraph start.
1593 while( nPos
&& !IsWordDelim( rTxt
[ --nPos
]))
1596 // Found a Paragraph-start or a Blank, search for the word shortcut in
1598 sal_Int32 nCapLttrPos
= nPos
+1; // on the 1st Character
1599 if( !nPos
&& !IsWordDelim( rTxt
[ 0 ]))
1600 --nCapLttrPos
; // begin of paragraph and no blank
1602 const LanguageType eLang
= GetDocLanguage( rDoc
, nCapLttrPos
);
1603 CharClass
& rCC
= GetCharClass( eLang
);
1605 // no symbol characters
1606 if( lcl_IsSymbolChar( rCC
, rTxt
, nCapLttrPos
, nInsPos
))
1609 if( IsAutoCorrFlag( ACFlags::Autocorrect
) &&
1610 // tdf#134940 fix regression of arrow "-->" resulted by premature
1611 // replacement of "--" since '>' was added to IsAutoCorrectChar()
1614 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1615 // and becomes INVALID if ChgAutoCorrWord returns true!
1616 // => use aPara/pPara to create a valid copy of the string!
1618 OUString
* pPara
= IsAutoCorrFlag(ACFlags::CapitalStartSentence
) ? &aPara
: nullptr;
1620 bool bChgWord
= rDoc
.ChgAutoCorrWord( nCapLttrPos
, nInsPos
,
// Second attempt on a narrowed range: characters from sImplSttSkipChars at
// the start and from sImplEndSkipChars at the end are skipped.
1624 sal_Int32 nCapLttrPos1
= nCapLttrPos
, nInsPos1
= nInsPos
;
1625 while( nCapLttrPos1
< nInsPos
&&
1626 lcl_IsInArr( sImplSttSkipChars
, rTxt
[ nCapLttrPos1
] )
1629 while( nCapLttrPos1
< nInsPos1
&& nInsPos1
&&
1630 lcl_IsInArr( sImplEndSkipChars
, rTxt
[ nInsPos1
-1 ] )
1634 if( (nCapLttrPos1
!= nCapLttrPos
|| nInsPos1
!= nInsPos
) &&
1635 nCapLttrPos1
< nInsPos1
&&
1636 rDoc
.ChgAutoCorrWord( nCapLttrPos1
, nInsPos1
, *this, pPara
))
1639 nCapLttrPos
= nCapLttrPos1
;
// Follow-up steps work on the copy aPara instead of rTxt.
1645 if( !aPara
.isEmpty() )
1647 sal_Int32 nEnd
= nCapLttrPos
;
1648 while( nEnd
< aPara
.getLength() &&
1649 !IsWordDelim( aPara
[ nEnd
]))
1652 // Capital letter at beginning of paragraph?
1653 if( IsAutoCorrFlag( ACFlags::CapitalStartSentence
) )
1655 FnCapitalStartSentence( rDoc
, aPara
, false,
1656 nCapLttrPos
, nEnd
, eLang
);
1659 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash
) )
1661 FnChgToEnEmDash( rDoc
, aPara
, nCapLttrPos
, nEnd
, eLang
);
1668 if( IsAutoCorrFlag( ACFlags::TransliterateRTL
) && GetDocLanguage( rDoc
, nInsPos
) == LANGUAGE_HUNGARIAN
)
1670 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1671 // and becomes INVALID if TransliterateRTLWord returns true!
1672 if ( rDoc
.TransliterateRTLWord( nCapLttrPos
, nInsPos
) )
// Ordinal numbers, URL attributes and DOI attributes; the latter two only
// after a blank, tab, line feed or NUL character.
1676 if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber
) &&
1677 (nInsPos
>= 2 ) && // fdo#69762 avoid autocorrect for 2e-3
1678 ( '-' != cChar
|| 'E' != rtl::toAsciiUpperCase(rTxt
[nInsPos
-1]) || '0' > rTxt
[nInsPos
-2] || '9' < rTxt
[nInsPos
-2] ) &&
1679 FnChgOrdinalNumber( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) ) ||
1680 ( IsAutoCorrFlag( ACFlags::SetINetAttr
) &&
1681 ( ' ' == cChar
|| '\t' == cChar
|| 0x0a == cChar
|| !cChar
) &&
1682 FnSetINetAttr( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) ) ||
1683 ( IsAutoCorrFlag( ACFlags::SetDOIAttr
) &&
1684 ( ' ' == cChar
|| '\t' == cChar
|| 0x0a == cChar
|| !cChar
) &&
1685 FnSetDOIAttr( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) ) )
// pFrameWin may be null; without a window the caps-lock step is skipped.
1689 bool bLockKeyOn
= pFrameWin
&& (pFrameWin
->GetIndicatorState() & KeyIndicatorState::CAPSLOCK
);
1690 bool bUnsupported
= lcl_IsUnsupportedUnicodeChar( rCC
, rTxt
, nCapLttrPos
, nInsPos
);
1692 if ( bLockKeyOn
&& IsAutoCorrFlag( ACFlags::CorrectCapsLock
) &&
1693 FnCorrectCapsLock( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) )
1695 // Correct accidental use of cAPS LOCK key (do this only when
1696 // the caps or shift lock key is pressed). Turn off the caps
1698 pFrameWin
->SimulateKeyPress( KEY_CAPSLOCK
);
1701 // Capital letter at beginning of paragraph ?
1702 if( !bUnsupported
&&
1703 IsAutoCorrFlag( ACFlags::CapitalStartSentence
) )
1705 FnCapitalStartSentence( rDoc
, rTxt
, true, nCapLttrPos
, nInsPos
, eLang
);
1708 // Two capital letters at beginning of word ??
1709 if( !bUnsupported
&&
1710 IsAutoCorrFlag( ACFlags::CapitalStartWord
) )
1712 FnCapitalStartWord( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
);
1715 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash
) )
1717 FnChgToEnEmDash( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
);
1724 SvxAutoCorrectLanguageLists
& SvxAutoCorrect::GetLanguageList_(
1725 LanguageType eLang
)
1727 LanguageTag
aLanguageTag( eLang
);
1728 if (m_aLangTable
.find(aLanguageTag
) == m_aLangTable
.end())
1729 (void)CreateLanguageFile(aLanguageTag
);
1730 const auto iter
= m_aLangTable
.find(aLanguageTag
);
1731 assert(iter
!= m_aLangTable
.end());
1732 return iter
->second
;
1735 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang
)
1737 auto const iter
= m_aLangTable
.find(LanguageTag(eLang
));
1738 if (iter
!= m_aLangTable
.end())
1739 iter
->second
.SaveCplSttExceptList();
1742 SAL_WARN("editeng", "Save an empty list? ");
1746 void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang
)
1748 auto const iter
= m_aLangTable
.find(LanguageTag(eLang
));
1749 if (iter
!= m_aLangTable
.end())
1750 iter
->second
.SaveWordStartExceptList();
1753 SAL_WARN("editeng", "Save an empty list? ");
1757 // Adds a single word. The list will immediately be written to the file!
1758 bool SvxAutoCorrect::AddCplSttException( const OUString
& rNew
,
1759 LanguageType eLang
)
1761 SvxAutoCorrectLanguageLists
* pLists
= nullptr;
1762 // either the right language is present or it will be this in the general list
1763 auto iter
= m_aLangTable
.find(LanguageTag(eLang
));
1764 if (iter
!= m_aLangTable
.end())
1765 pLists
= &iter
->second
;
1768 LanguageTag
aLangTagUndetermined( LANGUAGE_UNDETERMINED
);
1769 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1770 if (iter
!= m_aLangTable
.end())
1771 pLists
= &iter
->second
;
1772 else if(CreateLanguageFile(aLangTagUndetermined
))
1774 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1775 assert(iter
!= m_aLangTable
.end());
1776 pLists
= &iter
->second
;
1779 OSL_ENSURE(pLists
, "No auto correction data");
1780 return pLists
&& pLists
->AddToCplSttExceptList(rNew
);
1783 // Adds a single word. The list will immediately be written to the file!
1784 bool SvxAutoCorrect::AddWordStartException( const OUString
& rNew
,
1785 LanguageType eLang
)
1787 SvxAutoCorrectLanguageLists
* pLists
= nullptr;
1788 //either the right language is present or it is set in the general list
1789 auto iter
= m_aLangTable
.find(LanguageTag(eLang
));
1790 if (iter
!= m_aLangTable
.end())
1791 pLists
= &iter
->second
;
1794 LanguageTag
aLangTagUndetermined( LANGUAGE_UNDETERMINED
);
1795 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1796 if (iter
!= m_aLangTable
.end())
1797 pLists
= &iter
->second
;
1798 else if(CreateLanguageFile(aLangTagUndetermined
))
1800 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1801 assert(iter
!= m_aLangTable
.end());
1802 pLists
= &iter
->second
;
1805 OSL_ENSURE(pLists
, "No auto correction file!");
1806 return pLists
&& pLists
->AddToWordStartExceptList(rNew
);
// Extracts the word that ends at position nPos of rTxt.
// The visible code walks back from nPos to the preceding word delimiter,
// skips leading characters from sImplSttSkipChars and copies
// rTxt[nCapLttrPos, nEnd) into sRet.
// NOTE(review): the rest of the parameter list, the declarations of nPos and
// sRet, and the statements run by the early-exit conditions below are not
// visible in this excerpt - confirm against the full source.
1809 OUString
SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc
const& rDoc
, const OUString
& rTxt
,
1816 sal_Int32 nEnd
= nPos
;
1818 // it must be followed by a blank or tab!
1819 if( ( nPos
< rTxt
.getLength() &&
1820 !IsWordDelim( rTxt
[ nPos
])) ||
1821 IsWordDelim( rTxt
[ --nPos
]))
// Walk back to the preceding word delimiter or to the paragraph start.
1824 while( nPos
&& !IsWordDelim( rTxt
[ --nPos
]))
1827 // Found a Paragraph-start or a Blank, search for the word shortcut in
1829 sal_Int32 nCapLttrPos
= nPos
+1; // on the 1st Character
1830 if( !nPos
&& !IsWordDelim( rTxt
[ 0 ]))
1831 --nCapLttrPos
; // Beginning of paragraph and no Blank!
// Skip leading characters listed in sImplSttSkipChars.
1833 while( lcl_IsInArr( sImplSttSkipChars
, rTxt
[ nCapLttrPos
]) )
1834 if( ++nCapLttrPos
>= nEnd
)
// Remaining word length is compared against 3 here.
1837 if( 3 > nEnd
- nCapLttrPos
)
1840 const LanguageType eLang
= GetDocLanguage( rDoc
, nCapLttrPos
);
1842 CharClass
& rCC
= GetCharClass(eLang
);
1844 if( lcl_IsSymbolChar( rCC
, rTxt
, nCapLttrPos
, nEnd
))
1847 sRet
= rTxt
.copy( nCapLttrPos
, nEnd
- nCapLttrPos
);
1852 std::vector
<OUString
> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt
,
1853 const sal_Int32 nPos
)
1855 constexpr sal_Int32 nMinLen
= 3;
1856 constexpr sal_Int32 nMaxLen
= 9;
1857 std::vector
<OUString
> aRes
;
1858 if (nPos
>= nMinLen
)
1860 sal_Int32 nBegin
= std::max
<sal_Int32
>(nPos
- nMaxLen
, 0);
1861 // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1862 if (nBegin
> 0 && !IsWordDelim(rTxt
[nBegin
-1]))
1864 while (nBegin
+ nMinLen
<= nPos
&& !IsWordDelim(rTxt
[nBegin
]))
1867 if (nBegin
+ nMinLen
<= nPos
)
1869 OUString
sRes( rTxt
.substr(nBegin
, nPos
- nBegin
) );
1870 aRes
.push_back(sRes
);
1871 bool bLastStartedWithDelim
= IsWordDelim(sRes
[0]);
1872 for (sal_Int32 i
= 1; i
<= sRes
.getLength() - nMinLen
; ++i
)
1874 bool bAdd
= bLastStartedWithDelim
;
1875 bLastStartedWithDelim
= IsWordDelim(sRes
[i
]);
1876 bAdd
= bAdd
|| bLastStartedWithDelim
;
1878 aRes
.push_back(sRes
.copy(i
));
// Registers the autocorrect lists for rLanguageTag in m_aLangTable.
//   rLanguageTag - language to register; expected not to be registered yet
//                  (see the OSL_ENSURE below).
//   bNewFile     - part of the condition of the file-check branch below: with
//                  it set, an entry is created even if no file is found.
// aLastFileTable holds the encoded time of the last check per language. The
// entry is erased when lists are created, and written with the current time
// in the final "!bNewFile" branch.
// Returns true when an entry was emplaced (pLists is non-null).
// NOTE(review): some condition and brace lines are missing from this excerpt,
// so the exact nesting of the first branch cannot be confirmed here.
1885 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag
& rLanguageTag
, bool bNewFile
)
1887 OSL_ENSURE(m_aLangTable
.find(rLanguageTag
) == m_aLangTable
.end(), "Language already exists ");
// Both paths start out as the user-directory file name.
1889 OUString
sUserDirFile( GetAutoCorrFileName( rLanguageTag
, true ));
1890 OUString
sShareDirFile( sUserDirFile
);
1892 SvxAutoCorrectLanguageLists
* pLists
= nullptr;
1894 tools::Time
nAktTime(tools::Time::SYSTEM
);
// Was this language checked earlier than now, but less than two minutes ago?
1896 auto nFndPos
= aLastFileTable
.find(rLanguageTag
);
1897 bool lastCheckLessThan2MinutesAgo
= nFndPos
!= aLastFileTable
.end();
1898 if (lastCheckLessThan2MinutesAgo
)
1900 const tools::Time
nLastCheckTime(tools::Time::fromEncodedTime(nFndPos
->second
));
1901 lastCheckLessThan2MinutesAgo
1902 = nLastCheckTime
< nAktTime
&& nAktTime
- nLastCheckTime
< tools::Time(0, 2);
1904 if (lastCheckLessThan2MinutesAgo
)
1906 // no need to test the file, because the last check is not older then
1910 sShareDirFile
= sUserDirFile
;
1911 auto itBool
= m_aLangTable
.emplace(std::piecewise_construct
,
1912 std::forward_as_tuple(rLanguageTag
),
1913 std::forward_as_tuple(*this, sShareDirFile
, sUserDirFile
));
1914 pLists
= &itBool
.first
->second
;
1915 aLastFileTable
.erase(nFndPos
);
// File check: the user file first, then two share-directory candidates. Each
// candidate is assigned to sShareDirFile as a side effect of the test. If
// none exists, sShareDirFile is reset to the user file and bNewFile decides.
1919 ( FStatHelper::IsDocument( sUserDirFile
) ||
1920 FStatHelper::IsDocument( sShareDirFile
=
1921 GetAutoCorrFileName( rLanguageTag
) ) ||
1922 FStatHelper::IsDocument( sShareDirFile
=
1923 GetAutoCorrFileName( rLanguageTag
, false, false, true) )
1925 ( sShareDirFile
= sUserDirFile
, bNewFile
)
1928 auto itBool
= m_aLangTable
.emplace(std::piecewise_construct
,
1929 std::forward_as_tuple(rLanguageTag
),
1930 std::forward_as_tuple(*this, sShareDirFile
, sUserDirFile
));
1931 pLists
= &itBool
.first
->second
;
1932 if (nFndPos
!= aLastFileTable
.end())
1933 aLastFileTable
.erase(nFndPos
);
// Nothing found and no new file requested: record the time of this check.
1935 else if( !bNewFile
)
1937 aLastFileTable
[rLanguageTag
] = nAktTime
.GetTime();
1939 return pLists
!= nullptr;
1942 bool SvxAutoCorrect::PutText( const OUString
& rShort
, const OUString
& rLong
,
1943 LanguageType eLang
)
1945 LanguageTag
aLanguageTag( eLang
);
1946 if (auto const iter
= m_aLangTable
.find(aLanguageTag
); iter
!= m_aLangTable
.end())
1947 return iter
->second
.PutText(rShort
, rLong
);
1948 if (CreateLanguageFile(aLanguageTag
))
1950 auto const iter
= m_aLangTable
.find(aLanguageTag
);
1951 assert (iter
!= m_aLangTable
.end());
1952 return iter
->second
.PutText(rShort
, rLong
);
1957 void SvxAutoCorrect::MakeCombinedChanges( std::vector
<SvxAutocorrWord
>& aNewEntries
,
1958 std::vector
<SvxAutocorrWord
>& aDeleteEntries
,
1959 LanguageType eLang
)
1961 LanguageTag
aLanguageTag( eLang
);
1962 auto iter
= m_aLangTable
.find(aLanguageTag
);
1963 if (iter
!= m_aLangTable
.end())
1965 iter
->second
.MakeCombinedChanges( aNewEntries
, aDeleteEntries
);
1967 else if(CreateLanguageFile( aLanguageTag
))
1969 iter
= m_aLangTable
.find(aLanguageTag
);
1970 assert(iter
!= m_aLangTable
.end());
1971 iter
->second
.MakeCombinedChanges( aNewEntries
, aDeleteEntries
);
// - return the replacement text (only for SWG-Format, all other
// can be taken from the word list!)
// Both parameters are unnamed here, so this implementation does not use them.
// NOTE(review): the function body is not visible in this excerpt.
1977 bool SvxAutoCorrect::GetLongText( const OUString
&, OUString
& )
// The storage parameter is unnamed here, so this implementation does not use it.
// NOTE(review): the function body is not visible in this excerpt.
1982 void SvxAutoCorrect::refreshBlockList( const uno::Reference
< embed::XStorage
>& )
// Text with attribution (only the SWG - SWG format!)
// All parameters are unnamed here, so this implementation does not use them.
// NOTE(review): the function body is not visible in this excerpt.
1987 bool SvxAutoCorrect::PutText( const css::uno::Reference
< css::embed::XStorage
>&,
1988 const OUString
&, const OUString
&, SfxObjectShell
&, OUString
& )
1993 OUString
EncryptBlockName_Imp(std::u16string_view rName
)
1995 OUStringBuffer aName
;
1996 aName
.append('#').append(rName
);
1997 for (size_t nLen
= rName
.size(), nPos
= 1; nPos
< nLen
; ++nPos
)
1999 if (lcl_IsInArr( u
"!/:.\\", aName
[nPos
]))
2000 aName
[nPos
] &= 0x0f;
2002 return aName
.makeStringAndClear();
/* This code is copied from SwXMLTextBlocks::GeneratePackageName */
// Derives a package/storage name from rShort and writes it to rPackageName.
// rShort is converted to UTF-7 bytes, which are read back as ASCII into a
// buffer. The loop then visits every character of that buffer.
// NOTE(review): the per-character handling inside the loop is not visible in
// this excerpt; the only hint is the tdf#156769 comment on question marks.
2006 static void GeneratePackageName ( std::u16string_view rShort
, OUString
& rPackageName
)
2008 OString
sByte(OUStringToOString(rShort
, RTL_TEXTENCODING_UTF7
));
2009 OUStringBuffer
aBuf(OStringToOUString(sByte
, RTL_TEXTENCODING_ASCII_US
));
2011 for (sal_Int32 nPos
= 0; nPos
< aBuf
.getLength(); ++nPos
)
2020 // tdf#156769 - escape the question mark in the storage name
2029 rPackageName
= aBuf
.makeStringAndClear();
2032 static std::optional
<SvxAutocorrWordList::WordSearchStatus
>
2033 lcl_SearchWordsInList( SvxAutoCorrectLanguageLists
* pList
,
2034 std::u16string_view rTxt
,
2035 sal_Int32
& rStt
, sal_Int32 nEndPos
)
2037 const SvxAutocorrWordList
* pAutoCorrWordList
= pList
->GetAutocorrWordList();
2038 return pAutoCorrWordList
->SearchWordsInList( rTxt
, rStt
, nEndPos
);
// the search for the words in the substitution table
//
// Tries up to three language lists in order: the language of rLang (a system
// locale is first resolved to the configured system language), then its
// primary language, then LANGUAGE_UNDETERMINED. Each list is loaded on
// demand via CreateLanguageFile(..., false).
//   rTxt    - text to search in.
//   rStt    - in/out start position handed to the list search.
//   nEndPos - end position of the search range.
//   rLang   - in: language to start with; out: assigned the tag of the list
//             being used (see the rLang assignments below).
// Returns std::nullopt at the end of the function.
// NOTE(review): the checks on pRet and the early return statements are not
// visible in this excerpt; presumably a successful search returns pRet right
// after rLang is updated - confirm against the full source.
2042 std::optional
<SvxAutocorrWordList::WordSearchStatus
>
2043 SvxAutoCorrect::SearchWordsInList(
2044 std::u16string_view rTxt
, sal_Int32
& rStt
, sal_Int32 nEndPos
,
2045 SvxAutoCorrDoc
&, LanguageTag
& rLang
)
2047 LanguageTag
aLanguageTag( rLang
);
2048 if( aLanguageTag
.isSystemLocale() )
2049 aLanguageTag
.reset( MsLangId::getConfiguredSystemLanguage());
2051 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
2054 // First search for eLang, then US-English -> English
2055 // and last in LANGUAGE_UNDETERMINED
2056 if (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() || CreateLanguageFile(aLanguageTag
, false))
2058 //the language is available - so bring it on
2059 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2060 assert(iter
!= m_aLangTable
.end());
2061 SvxAutoCorrectLanguageLists
& rList
= iter
->second
;
2062 auto pRet
= lcl_SearchWordsInList( &rList
, rTxt
, rStt
, nEndPos
);
2065 rLang
= aLanguageTag
;
2070 // If it still could not be found here, then keep on searching
2071 LanguageType eLang
= aLanguageTag
.getLanguageType();
2072 // the primary language for example EN
2073 aLanguageTag
.reset(aLanguageTag
.getLanguage());
2074 LanguageType nTmpKey
= aLanguageTag
.getLanguageType(false);
// The primary language is only tried when it differs from the language
// already searched and is not LANGUAGE_UNDETERMINED.
2075 if (nTmpKey
!= eLang
&& nTmpKey
!= LANGUAGE_UNDETERMINED
&&
2076 (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() ||
2077 CreateLanguageFile(aLanguageTag
, false)))
2079 //the language is available - so bring it on
2080 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2081 assert(iter
!= m_aLangTable
.end());
2082 SvxAutoCorrectLanguageLists
& rList
= iter
->second
;
2083 auto pRet
= lcl_SearchWordsInList( &rList
, rTxt
, rStt
, nEndPos
);
2086 rLang
= aLanguageTag
;
// Last resort: the language independent list.
2091 if (m_aLangTable
.find(aLanguageTag
.reset(LANGUAGE_UNDETERMINED
)) != m_aLangTable
.end() ||
2092 CreateLanguageFile(aLanguageTag
, false))
2094 //the language is available - so bring it on
2095 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2096 assert(iter
!= m_aLangTable
.end());
2097 SvxAutoCorrectLanguageLists
& rList
= iter
->second
;
2098 auto pRet
= lcl_SearchWordsInList( &rList
, rTxt
, rStt
, nEndPos
);
2101 rLang
= std::move(aLanguageTag
);
2105 return std::nullopt
;
2108 bool SvxAutoCorrect::SearchWordsNext(
2109 std::u16string_view rTxt
, sal_Int32
& rStt
, sal_Int32 nEndPos
,
2110 SvxAutocorrWordList::WordSearchStatus
& rStatus
)
2112 const SvxAutocorrWordList
* pWordList
= rStatus
.GetAutocorrWordList();
2113 return pWordList
->SearchWordsNext( rTxt
, rStt
, nEndPos
, rStatus
);
2116 bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang
,
2117 const OUString
& sWord
)
2119 LanguageTag
aLanguageTag( eLang
);
2121 /* TODO-BCP47: again horrible ugliness */
2123 // First search for eLang, then primary language of eLang
2124 // and last in LANGUAGE_UNDETERMINED
2126 if (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() || CreateLanguageFile(aLanguageTag
, false))
2128 //the language is available - so bring it on
2129 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2130 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2131 auto& rList
= iter
->second
;
2132 if(rList
.GetWordStartExceptList()->find(sWord
) != rList
.GetWordStartExceptList()->end() )
2136 // If it still could not be found here, then keep on searching
2137 // the primary language for example EN
2138 aLanguageTag
.reset(aLanguageTag
.getLanguage());
2139 LanguageType nTmpKey
= aLanguageTag
.getLanguageType(false);
2140 if (nTmpKey
!= eLang
&& nTmpKey
!= LANGUAGE_UNDETERMINED
&&
2141 (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() ||
2142 CreateLanguageFile(aLanguageTag
, false)))
2144 //the language is available - so bring it on
2145 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2146 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2147 auto& rList
= iter
->second
;
2148 if(rList
.GetWordStartExceptList()->find(sWord
) != rList
.GetWordStartExceptList()->end() )
2152 if (m_aLangTable
.find(aLanguageTag
.reset(LANGUAGE_UNDETERMINED
)) != m_aLangTable
.end() ||
2153 CreateLanguageFile(aLanguageTag
, false))
2155 //the language is available - so bring it on
2156 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2157 assert(iter
!= m_aLangTable
.end());
2158 auto& rList
= iter
->second
;
2159 if(rList
.GetWordStartExceptList()->find(sWord
) != rList
.GetWordStartExceptList()->end() )
2165 static bool lcl_FindAbbreviation(const SvStringsISortDtor
* pList
, const OUString
& sWord
)
2167 SvStringsISortDtor::const_iterator it
= pList
->find(u
"~"_ustr
);
2168 SvStringsISortDtor::size_type nPos
= it
- pList
->begin();
2169 if( nPos
< pList
->size() )
2171 OUString
sLowerWord(sWord
.toAsciiLowerCase());
2173 for( SvStringsISortDtor::size_type n
= nPos
; n
< pList
->size(); ++n
)
2175 sAbr
= (*pList
)[ n
];
2178 // ~ and ~. are not allowed!
2179 if( 2 < sAbr
.getLength() && sAbr
.getLength() - 1 <= sWord
.getLength() )
2181 OUString
sLowerAbk(sAbr
.toAsciiLowerCase());
2182 for (sal_Int32 i
= sLowerAbk
.getLength(), ii
= sLowerWord
.getLength(); i
;)
2184 if( !--i
) // agrees
2187 if( sLowerAbk
[i
] != sLowerWord
[--ii
])
2193 OSL_ENSURE( !(nPos
&& '~' == (*pList
)[ --nPos
][ 0 ] ),
2194 "Wrongly sorted exception list?" );
2198 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang
,
2199 const OUString
& sWord
, bool bAbbreviation
)
2201 LanguageTag
aLanguageTag( eLang
);
2203 /* TODO-BCP47: did I mention terrible horrible ugliness? */
2205 // First search for eLang, then primary language of eLang
2206 // and last in LANGUAGE_UNDETERMINED
2208 if (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() || CreateLanguageFile(aLanguageTag
, false))
2210 //the language is available - so bring it on
2211 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2212 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2213 const SvStringsISortDtor
* pList
= iter
->second
.GetCplSttExceptList();
2214 if(bAbbreviation
? lcl_FindAbbreviation(pList
, sWord
) : pList
->find(sWord
) != pList
->end() )
2218 // If it still could not be found here, then keep on searching
2219 // the primary language for example EN
2220 aLanguageTag
.reset(aLanguageTag
.getLanguage());
2221 LanguageType nTmpKey
= aLanguageTag
.getLanguageType(false);
2222 if (nTmpKey
!= eLang
&& nTmpKey
!= LANGUAGE_UNDETERMINED
&&
2223 (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() ||
2224 CreateLanguageFile(aLanguageTag
, false)))
2226 //the language is available - so bring it on
2227 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2228 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2229 const SvStringsISortDtor
* pList
= iter
->second
.GetCplSttExceptList();
2230 if(bAbbreviation
? lcl_FindAbbreviation(pList
, sWord
) : pList
->find(sWord
) != pList
->end() )
2234 if (m_aLangTable
.find(aLanguageTag
.reset(LANGUAGE_UNDETERMINED
)) != m_aLangTable
.end() ||
2235 CreateLanguageFile(aLanguageTag
, false))
2237 //the language is available - so bring it on
2238 const auto iter
= m_aLangTable
.find(aLanguageTag
);
2239 assert(iter
!= m_aLangTable
.end() && "CreateLanguageFile can't fail");
2240 const SvStringsISortDtor
* pList
= iter
->second
.GetCplSttExceptList();
2241 if(bAbbreviation
? lcl_FindAbbreviation(pList
, sWord
) : pList
->find(sWord
) != pList
->end() )
2247 OUString
SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag
& rLanguageTag
,
2248 bool bNewFile
, bool bTst
, bool bUnlocalized
) const
2250 OUString sRet
, sExt( rLanguageTag
.getBcp47() );
2253 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2254 std::vector
< OUString
> vecFallBackStrings
= rLanguageTag
.getFallbackStrings(false);
2255 if (!vecFallBackStrings
.empty())
2256 sExt
= vecFallBackStrings
[0];
2259 sExt
= "_" + sExt
+ ".dat";
2261 sRet
= sUserAutoCorrFile
+ sExt
;
2263 sRet
= sShareAutoCorrFile
+ sExt
;
2266 // test first in the user directory - if not exist, then
2267 sRet
= sUserAutoCorrFile
+ sExt
;
2268 if( !FStatHelper::IsDocument( sRet
))
2269 sRet
= sShareAutoCorrFile
+ sExt
;
2274 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2275 SvxAutoCorrect
& rParent
,
2276 OUString aShareAutoCorrectFile
,
2277 OUString aUserAutoCorrectFile
)
2278 : sShareAutoCorrFile(std::move( aShareAutoCorrectFile
)),
2279 sUserAutoCorrFile(std::move( aUserAutoCorrectFile
)),
2280 aModifiedDate( Date::EMPTY
),
2281 aModifiedTime( tools::Time::EMPTY
),
2282 aLastCheckTime( tools::Time::EMPTY
),
2283 rAutoCorrect(rParent
),
2284 nFlags(ACFlags::NONE
)
2288 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2292 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2294 // Access the file system only every 2 minutes to check the date stamp
2297 tools::Time
nMinTime( 0, 2 );
2298 tools::Time
nAktTime( tools::Time::SYSTEM
);
2299 if( aLastCheckTime
<= nAktTime
) // overflow?
2301 nAktTime
-= aLastCheckTime
;
2302 if( nAktTime
> nMinTime
) // min time past
2304 Date
aTstDate( Date::EMPTY
); tools::Time
aTstTime( tools::Time::EMPTY
);
2305 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile
,
2306 &aTstDate
, &aTstTime
) &&
2307 ( aModifiedDate
!= aTstDate
|| aModifiedTime
!= aTstTime
))
2310 // then remove all the lists fast!
2311 if( (ACFlags::CplSttLstLoad
& nFlags
) && pCplStt_ExcptLst
)
2313 pCplStt_ExcptLst
.reset();
2315 if( (ACFlags::WordStartLstLoad
& nFlags
) && pWordStart_ExcptLst
)
2317 pWordStart_ExcptLst
.reset();
2319 if( (ACFlags::ChgWordLstLoad
& nFlags
) && pAutocorr_List
)
2321 pAutocorr_List
.reset();
2323 nFlags
&= ~ACFlags(ACFlags::CplSttLstLoad
| ACFlags::WordStartLstLoad
| ACFlags::ChgWordLstLoad
);
2325 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2330 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2331 std::unique_ptr
<SvStringsISortDtor
>& rpLst
,
2332 const OUString
& sStrmName
,
2333 rtl::Reference
<SotStorage
>& rStg
)
2338 rpLst
.reset( new SvStringsISortDtor
);
2341 if( rStg
.is() && rStg
->IsStream( sStrmName
) )
2343 rtl::Reference
<SotStorageStream
> xStrm
= rStg
->OpenSotStream( sStrmName
,
2344 ( StreamMode::READ
| StreamMode::SHARE_DENYWRITE
| StreamMode::NOCREATE
) );
2345 if( ERRCODE_NONE
!= xStrm
->GetError())
2349 RemoveStream_Imp( sStrmName
);
2353 const uno::Reference
< uno::XComponentContext
>& xContext
=
2354 comphelper::getProcessComponentContext();
2356 xml::sax::InputSource aParserInput
;
2357 aParserInput
.sSystemId
= sStrmName
;
2360 xStrm
->SetBufferSize( 8 * 1024 );
2361 aParserInput
.aInputStream
= new utl::OInputStreamWrapper( *xStrm
);
2364 uno::Reference
< xml::sax::XFastDocumentHandler
> xFilter
= new SvXMLExceptionListImport ( xContext
, *rpLst
);
2366 // connect parser and filter
2367 uno::Reference
< xml::sax::XFastParser
> xParser
= xml::sax::FastParser::create( xContext
);
2368 uno::Reference
<xml::sax::XFastTokenHandler
> xTokenHandler
= new SvXMLAutoCorrectTokenHandler
;
2369 xParser
->setFastDocumentHandler( xFilter
);
2370 xParser
->registerNamespace( u
"http://openoffice.org/2001/block-list"_ustr
, SvXMLAutoCorrectToken::NAMESPACE
);
2371 xParser
->setTokenHandler( xTokenHandler
);
2376 xParser
->parseStream( aParserInput
);
2378 catch( const xml::sax::SAXParseException
& )
2382 catch( const xml::sax::SAXException
& )
2386 catch( const io::IOException
& )
2394 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile
,
2395 &aModifiedDate
, &aModifiedTime
);
2396 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2401 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2402 const SvStringsISortDtor
& rLst
,
2403 const OUString
& sStrmName
,
2404 rtl::Reference
<SotStorage
> const &rStg
,
2412 rStg
->Remove( sStrmName
);
2417 rtl::Reference
<SotStorageStream
> xStrm
= rStg
->OpenSotStream( sStrmName
,
2418 ( StreamMode::READ
| StreamMode::WRITE
| StreamMode::SHARE_DENYWRITE
) );
2421 xStrm
->SetSize( 0 );
2422 xStrm
->SetBufferSize( 8192 );
2423 xStrm
->SetProperty( u
"MediaType"_ustr
, Any(u
"text/xml"_ustr
) );
2426 const uno::Reference
< uno::XComponentContext
>& xContext
=
2427 comphelper::getProcessComponentContext();
2429 uno::Reference
< xml::sax::XWriter
> xWriter
= xml::sax::Writer::create(xContext
);
2430 uno::Reference
< io::XOutputStream
> xOut
= new utl::OOutputStreamWrapper( *xStrm
);
2431 xWriter
->setOutputStream(xOut
);
2433 uno::Reference
< xml::sax::XDocumentHandler
> xHandler(xWriter
, UNO_QUERY_THROW
);
2434 rtl::Reference
< SvXMLExceptionListExport
> xExp( new SvXMLExceptionListExport( xContext
, rLst
, sStrmName
, xHandler
) );
2436 xExp
->exportDoc( XML_BLOCK_LIST
);
2439 if( xStrm
->GetError() == ERRCODE_NONE
)
2445 if( ERRCODE_NONE
!= rStg
->GetError() )
2447 rStg
->Remove( sStrmName
);
2456 SvxAutocorrWordList
* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2458 if( pAutocorr_List
)
2459 pAutocorr_List
->DeleteAndDestroyAll();
2461 pAutocorr_List
.reset( new SvxAutocorrWordList() );
2465 uno::Reference
< embed::XStorage
> xStg
= comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile
, embed::ElementModes::READ
);
2466 uno::Reference
< io::XStream
> xStrm
= xStg
->openStreamElement( pXMLImplAutocorr_ListStr
, embed::ElementModes::READ
);
2467 const uno::Reference
< uno::XComponentContext
>& xContext
= comphelper::getProcessComponentContext();
2469 xml::sax::InputSource aParserInput
;
2470 aParserInput
.sSystemId
= pXMLImplAutocorr_ListStr
;
2471 aParserInput
.aInputStream
= xStrm
->getInputStream();
2474 uno::Reference
< xml::sax::XFastParser
> xParser
= xml::sax::FastParser::create(xContext
);
2475 SAL_INFO("editeng", "AutoCorrect Import" );
2476 uno::Reference
< xml::sax::XFastDocumentHandler
> xFilter
= new SvXMLAutoCorrectImport( xContext
, pAutocorr_List
.get(), rAutoCorrect
, xStg
);
2477 uno::Reference
<xml::sax::XFastTokenHandler
> xTokenHandler
= new SvXMLAutoCorrectTokenHandler
;
2479 // connect parser and filter
2480 xParser
->setFastDocumentHandler( xFilter
);
2481 xParser
->registerNamespace( u
"http://openoffice.org/2001/block-list"_ustr
, SvXMLAutoCorrectToken::NAMESPACE
);
2482 xParser
->setTokenHandler(xTokenHandler
);
2485 xParser
->parseStream( aParserInput
);
2487 catch ( const uno::Exception
& )
2489 TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile
);
2493 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile
,
2494 &aModifiedDate
, &aModifiedTime
);
2495 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2497 return pAutocorr_List
.get();
2500 const SvxAutocorrWordList
* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2502 if( !( ACFlags::ChgWordLstLoad
& nFlags
) || IsFileChanged_Imp() )
2504 LoadAutocorrWordList();
2505 if( !pAutocorr_List
)
2507 OSL_ENSURE( false, "No valid list" );
2508 pAutocorr_List
.reset( new SvxAutocorrWordList() );
2510 nFlags
|= ACFlags::ChgWordLstLoad
;
2512 return pAutocorr_List
.get();
2515 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2517 if( !( ACFlags::CplSttLstLoad
& nFlags
) || IsFileChanged_Imp() )
2519 LoadCplSttExceptList();
2520 if( !pCplStt_ExcptLst
)
2522 OSL_ENSURE( false, "No valid list" );
2523 pCplStt_ExcptLst
.reset( new SvStringsISortDtor
);
2525 nFlags
|= ACFlags::CplSttLstLoad
;
2527 return pCplStt_ExcptLst
.get();
2530 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString
& rNew
)
2533 if( !rNew
.isEmpty() && GetCplSttExceptList()->insert( rNew
).second
)
2535 MakeUserStorage_Impl();
2536 rtl::Reference
<SotStorage
> xStg
= new SotStorage(sUserAutoCorrFile
, StreamMode::READWRITE
);
2538 SaveExceptList_Imp( *pCplStt_ExcptLst
, pXMLImplCplStt_ExcptLstStr
, xStg
);
2542 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2543 &aModifiedDate
, &aModifiedTime
);
2544 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2550 bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString
& rNew
)
2553 if( !rNew
.isEmpty() && GetWordStartExceptList()->insert( rNew
).second
)
2555 MakeUserStorage_Impl();
2556 rtl::Reference
<SotStorage
> xStg
= new SotStorage(sUserAutoCorrFile
, StreamMode::READWRITE
);
2558 SaveExceptList_Imp( *pWordStart_ExcptLst
, pXMLImplWordStart_ExcptLstStr
, xStg
);
2562 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2563 &aModifiedDate
, &aModifiedTime
);
2564 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2570 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2574 rtl::Reference
<SotStorage
> xStg
= new SotStorage( sShareAutoCorrFile
, StreamMode::READ
| StreamMode::SHARE_DENYNONE
);
2575 if( xStg
.is() && xStg
->IsContained( pXMLImplCplStt_ExcptLstStr
) )
2576 LoadXMLExceptList_Imp( pCplStt_ExcptLst
, pXMLImplCplStt_ExcptLstStr
, xStg
);
2578 catch (const css::ucb::ContentCreationException
&)
2581 return pCplStt_ExcptLst
.get();
2584 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2586 MakeUserStorage_Impl();
2587 rtl::Reference
<SotStorage
> xStg
= new SotStorage(sUserAutoCorrFile
, StreamMode::READWRITE
);
2589 SaveExceptList_Imp( *pCplStt_ExcptLst
, pXMLImplCplStt_ExcptLstStr
, xStg
);
2594 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2595 &aModifiedDate
, &aModifiedTime
);
2596 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2599 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::LoadWordStartExceptList()
2603 rtl::Reference
<SotStorage
> xStg
= new SotStorage( sShareAutoCorrFile
, StreamMode::READ
| StreamMode::SHARE_DENYNONE
);
2604 if( xStg
.is() && xStg
->IsContained( pXMLImplWordStart_ExcptLstStr
) )
2605 LoadXMLExceptList_Imp( pWordStart_ExcptLst
, pXMLImplWordStart_ExcptLstStr
, xStg
);
2607 catch (const css::ucb::ContentCreationException
&)
2609 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList");
2611 return pWordStart_ExcptLst
.get();
2614 void SvxAutoCorrectLanguageLists::SaveWordStartExceptList()
2616 MakeUserStorage_Impl();
2617 rtl::Reference
<SotStorage
> xStg
= new SotStorage(sUserAutoCorrFile
, StreamMode::READWRITE
);
2619 SaveExceptList_Imp( *pWordStart_ExcptLst
, pXMLImplWordStart_ExcptLstStr
, xStg
);
2623 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2624 &aModifiedDate
, &aModifiedTime
);
2625 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2628 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::GetWordStartExceptList()
2630 if( !( ACFlags::WordStartLstLoad
& nFlags
) || IsFileChanged_Imp() )
2632 LoadWordStartExceptList();
2633 if( !pWordStart_ExcptLst
)
2635 OSL_ENSURE( false, "No valid list" );
2636 pWordStart_ExcptLst
.reset( new SvStringsISortDtor
);
2638 nFlags
|= ACFlags::WordStartLstLoad
;
2640 return pWordStart_ExcptLst
.get();
2643 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString
& rName
)
2645 if( sShareAutoCorrFile
!= sUserAutoCorrFile
)
2647 rtl::Reference
<SotStorage
> xStg
= new SotStorage(sUserAutoCorrFile
, StreamMode::READWRITE
);
2648 if( xStg
.is() && ERRCODE_NONE
== xStg
->GetError() &&
2649 xStg
->IsStream( rName
) )
2651 xStg
->Remove( rName
);
2659 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2661 // The conversion needs to happen if the file is already in the user
2662 // directory and is in the old format. Additionally it needs to
2663 // happen when the file is being copied from share to user.
2665 bool bError
= false, bConvert
= false, bCopy
= false;
2666 INetURLObject aDest
;
2667 INetURLObject aSource
;
2669 if (sUserAutoCorrFile
!= sShareAutoCorrFile
)
2671 aSource
= INetURLObject ( sShareAutoCorrFile
);
2672 aDest
= INetURLObject ( sUserAutoCorrFile
);
2673 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile
) )
2675 aDest
.SetExtension ( u
"bak" );
2680 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile
) )
2682 aSource
= INetURLObject ( sUserAutoCorrFile
);
2683 aDest
= INetURLObject ( sUserAutoCorrFile
);
2684 aDest
.SetExtension ( u
"bak" );
2685 bCopy
= bConvert
= true;
2691 OUString
sMain(aDest
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
));
2692 sal_Int32 nSlashPos
= sMain
.lastIndexOf('/');
2693 sMain
= sMain
.copy(0, nSlashPos
);
2694 ::ucbhelper::Content
aNewContent( sMain
, uno::Reference
< XCommandEnvironment
>(), comphelper::getProcessComponentContext() );
2696 aInfo
.NameClash
= NameClash::OVERWRITE
;
2697 aInfo
.NewTitle
= aDest
.GetLastName();
2698 aInfo
.SourceURL
= aSource
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
);
2699 aInfo
.MoveData
= false;
2700 aNewContent
.executeCommand( u
"transfer"_ustr
, Any(aInfo
));
2707 if (bConvert
&& !bError
)
2709 rtl::Reference
<SotStorage
> xSrcStg
= new SotStorage( aDest
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
), StreamMode::READ
);
2710 rtl::Reference
<SotStorage
> xDstStg
= new SotStorage(sUserAutoCorrFile
, StreamMode::WRITE
);
2712 if( xSrcStg
.is() && xDstStg
.is() )
2714 std::unique_ptr
<SvStringsISortDtor
> pTmpWordList
;
2716 if (xSrcStg
->IsContained( pXMLImplWordStart_ExcptLstStr
) )
2717 LoadXMLExceptList_Imp( pTmpWordList
, pXMLImplWordStart_ExcptLstStr
, xSrcStg
);
2721 SaveExceptList_Imp( *pTmpWordList
, pXMLImplWordStart_ExcptLstStr
, xDstStg
, true );
2722 pTmpWordList
.reset();
2726 if (xSrcStg
->IsContained( pXMLImplCplStt_ExcptLstStr
) )
2727 LoadXMLExceptList_Imp( pTmpWordList
, pXMLImplCplStt_ExcptLstStr
, xSrcStg
);
2731 SaveExceptList_Imp( *pTmpWordList
, pXMLImplCplStt_ExcptLstStr
, xDstStg
, true );
2732 pTmpWordList
->clear();
2735 GetAutocorrWordList();
2736 MakeBlocklist_Imp( *xDstStg
);
2737 sShareAutoCorrFile
= sUserAutoCorrFile
;
2741 ::ucbhelper::Content
aContent ( aDest
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
), uno::Reference
< XCommandEnvironment
>(), comphelper::getProcessComponentContext() );
2742 aContent
.executeCommand ( u
"delete"_ustr
, Any ( true ) );
2749 else if( bCopy
&& !bError
)
2750 sShareAutoCorrFile
= sUserAutoCorrFile
;
2753 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage
& rStg
)
2755 bool bRet
= true, bRemove
= !pAutocorr_List
|| pAutocorr_List
->empty();
2758 rtl::Reference
<SotStorageStream
> refList
= rStg
.OpenSotStream( pXMLImplAutocorr_ListStr
,
2759 ( StreamMode::READ
| StreamMode::WRITE
| StreamMode::SHARE_DENYWRITE
) );
2762 refList
->SetSize( 0 );
2763 refList
->SetBufferSize( 8192 );
2764 refList
->SetProperty( u
"MediaType"_ustr
, Any(u
"text/xml"_ustr
) );
2766 const uno::Reference
< uno::XComponentContext
>& xContext
=
2767 comphelper::getProcessComponentContext();
2769 uno::Reference
< xml::sax::XWriter
> xWriter
= xml::sax::Writer::create(xContext
);
2770 uno::Reference
< io::XOutputStream
> xOut
= new utl::OOutputStreamWrapper( *refList
);
2771 xWriter
->setOutputStream(xOut
);
2773 rtl::Reference
< SvXMLAutoCorrectExport
> xExp( new SvXMLAutoCorrectExport( xContext
, pAutocorr_List
.get(), pXMLImplAutocorr_ListStr
, xWriter
) );
2775 xExp
->exportDoc( XML_BLOCK_LIST
);
2778 bRet
= ERRCODE_NONE
== refList
->GetError();
2783 if( ERRCODE_NONE
!= rStg
.GetError() )
2796 rStg
.Remove( pXMLImplAutocorr_ListStr
);
2803 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector
<SvxAutocorrWord
>& aNewEntries
, std::vector
<SvxAutocorrWord
>& aDeleteEntries
)
2805 // First get the current list!
2806 GetAutocorrWordList();
2808 MakeUserStorage_Impl();
2809 rtl::Reference
<SotStorage
> xStorage
= new SotStorage(sUserAutoCorrFile
, StreamMode::READWRITE
);
2811 bool bRet
= xStorage
.is() && ERRCODE_NONE
== xStorage
->GetError();
2815 for (SvxAutocorrWord
& aWordToDelete
: aDeleteEntries
)
2817 std::optional
<SvxAutocorrWord
> xFoundEntry
= pAutocorr_List
->FindAndRemove( &aWordToDelete
);
2820 if( !xFoundEntry
->IsTextOnly() )
2822 OUString
aName( aWordToDelete
.GetShort() );
2823 if (xStorage
->IsOLEStorage())
2824 aName
= EncryptBlockName_Imp(aName
);
2826 GeneratePackageName ( aWordToDelete
.GetShort(), aName
);
2828 if( xStorage
->IsContained( aName
) )
2830 xStorage
->Remove( aName
);
2831 bRet
= xStorage
->Commit();
2837 for (const SvxAutocorrWord
& aNewEntrie
: aNewEntries
)
2839 SvxAutocorrWord
aWordToAdd(aNewEntrie
.GetShort(), aNewEntrie
.GetLong(), true );
2840 std::optional
<SvxAutocorrWord
> xRemoved
= pAutocorr_List
->FindAndRemove( &aWordToAdd
);
2843 if( !xRemoved
->IsTextOnly() )
2845 // Still have to remove the Storage
2846 OUString
sStorageName( aWordToAdd
.GetShort() );
2847 if (xStorage
->IsOLEStorage())
2848 sStorageName
= EncryptBlockName_Imp(sStorageName
);
2850 GeneratePackageName ( aWordToAdd
.GetShort(), sStorageName
);
2852 if( xStorage
->IsContained( sStorageName
) )
2853 xStorage
->Remove( sStorageName
);
2856 bRet
= pAutocorr_List
->Insert( std::move(aWordToAdd
) );
2866 bRet
= MakeBlocklist_Imp( *xStorage
);
2872 bool SvxAutoCorrectLanguageLists::PutText( const OUString
& rShort
, const OUString
& rLong
)
2874 // First get the current list!
2875 GetAutocorrWordList();
2877 MakeUserStorage_Impl();
2878 rtl::Reference
<SotStorage
> xStg
= new SotStorage(sUserAutoCorrFile
, StreamMode::READWRITE
);
2880 bool bRet
= xStg
.is() && ERRCODE_NONE
== xStg
->GetError();
2882 // Update the word list
2885 SvxAutocorrWord
aNew(rShort
, rLong
, true );
2886 std::optional
<SvxAutocorrWord
> xRemove
= pAutocorr_List
->FindAndRemove( &aNew
);
2889 if( !xRemove
->IsTextOnly() )
2891 // Still have to remove the Storage
2892 OUString
sStgNm( rShort
);
2893 if (xStg
->IsOLEStorage())
2894 sStgNm
= EncryptBlockName_Imp(sStgNm
);
2896 GeneratePackageName ( rShort
, sStgNm
);
2898 if( xStg
->IsContained( sStgNm
) )
2899 xStg
->Remove( sStgNm
);
2903 if( pAutocorr_List
->Insert( std::move(aNew
) ) )
2905 bRet
= MakeBlocklist_Imp( *xStg
);
2916 void SvxAutoCorrectLanguageLists::PutText( const OUString
& rShort
,
2917 SfxObjectShell
& rShell
)
2919 // First get the current list!
2920 GetAutocorrWordList();
2922 MakeUserStorage_Impl();
2926 uno::Reference
< embed::XStorage
> xStg
= comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile
, embed::ElementModes::READWRITE
);
2928 bool bRet
= rAutoCorrect
.PutText( xStg
, sUserAutoCorrFile
, rShort
, rShell
, sLong
);
2931 // Update the word list
2934 if( pAutocorr_List
->Insert( SvxAutocorrWord(rShort
, sLong
, false) ) )
2936 rtl::Reference
<SotStorage
> xStor
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2937 MakeBlocklist_Imp( *xStor
);
2941 catch ( const uno::Exception
& )
2946 // Keep the list sorted ...
2947 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2949 bool operator()( SvxAutocorrWord
const & lhs
, SvxAutocorrWord
const & rhs
) const
2951 CollatorWrapper
& rCmp
= ::GetCollatorWrapper();
2952 return rCmp
.compareString( lhs
.GetShort(), rhs
.GetShort() ) < 0;
2958 typedef std::unordered_map
<OUString
, SvxAutocorrWord
> AutocorrWordHashType
;
2962 struct SvxAutocorrWordList::Impl
2965 // only one of these contains the data
2966 // maSortedVector is manually sorted so we can optimise data movement
2967 mutable AutocorrWordSetType maSortedVector
;
2968 mutable AutocorrWordHashType maHash
; // key is 'Short'
2970 void DeleteAndDestroyAll()
2973 maSortedVector
.clear();
2977 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl
) {}
2979 SvxAutocorrWordList::~SvxAutocorrWordList()
2983 void SvxAutocorrWordList::DeleteAndDestroyAll()
2985 mpImpl
->DeleteAndDestroyAll();
2988 struct SvxAutocorrWordList::Iterator::Impl
{
2989 typedef SvxAutocorrWordList::AutocorrWordSetType::const_iterator VecIterType
;
2990 typedef AutocorrWordHashType::const_iterator HashIterType
;
2992 HashIterType mHashIter
, mHashEnd
;
2993 VecIterType mSortedVectorIter
, mSortedVectorEnd
;
2995 Impl(const HashIterType
& hashIter
, const HashIterType
& hashEnd
,
2996 const VecIterType
& vecIter
, const VecIterType
& vecEnd
)
2997 : mHashIter(hashIter
), mHashEnd(hashEnd
),
2998 mSortedVectorIter(vecIter
), mSortedVectorEnd(vecEnd
) {}
3001 // Iterate hash table, followed by sorted vector
3002 if (mHashIter
!= mHashEnd
) {
3003 return ++mHashIter
!= mHashEnd
3004 || mSortedVectorIter
!= mSortedVectorEnd
;
3006 return ++mSortedVectorIter
!= mSortedVectorEnd
;
3009 const SvxAutocorrWord
& operator*() {
3010 return (mHashIter
== mHashEnd
) ? *mSortedVectorIter
: mHashIter
->second
;
3012 const SvxAutocorrWord
* operator->() {
3013 return (mHashIter
== mHashEnd
) ? &*mSortedVectorIter
: &mHashIter
->second
;
3017 SvxAutocorrWordList::Iterator::Iterator(
3018 std::unique_ptr
<SvxAutocorrWordList::Iterator::Impl
> pImpl
3019 ) : mpImpl(std::move(pImpl
))
3023 SvxAutocorrWordList::Iterator::Iterator(
3024 const SvxAutocorrWordList::Iterator
& it
3025 ) : mpImpl(new Impl(*(it
.mpImpl
)))
3029 SvxAutocorrWordList::Iterator::~Iterator()
3033 bool SvxAutocorrWordList::Iterator::Step()
3035 return mpImpl
->Step();
3038 const SvxAutocorrWord
& SvxAutocorrWordList::Iterator::operator*() const
3043 const SvxAutocorrWord
* SvxAutocorrWordList::Iterator::operator->() const
3045 return mpImpl
->operator->();
3048 bool SvxAutocorrWordList::ContainsPattern(const OUString
& aShort
) const
3050 // check hash table first
3051 if (mpImpl
->maHash
.contains(aShort
)) {
3055 // then do binary search on sorted vector
3056 CollatorWrapper
& rCmp
= ::GetCollatorWrapper();
3057 auto it
= std::lower_bound(mpImpl
->maSortedVector
.begin(),
3058 mpImpl
->maSortedVector
.end(),
3060 [&](const SvxAutocorrWord
& elm
,
3061 const OUString
& val
) {
3062 return rCmp
.compareString(elm
.GetShort(),
3065 if (it
!= mpImpl
->maSortedVector
.end()
3066 && rCmp
.compareString(aShort
, it
->GetShort()) == 0)
3074 // returns true if inserted
3075 const SvxAutocorrWord
* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord
) const
3077 if ( mpImpl
->maSortedVector
.empty() ) // use the hash
3079 OUString aShort
= aWord
.GetShort();
3080 auto [it
,inserted
] = mpImpl
->maHash
.emplace( std::move(aShort
), std::move(aWord
) );
3082 return &(it
->second
);
3087 auto it
= std::lower_bound(mpImpl
->maSortedVector
.begin(), mpImpl
->maSortedVector
.end(), aWord
, CompareSvxAutocorrWordList());
3088 CollatorWrapper
& rCmp
= ::GetCollatorWrapper();
3089 if (it
== mpImpl
->maSortedVector
.end() || rCmp
.compareString( aWord
.GetShort(), it
->GetShort() ) != 0)
3091 it
= mpImpl
->maSortedVector
.insert(it
, std::move(aWord
));
3098 void SvxAutocorrWordList::LoadEntry(const OUString
& sWrong
, const OUString
& sRight
, bool bOnlyTxt
)
3100 (void)Insert(SvxAutocorrWord( sWrong
, sRight
, bOnlyTxt
));
3103 bool SvxAutocorrWordList::empty() const
3105 return mpImpl
->maHash
.empty() && mpImpl
->maSortedVector
.empty();
3108 std::optional
<SvxAutocorrWord
> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord
*pWord
)
3111 if ( mpImpl
->maSortedVector
.empty() ) // use the hash
3113 AutocorrWordHashType::iterator it
= mpImpl
->maHash
.find( pWord
->GetShort() );
3114 if( it
!= mpImpl
->maHash
.end() )
3116 SvxAutocorrWord pMatch
= std::move(it
->second
);
3117 mpImpl
->maHash
.erase (it
);
3123 auto it
= std::lower_bound(mpImpl
->maSortedVector
.begin(), mpImpl
->maSortedVector
.end(), *pWord
, CompareSvxAutocorrWordList());
3124 if (it
!= mpImpl
->maSortedVector
.end() && !CompareSvxAutocorrWordList()(*pWord
, *it
))
3126 SvxAutocorrWord pMatch
= std::move(*it
);
3127 mpImpl
->maSortedVector
.erase (it
);
3131 return std::optional
<SvxAutocorrWord
>();
3134 // return the sorted contents - defer sorting until we have to.
3135 const SvxAutocorrWordList::AutocorrWordSetType
& SvxAutocorrWordList::getSortedContent() const
3137 // convert from hash to set permanently
3138 if ( mpImpl
->maSortedVector
.empty() )
3140 std::vector
<SvxAutocorrWord
> tmp
;
3141 tmp
.reserve(mpImpl
->maHash
.size());
3142 for (auto & rPair
: mpImpl
->maHash
)
3143 tmp
.emplace_back(std::move(rPair
.second
));
3144 mpImpl
->maHash
.clear();
3145 // sort twice - this gets the list into mostly-sorted order, which
3146 // reduces the number of times we need to invoke the expensive ICU collate fn.
3147 std::sort(tmp
.begin(), tmp
.end(),
3148 [] ( SvxAutocorrWord
const & lhs
, SvxAutocorrWord
const & rhs
)
3150 return lhs
.GetShort() < rhs
.GetShort();
3152 // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
3153 // stable_sort is twice as fast as sort in this situation because it does
3154 // fewer comparison operations.
3155 std::stable_sort(tmp
.begin(), tmp
.end(), CompareSvxAutocorrWordList());
3156 mpImpl
->maSortedVector
= std::move(tmp
);
3158 return mpImpl
->maSortedVector
;
// Tests whether the autocorrect entry *pFnd matches the part of rTxt that ends
// at nEndPos. Returns std::nullopt when the entry does not apply. The entry's
// short form may start and/or end with ".*", which is handled as a wildcard;
// for wildcard matches a new SvxAutocorrWord is built from the matched text
// and a replacement derived from the entry's long form. A short form that
// starts and ends with ':' is handled specially when rTxt has ':' at nEndPos.
// NOTE(review): rStt is read and modified below, but its declaration is not
// visible in this excerpt - presumably an in/out start position parameter;
// confirm against the header.
3161 std::optional
<SvxAutocorrWord
>
3162 SvxAutocorrWordList::WordMatches(const SvxAutocorrWord
*pFnd
,
3163 std::u16string_view rTxt
,
3165 sal_Int32 nEndPos
) const
3167 const OUString
& rChk
= pFnd
->GetShort();
3169 sal_Int32 left_wildcard
= rChk
.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
3170 sal_Int32 right_wildcard
= rChk
.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
3171 assert(nEndPos
>= 0);
3172 size_t nSttWdPos
= nEndPos
;
3174 // direct replacement of keywords surrounded by colons (for example, ":name:")
3175 bool bColonNameColon
= static_cast<sal_Int32
>(rTxt
.size()) > nEndPos
&&
3176 rTxt
[nEndPos
] == ':' && rChk
[0] == ':' && rChk
.endsWith(":");
// the text up to nEndPos (plus the closing ':' if applicable) is shorter than
// the pattern without its wildcard markers: the entry cannot match
3177 if ( nEndPos
+ (bColonNameColon
? 1 : 0) < rChk
.getLength() - left_wildcard
- right_wildcard
)
3179 return std::nullopt
;
3182 bool bWasWordDelim
= false;
// position at which the pattern (minus a leading ".*") would have to start in
// order to end at nEndPos
3183 sal_Int32 nCalcStt
= nEndPos
- rChk
.getLength() + left_wildcard
;
3184 if (bColonNameColon
)
3186 if( !right_wildcard
&& ( !nCalcStt
|| nCalcStt
== rStt
|| left_wildcard
|| bColonNameColon
||
3187 ( nCalcStt
< rStt
&&
3188 IsWordDelim( rTxt
[ nCalcStt
- 1 ] ))) )
3190 TransliterationWrapper
& rCmp
= GetIgnoreTranslWrapper();
3191 OUString
sWord( rTxt
.substr(nCalcStt
, rChk
.getLength() - left_wildcard
) );
3192 if( (!left_wildcard
&& rCmp
.isEqual( rChk
, sWord
)) || (left_wildcard
&& rCmp
.isEqual( rChk
.copy(left_wildcard
), sWord
) ))
3197 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
3198 if (static_cast<sal_Int32
>(rTxt
.size()) > nEndPos
&& rTxt
[nEndPos
] == '/' && rChk
.indexOf('/') != -1)
3200 return std::nullopt
;
3204 // get the first word delimiter position before the matching ".*word" pattern
3205 while( rStt
&& !(bWasWordDelim
= IsWordDelim( rTxt
[ --rStt
])))
3207 if (bWasWordDelim
) rStt
++;
3209 // don't let wildcard pattern override non-wildcard one
3210 OUString
aShort(rTxt
.substr(rStt
, nEndPos
- rStt
));
3211 if (ContainsPattern(aShort
)) {
3212 return std::nullopt
;
// text between the word start and the part matched by the pattern; it is kept
// as a prefix of the replacement
3215 OUString
left_pattern( rTxt
.substr(rStt
, nEndPos
- rStt
- rChk
.getLength() + left_wildcard
) );
3216 // avoid double spaces before simple "word" replacement
// NOTE(review): GetLong()[0] is read unconditionally here - assumes the long
// form is never empty; confirm.
3217 left_pattern
+= (left_pattern
.getLength() == 0 && pFnd
->GetLong()[0] == 0x20) ? pFnd
->GetLong().subView(1) : pFnd
->GetLong();
3218 return SvxAutocorrWord(aShort
, left_pattern
);
3221 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
3222 if ( right_wildcard
)
// pattern text without the leading/trailing ".*" markers
3225 OUString
sTmp( rChk
.copy( left_wildcard
, rChk
.getLength() - left_wildcard
- right_wildcard
) );
3226 // Get the last word delimiter position
3229 while( nSttWdPos
&& !(bWasWordDelim
= IsWordDelim( rTxt
[ --nSttWdPos
])))
3231 // search the first occurrence (with a left word delimitation, if needed)
// nFndPos is unsigned: if rStt is 0 the subtraction wraps around, and the
// "nFndPos + 1" passed to find() below wraps back to 0
3232 size_t nFndPos
= rStt
- 1;
3234 nFndPos
= rTxt
.find( sTmp
, nFndPos
+ 1);
3235 if (nFndPos
== std::u16string_view::npos
)
3237 not_suffix
= bWasWordDelim
&& (nSttWdPos
>= (nFndPos
+ sTmp
.getLength()));
3238 } while ( (!left_wildcard
&& nFndPos
&& !IsWordDelim( rTxt
[ nFndPos
- 1 ])) || not_suffix
);
3240 if ( nFndPos
!= std::u16string_view::npos
)
3242 sal_Int32 extra_repl
= static_cast<sal_Int32
>(nFndPos
) + sTmp
.getLength() > nEndPos
? 1: 0; // for patterns with terminating characters, eg. "a:"
3244 if ( left_wildcard
)
3246 // get the first word delimiter position before the matching ".*word.*" pattern
3247 while( nFndPos
&& !(bWasWordDelim
= IsWordDelim( rTxt
[ --nFndPos
])))
3249 if (bWasWordDelim
) nFndPos
++;
// the match would start at or after the end of the text to replace: reject
3251 if (nEndPos
+ extra_repl
<= static_cast<sal_Int32
>(nFndPos
))
3253 return std::nullopt
;
3255 // return matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
3256 OUString
aShort( rTxt
.substr(nFndPos
, nEndPos
- nFndPos
+ extra_repl
) );
3257 // don't let wildcard pattern override non-wildcard one
3258 if (ContainsPattern(aShort
)) {
3259 return std::nullopt
;
3264 if ( !left_wildcard
)
// "word.*": replacement is the long form followed by whatever text follows
// the matched pattern up to nEndPos
3266 sal_Int32 siz
= nEndPos
- nFndPos
- sTmp
.getLength();
3267 aLong
= pFnd
->GetLong() + (siz
> 0 ? rTxt
.substr(nFndPos
+ sTmp
.getLength(), siz
) : u
"");
3271 nSttWdPos
= rTxt
.find( sTmp
, nFndPos
);
3272 if (nSttWdPos
!= std::u16string_view::npos
)
3274 sal_Int32
nTmp(nFndPos
);
3275 while (nTmp
< static_cast<sal_Int32
>(nSttWdPos
) && !IsWordDelim(rTxt
[nTmp
]))
3279 if (nTmp
< static_cast<sal_Int32
>(nSttWdPos
)) {
3280 break; // word delimiter found
// copy the text in front of this occurrence, then the long form in place of
// the occurrence itself, and continue searching after it
3282 buf
.append(rTxt
.substr(nFndPos
, nSttWdPos
- nFndPos
)).append(pFnd
->GetLong());
3283 nFndPos
= nSttWdPos
+ sTmp
.getLength();
3285 } while (nSttWdPos
!= std::u16string_view::npos
);
3286 if (static_cast<sal_Int32
>(nEndPos
- nFndPos
) > extra_repl
) {
3287 buf
.append(rTxt
.substr(nFndPos
, nEndPos
- nFndPos
));
3289 aLong
= buf
.makeStringAndClear();
// accept the match only if it is followed by a word delimiter or ends the text
3291 if ( (static_cast<sal_Int32
>(rTxt
.size()) > nEndPos
&& IsWordDelim(rTxt
[nEndPos
])) || static_cast<sal_Int32
>(rTxt
.size()) == nEndPos
)
3293 return SvxAutocorrWord(aShort
, aLong
);
3297 return std::nullopt
;
// Walks first the hash and then the sorted vector and stops at the first entry
// for which WordMatches() succeeds with the given rTxt / rStt / nEndPos.
// On success a WordSearchStatus is returned that carries an Iterator built
// from the position of the matching entry; std::nullopt is returned when no
// entry matches.
// NOTE(review): the rStt parameter declaration and the leading arguments of
// the WordSearchStatus constructor are not visible in this excerpt.
3300 std::optional
<SvxAutocorrWordList::WordSearchStatus
>
3301 SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt
,
3303 sal_Int32 nEndPos
) const
3305 for (auto it
= mpImpl
->maHash
.begin(); it
!= mpImpl
->maHash
.end(); ++it
)
3307 if(auto pTmp
= WordMatches(&it
->second
, rTxt
, rStt
, nEndPos
))
3309 return WordSearchStatus(
// match found in the hash: the iterator state gets the current hash position
// plus the complete sorted-vector range
3311 Iterator(std::make_unique
<Iterator::Impl
>(
3312 it
, mpImpl
->maHash
.end(),
3313 mpImpl
->maSortedVector
.begin(), mpImpl
->maSortedVector
.end()
3319 for (auto it
= mpImpl
->maSortedVector
.begin();
3320 it
!= mpImpl
->maSortedVector
.end(); ++it
)
3322 if(auto pTmp
= WordMatches(&*it
, rTxt
, rStt
, nEndPos
))
3324 return WordSearchStatus(
// match found in the sorted vector: the hash range is passed as (end, end),
// i.e. empty, together with the current vector position
3326 Iterator(std::make_unique
<Iterator::Impl
>(
3327 mpImpl
->maHash
.end(), mpImpl
->maHash
.end(),
3328 it
, mpImpl
->maSortedVector
.end()
3334 return std::nullopt
;
// Continues a search represented by rStatus: advances it with StepIter() for
// as long as that succeeds and, when WordMatches() accepts the word at the
// current iterator position, stores the match in rStatus.mFnd.
// NOTE(review): the return type, the rStt / nEndPos parameter declarations and
// the return statements are not visible in this excerpt.
3338 SvxAutocorrWordList::SearchWordsNext(std::u16string_view rTxt
,
3341 SvxAutocorrWordList::WordSearchStatus
& rStatus
) const
3343 while(rStatus
.StepIter())
3345 if(auto pTmp
= WordMatches(rStatus
.GetWordAtIter(),
3346 rTxt
, rStt
, nEndPos
))
3348 rStatus
.mFnd
= *pTmp
;
3356 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */