1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <string_view>
22 #include <sal/config.h>
24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
25 #include <com/sun/star/embed/XStorage.hpp>
26 #include <com/sun/star/io/IOException.hpp>
27 #include <com/sun/star/io/XStream.hpp>
28 #include <tools/urlobj.hxx>
29 #include <i18nlangtag/mslangid.hxx>
30 #include <i18nutil/transliteration.hxx>
31 #include <sal/log.hxx>
32 #include <osl/diagnose.h>
33 #include <vcl/svapp.hxx>
34 #include <vcl/settings.hxx>
35 #include <svl/fstathelper.hxx>
36 #include <svl/urihelper.hxx>
37 #include <unotools/charclass.hxx>
38 #include <com/sun/star/i18n/UnicodeType.hpp>
39 #include <unotools/collatorwrapper.hxx>
40 #include <com/sun/star/i18n/UnicodeScript.hpp>
41 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
42 #include <unotools/localedatawrapper.hxx>
43 #include <unotools/transliterationwrapper.hxx>
44 #include <comphelper/processfactory.hxx>
45 #include <comphelper/storagehelper.hxx>
46 #include <comphelper/string.hxx>
47 #include <editeng/editids.hrc>
48 #include <sot/storage.hxx>
49 #include <editeng/udlnitem.hxx>
50 #include <editeng/wghtitem.hxx>
51 #include <editeng/postitem.hxx>
52 #include <editeng/crossedoutitem.hxx>
53 #include <editeng/escapementitem.hxx>
54 #include <editeng/svxacorr.hxx>
55 #include <editeng/unolingu.hxx>
56 #include <vcl/window.hxx>
57 #include <com/sun/star/xml/sax/InputSource.hpp>
58 #include <com/sun/star/xml/sax/FastParser.hpp>
59 #include <com/sun/star/xml/sax/Writer.hpp>
60 #include <com/sun/star/xml/sax/SAXParseException.hpp>
61 #include <unotools/streamwrap.hxx>
62 #include "SvXMLAutoCorrectImport.hxx"
63 #include "SvXMLAutoCorrectExport.hxx"
64 #include "SvXMLAutoCorrectTokenHandler.hxx"
65 #include <ucbhelper/content.hxx>
66 #include <com/sun/star/ucb/ContentCreationException.hpp>
67 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
68 #include <com/sun/star/ucb/TransferInfo.hpp>
69 #include <com/sun/star/ucb/NameClash.hpp>
70 #include <tools/diagnose_ex.h>
71 #include <xmloff/xmltoken.hxx>
72 #include <unordered_map>
73 #include <rtl/character.hxx>
75 using namespace ::com::sun::star::ucb
;
76 using namespace ::com::sun::star::uno
;
77 using namespace ::com::sun::star::xml::sax
;
78 using namespace ::com::sun::star
;
79 using namespace ::xmloff::token
;
80 using namespace ::utl
;
87 ExclamationMark
= 0x02,
94 template<> struct typed_flags
<Flags
> : is_typed_flags
<Flags
, 0x07> {};
96 const sal_Unicode cNonBreakingSpace
= 0xA0; // UNICODE code for no break space
98 const char pXMLImplWrdStt_ExcptLstStr
[] = "WordExceptList.xml";
99 const char pXMLImplCplStt_ExcptLstStr
[] = "SentenceExceptList.xml";
100 const char pXMLImplAutocorr_ListStr
[] = "DocumentList.xml";
103 /* also at these beginnings - Brackets and all kinds of begin characters */
104 sImplSttSkipChars
[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
105 /* also at these ends - Brackets and all kinds of end characters */
106 sImplEndSkipChars
[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
108 static OUString
EncryptBlockName_Imp(const OUString
& rName
);
110 static bool NonFieldWordDelim( const sal_Unicode c
)
112 return ' ' == c
|| '\t' == c
|| 0x0a == c
||
113 cNonBreakingSpace
== c
|| 0x2011 == c
;
116 static bool IsWordDelim( const sal_Unicode c
)
118 return c
== 0x1 || NonFieldWordDelim(c
);
122 static bool IsLowerLetter( sal_Int32 nCharType
)
124 return CharClass::isLetterType( nCharType
) &&
125 ( css::i18n::KCharacterType::LOWER
& nCharType
);
128 static bool IsUpperLetter( sal_Int32 nCharType
)
130 return CharClass::isLetterType( nCharType
) &&
131 ( css::i18n::KCharacterType::UPPER
& nCharType
);
134 static bool lcl_IsUnsupportedUnicodeChar( CharClass
const & rCC
, const OUString
& rTxt
,
135 sal_Int32 nStt
, sal_Int32 nEnd
)
137 for( ; nStt
< nEnd
; ++nStt
)
139 css::i18n::UnicodeScript nScript
= rCC
.getScript( rTxt
, nStt
);
142 case css::i18n::UnicodeScript_kCJKRadicalsSupplement
:
143 case css::i18n::UnicodeScript_kHangulJamo
:
144 case css::i18n::UnicodeScript_kCJKSymbolPunctuation
:
145 case css::i18n::UnicodeScript_kHiragana
:
146 case css::i18n::UnicodeScript_kKatakana
:
147 case css::i18n::UnicodeScript_kHangulCompatibilityJamo
:
148 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth
:
149 case css::i18n::UnicodeScript_kCJKCompatibility
:
150 case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA
:
151 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph
:
152 case css::i18n::UnicodeScript_kHangulSyllable
:
153 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph
:
154 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm
:
156 default: ; //do nothing
162 static bool lcl_IsSymbolChar( CharClass
const & rCC
, const OUString
& rTxt
,
163 sal_Int32 nStt
, sal_Int32 nEnd
)
165 for( ; nStt
< nEnd
; ++nStt
)
167 if( css::i18n::UnicodeType::PRIVATE_USE
== rCC
.getType( rTxt
, nStt
))
173 static bool lcl_IsInAsciiArr( const char* pArr
, const sal_Unicode c
)
175 // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
176 if ( 0x2018 <= c
&& c
<= 0x201F && (pArr
== sImplSttSkipChars
|| pArr
== sImplEndSkipChars
) )
180 for( ; *pArr
; ++pArr
)
189 SvxAutoCorrDoc::~SvxAutoCorrDoc()
193 // Called by the functions:
194 // - FnCapitalStartWord
195 // - FnCapitalStartSentence
196 // after the exchange of characters. Then the words, if necessary, can be inserted
197 // into the exception list.
198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags
, sal_Int32
, const OUString
&,
203 LanguageType
SvxAutoCorrDoc::GetLanguage( sal_Int32
) const
205 return LANGUAGE_SYSTEM
;
208 static const LanguageTag
& GetAppLang()
210 return Application::GetSettings().GetLanguageTag();
213 /// Never use an unresolved LANGUAGE_SYSTEM.
214 static LanguageType
GetDocLanguage( const SvxAutoCorrDoc
& rDoc
, sal_Int32 nPos
)
216 LanguageType eLang
= rDoc
.GetLanguage( nPos
);
217 if (eLang
== LANGUAGE_SYSTEM
)
218 eLang
= GetAppLang().getLanguageType(); // the current work locale
222 static LocaleDataWrapper
& GetLocaleDataWrapper( LanguageType nLang
)
224 static LocaleDataWrapper
aLclDtWrp( GetAppLang() );
225 LanguageTag
aLcl( nLang
);
226 const LanguageTag
& rLcl
= aLclDtWrp
.getLoadedLanguageTag();
228 aLclDtWrp
.setLanguageTag( aLcl
);
231 static TransliterationWrapper
& GetIgnoreTranslWrapper()
233 static int bIsInit
= 0;
234 static TransliterationWrapper
aWrp( ::comphelper::getProcessComponentContext(),
235 TransliterationFlags::IGNORE_KANA
|
236 TransliterationFlags::IGNORE_WIDTH
);
239 aWrp
.loadModuleIfNeeded( GetAppLang().getLanguageType() );
244 static CollatorWrapper
& GetCollatorWrapper()
246 static CollatorWrapper aCollWrp
= [&]()
248 CollatorWrapper
tmp( ::comphelper::getProcessComponentContext() );
249 tmp
.loadDefaultCollator( GetAppLang().getLocale(), 0 );
255 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar
)
257 return cChar
== '\0' || cChar
== '\t' || cChar
== 0x0a ||
258 cChar
== ' ' || cChar
== '\'' || cChar
== '\"' ||
259 cChar
== '*' || cChar
== '_' || cChar
== '%' ||
260 cChar
== '.' || cChar
== ',' || cChar
== ';' ||
261 cChar
== ':' || cChar
== '?' || cChar
== '!' ||
262 cChar
== '<' || cChar
== '>' ||
263 cChar
== '/' || cChar
== '-';
268 bool IsCompoundWordDelimChar(sal_Unicode cChar
)
270 return cChar
== '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar
);
274 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar
)
276 return cChar
== '%' || cChar
== ';' || cChar
== ':' || cChar
== '?' || cChar
== '!' ||
277 cChar
== '/' /*case for the urls exception*/;
280 ACFlags
SvxAutoCorrect::GetDefaultFlags()
282 ACFlags nRet
= ACFlags::Autocorrect
283 | ACFlags::CapitalStartSentence
284 | ACFlags::CapitalStartWord
285 | ACFlags::ChgOrdinalNumber
286 | ACFlags::ChgToEnEmDash
287 | ACFlags::AddNonBrkSpace
288 | ACFlags::TransliterateRTL
289 | ACFlags::ChgAngleQuotes
290 | ACFlags::ChgWeightUnderl
291 | ACFlags::SetINetAttr
293 | ACFlags::SaveWordCplSttLst
294 | ACFlags::SaveWordWrdSttLst
295 | ACFlags::CorrectCapsLock
;
296 LanguageType eLang
= GetAppLang().getLanguageType();
301 LANGUAGE_ENGLISH_AUS
,
302 LANGUAGE_ENGLISH_CAN
,
304 LANGUAGE_ENGLISH_EIRE
,
305 LANGUAGE_ENGLISH_SAFRICA
,
306 LANGUAGE_ENGLISH_JAMAICA
,
307 LANGUAGE_ENGLISH_CARIBBEAN
))
308 nRet
&= ~ACFlags(ACFlags::ChgQuotes
|ACFlags::ChgSglQuotes
);
312 constexpr sal_Unicode cEmDash
= 0x2014;
313 constexpr sal_Unicode cEnDash
= 0x2013;
314 constexpr sal_Unicode cApostrophe
= 0x2019;
315 constexpr sal_Unicode cLeftDoubleAngleQuote
= 0xAB;
316 constexpr sal_Unicode cRightDoubleAngleQuote
= 0xBB;
317 constexpr sal_Unicode cLeftSingleAngleQuote
= 0x2039;
318 constexpr sal_Unicode cRightSingleAngleQuote
= 0x203A;
319 // stop characters for searching preceding quotes
320 // (the first character is also the opening quote we are looking for)
321 const sal_Unicode aStopDoubleAngleQuoteStart
[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
322 const sal_Unicode aStopDoubleAngleQuoteEnd
[] = { cRightDoubleAngleQuote
, cLeftDoubleAngleQuote
, 0x201D, 0x201E, 0 }; // preceding >>
323 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
324 const sal_Unicode aStopDoubleAngleQuoteEndRo
[] = { cLeftDoubleAngleQuote
, cRightDoubleAngleQuote
, 0x201D, 0x201E, 0x201C, 0 };
325 const sal_Unicode aStopSingleQuoteEnd
[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
326 const sal_Unicode aStopSingleQuoteEndRuUa
[] = { 0x201E, 0x201C, cRightDoubleAngleQuote
, cLeftDoubleAngleQuote
, 0 };
328 SvxAutoCorrect::SvxAutoCorrect( const OUString
& rShareAutocorrFile
,
329 const OUString
& rUserAutocorrFile
)
330 : sShareAutoCorrFile( rShareAutocorrFile
)
331 , sUserAutoCorrFile( rUserAutocorrFile
)
332 , eCharClassLang( LANGUAGE_DONTKNOW
)
333 , nFlags(SvxAutoCorrect::GetDefaultFlags())
341 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect
& rCpy
)
342 : sShareAutoCorrFile( rCpy
.sShareAutoCorrFile
)
343 , sUserAutoCorrFile( rCpy
.sUserAutoCorrFile
)
344 , aSwFlags( rCpy
.aSwFlags
)
345 , eCharClassLang(rCpy
.eCharClassLang
)
346 , nFlags( rCpy
.nFlags
& ~ACFlags(ACFlags::ChgWordLstLoad
|ACFlags::CplSttLstLoad
|ACFlags::WrdSttLstLoad
))
347 , cStartDQuote( rCpy
.cStartDQuote
)
348 , cEndDQuote( rCpy
.cEndDQuote
)
349 , cStartSQuote( rCpy
.cStartSQuote
)
350 , cEndSQuote( rCpy
.cEndSQuote
)
355 SvxAutoCorrect::~SvxAutoCorrect()
359 void SvxAutoCorrect::GetCharClass_( LanguageType eLang
)
361 pCharClass
.reset( new CharClass( LanguageTag( eLang
)) );
362 eCharClassLang
= eLang
;
365 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag
, bool bOn
)
367 ACFlags nOld
= nFlags
;
368 nFlags
= bOn
? nFlags
| nFlag
373 if( (nOld
& ACFlags::CapitalStartSentence
) != (nFlags
& ACFlags::CapitalStartSentence
) )
374 nFlags
&= ~ACFlags::CplSttLstLoad
;
375 if( (nOld
& ACFlags::CapitalStartWord
) != (nFlags
& ACFlags::CapitalStartWord
) )
376 nFlags
&= ~ACFlags::WrdSttLstLoad
;
377 if( (nOld
& ACFlags::Autocorrect
) != (nFlags
& ACFlags::Autocorrect
) )
378 nFlags
&= ~ACFlags::ChgWordLstLoad
;
383 // Correct TWo INitial CApitals
384 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
385 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
388 CharClass
& rCC
= GetCharClass( eLang
);
390 // Delete all non alphanumeric. Test the characters at the beginning/end of
391 // the word ( recognizes: "(min.", "/min.", and so on.)
392 for( ; nSttPos
< nEndPos
; ++nSttPos
)
393 if( rCC
.isLetterNumeric( rTxt
, nSttPos
))
395 for( ; nSttPos
< nEndPos
; --nEndPos
)
396 if( rCC
.isLetterNumeric( rTxt
, nEndPos
- 1 ))
399 // Is the word a compounded word separated by delimiters?
400 // If so, keep track of all delimiters so each constituent
401 // word can be checked for two initial capital letters.
402 std::deque
<sal_Int32
> aDelimiters
;
404 // Always check for two capitals at the beginning
405 // of the entire word, so start at nSttPos.
406 aDelimiters
.push_back(nSttPos
);
408 // Find all compound word delimiters
409 for (sal_Int32 n
= nSttPos
; n
< nEndPos
; ++n
)
411 if (IsCompoundWordDelimChar(rTxt
[ n
]))
413 aDelimiters
.push_back( n
+ 1 ); // Get position of char after delimiter
417 // Decide where to put the terminating delimiter.
418 // If the last AutoCorrect char was a newline, then the AutoCorrect
419 // char will not be included in rTxt.
420 // If the last AutoCorrect char was not a newline, then the AutoCorrect
421 // character will be the last character in rTxt.
422 if (!IsCompoundWordDelimChar(rTxt
[nEndPos
-1]))
423 aDelimiters
.push_back(nEndPos
);
425 // Iterate through the word and all words that compose it.
426 // Two capital letters at the beginning of word?
427 for (size_t nI
= 0; nI
< aDelimiters
.size() - 1; ++nI
)
429 nSttPos
= aDelimiters
[nI
];
430 nEndPos
= aDelimiters
[nI
+ 1];
432 if( nSttPos
+2 < nEndPos
&&
433 IsUpperLetter( rCC
.getCharacterType( rTxt
, nSttPos
)) &&
434 IsUpperLetter( rCC
.getCharacterType( rTxt
, ++nSttPos
)) &&
435 // Is the third character a lower case
436 IsLowerLetter( rCC
.getCharacterType( rTxt
, nSttPos
+1 )) &&
437 // Do not replace special attributes
438 0x1 != rTxt
[ nSttPos
] && 0x2 != rTxt
[ nSttPos
])
440 // test if the word is in an exception list
441 OUString
sWord( rTxt
.copy( nSttPos
- 1, nEndPos
- nSttPos
+ 1 ));
442 if( !FindInWrdSttExceptList(eLang
, sWord
) )
444 // Check that word isn't correctly spelt before correcting:
445 css::uno::Reference
< css::linguistic2::XSpellChecker1
> xSpeller
=
446 LinguMgr::GetSpellChecker();
447 if( xSpeller
->hasLanguage(static_cast<sal_uInt16
>(eLang
)) )
449 Sequence
< css::beans::PropertyValue
> aEmptySeq
;
450 if (xSpeller
->isValid(sWord
, static_cast<sal_uInt16
>(eLang
), aEmptySeq
))
455 sal_Unicode cSave
= rTxt
[ nSttPos
];
456 OUString sChar
= rCC
.lowercase( OUString(cSave
) );
457 if( sChar
[0] != cSave
&& rDoc
.ReplaceRange( nSttPos
, 1, sChar
))
459 if( ACFlags::SaveWordWrdSttLst
& nFlags
)
460 rDoc
.SaveCpltSttWord( ACFlags::CapitalStartWord
, nSttPos
, sWord
, cSave
);
467 // Format ordinal numbers suffixes (1st -> 1^st)
468 bool SvxAutoCorrect::FnChgOrdinalNumber(
469 SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
470 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
473 // 1st, 2nd, 3rd, 4 - 0th
478 // In some languages ordinal suffixes should never be
479 // changed to superscript. Let's break for those languages.
482 LANGUAGE_SWEDISH_FINLAND
))
484 CharClass
& rCC
= GetCharClass(eLang
);
486 for (; nSttPos
< nEndPos
; ++nSttPos
)
487 if (!lcl_IsInAsciiArr(sImplSttSkipChars
, rTxt
[nSttPos
]))
489 for (; nSttPos
< nEndPos
; --nEndPos
)
490 if (!lcl_IsInAsciiArr(sImplEndSkipChars
, rTxt
[nEndPos
- 1]))
494 // Get the last number in the string to check
495 sal_Int32 nNumEnd
= nEndPos
;
496 bool bFoundEnd
= false;
497 bool isValidNumber
= true;
498 sal_Int32 i
= nEndPos
;
502 bool isDigit
= rCC
.isDigit(rTxt
, i
);
504 isValidNumber
&= (isDigit
|| !rCC
.isLetter(rTxt
, i
));
506 if (isDigit
&& !bFoundEnd
)
513 if (bFoundEnd
&& isValidNumber
) {
514 sal_Int32 nNum
= rTxt
.copy(nSttPos
, nNumEnd
- nSttPos
+ 1).toInt32();
516 // Check if the characters after that number correspond to the ordinal suffix
517 uno::Reference
< i18n::XOrdinalSuffix
> xOrdSuffix
518 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
520 const uno::Sequence
< OUString
> aSuffixes
= xOrdSuffix
->getOrdinalSuffix(nNum
, rCC
.getLanguageTag().getLocale());
521 for (OUString
const & sSuffix
: aSuffixes
)
523 OUString sEnd
= rTxt
.copy(nNumEnd
+ 1, nEndPos
- nNumEnd
- 1);
527 // Check if the ordinal suffix has to be set as super script
528 if (rCC
.isLetter(sSuffix
))
531 SvxEscapementItem
aSvxEscapementItem(DFLT_ESC_AUTO_SUPER
,
532 DFLT_ESC_PROP
, SID_ATTR_CHAR_ESCAPEMENT
);
533 rDoc
.SetAttr(nNumEnd
+ 1, nEndPos
,
534 SID_ATTR_CHAR_ESCAPEMENT
,
546 bool SvxAutoCorrect::FnChgToEnEmDash(
547 SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
548 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
552 CharClass
& rCC
= GetCharClass( eLang
);
553 if (eLang
== LANGUAGE_SYSTEM
)
554 eLang
= GetAppLang().getLanguageType();
555 bool bAlwaysUseEmDash
= (eLang
== LANGUAGE_RUSSIAN
|| eLang
== LANGUAGE_UKRAINIAN
);
557 // replace " - " or " --" with "enDash"
558 if( 1 < nSttPos
&& 1 <= nEndPos
- nSttPos
)
560 sal_Unicode cCh
= rTxt
[ nSttPos
];
563 if( 1 < nEndPos
- nSttPos
&&
564 ' ' == rTxt
[ nSttPos
-1 ] &&
565 '-' == rTxt
[ nSttPos
+1 ])
568 for( n
= nSttPos
+2; n
< nEndPos
&& lcl_IsInAsciiArr(
569 sImplSttSkipChars
,(cCh
= rTxt
[ n
]));
573 // found: " --[<AnySttChars>][A-z0-9]
574 if( rCC
.isLetterNumeric( OUString(cCh
) ) )
576 for( n
= nSttPos
-1; n
&& lcl_IsInAsciiArr(
577 sImplEndSkipChars
,(cCh
= rTxt
[ --n
])); )
580 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
581 if( rCC
.isLetterNumeric( OUString(cCh
) ))
583 rDoc
.Delete( nSttPos
, nSttPos
+ 2 );
584 rDoc
.Insert( nSttPos
, bAlwaysUseEmDash
? OUString(cEmDash
) : OUString(cEnDash
) );
590 else if( 3 < nSttPos
&&
591 ' ' == rTxt
[ nSttPos
-1 ] &&
592 '-' == rTxt
[ nSttPos
-2 ])
594 sal_Int32 n
, nLen
= 1, nTmpPos
= nSttPos
- 2;
595 if( '-' == ( cCh
= rTxt
[ nTmpPos
-1 ]) )
599 cCh
= rTxt
[ nTmpPos
-1 ];
603 for( n
= nSttPos
; n
< nEndPos
&& lcl_IsInAsciiArr(
604 sImplSttSkipChars
,(cCh
= rTxt
[ n
]));
608 // found: " - [<AnySttChars>][A-z0-9]
609 if( rCC
.isLetterNumeric( OUString(cCh
) ) )
612 for( n
= nTmpPos
-1; n
&& lcl_IsInAsciiArr(
613 sImplEndSkipChars
,(cCh
= rTxt
[ --n
])); )
615 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
616 if( rCC
.isLetterNumeric( OUString(cCh
) ))
618 rDoc
.Delete( nTmpPos
, nTmpPos
+ nLen
);
619 rDoc
.Insert( nTmpPos
, bAlwaysUseEmDash
? OUString(cEmDash
) : OUString(cEnDash
) );
627 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
628 // [0-9]--[0-9] double dash always replaced with "enDash"
629 // Finnish and Hungarian use enDash instead of emDash.
630 bool bEnDash
= (eLang
== LANGUAGE_HUNGARIAN
|| eLang
== LANGUAGE_FINNISH
);
631 if( 4 <= nEndPos
- nSttPos
)
633 OUString
sTmp( rTxt
.copy( nSttPos
, nEndPos
- nSttPos
) );
634 sal_Int32 nFndPos
= sTmp
.indexOf("--");
635 if( nFndPos
!= -1 && nFndPos
&&
636 nFndPos
+ 2 < sTmp
.getLength() &&
637 ( rCC
.isLetterNumeric( sTmp
, nFndPos
- 1 ) ||
638 lcl_IsInAsciiArr( sImplEndSkipChars
, rTxt
[ nFndPos
- 1 ] )) &&
639 ( rCC
.isLetterNumeric( sTmp
, nFndPos
+ 2 ) ||
640 lcl_IsInAsciiArr( sImplSttSkipChars
, rTxt
[ nFndPos
+ 2 ] )))
642 nSttPos
= nSttPos
+ nFndPos
;
643 rDoc
.Delete( nSttPos
, nSttPos
+ 2 );
644 rDoc
.Insert( nSttPos
, (bEnDash
|| (rCC
.isDigit( sTmp
, nFndPos
- 1 ) &&
645 rCC
.isDigit( sTmp
, nFndPos
+ 2 )) ? OUString(cEnDash
) : OUString(cEmDash
)) );
652 // Add non-breaking space before specific punctuation marks in French text
653 bool SvxAutoCorrect::FnAddNonBrkSpace(
654 SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
656 LanguageType eLang
, bool& io_bNbspRunNext
)
660 CharClass
& rCC
= GetCharClass( eLang
);
662 if ( rCC
.getLanguageTag().getLanguage() == "fr" )
664 bool bFrCA
= (rCC
.getLanguageTag().getCountry() == "CA");
665 OUString allChars
= ":;?!%";
666 OUString
chars( allChars
);
670 sal_Unicode cChar
= rTxt
[ nEndPos
];
671 bool bHasSpace
= chars
.indexOf( cChar
) != -1;
672 bool bIsSpecial
= allChars
.indexOf( cChar
) != -1;
675 // Get the last word delimiter position
676 sal_Int32 nSttWdPos
= nEndPos
;
677 bool bWasWordDelim
= false;
680 bWasWordDelim
= IsWordDelim( rTxt
[ --nSttWdPos
]);
685 //See if the text is the start of a protocol string, e.g. have text of
686 //"http" see if it is the start of "http:" and if so leave it alone
687 sal_Int32 nIndex
= nSttWdPos
+ (bWasWordDelim
? 1 : 0);
688 sal_Int32 nProtocolLen
= nEndPos
- nSttWdPos
+ 1;
689 if (nIndex
+ nProtocolLen
<= rTxt
.getLength())
691 if (INetURLObject::CompareProtocolScheme(rTxt
.copy(nIndex
, nProtocolLen
)) != INetProtocol::NotValid
)
695 // Check the presence of "://" in the word
696 sal_Int32 nStrPos
= rTxt
.indexOf( "://", nSttWdPos
+ 1 );
697 if ( nStrPos
== -1 && nEndPos
> 0 )
699 // Check the previous char
700 sal_Unicode cPrevChar
= rTxt
[ nEndPos
- 1 ];
701 if ( ( chars
.indexOf( cPrevChar
) == -1 ) && cPrevChar
!= '\t' )
703 // Remove any previous normal space
704 sal_Int32 nPos
= nEndPos
- 1;
705 while ( cPrevChar
== ' ' || cPrevChar
== cNonBreakingSpace
)
707 if ( nPos
== 0 ) break;
709 cPrevChar
= rTxt
[ nPos
];
713 if ( nEndPos
- nPos
> 0 )
714 rDoc
.Delete( nPos
, nEndPos
);
716 // Add the non-breaking space at the end pos
718 rDoc
.Insert( nPos
, OUString(cNonBreakingSpace
) );
719 io_bNbspRunNext
= true;
722 else if ( chars
.indexOf( cPrevChar
) != -1 )
723 io_bNbspRunNext
= true;
726 else if ( cChar
== '/' && nEndPos
> 1 && rTxt
.getLength() > (nEndPos
- 1) )
728 // Remove the hardspace right before to avoid formatting URLs
729 sal_Unicode cPrevChar
= rTxt
[ nEndPos
- 1 ];
730 sal_Unicode cMaybeSpaceChar
= rTxt
[ nEndPos
- 2 ];
731 if ( cPrevChar
== ':' && cMaybeSpaceChar
== cNonBreakingSpace
)
733 rDoc
.Delete( nEndPos
- 2, nEndPos
- 1 );
743 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
744 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
747 OUString
sURL( URIHelper::FindFirstURLInText( rTxt
, nSttPos
, nEndPos
,
748 GetCharClass( eLang
) ));
749 bool bRet
= !sURL
.isEmpty();
750 if( bRet
) // so, set attribute:
751 rDoc
.SetINetAttr( nSttPos
, nEndPos
, sURL
);
755 // Automatic *bold*, /italic/, -strikeout- and _underline_
756 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
760 // at the beginning: _, *, / or - after Space with the following !Space
761 // at the end: _, *, / or - before Space (word delimiter?)
763 sal_Unicode cInsChar
= rTxt
[ nEndPos
]; // underline, bold, italic or strikeout
764 if( ++nEndPos
!= rTxt
.getLength() &&
765 !IsWordDelim( rTxt
[ nEndPos
] ) )
770 bool bAlphaNum
= false;
771 sal_Int32 nPos
= nEndPos
;
772 sal_Int32 nFndPos
= -1;
773 CharClass
& rCC
= GetCharClass( LANGUAGE_SYSTEM
);
777 switch( sal_Unicode c
= rTxt
[ --nPos
] )
785 if( bAlphaNum
&& nPos
+1 < nEndPos
&& ( !nPos
||
786 IsWordDelim( rTxt
[ nPos
-1 ])) &&
787 !IsWordDelim( rTxt
[ nPos
+1 ]))
790 // Condition is not satisfied, so cancel
797 bAlphaNum
= rCC
.isLetterNumeric( rTxt
, nPos
);
803 // first delete the Character at the end - this allows insertion
804 // of an empty hint in SetAttr which would be removed by Delete
805 // (fdo#62536, AUTOFMT in Writer)
806 rDoc
.Delete( nEndPos
, nEndPos
+ 1 );
807 rDoc
.Delete( nFndPos
, nFndPos
+ 1 );
808 // Span the Attribute over the area
810 if( '*' == cInsChar
) // Bold
812 SvxWeightItem
aSvxWeightItem( WEIGHT_BOLD
, SID_ATTR_CHAR_WEIGHT
);
813 rDoc
.SetAttr( nFndPos
, nEndPos
- 1,
814 SID_ATTR_CHAR_WEIGHT
,
817 else if( '/' == cInsChar
) // Italic
819 SvxPostureItem
aSvxPostureItem( ITALIC_NORMAL
, SID_ATTR_CHAR_POSTURE
);
820 rDoc
.SetAttr( nFndPos
, nEndPos
- 1,
821 SID_ATTR_CHAR_POSTURE
,
824 else if( '-' == cInsChar
) // Strikeout
826 SvxCrossedOutItem
aSvxCrossedOutItem( STRIKEOUT_SINGLE
, SID_ATTR_CHAR_STRIKEOUT
);
827 rDoc
.SetAttr( nFndPos
, nEndPos
- 1,
828 SID_ATTR_CHAR_STRIKEOUT
,
833 SvxUnderlineItem
aSvxUnderlineItem( LINESTYLE_SINGLE
, SID_ATTR_CHAR_UNDERLINE
);
834 rDoc
.SetAttr( nFndPos
, nEndPos
- 1,
835 SID_ATTR_CHAR_UNDERLINE
,
840 return -1 != nFndPos
;
843 // Capitalize first letter of every sentence
844 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc
& rDoc
,
845 const OUString
& rTxt
, bool bNormalPos
,
846 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
850 if( rTxt
.isEmpty() || nEndPos
<= nSttPos
)
853 CharClass
& rCC
= GetCharClass( eLang
);
854 OUString
aText( rTxt
);
855 const sal_Unicode
*pStart
= aText
.getStr(),
856 *pStr
= pStart
+ nEndPos
,
860 bool bAtStart
= false;
863 if (rCC
.isLetter(aText
, pStr
- pStart
))
869 else if (pWordStt
&& !rCC
.isDigit(aText
, pStr
- pStart
))
871 if( (lcl_IsInAsciiArr( "-'", *pStr
) || *pStr
== cApostrophe
) && // These characters are allowed in words
872 pWordStt
- 1 == pStr
&&
873 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
874 (pStart
+ 1) <= pStr
&&
875 rCC
.isLetter(aText
, pStr
-1 - pStart
))
880 bAtStart
= (pStart
== pStr
);
881 } while( !bAtStart
);
884 return; // no character to be replaced
887 if (rCC
.isDigit(aText
, pStr
- pStart
))
888 return; // already ok
890 if (IsUpperLetter(rCC
.getCharacterType(aText
, pWordStt
- pStart
)))
891 return; // already ok
893 //See if the text is the start of a protocol string, e.g. have text of
894 //"http" see if it is the start of "http:" and if so leave it alone
895 sal_Int32 nIndex
= pWordStt
- pStart
;
896 sal_Int32 nProtocolLen
= pDelim
- pWordStt
+ 1;
897 if (nIndex
+ nProtocolLen
<= rTxt
.getLength())
899 if (INetURLObject::CompareProtocolScheme(rTxt
.copy(nIndex
, nProtocolLen
)) != INetProtocol::NotValid
)
900 return; // already ok
903 if (0x1 == *pWordStt
|| 0x2 == *pWordStt
)
904 return; // already ok
906 // Only capitalize, if string before specified characters is long enough
907 if( *pDelim
&& 2 >= pDelim
- pWordStt
&&
908 lcl_IsInAsciiArr( ".-)>", *pDelim
) )
911 // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
912 if ( 1 == pDelim
- pWordStt
&& 0x03B1 <= *pWordStt
&& *pWordStt
<= 0x03C9 && eLang
!= LANGUAGE_GREEK
)
915 if( !bAtStart
) // Still no beginning of a paragraph?
917 if (NonFieldWordDelim(*pStr
))
921 bAtStart
= (pStart
== pStr
--);
922 if (bAtStart
|| !NonFieldWordDelim(*pStr
))
926 // Asian full stop, full width full stop, full width exclamation mark
927 // and full width question marks are treated as word delimiters
928 else if ( 0x3002 != *pStr
&& 0xFF0E != *pStr
&& 0xFF01 != *pStr
&&
930 return; // no valid separator -> no replacement
933 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
934 if (FindInWrdSttExceptList(eLang
, OUString(pWordStt
, pDelim
- pWordStt
)))
937 if( bAtStart
) // at the beginning of a paragraph?
939 // Check out the previous paragraph, if it exists.
940 // If so, then check for the paragraph separator at its end.
941 OUString
const*const pPrevPara
= rDoc
.GetPrevPara(bNormalPos
);
944 // valid separator -> replace
945 OUString
sChar( *pWordStt
);
946 sChar
= rCC
.titlecase(sChar
); //see fdo#56740
947 if (!comphelper::string::equals(sChar
, *pWordStt
))
948 rDoc
.ReplaceRange( pWordStt
- pStart
, 1, sChar
);
954 pStart
= aText
.getStr();
955 pStr
= pStart
+ aText
.getLength();
957 do { // overwrite all blanks
959 if (!NonFieldWordDelim(*pStr
))
961 bAtStart
= (pStart
== pStr
);
962 } while( !bAtStart
);
965 return; // no valid separator -> no replacement
968 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
969 // all three can happen, but not more than once!
970 const sal_Unicode
* pExceptStt
= nullptr;
971 bool bContinue
= true;
972 Flags nFlag
= Flags::NONE
;
977 // Western and Asian full stop
982 if (pStr
>= pStart
+ 2 && *(pStr
- 2) == '.')
984 //e.g. text "f.o.o. word": Now currently considering
985 //capitalizing word but second last character of
986 //previous word is a . So probably last word is an
987 //acronym that ends in . and not truly the end of a
988 //previous sentence, so don't autocapitalize this word
991 if (nFlag
& Flags::FullStop
)
992 return; // no valid separator -> no replacement
993 nFlag
|= Flags::FullStop
;
1000 if (nFlag
& Flags::ExclamationMark
)
1001 return; // no valid separator -> no replacement
1002 nFlag
|= Flags::ExclamationMark
;
1008 if (nFlag
& Flags::QuestionMark
)
1009 return; // no valid separator -> no replacement
1010 nFlag
|= Flags::QuestionMark
;
1014 if (nFlag
== Flags::NONE
)
1015 return; // no valid separator -> no replacement
1021 if (bContinue
&& pStr
-- == pStart
)
1023 return; // no valid separator -> no replacement
1025 } while (bContinue
);
1026 if (Flags::FullStop
!= nFlag
)
1027 pExceptStt
= nullptr;
1029 // Only capitalize, if string is long enough
1030 if( 2 > ( pStr
- pStart
) )
1033 if (!rCC
.isLetterNumeric(aText
, pStr
-- - pStart
))
1035 bool bValid
= false, bAlphaFnd
= false;
1036 const sal_Unicode
* pTmpStr
= pStr
;
1039 if( rCC
.isDigit( aText
, pTmpStr
- pStart
) )
1044 else if( rCC
.isLetter( aText
, pTmpStr
- pStart
) )
1054 else if (bAlphaFnd
|| NonFieldWordDelim(*pTmpStr
))
1057 if( pTmpStr
== pStart
)
1064 return; // no valid separator -> no replacement
1067 bool bNumericOnly
= '0' <= *(pStr
+1) && *(pStr
+1) <= '9';
1069 // Search for the beginning of the word
1070 while (!NonFieldWordDelim(*pStr
))
1072 if( bNumericOnly
&& rCC
.isLetter( aText
, pStr
- pStart
) )
1073 bNumericOnly
= false;
1075 if( pStart
== pStr
)
1081 if( bNumericOnly
) // consists of only numbers, then not
1084 if (NonFieldWordDelim(*pStr
))
1089 // check on the basis of the exception list
1092 sWord
= OUString(pStr
, pExceptStt
- pStr
+ 1);
1093 if( FindInCplSttExceptList(eLang
, sWord
) )
1096 // Delete all non alphanumeric. Test the characters at the
1097 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1098 OUString
sTmp( sWord
);
1099 while( !sTmp
.isEmpty() &&
1100 !rCC
.isLetterNumeric( sTmp
, 0 ) )
1101 sTmp
= sTmp
.copy(1);
1103 // Remove all non alphanumeric characters towards the end up until
1105 sal_Int32 nLen
= sTmp
.getLength();
1106 while( nLen
&& !rCC
.isLetterNumeric( sTmp
, nLen
-1 ) )
1108 if( nLen
+ 1 < sTmp
.getLength() )
1109 sTmp
= sTmp
.copy( 0, nLen
+ 1 );
1111 if( !sTmp
.isEmpty() && sTmp
.getLength() != sWord
.getLength() &&
1112 FindInCplSttExceptList(eLang
, sTmp
))
1115 if(FindInCplSttExceptList(eLang
, sWord
, true))
1120 sal_Unicode cSave
= *pWordStt
;
1121 nSttPos
= pWordStt
- rTxt
.getStr();
1122 OUString sChar
= rCC
.titlecase(OUString(cSave
)); //see fdo#56740
1123 bool bRet
= sChar
[0] != cSave
&& rDoc
.ReplaceRange( nSttPos
, 1, sChar
);
1125 // Perhaps someone wants to have the word
1126 if( bRet
&& ACFlags::SaveWordCplSttLst
& nFlags
)
1127 rDoc
.SaveCpltSttWord( ACFlags::CapitalStartSentence
, nSttPos
, sWord
, cSave
);
1130 // Correct accidental use of cAPS LOCK key
1131 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
1132 sal_Int32 nSttPos
, sal_Int32 nEndPos
,
1133 LanguageType eLang
)
1135 if (nEndPos
- nSttPos
< 2)
1136 // string must be at least 2 characters long.
1139 CharClass
& rCC
= GetCharClass( eLang
);
1141 // Check the first 2 letters.
1142 if ( !IsLowerLetter(rCC
.getCharacterType(rTxt
, nSttPos
)) )
1145 if ( !IsUpperLetter(rCC
.getCharacterType(rTxt
, nSttPos
+1)) )
1148 OUStringBuffer aConverted
;
1149 aConverted
.append( rCC
.uppercase(OUString(rTxt
[nSttPos
])) );
1150 aConverted
.append( rCC
.lowercase(OUString(rTxt
[nSttPos
+1])) );
1152 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1153 if (FindInWrdSttExceptList(eLang
, rTxt
.copy(nSttPos
, nEndPos
- nSttPos
)))
1156 for( sal_Int32 i
= nSttPos
+2; i
< nEndPos
; ++i
)
1158 if ( IsLowerLetter(rCC
.getCharacterType(rTxt
, i
)) )
1159 // A lowercase letter disqualifies the whole text.
1162 if ( IsUpperLetter(rCC
.getCharacterType(rTxt
, i
)) )
1163 // Another uppercase letter. Convert it.
1164 aConverted
.append( rCC
.lowercase(OUString(rTxt
[i
])) );
1166 // This is not an alphabetic letter. Leave it as-is.
1167 aConverted
.append( rTxt
[i
] );
1170 // Replace the word.
1171 rDoc
.Delete(nSttPos
, nEndPos
);
1172 rDoc
.Insert(nSttPos
, aConverted
.makeStringAndClear());
1178 sal_Unicode
SvxAutoCorrect::GetQuote( sal_Unicode cInsChar
, bool bSttQuote
,
1179 LanguageType eLang
) const
1181 sal_Unicode cRet
= bSttQuote
? ( '\"' == cInsChar
1182 ? GetStartDoubleQuote()
1183 : GetStartSingleQuote() )
1184 : ( '\"' == cInsChar
1185 ? GetEndDoubleQuote()
1186 : GetEndSingleQuote() );
1189 // then through the Language find the right character
1190 if( LANGUAGE_NONE
== eLang
)
1194 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1195 OUString
sRet( bSttQuote
1196 ? ( '\"' == cInsChar
1197 ? rLcl
.getDoubleQuotationMarkStart()
1198 : rLcl
.getQuotationMarkStart() )
1199 : ( '\"' == cInsChar
1200 ? rLcl
.getDoubleQuotationMarkEnd()
1201 : rLcl
.getQuotationMarkEnd() ));
1202 cRet
= !sRet
.isEmpty() ? sRet
[0] : cInsChar
;
1208 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc
& rDoc
, sal_Int32 nInsPos
,
1209 sal_Unicode cInsChar
, bool bSttQuote
,
1210 bool bIns
, LanguageType eLang
, ACQuotes eType
) const
1214 if ( eType
== ACQuotes::DoubleAngleQuote
)
1216 bool bSwiss
= eLang
== LANGUAGE_FRENCH_SWISS
;
1217 // pressing " inside a quotation -> use second level angle quotes
1218 bool bLeftQuote
= '\"' == cInsChar
&&
1219 // start position and Romanian OR
1220 // not start position and Hungarian
1221 bSttQuote
== (eLang
!= LANGUAGE_HUNGARIAN
);
1222 cRet
= ( '<' == cInsChar
|| bLeftQuote
)
1223 ? ( bSwiss
? cLeftSingleAngleQuote
: cLeftDoubleAngleQuote
)
1224 : ( bSwiss
? cRightSingleAngleQuote
: cRightDoubleAngleQuote
);
1226 else if ( eType
== ACQuotes::UseApostrophe
)
1229 cRet
= GetQuote( cInsChar
, bSttQuote
, eLang
);
1231 OUString
sChg( cInsChar
);
1233 rDoc
.Insert( nInsPos
, sChg
);
1235 rDoc
.Replace( nInsPos
, sChg
);
1237 sChg
= OUString(cRet
);
1239 if( eType
== ACQuotes::NonBreakingSpace
)
1241 if( rDoc
.Insert( bSttQuote
? nInsPos
+1 : nInsPos
, OUStringChar(cNonBreakingSpace
) ))
1247 else if( eType
== ACQuotes::DoubleAngleQuote
&& cInsChar
!= '\"' )
1249 rDoc
.Delete( nInsPos
-1, nInsPos
);
1253 rDoc
.Replace( nInsPos
, sChg
);
1255 // i' -> I' in English (last step for the Undo)
1256 if( eType
== ACQuotes::CapitalizeIAm
)
1257 rDoc
.Replace( nInsPos
-1, "I" );
1260 OUString
SvxAutoCorrect::GetQuote( SvxAutoCorrDoc
const & rDoc
, sal_Int32 nInsPos
,
1261 sal_Unicode cInsChar
, bool bSttQuote
)
1263 const LanguageType eLang
= GetDocLanguage( rDoc
, nInsPos
);
1264 sal_Unicode cRet
= GetQuote( cInsChar
, bSttQuote
, eLang
);
1266 OUString
sRet(cRet
);
1268 if( '\"' == cInsChar
)
1270 if (primary(eLang
) == primary(LANGUAGE_FRENCH
) && eLang
!= LANGUAGE_FRENCH_SWISS
)
1281 // search preceding opening quote in the paragraph before the insert position
1282 static bool lcl_HasPrecedingChar( std::u16string_view rTxt
, sal_Int32 nPos
,
1283 const sal_Unicode sPrecedingChar
, const sal_Unicode
* aStopChars
)
1285 sal_Unicode cTmpChar
;
1288 cTmpChar
= rTxt
[ --nPos
];
1289 if ( cTmpChar
== sPrecedingChar
)
1292 for ( const sal_Unicode
* pCh
= aStopChars
; *pCh
; ++pCh
)
1293 if ( cTmpChar
== *pCh
)
1296 } while ( nPos
> 0 );
1301 // WARNING: rText may become invalid, see comment below
1302 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc
& rDoc
, const OUString
& rTxt
,
1303 sal_Int32 nInsPos
, sal_Unicode cChar
,
1304 bool bInsert
, bool& io_bNbspRunNext
, vcl::Window
const * pFrameWin
)
1306 bool bIsNextRun
= io_bNbspRunNext
;
1307 io_bNbspRunNext
= false; // if it was set, then it has to be turned off
1309 do{ // only for middle check loop !!
1312 // Prevent double space
1313 if( nInsPos
&& ' ' == cChar
&&
1314 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace
) &&
1315 ' ' == rTxt
[ nInsPos
- 1 ])
1320 bool bSingle
= '\'' == cChar
;
1321 bool bIsReplaceQuote
=
1322 (IsAutoCorrFlag( ACFlags::ChgQuotes
) && ('\"' == cChar
)) ||
1323 (IsAutoCorrFlag( ACFlags::ChgSglQuotes
) && bSingle
);
1324 if( bIsReplaceQuote
)
1326 bool bSttQuote
= !nInsPos
;
1327 ACQuotes eType
= ACQuotes::NONE
;
1328 const LanguageType eLang
= GetDocLanguage( rDoc
, nInsPos
);
1331 sal_Unicode cPrev
= rTxt
[ nInsPos
-1 ];
1332 bSttQuote
= NonFieldWordDelim(cPrev
) ||
1333 lcl_IsInAsciiArr( "([{", cPrev
) ||
1334 ( cEmDash
== cPrev
) ||
1335 ( cEnDash
== cPrev
);
1336 // tdf#38394 use opening quotation mark << in French l'<<word>>
1337 if ( !bSingle
&& !bSttQuote
&& cPrev
== cApostrophe
&&
1338 primary(eLang
) == primary(LANGUAGE_FRENCH
) &&
1339 ( ( ( nInsPos
== 2 || ( nInsPos
> 2 && IsWordDelim( rTxt
[ nInsPos
-3 ] ) ) ) &&
1340 // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1341 OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt
[ nInsPos
-2 ] ) > -1 ) ||
1342 ( ( nInsPos
== 3 || (nInsPos
> 3 && IsWordDelim( rTxt
[ nInsPos
-4 ] ) ) ) &&
1343 // abbreviated form of que
1344 ( rTxt
[ nInsPos
-2 ] == 'u' || rTxt
[ nInsPos
-2 ] == 'U' ) &&
1345 ( rTxt
[ nInsPos
-3 ] == 'q' || rTxt
[ nInsPos
-3 ] == 'Q' ) ) ) )
1349 // tdf#108423 for capitalization of English i'm
1350 else if ( bSingle
&& ( cPrev
== 'i' ) &&
1351 primary(eLang
) == primary(LANGUAGE_ENGLISH
) &&
1352 ( nInsPos
== 1 || IsWordDelim( rTxt
[ nInsPos
-2 ] ) ) )
1354 eType
= ACQuotes::CapitalizeIAm
;
1356 // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1357 else if ( !bSingle
&& nInsPos
&&
1358 ( ( eLang
== LANGUAGE_HUNGARIAN
&&
1359 lcl_HasPrecedingChar( rTxt
, nInsPos
,
1360 bSttQuote
? aStopDoubleAngleQuoteStart
[0] : aStopDoubleAngleQuoteEnd
[0],
1361 bSttQuote
? aStopDoubleAngleQuoteStart
+ 1 : aStopDoubleAngleQuoteEnd
+ 1 ) ) ||
1364 LANGUAGE_ROMANIAN_MOLDOVA
) &&
1365 lcl_HasPrecedingChar( rTxt
, nInsPos
,
1366 bSttQuote
? aStopDoubleAngleQuoteStart
[0] : aStopDoubleAngleQuoteEndRo
[0],
1367 bSttQuote
? aStopDoubleAngleQuoteStart
+ 1 : aStopDoubleAngleQuoteEndRo
+ 1 ) ) ) )
1369 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1370 // only if the opening double quotation mark is the default one
1371 if ( rLcl
.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart
[0]) )
1372 eType
= ACQuotes::DoubleAngleQuote
;
1374 else if ( bSingle
&& nInsPos
&& !bSttQuote
&&
1375 // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1376 // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1377 // tdf#123786 the same for Russian and Ukrainian
1381 LANGUAGE_GERMAN_SWISS
,
1382 LANGUAGE_GERMAN_AUSTRIAN
,
1383 LANGUAGE_GERMAN_LUXEMBOURG
,
1384 LANGUAGE_GERMAN_LIECHTENSTEIN
,
1387 LANGUAGE_SLOVENIAN
) &&
1388 !lcl_HasPrecedingChar( rTxt
, nInsPos
, aStopSingleQuoteEnd
[0], aStopSingleQuoteEnd
+ 1 ) ) ||
1391 LANGUAGE_UKRAINIAN
) &&
1392 !lcl_HasPrecedingChar( rTxt
, nInsPos
, aStopSingleQuoteEndRuUa
[0], aStopSingleQuoteEndRuUa
+ 1 ) ) ) )
1394 LocaleDataWrapper
& rLcl
= GetLocaleDataWrapper( eLang
);
1395 CharClass
& rCC
= GetCharClass( eLang
);
1396 if ( ( rLcl
.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd
[0]) ||
1397 rLcl
.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa
[0]) ) &&
1398 // use apostrophe only after letters, not after digits or punctuation
1399 rCC
.isLetter(rTxt
, nInsPos
-1) )
1401 eType
= ACQuotes::UseApostrophe
;
1406 if ( eType
== ACQuotes::NONE
&& !bSingle
&&
1407 ( primary(eLang
) == primary(LANGUAGE_FRENCH
) && eLang
!= LANGUAGE_FRENCH_SWISS
) )
1408 eType
= ACQuotes::NonBreakingSpace
;
1410 InsertQuote( rDoc
, nInsPos
, cChar
, bSttQuote
, bInsert
, eLang
, eType
);
1413 // tdf#133524 change "<<" and ">>" to double angle quotation marks
1414 else if ( IsAutoCorrFlag( ACFlags::ChgQuotes
) &&
1415 IsAutoCorrFlag( ACFlags::ChgAngleQuotes
) &&
1416 ('<' == cChar
|| '>' == cChar
) &&
1417 nInsPos
> 0 && cChar
== rTxt
[ nInsPos
-1 ] )
1419 const LanguageType eLang
= GetDocLanguage( rDoc
, nInsPos
);
1421 LANGUAGE_CATALAN
, // primary level
1422 LANGUAGE_CATALAN_VALENCIAN
, // primary level
1423 LANGUAGE_FINNISH
, // alternative primary level
1424 LANGUAGE_FRENCH_SWISS
, // second level
1425 LANGUAGE_GALICIAN
, // primary level
1426 LANGUAGE_HUNGARIAN
, // second level
1427 LANGUAGE_POLISH
, // second level
1428 LANGUAGE_PORTUGUESE
, // primary level
1429 LANGUAGE_PORTUGUESE_BRAZILIAN
, // primary level
1430 LANGUAGE_ROMANIAN
, // second level
1431 LANGUAGE_ROMANIAN_MOLDOVA
, // second level
1432 LANGUAGE_SWEDISH
, // alternative primary level
1433 LANGUAGE_SWEDISH_FINLAND
, // alternative primary level
1434 LANGUAGE_UKRAINIAN
, // primary level
1435 LANGUAGE_USER_ARAGONESE
, // primary level
1436 LANGUAGE_USER_ASTURIAN
) || // primary level
1437 primary(eLang
) == primary(LANGUAGE_GERMAN
) || // alternative primary level
1438 primary(eLang
) == primary(LANGUAGE_SPANISH
) ) // primary level
1440 InsertQuote( rDoc
, nInsPos
, cChar
, false, bInsert
, eLang
, ACQuotes::DoubleAngleQuote
);
1446 rDoc
.Insert( nInsPos
, OUString(cChar
) );
1448 rDoc
.Replace( nInsPos
, OUString(cChar
) );
1450 // Hardspaces autocorrection
1451 if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace
) )
1453 if ( NeedsHardspaceAutocorr( cChar
) &&
1454 FnAddNonBrkSpace( rDoc
, rTxt
, nInsPos
, GetDocLanguage( rDoc
, nInsPos
), io_bNbspRunNext
) )
1458 else if ( bIsNextRun
&& !IsAutoCorrectChar( cChar
) )
1460 // Remove the NBSP if it wasn't an autocorrection
1461 if ( nInsPos
!= 0 && NeedsHardspaceAutocorr( rTxt
[ nInsPos
- 1 ] ) &&
1462 cChar
!= ' ' && cChar
!= '\t' && cChar
!= cNonBreakingSpace
)
1464 // Look for the last HARD_SPACE
1465 sal_Int32 nPos
= nInsPos
- 1;
1466 bool bContinue
= true;
1469 const sal_Unicode cTmpChar
= rTxt
[ nPos
];
1470 if ( cTmpChar
== cNonBreakingSpace
)
1472 rDoc
.Delete( nPos
, nPos
+ 1 );
1475 else if ( !NeedsHardspaceAutocorr( cTmpChar
) || nPos
== 0 )
1487 sal_Int32 nPos
= nInsPos
- 1;
1489 if( IsWordDelim( rTxt
[ nPos
]))
1492 // Set bold or underline automatically?
1493 if (('*' == cChar
|| '_' == cChar
|| '/' == cChar
|| '-' == cChar
) && (nPos
+1 < rTxt
.getLength()))
1495 if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl
) )
1497 FnChgWeightUnderl( rDoc
, rTxt
, nPos
+1 );
1502 while( nPos
&& !IsWordDelim( rTxt
[ --nPos
]))
1505 // Found a Paragraph-start or a Blank, search for the word shortcut in
1507 sal_Int32 nCapLttrPos
= nPos
+1; // on the 1st Character
1508 if( !nPos
&& !IsWordDelim( rTxt
[ 0 ]))
1509 --nCapLttrPos
; // begin of paragraph and no blank
1511 const LanguageType eLang
= GetDocLanguage( rDoc
, nCapLttrPos
);
1512 CharClass
& rCC
= GetCharClass( eLang
);
1514 // no symbol characters
1515 if( lcl_IsSymbolChar( rCC
, rTxt
, nCapLttrPos
, nInsPos
))
1518 if( IsAutoCorrFlag( ACFlags::Autocorrect
) &&
1519 // tdf#134940 fix regression of arrow "-->" resulted by premature
1520 // replacement of "--" since '>' was added to IsAutoCorrectChar()
1523 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1524 // and becomes INVALID if ChgAutoCorrWord returns true!
1525 // => use aPara/pPara to create a valid copy of the string!
1527 OUString
* pPara
= IsAutoCorrFlag(ACFlags::CapitalStartSentence
) ? &aPara
: nullptr;
1529 bool bChgWord
= rDoc
.ChgAutoCorrWord( nCapLttrPos
, nInsPos
,
1533 sal_Int32 nCapLttrPos1
= nCapLttrPos
, nInsPos1
= nInsPos
;
1534 while( nCapLttrPos1
< nInsPos
&&
1535 lcl_IsInAsciiArr( sImplSttSkipChars
, rTxt
[ nCapLttrPos1
] )
1538 while( nCapLttrPos1
< nInsPos1
&& nInsPos1
&&
1539 lcl_IsInAsciiArr( sImplEndSkipChars
, rTxt
[ nInsPos1
-1 ] )
1543 if( (nCapLttrPos1
!= nCapLttrPos
|| nInsPos1
!= nInsPos
) &&
1544 nCapLttrPos1
< nInsPos1
&&
1545 rDoc
.ChgAutoCorrWord( nCapLttrPos1
, nInsPos1
, *this, pPara
))
1548 nCapLttrPos
= nCapLttrPos1
;
1554 if( !aPara
.isEmpty() )
1556 sal_Int32 nEnd
= nCapLttrPos
;
1557 while( nEnd
< aPara
.getLength() &&
1558 !IsWordDelim( aPara
[ nEnd
]))
1561 // Capital letter at beginning of paragraph?
1562 if( IsAutoCorrFlag( ACFlags::CapitalStartSentence
) )
1564 FnCapitalStartSentence( rDoc
, aPara
, false,
1565 nCapLttrPos
, nEnd
, eLang
);
1568 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash
) )
1570 FnChgToEnEmDash( rDoc
, aPara
, nCapLttrPos
, nEnd
, eLang
);
1577 if( IsAutoCorrFlag( ACFlags::TransliterateRTL
) && GetDocLanguage( rDoc
, nInsPos
) == LANGUAGE_HUNGARIAN
)
1579 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1580 // and becomes INVALID if TransliterateRTLWord returns true!
1581 if ( rDoc
.TransliterateRTLWord( nCapLttrPos
, nInsPos
) )
1585 if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber
) &&
1586 (nInsPos
>= 2 ) && // fdo#69762 avoid autocorrect for 2e-3
1587 ( '-' != cChar
|| 'E' != rtl::toAsciiUpperCase(rTxt
[nInsPos
-1]) || '0' > rTxt
[nInsPos
-2] || '9' < rTxt
[nInsPos
-2] ) &&
1588 FnChgOrdinalNumber( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) ) ||
1589 ( IsAutoCorrFlag( ACFlags::SetINetAttr
) &&
1590 ( ' ' == cChar
|| '\t' == cChar
|| 0x0a == cChar
|| !cChar
) &&
1591 FnSetINetAttr( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) ) )
1595 bool bLockKeyOn
= pFrameWin
&& (pFrameWin
->GetIndicatorState() & KeyIndicatorState::CAPSLOCK
);
1596 bool bUnsupported
= lcl_IsUnsupportedUnicodeChar( rCC
, rTxt
, nCapLttrPos
, nInsPos
);
1598 if ( bLockKeyOn
&& IsAutoCorrFlag( ACFlags::CorrectCapsLock
) &&
1599 FnCorrectCapsLock( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
) )
1601 // Correct accidental use of cAPS LOCK key (do this only when
1602 // the caps or shift lock key is pressed). Turn off the caps
1604 pFrameWin
->SimulateKeyPress( KEY_CAPSLOCK
);
1607 // Capital letter at beginning of paragraph ?
1608 if( !bUnsupported
&&
1609 IsAutoCorrFlag( ACFlags::CapitalStartSentence
) )
1611 FnCapitalStartSentence( rDoc
, rTxt
, true, nCapLttrPos
, nInsPos
, eLang
);
1614 // Two capital letters at beginning of word ??
1615 if( !bUnsupported
&&
1616 IsAutoCorrFlag( ACFlags::CapitalStartWord
) )
1618 FnCapitalStartWord( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
);
1621 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash
) )
1623 FnChgToEnEmDash( rDoc
, rTxt
, nCapLttrPos
, nInsPos
, eLang
);
1630 SvxAutoCorrectLanguageLists
& SvxAutoCorrect::GetLanguageList_(
1631 LanguageType eLang
)
1633 LanguageTag
aLanguageTag( eLang
);
1634 if (m_aLangTable
.find(aLanguageTag
) == m_aLangTable
.end())
1635 (void)CreateLanguageFile(aLanguageTag
);
1636 return *(m_aLangTable
.find(aLanguageTag
)->second
);
1639 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang
)
1641 auto const iter
= m_aLangTable
.find(LanguageTag(eLang
));
1642 if (iter
!= m_aLangTable
.end() && iter
->second
)
1643 iter
->second
->SaveCplSttExceptList();
1646 SAL_WARN("editeng", "Save an empty list? ");
1650 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang
)
1652 auto const iter
= m_aLangTable
.find(LanguageTag(eLang
));
1653 if (iter
!= m_aLangTable
.end() && iter
->second
)
1654 iter
->second
->SaveWrdSttExceptList();
1657 SAL_WARN("editeng", "Save an empty list? ");
1661 // Adds a single word. The list will immediately be written to the file!
1662 bool SvxAutoCorrect::AddCplSttException( const OUString
& rNew
,
1663 LanguageType eLang
)
1665 SvxAutoCorrectLanguageLists
* pLists
= nullptr;
1666 // either the right language is present or it will be this in the general list
1667 auto iter
= m_aLangTable
.find(LanguageTag(eLang
));
1668 if (iter
!= m_aLangTable
.end())
1669 pLists
= iter
->second
.get();
1672 LanguageTag
aLangTagUndetermined( LANGUAGE_UNDETERMINED
);
1673 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1674 if (iter
!= m_aLangTable
.end())
1675 pLists
= iter
->second
.get();
1676 else if(CreateLanguageFile(aLangTagUndetermined
))
1677 pLists
= m_aLangTable
.find(aLangTagUndetermined
)->second
.get();
1679 OSL_ENSURE(pLists
, "No auto correction data");
1680 return pLists
&& pLists
->AddToCplSttExceptList(rNew
);
1683 // Adds a single word. The list will immediately be written to the file!
1684 bool SvxAutoCorrect::AddWrtSttException( const OUString
& rNew
,
1685 LanguageType eLang
)
1687 SvxAutoCorrectLanguageLists
* pLists
= nullptr;
1688 //either the right language is present or it is set in the general list
1689 auto iter
= m_aLangTable
.find(LanguageTag(eLang
));
1690 if (iter
!= m_aLangTable
.end())
1691 pLists
= iter
->second
.get();
1694 LanguageTag
aLangTagUndetermined( LANGUAGE_UNDETERMINED
);
1695 iter
= m_aLangTable
.find(aLangTagUndetermined
);
1696 if (iter
!= m_aLangTable
.end())
1697 pLists
= iter
->second
.get();
1698 else if(CreateLanguageFile(aLangTagUndetermined
))
1699 pLists
= m_aLangTable
.find(aLangTagUndetermined
)->second
.get();
1701 OSL_ENSURE(pLists
, "No auto correction file!");
1702 return pLists
&& pLists
->AddToWrdSttExceptList(rNew
);
1705 OUString
SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc
const& rDoc
, const OUString
& rTxt
,
1712 sal_Int32 nEnd
= nPos
;
1714 // it must be followed by a blank or tab!
1715 if( ( nPos
< rTxt
.getLength() &&
1716 !IsWordDelim( rTxt
[ nPos
])) ||
1717 IsWordDelim( rTxt
[ --nPos
]))
1720 while( nPos
&& !IsWordDelim( rTxt
[ --nPos
]))
1723 // Found a Paragraph-start or a Blank, search for the word shortcut in
1725 sal_Int32 nCapLttrPos
= nPos
+1; // on the 1st Character
1726 if( !nPos
&& !IsWordDelim( rTxt
[ 0 ]))
1727 --nCapLttrPos
; // Beginning of paragraph and no Blank!
1729 while( lcl_IsInAsciiArr( sImplSttSkipChars
, rTxt
[ nCapLttrPos
]) )
1730 if( ++nCapLttrPos
>= nEnd
)
1733 if( 3 > nEnd
- nCapLttrPos
)
1736 const LanguageType eLang
= GetDocLanguage( rDoc
, nCapLttrPos
);
1738 CharClass
& rCC
= GetCharClass(eLang
);
1740 if( lcl_IsSymbolChar( rCC
, rTxt
, nCapLttrPos
, nEnd
))
1743 sRet
= rTxt
.copy( nCapLttrPos
, nEnd
- nCapLttrPos
);
1748 std::vector
<OUString
> SvxAutoCorrect::GetChunkForAutoText(const OUString
& rTxt
,
1749 const sal_Int32 nPos
)
1751 constexpr sal_Int32 nMinLen
= 3;
1752 constexpr sal_Int32 nMaxLen
= 9;
1753 std::vector
<OUString
> aRes
;
1754 if (nPos
>= nMinLen
)
1756 sal_Int32 nBegin
= std::max
<sal_Int32
>(nPos
- nMaxLen
, 0);
1757 // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1758 if (nBegin
> 0 && !IsWordDelim(rTxt
[nBegin
-1]))
1760 while (nBegin
+ nMinLen
<= nPos
&& !IsWordDelim(rTxt
[nBegin
]))
1763 if (nBegin
+ nMinLen
<= nPos
)
1765 OUString sRes
= rTxt
.copy(nBegin
, nPos
- nBegin
);
1766 aRes
.push_back(sRes
);
1767 bool bLastStartedWithDelim
= IsWordDelim(sRes
[0]);
1768 for (sal_Int32 i
= 1; i
<= sRes
.getLength() - nMinLen
; ++i
)
1770 bool bAdd
= bLastStartedWithDelim
;
1771 bLastStartedWithDelim
= IsWordDelim(sRes
[i
]);
1772 bAdd
= bAdd
|| bLastStartedWithDelim
;
1774 aRes
.push_back(sRes
.copy(i
));
1781 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag
& rLanguageTag
, bool bNewFile
)
1783 OSL_ENSURE(m_aLangTable
.find(rLanguageTag
) == m_aLangTable
.end(), "Language already exists ");
1785 OUString
sUserDirFile( GetAutoCorrFileName( rLanguageTag
, true ));
1786 OUString
sShareDirFile( sUserDirFile
);
1788 SvxAutoCorrectLanguageLists
* pLists
= nullptr;
1790 tools::Time
nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM
), nLastCheckTime( tools::Time::EMPTY
);
1792 auto nFndPos
= aLastFileTable
.find(rLanguageTag
);
1793 if(nFndPos
!= aLastFileTable
.end() &&
1794 (nLastCheckTime
.SetTime(nFndPos
->second
), nLastCheckTime
< nAktTime
) &&
1795 nAktTime
- nLastCheckTime
< nMinTime
)
1797 // no need to test the file, because the last check is not older than
1801 sShareDirFile
= sUserDirFile
;
1802 pLists
= new SvxAutoCorrectLanguageLists( *this, sShareDirFile
, sUserDirFile
);
1803 LanguageTag
aTmp(rLanguageTag
); // this insert() needs a non-const reference
1804 m_aLangTable
.insert(std::make_pair(aTmp
, std::unique_ptr
<SvxAutoCorrectLanguageLists
>(pLists
)));
1805 aLastFileTable
.erase(nFndPos
);
1809 ( FStatHelper::IsDocument( sUserDirFile
) ||
1810 FStatHelper::IsDocument( sShareDirFile
=
1811 GetAutoCorrFileName( rLanguageTag
) ) ||
1812 FStatHelper::IsDocument( sShareDirFile
=
1813 GetAutoCorrFileName( rLanguageTag
, false, false, true) )
1815 ( sShareDirFile
= sUserDirFile
, bNewFile
)
1818 pLists
= new SvxAutoCorrectLanguageLists( *this, sShareDirFile
, sUserDirFile
);
1819 LanguageTag
aTmp(rLanguageTag
); // this insert() needs a non-const reference
1820 m_aLangTable
.insert(std::make_pair(aTmp
, std::unique_ptr
<SvxAutoCorrectLanguageLists
>(pLists
)));
1821 if (nFndPos
!= aLastFileTable
.end())
1822 aLastFileTable
.erase(nFndPos
);
1824 else if( !bNewFile
)
1826 aLastFileTable
[rLanguageTag
] = nAktTime
.GetTime();
1828 return pLists
!= nullptr;
1831 bool SvxAutoCorrect::PutText( const OUString
& rShort
, const OUString
& rLong
,
1832 LanguageType eLang
)
1834 LanguageTag
aLanguageTag( eLang
);
1835 auto const iter
= m_aLangTable
.find(aLanguageTag
);
1836 if (iter
!= m_aLangTable
.end())
1837 return iter
->second
->PutText(rShort
, rLong
);
1838 if(CreateLanguageFile(aLanguageTag
))
1839 return m_aLangTable
.find(aLanguageTag
)->second
->PutText(rShort
, rLong
);
1843 void SvxAutoCorrect::MakeCombinedChanges( std::vector
<SvxAutocorrWord
>& aNewEntries
,
1844 std::vector
<SvxAutocorrWord
>& aDeleteEntries
,
1845 LanguageType eLang
)
1847 LanguageTag
aLanguageTag( eLang
);
1848 auto const iter
= m_aLangTable
.find(aLanguageTag
);
1849 if (iter
!= m_aLangTable
.end())
1851 iter
->second
->MakeCombinedChanges( aNewEntries
, aDeleteEntries
);
1853 else if(CreateLanguageFile( aLanguageTag
))
1855 m_aLangTable
.find( aLanguageTag
)->second
->MakeCombinedChanges( aNewEntries
, aDeleteEntries
);
1859 // - return the replacement text (only for SWG-Format, all other
1860 // can be taken from the word list!)
1861 bool SvxAutoCorrect::GetLongText( const OUString
&, OUString
& )
1866 void SvxAutoCorrect::refreshBlockList( const uno::Reference
< embed::XStorage
>& )
1870 // Text with attribution (only the SWG - SWG format!)
1871 bool SvxAutoCorrect::PutText( const css::uno::Reference
< css::embed::XStorage
>&,
1872 const OUString
&, const OUString
&, SfxObjectShell
&, OUString
& )
1877 OUString
EncryptBlockName_Imp(const OUString
& rName
)
1879 OUStringBuffer aName
;
1880 aName
.append('#').append(rName
);
1881 for (sal_Int32 nLen
= rName
.getLength(), nPos
= 1; nPos
< nLen
; ++nPos
)
1883 if (lcl_IsInAsciiArr( "!/:.\\", aName
[nPos
]))
1884 aName
[nPos
] &= 0x0f;
1886 return aName
.makeStringAndClear();
1889 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1890 static void GeneratePackageName ( const OUString
& rShort
, OUString
& rPackageName
)
1892 OString
sByte(OUStringToOString(rShort
, RTL_TEXTENCODING_UTF7
));
1893 OUStringBuffer
aBuf(OStringToOUString(sByte
, RTL_TEXTENCODING_ASCII_US
));
1895 for (sal_Int32 nPos
= 0; nPos
< aBuf
.getLength(); ++nPos
)
1911 rPackageName
= aBuf
.makeStringAndClear();
1914 static const SvxAutocorrWord
* lcl_SearchWordsInList(
1915 SvxAutoCorrectLanguageLists
* pList
, const OUString
& rTxt
,
1916 sal_Int32
& rStt
, sal_Int32 nEndPos
)
1918 const SvxAutocorrWordList
* pAutoCorrWordList
= pList
->GetAutocorrWordList();
1919 return pAutoCorrWordList
->SearchWordsInList( rTxt
, rStt
, nEndPos
);
1922 // the search for the words in the substitution table
1923 const SvxAutocorrWord
* SvxAutoCorrect::SearchWordsInList(
1924 const OUString
& rTxt
, sal_Int32
& rStt
, sal_Int32 nEndPos
,
1925 SvxAutoCorrDoc
&, LanguageTag
& rLang
)
1927 const SvxAutocorrWord
* pRet
= nullptr;
1928 LanguageTag
aLanguageTag( rLang
);
1929 if( aLanguageTag
.isSystemLocale() )
1930 aLanguageTag
.reset( MsLangId::getSystemLanguage());
1932 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
1935 // First search for eLang, then US-English -> English
1936 // and last in LANGUAGE_UNDETERMINED
1937 if (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() || CreateLanguageFile(aLanguageTag
, false))
1939 //the language is available - so bring it on
1940 std::unique_ptr
<SvxAutoCorrectLanguageLists
> const& pList
= m_aLangTable
.find(aLanguageTag
)->second
;
1941 pRet
= lcl_SearchWordsInList( pList
.get(), rTxt
, rStt
, nEndPos
);
1944 rLang
= aLanguageTag
;
1951 // If it still could not be found here, then keep on searching
1952 LanguageType eLang
= aLanguageTag
.getLanguageType();
1953 // the primary language for example EN
1954 aLanguageTag
.reset(aLanguageTag
.getLanguage());
1955 LanguageType nTmpKey
= aLanguageTag
.getLanguageType(false);
1956 if (nTmpKey
!= eLang
&& nTmpKey
!= LANGUAGE_UNDETERMINED
&&
1957 (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() ||
1958 CreateLanguageFile(aLanguageTag
, false)))
1960 //the language is available - so bring it on
1961 std::unique_ptr
<SvxAutoCorrectLanguageLists
> const& pList
= m_aLangTable
.find(aLanguageTag
)->second
;
1962 pRet
= lcl_SearchWordsInList( pList
.get(), rTxt
, rStt
, nEndPos
);
1965 rLang
= aLanguageTag
;
1970 if (m_aLangTable
.find(aLanguageTag
.reset(LANGUAGE_UNDETERMINED
)) != m_aLangTable
.end() ||
1971 CreateLanguageFile(aLanguageTag
, false))
1973 //the language is available - so bring it on
1974 std::unique_ptr
<SvxAutoCorrectLanguageLists
> const& pList
= m_aLangTable
.find(aLanguageTag
)->second
;
1975 pRet
= lcl_SearchWordsInList( pList
.get(), rTxt
, rStt
, nEndPos
);
1978 rLang
= aLanguageTag
;
1985 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang
,
1986 const OUString
& sWord
)
1988 LanguageTag
aLanguageTag( eLang
);
1990 /* TODO-BCP47: again horrible ugliness */
1992 // First search for eLang, then primary language of eLang
1993 // and last in LANGUAGE_UNDETERMINED
1995 if (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() || CreateLanguageFile(aLanguageTag
, false))
1997 //the language is available - so bring it on
1998 auto const& pList
= m_aLangTable
.find(aLanguageTag
)->second
;
1999 if(pList
->GetWrdSttExceptList()->find(sWord
) != pList
->GetWrdSttExceptList()->end() )
2003 // If it still could not be found here, then keep on searching
2004 // the primary language for example EN
2005 aLanguageTag
.reset(aLanguageTag
.getLanguage());
2006 LanguageType nTmpKey
= aLanguageTag
.getLanguageType(false);
2007 if (nTmpKey
!= eLang
&& nTmpKey
!= LANGUAGE_UNDETERMINED
&&
2008 (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() ||
2009 CreateLanguageFile(aLanguageTag
, false)))
2011 //the language is available - so bring it on
2012 auto const& pList
= m_aLangTable
.find(aLanguageTag
)->second
;
2013 if(pList
->GetWrdSttExceptList()->find(sWord
) != pList
->GetWrdSttExceptList()->end() )
2017 if (m_aLangTable
.find(aLanguageTag
.reset(LANGUAGE_UNDETERMINED
)) != m_aLangTable
.end() ||
2018 CreateLanguageFile(aLanguageTag
, false))
2020 //the language is available - so bring it on
2021 auto const& pList
= m_aLangTable
.find(aLanguageTag
)->second
;
2022 if(pList
->GetWrdSttExceptList()->find(sWord
) != pList
->GetWrdSttExceptList()->end() )
2028 static bool lcl_FindAbbreviation(const SvStringsISortDtor
* pList
, const OUString
& sWord
)
2030 SvStringsISortDtor::const_iterator it
= pList
->find( "~" );
2031 SvStringsISortDtor::size_type nPos
= it
- pList
->begin();
2032 if( nPos
< pList
->size() )
2034 OUString
sLowerWord(sWord
.toAsciiLowerCase());
2036 for( SvStringsISortDtor::size_type n
= nPos
; n
< pList
->size(); ++n
)
2038 sAbr
= (*pList
)[ n
];
2041 // ~ and ~. are not allowed!
2042 if( 2 < sAbr
.getLength() && sAbr
.getLength() - 1 <= sWord
.getLength() )
2044 OUString
sLowerAbk(sAbr
.toAsciiLowerCase());
2045 for (sal_Int32 i
= sLowerAbk
.getLength(), ii
= sLowerWord
.getLength(); i
;)
2047 if( !--i
) // agrees
2050 if( sLowerAbk
[i
] != sLowerWord
[--ii
])
2056 OSL_ENSURE( !(nPos
&& '~' == (*pList
)[ --nPos
][ 0 ] ),
2057 "Wrongly sorted exception list?" );
2061 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang
,
2062 const OUString
& sWord
, bool bAbbreviation
)
2064 LanguageTag
aLanguageTag( eLang
);
2066 /* TODO-BCP47: did I mention terrible horrible ugliness? */
2068 // First search for eLang, then primary language of eLang
2069 // and last in LANGUAGE_UNDETERMINED
2071 if (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() || CreateLanguageFile(aLanguageTag
, false))
2073 //the language is available - so bring it on
2074 const SvStringsISortDtor
* pList
= m_aLangTable
.find(aLanguageTag
)->second
->GetCplSttExceptList();
2075 if(bAbbreviation
? lcl_FindAbbreviation(pList
, sWord
) : pList
->find(sWord
) != pList
->end() )
2079 // If it still could not be found here, then keep on searching
2080 // the primary language for example EN
2081 aLanguageTag
.reset(aLanguageTag
.getLanguage());
2082 LanguageType nTmpKey
= aLanguageTag
.getLanguageType(false);
2083 if (nTmpKey
!= eLang
&& nTmpKey
!= LANGUAGE_UNDETERMINED
&&
2084 (m_aLangTable
.find(aLanguageTag
) != m_aLangTable
.end() ||
2085 CreateLanguageFile(aLanguageTag
, false)))
2087 //the language is available - so bring it on
2088 const SvStringsISortDtor
* pList
= m_aLangTable
.find(aLanguageTag
)->second
->GetCplSttExceptList();
2089 if(bAbbreviation
? lcl_FindAbbreviation(pList
, sWord
) : pList
->find(sWord
) != pList
->end() )
2093 if (m_aLangTable
.find(aLanguageTag
.reset(LANGUAGE_UNDETERMINED
)) != m_aLangTable
.end() ||
2094 CreateLanguageFile(aLanguageTag
, false))
2096 //the language is available - so bring it on
2097 const SvStringsISortDtor
* pList
= m_aLangTable
.find(aLanguageTag
)->second
->GetCplSttExceptList();
2098 if(bAbbreviation
? lcl_FindAbbreviation(pList
, sWord
) : pList
->find(sWord
) != pList
->end() )
2104 OUString
SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag
& rLanguageTag
,
2105 bool bNewFile
, bool bTst
, bool bUnlocalized
) const
2107 OUString sRet
, sExt( rLanguageTag
.getBcp47() );
2110 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2111 std::vector
< OUString
> vecFallBackStrings
= rLanguageTag
.getFallbackStrings(false);
2112 if (!vecFallBackStrings
.empty())
2113 sExt
= vecFallBackStrings
[0];
2116 sExt
= "_" + sExt
+ ".dat";
2118 sRet
= sUserAutoCorrFile
+ sExt
;
2120 sRet
= sShareAutoCorrFile
+ sExt
;
2123 // test first in the user directory - if not exist, then
2124 sRet
= sUserAutoCorrFile
+ sExt
;
2125 if( !FStatHelper::IsDocument( sRet
))
2126 sRet
= sShareAutoCorrFile
+ sExt
;
2131 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2132 SvxAutoCorrect
& rParent
,
2133 const OUString
& rShareAutoCorrectFile
,
2134 const OUString
& rUserAutoCorrectFile
)
2135 : sShareAutoCorrFile( rShareAutoCorrectFile
),
2136 sUserAutoCorrFile( rUserAutoCorrectFile
),
2137 aModifiedDate( Date::EMPTY
),
2138 aModifiedTime( tools::Time::EMPTY
),
2139 aLastCheckTime( tools::Time::EMPTY
),
2140 rAutoCorrect(rParent
),
2141 nFlags(ACFlags::NONE
)
2145 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2149 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2151 // Access the file system only every 2 minutes to check the date stamp
2154 tools::Time
nMinTime( 0, 2 );
2155 tools::Time
nAktTime( tools::Time::SYSTEM
);
2156 if( aLastCheckTime
<= nAktTime
) // overflow?
2158 nAktTime
-= aLastCheckTime
;
2159 if( nAktTime
> nMinTime
) // min time past
2161 Date
aTstDate( Date::EMPTY
); tools::Time
aTstTime( tools::Time::EMPTY
);
2162 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile
,
2163 &aTstDate
, &aTstTime
) &&
2164 ( aModifiedDate
!= aTstDate
|| aModifiedTime
!= aTstTime
))
2167 // then remove all the lists fast!
2168 if( (ACFlags::CplSttLstLoad
& nFlags
) && pCplStt_ExcptLst
)
2170 pCplStt_ExcptLst
.reset();
2172 if( (ACFlags::WrdSttLstLoad
& nFlags
) && pWrdStt_ExcptLst
)
2174 pWrdStt_ExcptLst
.reset();
2176 if( (ACFlags::ChgWordLstLoad
& nFlags
) && pAutocorr_List
)
2178 pAutocorr_List
.reset();
2180 nFlags
&= ~ACFlags(ACFlags::CplSttLstLoad
| ACFlags::WrdSttLstLoad
| ACFlags::ChgWordLstLoad
);
2182 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2187 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2188 std::unique_ptr
<SvStringsISortDtor
>& rpLst
,
2189 const char* pStrmName
,
2190 tools::SvRef
<SotStorage
>& rStg
)
2195 rpLst
.reset( new SvStringsISortDtor
);
2198 const OUString
sStrmName( pStrmName
, strlen(pStrmName
), RTL_TEXTENCODING_MS_1252
);
2200 if( rStg
.is() && rStg
->IsStream( sStrmName
) )
2202 tools::SvRef
<SotStorageStream
> xStrm
= rStg
->OpenSotStream( sStrmName
,
2203 ( StreamMode::READ
| StreamMode::SHARE_DENYWRITE
| StreamMode::NOCREATE
) );
2204 if( ERRCODE_NONE
!= xStrm
->GetError())
2208 RemoveStream_Imp( sStrmName
);
2212 uno::Reference
< uno::XComponentContext
> xContext
=
2213 comphelper::getProcessComponentContext();
2215 xml::sax::InputSource aParserInput
;
2216 aParserInput
.sSystemId
= sStrmName
;
2219 xStrm
->SetBufferSize( 8 * 1024 );
2220 aParserInput
.aInputStream
= new utl::OInputStreamWrapper( *xStrm
);
2223 uno::Reference
< xml::sax::XFastDocumentHandler
> xFilter
= new SvXMLExceptionListImport ( xContext
, *rpLst
);
2225 // connect parser and filter
2226 uno::Reference
< xml::sax::XFastParser
> xParser
= xml::sax::FastParser::create( xContext
);
2227 uno::Reference
<xml::sax::XFastTokenHandler
> xTokenHandler
= new SvXMLAutoCorrectTokenHandler
;
2228 xParser
->setFastDocumentHandler( xFilter
);
2229 xParser
->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE
);
2230 xParser
->setTokenHandler( xTokenHandler
);
2235 xParser
->parseStream( aParserInput
);
2237 catch( const xml::sax::SAXParseException
& )
2241 catch( const xml::sax::SAXException
& )
2245 catch( const io::IOException
& )
2253 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile
,
2254 &aModifiedDate
, &aModifiedTime
);
2255 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
// Exports the exception list rLst as XML into the stream pStrmName of storage rStg.
// NOTE(review): this excerpt is lossy. The final parameter (callers in this file
// also pass a fourth argument, `true`), the braces and the conditions guarding
// both Remove() calls are not visible here; confirm against the full file.
2260 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2261 const SvStringsISortDtor
& rLst
,
2262 const char* pStrmName
,
2263 tools::SvRef
<SotStorage
> const &rStg
,
// Convert the C string stream name to an OUString, interpreting it as MS-1252.
2269 OUString
sStrmName( pStrmName
, strlen(pStrmName
), RTL_TEXTENCODING_MS_1252
);
2272 rStg
->Remove( sStrmName
);
// Open the target stream for reading and writing while denying other writers.
2277 tools::SvRef
<SotStorageStream
> xStrm
= rStg
->OpenSotStream( sStrmName
,
2278 ( StreamMode::READ
| StreamMode::WRITE
| StreamMode::SHARE_DENYWRITE
) );
// Truncate any previous content and tag the stream as XML.
2281 xStrm
->SetSize( 0 );
2282 xStrm
->SetBufferSize( 8192 );
2283 xStrm
->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2286 uno::Reference
< uno::XComponentContext
> xContext
=
2287 comphelper::getProcessComponentContext();
// Create a SAX writer and let it write into the storage stream.
2289 uno::Reference
< xml::sax::XWriter
> xWriter
= xml::sax::Writer::create(xContext
);
2290 uno::Reference
< io::XOutputStream
> xOut
= new utl::OOutputStreamWrapper( *xStrm
);
2291 xWriter
->setOutputStream(xOut
);
// The exporter emits the list through the writer's document-handler interface.
2293 uno::Reference
< xml::sax::XDocumentHandler
> xHandler(xWriter
, UNO_QUERY_THROW
);
2294 rtl::Reference
< SvXMLExceptionListExport
> xExp( new SvXMLExceptionListExport( xContext
, rLst
, sStrmName
, xHandler
) );
2296 xExp
->exportDoc( XML_BLOCK_LIST
);
// Only continue with the storage when the stream itself reported no error.
2299 if( xStrm
->GetError() == ERRCODE_NONE
)
// The storage reported an error: remove the stream again.
// NOTE(review): the statements between these two checks are not visible here.
2305 if( ERRCODE_NONE
!= rStg
->GetError() )
2307 rStg
->Remove( sStrmName
);
// (Re)builds pAutocorr_List from the XML word list stored in sShareAutoCorrFile
// and returns a non-owning pointer to it.
// NOTE(review): the try block and braces are not visible in this excerpt.
2316 SvxAutocorrWordList
* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
// Empty an already existing list before it is replaced below.
2318 if( pAutocorr_List
)
2319 pAutocorr_List
->DeleteAndDestroyAll();
2321 pAutocorr_List
.reset( new SvxAutocorrWordList() );
// Open the share file as a package storage and the word-list stream, both read-only.
2325 uno::Reference
< embed::XStorage
> xStg
= comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile
, embed::ElementModes::READ
);
2326 uno::Reference
< io::XStream
> xStrm
= xStg
->openStreamElement( pXMLImplAutocorr_ListStr
, embed::ElementModes::READ
);
2327 uno::Reference
< uno::XComponentContext
> xContext
= comphelper::getProcessComponentContext();
// Describe the parser input: system id is the stream name, data comes from the stream.
2329 xml::sax::InputSource aParserInput
;
2330 aParserInput
.sSystemId
= pXMLImplAutocorr_ListStr
;
2331 aParserInput
.aInputStream
= xStrm
->getInputStream();
2334 uno::Reference
< xml::sax::XFastParser
> xParser
= xml::sax::FastParser::create(xContext
);
2335 SAL_INFO("editeng", "AutoCorrect Import" );
// The import filter receives the parsed content together with the target list.
2336 uno::Reference
< xml::sax::XFastDocumentHandler
> xFilter
= new SvXMLAutoCorrectImport( xContext
, pAutocorr_List
.get(), rAutoCorrect
, xStg
);
2337 uno::Reference
<xml::sax::XFastTokenHandler
> xTokenHandler
= new SvXMLAutoCorrectTokenHandler
;
2339 // connect parser and filter
2340 xParser
->setFastDocumentHandler( xFilter
);
2341 xParser
->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE
);
2342 xParser
->setTokenHandler(xTokenHandler
);
2345 xParser
->parseStream( aParserInput
);
// Any UNO exception during loading is logged as a warning that names the file.
2347 catch ( const uno::Exception
& )
2349 TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile
);
// Record the file's modification stamp and the time of this check.
2353 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile
,
2354 &aModifiedDate
, &aModifiedTime
);
2355 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
2357 return pAutocorr_List
.get();
// Returns the autocorrect word list, loading it first when the ChgWordLstLoad
// flag is not yet set or IsFileChanged_Imp() reports a change.
2360 const SvxAutocorrWordList
* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2362 if( !( ACFlags::ChgWordLstLoad
& nFlags
) || IsFileChanged_Imp() )
2364 LoadAutocorrWordList();
// Loading left no list behind: report it and fall back to an empty list.
2365 if( !pAutocorr_List
)
2367 OSL_ENSURE( false, "No valid list" );
2368 pAutocorr_List
.reset( new SvxAutocorrWordList() );
// Remember that the list has been loaded.
2370 nFlags
|= ACFlags::ChgWordLstLoad
;
2372 return pAutocorr_List
.get();
// Returns the pCplStt_ExcptLst exception list, loading it first when the
// CplSttLstLoad flag is not yet set or IsFileChanged_Imp() reports a change.
2375 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2377 if( !( ACFlags::CplSttLstLoad
& nFlags
) || IsFileChanged_Imp() )
2379 LoadCplSttExceptList();
// Loading left no list behind: report it and fall back to an empty list.
2380 if( !pCplStt_ExcptLst
)
2382 OSL_ENSURE( false, "No valid list" );
2383 pCplStt_ExcptLst
.reset( new SvStringsISortDtor
);
// Remember that the list has been loaded.
2385 nFlags
|= ACFlags::CplSttLstLoad
;
2387 return pCplStt_ExcptLst
.get();
// Adds rNew to the pCplStt_ExcptLst exception list and, when it was really
// inserted, writes the list to the user autocorrect file.
// NOTE(review): the returned value is not visible in this excerpt.
2390 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString
& rNew
)
// insert(...).second is true only when rNew was not already in the list.
2393 if( !rNew
.isEmpty() && GetCplSttExceptList()->insert( rNew
).second
)
2395 MakeUserStorage_Impl();
2396 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2398 SaveExceptList_Imp( *pCplStt_ExcptLst
, pXMLImplCplStt_ExcptLstStr
, xStg
);
// Refresh the stored modification stamp of the user file and the check time.
2402 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2403 &aModifiedDate
, &aModifiedTime
);
2404 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
// Adds rNew to the pWrdStt_ExcptLst exception list and, when it was really
// inserted, writes the list to the user autocorrect file.
// NOTE(review): the returned value is not visible in this excerpt.
2410 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString
& rNew
)
// Unlike the CplStt variant this calls the Load function directly, not the Get one.
2413 SvStringsISortDtor
* pExceptList
= LoadWrdSttExceptList();
// insert(...).second is true only when rNew was not already in the list.
2414 if( !rNew
.isEmpty() && pExceptList
&& pExceptList
->insert( rNew
).second
)
2416 MakeUserStorage_Impl();
2417 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2419 SaveExceptList_Imp( *pWrdStt_ExcptLst
, pXMLImplWrdStt_ExcptLstStr
, xStg
);
// Refresh the stored modification stamp of the user file and the check time.
2423 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2424 &aModifiedDate
, &aModifiedTime
);
2425 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
// Loads pCplStt_ExcptLst from the share autocorrect file, if that file contains
// the corresponding stream, and returns a non-owning pointer to the list.
2431 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
// Open the share file read-only without denying access to others.
2435 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sShareAutoCorrFile
, StreamMode::READ
| StreamMode::SHARE_DENYNONE
);
2436 if( xStg
.is() && xStg
->IsContained( pXMLImplCplStt_ExcptLstStr
) )
2437 LoadXMLExceptList_Imp( pCplStt_ExcptLst
, pXMLImplCplStt_ExcptLstStr
, xStg
);
// NOTE(review): the handler body is not visible in this excerpt.
2439 catch (const css::ucb::ContentCreationException
&)
2442 return pCplStt_ExcptLst
.get();
// Writes pCplStt_ExcptLst to the user autocorrect file and refreshes the
// stored modification stamp and check time.
2445 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2447 MakeUserStorage_Impl();
2448 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2450 SaveExceptList_Imp( *pCplStt_ExcptLst
, pXMLImplCplStt_ExcptLstStr
, xStg
);
// Refresh the stored modification stamp of the user file and the check time.
2455 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2456 &aModifiedDate
, &aModifiedTime
);
2457 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
// Loads pWrdStt_ExcptLst from the share autocorrect file, if that file contains
// the corresponding stream, and returns a non-owning pointer to the list.
2460 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList()
// Open the share file read-only without denying access to others.
2464 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sShareAutoCorrFile
, StreamMode::READ
| StreamMode::SHARE_DENYNONE
);
2465 if( xStg
.is() && xStg
->IsContained( pXMLImplWrdStt_ExcptLstStr
) )
2466 LoadXMLExceptList_Imp( pWrdStt_ExcptLst
, pXMLImplWrdStt_ExcptLstStr
, xStg
);
// A content-creation failure is logged as a warning.
2468 catch (const css::ucb::ContentCreationException
&)
2470 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList");
2472 return pWrdStt_ExcptLst
.get();
// Writes pWrdStt_ExcptLst to the user autocorrect file and refreshes the
// stored modification stamp and check time.
2475 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList()
2477 MakeUserStorage_Impl();
2478 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2480 SaveExceptList_Imp( *pWrdStt_ExcptLst
, pXMLImplWrdStt_ExcptLstStr
, xStg
);
// Refresh the stored modification stamp of the user file and the check time.
2484 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile
,
2485 &aModifiedDate
, &aModifiedTime
);
2486 aLastCheckTime
= tools::Time( tools::Time::SYSTEM
);
// Returns the pWrdStt_ExcptLst exception list, loading it first when the
// WrdSttLstLoad flag is not yet set or IsFileChanged_Imp() reports a change.
2489 SvStringsISortDtor
* SvxAutoCorrectLanguageLists::GetWrdSttExceptList()
2491 if( !( ACFlags::WrdSttLstLoad
& nFlags
) || IsFileChanged_Imp() )
2493 LoadWrdSttExceptList();
// Loading left no list behind: report it and fall back to an empty list.
2494 if( !pWrdStt_ExcptLst
)
2496 OSL_ENSURE( false, "No valid list" );
2497 pWrdStt_ExcptLst
.reset( new SvStringsISortDtor
);
// Remember that the list has been loaded.
2499 nFlags
|= ACFlags::WrdSttLstLoad
;
2501 return pWrdStt_ExcptLst
.get();
// Removes the stream rName from the user autocorrect file.
// NOTE(review): statements after the Remove() call are not visible in this excerpt.
2504 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString
& rName
)
// Nothing is done when the share file and the user file are the same file.
2506 if( sShareAutoCorrFile
!= sUserAutoCorrFile
)
2508 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
// Only touch a storage that opened without error and really holds rName as a stream.
2509 if( xStg
.is() && ERRCODE_NONE
== xStg
->GetError() &&
2510 xStg
->IsStream( rName
) )
2512 xStg
->Remove( rName
);
// Makes sure the user autocorrect file exists, copying it from the share file
// and/or converting it from the old OLE storage format where required.
// NOTE(review): this excerpt is lossy. Braces, try/catch blocks, some flag
// assignments and the declaration of aInfo are not visible here.
2520 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2522 // The conversion needs to happen if the file is already in the user
2523 // directory and is in the old format. Additionally it needs to
2524 // happen when the file is being copied from share to user.
2526 bool bError
= false, bConvert
= false, bCopy
= false;
2527 INetURLObject aDest
;
2528 INetURLObject aSource
;
// Case 1: user and share files differ, so the share file is the copy source.
2530 if (sUserAutoCorrFile
!= sShareAutoCorrFile
)
2532 aSource
= INetURLObject ( sShareAutoCorrFile
);
2533 aDest
= INetURLObject ( sUserAutoCorrFile
);
// An OLE-format share file is copied to a ".bak" name instead.
2534 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile
) )
2536 aDest
.SetExtension ( "bak" );
// Case 2: the user file itself is in OLE format; copy it to ".bak" and convert.
2541 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile
) )
2543 aSource
= INetURLObject ( sUserAutoCorrFile
);
2544 aDest
= INetURLObject ( sUserAutoCorrFile
);
2545 aDest
.SetExtension ( "bak" );
2546 bCopy
= bConvert
= true;
// Build the URL of the destination folder by cutting off the last path segment.
2552 OUString
sMain(aDest
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
));
2553 sal_Int32 nSlashPos
= sMain
.lastIndexOf('/');
2554 sMain
= sMain
.copy(0, nSlashPos
);
2555 ::ucbhelper::Content
aNewContent( sMain
, uno::Reference
< XCommandEnvironment
>(), comphelper::getProcessComponentContext() );
// Transfer (copy, not move) the source into that folder, overwriting any existing file.
2557 aInfo
.NameClash
= NameClash::OVERWRITE
;
2558 aInfo
.NewTitle
= aDest
.GetLastName();
2559 aInfo
.SourceURL
= aSource
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
);
2560 aInfo
.MoveData
= false;
2561 aNewContent
.executeCommand( "transfer", Any(aInfo
));
// Conversion: read the lists from the copied file and write them to the user file.
2568 if (bConvert
&& !bError
)
2570 tools::SvRef
<SotStorage
> xSrcStg
= new SotStorage( aDest
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
), StreamMode::READ
);
2571 tools::SvRef
<SotStorage
> xDstStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::WRITE
);
2573 if( xSrcStg
.is() && xDstStg
.is() )
// One temporary list is reused for both exception lists.
2575 std::unique_ptr
<SvStringsISortDtor
> pTmpWordList
;
2577 if (xSrcStg
->IsContained( pXMLImplWrdStt_ExcptLstStr
) )
2578 LoadXMLExceptList_Imp( pTmpWordList
, pXMLImplWrdStt_ExcptLstStr
, xSrcStg
);
// NOTE(review): the guard around this save/reset pair is not visible here.
2582 SaveExceptList_Imp( *pTmpWordList
, pXMLImplWrdStt_ExcptLstStr
, xDstStg
, true );
2583 pTmpWordList
.reset();
2587 if (xSrcStg
->IsContained( pXMLImplCplStt_ExcptLstStr
) )
2588 LoadXMLExceptList_Imp( pTmpWordList
, pXMLImplCplStt_ExcptLstStr
, xSrcStg
);
// NOTE(review): the guard around this save/clear pair is not visible here.
2592 SaveExceptList_Imp( *pTmpWordList
, pXMLImplCplStt_ExcptLstStr
, xDstStg
, true );
2593 pTmpWordList
->clear();
// Load the word list, write it to the destination, then use the user file as share file.
2596 GetAutocorrWordList();
2597 MakeBlocklist_Imp( *xDstStg
);
2598 sShareAutoCorrFile
= sUserAutoCorrFile
;
// Delete the intermediate copy that served as conversion source.
2602 ::ucbhelper::Content
aContent ( aDest
.GetMainURL( INetURLObject::DecodeMechanism::ToIUri
), uno::Reference
< XCommandEnvironment
>(), comphelper::getProcessComponentContext() );
2603 aContent
.executeCommand ( "delete", makeAny ( true ) );
// Plain copy without conversion: from now on the user file is also the share file.
2610 else if( bCopy
&& !bError
)
2611 sShareAutoCorrFile
= sUserAutoCorrFile
;
// Writes pAutocorr_List as XML into the pXMLImplAutocorr_ListStr stream of rStg.
// NOTE(review): how bRemove selects between writing and removing the stream,
// and the final return statement, are not visible in this excerpt.
2614 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage
& rStg
)
// bRemove is true when there is no list or the list holds no entries.
2616 bool bRet
= true, bRemove
= !pAutocorr_List
|| pAutocorr_List
->empty();
// Open the list stream for reading and writing while denying other writers.
2619 tools::SvRef
<SotStorageStream
> refList
= rStg
.OpenSotStream( pXMLImplAutocorr_ListStr
,
2620 ( StreamMode::READ
| StreamMode::WRITE
| StreamMode::SHARE_DENYWRITE
) );
// Truncate any previous content and tag the stream as XML.
2623 refList
->SetSize( 0 );
2624 refList
->SetBufferSize( 8192 );
2625 refList
->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2627 uno::Reference
< uno::XComponentContext
> xContext
=
2628 comphelper::getProcessComponentContext();
// Create a SAX writer and let it write into the storage stream.
2630 uno::Reference
< xml::sax::XWriter
> xWriter
= xml::sax::Writer::create(xContext
);
2631 uno::Reference
< io::XOutputStream
> xOut
= new utl::OOutputStreamWrapper( *refList
);
2632 xWriter
->setOutputStream(xOut
);
2634 rtl::Reference
< SvXMLAutoCorrectExport
> xExp( new SvXMLAutoCorrectExport( xContext
, pAutocorr_List
.get(), pXMLImplAutocorr_ListStr
, xWriter
) );
2636 xExp
->exportDoc( XML_BLOCK_LIST
);
// The result reflects whether the stream reported an error.
2639 bRet
= ERRCODE_NONE
== refList
->GetError();
2644 if( ERRCODE_NONE
!= rStg
.GetError() )
2657 rStg
.Remove( pXMLImplAutocorr_ListStr
);
// Applies a batch of deletions and additions to the autocorrect word list and
// writes the resulting list to the user autocorrect file.
// NOTE(review): this excerpt is lossy. The guards around the optional values
// returned by FindAndRemove(), the else branches and the final return are not
// visible here.
2664 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector
<SvxAutocorrWord
>& aNewEntries
, std::vector
<SvxAutocorrWord
>& aDeleteEntries
)
2666 // First get the current list!
2667 GetAutocorrWordList();
2669 MakeUserStorage_Impl();
2670 tools::SvRef
<SotStorage
> xStorage
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
// Start out successful only when the user storage opened without error.
2672 bool bRet
= xStorage
.is() && ERRCODE_NONE
== xStorage
->GetError();
// Pass 1: remove every entry listed in aDeleteEntries.
2676 for (SvxAutocorrWord
& aWordToDelete
: aDeleteEntries
)
2678 std::optional
<SvxAutocorrWord
> xFoundEntry
= pAutocorr_List
->FindAndRemove( &aWordToDelete
);
// Entries that are not text-only also have a sub-storage that must be removed.
2681 if( !xFoundEntry
->IsTextOnly() )
2683 OUString
aName( aWordToDelete
.GetShort() );
// The sub-storage name is derived differently for OLE storages.
2684 if (xStorage
->IsOLEStorage())
2685 aName
= EncryptBlockName_Imp(aName
);
2687 GeneratePackageName ( aWordToDelete
.GetShort(), aName
);
2689 if( xStorage
->IsContained( aName
) )
2691 xStorage
->Remove( aName
);
2692 bRet
= xStorage
->Commit();
// Pass 2: add every entry of aNewEntries as a text-only word, replacing any old one.
2698 for (const SvxAutocorrWord
& aNewEntrie
: aNewEntries
)
2700 SvxAutocorrWord
aWordToAdd(aNewEntrie
.GetShort(), aNewEntrie
.GetLong(), true );
2701 std::optional
<SvxAutocorrWord
> xRemoved
= pAutocorr_List
->FindAndRemove( &aWordToAdd
);
2704 if( !xRemoved
->IsTextOnly() )
2706 // Still have to remove the Storage
2707 OUString
sStorageName( aWordToAdd
.GetShort() );
2708 if (xStorage
->IsOLEStorage())
2709 sStorageName
= EncryptBlockName_Imp(sStorageName
);
2711 GeneratePackageName ( aWordToAdd
.GetShort(), sStorageName
);
2713 if( xStorage
->IsContained( sStorageName
) )
2714 xStorage
->Remove( sStorageName
);
// Insert() returns a pointer; a null result converts to false here.
2717 bRet
= pAutocorr_List
->Insert( std::move(aWordToAdd
) );
// Finally write the updated list to the user storage.
2727 bRet
= MakeBlocklist_Imp( *xStorage
);
// Stores the text-only replacement rShort -> rLong in the word list and writes
// the list to the user autocorrect file.
// NOTE(review): the guards around the FindAndRemove() result, the else branches
// and the final return are not visible in this excerpt.
2733 bool SvxAutoCorrectLanguageLists::PutText( const OUString
& rShort
, const OUString
& rLong
)
2735 // First get the current list!
2736 GetAutocorrWordList();
2738 MakeUserStorage_Impl();
2739 tools::SvRef
<SotStorage
> xStg
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
// Start out successful only when the user storage opened without error.
2741 bool bRet
= xStg
.is() && ERRCODE_NONE
== xStg
->GetError();
2743 // Update the word list
// Build the new text-only entry and take any existing entry with that key out of the list.
2746 SvxAutocorrWord
aNew(rShort
, rLong
, true );
2747 std::optional
<SvxAutocorrWord
> xRemove
= pAutocorr_List
->FindAndRemove( &aNew
);
2750 if( !xRemove
->IsTextOnly() )
2752 // Still have to remove the Storage
2753 OUString
sStgNm( rShort
);
// The sub-storage name is derived differently for OLE storages.
2754 if (xStg
->IsOLEStorage())
2755 sStgNm
= EncryptBlockName_Imp(sStgNm
);
2757 GeneratePackageName ( rShort
, sStgNm
);
2759 if( xStg
->IsContained( sStgNm
) )
2760 xStg
->Remove( sStgNm
);
// Write the list only when the new entry was actually inserted.
2764 if( pAutocorr_List
->Insert( std::move(aNew
) ) )
2766 bRet
= MakeBlocklist_Imp( *xStg
);
// Stores a replacement for rShort whose content comes from the object shell
// rShell, then records it in the word list as a non-text-only entry.
// NOTE(review): the declaration of sLong, the try block, the use of bRet and
// the catch body are not visible in this excerpt.
2777 void SvxAutoCorrectLanguageLists::PutText( const OUString
& rShort
,
2778 SfxObjectShell
& rShell
)
2780 // First get the current list!
2781 GetAutocorrWordList();
2783 MakeUserStorage_Impl();
// Open the user file as a package storage for reading and writing.
2787 uno::Reference
< embed::XStorage
> xStg
= comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile
, embed::ElementModes::READWRITE
);
// The actual storing is delegated to rAutoCorrect; sLong is passed along to it.
2789 bool bRet
= rAutoCorrect
.PutText( xStg
, sUserAutoCorrFile
, rShort
, rShell
, sLong
);
2792 // Update the word list
// The third constructor argument `false` marks the entry as not text-only.
2795 if( pAutocorr_List
->Insert( SvxAutocorrWord(rShort
, sLong
, false) ) )
2797 tools::SvRef
<SotStorage
> xStor
= new SotStorage( sUserAutoCorrFile
, StreamMode::READWRITE
);
2798 MakeBlocklist_Imp( *xStor
);
2802 catch ( const uno::Exception
& )
2807 // Keep the list sorted ...
// Strict ordering of two entries by their short form, using the collator
// returned by ::GetCollatorWrapper().
2808 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2810 bool operator()( SvxAutocorrWord
const & lhs
, SvxAutocorrWord
const & rhs
) const
2812 CollatorWrapper
& rCmp
= ::GetCollatorWrapper();
// True when lhs collates before rhs.
2813 return rCmp
.compareString( lhs
.GetShort(), rhs
.GetShort() ) < 0;
// Hash container mapping an OUString key to an SvxAutocorrWord.
2819 typedef std::unordered_map
<OUString
, SvxAutocorrWord
> AutocorrWordHashType
;
// Private data of SvxAutocorrWordList: the entries live either in the hash or
// in the sorted vector. Both members are mutable so const members can modify them.
2823 struct SvxAutocorrWordList::Impl
2826 // only one of these contains the data
2827 // maSortedVector is manually sorted so we can optimise data movement
2828 mutable AutocorrWordSetType maSortedVector
;
2829 mutable AutocorrWordHashType maHash
; // key is 'Short'
// Empties the container(s).
// NOTE(review): only the clear() of maSortedVector is visible in this excerpt.
2831 void DeleteAndDestroyAll()
2834 maSortedVector
.clear();
// Creates an empty list with a freshly allocated Impl.
2838 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl
) {}
// Destructor. NOTE(review): its body is not visible in this excerpt.
2840 SvxAutocorrWordList::~SvxAutocorrWordList()
// Forwards to Impl::DeleteAndDestroyAll().
2844 void SvxAutocorrWordList::DeleteAndDestroyAll()
2846 mpImpl
->DeleteAndDestroyAll();
2849 // returns a pointer to the stored entry (not a bool); callers test the result for null
// Inserts aWord into whichever container is currently in use.
// NOTE(review): the checks on `inserted`, the second return and the failure
// returns are not visible in this excerpt.
2850 const SvxAutocorrWord
* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord
) const
2852 if ( mpImpl
->maSortedVector
.empty() ) // use the hash
// Copy the key first, because aWord itself is moved into the map below.
2854 OUString aShort
= aWord
.GetShort();
2855 auto [it
,inserted
] = mpImpl
->maHash
.emplace( std::move(aShort
), std::move(aWord
) );
2857 return &(it
->second
);
// Sorted-vector path: find the first position whose entry does not collate before aWord.
2862 auto it
= std::lower_bound(mpImpl
->maSortedVector
.begin(), mpImpl
->maSortedVector
.end(), aWord
, CompareSvxAutocorrWordList());
2863 CollatorWrapper
& rCmp
= ::GetCollatorWrapper();
// Insert only when no entry with an equal-collating short form is at that position.
2864 if (it
== mpImpl
->maSortedVector
.end() || rCmp
.compareString( aWord
.GetShort(), it
->GetShort() ) != 0)
2866 it
= mpImpl
->maSortedVector
.insert(it
, std::move(aWord
));
// Builds an entry from sWrong, sRight and bOnlyTxt and inserts it, deliberately
// ignoring the result of Insert().
2873 void SvxAutocorrWordList::LoadEntry(const OUString
& sWrong
, const OUString
& sRight
, bool bOnlyTxt
)
2875 (void)Insert(SvxAutocorrWord( sWrong
, sRight
, bOnlyTxt
));
// True when neither the hash nor the sorted vector holds any entry.
2878 bool SvxAutocorrWordList::empty() const
2880 return mpImpl
->maHash
.empty() && mpImpl
->maSortedVector
.empty();
// Looks up the entry whose short form matches *pWord, takes it out of the
// container in use, and yields an empty optional when nothing matched.
// NOTE(review): the statements returning pMatch are not visible in this excerpt.
2883 std::optional
<SvxAutocorrWord
> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord
*pWord
)
2886 if ( mpImpl
->maSortedVector
.empty() ) // use the hash
2888 AutocorrWordHashType::iterator it
= mpImpl
->maHash
.find( pWord
->GetShort() );
2889 if( it
!= mpImpl
->maHash
.end() )
// Move the entry out before erasing its slot.
2891 SvxAutocorrWord pMatch
= std::move(it
->second
);
2892 mpImpl
->maHash
.erase (it
);
// Sorted-vector path: binary search with the collating comparator.
2898 auto it
= std::lower_bound(mpImpl
->maSortedVector
.begin(), mpImpl
->maSortedVector
.end(), *pWord
, CompareSvxAutocorrWordList());
// lower_bound guarantees *it is not before *pWord; if *pWord is also not before *it, they match.
2899 if (it
!= mpImpl
->maSortedVector
.end() && !CompareSvxAutocorrWordList()(*pWord
, *it
))
2901 SvxAutocorrWord pMatch
= std::move(*it
);
2902 mpImpl
->maSortedVector
.erase (it
);
// Nothing found.
2906 return std::optional
<SvxAutocorrWord
>();
2909 // return the sorted contents - defer sorting until we have to.
// When the sorted vector is still empty, all entries are moved out of the hash,
// sorted, and stored in maSortedVector, which is then returned by reference.
2910 const SvxAutocorrWordList::AutocorrWordSetType
& SvxAutocorrWordList::getSortedContent() const
2912 // convert from hash to set permanently
2913 if ( mpImpl
->maSortedVector
.empty() )
// Move every mapped entry into a temporary vector, then empty the hash.
2915 std::vector
<SvxAutocorrWord
> tmp
;
2916 tmp
.reserve(mpImpl
->maHash
.size());
2917 for (auto & rPair
: mpImpl
->maHash
)
2918 tmp
.emplace_back(std::move(rPair
.second
));
2919 mpImpl
->maHash
.clear();
2920 // sort twice - this gets the list into mostly-sorted order, which
2921 // reduces the number of times we need to invoke the expensive ICU collate fn.
// First pass: plain OUString operator< on the short forms.
2922 std::sort(tmp
.begin(), tmp
.end(),
2923 [] ( SvxAutocorrWord
const & lhs
, SvxAutocorrWord
const & rhs
)
2925 return lhs
.GetShort() < rhs
.GetShort();
2927 // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
2928 // stable_sort is twice as fast as sort in this situation because it does
2929 // fewer comparison operations.
// Second pass: the collator-based comparator defines the final order.
2930 std::stable_sort(tmp
.begin(), tmp
.end(), CompareSvxAutocorrWordList());
2931 mpImpl
->maSortedVector
= std::move(tmp
);
2933 return mpImpl
->maSortedVector
;
// Tests whether the entry pFnd matches the text rTxt ending at nEndPos. The
// short form may be a plain word or carry a leading and/or trailing ".*" wildcard.
// For wildcard matches a new concrete entry is created through Insert().
// NOTE(review): this excerpt is lossy. The declaration of rStt (the caller in
// this file passes a sal_Int32 reference), the declarations of not_suffix,
// aLong and buf, all return statements, loop headers/bodies and braces are not
// visible here; the comments below describe only what the visible lines show.
2936 const SvxAutocorrWord
* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord
*pFnd
,
2937 const OUString
&rTxt
,
2939 sal_Int32 nEndPos
) const
2941 const OUString
& rChk
= pFnd
->GetShort();
// Each wildcard variable is 2 (the length of ".*") when present, otherwise 0.
2943 sal_Int32 left_wildcard
= rChk
.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
2944 sal_Int32 right_wildcard
= rChk
.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
2945 sal_Int32 nSttWdPos
= nEndPos
;
2947 // direct replacement of keywords surrounded by colons (for example, ":name:")
// True when the character at nEndPos is ':' and the pattern both starts and ends with ':'.
2948 bool bColonNameColon
= rTxt
.getLength() > nEndPos
&&
2949 rTxt
[nEndPos
] == ':' && rChk
[0] == ':' && rChk
.endsWith(":");
// Proceed only when enough text precedes nEndPos to hold the pattern without its wildcard markers.
2950 if ( nEndPos
+ (bColonNameColon
? 1 : 0) >= rChk
.getLength() - left_wildcard
- right_wildcard
)
2953 bool bWasWordDelim
= false;
// Candidate start position of the pattern inside rTxt.
2954 sal_Int32 nCalcStt
= nEndPos
- rChk
.getLength() + left_wildcard
;
2955 if (bColonNameColon
)
// Without a right wildcard: accept a start at text begin, at rStt, with a left
// wildcard, for the colon form, or directly after a word delimiter.
2957 if( !right_wildcard
&& ( !nCalcStt
|| nCalcStt
== rStt
|| left_wildcard
|| bColonNameColon
||
2958 ( nCalcStt
< rStt
&&
2959 IsWordDelim( rTxt
[ nCalcStt
- 1 ] ))) )
// Compare the text slice with the pattern (minus any left wildcard marker)
// using the wrapper returned by GetIgnoreTranslWrapper().
2961 TransliterationWrapper
& rCmp
= GetIgnoreTranslWrapper();
2962 OUString sWord
= rTxt
.copy(nCalcStt
, rChk
.getLength() - left_wildcard
);
2963 if( (!left_wildcard
&& rCmp
.isEqual( rChk
, sWord
)) || (left_wildcard
&& rCmp
.isEqual( rChk
.copy(left_wildcard
), sWord
) ))
2968 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
2969 if (rTxt
.getLength() > nEndPos
&& rTxt
[nEndPos
] == '/' && rChk
.indexOf('/') != -1)
2973 // get the first word delimiter position before the matching ".*word" pattern
// Note: this loop decrements rStt itself while scanning backwards.
2974 while( rStt
&& !(bWasWordDelim
= IsWordDelim( rTxt
[ --rStt
])))
2976 if (bWasWordDelim
) rStt
++;
// Text between the word start and the matched pattern, to be kept in front of the replacement.
2977 OUString left_pattern
= rTxt
.copy(rStt
, nEndPos
- rStt
- rChk
.getLength() + left_wildcard
);
2978 // avoid double spaces before simple "word" replacement
2979 left_pattern
+= (left_pattern
.getLength() == 0 && pFnd
->GetLong()[0] == 0x20) ? pFnd
->GetLong().copy(1) : pFnd
->GetLong();
// Store the concrete word and its replacement as a new list entry.
2980 if( const SvxAutocorrWord
* pNew
= Insert( SvxAutocorrWord(rTxt
.copy(rStt
, nEndPos
- rStt
), left_pattern
) ) )
2984 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
2985 if ( right_wildcard
)
// sTmp is the pattern with both wildcard markers stripped.
2988 OUString
sTmp( rChk
.copy( left_wildcard
, rChk
.getLength() - left_wildcard
- right_wildcard
) );
2989 // Get the last word delimiter position
2992 while( nSttWdPos
&& !(bWasWordDelim
= IsWordDelim( rTxt
[ --nSttWdPos
])))
2994 // search the first occurrence (with a left word delimitation, if needed)
2995 sal_Int32 nFndPos
= -1;
2997 nFndPos
= rTxt
.indexOf( sTmp
, nFndPos
+ 1);
// not_suffix: a word delimiter was found at or after the end of this occurrence.
3000 not_suffix
= bWasWordDelim
&& (nSttWdPos
>= (nFndPos
+ sTmp
.getLength()));
3001 } while ( (!left_wildcard
&& nFndPos
&& !IsWordDelim( rTxt
[ nFndPos
- 1 ])) || not_suffix
);
3003 if ( nFndPos
!= -1 )
3005 sal_Int32 extra_repl
= nFndPos
+ sTmp
.getLength() > nEndPos
? 1: 0; // for patterns with terminating characters, eg. "a:"
3007 if ( left_wildcard
)
3009 // get the first word delimiter position before the matching ".*word.*" pattern
3010 while( nFndPos
&& !(bWasWordDelim
= IsWordDelim( rTxt
[ --nFndPos
])))
3012 if (bWasWordDelim
) nFndPos
++;
3014 if (nEndPos
+ extra_repl
<= nFndPos
)
3018 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
3019 OUString aShort
= rTxt
.copy(nFndPos
, nEndPos
- nFndPos
+ extra_repl
);
// Without a left wildcard: replacement text followed by whatever trails the pattern.
3023 if ( !left_wildcard
)
3025 sal_Int32 siz
= nEndPos
- nFndPos
- sTmp
.getLength();
3026 aLong
= pFnd
->GetLong() + (siz
> 0 ? rTxt
.copy(nFndPos
+ sTmp
.getLength(), siz
) : "");
// With a left wildcard: replace each occurrence of sTmp found from nFndPos on,
// stopping once a word delimiter lies before the next occurrence.
3030 nSttWdPos
= rTxt
.indexOf( sTmp
, nFndPos
);
3031 if (nSttWdPos
!= -1)
3033 sal_Int32
nTmp(nFndPos
);
3034 while (nTmp
< nSttWdPos
&& !IsWordDelim(rTxt
[nTmp
]))
3036 if (nTmp
< nSttWdPos
)
3037 break; // word delimiter found
// Append the text before the occurrence, then the replacement.
3038 buf
.append(std::u16string_view(rTxt
).substr(nFndPos
, nSttWdPos
- nFndPos
)).append(pFnd
->GetLong());
3039 nFndPos
= nSttWdPos
+ sTmp
.getLength();
3041 } while (nSttWdPos
!= -1);
// Append the remaining text up to nEndPos.
3042 if (nEndPos
- nFndPos
> extra_repl
)
3043 buf
.append(std::u16string_view(rTxt
).substr(nFndPos
, nEndPos
- nFndPos
));
3044 aLong
= buf
.makeStringAndClear();
3046 if ( const SvxAutocorrWord
* pNew
= Insert( SvxAutocorrWord(aShort
, aLong
) ) )
// True when nEndPos is at a word delimiter or at the end of the text.
3048 if ( (rTxt
.getLength() > nEndPos
&& IsWordDelim(rTxt
[nEndPos
])) || rTxt
.getLength() == nEndPos
)
// Runs WordMatches() against every entry, first those in the hash and then
// those in the sorted vector.
// NOTE(review): what happens with a non-null pTmp and the final return are not
// visible in this excerpt.
3057 const SvxAutocorrWord
* SvxAutocorrWordList::SearchWordsInList(const OUString
& rTxt
, sal_Int32
& rStt
,
3058 sal_Int32 nEndPos
) const
3060 for (auto const& elem
: mpImpl
->maHash
)
3062 if( const SvxAutocorrWord
*pTmp
= WordMatches( &elem
.second
, rTxt
, rStt
, nEndPos
) )
3066 for (auto const& elem
: mpImpl
->maSortedVector
)
3068 if( const SvxAutocorrWord
*pTmp
= WordMatches( &elem
, rTxt
, rStt
, nEndPos
) )
3074 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */