bump product version to 5.0.4.1
[LibreOffice.git] / editeng / source / misc / svxacorr.cxx
blob03b0733c90097cb0a98d21187565ae6ccaebb2a7
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/io/XStream.hpp>
21 #include <com/sun/star/lang/Locale.hpp>
22 #include <tools/urlobj.hxx>
23 #include <i18nlangtag/mslangid.hxx>
24 #include <vcl/svapp.hxx>
25 #include <vcl/settings.hxx>
26 #include <sot/storinfo.hxx>
27 #include <svl/fstathelper.hxx>
28 #include <svtools/helpopt.hxx>
29 #include <svl/urihelper.hxx>
30 #include <unotools/charclass.hxx>
31 #include <com/sun/star/i18n/UnicodeType.hpp>
32 #include <unotools/collatorwrapper.hxx>
33 #include <com/sun/star/i18n/CollatorOptions.hpp>
34 #include <com/sun/star/i18n/UnicodeScript.hpp>
35 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
36 #include <unotools/localedatawrapper.hxx>
37 #include <unotools/transliterationwrapper.hxx>
38 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
39 #include <com/sun/star/io/XActiveDataSource.hpp>
40 #include <comphelper/processfactory.hxx>
41 #include <comphelper/storagehelper.hxx>
42 #include <comphelper/string.hxx>
43 #include <editeng/editids.hrc>
44 #include <sot/storage.hxx>
45 #include <editeng/udlnitem.hxx>
46 #include <editeng/wghtitem.hxx>
47 #include <editeng/escapementitem.hxx>
48 #include <editeng/svxacorr.hxx>
49 #include <editeng/unolingu.hxx>
50 #include "vcl/window.hxx"
51 #include <helpid.hrc>
52 #include <com/sun/star/xml/sax/InputSource.hpp>
53 #include <com/sun/star/xml/sax/FastParser.hpp>
54 #include <com/sun/star/xml/sax/FastToken.hpp>
55 #include <com/sun/star/xml/sax/Writer.hpp>
56 #include <com/sun/star/xml/sax/FastTokenHandler.hpp>
57 #include <unotools/streamwrap.hxx>
58 #include <SvXMLAutoCorrectImport.hxx>
59 #include <SvXMLAutoCorrectExport.hxx>
60 #include <SvXMLAutoCorrectTokenHandler.hxx>
61 #include <ucbhelper/content.hxx>
62 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
63 #include <com/sun/star/ucb/TransferInfo.hpp>
64 #include <com/sun/star/ucb/NameClash.hpp>
65 #include <xmloff/xmltoken.hxx>
66 #include <vcl/help.hxx>
67 #include <set>
68 #include <unordered_map>
70 using namespace ::com::sun::star::ucb;
71 using namespace ::com::sun::star::uno;
72 using namespace ::com::sun::star::xml::sax;
73 using namespace ::com::sun::star;
74 using namespace ::xmloff::token;
75 using namespace ::utl;
// Bit flags noting which sentence-ending punctuation marks have been seen
// (combined into an int by FnCptlSttSntnc).
77 static const int C_NONE = 0x00;
78 static const int C_FULL_STOP = 0x01;
79 static const int C_EXCLAMATION_MARK = 0x02;
80 static const int C_QUESTION_MARK = 0x04;
// U+00A0, the non-breaking space.
81 static const sal_Unicode cNonBreakingSpace = 0xA0;
// Names of the three XML lists referenced by this file.
83 static const sal_Char pXMLImplWrdStt_ExcptLstStr[] = "WordExceptList.xml";
84 static const sal_Char pXMLImplCplStt_ExcptLstStr[] = "SentenceExceptList.xml";
85 static const sal_Char pXMLImplAutocorr_ListStr[] = "DocumentList.xml";
87 static const sal_Char
88 /* also at these beginnings - Brackets and all kinds of begin characters */
89 sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
90 /* also at these ends - Brackets and all kinds of end characters */
91 sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
93 // These characters are allowed in words: (for FnCptlSttSntnc)
94 static const sal_Char sImplWordChars[] = "-'";
// Forward declaration; the definition is not in this part of the file.
96 OUString EncryptBlockName_Imp(const OUString& rName);
98 TYPEINIT0(SvxAutoCorrect)
100 typedef SvxAutoCorrectLanguageLists* SvxAutoCorrectLanguageListsPtr;
102 static inline bool IsWordDelim( const sal_Unicode c )
104 return ' ' == c || '\t' == c || 0x0a == c ||
105 cNonBreakingSpace == c || 0x2011 == c || 0x1 == c;
108 static inline bool IsLowerLetter( sal_Int32 nCharType )
110 return CharClass::isLetterType( nCharType ) &&
111 0 == ( ::com::sun::star::i18n::KCharacterType::UPPER & nCharType);
114 static inline bool IsUpperLetter( sal_Int32 nCharType )
116 return CharClass::isLetterType( nCharType ) &&
117 0 == ( ::com::sun::star::i18n::KCharacterType::LOWER & nCharType);
120 bool lcl_IsUnsupportedUnicodeChar( CharClass& rCC, const OUString& rTxt,
121 sal_Int32 nStt, sal_Int32 nEnd )
123 for( ; nStt < nEnd; ++nStt )
125 short nScript = rCC.getScript( rTxt, nStt );
126 switch( nScript )
128 case ::com::sun::star::i18n::UnicodeScript_kCJKRadicalsSupplement:
129 case ::com::sun::star::i18n::UnicodeScript_kHangulJamo:
130 case ::com::sun::star::i18n::UnicodeScript_kCJKSymbolPunctuation:
131 case ::com::sun::star::i18n::UnicodeScript_kHiragana:
132 case ::com::sun::star::i18n::UnicodeScript_kKatakana:
133 case ::com::sun::star::i18n::UnicodeScript_kHangulCompatibilityJamo:
134 case ::com::sun::star::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
135 case ::com::sun::star::i18n::UnicodeScript_kCJKCompatibility:
136 case ::com::sun::star::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA:
137 case ::com::sun::star::i18n::UnicodeScript_kCJKUnifiedIdeograph:
138 case ::com::sun::star::i18n::UnicodeScript_kHangulSyllable:
139 case ::com::sun::star::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
140 case ::com::sun::star::i18n::UnicodeScript_kHalfwidthFullwidthForm:
141 return true;
142 default: ; //do nothing
145 return false;
148 static bool lcl_IsSymbolChar( CharClass& rCC, const OUString& rTxt,
149 sal_Int32 nStt, sal_Int32 nEnd )
151 for( ; nStt < nEnd; ++nStt )
153 if( ::com::sun::star::i18n::UnicodeType::PRIVATE_USE ==
154 rCC.getType( rTxt, nStt ))
155 return true;
157 return false;
160 static bool lcl_IsInAsciiArr( const sal_Char* pArr, const sal_Unicode c )
162 bool bRet = false;
163 for( ; *pArr; ++pArr )
164 if( *pArr == c )
166 bRet = true;
167 break;
169 return bRet;
// Out-of-line destructor of the document interface; no statements are
// visible in its body here.
172 SvxAutoCorrDoc::~SvxAutoCorrDoc()
176 // Called by the functions:
177 // - FnCptlSttWrd
178 // - FnCptlSttSntnc
179 // after the exchange of characters. Then the words, if necessary, can be inserted
180 // into the exception list.
// All four parameters are unnamed, so this default implementation ignores them.
181 void SvxAutoCorrDoc::SaveCpltSttWord( sal_uLong, sal_Int32, const OUString&,
182 sal_Unicode )
// Default implementation: ignores both arguments and always reports
// LANGUAGE_SYSTEM.
186 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32, bool ) const
188 return LANGUAGE_SYSTEM;
191 static const LanguageTag& GetAppLang()
193 return Application::GetSettings().GetLanguageTag();
195 static LocaleDataWrapper& GetLocaleDataWrapper( sal_uInt16 nLang )
197 static LocaleDataWrapper aLclDtWrp( GetAppLang() );
198 LanguageTag aLcl( nLang );
199 const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag();
200 if( aLcl != rLcl )
201 aLclDtWrp.setLanguageTag( aLcl );
202 return aLclDtWrp;
204 static TransliterationWrapper& GetIgnoreTranslWrapper()
206 static int bIsInit = 0;
207 static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
208 ::com::sun::star::i18n::TransliterationModules_IGNORE_KANA |
209 ::com::sun::star::i18n::TransliterationModules_IGNORE_WIDTH );
210 if( !bIsInit )
212 aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
213 bIsInit = 1;
215 return aWrp;
217 static CollatorWrapper& GetCollatorWrapper()
219 static int bIsInit = 0;
220 static CollatorWrapper aCollWrp( ::comphelper::getProcessComponentContext() );
221 if( !bIsInit )
223 aCollWrp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
224 bIsInit = 1;
226 return aCollWrp;
229 static void lcl_ClearTable(boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>& rLangTable)
231 rLangTable.clear();
234 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
236 return cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
237 cChar == ' ' || cChar == '\'' || cChar == '\"' ||
238 cChar == '*' || cChar == '_' || cChar == '%' ||
239 cChar == '.' || cChar == ',' || cChar == ';' ||
240 cChar == ':' || cChar == '?' || cChar == '!' ||
241 cChar == '/' || cChar == '-';
244 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
246 return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' ||
247 cChar == '/' /*case for the urls exception*/;
250 long SvxAutoCorrect::GetDefaultFlags()
252 long nRet = Autocorrect
253 | CptlSttSntnc
254 | CptlSttWrd
255 | ChgOrdinalNumber
256 | ChgToEnEmDash
257 | AddNonBrkSpace
258 | ChgWeightUnderl
259 | SetINetAttr
260 | ChgQuotes
261 | SaveWordCplSttLst
262 | SaveWordWrdSttLst
263 | CorrectCapsLock;
264 LanguageType eLang = GetAppLang().getLanguageType();
265 switch( eLang )
267 case LANGUAGE_ENGLISH:
268 case LANGUAGE_ENGLISH_US:
269 case LANGUAGE_ENGLISH_UK:
270 case LANGUAGE_ENGLISH_AUS:
271 case LANGUAGE_ENGLISH_CAN:
272 case LANGUAGE_ENGLISH_NZ:
273 case LANGUAGE_ENGLISH_EIRE:
274 case LANGUAGE_ENGLISH_SAFRICA:
275 case LANGUAGE_ENGLISH_JAMAICA:
276 case LANGUAGE_ENGLISH_CARRIBEAN:
277 nRet &= ~(ChgQuotes|ChgSglQuotes);
278 break;
280 return nRet;
// Constructs the auto-correct object. The two strings are stored in
// sShareAutoCorrFile / sUserAutoCorrFile, a new empty language table is
// allocated, no CharClass exists yet (pCharClass is 0, language unknown),
// the flags come from GetDefaultFlags(), all four quote characters are 0 and
// the dashes are preset to U+2014 (em dash) and U+2013 (en dash).
284 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile,
285 const OUString& rUserAutocorrFile )
286 : sShareAutoCorrFile( rShareAutocorrFile )
287 , sUserAutoCorrFile( rUserAutocorrFile )
288 , pLangTable( new boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists> )
289 , pCharClass( 0 )
290 , bRunNext( false )
291 , eCharClassLang( LANGUAGE_DONTKNOW )
292 , nFlags(SvxAutoCorrect::GetDefaultFlags())
293 , cStartDQuote( 0 )
294 , cEndDQuote( 0 )
295 , cStartSQuote( 0 )
296 , cEndSQuote( 0 )
297 , cEmDash( 0x2014 )
298 , cEnDash( 0x2013)
// Copy constructor. File names, aSwFlags, quote and dash characters and
// eCharClassLang are copied from rCpy. The language table is NOT copied (a new
// empty one is allocated), pCharClass starts as 0, bRunNext as false, and the
// three "list loaded" bits (ChgWordLstLoad, CplSttLstLoad, WrdSttLstLoad) are
// masked out of the copied flags.
302 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
303 : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
304 , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
305 , aSwFlags( rCpy.aSwFlags )
306 , pLangTable( new boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists> )
307 , pCharClass( 0 )
308 , bRunNext( false )
309 , eCharClassLang(rCpy.eCharClassLang)
310 , nFlags( rCpy.nFlags & ~(ChgWordLstLoad|CplSttLstLoad|WrdSttLstLoad))
311 , cStartDQuote( rCpy.cStartDQuote )
312 , cEndDQuote( rCpy.cEndDQuote )
313 , cStartSQuote( rCpy.cStartSQuote )
314 , cEndSQuote( rCpy.cEndSQuote )
315 , cEmDash( rCpy.cEmDash )
316 , cEnDash( rCpy.cEnDash )
321 SvxAutoCorrect::~SvxAutoCorrect()
323 lcl_ClearTable(*pLangTable);
324 delete pLangTable;
325 delete pCharClass;
328 void SvxAutoCorrect::_GetCharClass( LanguageType eLang )
330 delete pCharClass;
331 pCharClass = new CharClass( LanguageTag( eLang));
332 eCharClassLang = eLang;
335 void SvxAutoCorrect::SetAutoCorrFlag( long nFlag, bool bOn )
337 long nOld = nFlags;
338 nFlags = bOn ? nFlags | nFlag
339 : nFlags & ~nFlag;
341 if( !bOn )
343 if( (nOld & CptlSttSntnc) != (nFlags & CptlSttSntnc) )
344 nFlags &= ~CplSttLstLoad;
345 if( (nOld & CptlSttWrd) != (nFlags & CptlSttWrd) )
346 nFlags &= ~WrdSttLstLoad;
347 if( (nOld & Autocorrect) != (nFlags & Autocorrect) )
348 nFlags &= ~ChgWordLstLoad;
353 // Two capital letters at the beginning of word?
354 bool SvxAutoCorrect::FnCptlSttWrd( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
355 sal_Int32 nSttPos, sal_Int32 nEndPos,
356 LanguageType eLang )
358 bool bRet = false;
359 CharClass& rCC = GetCharClass( eLang );
361 // Delete all non alphanumeric. Test the characters at the beginning/end of
362 // the word ( recognizes: "(min.", "/min.", and so on.)
363 for( ; nSttPos < nEndPos; ++nSttPos )
364 if( rCC.isLetterNumeric( rTxt, nSttPos ))
365 break;
366 for( ; nSttPos < nEndPos; --nEndPos )
367 if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
368 break;
370 // Is the word a compounded word separated by delimiters?
371 // If so, keep track of all delimiters so each constituent
372 // word can be checked for two initial capital letters.
373 std::deque<sal_Int32> aDelimiters;
375 // Always check for two capitals at the beginning
376 // of the entire word, so start at nSttPos.
377 aDelimiters.push_back(nSttPos);
379 // Find all compound word delimiters
380 for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
382 if (IsAutoCorrectChar(rTxt[ n ]))
384 aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
388 // Decide where to put the terminating delimiter.
389 // If the last AutoCorrect char was a newline, then the AutoCorrect
390 // char will not be included in rTxt.
391 // If the last AutoCorrect char was not a newline, then the AutoCorrect
392 // character will be the last character in rTxt.
393 if (!IsAutoCorrectChar(rTxt[nEndPos-1]))
394 aDelimiters.push_back(nEndPos);
396 // Iterate through the word and all words that compose it.
397 // Two capital letters at the beginning of word?
398 for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
400 nSttPos = aDelimiters[nI];
401 nEndPos = aDelimiters[nI + 1];
403 if( nSttPos+2 < nEndPos &&
404 IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
405 IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
406 // Is the third character a lower case
407 IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
408 // Do not replace special attributes
409 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
411 // test if the word is in an exception list
412 OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
413 if( !FindInWrdSttExceptList(eLang, sWord) )
415 // Check that word isn't correctly spelled before correcting:
416 ::com::sun::star::uno::Reference<
417 ::com::sun::star::linguistic2::XSpellChecker1 > xSpeller =
418 SvxGetSpellChecker();
419 if( xSpeller->hasLanguage(eLang) )
421 Sequence< ::com::sun::star::beans::PropertyValue > aEmptySeq;
422 if (!xSpeller->spell(sWord, eLang, aEmptySeq).is())
424 return false;
427 sal_Unicode cSave = rTxt[ nSttPos ];
428 OUString sChar( cSave );
429 sChar = rCC.lowercase( sChar );
430 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
432 if( SaveWordWrdSttLst & nFlags )
433 rDoc.SaveCpltSttWord( CptlSttWrd, nSttPos, sWord, cSave );
434 bRet = true;
439 return bRet;
443 bool SvxAutoCorrect::FnChgOrdinalNumber(
444 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
445 sal_Int32 nSttPos, sal_Int32 nEndPos,
446 LanguageType eLang)
448 // 1st, 2nd, 3rd, 4 - 0th
449 // 201th or 201st
450 // 12th or 12nd
451 bool bChg = false;
453 // In some languages ordinal suffixes should never be
454 // changed to superscript. Let's break for those languages.
455 switch (eLang)
457 case LANGUAGE_SWEDISH:
458 case LANGUAGE_SWEDISH_FINLAND:
459 break;
460 default:
461 CharClass& rCC = GetCharClass(eLang);
463 for (; nSttPos < nEndPos; ++nSttPos)
464 if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
465 break;
466 for (; nSttPos < nEndPos; --nEndPos)
467 if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
468 break;
471 // Get the last number in the string to check
472 sal_Int32 nNumEnd = nEndPos;
473 bool foundEnd = false;
474 bool validNumber = true;
475 sal_Int32 i = nEndPos;
477 while (i > nSttPos)
479 i--;
480 bool isDigit = rCC.isDigit(rTxt, i);
481 if (foundEnd)
482 validNumber |= isDigit;
484 if (isDigit && !foundEnd)
486 foundEnd = true;
487 nNumEnd = i;
491 if (foundEnd && validNumber) {
492 sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32();
494 // Check if the characters after that number correspond to the ordinal suffix
495 uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
496 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
498 uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
499 for (sal_Int32 nSuff = 0; nSuff < aSuffixes.getLength(); nSuff++)
501 OUString sSuffix(aSuffixes[nSuff]);
502 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1);
504 if (sSuffix == sEnd)
506 // Check if the ordinal suffix has to be set as super script
507 if (rCC.isLetter(sSuffix))
509 // Do the change
510 SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
511 DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
512 rDoc.SetAttr(nNumEnd + 1, nEndPos,
513 SID_ATTR_CHAR_ESCAPEMENT,
514 aSvxEscapementItem);
515 bChg = true;
521 return bChg;
525 bool SvxAutoCorrect::FnChgToEnEmDash(
526 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
527 sal_Int32 nSttPos, sal_Int32 nEndPos,
528 LanguageType eLang )
530 bool bRet = false;
531 CharClass& rCC = GetCharClass( eLang );
532 if (eLang == LANGUAGE_SYSTEM)
533 eLang = GetAppLang().getLanguageType();
534 bool bAlwaysUseEmDash = (cEmDash && (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN));
536 // replace " - " or " --" with "enDash"
537 if( cEnDash && 1 < nSttPos && 1 <= nEndPos - nSttPos )
539 sal_Unicode cCh = rTxt[ nSttPos ];
540 if( '-' == cCh )
542 if( ' ' == rTxt[ nSttPos-1 ] &&
543 '-' == rTxt[ nSttPos+1 ])
545 sal_Int32 n;
546 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
547 sImplSttSkipChars,(cCh = rTxt[ n ]));
548 ++n )
551 // found: " --[<AnySttChars>][A-z0-9]
552 if( rCC.isLetterNumeric( OUString(cCh) ) )
554 for( n = nSttPos-1; n && lcl_IsInAsciiArr(
555 sImplEndSkipChars,(cCh = rTxt[ --n ])); )
558 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
559 if( rCC.isLetterNumeric( OUString(cCh) ))
561 rDoc.Delete( nSttPos, nSttPos + 2 );
562 rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
563 bRet = true;
568 else if( 3 < nSttPos &&
569 ' ' == rTxt[ nSttPos-1 ] &&
570 '-' == rTxt[ nSttPos-2 ])
572 sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
573 if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
575 --nTmpPos;
576 ++nLen;
577 cCh = rTxt[ nTmpPos-1 ];
579 if( ' ' == cCh )
581 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
582 sImplSttSkipChars,(cCh = rTxt[ n ]));
583 ++n )
586 // found: " - [<AnySttChars>][A-z0-9]
587 if( rCC.isLetterNumeric( OUString(cCh) ) )
589 cCh = ' ';
590 for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
591 sImplEndSkipChars,(cCh = rTxt[ --n ])); )
593 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
594 if( rCC.isLetterNumeric( OUString(cCh) ))
596 rDoc.Delete( nTmpPos, nTmpPos + nLen );
597 rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
598 bRet = true;
605 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
606 // [0-9]--[0-9] double dash always replaced with "enDash"
607 // Finnish and Hungarian use enDash instead of emDash.
608 bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
609 if( ((cEmDash && !bEnDash) || (cEnDash && bEnDash)) && 4 <= nEndPos - nSttPos )
611 OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) );
612 sal_Int32 nFndPos = sTmp.indexOf("--");
613 if( nFndPos != -1 && nFndPos &&
614 nFndPos + 2 < sTmp.getLength() &&
615 ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
616 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) &&
617 ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
618 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] )))
620 nSttPos = nSttPos + nFndPos;
621 rDoc.Delete( nSttPos, nSttPos + 2 );
622 rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) &&
623 rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) );
624 bRet = true;
627 return bRet;
// Handles the spacing in front of the character at rTxt[nEndPos]; only acts
// when the CharClass language is "fr".
// For one of ":;?!%" any blanks / non-breaking spaces directly in front of it
// are deleted, and a single non-breaking space is inserted if the character
// is in the active set (only ':' for country "CA", all five otherwise).
// Protocol schemes and words containing "://" are left alone.
// For '/' a non-breaking space in front of a preceding ':' is removed again.
// Returns true if the document was changed; bRunNext is set as a side effect.
631 bool SvxAutoCorrect::FnAddNonBrkSpace(
632 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
633 sal_Int32, sal_Int32 nEndPos,
634 LanguageType eLang )
636 bool bRet = false;
638 CharClass& rCC = GetCharClass( eLang );
640 if ( rCC.getLanguageTag().getLanguage() == "fr" )
642 bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
// allChars: characters that trigger this function; chars: the subset that
// actually receives a non-breaking space.
643 OUString allChars = ":;?!%";
644 OUString chars( allChars );
645 if ( bFrCA )
646 chars = ":";
648 sal_Unicode cChar = rTxt[ nEndPos ];
649 bool bHasSpace = chars.indexOf( cChar ) != -1;
650 bool bIsSpecial = allChars.indexOf( cChar ) != -1;
651 if ( bIsSpecial )
653 // Get the last word delimiter position
654 sal_Int32 nSttWdPos = nEndPos;
655 bool bWasWordDelim = false;
// Walks backwards until a word delimiter is hit or position 0 is reached.
656 while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
659 //See if the text is the start of a protocol string, e.g. have text of
660 //"http" see if it is the start of "http:" and if so leave it alone
661 sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
662 sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1;
663 if (nIndex + nProtocolLen <= rTxt.getLength())
665 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
666 return false;
669 // Check the presence of "://" in the word
670 sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 );
671 if ( nStrPos == -1 && nEndPos > 0 )
673 // Check the previous char
674 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
675 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
677 // Remove any previous normal space
678 sal_Int32 nPos = nEndPos - 1;
679 while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
681 if ( nPos == 0 ) break;
682 nPos--;
683 cPrevChar = rTxt[ nPos ];
// nPos now points at the first character of the blank run (or at nEndPos
// if there was none).
686 nPos++;
687 if ( nEndPos - nPos > 0 )
688 rDoc.Delete( nPos, nEndPos );
690 // Add the non-breaking space at the end pos
691 if ( bHasSpace )
692 rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
693 bRunNext = true;
694 bRet = true;
696 else if ( chars.indexOf( cPrevChar ) != -1 )
697 bRunNext = true;
700 else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) )
702 // Remove the hardspace right before to avoid formatting URLs
703 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
704 sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
705 if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
707 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
708 bRet = true;
713 return bRet;
717 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
718 sal_Int32 nSttPos, sal_Int32 nEndPos,
719 LanguageType eLang )
721 OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
722 GetCharClass( eLang ) ));
723 bool bRet = !sURL.isEmpty();
724 if( bRet ) // also Attribut setzen:
725 rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
726 return bRet;
// Turns *text* into bold and _text_ into underlined text.
// rTxt[nEndPos] is the closing marker ('*' or '_'). The function searches
// backwards for the matching opening marker, deletes both markers and sets
// the weight / underline attribute on the text in between.
// Returns true if a matching opening marker was found and the change applied.
730 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
731 sal_Int32 , sal_Int32 nEndPos,
732 LanguageType eLang )
734 // Condition:
735 // at the beginning: _ or * after Space with the following !Space
736 // at the end: _ or * before Space (word delimiter?)
738 sal_Unicode c, cInsChar = rTxt[ nEndPos ]; // underline or bold
// The closing marker must be the last character or be followed by a word
// delimiter.
739 if( ++nEndPos != rTxt.getLength() &&
740 !IsWordDelim( rTxt[ nEndPos ] ) )
741 return false;
743 --nEndPos;
745 bool bAlphaNum = false;
746 sal_Int32 nPos = nEndPos;
747 sal_Int32 nFndPos = -1;
748 CharClass& rCC = GetCharClass( eLang );
// Scan backwards; bAlphaNum records whether a letter or digit was seen
// between the two markers.
750 while( nPos )
752 switch( c = rTxt[ --nPos ] )
754 case '_':
755 case '*':
756 if( c == cInsChar )
758 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
759 IsWordDelim( rTxt[ nPos-1 ])) &&
760 !IsWordDelim( rTxt[ nPos+1 ]))
761 nFndPos = nPos;
762 else
763 // Condition is not satisfied, so cancel
764 nFndPos = -1;
// Setting nPos to 0 ends the while loop after the first same-kind marker.
765 nPos = 0;
767 break;
768 default:
769 if( !bAlphaNum )
770 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
774 if( -1 != nFndPos )
776 // first delete the Character at the end - this allows insertion
777 // of an empty hint in SetAttr which would be removed by Delete
778 // (fdo#62536, AUTOFMT in Writer)
779 rDoc.Delete( nEndPos, nEndPos + 1 );
780 rDoc.Delete( nFndPos, nFndPos + 1 );
781 // Span the Attribute over the area
782 // the end.
// After the opening marker is deleted the text has shifted left by one,
// hence nEndPos - 1 as the end of the attribute range.
783 if( '*' == cInsChar ) // Bold
785 SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
786 rDoc.SetAttr( nFndPos, nEndPos - 1,
787 SID_ATTR_CHAR_WEIGHT,
788 aSvxWeightItem);
790 else // underline
792 SvxUnderlineItem aSvxUnderlineItem( UNDERLINE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
793 rDoc.SetAttr( nFndPos, nEndPos - 1,
794 SID_ATTR_CHAR_UNDERLINE,
795 aSvxUnderlineItem);
799 return -1 != nFndPos;
// Capitalises the first letter of a sentence.
// Starting at nEndPos the function walks backwards through rTxt to find the
// start of the last word, then checks that this word follows a sentence end
// ('.', '!', '?' or their Asian / full-width forms) - looking into the
// previous paragraph via rDoc.GetPrevPara() when the word starts the
// paragraph. The word in front of a full stop is checked against the sentence
// exception list. If all tests pass, the first letter is replaced by its
// title-case form. Returns true if the replacement was made.
803 bool SvxAutoCorrect::FnCptlSttSntnc( SvxAutoCorrDoc& rDoc,
804 const OUString& rTxt, bool bNormalPos,
805 sal_Int32 nSttPos, sal_Int32 nEndPos,
806 LanguageType eLang )
809 if( rTxt.isEmpty() || nEndPos <= nSttPos )
810 return false;
812 CharClass& rCC = GetCharClass( eLang );
813 OUString aText( rTxt );
// pStr: scan cursor; pWordStt: first letter of the last word (0 until found);
// pDelim: position right after the last letter of that word.
814 const sal_Unicode *pStart = aText.getStr(),
815 *pStr = pStart + nEndPos,
816 *pWordStt = 0,
817 *pDelim = 0;
819 bool bAtStart = false;
// Walk backwards over the last word; digits inside the word are tolerated.
820 do {
821 --pStr;
822 if (rCC.isLetter(aText, pStr - pStart))
824 if( !pWordStt )
825 pDelim = pStr+1;
826 pWordStt = pStr;
828 else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
// A '-' or '\'' directly in front of the word start, preceded by a letter,
// is treated as part of the word.
830 if( lcl_IsInAsciiArr( sImplWordChars, *pStr ) &&
831 pWordStt - 1 == pStr &&
832 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
833 (pStart + 1) <= pStr &&
834 rCC.isLetter(aText, pStr-1 - pStart))
835 pWordStt = --pStr;
836 else
837 break;
839 } while( ! ( bAtStart = (pStart == pStr) ) );
841 if (!pWordStt)
842 return false; // no character to be replaced
845 if (rCC.isDigit(aText, pStr - pStart))
846 return false; // already ok
848 if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
849 return false; // already ok
851 //See if the text is the start of a protocol string, e.g. have text of
852 //"http" see if it is the start of "http:" and if so leave it alone
853 sal_Int32 nIndex = pWordStt - pStart;
854 sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
855 if (nIndex + nProtocolLen <= rTxt.getLength())
857 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
858 return false; // already ok
861 if (0x1 == *pWordStt || 0x2 == *pWordStt)
862 return false; // already ok
// Words of at most two characters followed by one of ".-)>" are left alone.
864 if( *pDelim && 2 >= pDelim - pWordStt &&
865 lcl_IsInAsciiArr( ".-)>", *pDelim ) )
866 return false;
868 if( !bAtStart ) // Still no beginning of a paragraph?
870 if ( IsWordDelim( *pStr ) )
// Skip all word delimiters in front of the word.
872 while( ! ( bAtStart = (pStart == pStr--) ) && IsWordDelim( *pStr ) )
875 // Asian full stop, full width full stop, full width exclamation mark
876 // and full width question marks are treated as word delimiters
877 else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
878 0xFF1F != *pStr )
879 return false; // no valid separator -> no replacement
882 if( bAtStart ) // at the beginning of a paragraph?
884 // Check out the previous paragraph, if it exists.
885 // If so, then check to paragraph separator at the end.
886 OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
887 if (!pPrevPara)
889 // valid separator -> replace
890 OUString sChar( *pWordStt );
891 sChar = rCC.titlecase(sChar); //see fdo#56740
892 return !comphelper::string::equals(sChar, *pWordStt) &&
893 rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
// From here on aText / pStart / pStr refer to the previous paragraph, while
// pWordStt still points into the buffer of rTxt.
896 aText = *pPrevPara;
897 bAtStart = false;
898 pStart = aText.getStr();
899 pStr = pStart + aText.getLength();
901 do { // overwrite all blanks
902 --pStr;
903 if( !IsWordDelim( *pStr ))
904 break;
905 } while( ! ( bAtStart = (pStart == pStr) ) );
907 if( bAtStart )
908 return false; // no valid separator -> no replacement
911 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
912 // all three can happen, but not more than once!
913 const sal_Unicode* pExceptStt = 0;
914 if( !bAtStart )
916 bool bContinue = true;
917 int nFlag = C_NONE;
918 do {
919 switch( *pStr )
921 // Western and Asian full stop
922 case '.':
923 case 0x3002 :
924 case 0xFF0E :
926 if (pStr >= pStart + 2 && *(pStr-2) == '.')
928 //e.g. text "f.o.o. word": Now currently considering
929 //capitalizing word but second last character of
930 //previous word is a . So probably last word is an
931 //acronym that ends in . and not truly the end of a
932 //previous sentence, so don't autocapitalize this word
933 return false;
935 if( nFlag & C_FULL_STOP )
936 return false; // no valid separator -> no replacement
937 nFlag |= C_FULL_STOP;
938 pExceptStt = pStr;
940 break;
941 case '!':
942 case 0xFF01 :
944 if( nFlag & C_EXCLAMATION_MARK )
945 return false; // no valid separator -> no replacement
946 nFlag |= C_EXCLAMATION_MARK;
948 break;
949 case '?':
950 case 0xFF1F :
952 if( nFlag & C_QUESTION_MARK)
953 return false; // no valid separator -> no replacement
954 nFlag |= C_QUESTION_MARK;
956 break;
957 default:
958 if( !nFlag )
959 return false; // no valid separator -> no replacement
960 else
961 bContinue = false;
962 break;
965 if( bContinue && pStr-- == pStart )
967 return false; // no valid separator -> no replacement
969 } while( bContinue );
// The exception list is only consulted when a full stop was the sole
// separator found.
970 if( C_FULL_STOP != nFlag )
971 pExceptStt = 0;
974 if( 2 > ( pStr - pStart ) )
975 return false;
977 if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
979 bool bValid = false, bAlphaFnd = false;
980 const sal_Unicode* pTmpStr = pStr;
981 while( !bValid )
983 if( rCC.isDigit( aText, pTmpStr - pStart ) )
985 bValid = true;
986 pStr = pTmpStr - 1;
988 else if( rCC.isLetter( aText, pTmpStr - pStart ) )
990 if( bAlphaFnd )
992 bValid = true;
993 pStr = pTmpStr;
995 else
996 bAlphaFnd = true;
998 else if( bAlphaFnd || IsWordDelim( *pTmpStr ) )
999 break;
1001 if( pTmpStr == pStart )
1002 break;
1004 --pTmpStr;
1007 if( !bValid )
1008 return false; // no valid separator -> no replacement
1011 bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
1013 // Search for the beginning of the word
1014 while( !IsWordDelim( *pStr ))
1016 if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
1017 bNumericOnly = false;
1019 if( pStart == pStr )
1020 break;
1022 --pStr;
1025 if( bNumericOnly ) // consists of only numbers, then not
1026 return false;
1028 if( IsWordDelim( *pStr ))
1029 ++pStr;
1031 OUString sWord;
1033 // check on the basis of the exception list
1034 if( pExceptStt )
// sWord is the word in front of the full stop, including the full stop.
1036 sWord = OUString(pStr, pExceptStt - pStr + 1);
1037 if( FindInCplSttExceptList(eLang, sWord) )
1038 return false;
1040 // Delete all non alphanumeric. Test the characters at the
1041 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1042 OUString sTmp( sWord );
1043 while( !sTmp.isEmpty() &&
1044 !rCC.isLetterNumeric( sTmp, 0 ) )
1045 sTmp = sTmp.copy(1);
1047 // Remove all non alphanumeric characters towards the end up until
1048 // the last one.
1049 sal_Int32 nLen = sTmp.getLength();
1050 while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
1051 --nLen;
1052 if( nLen + 1 < sTmp.getLength() )
1053 sTmp = sTmp.copy( 0, nLen + 1 );
1055 if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
1056 FindInCplSttExceptList(eLang, sTmp))
1057 return false;
1059 if(FindInCplSttExceptList(eLang, sWord, true))
1060 return false;
1063 // Ok, then replace
1064 sal_Unicode cSave = *pWordStt;
// The position is computed against rTxt's buffer because aText may meanwhile
// hold the previous paragraph.
1065 nSttPos = pWordStt - rTxt.getStr();
1066 OUString sChar( cSave );
1067 sChar = rCC.titlecase(sChar); //see fdo#56740
1068 bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
1070 // Perhaps someone wants to have the word
1071 if( bRet && SaveWordCplSttLst & nFlags )
1072 rDoc.SaveCpltSttWord( CptlSttSntnc, nSttPos, sWord, cSave );
1074 return bRet;
1077 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1078 sal_Int32 nSttPos, sal_Int32 nEndPos,
1079 LanguageType eLang )
1081 if (nEndPos - nSttPos < 2)
1082 // string must be at least 2-character long.
1083 return false;
1085 CharClass& rCC = GetCharClass( eLang );
1087 // Check the first 2 letters.
1088 if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1089 return false;
1091 if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1092 return false;
1094 OUString aConverted;
1095 aConverted += rCC.uppercase(OUString(rTxt[nSttPos]));
1096 aConverted += rCC.lowercase(OUString(rTxt[nSttPos+1]));
1098 for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1100 if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1101 // A lowercase letter disqualifies the whole text.
1102 return false;
1104 if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1105 // Another uppercase letter. Convert it.
1106 aConverted += rCC.lowercase(OUString(rTxt[i]));
1107 else
1108 // This is not an alphabetic letter. Leave it as-is.
1109 aConverted += OUString( rTxt[i] );
1112 // Replace the word.
1113 rDoc.Delete(nSttPos, nEndPos);
1114 rDoc.Insert(nSttPos, aConverted);
1116 return true;
1120 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1121 LanguageType eLang ) const
1123 sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1124 ? GetStartDoubleQuote()
1125 : GetStartSingleQuote() )
1126 : ( '\"' == cInsChar
1127 ? GetEndDoubleQuote()
1128 : GetEndSingleQuote() );
1129 if( !cRet )
1131 // then through the Language find the right character
1132 if( LANGUAGE_NONE == eLang )
1133 cRet = cInsChar;
1134 else
1136 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1137 OUString sRet( bSttQuote
1138 ? ( '\"' == cInsChar
1139 ? rLcl.getDoubleQuotationMarkStart()
1140 : rLcl.getQuotationMarkStart() )
1141 : ( '\"' == cInsChar
1142 ? rLcl.getDoubleQuotationMarkEnd()
1143 : rLcl.getQuotationMarkEnd() ));
1144 cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1147 return cRet;
1150 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1151 sal_Unicode cInsChar, bool bSttQuote,
1152 bool bIns )
1154 LanguageType eLang = rDoc.GetLanguage( nInsPos, false );
1155 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1157 OUString sChg( cInsChar );
1158 if( bIns )
1159 rDoc.Insert( nInsPos, sChg );
1160 else
1161 rDoc.Replace( nInsPos, sChg );
1163 sChg = OUString(cRet);
1165 if( '\"' == cInsChar )
1167 if( LANGUAGE_SYSTEM == eLang )
1168 eLang = GetAppLang().getLanguageType();
1169 switch( eLang )
1171 case LANGUAGE_FRENCH:
1172 case LANGUAGE_FRENCH_BELGIAN:
1173 case LANGUAGE_FRENCH_CANADIAN:
1174 case LANGUAGE_FRENCH_SWISS:
1175 case LANGUAGE_FRENCH_LUXEMBOURG:
1177 OUString s( cNonBreakingSpace );
1178 // UNICODE code for no break space
1179 if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, s ))
1181 if( !bSttQuote )
1182 ++nInsPos;
1185 break;
1189 rDoc.Replace( nInsPos, sChg );
1192 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1193 sal_Unicode cInsChar, bool bSttQuote )
1195 LanguageType eLang = rDoc.GetLanguage( nInsPos, false );
1196 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1198 OUString sRet = OUString(cRet);
1200 if( '\"' == cInsChar )
1202 if( LANGUAGE_SYSTEM == eLang )
1203 eLang = GetAppLang().getLanguageType();
1204 switch( eLang )
1206 case LANGUAGE_FRENCH:
1207 case LANGUAGE_FRENCH_BELGIAN:
1208 case LANGUAGE_FRENCH_CANADIAN:
1209 case LANGUAGE_FRENCH_SWISS:
1210 case LANGUAGE_FRENCH_LUXEMBOURG:
1211 if( bSttQuote )
1212 sRet += " ";
1213 else
1214 sRet = " " + sRet;
1215 break;
1218 return sRet;
// Entry point for auto-correction while typing: handles the character cChar
// typed at position nInsPos of rTxt.
//   rDoc       document that is modified
//   rTxt       text the positions refer to
//   nInsPos    position at which cChar is inserted / overwritten
//   cChar      typed character; 0 means no character is written
//   bInsert    true to insert cChar, false to overwrite at nInsPos
//   pFrameWin  may be null; only used to query the Caps Lock indicator and
//              to simulate a Caps Lock key press
// Returns the flag(s) of the correction(s) that were applied, 0 if none.
// The do/while(false) loop exists only so that "break" can leave early.
1221 sal_uLong
1222 SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1223 sal_Int32 nInsPos, sal_Unicode cChar,
1224 bool bInsert, vcl::Window* pFrameWin )
1226 sal_uLong nRet = 0;
1227 bool bIsNextRun = bRunNext;
1228 bRunNext = false; // if it was set, then it has to be turned off
1230 do{ // only for middle check loop !!
1231 if( cChar )
1233 // Prevent double space
// (the second blank is not written to the document at all)
1234 if( nInsPos && ' ' == cChar &&
1235 IsAutoCorrFlag( IgnoreDoubleSpace ) &&
1236 ' ' == rTxt[ nInsPos - 1 ])
1238 nRet = IgnoreDoubleSpace;
1239 break;
1242 bool bSingle = '\'' == cChar;
1243 bool bIsReplaceQuote =
1244 (IsAutoCorrFlag( ChgQuotes ) && ('\"' == cChar )) ||
1245 (IsAutoCorrFlag( ChgSglQuotes ) && bSingle );
1246 if( bIsReplaceQuote )
// A quote counts as opening quote at the start of the text, after a word
// delimiter, after one of "([{" or after an en/em dash.
1248 sal_Unicode cPrev;
1249 bool bSttQuote = !nInsPos ||
1250 IsWordDelim( ( cPrev = rTxt[ nInsPos-1 ])) ||
1251 lcl_IsInAsciiArr( "([{", cPrev ) ||
1252 ( cEmDash && cEmDash == cPrev ) ||
1253 ( cEnDash && cEnDash == cPrev );
1255 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert );
1256 nRet = bSingle ? ChgSglQuotes : ChgQuotes;
1257 break;
// Any other character is written to the document unchanged.
1260 if( bInsert )
1261 rDoc.Insert( nInsPos, OUString(cChar) );
1262 else
1263 rDoc.Replace( nInsPos, OUString(cChar) );
1265 // Hardspaces autocorrection
1266 if ( IsAutoCorrFlag( AddNonBrkSpace ) )
1268 if ( NeedsHardspaceAutocorr( cChar ) &&
1269 FnAddNonBrkSpace( rDoc, rTxt, 0, nInsPos, rDoc.GetLanguage( nInsPos, false ) ) )
1271 nRet = AddNonBrkSpace;
1273 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1275 // Remove the NBSP if it wasn't an autocorrection
1276 if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1277 cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1279 // Look for the last HARD_SPACE
// (walks backwards from nInsPos - 1 and stops at the first character that
// is not a hardspace trigger, or at position 0)
1280 sal_Int32 nPos = nInsPos - 1;
1281 bool bContinue = true;
1282 while ( bContinue )
1284 const sal_Unicode cTmpChar = rTxt[ nPos ];
1285 if ( cTmpChar == cNonBreakingSpace )
1287 rDoc.Delete( nPos, nPos + 1 );
1288 nRet = AddNonBrkSpace;
1289 bContinue = false;
1291 else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1292 bContinue = false;
1293 nPos--;
// Nothing precedes the insert position, so there is no word to correct.
1300 if( !nInsPos )
1301 break;
1303 sal_Int32 nPos = nInsPos - 1;
1305 if( IsWordDelim( rTxt[ nPos ]))
1306 break;
1308 // Set bold or underline automatically?
1309 if (('*' == cChar || '_' == cChar) && (nPos+1 < rTxt.getLength()))
1311 if( IsAutoCorrFlag( ChgWeightUnderl ) &&
1312 FnChgWeightUnderl( rDoc, rTxt, 0, nPos+1 ) )
1313 nRet = ChgWeightUnderl;
1314 break;
// Move nPos back to the previous word delimiter or to position 0.
1317 while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1320 // Found a Paragraph-start or a Blank, search for the word shortcut in
1321 // auto.
1322 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character
1323 if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1324 --nCapLttrPos; // Beginning of paragraph and no blank!
1326 LanguageType eLang = rDoc.GetLanguage( nCapLttrPos, false );
1327 if( LANGUAGE_SYSTEM == eLang )
1328 eLang = MsLangId::getSystemLanguage();
1329 CharClass& rCC = GetCharClass( eLang );
1331 // no symbol characters
1332 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1333 break;
1335 if( IsAutoCorrFlag( Autocorrect ) )
// pPara is only passed on when sentence capitalisation is enabled.
1337 OUString aPara;
1338 OUString* pPara = IsAutoCorrFlag(CptlSttSntnc) ? &aPara : 0;
1340 // since LibO 4.1, '-' is a word separator
1341 // fdo#67742 avoid "--" to be replaced by "–" if next is "-"
1342 if( rTxt.endsWith( "---" ) )
1343 break;
1344 bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1345 *this, pPara );
1346 if( !bChgWord )
// Second attempt: retry with the leading/trailing skip characters
// (sImplSttSkipChars / sImplEndSkipChars) trimmed from the word.
1348 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1349 while( nCapLttrPos1 < nInsPos &&
1350 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1352 ++nCapLttrPos1;
1353 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1354 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1356 --nInsPos1;
1358 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1359 nCapLttrPos1 < nInsPos1 &&
1360 rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1362 bChgWord = true;
1363 nCapLttrPos = nCapLttrPos1;
1367 if( bChgWord )
1369 nRet = Autocorrect;
1370 if( !aPara.isEmpty() )
// Find the end of the word at nCapLttrPos inside the text returned in aPara.
1372 sal_Int32 nEnd = nCapLttrPos;
1373 while( nEnd < aPara.getLength() &&
1374 !IsWordDelim( aPara[ nEnd ]))
1375 ++nEnd;
1377 // Capital letter at beginning of paragraph?
1378 if( IsAutoCorrFlag( CptlSttSntnc ) &&
1379 FnCptlSttSntnc( rDoc, aPara, false,
1380 nCapLttrPos, nEnd, eLang ) )
1381 nRet |= CptlSttSntnc;
// NOTE(review): nEnd was computed on aPara but is used with rTxt here - confirm this is intended.
1383 if( IsAutoCorrFlag( ChgToEnEmDash ) &&
1384 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nEnd, eLang ) )
1385 nRet |= ChgToEnEmDash;
1387 break;
// Ordinal numbers and URL recognition: nRet is assigned inside the condition
// and keeps the flag of whichever alternative succeeded; the else branch
// resets it to 0 before trying the remaining corrections.
1391 if( ( IsAutoCorrFlag( nRet = ChgOrdinalNumber ) &&
1392 (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3
1393 ( '-' != cChar || 'E' != toupper(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1394 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1395 ( IsAutoCorrFlag( nRet = SetINetAttr ) &&
1396 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1397 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1399 else
1401 bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1402 bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1404 nRet = 0;
1405 if ( bLockKeyOn && IsAutoCorrFlag( CorrectCapsLock ) &&
1406 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1408 // Correct accidental use of cAPS LOCK key (do this only when
1409 // the caps or shift lock key is pressed). Turn off the caps
1410 // lock afterwards.
1411 nRet |= CorrectCapsLock;
1412 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1415 // Capital letter at beginning of paragraph ?
1416 if( !bUnsupported &&
1417 IsAutoCorrFlag( CptlSttSntnc ) &&
1418 FnCptlSttSntnc( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang ) )
1419 nRet |= CptlSttSntnc;
1421 // Two capital letters at beginning of word ??
1422 if( !bUnsupported &&
1423 IsAutoCorrFlag( CptlSttWrd ) &&
1424 FnCptlSttWrd( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1425 nRet |= CptlSttWrd;
1427 if( IsAutoCorrFlag( ChgToEnEmDash ) &&
1428 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1429 nRet |= ChgToEnEmDash;
1432 } while( false );
1434 return nRet;
1437 SvxAutoCorrectLanguageLists& SvxAutoCorrect::_GetLanguageList(
1438 LanguageType eLang )
1440 LanguageTag aLanguageTag( eLang);
1441 if (pLangTable->find(aLanguageTag) == pLangTable->end())
1442 (void)CreateLanguageFile(aLanguageTag, true);
1443 return *(pLangTable->find(aLanguageTag)->second);
1446 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1448 boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(LanguageTag(eLang));
1449 if(nTmpVal != pLangTable->end() && nTmpVal->second)
1450 nTmpVal->second->SaveCplSttExceptList();
1451 #ifdef DBG_UTIL
1452 else
1454 SAL_WARN("editeng", "Save an empty list? ");
1456 #endif
1459 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang)
1461 boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(LanguageTag(eLang));
1462 if(nTmpVal != pLangTable->end() && nTmpVal->second)
1463 nTmpVal->second->SaveWrdSttExceptList();
1464 #ifdef DBG_UTIL
1465 else
1467 SAL_WARN("editeng", "Save an empty list? ");
1469 #endif
1472 // Adds a single word. The list will immediately be written to the file!
1473 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1474 LanguageType eLang )
1476 SvxAutoCorrectLanguageLists* pLists = 0;
1477 // either the right language is present or it will be this in the general list
1478 boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(LanguageTag(eLang));
1479 if(nTmpVal != pLangTable->end())
1480 pLists = nTmpVal->second;
1481 else
1483 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1484 nTmpVal = pLangTable->find(aLangTagUndetermined);
1485 if(nTmpVal != pLangTable->end())
1486 pLists = nTmpVal->second;
1487 else if(CreateLanguageFile(aLangTagUndetermined, true))
1488 pLists = pLangTable->find(aLangTagUndetermined)->second;
1490 OSL_ENSURE(pLists, "No auto correction data");
1491 return pLists && pLists->AddToCplSttExceptList(rNew);
1494 // Adds a single word. The list will immediately be written to the file!
1495 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew,
1496 LanguageType eLang )
1498 SvxAutoCorrectLanguageLists* pLists = 0;
1499 //either the right language is present or it is set in the general list
1500 boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(LanguageTag(eLang));
1501 if(nTmpVal != pLangTable->end())
1502 pLists = nTmpVal->second;
1503 else
1505 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1506 nTmpVal = pLangTable->find(aLangTagUndetermined);
1507 if(nTmpVal != pLangTable->end())
1508 pLists = nTmpVal->second;
1509 else if(CreateLanguageFile(aLangTagUndetermined, true))
1510 pLists = pLangTable->find(aLangTagUndetermined)->second;
1512 OSL_ENSURE(pLists, "No auto correction file!");
1513 return pLists && pLists->AddToWrdSttExceptList(rNew);
1516 bool SvxAutoCorrect::GetPrevAutoCorrWord( SvxAutoCorrDoc& rDoc,
1517 const OUString& rTxt, sal_Int32 nPos,
1518 OUString& rWord ) const
1520 if( !nPos )
1521 return false;
1523 sal_Int32 nEnde = nPos;
1525 // it must be followed by a blank or tab!
1526 if( ( nPos < rTxt.getLength() &&
1527 !IsWordDelim( rTxt[ nPos ])) ||
1528 IsWordDelim( rTxt[ --nPos ]))
1529 return false;
1531 while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1534 // Found a Paragraph-start or a Blank, search for the word shortcut in
1535 // auto.
1536 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character
1537 if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1538 --nCapLttrPos; // Beginning of pargraph and no Blank!
1540 while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1541 if( ++nCapLttrPos >= nEnde )
1542 return false;
1544 if( 3 > nEnde - nCapLttrPos )
1545 return false;
1547 LanguageType eLang = rDoc.GetLanguage( nCapLttrPos, false );
1548 if( LANGUAGE_SYSTEM == eLang )
1549 eLang = MsLangId::getSystemLanguage();
1551 SvxAutoCorrect* pThis = const_cast<SvxAutoCorrect*>(this);
1552 CharClass& rCC = pThis->GetCharClass( eLang );
1554 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnde ))
1555 return false;
1557 rWord = rTxt.copy( nCapLttrPos, nEnde - nCapLttrPos );
1558 return true;
// Creates the SvxAutoCorrectLanguageLists entry for rLanguageTag and inserts
// it into pLangTable.
//   rLanguageTag  language to create the lists for; expected not to be in
//                 pLangTable yet (checked with OSL_ENSURE only)
//   bNewFile      true: create the entry even if no file exists on disk
// Returns true if an entry was created.
// aLastFileTable records when a file lookup last failed for a language, so
// that the file system is not queried again within two minutes.
1561 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1563 OSL_ENSURE(pLangTable->find(rLanguageTag) == pLangTable->end(), "Language already exists ");
1565 OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true, false, false ));
1566 OUString sShareDirFile( sUserDirFile );
1568 SvxAutoCorrectLanguageListsPtr pLists = 0;
1570 tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1572 std::map<LanguageTag, long>::iterator nFndPos = aLastFileTable.find(rLanguageTag);
// The comma expression first loads the stored time into nLastCheckTime and
// then compares it with the current time.
1573 if(nFndPos != aLastFileTable.end() &&
1574 (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1575 nAktTime - nLastCheckTime < nMinTime)
1577 // no need to test the file, because the last check is not older than
1578 // 2 minutes.
1579 if( bNewFile )
1581 sShareDirFile = sUserDirFile;
1582 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1583 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference
1584 pLangTable->insert(aTmp, pLists);
1585 aLastFileTable.erase(nFndPos);
// Probe in order: the user file, the share file for the full tag, the share
// file for the unlocalized tag. sShareDirFile is assigned as a side effect
// of the probe. If none exists and bNewFile is set, the user file name is
// used for both paths.
1588 else if(
1589 ( FStatHelper::IsDocument( sUserDirFile ) ||
1590 FStatHelper::IsDocument( sShareDirFile =
1591 GetAutoCorrFileName( rLanguageTag, false, false, false ) ) ||
1592 FStatHelper::IsDocument( sShareDirFile =
1593 GetAutoCorrFileName( rLanguageTag, false, false, true) )
1594 ) ||
1595 ( sShareDirFile = sUserDirFile, bNewFile )
1598 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1599 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference
1600 pLangTable->insert(aTmp, pLists);
1601 if (nFndPos != aLastFileTable.end())
1602 aLastFileTable.erase(nFndPos);
1604 else if( !bNewFile )
// Remember the time of this unsuccessful lookup.
1606 aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1608 return pLists != 0;
1611 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1612 LanguageType eLang )
1614 LanguageTag aLanguageTag( eLang);
1615 boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(aLanguageTag);
1616 if(nTmpVal != pLangTable->end())
1617 return nTmpVal->second->PutText(rShort, rLong);
1618 if(CreateLanguageFile(aLanguageTag))
1619 return pLangTable->find(aLanguageTag)->second->PutText(rShort, rLong);
1620 return false;
1623 bool SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1624 std::vector<SvxAutocorrWord>& aDeleteEntries,
1625 LanguageType eLang )
1627 LanguageTag aLanguageTag( eLang);
1628 boost::ptr_map<LanguageTag, SvxAutoCorrectLanguageLists>::iterator nTmpVal = pLangTable->find(aLanguageTag);
1629 if(nTmpVal != pLangTable->end())
1631 return nTmpVal->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1633 else if(CreateLanguageFile( aLanguageTag ))
1635 return pLangTable->find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1637 return false;
1641 // - return the replacement text (only for SWG-Format, all other
1642 // can be taken from the word list!)
// This implementation ignores both (unnamed) parameters and always returns
// false.
1643 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1645 return false;
// The storage parameter is unnamed, i.e. unused by this implementation.
// NOTE(review): the body is not visible in this extract; it appears to be empty - confirm.
1648 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1652 // Text with attribution (only the SWG - SWG format!)
// This implementation ignores all of its (unnamed) parameters and always
// returns false.
1653 bool SvxAutoCorrect::PutText( const com::sun::star::uno::Reference < com::sun::star::embed::XStorage >&,
1654 const OUString&, const OUString&, SfxObjectShell&, OUString& )
1656 return false;
1659 OUString EncryptBlockName_Imp(const OUString& rName)
1661 OUStringBuffer aName;
1662 aName.append('#').append(rName);
1663 for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos)
1665 if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
1666 aName[nPos] &= 0x0f;
1668 return aName.makeStringAndClear();
1671 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1672 static void GeneratePackageName ( const OUString& rShort, OUString& rPackageName )
1674 OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1675 OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1677 for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1679 switch (aBuf[nPos])
1681 case '!':
1682 case '/':
1683 case ':':
1684 case '.':
1685 case '\\':
1686 aBuf[nPos] = '_';
1687 break;
1688 default:
1689 break;
1693 rPackageName = aBuf.makeStringAndClear();
1696 static const SvxAutocorrWord* lcl_SearchWordsInList(
1697 SvxAutoCorrectLanguageListsPtr pList, const OUString& rTxt,
1698 sal_Int32& rStt, sal_Int32 nEndPos)
1700 const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1701 return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1704 // the search for the words in the substitution table
// Tries up to four language lists in turn and returns the first hit:
//   1. the language of rLang (the system language if rLang is the system locale)
//   2. the primary language id (eLang & 0x7ff), if different
//   3. the tag built from the language part only (e.g. "en"), if different
//      and not undetermined
//   4. LANGUAGE_UNDETERMINED
// Lists that are not loaded yet are created via CreateLanguageFile(..., false).
// On success rLang is set to the tag of the list that matched; rStt is
// passed on to the list search by reference. Returns 0 if nothing matched.
1705 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
1706 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
1707 SvxAutoCorrDoc&, LanguageTag& rLang )
1709 const SvxAutocorrWord* pRet = 0;
1710 LanguageTag aLanguageTag( rLang);
1711 if( aLanguageTag.isSystemLocale() )
1712 aLanguageTag.reset( MsLangId::getSystemLanguage());
1714 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
1715 * list instead? */
1717 // First search for eLang, then US-English -> English
1718 // and last in LANGUAGE_UNDETERMINED
1719 if(pLangTable->find(aLanguageTag) != pLangTable->end() || CreateLanguageFile(aLanguageTag, false))
1721 //the language is available - so bring it on
1722 SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1723 pRet = lcl_SearchWordsInList( pList, rTxt, rStt, nEndPos );
1724 if( pRet )
1726 rLang = aLanguageTag;
1727 return pRet;
1731 // If it still could not be found here, then keep on searching
1732 LanguageType eLang = aLanguageTag.getLanguageType();
1733 LanguageType nTmpKey1 = eLang & 0x7ff; // the main language in many cases DE
// aLanguageTag is only reset to nTmpKey1 when nTmpKey1 differs from eLang
// (short-circuit evaluation).
1734 if(nTmpKey1 != eLang && (pLangTable->find(aLanguageTag.reset(nTmpKey1)) != pLangTable->end() ||
1735 CreateLanguageFile(aLanguageTag, false)))
1737 //the language is available - so bring it on
1738 SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1739 pRet = lcl_SearchWordsInList( pList, rTxt, rStt, nEndPos );
1740 if( pRet )
1742 rLang = aLanguageTag;
1743 return pRet;
1747 // otherwise for example EN
1748 aLanguageTag.reset(aLanguageTag.getLanguage());
1749 LanguageType nTmpKey2 = aLanguageTag.getLanguageType(false);
1750 if (nTmpKey2 != eLang && nTmpKey2 != LANGUAGE_UNDETERMINED &&
1751 (pLangTable->find(aLanguageTag) != pLangTable->end() ||
1752 CreateLanguageFile(aLanguageTag, false)))
1754 //the language is available - so bring it on
1755 SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1756 pRet = lcl_SearchWordsInList( pList, rTxt, rStt, nEndPos );
1757 if( pRet )
1759 rLang = aLanguageTag;
1760 return pRet;
// Last resort: the language-independent list.
1764 if(pLangTable->find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != pLangTable->end() ||
1765 CreateLanguageFile(aLanguageTag, false))
1767 //the language is available - so bring it on
1768 SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1769 pRet = lcl_SearchWordsInList( pList, rTxt, rStt, nEndPos );
1770 if( pRet )
1772 rLang = aLanguageTag;
1773 return pRet;
1776 return 0;
// Returns true if sWord is contained in the list returned by
// GetWrdSttExceptList() of one of these language lists, tried in order:
// eLang itself, its primary language id (eLang & 0x7ff), the tag built from
// the language part only, and finally LANGUAGE_UNDETERMINED.
// Lists that are not loaded yet are created via CreateLanguageFile(..., false).
1779 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
1780 const OUString& sWord )
1782 LanguageTag aLanguageTag( eLang);
1784 /* TODO-BCP47: again horrible ugliness */
1786 // First search for eLang, then US-English -> English
1787 // and last in LANGUAGE_UNDETERMINED
1788 LanguageType nTmpKey1 = eLang & 0x7ff; // the main language in many cases DE
1789 OUString sTemp(sWord);
1791 if(pLangTable->find(aLanguageTag) != pLangTable->end() || CreateLanguageFile(aLanguageTag, false))
1793 //the language is available - so bring it on
1794 SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
// _sTemp is a second copy of sWord, just like sTemp above.
1795 OUString _sTemp(sWord);
1796 if(pList->GetWrdSttExceptList()->find(_sTemp) != pList->GetWrdSttExceptList()->end() )
1797 return true;
1800 // If it still could not be found here, then keep on searching
// aLanguageTag is only reset to nTmpKey1 when nTmpKey1 differs from eLang.
1801 if(nTmpKey1 != eLang && (pLangTable->find(aLanguageTag.reset(nTmpKey1)) != pLangTable->end() ||
1802 CreateLanguageFile(aLanguageTag, false)))
1804 //the language is available - so bring it on
1805 SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1806 if(pList->GetWrdSttExceptList()->find(sTemp) != pList->GetWrdSttExceptList()->end() )
1807 return true;
1810 // otherwise for example EN
1811 aLanguageTag.reset(aLanguageTag.getLanguage());
1812 LanguageType nTmpKey2 = aLanguageTag.getLanguageType(false);
1813 if (nTmpKey2 != eLang && nTmpKey2 != LANGUAGE_UNDETERMINED &&
1814 (pLangTable->find(aLanguageTag) != pLangTable->end() ||
1815 CreateLanguageFile(aLanguageTag, false)))
1817 //the language is available - so bring it on
1818 SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1819 if(pList->GetWrdSttExceptList()->find(sTemp) != pList->GetWrdSttExceptList()->end() )
1820 return true;
// Last resort: the language-independent list.
1823 if(pLangTable->find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != pLangTable->end() ||
1824 CreateLanguageFile(aLanguageTag, false))
1826 //the language is available - so bring it on
1827 SvxAutoCorrectLanguageLists* pList = pLangTable->find(aLanguageTag)->second;
1828 if(pList->GetWrdSttExceptList()->find(sTemp) != pList->GetWrdSttExceptList()->end() )
1829 return true;
1831 return false;
// Checks whether sWord ends with one of the '~'-prefixed entries of pList.
// Starting at the position returned by pList->find("~"), consecutive entries
// beginning with '~' are examined; for each, the part after the '~' is
// compared with the end of sWord, ignoring ASCII case.
// Returns true on the first entry whose tail matches, false otherwise.
// NOTE(review): nothing is examined if find("~") returns end() - confirm
// against the container's find() semantics.
1834 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
1836 OUString sAbk('~');
1837 SvStringsISortDtor::const_iterator it = pList->find( sAbk );
1838 sal_uInt16 nPos = it - pList->begin();
1839 if( nPos < pList->size() )
1841 OUString sLowerWord(sWord.toAsciiLowerCase());
1842 OUString pAbk;
// The loop condition assigns the current entry to pAbk and stops at the
// first entry that does not start with '~'.
1843 for( sal_uInt16 n = nPos;
1844 n < pList->size() &&
1845 '~' == ( pAbk = (*pList)[ n ])[ 0 ];
1846 ++n )
1848 // ~ and ~. are not allowed!
// (entry must be longer than 2 and, without its '~', fit into sWord)
1849 if( 2 < pAbk.getLength() && pAbk.getLength() - 1 <= sWord.getLength() )
1851 OUString sLowerAbk(pAbk.toAsciiLowerCase());
// Compare backwards from the end of both strings; reaching index 0 of the
// entry (the '~') means every character after the '~' matched.
1852 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
1854 if( !--i ) // agrees
1855 return true;
1857 if( sLowerAbk[i] != sLowerWord[--ii])
1858 break;
// Sanity check: the entry before the start position must not begin with '~'.
// Note that the condition itself decrements nPos.
1863 OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
1864 "Wrongly sorted exception list?" );
1865 return false;
// Returns true if sWord is found in the list returned by
// GetCplSttExceptList() of one of these language lists, tried in order:
// eLang itself, its primary language id (eLang & 0x7ff), the tag built from
// the language part only, and finally LANGUAGE_UNDETERMINED.
//   bAbbreviation  true: match via lcl_FindAbbreviation();
//                  false: look sWord up directly with find()
// Lists that are not loaded yet are created via CreateLanguageFile(..., false).
1868 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
1869 const OUString& sWord, bool bAbbreviation)
1871 LanguageTag aLanguageTag( eLang);
1873 /* TODO-BCP47: did I mention terrible horrible ugliness? */
1875 // First search for eLang, then US-English -> English
1876 // and last in LANGUAGE_UNDETERMINED
1877 LanguageType nTmpKey1 = eLang & 0x7ff; // the main language in many cases DE
1878 OUString sTemp( sWord );
1880 if(pLangTable->find(aLanguageTag) != pLangTable->end() || CreateLanguageFile(aLanguageTag, false))
1882 //the language is available - so bring it on
1883 const SvStringsISortDtor* pList = pLangTable->find(aLanguageTag)->second->GetCplSttExceptList();
1884 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sTemp) != pList->end() )
1885 return true;
1888 // If it still could not be found here, then keep on searching
// aLanguageTag is only reset to nTmpKey1 when nTmpKey1 differs from eLang.
1889 if(nTmpKey1 != eLang && (pLangTable->find(aLanguageTag.reset(nTmpKey1)) != pLangTable->end() ||
1890 CreateLanguageFile(aLanguageTag, false)))
1892 const SvStringsISortDtor* pList = pLangTable->find(aLanguageTag)->second->GetCplSttExceptList();
1893 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sTemp) != pList->end() )
1894 return true;
1897 // otherwise for example EN
1898 aLanguageTag.reset(aLanguageTag.getLanguage());
1899 LanguageType nTmpKey2 = aLanguageTag.getLanguageType(false);
1900 if (nTmpKey2 != eLang && nTmpKey2 != LANGUAGE_UNDETERMINED &&
1901 (pLangTable->find(aLanguageTag) != pLangTable->end() ||
1902 CreateLanguageFile(aLanguageTag, false)))
1904 //the language is available - so bring it on
1905 const SvStringsISortDtor* pList = pLangTable->find(aLanguageTag)->second->GetCplSttExceptList();
1906 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sTemp) != pList->end() )
1907 return true;
// Last resort: the language-independent list.
1910 if(pLangTable->find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != pLangTable->end() ||
1911 CreateLanguageFile(aLanguageTag, false))
1913 //the language is available - so bring it on
1914 const SvStringsISortDtor* pList = pLangTable->find(aLanguageTag)->second->GetCplSttExceptList();
1915 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sTemp) != pList->end() )
1916 return true;
1918 return false;
1921 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
1922 bool bNewFile, bool bTst, bool bUnlocalized ) const
1924 OUString sRet, sExt( rLanguageTag.getBcp47() );
1925 if (bUnlocalized)
1927 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
1928 ::std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
1929 if (!vecFallBackStrings.empty())
1930 sExt = vecFallBackStrings[0];
1933 sExt = "_" + sExt + ".dat";
1934 if( bNewFile )
1935 ( sRet = sUserAutoCorrFile ) += sExt;
1936 else if( !bTst )
1937 ( sRet = sShareAutoCorrFile ) += sExt;
1938 else
1940 // test first in the user directory - if not exist, then
1941 ( sRet = sUserAutoCorrFile ) += sExt;
1942 if( !FStatHelper::IsDocument( sRet ))
1943 ( sRet = sShareAutoCorrFile ) += sExt;
1945 return sRet;
// Stores the two file names and the owning SvxAutoCorrect. The date/time
// stamps start out EMPTY, the three list pointers null and nFlags 0, so no
// list is loaded by the constructor.
1948 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
1949 SvxAutoCorrect& rParent,
1950 const OUString& rShareAutoCorrectFile,
1951 const OUString& rUserAutoCorrectFile)
1952 : sShareAutoCorrFile( rShareAutoCorrectFile ),
1953 sUserAutoCorrFile( rUserAutoCorrectFile ),
1954 aModifiedDate( Date::EMPTY ),
1955 aModifiedTime( tools::Time::EMPTY ),
1956 aLastCheckTime( tools::Time::EMPTY ),
1957 pCplStt_ExcptLst( 0 ),
1958 pWrdStt_ExcptLst( 0 ),
1959 pAutocorr_List( 0 ),
1960 rAutoCorrect(rParent),
1961 nFlags(0)
// Deletes the three lists held by this object (null pointers are fine for delete).
1965 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
1967 delete pCplStt_ExcptLst;
1968 delete pWrdStt_ExcptLst;
1969 delete pAutocorr_List;
1972 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
1974 // Access the file system only every 2 minutes to check the date stamp
1975 bool bRet = false;
1977 tools::Time nMinTime( 0, 2 );
1978 tools::Time nAktTime( tools::Time::SYSTEM );
1979 if( aLastCheckTime > nAktTime || // overflow?
1980 ( nAktTime -= aLastCheckTime ) > nMinTime ) // min time past
1982 Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
1983 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
1984 &aTstDate, &aTstTime ) &&
1985 ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
1987 bRet = true;
1988 // then remove all the lists fast!
1989 if( CplSttLstLoad & nFlags && pCplStt_ExcptLst )
1990 delete pCplStt_ExcptLst, pCplStt_ExcptLst = 0;
1991 if( WrdSttLstLoad & nFlags && pWrdStt_ExcptLst )
1992 delete pWrdStt_ExcptLst, pWrdStt_ExcptLst = 0;
1993 if( ChgWordLstLoad & nFlags && pAutocorr_List )
1994 delete pAutocorr_List, pAutocorr_List = 0;
1995 nFlags &= ~(CplSttLstLoad | WrdSttLstLoad | ChgWordLstLoad );
1997 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
1999 return bRet;
// Loads an exception list from the XML stream pStrmName in storage rStg.
//   rpLst      list to fill; cleared if it exists, allocated otherwise
//   pStrmName  name of the stream inside the storage
//   rStg       storage to read from; cleared here if the stream cannot be
//              opened without error
// If the storage is unset or has no such stream, the list simply stays empty.
// NOTE(review): the "try" lines belonging to the catch clauses below are
// missing from this extract.
2002 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2003 SvStringsISortDtor*& rpLst,
2004 const sal_Char* pStrmName,
2005 tools::SvRef<SotStorage>& rStg)
2007 if( rpLst )
2008 rpLst->clear();
2009 else
2010 rpLst = new SvStringsISortDtor;
2013 OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
2014 OUString sTmp( sStrmName );
2016 if( rStg.Is() && rStg->IsStream( sStrmName ) )
2018 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sTmp,
2019 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2020 if( SVSTREAM_OK != xStrm->GetError())
// The stream could not be opened cleanly: release stream and storage and
// have the stream removed via RemoveStream_Imp().
2022 xStrm.Clear();
2023 rStg.Clear();
2024 RemoveStream_Imp( sStrmName );
2026 else
2028 uno::Reference< uno::XComponentContext > xContext =
2029 comphelper::getProcessComponentContext();
2031 xml::sax::InputSource aParserInput;
2032 aParserInput.sSystemId = sStrmName;
2034 xStrm->Seek( 0L );
2035 xStrm->SetBufferSize( 8 * 1024 );
2036 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2038 // get filter
// (the import filter writes the parsed entries into *rpLst)
2039 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2041 // connect parser and filter
2042 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2043 uno::Reference< xml::sax::XFastTokenHandler > xTokenHandler = static_cast< xml::sax::XFastTokenHandler* >( new SvXMLAutoCorrectTokenHandler );
2044 xParser->setFastDocumentHandler( xFilter );
2045 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2046 xParser->setTokenHandler( xTokenHandler );
2048 // parse
// Parse errors are deliberately swallowed by the catch clauses below.
2051 xParser->parseStream( aParserInput );
2053 catch( const xml::sax::SAXParseException& )
2055 // re throw ?
2057 catch( const xml::sax::SAXException& )
2059 // re throw ?
2061 catch( const io::IOException& )
2063 // re throw ?
2068 // Set time stamp
2069 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2070 &aModifiedDate, &aModifiedTime );
2071 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
// Writes the exception list rLst as XML into the stream pStrmName of rStg.
//   rLst       list to save; an empty list removes the stream instead
//   pStrmName  name of the stream inside the storage
//   rStg       target storage; nothing happens if it is not set
//   bConvert   true: the storage is not committed here after a successful
//              stream commit
2076 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2077 const SvStringsISortDtor& rLst,
2078 const sal_Char* pStrmName,
2079 tools::SvRef<SotStorage> &rStg,
2080 bool bConvert )
2082 if( rStg.Is() )
2084 OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
2085 if( rLst.empty() )
// Nothing to store: drop the stream and commit the storage.
2087 rStg->Remove( sStrmName );
2088 rStg->Commit();
2090 else
2092 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2093 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2094 if( xStrm.Is() )
// Truncate the stream before writing and tag it as XML.
2096 xStrm->SetSize( 0 );
2097 xStrm->SetBufferSize( 8192 );
2098 OUString aMime( "text/xml" );
2099 uno::Any aAny;
2100 aAny <<= aMime;
2101 xStrm->SetProperty( OUString("MediaType"), aAny );
2104 uno::Reference< uno::XComponentContext > xContext =
2105 comphelper::getProcessComponentContext();
2107 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2108 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2109 xWriter->setOutputStream(xOut);
2111 uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2112 SvXMLExceptionListExport aExp( xContext, rLst, sStrmName, xHandler );
2114 aExp.exportDoc( XML_BLOCK_LIST );
2116 xStrm->Commit();
2117 if( xStrm->GetError() == SVSTREAM_OK )
2119 xStrm.Clear();
2120 if (!bConvert)
2122 rStg->Commit();
2123 if( SVSTREAM_OK != rStg->GetError() )
// The storage commit failed: remove the stream again and commit once more.
2125 rStg->Remove( sStrmName );
2126 rStg->Commit();
2135 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2137 if( pAutocorr_List )
2138 pAutocorr_List->DeleteAndDestroyAll();
2139 else
2140 pAutocorr_List = new SvxAutocorrWordList();
2144 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2145 OUString aXMLWordListName( pXMLImplAutocorr_ListStr, strlen(pXMLImplAutocorr_ListStr), RTL_TEXTENCODING_MS_1252 );
2146 uno::Reference < io::XStream > xStrm = xStg->openStreamElement( aXMLWordListName, embed::ElementModes::READ );
2147 uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2149 xml::sax::InputSource aParserInput;
2150 aParserInput.sSystemId = aXMLWordListName;
2151 aParserInput.aInputStream = xStrm->getInputStream();
2153 // get parser
2154 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2155 SAL_INFO("editeng", "AutoCorrect Import" );
2156 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List, rAutoCorrect, xStg );
2157 uno::Reference< xml::sax::XFastTokenHandler > xTokenHandler = static_cast< xml::sax::XFastTokenHandler* >( new SvXMLAutoCorrectTokenHandler );
2159 // connect parser and filter
2160 xParser->setFastDocumentHandler( xFilter );
2161 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2162 xParser->setTokenHandler(xTokenHandler);
2164 // parse
2165 xParser->parseStream( aParserInput );
2167 catch ( const uno::Exception& )
2171 // Set time stamp
2172 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2173 &aModifiedDate, &aModifiedTime );
2174 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2176 return pAutocorr_List;
2179 void SvxAutoCorrectLanguageLists::SetAutocorrWordList( SvxAutocorrWordList* pList )
2181 if( pAutocorr_List && pList != pAutocorr_List )
2182 delete pAutocorr_List;
2183 pAutocorr_List = pList;
2184 if( !pAutocorr_List )
2186 OSL_ENSURE( false, "No valid list" );
2187 pAutocorr_List = new SvxAutocorrWordList();
2189 nFlags |= ChgWordLstLoad;
2192 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2194 if( !( ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2195 SetAutocorrWordList( LoadAutocorrWordList() );
2196 return pAutocorr_List;
2199 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2201 if( !( CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2202 SetCplSttExceptList( LoadCplSttExceptList() );
2203 return pCplStt_ExcptLst;
2206 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2208 bool aRet = false;
2209 if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2211 MakeUserStorage_Impl();
2212 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2214 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2216 xStg = 0;
2217 // Set time stamp
2218 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2219 &aModifiedDate, &aModifiedTime );
2220 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2221 aRet = true;
2223 return aRet;
2226 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew)
2228 bool aRet = false;
2229 SvStringsISortDtor* pExceptList = LoadWrdSttExceptList();
2230 if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second )
2232 MakeUserStorage_Impl();
2233 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2235 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2237 xStg = 0;
2238 // Set time stamp
2239 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2240 &aModifiedDate, &aModifiedTime );
2241 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2242 aRet = true;
2244 return aRet;
2247 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2251 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2252 OUString sTemp ( pXMLImplCplStt_ExcptLstStr );
2253 if( xStg.Is() && xStg->IsContained( sTemp ) )
2254 LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2256 catch (const css::ucb::ContentCreationException&)
2259 return pCplStt_ExcptLst;
2262 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2264 MakeUserStorage_Impl();
2265 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2267 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2269 xStg = 0;
2271 // Set time stamp
2272 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2273 &aModifiedDate, &aModifiedTime );
2274 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2277 void SvxAutoCorrectLanguageLists::SetCplSttExceptList( SvStringsISortDtor* pList )
2279 if( pCplStt_ExcptLst && pList != pCplStt_ExcptLst )
2280 delete pCplStt_ExcptLst;
2282 pCplStt_ExcptLst = pList;
2283 if( !pCplStt_ExcptLst )
2285 OSL_ENSURE( false, "No valid list" );
2286 pCplStt_ExcptLst = new SvStringsISortDtor;
2288 nFlags |= CplSttLstLoad;
2291 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList()
2295 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2296 OUString sTemp ( pXMLImplWrdStt_ExcptLstStr );
2297 if( xStg.Is() && xStg->IsContained( sTemp ) )
2298 LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2300 catch (const css::ucb::ContentCreationException &e)
2302 SAL_WARN("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList: Caught exception: " << e.Message);
2304 return pWrdStt_ExcptLst;
2307 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList()
2309 MakeUserStorage_Impl();
2310 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2312 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2314 xStg = 0;
2315 // Set time stamp
2316 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2317 &aModifiedDate, &aModifiedTime );
2318 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2321 void SvxAutoCorrectLanguageLists::SetWrdSttExceptList( SvStringsISortDtor* pList )
2323 if( pWrdStt_ExcptLst && pList != pWrdStt_ExcptLst )
2324 delete pWrdStt_ExcptLst;
2325 pWrdStt_ExcptLst = pList;
2326 if( !pWrdStt_ExcptLst )
2328 OSL_ENSURE( false, "No valid list" );
2329 pWrdStt_ExcptLst = new SvStringsISortDtor;
2331 nFlags |= WrdSttLstLoad;
2334 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList()
2336 if( !( WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2337 SetWrdSttExceptList( LoadWrdSttExceptList() );
2338 return pWrdStt_ExcptLst;
2341 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2343 if( sShareAutoCorrFile != sUserAutoCorrFile )
2345 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2346 if( xStg.Is() && SVSTREAM_OK == xStg->GetError() &&
2347 xStg->IsStream( rName ) )
2349 xStg->Remove( rName );
2350 xStg->Commit();
2352 xStg = 0;
2357 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2359 // The conversion needs to happen if the file is already in the user
2360 // directory and is in the old format. Additionally it needs to
2361 // happen when the file is being copied from share to user.
2363 bool bError = false, bConvert = false, bCopy = false;
2364 INetURLObject aDest;
2365 INetURLObject aSource;
2367 if (sUserAutoCorrFile != sShareAutoCorrFile )
2369 aSource = INetURLObject ( sShareAutoCorrFile );
2370 aDest = INetURLObject ( sUserAutoCorrFile );
2371 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2373 aDest.SetExtension ( OUString("bak") );
2374 bConvert = true;
2376 bCopy = true;
2378 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2380 aSource = INetURLObject ( sUserAutoCorrFile );
2381 aDest = INetURLObject ( sUserAutoCorrFile );
2382 aDest.SetExtension ( OUString("bak") );
2383 bCopy = bConvert = true;
2385 if (bCopy)
2389 OUString sMain(aDest.GetMainURL( INetURLObject::DECODE_TO_IURI ));
2390 sal_Unicode cSlash = '/';
2391 sal_Int32 nSlashPos = sMain.lastIndexOf(cSlash);
2392 sMain = sMain.copy(0, nSlashPos);
2393 ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2394 Any aAny;
2395 TransferInfo aInfo;
2396 aInfo.NameClash = NameClash::OVERWRITE;
2397 aInfo.NewTitle = aDest.GetName();
2398 aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DECODE_TO_IURI );
2399 aInfo.MoveData = sal_False;
2400 aAny <<= aInfo;
2401 aNewContent.executeCommand( OUString ( "transfer" ), aAny);
2403 catch (...)
2405 bError = true;
2408 if (bConvert && !bError)
2410 tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DECODE_TO_IURI ), StreamMode::READ );
2411 tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
2413 if( xSrcStg.Is() && xDstStg.Is() )
2415 OUString sXMLWord ( pXMLImplWrdStt_ExcptLstStr );
2416 OUString sXMLSentence ( pXMLImplCplStt_ExcptLstStr );
2417 SvStringsISortDtor *pTmpWordList = NULL;
2419 if (xSrcStg->IsContained( sXMLWord ) )
2420 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg );
2422 if (pTmpWordList)
2424 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true );
2425 pTmpWordList->clear();
2426 pTmpWordList = NULL;
2430 if (xSrcStg->IsContained( sXMLSentence ) )
2431 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2433 if (pTmpWordList)
2435 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2436 pTmpWordList->clear();
2439 GetAutocorrWordList();
2440 MakeBlocklist_Imp( *xDstStg );
2441 sShareAutoCorrFile = sUserAutoCorrFile;
2442 xDstStg = 0;
2445 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DECODE_TO_IURI ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2446 aContent.executeCommand ( OUString( "delete" ), makeAny ( true ) );
2448 catch (...)
2453 else if( bCopy && !bError )
2454 sShareAutoCorrFile = sUserAutoCorrFile;
2457 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2459 OUString sStrmName( pXMLImplAutocorr_ListStr, strlen(pXMLImplAutocorr_ListStr), RTL_TEXTENCODING_MS_1252 );
2460 bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2461 if( !bRemove )
2463 tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( sStrmName,
2464 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2465 if( refList.Is() )
2467 refList->SetSize( 0 );
2468 refList->SetBufferSize( 8192 );
2469 OUString aPropName( "MediaType" );
2470 OUString aMime( "text/xml" );
2471 uno::Any aAny;
2472 aAny <<= aMime;
2473 refList->SetProperty( aPropName, aAny );
2475 uno::Reference< uno::XComponentContext > xContext =
2476 comphelper::getProcessComponentContext();
2478 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2479 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2480 xWriter->setOutputStream(xOut);
2482 uno::Reference<xml::sax::XDocumentHandler> xHandler(xWriter, uno::UNO_QUERY);
2483 SvXMLAutoCorrectExport aExp( xContext, pAutocorr_List, sStrmName, xHandler );
2485 aExp.exportDoc( XML_BLOCK_LIST );
2487 refList->Commit();
2488 bRet = SVSTREAM_OK == refList->GetError();
2489 if( bRet )
2491 refList.Clear();
2492 rStg.Commit();
2493 if( SVSTREAM_OK != rStg.GetError() )
2495 bRemove = true;
2496 bRet = false;
2500 else
2501 bRet = false;
2504 if( bRemove )
2506 rStg.Remove( sStrmName );
2507 rStg.Commit();
2510 return bRet;
2513 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2515 // First get the current list!
2516 GetAutocorrWordList();
2518 MakeUserStorage_Impl();
2519 tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2521 bool bRet = xStorage.Is() && SVSTREAM_OK == xStorage->GetError();
2523 if( bRet )
2525 for ( sal_uInt32 i=0; i < aDeleteEntries.size(); i++ )
2527 SvxAutocorrWord aWordToDelete = aDeleteEntries[i];
2528 SvxAutocorrWord *pFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2529 if( pFoundEntry )
2531 if( !pFoundEntry->IsTextOnly() )
2533 OUString aName( aWordToDelete.GetShort() );
2534 if (xStorage->IsOLEStorage())
2535 aName = EncryptBlockName_Imp(aName);
2536 else
2537 GeneratePackageName ( aWordToDelete.GetShort(), aName );
2539 if( xStorage->IsContained( aName ) )
2541 xStorage->Remove( aName );
2542 bRet = xStorage->Commit();
2545 delete pFoundEntry;
2549 for ( sal_uInt32 i=0; i < aNewEntries.size(); i++ )
2551 SvxAutocorrWord *pWordToAdd = new SvxAutocorrWord( aNewEntries[i].GetShort(), aNewEntries[i].GetLong(), true );
2552 SvxAutocorrWord *pRemoved = pAutocorr_List->FindAndRemove( pWordToAdd );
2553 if( pRemoved )
2555 if( !pRemoved->IsTextOnly() )
2557 // Still have to remove the Storage
2558 OUString sStorageName( pWordToAdd->GetShort() );
2559 if (xStorage->IsOLEStorage())
2560 sStorageName = EncryptBlockName_Imp(sStorageName);
2561 else
2562 GeneratePackageName ( pWordToAdd->GetShort(), sStorageName);
2564 if( xStorage->IsContained( sStorageName ) )
2565 xStorage->Remove( sStorageName );
2567 delete pRemoved;
2569 bRet = pAutocorr_List->Insert( pWordToAdd );
2571 if ( !bRet )
2573 delete pWordToAdd;
2574 break;
2578 if ( bRet )
2580 bRet = MakeBlocklist_Imp( *xStorage );
2583 return bRet;
2586 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2588 // First get the current list!
2589 GetAutocorrWordList();
2591 MakeUserStorage_Impl();
2592 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2594 bool bRet = xStg.Is() && SVSTREAM_OK == xStg->GetError();
2596 // Update the word list
2597 if( bRet )
2599 SvxAutocorrWord* pNew = new SvxAutocorrWord( rShort, rLong, true );
2600 SvxAutocorrWord *pRemove = pAutocorr_List->FindAndRemove( pNew );
2601 if( pRemove )
2603 if( !pRemove->IsTextOnly() )
2605 // Still have to remove the Storage
2606 OUString sStgNm( rShort );
2607 if (xStg->IsOLEStorage())
2608 sStgNm = EncryptBlockName_Imp(sStgNm);
2609 else
2610 GeneratePackageName ( rShort, sStgNm);
2612 if( xStg->IsContained( sStgNm ) )
2613 xStg->Remove( sStgNm );
2615 delete pRemove;
2618 if( pAutocorr_List->Insert( pNew ) )
2620 bRet = MakeBlocklist_Imp( *xStg );
2621 xStg = 0;
2623 else
2625 delete pNew;
2626 bRet = false;
2629 return bRet;
2632 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2633 SfxObjectShell& rShell )
2635 // First get the current list!
2636 GetAutocorrWordList();
2638 MakeUserStorage_Impl();
2640 bool bRet = false;
2641 OUString sLong;
2644 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2645 bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2646 xStg = 0;
2648 // Update the word list
2649 if( bRet )
2651 SvxAutocorrWord* pNew = new SvxAutocorrWord( rShort, sLong, false );
2652 if( pAutocorr_List->Insert( pNew ) )
2654 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2655 MakeBlocklist_Imp( *xStor );
2657 else
2658 delete pNew;
2661 catch ( const uno::Exception& )
2665 return bRet;
2668 // Delete an entry
2669 bool SvxAutoCorrectLanguageLists::DeleteText( const OUString& rShort )
2671 // First get the current list!
2672 GetAutocorrWordList();
2674 MakeUserStorage_Impl();
2676 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, STREAM_READWRITE );
2677 bool bRet = xStg.Is() && SVSTREAM_OK == xStg->GetError();
2678 if( bRet )
2680 SvxAutocorrWord aTmp( rShort, rShort );
2681 SvxAutocorrWord *pFnd = pAutocorr_List->FindAndRemove( &aTmp );
2682 if( pFnd )
2684 if( !pFnd->IsTextOnly() )
2686 OUString aName( rShort );
2687 if (xStg->IsOLEStorage())
2688 aName = EncryptBlockName_Imp(aName);
2689 else
2690 GeneratePackageName ( rShort, aName );
2691 if( xStg->IsContained( aName ) )
2693 xStg->Remove( aName );
2694 bRet = xStg->Commit();
2698 delete pFnd;
2699 MakeBlocklist_Imp( *xStg );
2700 xStg = 0;
2702 else
2703 bRet = false;
2705 return bRet;
2708 // Keep the list sorted ...
2709 struct CompareSvxAutocorrWordList
2711 bool operator()( SvxAutocorrWord* const& lhs, SvxAutocorrWord* const& rhs ) const
2713 CollatorWrapper& rCmp = ::GetCollatorWrapper();
2714 return rCmp.compareString( lhs->GetShort(), rhs->GetShort() ) < 0;
2718 namespace {
2720 typedef std::set<SvxAutocorrWord*, CompareSvxAutocorrWordList> AutocorrWordSetType;
2721 typedef std::unordered_map<OUString, SvxAutocorrWord*, OUStringHash> AutocorrWordHashType;
2725 struct SvxAutocorrWordList::Impl
2728 // only one of these contains the data
2729 mutable AutocorrWordSetType maSet;
2730 mutable AutocorrWordHashType maHash; // key is 'Short'
2732 void DeleteAndDestroyAll()
2734 for (AutocorrWordHashType::const_iterator it = maHash.begin(); it != maHash.end(); ++it)
2735 delete it->second;
2736 maHash.clear();
2738 for (AutocorrWordSetType::const_iterator it2 = maSet.begin(); it2 != maSet.end(); ++it2)
2739 delete *it2;
2740 maSet.clear();
2744 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2746 SvxAutocorrWordList::~SvxAutocorrWordList()
2748 mpImpl->DeleteAndDestroyAll();
2749 delete mpImpl;
2752 void SvxAutocorrWordList::DeleteAndDestroyAll()
2754 mpImpl->DeleteAndDestroyAll();
2757 // returns true if inserted
2758 bool SvxAutocorrWordList::Insert(SvxAutocorrWord *pWord) const
2760 if ( mpImpl->maSet.empty() ) // use the hash
2762 OUString aShort( pWord->GetShort() );
2763 return mpImpl->maHash.insert( std::pair<OUString, SvxAutocorrWord *>( aShort, pWord ) ).second;
2765 else
2766 return mpImpl->maSet.insert( pWord ).second;
2769 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2771 SvxAutocorrWord* pNew = new SvxAutocorrWord( sWrong, sRight, bOnlyTxt );
2772 if( !Insert( pNew ) )
2773 delete pNew;
2776 bool SvxAutocorrWordList::empty() const
2778 return mpImpl->maHash.empty() && mpImpl->maSet.empty();
2781 SvxAutocorrWord *SvxAutocorrWordList::FindAndRemove(SvxAutocorrWord *pWord)
2783 SvxAutocorrWord *pMatch = NULL;
2785 if ( mpImpl->maSet.empty() ) // use the hash
2787 AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2788 if( it != mpImpl->maHash.end() )
2790 pMatch = it->second;
2791 mpImpl->maHash.erase (it);
2794 else
2796 AutocorrWordSetType::iterator it = mpImpl->maSet.find( pWord );
2797 if( it != mpImpl->maSet.end() )
2799 pMatch = *it;
2800 mpImpl->maSet.erase (it);
2803 return pMatch;
2806 // return the sorted contents - defer sorting until we have to.
2807 SvxAutocorrWordList::Content SvxAutocorrWordList::getSortedContent() const
2809 Content aContent;
2811 // convert from hash to set permanantly
2812 if ( mpImpl->maSet.empty() )
2814 // This beasty has some O(N log(N)) in a terribly slow ICU collate fn.
2815 for (AutocorrWordHashType::const_iterator it = mpImpl->maHash.begin(); it != mpImpl->maHash.end(); ++it)
2816 mpImpl->maSet.insert( it->second );
2817 mpImpl->maHash.clear();
2819 for (AutocorrWordSetType::const_iterator it = mpImpl->maSet.begin(); it != mpImpl->maSet.end(); ++it)
2820 aContent.push_back( *it );
2822 return aContent;
2825 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
2826 const OUString &rTxt,
2827 sal_Int32 &rStt,
2828 sal_Int32 nEndPos) const
2830 const OUString& rChk = pFnd->GetShort();
2832 sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
2833 sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
2834 sal_Int32 nSttWdPos = nEndPos;
2836 // direct replacement of keywords surrounded by colons (for example, ":name:")
2837 bool bColonNameColon = rTxt.getLength() > nEndPos &&
2838 rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
2839 if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard )
2842 bool bWasWordDelim = false;
2843 sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
2844 if (bColonNameColon)
2845 nCalcStt++;
2846 if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
2847 ( nCalcStt < rStt &&
2848 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
2850 TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
2851 OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
2852 if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
2854 rStt = nCalcStt;
2855 if (!left_wildcard)
2857 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
2858 if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
2859 return NULL;
2860 return pFnd;
2862 // get the first word delimiter position before the matching ".*word" pattern
2863 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
2865 if (bWasWordDelim) rStt++;
2866 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
2867 // avoid double spaces before simple "word" replacement
2868 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong();
2869 SvxAutocorrWord* pNew = new SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern);
2870 if( Insert( pNew ) ) return pNew; else delete pNew;
2872 } else
2873 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
2874 if ( right_wildcard )
2877 OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
2878 // Get the last word delimiter position
2879 bool not_suffix;
2881 while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
2883 // search the first occurrence (with a left word delimitation, if needed)
2884 sal_Int32 nFndPos = -1;
2885 do {
2886 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1);
2887 not_suffix = (bWasWordDelim && (nSttWdPos >= nFndPos + sTmp.getLength()));
2888 } while ( nFndPos != -1 && (!(left_wildcard || (!left_wildcard && (!nFndPos || IsWordDelim( rTxt[ nFndPos - 1 ])))) || not_suffix));
2890 if ( nFndPos != -1 )
2892 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
2894 if ( left_wildcard )
2896 // get the first word delimiter position before the matching ".*word.*" pattern
2897 while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
2899 if (bWasWordDelim) nFndPos++;
2901 if (nEndPos + extra_repl <= nFndPos)
2903 return 0;
2905 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
2906 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl);
2908 OUString aLong;
2909 rStt = nFndPos;
2910 if ( !left_wildcard )
2912 sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
2913 aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : "");
2914 } else {
2915 OUStringBuffer buf;
2916 do {
2917 nSttWdPos = rTxt.indexOf( sTmp, nFndPos);
2918 if (nSttWdPos != -1)
2920 buf.append(rTxt.copy(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
2921 nFndPos = nSttWdPos + sTmp.getLength();
2923 } while (nSttWdPos != -1);
2924 if (nEndPos - nFndPos > extra_repl) buf.append(rTxt.copy(nFndPos, nEndPos - nFndPos));
2925 aLong = buf.makeStringAndClear();
2927 SvxAutocorrWord* pNew = new SvxAutocorrWord(aShort, aLong);
2928 if ( Insert( pNew ) )
2930 if ( IsWordDelim(rTxt[nEndPos]) ) return pNew;
2931 } else delete pNew;
2935 return NULL;
2938 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt,
2939 sal_Int32 nEndPos) const
2941 for (AutocorrWordHashType::const_iterator it = mpImpl->maHash.begin(); it != mpImpl->maHash.end(); ++it)
2943 if( const SvxAutocorrWord *aTmp = WordMatches( it->second, rTxt, rStt, nEndPos ) )
2944 return aTmp;
2947 for (AutocorrWordSetType::const_iterator it2 = mpImpl->maSet.begin(); it2 != mpImpl->maSet.end(); ++it2)
2949 if( const SvxAutocorrWord *aTmp = WordMatches( *it2, rTxt, rStt, nEndPos ) )
2950 return aTmp;
2952 return 0;
2955 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */