tdf#130857 qt weld: Implement QtInstanceWidget::strip_mnemonic
[LibreOffice.git] / lingucomponent / source / spellcheck / spell / sspellimp.cxx
blobfe676cde531255acea2975bb65fc55360e4aaa1c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/uno/Reference.h>
22 #include <com/sun/star/linguistic2/SpellFailure.hpp>
23 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
24 #include <comphelper/lok.hxx>
25 #include <comphelper/processfactory.hxx>
26 #include <cppuhelper/supportsservice.hxx>
27 #include <cppuhelper/weak.hxx>
28 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
29 #include <tools/debug.hxx>
30 #include <osl/mutex.hxx>
31 #include <osl/thread.h>
32 #include <com/sun/star/ucb/XSimpleFileAccess.hpp>
34 #include <lingutil.hxx>
35 #include <hunspell.hxx>
36 #include "sspellimp.hxx"
38 #include <linguistic/misc.hxx>
39 #include <linguistic/spelldta.hxx>
40 #include <i18nlangtag/languagetag.hxx>
41 #include <svtools/strings.hrc>
42 #include <unotools/lingucfg.hxx>
43 #include <unotools/resmgr.hxx>
44 #include <osl/diagnose.h>
45 #include <osl/file.hxx>
46 #include <rtl/ustrbuf.hxx>
47 #include <rtl/textenc.h>
48 #include <sal/log.hxx>
50 #include <numeric>
51 #include <utility>
52 #include <vector>
53 #include <set>
54 #include <string.h>
56 using namespace osl;
57 using namespace com::sun::star;
58 using namespace com::sun::star::beans;
59 using namespace com::sun::star::lang;
60 using namespace com::sun::star::uno;
61 using namespace com::sun::star::linguistic2;
62 using namespace linguistic;
64 // XML-header of SPELLML queries
65 #if !defined SPELL_XML
66 constexpr OUStringLiteral SPELL_XML = u"<?xml?>";
67 #endif
69 // only available in hunspell >= 1.5
70 #if !defined MAXWORDLEN
71 #define MAXWORDLEN 176
72 #endif
74 SpellChecker::SpellChecker() :
75 m_aEvtListeners(GetLinguMutex()),
76 m_bDisposing(false)
80 SpellChecker::DictItem::DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc)
81 : m_aDName(std::move(i_DName))
82 , m_aDLoc(std::move(i_DLoc))
83 , m_aDEnc(i_DEnc)
87 SpellChecker::~SpellChecker()
89 if (m_pPropHelper)
91 m_pPropHelper->RemoveAsPropListener();
95 PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl()
97 if (!m_pPropHelper)
99 Reference< XLinguProperties > xPropSet = GetLinguProperties();
101 m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) );
102 m_pPropHelper->AddAsPropListener(); //! after a reference is established
104 return *m_pPropHelper;
107 Sequence< Locale > SAL_CALL SpellChecker::getLocales()
109 MutexGuard aGuard( GetLinguMutex() );
111 // this routine should return the locales supported by the installed
112 // dictionaries.
113 if (m_DictItems.empty())
115 SvtLinguConfig aLinguCfg;
117 // get list of extension dictionaries-to-use
118 // (or better speaking: the list of dictionaries using the
119 // new configuration entries).
120 std::vector< SvtLinguConfigDictionaryEntry > aDics;
121 uno::Sequence< OUString > aFormatList;
122 aLinguCfg.GetSupportedDictionaryFormatsFor( u"SpellCheckers"_ustr,
123 u"org.openoffice.lingu.MySpellSpellChecker"_ustr, aFormatList );
124 for (auto const& format : aFormatList)
126 std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
127 aLinguCfg.GetActiveDictionariesByFormat(format) );
128 aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
131 //!! for compatibility with old dictionaries (the ones not using extensions
132 //!! or new configuration entries, but still using the dictionary.lst file)
133 //!! Get the list of old style spell checking dictionaries to use...
134 std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
135 GetOldStyleDics( "DICT" ) );
137 // to prefer dictionaries with configuration entries we will only
138 // use those old style dictionaries that add a language that
139 // is not yet supported by the list of new style dictionaries
140 MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
142 if (!aDics.empty())
144 uno::Reference< lang::XMultiServiceFactory > xServiceFactory(comphelper::getProcessServiceFactory());
145 uno::Reference< ucb::XSimpleFileAccess > xAccess(xServiceFactory->createInstance(u"com.sun.star.ucb.SimpleFileAccess"_ustr), uno::UNO_QUERY);
146 // get supported locales from the dictionaries-to-use...
147 std::set<OUString> aLocaleNamesSet;
148 for (auto const& dict : aDics)
150 const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames );
151 uno::Sequence< OUString > aLocations( dict.aLocations );
152 SAL_WARN_IF(
153 aLocaleNames.hasElements() && !aLocations.hasElements(),
154 "lingucomponent", "no locations");
155 if (aLocations.hasElements())
157 if (xAccess.is() && xAccess->exists(aLocations[0]))
159 for (auto const& locale : aLocaleNames)
161 if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(locale))
162 continue;
164 aLocaleNamesSet.insert(locale);
167 else
169 SAL_WARN(
170 "lingucomponent",
171 "missing <" << aLocations[0] << ">");
175 // ... and add them to the resulting sequence
176 m_aSuppLocales.realloc( aLocaleNamesSet.size() );
177 std::transform(
178 aLocaleNamesSet.begin(), aLocaleNamesSet.end(), m_aSuppLocales.getArray(),
179 [](auto const& localeName) { return LanguageTag::convertToLocale(localeName); });
181 //! For each dictionary and each locale we need a separate entry.
182 //! If this results in more than one dictionary per locale than (for now)
183 //! it is undefined which dictionary gets used.
184 //! In the future the implementation should support using several dictionaries
185 //! for one locale.
186 sal_uInt32 nDictSize = std::accumulate(aDics.begin(), aDics.end(), sal_uInt32(0),
187 [](const sal_uInt32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
188 return nSum + dict.aLocaleNames.getLength(); });
190 // add dictionary information
191 m_DictItems.reserve(nDictSize);
192 for (auto const& dict : aDics)
194 if (dict.aLocaleNames.hasElements() &&
195 dict.aLocations.hasElements())
197 const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames );
199 // currently only one language per dictionary is supported in the actual implementation...
200 // Thus here we work-around this by adding the same dictionary several times.
201 // Once for each of its supported locales.
202 for (auto const& localeName : aLocaleNames)
204 // also both files have to be in the same directory and the
205 // file names must only differ in the extension (.aff/.dic).
206 // Thus we use the first location only and strip the extension part.
207 OUString aLocation = dict.aLocations[0];
208 sal_Int32 nPos = aLocation.lastIndexOf( '.' );
209 aLocation = aLocation.copy( 0, nPos );
211 m_DictItems.emplace_back(aLocation, LanguageTag::convertToLocale(localeName), RTL_TEXTENCODING_DONTKNOW);
215 DBG_ASSERT( nDictSize == m_DictItems.size(), "index mismatch?" );
217 else
219 // no dictionary found so register no dictionaries
220 m_aSuppLocales.realloc(0);
224 return m_aSuppLocales;
227 sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale)
229 MutexGuard aGuard( GetLinguMutex() );
231 bool bRes = false;
232 if (!m_aSuppLocales.hasElements())
233 getLocales();
235 for (auto const& suppLocale : m_aSuppLocales)
237 if (rLocale == suppLocale)
239 bRes = true;
240 break;
243 return bRes;
246 sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale &rLocale, int& rInfo)
248 if (rWord.getLength() > MAXWORDLEN)
249 return -1;
251 Hunspell * pMS = nullptr;
252 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
254 // initialize a myspell object for each dictionary once
255 // (note: mutex is held higher up in isValid)
257 sal_Int16 nRes = -1;
259 // first handle smart quotes both single and double
260 OUStringBuffer rBuf(rWord);
261 sal_Int32 n = rBuf.getLength();
262 sal_Unicode c;
263 sal_Int32 extrachar = 0;
265 for (sal_Int32 ix=0; ix < n; ix++)
267 c = rBuf[ix];
268 if ((c == 0x201C) || (c == 0x201D))
269 rBuf[ix] = u'"';
270 else if ((c == 0x2018) || (c == 0x2019))
271 rBuf[ix] = u'\'';
273 // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
274 // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
275 // set ICONV and IGNORE aff file options, if needed.)
276 else if ((c == 0x200C) || (c == 0x200D) ||
277 ((c >= 0xFB00) && (c <= 0xFB04)))
278 extrachar = 1;
280 OUString nWord(rBuf.makeStringAndClear());
282 if (n)
284 for (auto& currDict : m_DictItems)
286 pMS = nullptr;
287 eEnc = RTL_TEXTENCODING_DONTKNOW;
289 if (rLocale == currDict.m_aDLoc)
291 if (!currDict.m_pDict)
293 OUString dicpath = currDict.m_aDName + ".dic";
294 OUString affpath = currDict.m_aDName + ".aff";
295 OUString dict;
296 OUString aff;
297 osl::FileBase::getSystemPathFromFileURL(dicpath,dict);
298 osl::FileBase::getSystemPathFromFileURL(affpath,aff);
299 #if defined(_WIN32)
300 // workaround for Windows specific problem that the
301 // path length in calls to 'fopen' is limited to somewhat
302 // about 120+ characters which will usually be exceed when
303 // using dictionaries as extensions. (Hunspell waits UTF-8 encoded
304 // path with \\?\ long path prefix.)
305 OString aTmpaff = Win_AddLongPathPrefix(OUStringToOString(aff, RTL_TEXTENCODING_UTF8));
306 OString aTmpdict = Win_AddLongPathPrefix(OUStringToOString(dict, RTL_TEXTENCODING_UTF8));
307 #else
308 OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding()));
309 OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding()));
310 #endif
312 currDict.m_pDict = std::make_unique<Hunspell>(aTmpaff.getStr(),aTmpdict.getStr());
313 #if defined(H_DEPRECATED)
314 currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dict_encoding().c_str());
315 #else
316 currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dic_encoding());
317 #endif
319 pMS = currDict.m_pDict.get();
320 eEnc = currDict.m_aDEnc;
323 if (pMS)
325 // we don't want to work with a default text encoding since following incorrect
326 // results may occur only for specific text and thus may be hard to notice.
327 // Thus better always make a clean exit here if the text encoding is in question.
328 // Hopefully something not working at all will raise proper attention quickly. ;-)
329 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
330 if (eEnc == RTL_TEXTENCODING_DONTKNOW)
331 return -1;
333 OString aWrd(OU2ENC(nWord,eEnc));
334 #if defined(H_DEPRECATED)
335 bool bVal = pMS->spell(std::string(aWrd), &rInfo);
336 #else
337 bool bVal = pMS->spell(aWrd.getStr(), &rInfo) != 0;
338 #endif
339 if (!bVal) {
340 if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) {
341 OUStringBuffer aBuf(nWord);
342 n = aBuf.getLength();
343 for (sal_Int32 ix=n-1; ix >= 0; ix--)
345 switch (aBuf[ix]) {
346 case 0xFB00: aBuf.remove(ix, 1); aBuf.insert(ix, "ff"); break;
347 case 0xFB01: aBuf.remove(ix, 1); aBuf.insert(ix, "fi"); break;
348 case 0xFB02: aBuf.remove(ix, 1); aBuf.insert(ix, "fl"); break;
349 case 0xFB03: aBuf.remove(ix, 1); aBuf.insert(ix, "ffi"); break;
350 case 0xFB04: aBuf.remove(ix, 1); aBuf.insert(ix, "ffl"); break;
351 case 0x200C:
352 case 0x200D: aBuf.remove(ix, 1); break;
355 OUString aWord(aBuf.makeStringAndClear());
356 OString bWrd(OU2ENC(aWord, eEnc));
357 #if defined(H_DEPRECATED)
358 bVal = pMS->spell(std::string(bWrd), &rInfo);
359 #else
360 bVal = pMS->spell(bWrd.getStr(), &rInfo) != 0;
361 #endif
362 if (bVal) return -1;
364 nRes = SpellFailure::SPELLING_ERROR;
365 } else {
366 return -1;
368 pMS = nullptr;
373 return nRes;
376 sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale,
377 const css::uno::Sequence< css::beans::PropertyValue >& rProperties )
379 MutexGuard aGuard( GetLinguMutex() );
381 if (rLocale == Locale() || rWord.isEmpty())
382 return true;
384 if (!hasLocale( rLocale ))
385 return true;
387 // return sal_False to process SPELLML requests (they are longer than the header)
388 if (rWord.match(SPELL_XML, 0) && (rWord.getLength() > 10)) return false;
390 // Get property values to be used.
391 // These are be the default values set in the SN_LINGU_PROPERTIES
392 // PropertySet which are overridden by the supplied ones from the
393 // last argument.
394 // You'll probably like to use a simpler solution than the provided
395 // one using the PropertyHelper_Spell.
396 PropertyHelper_Spelling& rHelper = GetPropHelper();
397 rHelper.SetTmpPropVals( rProperties );
399 int nInfo = 0;
400 sal_Int16 nFailure = GetSpellFailure( rWord, rLocale, nInfo );
401 if (nFailure != -1 && !rWord.match(SPELL_XML, 0))
403 LanguageType nLang = LinguLocaleToLanguage( rLocale );
404 // postprocess result for errors that should be ignored
405 const bool bIgnoreError =
406 (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) ||
407 (!rHelper.IsSpellWithDigits() && HasDigits( rWord ));
408 if (bIgnoreError)
409 nFailure = -1;
411 //#define SPELL_COMPOUND 1 << 0
413 // valid word, but it's a rule-based compound word
414 if ( nFailure == -1 && (nInfo & SPELL_COMPOUND) )
416 bool bHasHyphen = rWord.indexOf('-') > -1;
417 if ( (bHasHyphen && !rHelper.IsSpellHyphenatedCompound()) ||
418 (!bHasHyphen && !rHelper.IsSpellClosedCompound()) )
420 return false;
424 return (nFailure == -1);
427 Reference< XSpellAlternatives >
428 SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale )
430 // Retrieves the return values for the 'spell' function call in case
431 // of a misspelled word.
432 // Especially it may give a list of suggested (correct) words:
433 Reference< XSpellAlternatives > xRes;
434 // note: mutex is held by higher up by spell which covers both
436 Hunspell* pMS = nullptr;
437 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
439 // first handle smart quotes (single and double)
440 OUStringBuffer rBuf(rWord);
441 sal_Int32 n = rBuf.getLength();
442 sal_Unicode c;
443 for (sal_Int32 ix=0; ix < n; ix++)
445 c = rBuf[ix];
446 if ((c == 0x201C) || (c == 0x201D))
447 rBuf[ix] = u'"';
448 if ((c == 0x2018) || (c == 0x2019))
449 rBuf[ix] = u'\'';
451 OUString nWord(rBuf.makeStringAndClear());
453 if (n)
455 LanguageType nLang = LinguLocaleToLanguage( rLocale );
456 int numsug = 0;
458 Sequence< OUString > aStr( 0 );
459 for (const auto& currDict : m_DictItems)
461 pMS = nullptr;
462 eEnc = RTL_TEXTENCODING_DONTKNOW;
464 if (rLocale == currDict.m_aDLoc)
466 pMS = currDict.m_pDict.get();
467 eEnc = currDict.m_aDEnc;
470 if (pMS)
472 OString aWrd(OU2ENC(nWord,eEnc));
473 #if defined(H_DEPRECATED)
474 std::vector<std::string> suglst = pMS->suggest(std::string(aWrd));
475 if (!suglst.empty())
477 aStr.realloc(numsug + suglst.size());
478 OUString *pStr = aStr.getArray();
479 for (size_t ii = 0; ii < suglst.size(); ++ii)
481 pStr[numsug + ii] = OUString(suglst[ii].c_str(), suglst[ii].size(), eEnc);
483 numsug += suglst.size();
485 #else
486 char ** suglst = nullptr;
487 int count = pMS->suggest(&suglst, aWrd.getStr());
488 if (count)
490 aStr.realloc( numsug + count );
491 OUString *pStr = aStr.getArray();
492 for (int ii=0; ii < count; ++ii)
494 OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc);
495 pStr[numsug + ii] = cvtwrd;
497 numsug += count;
499 pMS->free_list(&suglst, count);
500 #endif
504 // now return an empty alternative for no suggestions or the list of alternatives if some found
505 xRes = SpellAlternatives::CreateSpellAlternatives( rWord, nLang, SpellFailure::SPELLING_ERROR, aStr );
506 return xRes;
508 return xRes;
511 Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell(
512 const OUString& rWord, const Locale& rLocale,
513 const css::uno::Sequence< css::beans::PropertyValue >& rProperties )
515 MutexGuard aGuard( GetLinguMutex() );
517 if (rLocale == Locale() || rWord.isEmpty())
518 return nullptr;
520 if (!hasLocale( rLocale ))
521 return nullptr;
523 Reference< XSpellAlternatives > xAlt;
524 if (!isValid( rWord, rLocale, rProperties ))
526 xAlt = GetProposals( rWord, rLocale );
528 return xAlt;
531 sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener(
532 const Reference< XLinguServiceEventListener >& rxLstnr )
534 MutexGuard aGuard( GetLinguMutex() );
536 bool bRes = false;
537 if (!m_bDisposing && rxLstnr.is())
539 bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
541 return bRes;
544 sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener(
545 const Reference< XLinguServiceEventListener >& rxLstnr )
547 MutexGuard aGuard( GetLinguMutex() );
549 bool bRes = false;
550 if (!m_bDisposing && rxLstnr.is())
552 bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
554 return bRes;
557 OUString SAL_CALL SpellChecker::getServiceDisplayName(const Locale& rLocale)
559 std::locale loc(Translate::Create("svt", LanguageTag(rLocale)));
560 return Translate::get(STR_DESCRIPTION_HUNSPELL, loc);
563 void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments )
565 MutexGuard aGuard( GetLinguMutex() );
567 if (m_pPropHelper)
568 return;
570 sal_Int32 nLen = rArguments.getLength();
571 if (2 == nLen)
573 Reference< XLinguProperties > xPropSet;
574 rArguments.getConstArray()[0] >>= xPropSet;
575 // rArguments.getConstArray()[1] >>= xDicList;
577 //! Pointer allows for access of the non-UNO functions.
578 //! And the reference to the UNO-functions while increasing
579 //! the ref-count and will implicitly free the memory
580 //! when the object is no longer used.
581 m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) );
582 m_pPropHelper->AddAsPropListener(); //! after a reference is established
584 else {
585 OSL_FAIL( "wrong number of arguments in sequence" );
589 void SAL_CALL SpellChecker::dispose()
591 MutexGuard aGuard( GetLinguMutex() );
593 if (!m_bDisposing)
595 m_bDisposing = true;
596 EventObject aEvtObj( static_cast<XSpellChecker *>(this) );
597 m_aEvtListeners.disposeAndClear( aEvtObj );
598 if (m_pPropHelper)
600 m_pPropHelper->RemoveAsPropListener();
601 m_pPropHelper.reset();
606 void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener )
608 MutexGuard aGuard( GetLinguMutex() );
610 if (!m_bDisposing && rxListener.is())
611 m_aEvtListeners.addInterface( rxListener );
614 void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener )
616 MutexGuard aGuard( GetLinguMutex() );
618 if (!m_bDisposing && rxListener.is())
619 m_aEvtListeners.removeInterface( rxListener );
622 // Service specific part
623 OUString SAL_CALL SpellChecker::getImplementationName()
625 return u"org.openoffice.lingu.MySpellSpellChecker"_ustr;
628 sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName )
630 return cppu::supportsService(this, ServiceName);
633 Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames()
635 return { SN_SPELLCHECKER };
638 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
639 lingucomponent_SpellChecker_get_implementation(
640 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
642 return cppu::acquire(new SpellChecker());
646 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */