bump product version to 6.4.0.3
[LibreOffice.git] / lingucomponent / source / spellcheck / spell / sspellimp.cxx
blobe6901af11577e83fff39d08e1c9b5f28cc5188c5
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/uno/Reference.h>
22 #include <com/sun/star/linguistic2/SpellFailure.hpp>
23 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
24 #include <comphelper/lok.hxx>
25 #include <comphelper/processfactory.hxx>
26 #include <cppuhelper/factory.hxx>
27 #include <cppuhelper/supportsservice.hxx>
28 #include <com/sun/star/lang/XSingleServiceFactory.hpp>
29 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
30 #include <com/sun/star/registry/XRegistryKey.hpp>
31 #include <tools/debug.hxx>
32 #include <osl/mutex.hxx>
33 #include <osl/thread.h>
34 #include <com/sun/star/ucb/XSimpleFileAccess.hpp>
36 #include <lingutil.hxx>
37 #include <hunspell.hxx>
38 #include "sspellimp.hxx"
40 #include <linguistic/lngprops.hxx>
41 #include <linguistic/spelldta.hxx>
42 #include <i18nlangtag/languagetag.hxx>
43 #include <svtools/strings.hrc>
44 #include <unotools/pathoptions.hxx>
45 #include <unotools/lingucfg.hxx>
46 #include <unotools/resmgr.hxx>
47 #include <unotools/useroptions.hxx>
48 #include <osl/file.hxx>
49 #include <rtl/ustrbuf.hxx>
50 #include <rtl/textenc.h>
51 #include <sal/log.hxx>
53 #include <numeric>
54 #include <utility>
55 #include <vector>
56 #include <set>
57 #include <string.h>
59 using namespace utl;
60 using namespace osl;
61 using namespace com::sun::star;
62 using namespace com::sun::star::beans;
63 using namespace com::sun::star::lang;
64 using namespace com::sun::star::uno;
65 using namespace com::sun::star::linguistic2;
66 using namespace linguistic;
// XML header that prefixes SPELLML queries. The fallback below only applies
// when SPELL_XML has not been defined before this point.
#ifndef SPELL_XML
#define SPELL_XML "<?xml?>"
#endif

// MAXWORDLEN is only available in hunspell >= 1.5; provide a fallback otherwise.
#ifndef MAXWORDLEN
#define MAXWORDLEN 176
#endif
78 SpellChecker::SpellChecker() :
79 m_aEvtListeners(GetLinguMutex()),
80 m_bDisposing(false)
84 SpellChecker::DictItem::DictItem(OUString i_DName, Locale i_DLoc, rtl_TextEncoding i_DEnc)
85 : m_aDName(std::move(i_DName))
86 , m_aDLoc(std::move(i_DLoc))
87 , m_aDEnc(i_DEnc)
91 SpellChecker::~SpellChecker()
93 if (m_pPropHelper)
95 m_pPropHelper->RemoveAsPropListener();
99 PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl()
101 if (!m_pPropHelper)
103 Reference< XLinguProperties > xPropSet = GetLinguProperties();
105 m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) );
106 m_pPropHelper->AddAsPropListener(); //! after a reference is established
108 return *m_pPropHelper;
111 Sequence< Locale > SAL_CALL SpellChecker::getLocales()
113 MutexGuard aGuard( GetLinguMutex() );
115 // this routine should return the locales supported by the installed
116 // dictionaries.
117 if (m_DictItems.empty())
119 SvtLinguConfig aLinguCfg;
121 // get list of extension dictionaries-to-use
122 // (or better speaking: the list of dictionaries using the
123 // new configuration entries).
124 std::vector< SvtLinguConfigDictionaryEntry > aDics;
125 uno::Sequence< OUString > aFormatList;
126 aLinguCfg.GetSupportedDictionaryFormatsFor( "SpellCheckers",
127 "org.openoffice.lingu.MySpellSpellChecker", aFormatList );
128 for (auto const& format : std::as_const(aFormatList))
130 std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
131 aLinguCfg.GetActiveDictionariesByFormat(format) );
132 aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
135 //!! for compatibility with old dictionaries (the ones not using extensions
136 //!! or new configuration entries, but still using the dictionary.lst file)
137 //!! Get the list of old style spell checking dictionaries to use...
138 std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
139 GetOldStyleDics( "DICT" ) );
141 // to prefer dictionaries with configuration entries we will only
142 // use those old style dictionaries that add a language that
143 // is not yet supported by the list of new style dictionaries
144 MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
146 if (!aDics.empty())
148 uno::Reference< lang::XMultiServiceFactory > xServiceFactory(comphelper::getProcessServiceFactory());
149 uno::Reference< ucb::XSimpleFileAccess > xAccess(xServiceFactory->createInstance("com.sun.star.ucb.SimpleFileAccess"), uno::UNO_QUERY);
150 // get supported locales from the dictionaries-to-use...
151 std::set<OUString> aLocaleNamesSet;
152 for (auto const& dict : aDics)
154 const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames );
155 uno::Sequence< OUString > aLocations( dict.aLocations );
156 SAL_WARN_IF(
157 aLocaleNames.hasElements() && !aLocations.hasElements(),
158 "lingucomponent", "no locations");
159 if (aLocations.hasElements())
161 if (xAccess.is() && xAccess->exists(aLocations[0]))
163 for (auto const& locale : aLocaleNames)
165 if (!comphelper::LibreOfficeKit::isWhitelistedLanguage(locale))
166 continue;
168 aLocaleNamesSet.insert(locale);
171 else
173 SAL_WARN(
174 "lingucomponent",
175 "missing <" << aLocations[0] << ">");
179 // ... and add them to the resulting sequence
180 m_aSuppLocales.realloc( aLocaleNamesSet.size() );
181 sal_Int32 k = 0;
182 for (auto const& localeName : aLocaleNamesSet)
184 Locale aTmp( LanguageTag::convertToLocale(localeName));
185 m_aSuppLocales[k++] = aTmp;
188 //! For each dictionary and each locale we need a separate entry.
189 //! If this results in more than one dictionary per locale than (for now)
190 //! it is undefined which dictionary gets used.
191 //! In the future the implementation should support using several dictionaries
192 //! for one locale.
193 sal_uInt32 nDictSize = std::accumulate(aDics.begin(), aDics.end(), sal_uInt32(0),
194 [](const sal_uInt32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
195 return nSum + dict.aLocaleNames.getLength(); });
197 // add dictionary information
198 m_DictItems.reserve(nDictSize);
199 for (auto const& dict : aDics)
201 if (dict.aLocaleNames.hasElements() &&
202 dict.aLocations.hasElements())
204 const uno::Sequence< OUString > aLocaleNames( dict.aLocaleNames );
206 // currently only one language per dictionary is supported in the actual implementation...
207 // Thus here we work-around this by adding the same dictionary several times.
208 // Once for each of its supported locales.
209 for (auto const& localeName : aLocaleNames)
211 // also both files have to be in the same directory and the
212 // file names must only differ in the extension (.aff/.dic).
213 // Thus we use the first location only and strip the extension part.
214 OUString aLocation = dict.aLocations[0];
215 sal_Int32 nPos = aLocation.lastIndexOf( '.' );
216 aLocation = aLocation.copy( 0, nPos );
218 m_DictItems.emplace_back(aLocation, LanguageTag::convertToLocale(localeName), RTL_TEXTENCODING_DONTKNOW);
222 DBG_ASSERT( nDictSize == m_DictItems.size(), "index mismatch?" );
224 else
226 // no dictionary found so register no dictionaries
227 m_aSuppLocales.realloc(0);
231 return m_aSuppLocales;
234 sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale)
236 MutexGuard aGuard( GetLinguMutex() );
238 bool bRes = false;
239 if (!m_aSuppLocales.hasElements())
240 getLocales();
242 for (auto const& suppLocale : std::as_const(m_aSuppLocales))
244 if (rLocale == suppLocale)
246 bRes = true;
247 break;
250 return bRes;
253 sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale &rLocale)
255 if (rWord.getLength() > MAXWORDLEN)
256 return -1;
258 Hunspell * pMS = nullptr;
259 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
261 // initialize a myspell object for each dictionary once
262 // (note: mutex is held higher up in isValid)
264 sal_Int16 nRes = -1;
266 // first handle smart quotes both single and double
267 OUStringBuffer rBuf(rWord);
268 sal_Int32 n = rBuf.getLength();
269 sal_Unicode c;
270 sal_Int32 extrachar = 0;
272 for (sal_Int32 ix=0; ix < n; ix++)
274 c = rBuf[ix];
275 if ((c == 0x201C) || (c == 0x201D))
276 rBuf[ix] = u'"';
277 else if ((c == 0x2018) || (c == 0x2019))
278 rBuf[ix] = u'\'';
280 // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
281 // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
282 // set ICONV and IGNORE aff file options, if needed.)
283 else if ((c == 0x200C) || (c == 0x200D) ||
284 ((c >= 0xFB00) && (c <= 0xFB04)))
285 extrachar = 1;
287 OUString nWord(rBuf.makeStringAndClear());
289 if (n)
291 for (auto& currDict : m_DictItems)
293 pMS = nullptr;
294 eEnc = RTL_TEXTENCODING_DONTKNOW;
296 if (rLocale == currDict.m_aDLoc)
298 if (!currDict.m_pDict)
300 OUString dicpath = currDict.m_aDName + ".dic";
301 OUString affpath = currDict.m_aDName + ".aff";
302 OUString dict;
303 OUString aff;
304 osl::FileBase::getSystemPathFromFileURL(dicpath,dict);
305 osl::FileBase::getSystemPathFromFileURL(affpath,aff);
306 #if defined(_WIN32)
307 // workaround for Windows specific problem that the
308 // path length in calls to 'fopen' is limited to somewhat
309 // about 120+ characters which will usually be exceed when
310 // using dictionaries as extensions. (Hunspell waits UTF-8 encoded
311 // path with \\?\ long path prefix.)
312 OString aTmpaff = Win_AddLongPathPrefix(OUStringToOString(aff, RTL_TEXTENCODING_UTF8));
313 OString aTmpdict = Win_AddLongPathPrefix(OUStringToOString(dict, RTL_TEXTENCODING_UTF8));
314 #else
315 OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding()));
316 OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding()));
317 #endif
319 currDict.m_pDict = std::make_unique<Hunspell>(aTmpaff.getStr(),aTmpdict.getStr());
320 #if defined(H_DEPRECATED)
321 currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dict_encoding().c_str());
322 #else
323 currDict.m_aDEnc = getTextEncodingFromCharset(currDict.m_pDict->get_dic_encoding());
324 #endif
326 pMS = currDict.m_pDict.get();
327 eEnc = currDict.m_aDEnc;
330 if (pMS)
332 // we don't want to work with a default text encoding since following incorrect
333 // results may occur only for specific text and thus may be hard to notice.
334 // Thus better always make a clean exit here if the text encoding is in question.
335 // Hopefully something not working at all will raise proper attention quickly. ;-)
336 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
337 if (eEnc == RTL_TEXTENCODING_DONTKNOW)
338 return -1;
340 OString aWrd(OU2ENC(nWord,eEnc));
341 #if defined(H_DEPRECATED)
342 bool bVal = pMS->spell(std::string(aWrd.getStr()));
343 #else
344 bool bVal = pMS->spell(aWrd.getStr()) != 0;
345 #endif
346 if (!bVal) {
347 if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) {
348 OUStringBuffer aBuf(nWord);
349 n = aBuf.getLength();
350 for (sal_Int32 ix=n-1; ix >= 0; ix--)
352 switch (aBuf[ix]) {
353 case 0xFB00: aBuf.remove(ix, 1); aBuf.insert(ix, "ff"); break;
354 case 0xFB01: aBuf.remove(ix, 1); aBuf.insert(ix, "fi"); break;
355 case 0xFB02: aBuf.remove(ix, 1); aBuf.insert(ix, "fl"); break;
356 case 0xFB03: aBuf.remove(ix, 1); aBuf.insert(ix, "ffi"); break;
357 case 0xFB04: aBuf.remove(ix, 1); aBuf.insert(ix, "ffl"); break;
358 case 0x200C:
359 case 0x200D: aBuf.remove(ix, 1); break;
362 OUString aWord(aBuf.makeStringAndClear());
363 OString bWrd(OU2ENC(aWord, eEnc));
364 #if defined(H_DEPRECATED)
365 bVal = pMS->spell(std::string(bWrd.getStr()));
366 #else
367 bVal = pMS->spell(bWrd.getStr()) != 0;
368 #endif
369 if (bVal) return -1;
371 nRes = SpellFailure::SPELLING_ERROR;
372 } else {
373 return -1;
375 pMS = nullptr;
380 return nRes;
383 sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale,
384 const css::uno::Sequence< css::beans::PropertyValue >& rProperties )
386 MutexGuard aGuard( GetLinguMutex() );
388 if (rLocale == Locale() || rWord.isEmpty())
389 return true;
391 if (!hasLocale( rLocale ))
392 return true;
394 // return sal_False to process SPELLML requests (they are longer than the header)
395 if (rWord.match(SPELL_XML, 0) && (rWord.getLength() > 10)) return false;
397 // Get property values to be used.
398 // These are be the default values set in the SN_LINGU_PROPERTIES
399 // PropertySet which are overridden by the supplied ones from the
400 // last argument.
401 // You'll probably like to use a simpler solution than the provided
402 // one using the PropertyHelper_Spell.
403 PropertyHelper_Spelling& rHelper = GetPropHelper();
404 rHelper.SetTmpPropVals( rProperties );
406 sal_Int16 nFailure = GetSpellFailure( rWord, rLocale );
407 if (nFailure != -1 && !rWord.match(SPELL_XML, 0))
409 LanguageType nLang = LinguLocaleToLanguage( rLocale );
410 // postprocess result for errors that should be ignored
411 const bool bIgnoreError =
412 (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) ||
413 (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) ||
414 (!rHelper.IsSpellCapitalization() && nFailure == SpellFailure::CAPTION_ERROR);
415 if (bIgnoreError)
416 nFailure = -1;
419 return (nFailure == -1);
422 Reference< XSpellAlternatives >
423 SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale )
425 // Retrieves the return values for the 'spell' function call in case
426 // of a misspelled word.
427 // Especially it may give a list of suggested (correct) words:
428 Reference< XSpellAlternatives > xRes;
429 // note: mutex is held by higher up by spell which covers both
431 Hunspell* pMS = nullptr;
432 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
434 // first handle smart quotes (single and double)
435 OUStringBuffer rBuf(rWord);
436 sal_Int32 n = rBuf.getLength();
437 sal_Unicode c;
438 for (sal_Int32 ix=0; ix < n; ix++)
440 c = rBuf[ix];
441 if ((c == 0x201C) || (c == 0x201D))
442 rBuf[ix] = u'"';
443 if ((c == 0x2018) || (c == 0x2019))
444 rBuf[ix] = u'\'';
446 OUString nWord(rBuf.makeStringAndClear());
448 if (n)
450 LanguageType nLang = LinguLocaleToLanguage( rLocale );
451 int numsug = 0;
453 Sequence< OUString > aStr( 0 );
454 for (const auto& currDict : m_DictItems)
456 pMS = nullptr;
457 eEnc = RTL_TEXTENCODING_DONTKNOW;
459 if (rLocale == currDict.m_aDLoc)
461 pMS = currDict.m_pDict.get();
462 eEnc = currDict.m_aDEnc;
465 if (pMS)
467 OString aWrd(OU2ENC(nWord,eEnc));
468 #if defined(H_DEPRECATED)
469 std::vector<std::string> suglst = pMS->suggest(std::string(aWrd.getStr()));
470 if (!suglst.empty())
472 aStr.realloc(numsug + suglst.size());
473 OUString *pStr = aStr.getArray();
474 for (size_t ii = 0; ii < suglst.size(); ++ii)
476 OUString cvtwrd(suglst[ii].c_str(), suglst[ii].size(), eEnc);
477 pStr[numsug + ii] = cvtwrd;
479 numsug += suglst.size();
481 #else
482 char ** suglst = nullptr;
483 int count = pMS->suggest(&suglst, aWrd.getStr());
484 if (count)
486 aStr.realloc( numsug + count );
487 OUString *pStr = aStr.getArray();
488 for (int ii=0; ii < count; ++ii)
490 OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc);
491 pStr[numsug + ii] = cvtwrd;
493 numsug += count;
495 pMS->free_list(&suglst, count);
496 #endif
500 // now return an empty alternative for no suggestions or the list of alternatives if some found
501 xRes = SpellAlternatives::CreateSpellAlternatives( rWord, nLang, SpellFailure::SPELLING_ERROR, aStr );
502 return xRes;
504 return xRes;
507 Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell(
508 const OUString& rWord, const Locale& rLocale,
509 const css::uno::Sequence< css::beans::PropertyValue >& rProperties )
511 MutexGuard aGuard( GetLinguMutex() );
513 if (rLocale == Locale() || rWord.isEmpty())
514 return nullptr;
516 if (!hasLocale( rLocale ))
517 return nullptr;
519 Reference< XSpellAlternatives > xAlt;
520 if (!isValid( rWord, rLocale, rProperties ))
522 xAlt = GetProposals( rWord, rLocale );
524 return xAlt;
527 /// @throws Exception
528 static Reference< XInterface > SpellChecker_CreateInstance(
529 const Reference< XMultiServiceFactory > & /*rSMgr*/ )
532 Reference< XInterface > xService = static_cast<cppu::OWeakObject*>(new SpellChecker);
533 return xService;
536 sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener(
537 const Reference< XLinguServiceEventListener >& rxLstnr )
539 MutexGuard aGuard( GetLinguMutex() );
541 bool bRes = false;
542 if (!m_bDisposing && rxLstnr.is())
544 bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
546 return bRes;
549 sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener(
550 const Reference< XLinguServiceEventListener >& rxLstnr )
552 MutexGuard aGuard( GetLinguMutex() );
554 bool bRes = false;
555 if (!m_bDisposing && rxLstnr.is())
557 bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
559 return bRes;
562 OUString SAL_CALL SpellChecker::getServiceDisplayName(const Locale& rLocale)
564 std::locale loc(Translate::Create("svt", LanguageTag(rLocale)));
565 return Translate::get(STR_DESCRIPTION_HUNSPELL, loc);
568 void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments )
570 MutexGuard aGuard( GetLinguMutex() );
572 if (!m_pPropHelper)
574 sal_Int32 nLen = rArguments.getLength();
575 if (2 == nLen)
577 Reference< XLinguProperties > xPropSet;
578 rArguments.getConstArray()[0] >>= xPropSet;
579 // rArguments.getConstArray()[1] >>= xDicList;
581 //! Pointer allows for access of the non-UNO functions.
582 //! And the reference to the UNO-functions while increasing
583 //! the ref-count and will implicitly free the memory
584 //! when the object is no longer used.
585 m_pPropHelper.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker *>(this), xPropSet ) );
586 m_pPropHelper->AddAsPropListener(); //! after a reference is established
588 else {
589 OSL_FAIL( "wrong number of arguments in sequence" );
594 void SAL_CALL SpellChecker::dispose()
596 MutexGuard aGuard( GetLinguMutex() );
598 if (!m_bDisposing)
600 m_bDisposing = true;
601 EventObject aEvtObj( static_cast<XSpellChecker *>(this) );
602 m_aEvtListeners.disposeAndClear( aEvtObj );
603 if (m_pPropHelper)
605 m_pPropHelper->RemoveAsPropListener();
606 m_pPropHelper.reset();
611 void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener )
613 MutexGuard aGuard( GetLinguMutex() );
615 if (!m_bDisposing && rxListener.is())
616 m_aEvtListeners.addInterface( rxListener );
619 void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener )
621 MutexGuard aGuard( GetLinguMutex() );
623 if (!m_bDisposing && rxListener.is())
624 m_aEvtListeners.removeInterface( rxListener );
627 // Service specific part
628 OUString SAL_CALL SpellChecker::getImplementationName()
630 return getImplementationName_Static();
633 sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName )
635 return cppu::supportsService(this, ServiceName);
638 Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames()
640 return getSupportedServiceNames_Static();
643 Sequence< OUString > SpellChecker::getSupportedServiceNames_Static()
644 throw()
646 Sequence< OUString > aSNS { SN_SPELLCHECKER };
647 return aSNS;
650 extern "C"
653 SAL_DLLPUBLIC_EXPORT void * spell_component_getFactory(
654 const sal_Char * pImplName, void * pServiceManager, void * /*pRegistryKey*/ )
656 void * pRet = nullptr;
657 if ( SpellChecker::getImplementationName_Static().equalsAscii( pImplName ) )
659 Reference< XSingleServiceFactory > xFactory =
660 cppu::createOneInstanceFactory(
661 static_cast< XMultiServiceFactory * >( pServiceManager ),
662 SpellChecker::getImplementationName_Static(),
663 SpellChecker_CreateInstance,
664 SpellChecker::getSupportedServiceNames_Static());
665 // acquire, because we return an interface pointer instead of a reference
666 xFactory->acquire();
667 pRet = xFactory.get();
669 return pRet;
674 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */