1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/uno/Reference.h>
22 #include <com/sun/star/linguistic2/SpellFailure.hpp>
23 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
24 #include <comphelper/lok.hxx>
25 #include <comphelper/processfactory.hxx>
26 #include <cppuhelper/factory.hxx>
27 #include <cppuhelper/supportsservice.hxx>
28 #include <com/sun/star/lang/XSingleServiceFactory.hpp>
29 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
30 #include <com/sun/star/registry/XRegistryKey.hpp>
31 #include <tools/debug.hxx>
32 #include <osl/mutex.hxx>
33 #include <osl/thread.h>
34 #include <com/sun/star/ucb/XSimpleFileAccess.hpp>
36 #include <lingutil.hxx>
37 #include <hunspell.hxx>
38 #include "sspellimp.hxx"
40 #include <linguistic/lngprops.hxx>
41 #include <linguistic/spelldta.hxx>
42 #include <i18nlangtag/languagetag.hxx>
43 #include <svtools/strings.hrc>
44 #include <unotools/pathoptions.hxx>
45 #include <unotools/lingucfg.hxx>
46 #include <unotools/resmgr.hxx>
47 #include <unotools/useroptions.hxx>
48 #include <osl/file.hxx>
49 #include <rtl/ustrbuf.hxx>
50 #include <rtl/textenc.h>
51 #include <sal/log.hxx>
61 using namespace com::sun::star
;
62 using namespace com::sun::star::beans
;
63 using namespace com::sun::star::lang
;
64 using namespace com::sun::star::uno
;
65 using namespace com::sun::star::linguistic2
;
66 using namespace linguistic
;
68 // XML-header of SPELLML queries
69 #if !defined SPELL_XML
70 #define SPELL_XML "<?xml?>"
73 // only available in hunspell >= 1.5
74 #if !defined MAXWORDLEN
75 #define MAXWORDLEN 176
78 SpellChecker::SpellChecker() :
79 m_aEvtListeners(GetLinguMutex()),
84 SpellChecker::DictItem::DictItem(OUString i_DName
, Locale i_DLoc
, rtl_TextEncoding i_DEnc
)
85 : m_aDName(std::move(i_DName
))
86 , m_aDLoc(std::move(i_DLoc
))
91 SpellChecker::~SpellChecker()
95 m_pPropHelper
->RemoveAsPropListener();
99 PropertyHelper_Spelling
& SpellChecker::GetPropHelper_Impl()
103 Reference
< XLinguProperties
> xPropSet
= GetLinguProperties();
105 m_pPropHelper
.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker
*>(this), xPropSet
) );
106 m_pPropHelper
->AddAsPropListener(); //! after a reference is established
108 return *m_pPropHelper
;
111 Sequence
< Locale
> SAL_CALL
SpellChecker::getLocales()
113 MutexGuard
aGuard( GetLinguMutex() );
115 // this routine should return the locales supported by the installed
117 if (m_DictItems
.empty())
119 SvtLinguConfig aLinguCfg
;
121 // get list of extension dictionaries-to-use
122 // (or, more precisely: the list of dictionaries using the
123 // new configuration entries).
124 std::vector
< SvtLinguConfigDictionaryEntry
> aDics
;
125 uno::Sequence
< OUString
> aFormatList
;
126 aLinguCfg
.GetSupportedDictionaryFormatsFor( "SpellCheckers",
127 "org.openoffice.lingu.MySpellSpellChecker", aFormatList
);
128 for (auto const& format
: std::as_const(aFormatList
))
130 std::vector
< SvtLinguConfigDictionaryEntry
> aTmpDic(
131 aLinguCfg
.GetActiveDictionariesByFormat(format
) );
132 aDics
.insert( aDics
.end(), aTmpDic
.begin(), aTmpDic
.end() );
135 //!! for compatibility with old dictionaries (the ones not using extensions
136 //!! or new configuration entries, but still using the dictionary.lst file)
137 //!! Get the list of old style spell checking dictionaries to use...
138 std::vector
< SvtLinguConfigDictionaryEntry
> aOldStyleDics(
139 GetOldStyleDics( "DICT" ) );
141 // to prefer dictionaries with configuration entries we will only
142 // use those old style dictionaries that add a language that
143 // is not yet supported by the list of new style dictionaries
144 MergeNewStyleDicsAndOldStyleDics( aDics
, aOldStyleDics
);
148 uno::Reference
< lang::XMultiServiceFactory
> xServiceFactory(comphelper::getProcessServiceFactory());
149 uno::Reference
< ucb::XSimpleFileAccess
> xAccess(xServiceFactory
->createInstance("com.sun.star.ucb.SimpleFileAccess"), uno::UNO_QUERY
);
150 // get supported locales from the dictionaries-to-use...
151 std::set
<OUString
> aLocaleNamesSet
;
152 for (auto const& dict
: aDics
)
154 const uno::Sequence
< OUString
> aLocaleNames( dict
.aLocaleNames
);
155 uno::Sequence
< OUString
> aLocations( dict
.aLocations
);
157 aLocaleNames
.hasElements() && !aLocations
.hasElements(),
158 "lingucomponent", "no locations");
159 if (aLocations
.hasElements())
161 if (xAccess
.is() && xAccess
->exists(aLocations
[0]))
163 for (auto const& locale
: aLocaleNames
)
165 if (!comphelper::LibreOfficeKit::isWhitelistedLanguage(locale
))
168 aLocaleNamesSet
.insert(locale
);
175 "missing <" << aLocations
[0] << ">");
179 // ... and add them to the resulting sequence
180 m_aSuppLocales
.realloc( aLocaleNamesSet
.size() );
182 for (auto const& localeName
: aLocaleNamesSet
)
184 Locale
aTmp( LanguageTag::convertToLocale(localeName
));
185 m_aSuppLocales
[k
++] = aTmp
;
188 //! For each dictionary and each locale we need a separate entry.
189 //! If this results in more than one dictionary per locale then (for now)
190 //! it is undefined which dictionary gets used.
191 //! In the future the implementation should support using several dictionaries
193 sal_uInt32 nDictSize
= std::accumulate(aDics
.begin(), aDics
.end(), sal_uInt32(0),
194 [](const sal_uInt32 nSum
, const SvtLinguConfigDictionaryEntry
& dict
) {
195 return nSum
+ dict
.aLocaleNames
.getLength(); });
197 // add dictionary information
198 m_DictItems
.reserve(nDictSize
);
199 for (auto const& dict
: aDics
)
201 if (dict
.aLocaleNames
.hasElements() &&
202 dict
.aLocations
.hasElements())
204 const uno::Sequence
< OUString
> aLocaleNames( dict
.aLocaleNames
);
206 // currently only one language per dictionary is supported in the actual implementation...
207 // Thus here we work-around this by adding the same dictionary several times.
208 // Once for each of its supported locales.
209 for (auto const& localeName
: aLocaleNames
)
211 // also both files have to be in the same directory and the
212 // file names must only differ in the extension (.aff/.dic).
213 // Thus we use the first location only and strip the extension part.
214 OUString aLocation
= dict
.aLocations
[0];
215 sal_Int32 nPos
= aLocation
.lastIndexOf( '.' );
216 aLocation
= aLocation
.copy( 0, nPos
);
218 m_DictItems
.emplace_back(aLocation
, LanguageTag::convertToLocale(localeName
), RTL_TEXTENCODING_DONTKNOW
);
222 DBG_ASSERT( nDictSize
== m_DictItems
.size(), "index mismatch?" );
226 // no dictionary found so register no dictionaries
227 m_aSuppLocales
.realloc(0);
231 return m_aSuppLocales
;
234 sal_Bool SAL_CALL
SpellChecker::hasLocale(const Locale
& rLocale
)
236 MutexGuard
aGuard( GetLinguMutex() );
239 if (!m_aSuppLocales
.hasElements())
242 for (auto const& suppLocale
: std::as_const(m_aSuppLocales
))
244 if (rLocale
== suppLocale
)
253 sal_Int16
SpellChecker::GetSpellFailure(const OUString
&rWord
, const Locale
&rLocale
)
255 if (rWord
.getLength() > MAXWORDLEN
)
258 Hunspell
* pMS
= nullptr;
259 rtl_TextEncoding eEnc
= RTL_TEXTENCODING_DONTKNOW
;
261 // initialize a myspell object for each dictionary once
262 // (note: mutex is held higher up in isValid)
266 // first handle smart quotes both single and double
267 OUStringBuffer
rBuf(rWord
);
268 sal_Int32 n
= rBuf
.getLength();
270 sal_Int32 extrachar
= 0;
272 for (sal_Int32 ix
=0; ix
< n
; ix
++)
275 if ((c
== 0x201C) || (c
== 0x201D))
277 else if ((c
== 0x2018) || (c
== 0x2019))
280 // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
281 // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
282 // set ICONV and IGNORE aff file options, if needed.)
283 else if ((c
== 0x200C) || (c
== 0x200D) ||
284 ((c
>= 0xFB00) && (c
<= 0xFB04)))
287 OUString
nWord(rBuf
.makeStringAndClear());
291 for (auto& currDict
: m_DictItems
)
294 eEnc
= RTL_TEXTENCODING_DONTKNOW
;
296 if (rLocale
== currDict
.m_aDLoc
)
298 if (!currDict
.m_pDict
)
300 OUString dicpath
= currDict
.m_aDName
+ ".dic";
301 OUString affpath
= currDict
.m_aDName
+ ".aff";
304 osl::FileBase::getSystemPathFromFileURL(dicpath
,dict
);
305 osl::FileBase::getSystemPathFromFileURL(affpath
,aff
);
307 // workaround for a Windows-specific problem: the
308 // path length in calls to 'fopen' is limited to
309 // about 120+ characters, which will usually be exceeded when
310 // using dictionaries as extensions. (Hunspell expects a UTF-8 encoded
311 // path with the \\?\ long path prefix.)
312 OString aTmpaff
= Win_AddLongPathPrefix(OUStringToOString(aff
, RTL_TEXTENCODING_UTF8
));
313 OString aTmpdict
= Win_AddLongPathPrefix(OUStringToOString(dict
, RTL_TEXTENCODING_UTF8
));
315 OString
aTmpaff(OU2ENC(aff
,osl_getThreadTextEncoding()));
316 OString
aTmpdict(OU2ENC(dict
,osl_getThreadTextEncoding()));
319 currDict
.m_pDict
= std::make_unique
<Hunspell
>(aTmpaff
.getStr(),aTmpdict
.getStr());
320 #if defined(H_DEPRECATED)
321 currDict
.m_aDEnc
= getTextEncodingFromCharset(currDict
.m_pDict
->get_dict_encoding().c_str());
323 currDict
.m_aDEnc
= getTextEncodingFromCharset(currDict
.m_pDict
->get_dic_encoding());
326 pMS
= currDict
.m_pDict
.get();
327 eEnc
= currDict
.m_aDEnc
;
332 // we don't want to work with a default text encoding since following incorrect
333 // results may occur only for specific text and thus may be hard to notice.
334 // Thus better always make a clean exit here if the text encoding is in question.
335 // Hopefully something not working at all will raise proper attention quickly. ;-)
336 DBG_ASSERT( eEnc
!= RTL_TEXTENCODING_DONTKNOW
, "failed to get text encoding! (maybe incorrect encoding string in file)" );
337 if (eEnc
== RTL_TEXTENCODING_DONTKNOW
)
340 OString
aWrd(OU2ENC(nWord
,eEnc
));
341 #if defined(H_DEPRECATED)
342 bool bVal
= pMS
->spell(std::string(aWrd
.getStr()));
344 bool bVal
= pMS
->spell(aWrd
.getStr()) != 0;
347 if (extrachar
&& (eEnc
!= RTL_TEXTENCODING_UTF8
)) {
348 OUStringBuffer
aBuf(nWord
);
349 n
= aBuf
.getLength();
350 for (sal_Int32 ix
=n
-1; ix
>= 0; ix
--)
353 case 0xFB00: aBuf
.remove(ix
, 1); aBuf
.insert(ix
, "ff"); break;
354 case 0xFB01: aBuf
.remove(ix
, 1); aBuf
.insert(ix
, "fi"); break;
355 case 0xFB02: aBuf
.remove(ix
, 1); aBuf
.insert(ix
, "fl"); break;
356 case 0xFB03: aBuf
.remove(ix
, 1); aBuf
.insert(ix
, "ffi"); break;
357 case 0xFB04: aBuf
.remove(ix
, 1); aBuf
.insert(ix
, "ffl"); break;
359 case 0x200D: aBuf
.remove(ix
, 1); break;
362 OUString
aWord(aBuf
.makeStringAndClear());
363 OString
bWrd(OU2ENC(aWord
, eEnc
));
364 #if defined(H_DEPRECATED)
365 bVal
= pMS
->spell(std::string(bWrd
.getStr()));
367 bVal
= pMS
->spell(bWrd
.getStr()) != 0;
371 nRes
= SpellFailure::SPELLING_ERROR
;
383 sal_Bool SAL_CALL
SpellChecker::isValid( const OUString
& rWord
, const Locale
& rLocale
,
384 const css::uno::Sequence
< css::beans::PropertyValue
>& rProperties
)
386 MutexGuard
aGuard( GetLinguMutex() );
388 if (rLocale
== Locale() || rWord
.isEmpty())
391 if (!hasLocale( rLocale
))
394 // return sal_False to process SPELLML requests (they are longer than the header)
395 if (rWord
.match(SPELL_XML
, 0) && (rWord
.getLength() > 10)) return false;
397 // Get property values to be used.
398 // These are the default values set in the SN_LINGU_PROPERTIES
399 // PropertySet which are overridden by the supplied ones from the
401 // You'll probably like to use a simpler solution than the provided
402 // one using the PropertyHelper_Spelling.
403 PropertyHelper_Spelling
& rHelper
= GetPropHelper();
404 rHelper
.SetTmpPropVals( rProperties
);
406 sal_Int16 nFailure
= GetSpellFailure( rWord
, rLocale
);
407 if (nFailure
!= -1 && !rWord
.match(SPELL_XML
, 0))
409 LanguageType nLang
= LinguLocaleToLanguage( rLocale
);
410 // postprocess result for errors that should be ignored
411 const bool bIgnoreError
=
412 (!rHelper
.IsSpellUpperCase() && IsUpper( rWord
, nLang
)) ||
413 (!rHelper
.IsSpellWithDigits() && HasDigits( rWord
)) ||
414 (!rHelper
.IsSpellCapitalization() && nFailure
== SpellFailure::CAPTION_ERROR
);
419 return (nFailure
== -1);
422 Reference
< XSpellAlternatives
>
423 SpellChecker::GetProposals( const OUString
&rWord
, const Locale
&rLocale
)
425 // Retrieves the return values for the 'spell' function call in case
426 // of a misspelled word.
427 // Especially it may give a list of suggested (correct) words:
428 Reference
< XSpellAlternatives
> xRes
;
429 // note: the mutex is held higher up by spell(), which covers both
431 Hunspell
* pMS
= nullptr;
432 rtl_TextEncoding eEnc
= RTL_TEXTENCODING_DONTKNOW
;
434 // first handle smart quotes (single and double)
435 OUStringBuffer
rBuf(rWord
);
436 sal_Int32 n
= rBuf
.getLength();
438 for (sal_Int32 ix
=0; ix
< n
; ix
++)
441 if ((c
== 0x201C) || (c
== 0x201D))
443 if ((c
== 0x2018) || (c
== 0x2019))
446 OUString
nWord(rBuf
.makeStringAndClear());
450 LanguageType nLang
= LinguLocaleToLanguage( rLocale
);
453 Sequence
< OUString
> aStr( 0 );
454 for (const auto& currDict
: m_DictItems
)
457 eEnc
= RTL_TEXTENCODING_DONTKNOW
;
459 if (rLocale
== currDict
.m_aDLoc
)
461 pMS
= currDict
.m_pDict
.get();
462 eEnc
= currDict
.m_aDEnc
;
467 OString
aWrd(OU2ENC(nWord
,eEnc
));
468 #if defined(H_DEPRECATED)
469 std::vector
<std::string
> suglst
= pMS
->suggest(std::string(aWrd
.getStr()));
472 aStr
.realloc(numsug
+ suglst
.size());
473 OUString
*pStr
= aStr
.getArray();
474 for (size_t ii
= 0; ii
< suglst
.size(); ++ii
)
476 OUString
cvtwrd(suglst
[ii
].c_str(), suglst
[ii
].size(), eEnc
);
477 pStr
[numsug
+ ii
] = cvtwrd
;
479 numsug
+= suglst
.size();
482 char ** suglst
= nullptr;
483 int count
= pMS
->suggest(&suglst
, aWrd
.getStr());
486 aStr
.realloc( numsug
+ count
);
487 OUString
*pStr
= aStr
.getArray();
488 for (int ii
=0; ii
< count
; ++ii
)
490 OUString
cvtwrd(suglst
[ii
],strlen(suglst
[ii
]),eEnc
);
491 pStr
[numsug
+ ii
] = cvtwrd
;
495 pMS
->free_list(&suglst
, count
);
500 // now return an empty alternative for no suggestions or the list of alternatives if some found
501 xRes
= SpellAlternatives::CreateSpellAlternatives( rWord
, nLang
, SpellFailure::SPELLING_ERROR
, aStr
);
507 Reference
< XSpellAlternatives
> SAL_CALL
SpellChecker::spell(
508 const OUString
& rWord
, const Locale
& rLocale
,
509 const css::uno::Sequence
< css::beans::PropertyValue
>& rProperties
)
511 MutexGuard
aGuard( GetLinguMutex() );
513 if (rLocale
== Locale() || rWord
.isEmpty())
516 if (!hasLocale( rLocale
))
519 Reference
< XSpellAlternatives
> xAlt
;
520 if (!isValid( rWord
, rLocale
, rProperties
))
522 xAlt
= GetProposals( rWord
, rLocale
);
527 /// @throws Exception
528 static Reference
< XInterface
> SpellChecker_CreateInstance(
529 const Reference
< XMultiServiceFactory
> & /*rSMgr*/ )
532 Reference
< XInterface
> xService
= static_cast<cppu::OWeakObject
*>(new SpellChecker
);
536 sal_Bool SAL_CALL
SpellChecker::addLinguServiceEventListener(
537 const Reference
< XLinguServiceEventListener
>& rxLstnr
)
539 MutexGuard
aGuard( GetLinguMutex() );
542 if (!m_bDisposing
&& rxLstnr
.is())
544 bRes
= GetPropHelper().addLinguServiceEventListener( rxLstnr
);
549 sal_Bool SAL_CALL
SpellChecker::removeLinguServiceEventListener(
550 const Reference
< XLinguServiceEventListener
>& rxLstnr
)
552 MutexGuard
aGuard( GetLinguMutex() );
555 if (!m_bDisposing
&& rxLstnr
.is())
557 bRes
= GetPropHelper().removeLinguServiceEventListener( rxLstnr
);
562 OUString SAL_CALL
SpellChecker::getServiceDisplayName(const Locale
& rLocale
)
564 std::locale
loc(Translate::Create("svt", LanguageTag(rLocale
)));
565 return Translate::get(STR_DESCRIPTION_HUNSPELL
, loc
);
568 void SAL_CALL
SpellChecker::initialize( const Sequence
< Any
>& rArguments
)
570 MutexGuard
aGuard( GetLinguMutex() );
574 sal_Int32 nLen
= rArguments
.getLength();
577 Reference
< XLinguProperties
> xPropSet
;
578 rArguments
.getConstArray()[0] >>= xPropSet
;
579 // rArguments.getConstArray()[1] >>= xDicList;
581 //! Pointer allows for access of the non-UNO functions.
582 //! And the reference to the UNO-functions while increasing
583 //! the ref-count and will implicitly free the memory
584 //! when the object is no longer used.
585 m_pPropHelper
.reset( new PropertyHelper_Spelling( static_cast<XSpellChecker
*>(this), xPropSet
) );
586 m_pPropHelper
->AddAsPropListener(); //! after a reference is established
589 OSL_FAIL( "wrong number of arguments in sequence" );
594 void SAL_CALL
SpellChecker::dispose()
596 MutexGuard
aGuard( GetLinguMutex() );
601 EventObject
aEvtObj( static_cast<XSpellChecker
*>(this) );
602 m_aEvtListeners
.disposeAndClear( aEvtObj
);
605 m_pPropHelper
->RemoveAsPropListener();
606 m_pPropHelper
.reset();
611 void SAL_CALL
SpellChecker::addEventListener( const Reference
< XEventListener
>& rxListener
)
613 MutexGuard
aGuard( GetLinguMutex() );
615 if (!m_bDisposing
&& rxListener
.is())
616 m_aEvtListeners
.addInterface( rxListener
);
619 void SAL_CALL
SpellChecker::removeEventListener( const Reference
< XEventListener
>& rxListener
)
621 MutexGuard
aGuard( GetLinguMutex() );
623 if (!m_bDisposing
&& rxListener
.is())
624 m_aEvtListeners
.removeInterface( rxListener
);
627 // Service specific part
628 OUString SAL_CALL
SpellChecker::getImplementationName()
630 return getImplementationName_Static();
633 sal_Bool SAL_CALL
SpellChecker::supportsService( const OUString
& ServiceName
)
635 return cppu::supportsService(this, ServiceName
);
638 Sequence
< OUString
> SAL_CALL
SpellChecker::getSupportedServiceNames()
640 return getSupportedServiceNames_Static();
643 Sequence
< OUString
> SpellChecker::getSupportedServiceNames_Static()
646 Sequence
< OUString
> aSNS
{ SN_SPELLCHECKER
};
653 SAL_DLLPUBLIC_EXPORT
void * spell_component_getFactory(
654 const sal_Char
* pImplName
, void * pServiceManager
, void * /*pRegistryKey*/ )
656 void * pRet
= nullptr;
657 if ( SpellChecker::getImplementationName_Static().equalsAscii( pImplName
) )
659 Reference
< XSingleServiceFactory
> xFactory
=
660 cppu::createOneInstanceFactory(
661 static_cast< XMultiServiceFactory
* >( pServiceManager
),
662 SpellChecker::getImplementationName_Static(),
663 SpellChecker_CreateInstance
,
664 SpellChecker::getSupportedServiceNames_Static());
665 // acquire, because we return an interface pointer instead of a reference
667 pRet
= xFactory
.get();
674 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */