1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/uno/Reference.h>
21 #include <cppuhelper/factory.hxx>
22 #include <cppuhelper/supportsservice.hxx>
23 #include <cppuhelper/weak.hxx>
24 #include <com/sun/star/linguistic2/LinguServiceManager.hpp>
25 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
26 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
27 #include <i18nlangtag/languagetag.hxx>
28 #include <tools/debug.hxx>
29 #include <comphelper/lok.hxx>
30 #include <comphelper/processfactory.hxx>
31 #include <comphelper/sequence.hxx>
32 #include <osl/mutex.hxx>
33 #include <osl/thread.h>
34 #include <unotools/lingucfg.hxx>
35 #include <unotools/resmgr.hxx>
37 #include <rtl/string.hxx>
38 #include <rtl/textenc.h>
40 #include <svtools/strings.hrc>
42 #include "nthesimp.hxx"
43 #include <linguistic/misc.hxx>
44 #include "nthesdta.hxx"
51 // XML header used to query SPELLML support: queryMeanings() passes it to
// XSpellChecker1::isValid() before sending the '<?xml?><query type=...>'
// stem/analyze/generate requests to the spell checker.
52 constexpr OUStringLiteral SPELLML_SUPPORT
= u
"<?xml?>";
55 using namespace com::sun::star
;
56 using namespace com::sun::star::beans
;
57 using namespace com::sun::star::lang
;
58 using namespace com::sun::star::uno
;
59 using namespace com::sun::star::linguistic2
;
60 using namespace linguistic
;
// File-local helper: creates the linguistic service manager
// (LinguServiceManager::create) from the process component context.
// Declared to return a Reference< XLinguServiceManager2 >; the return statement
// itself is not part of this excerpt.
62 static uno::Reference
< XLinguServiceManager2
> GetLngSvcMgr_Impl()
64 const uno::Reference
< XComponentContext
>& xContext( comphelper::getProcessComponentContext() );
65 uno::Reference
< XLinguServiceManager2
> xRes
= LinguServiceManager::create( xContext
) ;
// Constructor: the listener container shares the global linguistic mutex, no
// property helper exists yet, the object is not being disposed, and the
// language of the "previous query" is initialised to LANGUAGE_DONTKNOW.
69 Thesaurus::Thesaurus() :
70 aEvtListeners ( GetLinguMutex() ), pPropHelper(nullptr), bDisposing(false),
71 prevLocale(LANGUAGE_DONTKNOW
)
// Destructor: deregisters the property helper as a property listener.
// NOTE(review): any guard around this call (e.g. a null check on pPropHelper)
// is on lines not visible in this excerpt.
75 Thesaurus::~Thesaurus()
80 pPropHelper
->RemoveAsPropListener();
// Creates the PropertyHelper_Thesaurus for this object from the linguistic
// property set (GetLinguProperties()) and registers it as a property listener.
// Declared to return a reference to the helper.
// NOTE(review): presumably the helper is only created when none exists yet;
// the guarding condition and the return statement are not visible here.
84 PropertyHelper_Thesaurus
& Thesaurus::GetPropHelper_Impl()
88 Reference
< XLinguProperties
> xPropSet
= GetLinguProperties();
90 pPropHelper
= new PropertyHelper_Thesaurus( static_cast<XThesaurus
*>(this), xPropSet
);
91 pPropHelper
->AddAsPropListener(); //! after a reference is established
// Collects the locales for which a thesaurus dictionary is available.
//
// Runs under the global linguistic mutex. When mvThesInfo is still empty it
//  1. gathers the active dictionaries for every format the configuration
//     reports for "Thesauri" / "org.openoffice.lingu.new.Thesaurus",
//  2. merges in old style ("THES") dictionaries,
//  3. builds the set of locale names of those dictionaries, converts them to
//     Locale objects and stores them in aSuppLocales,
//  4. fills mvThesInfo with one entry per (dictionary, locale) pair.
// NOTE(review): the return statement is not part of this excerpt; presumably
// aSuppLocales is what gets returned.
96 Sequence
< Locale
> SAL_CALL
Thesaurus::getLocales()
98 MutexGuard
aGuard( GetLinguMutex() );
100 // this routine should return the locales supported by the installed thesaurus dictionaries
// the dictionary scan below only runs while no thesaurus info has been collected yet
102 if (mvThesInfo
.empty())
104 SvtLinguConfig aLinguCfg
;
106 // get list of dictionaries-to-use
107 std::vector
< SvtLinguConfigDictionaryEntry
> aDics
;
108 uno::Sequence
< OUString
> aFormatList
;
109 aLinguCfg
.GetSupportedDictionaryFormatsFor( u
"Thesauri"_ustr
,
110 u
"org.openoffice.lingu.new.Thesaurus"_ustr
, aFormatList
);
// append the active dictionaries of each supported format to aDics
111 for (const auto& rFormat
: aFormatList
)
113 std::vector
< SvtLinguConfigDictionaryEntry
> aTmpDic(
114 aLinguCfg
.GetActiveDictionariesByFormat( rFormat
) );
115 aDics
.insert( aDics
.end(), aTmpDic
.begin(), aTmpDic
.end() );
118 //!! for compatibility with old dictionaries (the ones not using extensions
119 //!! or new configuration entries, but still using the dictionary.lst file)
120 //!! Get the list of old style spell checking dictionaries to use...
121 std::vector
< SvtLinguConfigDictionaryEntry
> aOldStyleDics(
122 GetOldStyleDics( "THES" ) );
124 // to prefer dictionaries with configuration entries we will only
125 // use those old style dictionaries that add a language that
126 // is not yet supported by the list of new style dictionaries
127 MergeNewStyleDicsAndOldStyleDics( aDics
, aOldStyleDics
);
131 // get supported locales from the dictionaries-to-use...
132 std::set
<OUString
> aLocaleNamesSet
;
133 for (auto const& dict
: aDics
)
135 for (const auto& rLocaleName
: dict
.aLocaleNames
)
// NOTE(review): the statement controlled by this allow-list check is not
// visible here; presumably locale names that are not allow-listed for
// LibreOfficeKit are skipped.
137 if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(rLocaleName
))
140 aLocaleNamesSet
.insert( rLocaleName
);
143 // ... and add them to the resulting sequence
144 std::vector
<Locale
> aLocalesVec
;
145 aLocalesVec
.reserve(aLocaleNamesSet
.size());
147 std::transform(aLocaleNamesSet
.begin(), aLocaleNamesSet
.end(), std::back_inserter(aLocalesVec
),
148 [](const OUString
& localeName
) -> Locale
{ return LanguageTag::convertToLocale(localeName
); });
150 aSuppLocales
= comphelper::containerToSequence(aLocalesVec
);
152 //! For each dictionary and each locale we need a separate entry.
153 //! If this results in more than one dictionary per locale then (for now)
154 //! it is undefined which dictionary gets used.
155 //! In the future the implementation should support using several dictionaries
// numthes = total number of locale names over all dictionaries
157 sal_Int32 numthes
= std::accumulate(aDics
.begin(), aDics
.end(), 0,
158 [](const sal_Int32 nSum
, const SvtLinguConfigDictionaryEntry
& dict
) {
159 return nSum
+ dict
.aLocaleNames
.getLength(); });
161 // add dictionary information
162 mvThesInfo
.resize(numthes
);
// NOTE(review): k indexes mvThesInfo below; its declaration and increment are
// on lines not visible in this excerpt.
165 for (auto const& dict
: aDics
)
167 if (dict
.aLocaleNames
.hasElements() &&
168 dict
.aLocations
.hasElements())
170 // currently only one language per dictionary is supported in the actual implementation...
171 // Thus here we work-around this by adding the same dictionary several times.
172 // Once for each of its supported locales.
173 for (const auto& rLocaleName
: dict
.aLocaleNames
)
175 LanguageTag
aLanguageTag(rLocaleName
);
// the real text encoding is filled in by queryMeanings() when the thesaurus is opened
176 mvThesInfo
[k
].aEncoding
= RTL_TEXTENCODING_DONTKNOW
;
177 mvThesInfo
[k
].aLocale
= aLanguageTag
.getLocale();
178 mvThesInfo
[k
].aCharSetInfo
.reset( new CharClass( std::move(aLanguageTag
) ) );
179 // also both files have to be in the same directory and the
180 // file names must only differ in the extension (.aff/.dic).
181 // Thus we use the first location only and strip the extension part.
182 OUString aLocation
= dict
.aLocations
[0];
183 sal_Int32 nPos
= aLocation
.lastIndexOf( '.' );
184 aLocation
= aLocation
.copy( 0, nPos
);
185 mvThesInfo
[k
].aName
= aLocation
;
191 DBG_ASSERT( k
== numthes
, "index mismatch?" );
195 /* no dictionary found so register no dictionaries */
197 aSuppLocales
.realloc(0);
// Returns whether rLocale is contained in aSuppLocales (the search yields an
// index other than -1). Runs under the global linguistic mutex.
// NOTE(review): the statement executed when aSuppLocales has no elements is not
// visible here; presumably the list is populated via getLocales() first.
204 sal_Bool SAL_CALL
Thesaurus::hasLocale(const Locale
& rLocale
)
206 MutexGuard
aGuard( GetLinguMutex() );
208 if (!aSuppLocales
.hasElements())
211 return comphelper::findValue(aSuppLocales
, rLocale
) != -1;
// Looks up the meanings (definition plus synonyms) of qTerm for rLocale.
//
// Outline of what is visible in this excerpt:
//  - the global linguistic mutex is taken; the language of rLocale and an empty
//    term are tested, then hasLocale() (IllegalArgumentException is only thrown
//    in builds that define LINGU_EXCEPTIONS);
//  - the request is compared with prevTerm / prevLocale, the data kept from the
//    previous query (prevMeanings and prevLocale are stored further down);
//  - the first mvThesInfo entry with a matching locale is used; its MyThes
//    object is created from the "<name>.idx" / "<name>.dat" files on first use;
//  - the term is lower-cased, converted to the thesaurus encoding and passed to
//    MyThes::Lookup();
//  - for every result a Meaning is built from its definition and synonyms; a
//    trailing "(category)" part is split off each synonym, the spell checker may
//    be asked to 'generate' an affixed form, and upper-case / initial-capital
//    variants are produced via makeUpperCase() / makeInitCap();
//  - a SPELLML capable spell checker is asked to 'stem' the term, and then the
//    last word of the term.
//
// NOTE(review): many control-flow lines (braces, returns, loop and switch
// headers) are not part of this excerpt, so the exact nesting and the order of
// the steps above must be confirmed against the complete file.
214 Sequence
< Reference
< css::linguistic2::XMeaning
> > SAL_CALL
Thesaurus::queryMeanings(
215 const OUString
& qTerm
, const Locale
& rLocale
,
216 const css::uno::Sequence
< css::beans::PropertyValue
>& rProperties
)
218 MutexGuard
aGuard( GetLinguMutex() );
220 uno::Sequence
< Reference
< XMeaning
> > aMeanings( 1 );
221 uno::Sequence
< Reference
< XMeaning
> > noMeanings( 0 );
222 uno::Reference
< XLinguServiceManager2
> xLngSvcMgr( GetLngSvcMgr_Impl() );
223 uno::Reference
< XSpellChecker1
> xSpell
;
// aRTerm: term that is looked up (replaced by its stem further down);
// aPTerm: term sent to the spell checker's 'analyze' query
225 OUString
aRTerm(qTerm
);
226 OUString
aPTerm(qTerm
);
227 CapType ct
= CapType::UNKNOWN
;
231 LanguageType nLanguage
= LinguLocaleToLanguage( rLocale
);
233 if (LinguIsUnspecified( nLanguage
) || aRTerm
.isEmpty())
236 if (!hasLocale( rLocale
))
237 #ifdef LINGU_EXCEPTIONS
238 throw( IllegalArgumentException() );
// same term and language as the previous query?
243 if (prevTerm
== qTerm
&& prevLocale
== nLanguage
)
246 mentry
* pmean
= nullptr;
249 PropertyHelper_Thesaurus
&rHelper
= GetPropHelper();
250 rHelper
.SetTmpPropVals( rProperties
);
252 MyThes
* pTH
= nullptr;
253 rtl_TextEncoding eEnc
= RTL_TEXTENCODING_DONTKNOW
;
254 CharClass
* pCC
= nullptr;
256 // find the first thesaurus that matches the locale
257 for (size_t i
=0; i
< mvThesInfo
.size(); i
++)
259 if (rLocale
== mvThesInfo
[i
].aLocale
)
261 // open up and initialize this thesaurus if need be
262 if (!mvThesInfo
[i
].aThes
)
264 OUString datpath
= mvThesInfo
[i
].aName
+ ".dat";
265 OUString idxpath
= mvThesInfo
[i
].aName
+ ".idx";
// convert the file URLs into system paths
268 osl::FileBase::getSystemPathFromFileURL(datpath
,ndat
);
269 osl::FileBase::getSystemPathFromFileURL(idxpath
,nidx
);
272 // MyThes expects UTF-8 encoded paths with \\?\ long path prefix.
273 OString aTmpidx
= Win_AddLongPathPrefix(OUStringToOString(nidx
, RTL_TEXTENCODING_UTF8
));
274 OString aTmpdat
= Win_AddLongPathPrefix(OUStringToOString(ndat
, RTL_TEXTENCODING_UTF8
));
// NOTE(review): aTmpidx / aTmpdat are declared a second time here, using the
// thread text encoding; presumably the two variants are selected by a platform
// specific preprocessor conditional whose lines are not visible in this excerpt.
276 OString
aTmpidx(OU2ENC(nidx
,osl_getThreadTextEncoding()));
277 OString
aTmpdat(OU2ENC(ndat
,osl_getThreadTextEncoding()));
280 mvThesInfo
[i
].aThes
.reset( new MyThes(aTmpidx
.getStr(),aTmpdat
.getStr()) );
281 mvThesInfo
[i
].aEncoding
= getTextEncodingFromCharset(mvThesInfo
[i
].aThes
->get_th_encoding());
283 pTH
= mvThesInfo
[i
].aThes
.get();
284 eEnc
= mvThesInfo
[i
].aEncoding
;
285 pCC
= mvThesInfo
[i
].aCharSetInfo
.get();
292 // we don't want to work with a default text encoding since following incorrect
293 // results may occur only for specific text and thus may be hard to notice.
294 // Thus better always make a clean exit here if the text encoding is in question.
295 // Hopefully something not working at all will raise proper attention quickly. ;-)
296 DBG_ASSERT( eEnc
!= RTL_TEXTENCODING_DONTKNOW
, "failed to get text encoding! (maybe incorrect encoding string in file)" );
297 if (eEnc
== RTL_TEXTENCODING_DONTKNOW
)
302 // convert word to all lower case for searching
304 ct
= capitalType(aRTerm
, pCC
);
305 OUString
nTerm(makeLowerCase(aRTerm
, pCC
));
306 OString
aTmp( OU2ENC(nTerm
, eEnc
) );
307 nmean
= pTH
->Lookup(aTmp
.getStr(),aTmp
.getLength(),&pmean
);
310 aMeanings
.realloc( nmean
);
313 OUString codeTerm
= qTerm
;
314 Reference
< XSpellAlternatives
> xTmpRes2
;
// ask the spell checker to analyze aPTerm
318 xTmpRes2
= xSpell
->spell( "<?xml?><query type='analyze'><word>" +
319 aPTerm
+ "</word></query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
322 Sequence
<OUString
>seq
= xTmpRes2
->getAlternatives();
323 if (seq
.hasElements())
// one pass per meaning returned by Lookup()
331 for (int j
= 0; j
< nmean
; j
++)
333 int count
= pe
->count
;
336 Sequence
< OUString
> aStr( count
);
337 OUString
*pStr
= aStr
.getArray();
// one pass per synonym of the current meaning
339 for (int i
=0; i
< count
; i
++)
341 OUString
sTerm(pe
->psyns
[i
],strlen(pe
->psyns
[i
]),eEnc
);
342 sal_Int32 catpos
= sTerm
.indexOf('(');
346 // remove category name for affixation and casing
347 catst
= OUString::Concat(" ") + sTerm
.subView(catpos
);
348 sTerm
= sTerm
.copy(0, catpos
);
349 sTerm
= sTerm
.trim();
351 // generate synonyms with affixes
354 Reference
< XSpellAlternatives
> xTmpRes
= xSpell
->spell( "<?xml?><query type='generate'><word>" +
355 sTerm
+ "</word>" + codeTerm
+ "</query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
358 Sequence
<OUString
>seq
= xTmpRes
->getAlternatives();
359 if (seq
.hasElements())
364 CapType ct1
= capitalType(sTerm
, pCC
);
365 if (CapType::MIXED
== ct1
)
370 case CapType::ALLCAP
:
371 cTerm
= makeUpperCase(sTerm
, pCC
);
373 case CapType::INITCAP
:
374 cTerm
= makeInitCap(sTerm
, pCC
);
// the stored synonym is the cased term followed by the category text split off above
380 pStr
[i
] = OUString( cTerm
+ catst
);
382 rtl::Reference
<Meaning
> pMn
= new Meaning(aRTerm
);
383 OUString
dTerm(pe
->defn
,strlen(pe
->defn
),eEnc
);
384 pMn
->SetMeaning(dTerm
);
385 pMn
->SetSynonyms(aStr
);
386 Reference
<XMeaning
>* pMeaning
= aMeanings
.getArray();
// hand the entries obtained from Lookup() back to MyThes
391 pTH
->CleanUpAfterLookup(&pmean
,nmean
);
// remember the result and the language of this query
396 prevMeanings
= aMeanings
;
397 prevLocale
= nLanguage
;
401 if (stem
|| !xLngSvcMgr
.is())
405 xSpell
.set( xLngSvcMgr
->getSpellChecker(), UNO_QUERY
);
406 if (!xSpell
.is() || !xSpell
->isValid( SPELLML_SUPPORT
, static_cast<sal_uInt16
>(nLanguage
), rProperties
))
// ask the spell checker for the stem of the whole term
408 Reference
< XSpellAlternatives
> xTmpRes
= xSpell
->spell( "<?xml?><query type='stem'><word>" +
409 aRTerm
+ "</word></query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
412 Sequence
<OUString
>seq
= xTmpRes
->getAlternatives();
413 if (seq
.hasElements())
415 aRTerm
= seq
[0]; // XXX Use only the first stem
420 // stem the last word of the synonym (for categories after affixation)
421 aRTerm
= aRTerm
.trim();
422 sal_Int32 pos
= aRTerm
.lastIndexOf(' ');
425 xTmpRes
= xSpell
->spell( OUString::Concat("<?xml?><query type='stem'><word>") +
426 aRTerm
.subView(pos
+ 1) + "</word></query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
429 Sequence
<OUString
>seq
= xTmpRes
->getAlternatives();
430 if (seq
.hasElements())
// keep the unstemmed last word in aPTerm and replace it by its first stem in aRTerm
432 aPTerm
= aRTerm
.copy(pos
+ 1);
433 aRTerm
= aRTerm
.subView(0, pos
+ 1) + seq
[0];
// diagnostic dump of all stem alternatives to stderr
// NOTE(review): check whether this loop is compiled in at all - surrounding
// preprocessor lines, if any, are not visible in this excerpt.
435 for (int i
= 0; i
< seq
.getLength(); i
++)
437 OString o
= OUStringToOString(seq
[i
], RTL_TEXTENCODING_UTF8
);
438 fprintf(stderr
, "%d: %s\n", i
+ 1, o
.pData
->buffer
);
// Returns the display name of this service: the STR_DESCRIPTION_MYTHES string
// looked up in the "svt" translation catalogue created for rLocale.
449 OUString SAL_CALL
Thesaurus::getServiceDisplayName(const Locale
& rLocale
)
451 std::locale
loc(Translate::Create("svt", LanguageTag(rLocale
)));
452 return Translate::get(STR_DESCRIPTION_MYTHES
, loc
);
// Initialises the object from rArguments (under the global linguistic mutex).
// With one or two arguments the first one is extracted as XLinguProperties and
// used to create the property helper, which is then registered as a property
// listener. OSL_FAIL reports a wrong number of arguments.
// NOTE(review): the conditions around this code (e.g. whether a helper already
// exists, and the else branch holding OSL_FAIL) are on lines not visible here.
455 void SAL_CALL
Thesaurus::initialize( const Sequence
< Any
>& rArguments
)
457 MutexGuard
aGuard( GetLinguMutex() );
462 sal_Int32 nLen
= rArguments
.getLength();
463 // Accept one or two args so we can be compatible with the call site in GetAvailLocales()
465 if (1 == nLen
|| 2 == nLen
)
467 Reference
< XLinguProperties
> xPropSet
;
468 rArguments
.getConstArray()[0] >>= xPropSet
;
471 //! Pointer allows for access of the non-UNO functions.
472 //! And the reference to the UNO-functions while increasing
473 //! the ref-count and will implicitly free the memory
474 //! when the object is no longer used.
475 pPropHelper
= new PropertyHelper_Thesaurus( static_cast<XThesaurus
*>(this), xPropSet
);
476 pPropHelper
->AddAsPropListener(); //! after a reference is established
479 OSL_FAIL( "wrong number of arguments in sequence" );
// Returns aTerm converted to lower case by the given CharClass.
// NOTE(review): handling of a null pCC is not visible in this excerpt.
482 OUString
Thesaurus::makeLowerCase(const OUString
& aTerm
, CharClass
const * pCC
)
485 return pCC
->lowercase(aTerm
);
// Returns aTerm converted to upper case by the given CharClass.
// NOTE(review): handling of a null pCC is not visible in this excerpt.
489 OUString
Thesaurus::makeUpperCase(const OUString
& aTerm
, CharClass
const * pCC
)
492 return pCC
->uppercase(aTerm
);
// Returns aTerm with an upper-case first character.
// Two return paths are visible: the upper-cased first character followed by the
// lower-cased remainder (tlen - 1 characters starting at index 1), and the
// upper-cased first character alone.
// NOTE(review): the conditions selecting between them (presumably the term
// length and a pCC check) are on lines not visible in this excerpt.
496 OUString
Thesaurus::makeInitCap(const OUString
& aTerm
, CharClass
const * pCC
)
498 sal_Int32 tlen
= aTerm
.getLength();
// bTemp holds just the first character of the term
501 OUString bTemp
= aTerm
.copy(0,1);
504 return ( pCC
->uppercase(bTemp
, 0, 1)
505 + pCC
->lowercase(aTerm
,1,(tlen
-1)) );
508 return pCC
->uppercase(bTemp
, 0, 1);
// Disposes the object (under the global linguistic mutex): the event listeners
// are notified with an EventObject whose source is this object and then
// cleared; the property helper is deregistered as a property listener and the
// pPropHelper member is reset.
// NOTE(review): guards such as a bDisposing check or a null check on
// pPropHelper are on lines not visible in this excerpt.
513 void SAL_CALL
Thesaurus::dispose()
515 MutexGuard
aGuard( GetLinguMutex() );
520 EventObject
aEvtObj( static_cast<XThesaurus
*>(this) );
521 aEvtListeners
.disposeAndClear( aEvtObj
);
524 pPropHelper
->RemoveAsPropListener();
526 pPropHelper
= nullptr;
// Adds rxListener to the event listener container (under the global linguistic
// mutex). Nothing is added while the object is being disposed or when the
// reference is empty.
531 void SAL_CALL
Thesaurus::addEventListener( const Reference
< XEventListener
>& rxListener
)
533 MutexGuard
aGuard( GetLinguMutex() );
535 if (!bDisposing
&& rxListener
.is())
536 aEvtListeners
.addInterface( rxListener
);
// Removes rxListener from the event listener container (under the global
// linguistic mutex). Nothing is removed while the object is being disposed or
// when the reference is empty.
539 void SAL_CALL
Thesaurus::removeEventListener( const Reference
< XEventListener
>& rxListener
)
541 MutexGuard
aGuard( GetLinguMutex() );
543 if (!bDisposing
&& rxListener
.is())
544 aEvtListeners
.removeInterface( rxListener
);
547 // Service specific part
// Returns the implementation name of this component; the same string is used
// in getLocales() when querying the supported dictionary formats.
548 OUString SAL_CALL
Thesaurus::getImplementationName()
550 return u
"org.openoffice.lingu.new.Thesaurus"_ustr
;
// Reports whether ServiceName is supported; delegates to the generic
// cppu::supportsService() helper, passing this object.
553 sal_Bool SAL_CALL
Thesaurus::supportsService( const OUString
& ServiceName
)
555 return cppu::supportsService(this, ServiceName
);
// Returns the supported service names: a single-element sequence containing
// SN_THESAURUS.
558 Sequence
< OUString
> SAL_CALL
Thesaurus::getSupportedServiceNames()
560 return { SN_THESAURUS
};
// Exported C entry point of the component: creates a new Thesaurus and returns
// it after cppu::acquire(). Both parameters are unnamed and unused.
563 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
564 lingucomponent_Thesaurus_get_implementation(
565 css::uno::XComponentContext
* , css::uno::Sequence
<css::uno::Any
> const&)
567 return cppu::acquire(new Thesaurus());
570 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */