1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/uno/Reference.h>
21 #include <cppuhelper/factory.hxx>
22 #include <cppuhelper/supportsservice.hxx>
23 #include <com/sun/star/lang/XSingleServiceFactory.hpp>
24 #include <com/sun/star/registry/XRegistryKey.hpp>
25 #include <com/sun/star/beans/XPropertySet.hpp>
26 #include <com/sun/star/linguistic2/LinguServiceManager.hpp>
27 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
28 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
29 #include <i18nlangtag/languagetag.hxx>
30 #include <tools/debug.hxx>
31 #include <comphelper/lok.hxx>
32 #include <comphelper/processfactory.hxx>
33 #include <comphelper/sequence.hxx>
34 #include <osl/mutex.hxx>
35 #include <osl/thread.h>
36 #include <unotools/pathoptions.hxx>
37 #include <unotools/lingucfg.hxx>
38 #include <unotools/resmgr.hxx>
40 #include <rtl/string.hxx>
41 #include <rtl/ustrbuf.hxx>
42 #include <rtl/textenc.h>
44 #include <svtools/strings.hrc>
46 #include "nthesimp.hxx"
47 #include <linguistic/misc.hxx>
48 #include <linguistic/lngprops.hxx>
49 #include "nthesdta.hxx"
56 // XML-header to query SPELLML support
57 #define SPELLML_SUPPORT "<?xml?>"
60 using namespace com::sun::star
;
61 using namespace com::sun::star::beans
;
62 using namespace com::sun::star::lang
;
63 using namespace com::sun::star::uno
;
64 using namespace com::sun::star::linguistic2
;
65 using namespace linguistic
;
67 static uno::Reference
< XLinguServiceManager2
> GetLngSvcMgr_Impl()
69 uno::Reference
< XComponentContext
> xContext( comphelper::getProcessComponentContext() );
70 uno::Reference
< XLinguServiceManager2
> xRes
= LinguServiceManager::create( xContext
) ;
// Constructor: builds the event listener container on the mutex returned by
// GetLinguMutex(), starts without a property helper (pPropHelper is null)
// and with LANGUAGE_DONTKNOW as the language of the previous query.
// NOTE(review): further initialisation lines may exist that are not visible
// in this excerpt (e.g. for bDisposing, which other methods read) - confirm.
74 Thesaurus::Thesaurus() :
75 aEvtListeners ( GetLinguMutex() )
78 pPropHelper
= nullptr;
79 prevLocale
= LANGUAGE_DONTKNOW
;
// Destructor: calls RemoveAsPropListener() on the property helper.
// NOTE(review): pPropHelper is initialised to nullptr in the constructor;
// the null check and the release of the helper are not visible in this
// excerpt - confirm both exist.
82 Thesaurus::~Thesaurus()
87 pPropHelper
->RemoveAsPropListener();
// Creates the PropertyHelper_Thesaurus for this object from the properties
// returned by GetLinguProperties() and then calls AddAsPropListener() on it.
// NOTE(review): the condition guarding the creation and the return statement
// are not visible in this excerpt; presumably the helper is only created
// while pPropHelper is still null and *pPropHelper is returned - confirm.
91 PropertyHelper_Thesaurus
& Thesaurus::GetPropHelper_Impl()
95 Reference
< XLinguProperties
> xPropSet
= GetLinguProperties();
97 pPropHelper
= new PropertyHelper_Thesaurus( static_cast<XThesaurus
*>(this), xPropSet
);
98 pPropHelper
->AddAsPropListener(); //! after a reference is established
// XSupportedLocales: determines the locales for which a thesaurus
// dictionary is available.
//
// Visible behaviour, all under the GetLinguMutex() guard: while mvThesInfo
// is still empty, the active dictionaries of every format registered for
// "Thesauri" / "org.openoffice.lingu.new.Thesaurus" are collected, merged
// with the old-style "THES" dictionaries, their locale names are converted
// into aSuppLocales, and one mvThesInfo entry (encoding, locale, CharClass,
// location without file extension) is filled per dictionary/locale pair.
// NOTE(review): braces, some conditions and the return statement are not
// visible in this excerpt; presumably aSuppLocales is returned - confirm.
103 Sequence
< Locale
> SAL_CALL
Thesaurus::getLocales()
105 MutexGuard
aGuard( GetLinguMutex() );
107 // this routine should return the locales supported by the installed
109 if (mvThesInfo
.empty())
111 SvtLinguConfig aLinguCfg
;
113 // get list of dictionaries-to-use
114 std::vector
< SvtLinguConfigDictionaryEntry
> aDics
;
115 uno::Sequence
< OUString
> aFormatList
;
116 aLinguCfg
.GetSupportedDictionaryFormatsFor( "Thesauri",
117 "org.openoffice.lingu.new.Thesaurus", aFormatList
);
118 for (const auto& rFormat
: std::as_const(aFormatList
))
120 std::vector
< SvtLinguConfigDictionaryEntry
> aTmpDic(
121 aLinguCfg
.GetActiveDictionariesByFormat( rFormat
) );
122 aDics
.insert( aDics
.end(), aTmpDic
.begin(), aTmpDic
.end() );
125 //!! for compatibility with old dictionaries (the ones not using extensions
126 //!! or new configuration entries, but still using the dictionary.lst file)
127 //!! Get the list of old style spell checking dictionaries to use...
128 std::vector
< SvtLinguConfigDictionaryEntry
> aOldStyleDics(
129 GetOldStyleDics( "THES" ) );
131 // to prefer dictionaries with configuration entries we will only
132 // use those old style dictionaries that add a language that
133 // is not yet supported by the list of new style dictionaries
134 MergeNewStyleDicsAndOldStyleDics( aDics
, aOldStyleDics
);
138 // get supported locales from the dictionaries-to-use...
139 std::set
<OUString
> aLocaleNamesSet
;
140 for (auto const& dict
: aDics
)
142 for (const auto& rLocaleName
: dict
.aLocaleNames
)
144 if (!comphelper::LibreOfficeKit::isWhitelistedLanguage(rLocaleName
))
// NOTE(review): the statement controlled by the check above is not visible
// in this excerpt; presumably non-whitelisted locale names are skipped.
147 aLocaleNamesSet
.insert( rLocaleName
);
150 // ... and add them to the resulting sequence
151 std::vector
<Locale
> aLocalesVec
;
152 aLocalesVec
.reserve(aLocaleNamesSet
.size());
154 std::transform(aLocaleNamesSet
.begin(), aLocaleNamesSet
.end(), std::back_inserter(aLocalesVec
),
155 [](const OUString
& localeName
) -> Locale
{ return LanguageTag::convertToLocale(localeName
); });
157 aSuppLocales
= comphelper::containerToSequence(aLocalesVec
);
159 //! For each dictionary and each locale we need a separate entry.
160 //! If this results in more than one dictionary per locale than (for now)
161 //! it is undefined which dictionary gets used.
162 //! In the future the implementation should support using several dictionaries
// numthes = total number of (dictionary, locale) pairs over all dictionaries
164 sal_Int32 numthes
= std::accumulate(aDics
.begin(), aDics
.end(), 0,
165 [](const sal_Int32 nSum
, const SvtLinguConfigDictionaryEntry
& dict
) {
166 return nSum
+ dict
.aLocaleNames
.getLength(); });
168 // add dictionary information
169 mvThesInfo
.resize(numthes
);
// NOTE(review): k indexes mvThesInfo below; its declaration and increment
// are not visible in this excerpt.
172 for (auto const& dict
: aDics
)
174 if (dict
.aLocaleNames
.hasElements() &&
175 dict
.aLocations
.hasElements())
177 // currently only one language per dictionary is supported in the actual implementation...
178 // Thus here we work-around this by adding the same dictionary several times.
179 // Once for each of its supported locales.
180 for (const auto& rLocaleName
: dict
.aLocaleNames
)
182 LanguageTag
aLanguageTag(rLocaleName
);
183 mvThesInfo
[k
].aEncoding
= RTL_TEXTENCODING_DONTKNOW
;
184 mvThesInfo
[k
].aLocale
= aLanguageTag
.getLocale();
185 mvThesInfo
[k
].aCharSetInfo
.reset( new CharClass( aLanguageTag
) );
186 // also both files have to be in the same directory and the
187 // file names must only differ in the extension (.aff/.dic).
188 // Thus we use the first location only and strip the extension part.
189 OUString aLocation
= dict
.aLocations
[0];
190 sal_Int32 nPos
= aLocation
.lastIndexOf( '.' );
191 aLocation
= aLocation
.copy( 0, nPos
);
192 mvThesInfo
[k
].aName
= aLocation
;
198 DBG_ASSERT( k
== numthes
, "index mismatch?" );
202 /* no dictionary found so register no dictionaries */
204 aSuppLocales
.realloc(0);
// XSupportedLocales: reports whether rLocale is contained in aSuppLocales
// (comphelper::findValue() yields -1 when the value is not found).
// Runs under the GetLinguMutex() guard.
// NOTE(review): the statement controlled by the emptiness check is not
// visible in this excerpt; presumably it fills aSuppLocales first - confirm.
211 sal_Bool SAL_CALL
Thesaurus::hasLocale(const Locale
& rLocale
)
213 MutexGuard
aGuard( GetLinguMutex() );
215 if (!aSuppLocales
.hasElements())
218 return comphelper::findValue(aSuppLocales
, rLocale
) != -1;
// XThesaurus: looks up the meanings of qTerm for rLocale.
//
// What is visible in this excerpt (several control-flow lines are missing
// from view, so the ordering and conditions below are partly assumed):
//  - all work is done under the GetLinguMutex() guard;
//  - nLanguage is derived from rLocale; an unspecified language, an empty
//    term and an unsupported locale are checked up front;
//  - the first mvThesInfo entry whose aLocale equals rLocale is used; its
//    MyThes object is created on first use from "<aName>.idx" and
//    "<aName>.dat", and its text encoding is taken from the thesaurus;
//  - the term is lower-cased, converted to that encoding and handed to
//    MyThes::Lookup(); for each returned entry a Meaning is created that
//    carries the definition and the synonym list;
//  - synonyms are split at '(' into term and category, and their
//    capitalisation is adjusted (ALLCAP / INITCAP cases are visible);
//  - the spell checker is used with SPELLML ("<?xml?>...") requests of type
//    'analyze', 'generate' and 'stem';
//  - the result is remembered in prevMeanings / prevLocale.
// NOTE(review): the return statements are not visible here; presumably
// aMeanings (or noMeanings on failure) is returned - confirm.
221 Sequence
< Reference
< css::linguistic2::XMeaning
> > SAL_CALL
Thesaurus::queryMeanings(
222 const OUString
& qTerm
, const Locale
& rLocale
,
223 const css::uno::Sequence
< css::beans::PropertyValue
>& rProperties
)
225 MutexGuard
aGuard( GetLinguMutex() );
227 uno::Sequence
< Reference
< XMeaning
> > aMeanings( 1 );
228 uno::Sequence
< Reference
< XMeaning
> > noMeanings( 0 );
229 uno::Reference
< XLinguServiceManager2
> xLngSvcMgr( GetLngSvcMgr_Impl() );
230 uno::Reference
< XSpellChecker1
> xSpell
;
232 OUString
aRTerm(qTerm
);
233 OUString
aPTerm(qTerm
);
234 CapType ct
= CapType::UNKNOWN
;
238 LanguageType nLanguage
= LinguLocaleToLanguage( rLocale
);
240 if (LinguIsUnspecified( nLanguage
) || aRTerm
.isEmpty())
243 if (!hasLocale( rLocale
))
244 #ifdef LINGU_EXCEPTIONS
245 throw( IllegalArgumentException() );
// NOTE(review): comparison with the previously queried term and language;
// the statement it controls is not visible - presumably the remembered
// prevMeanings are handed back without a new lookup.
250 if (prevTerm
== qTerm
&& prevLocale
== nLanguage
)
253 mentry
* pmean
= nullptr;
256 PropertyHelper_Thesaurus
&rHelper
= GetPropHelper();
257 rHelper
.SetTmpPropVals( rProperties
);
259 MyThes
* pTH
= nullptr;
260 rtl_TextEncoding eEnc
= RTL_TEXTENCODING_DONTKNOW
;
261 CharClass
* pCC
= nullptr;
263 // find the first thesaurus that matches the locale
264 for (size_t i
=0; i
< mvThesInfo
.size(); i
++)
266 if (rLocale
== mvThesInfo
[i
].aLocale
)
268 // open up and initialize this thesaurus if need be
269 if (!mvThesInfo
[i
].aThes
)
271 OUString datpath
= mvThesInfo
[i
].aName
+ ".dat";
272 OUString idxpath
= mvThesInfo
[i
].aName
+ ".idx";
275 osl::FileBase::getSystemPathFromFileURL(datpath
,ndat
);
276 osl::FileBase::getSystemPathFromFileURL(idxpath
,nidx
);
// NOTE(review): aTmpidx / aTmpdat are declared twice below (UTF-8 with long
// path prefix vs. thread text encoding); presumably these are alternative
// preprocessor branches whose #if/#else lines are not visible here.
279 // MyThes waits UTF-8 encoded paths with \\?\ long path prefix.
280 OString aTmpidx
= Win_AddLongPathPrefix(OUStringToOString(nidx
, RTL_TEXTENCODING_UTF8
));
281 OString aTmpdat
= Win_AddLongPathPrefix(OUStringToOString(ndat
, RTL_TEXTENCODING_UTF8
));
283 OString
aTmpidx(OU2ENC(nidx
,osl_getThreadTextEncoding()));
284 OString
aTmpdat(OU2ENC(ndat
,osl_getThreadTextEncoding()));
287 mvThesInfo
[i
].aThes
.reset( new MyThes(aTmpidx
.getStr(),aTmpdat
.getStr()) );
288 mvThesInfo
[i
].aEncoding
= getTextEncodingFromCharset(mvThesInfo
[i
].aThes
->get_th_encoding());
290 pTH
= mvThesInfo
[i
].aThes
.get();
291 eEnc
= mvThesInfo
[i
].aEncoding
;
292 pCC
= mvThesInfo
[i
].aCharSetInfo
.get();
299 // we don't want to work with a default text encoding since following incorrect
300 // results may occur only for specific text and thus may be hard to notice.
301 // Thus better always make a clean exit here if the text encoding is in question.
302 // Hopefully something not working at all will raise proper attention quickly. ;-)
303 DBG_ASSERT( eEnc
!= RTL_TEXTENCODING_DONTKNOW
, "failed to get text encoding! (maybe incorrect encoding string in file)" );
304 if (eEnc
== RTL_TEXTENCODING_DONTKNOW
)
309 // convert word to all lower case for searching
311 ct
= capitalType(aRTerm
, pCC
);
312 OUString
nTerm(makeLowerCase(aRTerm
, pCC
));
313 OString
aTmp( OU2ENC(nTerm
, eEnc
) );
314 nmean
= pTH
->Lookup(aTmp
.getStr(),aTmp
.getLength(),&pmean
);
317 aMeanings
.realloc( nmean
);
320 OUString codeTerm
= qTerm
;
321 Reference
< XSpellAlternatives
> xTmpRes2
;
325 xTmpRes2
= xSpell
->spell( "<?xml?><query type='analyze'><word>" +
326 aPTerm
+ "</word></query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
329 Sequence
<OUString
>seq
= xTmpRes2
->getAlternatives();
330 if (seq
.hasElements())
// one Meaning per entry returned by the lookup
338 for (int j
= 0; j
< nmean
; j
++)
340 int count
= pe
->count
;
343 Sequence
< OUString
> aStr( count
);
344 OUString
*pStr
= aStr
.getArray();
346 for (int i
=0; i
< count
; i
++)
348 OUString
sTerm(pe
->psyns
[i
],strlen(pe
->psyns
[i
]),eEnc
);
349 sal_Int32 catpos
= sTerm
.indexOf('(');
353 // remove category name for affixation and casing
354 catst
= " " + sTerm
.copy(catpos
);
355 sTerm
= sTerm
.copy(0, catpos
);
356 sTerm
= sTerm
.trim();
358 // generate synonyms with affixes
361 Reference
< XSpellAlternatives
> xTmpRes
= xSpell
->spell( "<?xml?><query type='generate'><word>" +
362 sTerm
+ "</word>" + codeTerm
+ "</query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
365 Sequence
<OUString
>seq
= xTmpRes
->getAlternatives();
366 if (seq
.hasElements())
371 CapType ct1
= capitalType(sTerm
, pCC
);
372 if (CapType::MIXED
== ct1
)
377 case CapType::ALLCAP
:
378 cTerm
= makeUpperCase(sTerm
, pCC
);
380 case CapType::INITCAP
:
381 cTerm
= makeInitCap(sTerm
, pCC
);
387 OUString
aAlt( cTerm
+ catst
);
390 Meaning
* pMn
= new Meaning(aRTerm
);
391 OUString
dTerm(pe
->defn
,strlen(pe
->defn
),eEnc
);
392 pMn
->SetMeaning(dTerm
);
393 pMn
->SetSynonyms(aStr
);
394 Reference
<XMeaning
>* pMeaning
= aMeanings
.getArray();
399 pTH
->CleanUpAfterLookup(&pmean
,nmean
);
404 prevMeanings
= aMeanings
;
405 prevLocale
= nLanguage
;
// NOTE(review): from here on the spell checker is asked for a stem of the
// term; presumably this is the fallback when the lookup found nothing and
// the lookup is then repeated with the stem - the loop structure is not
// visible in this excerpt.
409 if (stem
|| !xLngSvcMgr
.is())
413 xSpell
.set( xLngSvcMgr
->getSpellChecker(), UNO_QUERY
);
414 if (!xSpell
.is() || !xSpell
->isValid( SPELLML_SUPPORT
, static_cast<sal_uInt16
>(nLanguage
), rProperties
))
416 Reference
< XSpellAlternatives
> xTmpRes
= xSpell
->spell( "<?xml?><query type='stem'><word>" +
417 aRTerm
+ "</word></query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
420 Sequence
<OUString
>seq
= xTmpRes
->getAlternatives();
421 if (seq
.hasElements())
423 aRTerm
= seq
[0]; // XXX Use only the first stem
428 // stem the last word of the synonym (for categories after affixation)
429 aRTerm
= aRTerm
.trim();
430 sal_Int32 pos
= aRTerm
.lastIndexOf(' ');
433 xTmpRes
= xSpell
->spell( "<?xml?><query type='stem'><word>" +
434 aRTerm
.copy(pos
+ 1) + "</word></query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
437 Sequence
<OUString
>seq
= xTmpRes
->getAlternatives();
438 if (seq
.hasElements())
440 aPTerm
= aRTerm
.copy(pos
+ 1);
441 aRTerm
= aRTerm
.copy(0, pos
+ 1) + seq
[0];
// NOTE(review): the loop below prints every stem to stderr; presumably it
// is debug-only code that is compiled out - confirm.
443 for (int i
= 0; i
< seq
.getLength(); i
++)
445 OString o
= OUStringToOString(seq
[i
], RTL_TEXTENCODING_UTF8
);
446 fprintf(stderr
, "%d: %s\n", i
+ 1, o
.pData
->buffer
);
457 /// @throws Exception
458 static Reference
< XInterface
> Thesaurus_CreateInstance(
459 const Reference
< XMultiServiceFactory
> & /*rSMgr*/ )
461 Reference
< XInterface
> xService
= static_cast<cppu::OWeakObject
*>(new Thesaurus
);
465 OUString SAL_CALL
Thesaurus::getServiceDisplayName(const Locale
& rLocale
)
467 std::locale
loc(Translate::Create("svt", LanguageTag(rLocale
)));
468 return Translate::get(STR_DESCRIPTION_MYTHES
, loc
);
// XInitialization: under the GetLinguMutex() guard, extracts an
// XLinguProperties reference from the first element of rArguments, creates
// the PropertyHelper_Thesaurus from it and calls AddAsPropListener() on it.
// NOTE(review): nLen is read and an OSL_FAIL for a wrong number of arguments
// is present, but the conditions selecting between the two paths (and any
// check for an already existing helper) are not visible in this excerpt.
471 void SAL_CALL
Thesaurus::initialize( const Sequence
< Any
>& rArguments
)
473 MutexGuard
aGuard( GetLinguMutex() );
477 sal_Int32 nLen
= rArguments
.getLength();
480 Reference
< XLinguProperties
> xPropSet
;
481 rArguments
.getConstArray()[0] >>= xPropSet
;
483 //! Pointer allows for access of the non-UNO functions.
484 //! And the reference to the UNO-functions while increasing
485 //! the ref-count and will implicitly free the memory
486 //! when the object is no longer used.
487 pPropHelper
= new PropertyHelper_Thesaurus( static_cast<XThesaurus
*>(this), xPropSet
);
488 pPropHelper
->AddAsPropListener(); //! after a reference is established
491 OSL_FAIL( "wrong number of arguments in sequence" );
495 OUString
Thesaurus::makeLowerCase(const OUString
& aTerm
, CharClass
const * pCC
)
498 return pCC
->lowercase(aTerm
);
502 OUString
Thesaurus::makeUpperCase(const OUString
& aTerm
, CharClass
const * pCC
)
505 return pCC
->uppercase(aTerm
);
509 OUString
Thesaurus::makeInitCap(const OUString
& aTerm
, CharClass
const * pCC
)
511 sal_Int32 tlen
= aTerm
.getLength();
514 OUString bTemp
= aTerm
.copy(0,1);
517 return ( pCC
->uppercase(bTemp
, 0, 1)
518 + pCC
->lowercase(aTerm
,1,(tlen
-1)) );
521 return pCC
->uppercase(bTemp
, 0, 1);
// XComponent: under the GetLinguMutex() guard, calls disposeAndClear() on
// the listener container with this object as event source, calls
// RemoveAsPropListener() on the property helper and resets pPropHelper to
// nullptr.
// NOTE(review): the handling of bDisposing, the null check for pPropHelper
// and the release of the helper object are not visible in this excerpt -
// confirm they exist.
526 void SAL_CALL
Thesaurus::dispose()
528 MutexGuard
aGuard( GetLinguMutex() );
533 EventObject
aEvtObj( static_cast<XThesaurus
*>(this) );
534 aEvtListeners
.disposeAndClear( aEvtObj
);
537 pPropHelper
->RemoveAsPropListener();
539 pPropHelper
= nullptr;
544 void SAL_CALL
Thesaurus::addEventListener( const Reference
< XEventListener
>& rxListener
)
546 MutexGuard
aGuard( GetLinguMutex() );
548 if (!bDisposing
&& rxListener
.is())
549 aEvtListeners
.addInterface( rxListener
);
552 void SAL_CALL
Thesaurus::removeEventListener( const Reference
< XEventListener
>& rxListener
)
554 MutexGuard
aGuard( GetLinguMutex() );
556 if (!bDisposing
&& rxListener
.is())
557 aEvtListeners
.removeInterface( rxListener
);
560 // Service specific part
561 OUString SAL_CALL
Thesaurus::getImplementationName()
563 return getImplementationName_Static();
566 sal_Bool SAL_CALL
Thesaurus::supportsService( const OUString
& ServiceName
)
568 return cppu::supportsService(this, ServiceName
);
571 Sequence
< OUString
> SAL_CALL
Thesaurus::getSupportedServiceNames()
573 return getSupportedServiceNames_Static();
// Builds the list of service names supported by this implementation: a
// one-element sequence holding SN_THESAURUS.
// NOTE(review): the return statement (and anything between the signature
// and the body) is not visible in this excerpt; presumably aSNS is returned.
576 Sequence
< OUString
> Thesaurus::getSupportedServiceNames_Static()
579 Sequence
< OUString
> aSNS
{ SN_THESAURUS
};
585 SAL_DLLPUBLIC_EXPORT
void * lnth_component_getFactory(
586 const sal_Char
* pImplName
, void * pServiceManager
, void * /*pRegistryKey*/ )
588 void * pRet
= nullptr;
589 if ( Thesaurus::getImplementationName_Static().equalsAscii( pImplName
) )
592 Reference
< XSingleServiceFactory
> xFactory
=
593 cppu::createOneInstanceFactory(
594 static_cast< XMultiServiceFactory
* >( pServiceManager
),
595 Thesaurus::getImplementationName_Static(),
596 Thesaurus_CreateInstance
,
597 Thesaurus::getSupportedServiceNames_Static());
598 // acquire, because we return an interface pointer instead of a reference
600 pRet
= xFactory
.get();
606 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */