/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <com/sun/star/uno/Reference.h>
21 #include <cppuhelper/factory.hxx>
22 #include <cppuhelper/supportsservice.hxx>
23 #include <com/sun/star/lang/XSingleServiceFactory.hpp>
24 #include <com/sun/star/registry/XRegistryKey.hpp>
25 #include <com/sun/star/beans/XPropertySet.hpp>
26 #include <com/sun/star/linguistic2/LinguServiceManager.hpp>
27 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
28 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
29 #include <i18nlangtag/languagetag.hxx>
30 #include <tools/debug.hxx>
31 #include <comphelper/lok.hxx>
32 #include <comphelper/processfactory.hxx>
33 #include <osl/mutex.hxx>
34 #include <osl/thread.h>
35 #include <unotools/pathoptions.hxx>
36 #include <unotools/lingucfg.hxx>
37 #include <unotools/resmgr.hxx>
39 #include <rtl/string.hxx>
40 #include <rtl/ustrbuf.hxx>
41 #include <rtl/textenc.h>
43 #include <svtools/strings.hrc>
45 #include "nthesimp.hxx"
46 #include <linguistic/misc.hxx>
47 #include <linguistic/lngprops.hxx>
48 #include "nthesdta.hxx"
54 // XML-header to query SPELLML support
55 #define SPELLML_SUPPORT "<?xml?>"
58 using namespace com::sun::star
;
59 using namespace com::sun::star::beans
;
60 using namespace com::sun::star::lang
;
61 using namespace com::sun::star::uno
;
62 using namespace com::sun::star::linguistic2
;
63 using namespace linguistic
;
65 static uno::Reference
< XLinguServiceManager2
> GetLngSvcMgr_Impl()
67 uno::Reference
< XComponentContext
> xContext( comphelper::getProcessComponentContext() );
68 uno::Reference
< XLinguServiceManager2
> xRes
= LinguServiceManager::create( xContext
) ;
72 Thesaurus::Thesaurus() :
73 aEvtListeners ( GetLinguMutex() )
76 pPropHelper
= nullptr;
77 prevLocale
= LANGUAGE_DONTKNOW
;
// Destructor. The only visible statement unregisters pPropHelper as a
// property listener.
// NOTE(review): the lines between the signature and this call are missing from
// this listing. Presumably the call is guarded by a null check, since the
// constructor sets pPropHelper to nullptr -- confirm, and confirm who frees
// the helper, against version control.
80 Thesaurus::~Thesaurus()
85 pPropHelper
->RemoveAsPropListener();
89 PropertyHelper_Thesaurus
& Thesaurus::GetPropHelper_Impl()
93 Reference
< XLinguProperties
> xPropSet( GetLinguProperties(), UNO_QUERY
);
95 pPropHelper
= new PropertyHelper_Thesaurus( static_cast<XThesaurus
*>(this), xPropSet
);
96 pPropHelper
->AddAsPropListener(); //! after a reference is established
101 Sequence
< Locale
> SAL_CALL
Thesaurus::getLocales()
103 MutexGuard
aGuard( GetLinguMutex() );
105 // this routine should return the locales supported by the installed
107 if (mvThesInfo
.empty())
109 SvtLinguConfig aLinguCfg
;
111 // get list of dictionaries-to-use
112 std::vector
< SvtLinguConfigDictionaryEntry
> aDics
;
113 uno::Sequence
< OUString
> aFormatList
;
114 aLinguCfg
.GetSupportedDictionaryFormatsFor( "Thesauri",
115 "org.openoffice.lingu.new.Thesaurus", aFormatList
);
116 sal_Int32 nLen
= aFormatList
.getLength();
117 for (sal_Int32 i
= 0; i
< nLen
; ++i
)
119 std::vector
< SvtLinguConfigDictionaryEntry
> aTmpDic(
120 aLinguCfg
.GetActiveDictionariesByFormat( aFormatList
[i
] ) );
121 aDics
.insert( aDics
.end(), aTmpDic
.begin(), aTmpDic
.end() );
124 //!! for compatibility with old dictionaries (the ones not using extensions
125 //!! or new configuration entries, but still using the dictionary.lst file)
126 //!! Get the list of old style spell checking dictionaries to use...
127 std::vector
< SvtLinguConfigDictionaryEntry
> aOldStyleDics(
128 GetOldStyleDics( "THES" ) );
130 // to prefer dictionaries with configuration entries we will only
131 // use those old style dictionaries that add a language that
132 // is not yet supported by the list od new style dictionaries
133 MergeNewStyleDicsAndOldStyleDics( aDics
, aOldStyleDics
);
135 sal_Int32 numthes
= aDics
.size();
138 // get supported locales from the dictionaries-to-use...
140 std::set
<OUString
> aLocaleNamesSet
;
141 for (auto const& dict
: aDics
)
143 uno::Sequence
< OUString
> aLocaleNames(dict
.aLocaleNames
);
144 sal_Int32 nLen2
= aLocaleNames
.getLength();
145 for (k
= 0; k
< nLen2
; ++k
)
147 if (!comphelper::LibreOfficeKit::isWhitelistedLanguage(aLocaleNames
[k
]))
150 aLocaleNamesSet
.insert( aLocaleNames
[k
] );
153 // ... and add them to the resulting sequence
154 aSuppLocales
.realloc( aLocaleNamesSet
.size() );
155 std::set
<OUString
>::const_iterator aItB
;
157 for (auto const& localeName
: aLocaleNamesSet
)
159 Locale
aTmp( LanguageTag::convertToLocale(localeName
));
160 aSuppLocales
[k
++] = aTmp
;
163 //! For each dictionary and each locale we need a separate entry.
164 //! If this results in more than one dictionary per locale than (for now)
165 //! it is undefined which dictionary gets used.
166 //! In the future the implementation should support using several dictionaries
169 for (auto const& dict
: aDics
)
170 numthes
= numthes
+ dict
.aLocaleNames
.getLength();
172 // add dictionary information
173 mvThesInfo
.resize(numthes
);
176 for (auto const& dict
: aDics
)
178 if (dict
.aLocaleNames
.hasElements() &&
179 dict
.aLocations
.hasElements())
181 uno::Sequence
< OUString
> aLocaleNames(dict
.aLocaleNames
);
182 sal_Int32 nLocales
= aLocaleNames
.getLength();
184 // currently only one language per dictionary is supported in the actual implementation...
185 // Thus here we work-around this by adding the same dictionary several times.
186 // Once for each of its supported locales.
187 for (sal_Int32 i
= 0; i
< nLocales
; ++i
)
189 LanguageTag
aLanguageTag(dict
.aLocaleNames
[i
]);
190 mvThesInfo
[k
].aEncoding
= RTL_TEXTENCODING_DONTKNOW
;
191 mvThesInfo
[k
].aLocale
= aLanguageTag
.getLocale();
192 mvThesInfo
[k
].aCharSetInfo
.reset( new CharClass( aLanguageTag
) );
193 // also both files have to be in the same directory and the
194 // file names must only differ in the extension (.aff/.dic).
195 // Thus we use the first location only and strip the extension part.
196 OUString aLocation
= dict
.aLocations
[0];
197 sal_Int32 nPos
= aLocation
.lastIndexOf( '.' );
198 aLocation
= aLocation
.copy( 0, nPos
);
199 mvThesInfo
[k
].aName
= aLocation
;
205 DBG_ASSERT( k
== numthes
, "index mismatch?" );
209 /* no dictionary found so register no dictionaries */
211 aSuppLocales
.realloc(0);
218 sal_Bool SAL_CALL
Thesaurus::hasLocale(const Locale
& rLocale
)
220 MutexGuard
aGuard( GetLinguMutex() );
223 if (!aSuppLocales
.hasElements())
225 sal_Int32 nLen
= aSuppLocales
.getLength();
226 for (sal_Int32 i
= 0; i
< nLen
; ++i
)
228 const Locale
*pLocale
= aSuppLocales
.getConstArray();
229 if (rLocale
== pLocale
[i
])
238 Sequence
< Reference
< css::linguistic2::XMeaning
> > SAL_CALL
Thesaurus::queryMeanings(
239 const OUString
& qTerm
, const Locale
& rLocale
,
240 const css::uno::Sequence
< css::beans::PropertyValue
>& rProperties
)
242 MutexGuard
aGuard( GetLinguMutex() );
244 uno::Sequence
< Reference
< XMeaning
> > aMeanings( 1 );
245 uno::Sequence
< Reference
< XMeaning
> > noMeanings( 0 );
246 uno::Reference
< XLinguServiceManager2
> xLngSvcMgr( GetLngSvcMgr_Impl() );
247 uno::Reference
< XSpellChecker1
> xSpell
;
249 OUString
aRTerm(qTerm
);
250 OUString
aPTerm(qTerm
);
251 CapType ct
= CapType::UNKNOWN
;
255 LanguageType nLanguage
= LinguLocaleToLanguage( rLocale
);
257 if (LinguIsUnspecified( nLanguage
) || aRTerm
.isEmpty())
260 if (!hasLocale( rLocale
))
261 #ifdef LINGU_EXCEPTIONS
262 throw( IllegalArgumentException() );
267 if (prevTerm
== qTerm
&& prevLocale
== nLanguage
)
270 mentry
* pmean
= nullptr;
273 PropertyHelper_Thesaurus
&rHelper
= GetPropHelper();
274 rHelper
.SetTmpPropVals( rProperties
);
276 MyThes
* pTH
= nullptr;
277 rtl_TextEncoding eEnc
= RTL_TEXTENCODING_DONTKNOW
;
278 CharClass
* pCC
= nullptr;
280 // find the first thesaurus that matches the locale
281 for (size_t i
=0; i
< mvThesInfo
.size(); i
++)
283 if (rLocale
== mvThesInfo
[i
].aLocale
)
285 // open up and initialize this thesaurus if need be
286 if (!mvThesInfo
[i
].aThes
)
288 OUString datpath
= mvThesInfo
[i
].aName
+ ".dat";
289 OUString idxpath
= mvThesInfo
[i
].aName
+ ".idx";
292 osl::FileBase::getSystemPathFromFileURL(datpath
,ndat
);
293 osl::FileBase::getSystemPathFromFileURL(idxpath
,nidx
);
296 // MyThes waits UTF-8 encoded paths with \\?\ long path prefix.
297 OString aTmpidx
= Win_AddLongPathPrefix(OUStringToOString(nidx
, RTL_TEXTENCODING_UTF8
));
298 OString aTmpdat
= Win_AddLongPathPrefix(OUStringToOString(ndat
, RTL_TEXTENCODING_UTF8
));
300 OString
aTmpidx(OU2ENC(nidx
,osl_getThreadTextEncoding()));
301 OString
aTmpdat(OU2ENC(ndat
,osl_getThreadTextEncoding()));
304 mvThesInfo
[i
].aThes
.reset( new MyThes(aTmpidx
.getStr(),aTmpdat
.getStr()) );
305 mvThesInfo
[i
].aEncoding
= getTextEncodingFromCharset(mvThesInfo
[i
].aThes
->get_th_encoding());
307 pTH
= mvThesInfo
[i
].aThes
.get();
308 eEnc
= mvThesInfo
[i
].aEncoding
;
309 pCC
= mvThesInfo
[i
].aCharSetInfo
.get();
316 // we don't want to work with a default text encoding since following incorrect
317 // results may occur only for specific text and thus may be hard to notice.
318 // Thus better always make a clean exit here if the text encoding is in question.
319 // Hopefully something not working at all will raise proper attention quickly. ;-)
320 DBG_ASSERT( eEnc
!= RTL_TEXTENCODING_DONTKNOW
, "failed to get text encoding! (maybe incorrect encoding string in file)" );
321 if (eEnc
== RTL_TEXTENCODING_DONTKNOW
)
326 // convert word to all lower case for searching
328 ct
= capitalType(aRTerm
, pCC
);
329 OUString
nTerm(makeLowerCase(aRTerm
, pCC
));
330 OString
aTmp( OU2ENC(nTerm
, eEnc
) );
331 nmean
= pTH
->Lookup(aTmp
.getStr(),aTmp
.getLength(),&pmean
);
334 aMeanings
.realloc( nmean
);
337 OUString codeTerm
= qTerm
;
338 Reference
< XSpellAlternatives
> xTmpRes2
;
342 xTmpRes2
= xSpell
->spell( "<?xml?><query type='analyze'><word>" +
343 aPTerm
+ "</word></query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
346 Sequence
<OUString
>seq
= xTmpRes2
->getAlternatives();
347 if (seq
.hasElements())
355 for (int j
= 0; j
< nmean
; j
++)
357 int count
= pe
->count
;
360 Sequence
< OUString
> aStr( count
);
361 OUString
*pStr
= aStr
.getArray();
363 for (int i
=0; i
< count
; i
++)
365 OUString
sTerm(pe
->psyns
[i
],strlen(pe
->psyns
[i
]),eEnc
);
366 sal_Int32 catpos
= sTerm
.indexOf('(');
370 // remove category name for affixation and casing
371 catst
= " " + sTerm
.copy(catpos
);
372 sTerm
= sTerm
.copy(0, catpos
);
373 sTerm
= sTerm
.trim();
375 // generate synonyms with affixes
378 Reference
< XSpellAlternatives
> xTmpRes
;
379 xTmpRes
= xSpell
->spell( "<?xml?><query type='generate'><word>" +
380 sTerm
+ "</word>" + codeTerm
+ "</query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
383 Sequence
<OUString
>seq
= xTmpRes
->getAlternatives();
384 if (seq
.hasElements())
389 CapType ct1
= capitalType(sTerm
, pCC
);
390 if (CapType::MIXED
== ct1
)
395 case CapType::ALLCAP
:
396 cTerm
= makeUpperCase(sTerm
, pCC
);
398 case CapType::INITCAP
:
399 cTerm
= makeInitCap(sTerm
, pCC
);
405 OUString
aAlt( cTerm
+ catst
);
408 Meaning
* pMn
= new Meaning(aRTerm
);
409 OUString
dTerm(pe
->defn
,strlen(pe
->defn
),eEnc
);
410 pMn
->SetMeaning(dTerm
);
411 pMn
->SetSynonyms(aStr
);
412 Reference
<XMeaning
>* pMeaning
= aMeanings
.getArray();
417 pTH
->CleanUpAfterLookup(&pmean
,nmean
);
422 prevMeanings
= aMeanings
;
423 prevLocale
= nLanguage
;
427 if (stem
|| !xLngSvcMgr
.is())
431 xSpell
.set( xLngSvcMgr
->getSpellChecker(), UNO_QUERY
);
432 if (!xSpell
.is() || !xSpell
->isValid( SPELLML_SUPPORT
, static_cast<sal_uInt16
>(nLanguage
), rProperties
))
434 Reference
< XSpellAlternatives
> xTmpRes
;
435 xTmpRes
= xSpell
->spell( "<?xml?><query type='stem'><word>" +
436 aRTerm
+ "</word></query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
439 Sequence
<OUString
>seq
= xTmpRes
->getAlternatives();
440 if (seq
.hasElements())
442 aRTerm
= seq
[0]; // XXX Use only the first stem
447 // stem the last word of the synonym (for categories after affixation)
448 aRTerm
= aRTerm
.trim();
449 sal_Int32 pos
= aRTerm
.lastIndexOf(' ');
452 xTmpRes
= xSpell
->spell( "<?xml?><query type='stem'><word>" +
453 aRTerm
.copy(pos
+ 1) + "</word></query>", static_cast<sal_uInt16
>(nLanguage
), rProperties
);
456 Sequence
<OUString
>seq
= xTmpRes
->getAlternatives();
457 if (seq
.hasElements())
459 aPTerm
= aRTerm
.copy(pos
+ 1);
460 aRTerm
= aRTerm
.copy(0, pos
+ 1) + seq
[0];
462 for (int i
= 0; i
< seq
.getLength(); i
++)
464 OString o
= OUStringToOString(seq
[i
], RTL_TEXTENCODING_UTF8
);
465 fprintf(stderr
, "%d: %s\n", i
+ 1, o
.pData
->buffer
);
476 /// @throws Exception
477 static Reference
< XInterface
> Thesaurus_CreateInstance(
478 const Reference
< XMultiServiceFactory
> & /*rSMgr*/ )
480 Reference
< XInterface
> xService
= static_cast<cppu::OWeakObject
*>(new Thesaurus
);
484 OUString SAL_CALL
Thesaurus::getServiceDisplayName(const Locale
& rLocale
)
486 std::locale
loc(Translate::Create("svt", LanguageTag(rLocale
)));
487 return Translate::get(STR_DESCRIPTION_MYTHES
, loc
);
490 void SAL_CALL
Thesaurus::initialize( const Sequence
< Any
>& rArguments
)
492 MutexGuard
aGuard( GetLinguMutex() );
496 sal_Int32 nLen
= rArguments
.getLength();
499 Reference
< XLinguProperties
> xPropSet
;
500 rArguments
.getConstArray()[0] >>= xPropSet
;
502 //! Pointer allows for access of the non-UNO functions.
503 //! And the reference to the UNO-functions while increasing
504 //! the ref-count and will implicitly free the memory
505 //! when the object is no longer used.
506 pPropHelper
= new PropertyHelper_Thesaurus( static_cast<XThesaurus
*>(this), xPropSet
);
507 pPropHelper
->AddAsPropListener(); //! after a reference is established
510 OSL_FAIL( "wrong number of arguments in sequence" );
514 OUString
Thesaurus::makeLowerCase(const OUString
& aTerm
, CharClass
const * pCC
)
517 return pCC
->lowercase(aTerm
);
521 OUString
Thesaurus::makeUpperCase(const OUString
& aTerm
, CharClass
const * pCC
)
524 return pCC
->uppercase(aTerm
);
528 OUString
Thesaurus::makeInitCap(const OUString
& aTerm
, CharClass
const * pCC
)
530 sal_Int32 tlen
= aTerm
.getLength();
533 OUString bTemp
= aTerm
.copy(0,1);
536 return ( pCC
->uppercase(bTemp
, 0, 1)
537 + pCC
->lowercase(aTerm
,1,(tlen
-1)) );
540 return pCC
->uppercase(bTemp
, 0, 1);
// Disposes the component while holding the linguistic mutex.
// Visible steps: notify and clear aEvtListeners with an EventObject whose
// source is this object, unregister pPropHelper as a property listener, then
// reset pPropHelper to nullptr.
// NOTE(review): several lines of this function are missing from this listing
// (braces, any guards around these statements, and whatever stood between
// RemoveAsPropListener() and the nullptr reset). Confirm the complete body --
// in particular who frees the helper -- against version control.
545 void SAL_CALL
Thesaurus::dispose()
547 MutexGuard
aGuard( GetLinguMutex() );
552 EventObject
aEvtObj( static_cast<XThesaurus
*>(this) );
553 aEvtListeners
.disposeAndClear( aEvtObj
);
556 pPropHelper
->RemoveAsPropListener();
558 pPropHelper
= nullptr;
563 void SAL_CALL
Thesaurus::addEventListener( const Reference
< XEventListener
>& rxListener
)
565 MutexGuard
aGuard( GetLinguMutex() );
567 if (!bDisposing
&& rxListener
.is())
568 aEvtListeners
.addInterface( rxListener
);
571 void SAL_CALL
Thesaurus::removeEventListener( const Reference
< XEventListener
>& rxListener
)
573 MutexGuard
aGuard( GetLinguMutex() );
575 if (!bDisposing
&& rxListener
.is())
576 aEvtListeners
.removeInterface( rxListener
);
579 // Service specific part
580 OUString SAL_CALL
Thesaurus::getImplementationName()
582 return getImplementationName_Static();
585 sal_Bool SAL_CALL
Thesaurus::supportsService( const OUString
& ServiceName
)
587 return cppu::supportsService(this, ServiceName
);
590 Sequence
< OUString
> SAL_CALL
Thesaurus::getSupportedServiceNames()
592 return getSupportedServiceNames_Static();
// Builds a one-element sequence containing SN_THESAURUS.
// NOTE(review): the lines after the signature and after the declaration are
// missing from this listing; presumably aSNS is what gets returned, and the
// signature may carry an exception specification that has to match the header
// -- confirm against version control.
595 Sequence
< OUString
> Thesaurus::getSupportedServiceNames_Static()
598 Sequence
< OUString
> aSNS
{ SN_THESAURUS
};
604 SAL_DLLPUBLIC_EXPORT
void * lnth_component_getFactory(
605 const sal_Char
* pImplName
, void * pServiceManager
, void * /*pRegistryKey*/ )
607 void * pRet
= nullptr;
608 if ( Thesaurus::getImplementationName_Static().equalsAscii( pImplName
) )
611 Reference
< XSingleServiceFactory
> xFactory
=
612 cppu::createOneInstanceFactory(
613 static_cast< XMultiServiceFactory
* >( pServiceManager
),
614 Thesaurus::getImplementationName_Static(),
615 Thesaurus_CreateInstance
,
616 Thesaurus::getSupportedServiceNames_Static());
617 // acquire, because we return an interface pointer instead of a reference
619 pRet
= xFactory
.get();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */