bump product version to 6.3.0.0.beta1
[LibreOffice.git] / lingucomponent / source / thesaurus / libnth / nthesimp.cxx
blobf17e19081294183cea7a0a8a580ec943d5c39fe8
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/uno/Reference.h>
21 #include <cppuhelper/factory.hxx>
22 #include <cppuhelper/supportsservice.hxx>
23 #include <com/sun/star/lang/XSingleServiceFactory.hpp>
24 #include <com/sun/star/registry/XRegistryKey.hpp>
25 #include <com/sun/star/beans/XPropertySet.hpp>
26 #include <com/sun/star/linguistic2/LinguServiceManager.hpp>
27 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
28 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
29 #include <i18nlangtag/languagetag.hxx>
30 #include <tools/debug.hxx>
31 #include <comphelper/lok.hxx>
32 #include <comphelper/processfactory.hxx>
33 #include <osl/mutex.hxx>
34 #include <osl/thread.h>
35 #include <unotools/pathoptions.hxx>
36 #include <unotools/lingucfg.hxx>
37 #include <unotools/resmgr.hxx>
39 #include <rtl/string.hxx>
40 #include <rtl/ustrbuf.hxx>
41 #include <rtl/textenc.h>
43 #include <svtools/strings.hrc>
45 #include "nthesimp.hxx"
46 #include <linguistic/misc.hxx>
47 #include <linguistic/lngprops.hxx>
48 #include "nthesdta.hxx"
50 #include <vector>
51 #include <set>
52 #include <string.h>
54 // XML-header to query SPELLML support
55 #define SPELLML_SUPPORT "<?xml?>"
57 using namespace osl;
58 using namespace com::sun::star;
59 using namespace com::sun::star::beans;
60 using namespace com::sun::star::lang;
61 using namespace com::sun::star::uno;
62 using namespace com::sun::star::linguistic2;
63 using namespace linguistic;
65 static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl()
67 uno::Reference< XComponentContext > xContext( comphelper::getProcessComponentContext() );
68 uno::Reference< XLinguServiceManager2 > xRes = LinguServiceManager::create( xContext ) ;
69 return xRes;
72 Thesaurus::Thesaurus() :
73 aEvtListeners ( GetLinguMutex() )
75 bDisposing = false;
76 pPropHelper = nullptr;
77 prevLocale = LANGUAGE_DONTKNOW;
80 Thesaurus::~Thesaurus()
82 mvThesInfo.clear();
83 if (pPropHelper)
85 pPropHelper->RemoveAsPropListener();
89 PropertyHelper_Thesaurus& Thesaurus::GetPropHelper_Impl()
91 if (!pPropHelper)
93 Reference< XLinguProperties > xPropSet( GetLinguProperties(), UNO_QUERY );
95 pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet );
96 pPropHelper->AddAsPropListener(); //! after a reference is established
98 return *pPropHelper;
101 Sequence< Locale > SAL_CALL Thesaurus::getLocales()
103 MutexGuard aGuard( GetLinguMutex() );
105 // this routine should return the locales supported by the installed
106 // dictionaries.
107 if (mvThesInfo.empty())
109 SvtLinguConfig aLinguCfg;
111 // get list of dictionaries-to-use
112 std::vector< SvtLinguConfigDictionaryEntry > aDics;
113 uno::Sequence< OUString > aFormatList;
114 aLinguCfg.GetSupportedDictionaryFormatsFor( "Thesauri",
115 "org.openoffice.lingu.new.Thesaurus", aFormatList );
116 sal_Int32 nLen = aFormatList.getLength();
117 for (sal_Int32 i = 0; i < nLen; ++i)
119 std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
120 aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
121 aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
124 //!! for compatibility with old dictionaries (the ones not using extensions
125 //!! or new configuration entries, but still using the dictionary.lst file)
126 //!! Get the list of old style spell checking dictionaries to use...
127 std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
128 GetOldStyleDics( "THES" ) );
130 // to prefer dictionaries with configuration entries we will only
131 // use those old style dictionaries that add a language that
132 // is not yet supported by the list od new style dictionaries
133 MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
135 sal_Int32 numthes = aDics.size();
136 if (numthes)
138 // get supported locales from the dictionaries-to-use...
139 sal_Int32 k = 0;
140 std::set<OUString> aLocaleNamesSet;
141 for (auto const& dict : aDics)
143 uno::Sequence< OUString > aLocaleNames(dict.aLocaleNames);
144 sal_Int32 nLen2 = aLocaleNames.getLength();
145 for (k = 0; k < nLen2; ++k)
147 if (!comphelper::LibreOfficeKit::isWhitelistedLanguage(aLocaleNames[k]))
148 continue;
150 aLocaleNamesSet.insert( aLocaleNames[k] );
153 // ... and add them to the resulting sequence
154 aSuppLocales.realloc( aLocaleNamesSet.size() );
155 std::set<OUString>::const_iterator aItB;
156 k = 0;
157 for (auto const& localeName : aLocaleNamesSet)
159 Locale aTmp( LanguageTag::convertToLocale(localeName));
160 aSuppLocales[k++] = aTmp;
163 //! For each dictionary and each locale we need a separate entry.
164 //! If this results in more than one dictionary per locale than (for now)
165 //! it is undefined which dictionary gets used.
166 //! In the future the implementation should support using several dictionaries
167 //! for one locale.
168 numthes = 0;
169 for (auto const& dict : aDics)
170 numthes = numthes + dict.aLocaleNames.getLength();
172 // add dictionary information
173 mvThesInfo.resize(numthes);
175 k = 0;
176 for (auto const& dict : aDics)
178 if (dict.aLocaleNames.hasElements() &&
179 dict.aLocations.hasElements())
181 uno::Sequence< OUString > aLocaleNames(dict.aLocaleNames);
182 sal_Int32 nLocales = aLocaleNames.getLength();
184 // currently only one language per dictionary is supported in the actual implementation...
185 // Thus here we work-around this by adding the same dictionary several times.
186 // Once for each of its supported locales.
187 for (sal_Int32 i = 0; i < nLocales; ++i)
189 LanguageTag aLanguageTag(dict.aLocaleNames[i]);
190 mvThesInfo[k].aEncoding = RTL_TEXTENCODING_DONTKNOW;
191 mvThesInfo[k].aLocale = aLanguageTag.getLocale();
192 mvThesInfo[k].aCharSetInfo.reset( new CharClass( aLanguageTag ) );
193 // also both files have to be in the same directory and the
194 // file names must only differ in the extension (.aff/.dic).
195 // Thus we use the first location only and strip the extension part.
196 OUString aLocation = dict.aLocations[0];
197 sal_Int32 nPos = aLocation.lastIndexOf( '.' );
198 aLocation = aLocation.copy( 0, nPos );
199 mvThesInfo[k].aName = aLocation;
201 ++k;
205 DBG_ASSERT( k == numthes, "index mismatch?" );
207 else
209 /* no dictionary found so register no dictionaries */
210 mvThesInfo.clear();
211 aSuppLocales.realloc(0);
215 return aSuppLocales;
218 sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale)
220 MutexGuard aGuard( GetLinguMutex() );
222 bool bRes = false;
223 if (!aSuppLocales.hasElements())
224 getLocales();
225 sal_Int32 nLen = aSuppLocales.getLength();
226 for (sal_Int32 i = 0; i < nLen; ++i)
228 const Locale *pLocale = aSuppLocales.getConstArray();
229 if (rLocale == pLocale[i])
231 bRes = true;
232 break;
235 return bRes;
// Looks up the meanings (each with its synonyms) of qTerm for rLocale.
//
// Returns an empty sequence when the language is unspecified, the term is
// empty, the locale is not supported (or throws IllegalArgumentException if
// LINGU_EXCEPTIONS is defined), the text encoding of the thesaurus is
// unknown, or nothing is found. The result of the last successful lookup is
// kept in prevTerm / prevLocale / prevMeanings and returned directly when the
// same term and language are queried again.
//
// If the direct lookup finds nothing, the spell checker obtained from the
// linguistic service manager is asked (via "<?xml?>" queries) for a stem of
// the term, and the lookup is repeated once with that stem. In that second
// pass the synonyms are additionally passed through a 'generate' query.
238 Sequence < Reference < css::linguistic2::XMeaning > > SAL_CALL Thesaurus::queryMeanings(
239 const OUString& qTerm, const Locale& rLocale,
240 const css::uno::Sequence< css::beans::PropertyValue >& rProperties)
242 MutexGuard aGuard( GetLinguMutex() );
244 uno::Sequence< Reference< XMeaning > > aMeanings( 1 );
245 uno::Sequence< Reference< XMeaning > > noMeanings( 0 );
246 uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() );
247 uno::Reference< XSpellChecker1 > xSpell;
// aRTerm is the term that is actually looked up (replaced by its stem in
// the second pass); aPTerm is the word used for the 'analyze' query.
249 OUString aRTerm(qTerm);
250 OUString aPTerm(qTerm);
251 CapType ct = CapType::UNKNOWN;
// stem: set to 1 once the stemming pass has started.
// stem2: set to 1 once the 'analyze' query returned a result.
252 sal_Int32 stem = 0;
253 sal_Int32 stem2 = 0;
255 LanguageType nLanguage = LinguLocaleToLanguage( rLocale );
257 if (LinguIsUnspecified( nLanguage) || aRTerm.isEmpty())
258 return noMeanings;
260 if (!hasLocale( rLocale ))
261 #ifdef LINGU_EXCEPTIONS
262 throw( IllegalArgumentException() );
263 #else
264 return noMeanings;
265 #endif
// Same query as last time: reuse the cached result.
267 if (prevTerm == qTerm && prevLocale == nLanguage)
268 return prevMeanings;
270 mentry * pmean = nullptr;
271 sal_Int32 nmean = 0;
273 PropertyHelper_Thesaurus &rHelper = GetPropHelper();
274 rHelper.SetTmpPropVals( rProperties );
276 MyThes * pTH = nullptr;
277 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
278 CharClass * pCC = nullptr;
280 // find the first thesaurus that matches the locale
281 for (size_t i =0; i < mvThesInfo.size(); i++)
283 if (rLocale == mvThesInfo[i].aLocale)
285 // open up and initialize this thesaurus if need be
286 if (!mvThesInfo[i].aThes)
// The data and index files share the base name stored in aName.
288 OUString datpath = mvThesInfo[i].aName + ".dat";
289 OUString idxpath = mvThesInfo[i].aName + ".idx";
290 OUString ndat;
291 OUString nidx;
292 osl::FileBase::getSystemPathFromFileURL(datpath,ndat);
293 osl::FileBase::getSystemPathFromFileURL(idxpath,nidx);
295 #if defined(_WIN32)
296 // MyThes waits UTF-8 encoded paths with \\?\ long path prefix.
297 OString aTmpidx = Win_AddLongPathPrefix(OUStringToOString(nidx, RTL_TEXTENCODING_UTF8));
298 OString aTmpdat = Win_AddLongPathPrefix(OUStringToOString(ndat, RTL_TEXTENCODING_UTF8));
299 #else
300 OString aTmpidx(OU2ENC(nidx,osl_getThreadTextEncoding()));
301 OString aTmpdat(OU2ENC(ndat,osl_getThreadTextEncoding()));
302 #endif
304 mvThesInfo[i].aThes.reset( new MyThes(aTmpidx.getStr(),aTmpdat.getStr()) );
// The encoding is taken from the thesaurus itself once it is loaded.
305 mvThesInfo[i].aEncoding = getTextEncodingFromCharset(mvThesInfo[i].aThes->get_th_encoding());
307 pTH = mvThesInfo[i].aThes.get();
308 eEnc = mvThesInfo[i].aEncoding;
309 pCC = mvThesInfo[i].aCharSetInfo.get();
311 if (pTH)
312 break;
316 // we don't want to work with a default text encoding since following incorrect
317 // results may occur only for specific text and thus may be hard to notice.
318 // Thus better always make a clean exit here if the text encoding is in question.
319 // Hopefully something not working at all will raise proper attention quickly. ;-)
320 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
321 if (eEnc == RTL_TEXTENCODING_DONTKNOW)
322 return noMeanings;
// Lookup loop: the first iteration uses the term as given, a further
// iteration (reached via 'continue' below) uses the stemmed term.
324 while (pTH)
326 // convert word to all lower case for searching
// The capitalization type is only determined for the original term.
327 if (!stem)
328 ct = capitalType(aRTerm, pCC);
329 OUString nTerm(makeLowerCase(aRTerm, pCC));
330 OString aTmp( OU2ENC(nTerm, eEnc) );
331 nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean);
333 if (nmean)
334 aMeanings.realloc( nmean );
336 mentry * pe = pmean;
337 OUString codeTerm = qTerm;
338 Reference< XSpellAlternatives > xTmpRes2;
// In the stemming pass, ask the spell checker to analyze aPTerm; the first
// alternative is later inserted into the 'generate' query.
340 if (stem)
342 xTmpRes2 = xSpell->spell( "<?xml?><query type='analyze'><word>" +
343 aPTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
344 if (xTmpRes2.is())
346 Sequence<OUString>seq = xTmpRes2->getAlternatives();
347 if (seq.hasElements())
349 codeTerm = seq[0];
350 stem2 = 1;
// Convert each returned meaning entry into a Meaning object.
355 for (int j = 0; j < nmean; j++)
357 int count = pe->count;
358 if (count)
360 Sequence< OUString > aStr( count );
361 OUString *pStr = aStr.getArray();
363 for (int i=0; i < count; i++)
365 OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),eEnc );
366 sal_Int32 catpos = sTerm.indexOf('(');
367 OUString catst;
368 if (catpos > 2)
370 // remove category name for affixation and casing
371 catst = " " + sTerm.copy(catpos);
372 sTerm = sTerm.copy(0, catpos);
373 sTerm = sTerm.trim();
375 // generate synonyms with affixes
376 if (stem && stem2)
378 Reference< XSpellAlternatives > xTmpRes;
379 xTmpRes = xSpell->spell( "<?xml?><query type='generate'><word>" +
380 sTerm + "</word>" + codeTerm + "</query>", static_cast<sal_uInt16>(nLanguage), rProperties );
381 if (xTmpRes.is())
383 Sequence<OUString>seq = xTmpRes->getAlternatives();
384 if (seq.hasElements())
385 sTerm = seq[0];
// Apply the capitalization of the queried term to the synonym. Note that a
// MIXED synonym overwrites ct, which then also applies to later synonyms.
389 CapType ct1 = capitalType(sTerm, pCC);
390 if (CapType::MIXED == ct1)
391 ct = ct1;
392 OUString cTerm;
393 switch (ct)
395 case CapType::ALLCAP:
396 cTerm = makeUpperCase(sTerm, pCC);
397 break;
398 case CapType::INITCAP:
399 cTerm = makeInitCap(sTerm, pCC);
400 break;
401 default:
402 cTerm = sTerm;
403 break;
// Re-append the category part that was split off above.
405 OUString aAlt( cTerm + catst);
406 pStr[i] = aAlt;
408 Meaning * pMn = new Meaning(aRTerm);
409 OUString dTerm(pe->defn,strlen(pe->defn),eEnc );
410 pMn->SetMeaning(dTerm);
411 pMn->SetSynonyms(aStr);
412 Reference<XMeaning>* pMeaning = aMeanings.getArray();
413 pMeaning[j] = pMn;
415 pe++;
417 pTH->CleanUpAfterLookup(&pmean,nmean);
// Something was found: remember the query and its result, then return.
419 if (nmean)
421 prevTerm = qTerm;
422 prevMeanings = aMeanings;
423 prevLocale = nLanguage;
424 return aMeanings;
// Nothing found. Give up if the stemming pass was already done or no
// service manager is available; otherwise start the stemming pass.
427 if (stem || !xLngSvcMgr.is())
428 return noMeanings;
429 stem = 1;
431 xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
// isValid() with the SPELLML_SUPPORT header is used to check whether the
// spell checker supports these XML queries.
432 if (!xSpell.is() || !xSpell->isValid( SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage), rProperties ))
433 return noMeanings;
434 Reference< XSpellAlternatives > xTmpRes;
435 xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" +
436 aRTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
437 if (xTmpRes.is())
439 Sequence<OUString>seq = xTmpRes->getAlternatives();
440 if (seq.hasElements())
442 aRTerm = seq[0]; // XXX Use only the first stem
443 continue;
447 // stem the last word of the synonym (for categories after affixation)
448 aRTerm = aRTerm.trim();
449 sal_Int32 pos = aRTerm.lastIndexOf(' ');
// NOTE(review): this only returns when the last space is at index 0; if
// lastIndexOf reports "not found" with a negative value, execution continues
// and the whole term is stemmed again below - confirm that this is intended.
450 if (!pos)
451 return noMeanings;
452 xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" +
453 aRTerm.copy(pos + 1) + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
454 if (xTmpRes.is())
456 Sequence<OUString>seq = xTmpRes->getAlternatives();
457 if (seq.hasElements())
// Keep the unstemmed last word for the 'analyze' query and replace it by
// its first stem in the term to look up.
459 aPTerm = aRTerm.copy(pos + 1);
460 aRTerm = aRTerm.copy(0, pos + 1) + seq[0];
461 #if 0
462 for (int i = 0; i < seq.getLength(); i++)
464 OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8);
465 fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
467 #endif
468 continue;
// No stem available: leave the lookup loop.
471 break;
473 return noMeanings;
476 /// @throws Exception
477 static Reference< XInterface > Thesaurus_CreateInstance(
478 const Reference< XMultiServiceFactory > & /*rSMgr*/ )
480 Reference< XInterface > xService = static_cast<cppu::OWeakObject*>(new Thesaurus);
481 return xService;
484 OUString SAL_CALL Thesaurus::getServiceDisplayName(const Locale& rLocale)
486 std::locale loc(Translate::Create("svt", LanguageTag(rLocale)));
487 return Translate::get(STR_DESCRIPTION_MYTHES, loc);
490 void SAL_CALL Thesaurus::initialize( const Sequence< Any >& rArguments )
492 MutexGuard aGuard( GetLinguMutex() );
494 if (!pPropHelper)
496 sal_Int32 nLen = rArguments.getLength();
497 if (1 == nLen)
499 Reference< XLinguProperties > xPropSet;
500 rArguments.getConstArray()[0] >>= xPropSet;
502 //! Pointer allows for access of the non-UNO functions.
503 //! And the reference to the UNO-functions while increasing
504 //! the ref-count and will implicitly free the memory
505 //! when the object is no longer used.
506 pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet );
507 pPropHelper->AddAsPropListener(); //! after a reference is established
509 else
510 OSL_FAIL( "wrong number of arguments in sequence" );
514 OUString Thesaurus::makeLowerCase(const OUString& aTerm, CharClass const * pCC)
516 if (pCC)
517 return pCC->lowercase(aTerm);
518 return aTerm;
521 OUString Thesaurus::makeUpperCase(const OUString& aTerm, CharClass const * pCC)
523 if (pCC)
524 return pCC->uppercase(aTerm);
525 return aTerm;
528 OUString Thesaurus::makeInitCap(const OUString& aTerm, CharClass const * pCC)
530 sal_Int32 tlen = aTerm.getLength();
531 if (pCC && tlen)
533 OUString bTemp = aTerm.copy(0,1);
534 if (tlen > 1)
536 return ( pCC->uppercase(bTemp, 0, 1)
537 + pCC->lowercase(aTerm,1,(tlen-1)) );
540 return pCC->uppercase(bTemp, 0, 1);
542 return aTerm;
545 void SAL_CALL Thesaurus::dispose()
547 MutexGuard aGuard( GetLinguMutex() );
549 if (!bDisposing)
551 bDisposing = true;
552 EventObject aEvtObj( static_cast<XThesaurus *>(this) );
553 aEvtListeners.disposeAndClear( aEvtObj );
554 if (pPropHelper)
556 pPropHelper->RemoveAsPropListener();
557 delete pPropHelper;
558 pPropHelper = nullptr;
563 void SAL_CALL Thesaurus::addEventListener( const Reference< XEventListener >& rxListener )
565 MutexGuard aGuard( GetLinguMutex() );
567 if (!bDisposing && rxListener.is())
568 aEvtListeners.addInterface( rxListener );
571 void SAL_CALL Thesaurus::removeEventListener( const Reference< XEventListener >& rxListener )
573 MutexGuard aGuard( GetLinguMutex() );
575 if (!bDisposing && rxListener.is())
576 aEvtListeners.removeInterface( rxListener );
579 // Service specific part
580 OUString SAL_CALL Thesaurus::getImplementationName()
582 return getImplementationName_Static();
585 sal_Bool SAL_CALL Thesaurus::supportsService( const OUString& ServiceName )
587 return cppu::supportsService(this, ServiceName);
590 Sequence< OUString > SAL_CALL Thesaurus::getSupportedServiceNames()
592 return getSupportedServiceNames_Static();
595 Sequence< OUString > Thesaurus::getSupportedServiceNames_Static()
596 throw()
598 Sequence< OUString > aSNS { SN_THESAURUS };
599 return aSNS;
602 extern "C"
604 SAL_DLLPUBLIC_EXPORT void * lnth_component_getFactory(
605 const sal_Char * pImplName, void * pServiceManager, void * /*pRegistryKey*/ )
607 void * pRet = nullptr;
608 if ( Thesaurus::getImplementationName_Static().equalsAscii( pImplName ) )
611 Reference< XSingleServiceFactory > xFactory =
612 cppu::createOneInstanceFactory(
613 static_cast< XMultiServiceFactory * >( pServiceManager ),
614 Thesaurus::getImplementationName_Static(),
615 Thesaurus_CreateInstance,
616 Thesaurus::getSupportedServiceNames_Static());
617 // acquire, because we return an interface pointer instead of a reference
618 xFactory->acquire();
619 pRet = xFactory.get();
621 return pRet;
625 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */