bump product version to 4.1.6.2
[LibreOffice.git] / lingucomponent / source / spellcheck / spell / sspellimp.cxx
blob2e4e7e6305b9bb8073704d9ac671168b497ace5d
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <com/sun/star/uno/Reference.h>
22 #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
24 #include <com/sun/star/linguistic2/SpellFailure.hpp>
25 #include <cppuhelper/factory.hxx> // helper for factories
26 #include <com/sun/star/registry/XRegistryKey.hpp>
27 #include <tools/debug.hxx>
28 #include <osl/mutex.hxx>
30 #include <lingutil.hxx>
31 #include <hunspell.hxx>
32 #include <dictmgr.hxx>
33 #include <sspellimp.hxx>
35 #include <linguistic/lngprops.hxx>
36 #include <linguistic/spelldta.hxx>
37 #include <i18nlangtag/languagetag.hxx>
38 #include <unotools/pathoptions.hxx>
39 #include <unotools/lingucfg.hxx>
40 #include <unotools/useroptions.hxx>
41 #include <osl/file.hxx>
42 #include <rtl/ustrbuf.hxx>
43 #include <rtl/textenc.h>
45 #include <list>
46 #include <set>
47 #include <string.h>
49 using namespace utl;
50 using namespace osl;
51 using namespace com::sun::star;
52 using namespace com::sun::star::beans;
53 using namespace com::sun::star::lang;
54 using namespace com::sun::star::uno;
55 using namespace com::sun::star::linguistic2;
56 using namespace linguistic;
59 // XML-header of SPELLML queries
60 #define SPELLML_HEADER "<?xml?>"
62 ///////////////////////////////////////////////////////////////////////////
64 SpellChecker::SpellChecker() :
65 aDicts(NULL),
66 aDEncs(NULL),
67 aDLocs(NULL),
68 aDNames(NULL),
69 numdict(0),
70 aEvtListeners(GetLinguMutex()),
71 pPropHelper(NULL),
72 bDisposing(false)
76 SpellChecker::~SpellChecker()
78 if (aDicts)
80 for (int i = 0; i < numdict; ++i)
82 delete aDicts[i];
84 delete[] aDicts;
86 delete[] aDEncs;
87 delete[] aDLocs;
88 delete[] aDNames;
89 if (pPropHelper)
91 pPropHelper->RemoveAsPropListener();
92 delete pPropHelper;
96 PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl()
98 if (!pPropHelper)
100 Reference< XLinguProperties > xPropSet( GetLinguProperties(), UNO_QUERY );
102 pPropHelper = new PropertyHelper_Spelling( (XSpellChecker *) this, xPropSet );
103 pPropHelper->AddAsPropListener(); //! after a reference is established
105 return *pPropHelper;
109 Sequence< Locale > SAL_CALL SpellChecker::getLocales()
110 throw(RuntimeException)
112 MutexGuard aGuard( GetLinguMutex() );
114 // this routine should return the locales supported by the installed
115 // dictionaries.
117 if (!numdict)
119 SvtLinguConfig aLinguCfg;
121 // get list of extension dictionaries-to-use
122 // (or better speaking: the list of dictionaries using the
123 // new configuration entries).
124 std::list< SvtLinguConfigDictionaryEntry > aDics;
125 uno::Sequence< OUString > aFormatList;
126 aLinguCfg.GetSupportedDictionaryFormatsFor( "SpellCheckers",
127 "org.openoffice.lingu.MySpellSpellChecker", aFormatList );
128 sal_Int32 nLen = aFormatList.getLength();
129 for (sal_Int32 i = 0; i < nLen; ++i)
131 std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
132 aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
133 aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
136 //!! for compatibility with old dictionaries (the ones not using extensions
137 //!! or new configuration entries, but still using the dictionary.lst file)
138 //!! Get the list of old style spell checking dictionaries to use...
139 std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
140 GetOldStyleDics( "DICT" ) );
142 // to prefer dictionaries with configuration entries we will only
143 // use those old style dictionaries that add a language that
144 // is not yet supported by the list od new style dictionaries
145 MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
147 if (!aDics.empty())
149 // get supported locales from the dictionaries-to-use...
150 sal_Int32 k = 0;
151 std::set< OUString, lt_rtl_OUString > aLocaleNamesSet;
152 std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
153 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
155 uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
156 sal_Int32 nLen2 = aLocaleNames.getLength();
157 for (k = 0; k < nLen2; ++k)
159 aLocaleNamesSet.insert( aLocaleNames[k] );
162 // ... and add them to the resulting sequence
163 aSuppLocales.realloc( aLocaleNamesSet.size() );
164 std::set< OUString, lt_rtl_OUString >::const_iterator aItB;
165 k = 0;
166 for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
168 Locale aTmp( LanguageTag( *aItB ).getLocale());
169 aSuppLocales[k++] = aTmp;
172 //! For each dictionary and each locale we need a separate entry.
173 //! If this results in more than one dictionary per locale than (for now)
174 //! it is undefined which dictionary gets used.
175 //! In the future the implementation should support using several dictionaries
176 //! for one locale.
177 numdict = 0;
178 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
179 numdict = numdict + aDictIt->aLocaleNames.getLength();
181 // add dictionary information
182 aDicts = new Hunspell* [numdict];
183 aDEncs = new rtl_TextEncoding [numdict];
184 aDLocs = new Locale [numdict];
185 aDNames = new OUString [numdict];
186 k = 0;
187 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
189 if (aDictIt->aLocaleNames.getLength() > 0 &&
190 aDictIt->aLocations.getLength() > 0)
192 uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
193 sal_Int32 nLocales = aLocaleNames.getLength();
195 // currently only one language per dictionary is supported in the actual implementation...
196 // Thus here we work-around this by adding the same dictionary several times.
197 // Once for each of it's supported locales.
198 for (sal_Int32 i = 0; i < nLocales; ++i)
200 aDicts[k] = NULL;
201 aDEncs[k] = RTL_TEXTENCODING_DONTKNOW;
202 aDLocs[k] = LanguageTag( aLocaleNames[i] ).getLocale();
203 // also both files have to be in the same directory and the
204 // file names must only differ in the extension (.aff/.dic).
205 // Thus we use the first location only and strip the extension part.
206 OUString aLocation = aDictIt->aLocations[0];
207 sal_Int32 nPos = aLocation.lastIndexOf( '.' );
208 aLocation = aLocation.copy( 0, nPos );
209 aDNames[k] = aLocation;
211 ++k;
215 DBG_ASSERT( k == numdict, "index mismatch?" );
217 else
219 /* no dictionary found so register no dictionaries */
220 numdict = 0;
221 delete[] aDicts;
222 aDicts = NULL;
223 delete[] aDEncs;
224 aDEncs = NULL;
225 delete[] aDLocs;
226 aDLocs = NULL;
227 delete[] aDNames;
228 aDNames = NULL;
229 aSuppLocales.realloc(0);
233 return aSuppLocales;
237 sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale)
238 throw(RuntimeException)
240 MutexGuard aGuard( GetLinguMutex() );
242 sal_Bool bRes = sal_False;
243 if (!aSuppLocales.getLength())
244 getLocales();
246 sal_Int32 nLen = aSuppLocales.getLength();
247 for (sal_Int32 i = 0; i < nLen; ++i)
249 const Locale *pLocale = aSuppLocales.getConstArray();
250 if (rLocale == pLocale[i])
252 bRes = sal_True;
253 break;
256 return bRes;
260 sal_Int16 SpellChecker::GetSpellFailure( const OUString &rWord, const Locale &rLocale )
262 Hunspell * pMS = NULL;
263 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
265 // initialize a myspell object for each dictionary once
266 // (note: mutex is held higher up in isValid)
268 sal_Int16 nRes = -1;
270 // first handle smart quotes both single and double
271 OUStringBuffer rBuf(rWord);
272 sal_Int32 n = rBuf.getLength();
273 sal_Unicode c;
274 sal_Int32 extrachar = 0;
276 for (sal_Int32 ix=0; ix < n; ix++)
278 c = rBuf[ix];
279 if ((c == 0x201C) || (c == 0x201D))
280 rBuf[ix] = (sal_Unicode)0x0022;
281 else if ((c == 0x2018) || (c == 0x2019))
282 rBuf[ix] = (sal_Unicode)0x0027;
284 // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
285 // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
286 // set ICONV and IGNORE aff file options, if needed.)
288 else if ((c == 0x200C) || (c == 0x200D) ||
289 ((c >= 0xFB00) && (c <= 0xFB04)))
290 extrachar = 1;
292 OUString nWord(rBuf.makeStringAndClear());
294 if (n)
296 for (sal_Int32 i = 0; i < numdict; ++i)
298 pMS = NULL;
299 eEnc = RTL_TEXTENCODING_DONTKNOW;
301 if (rLocale == aDLocs[i])
303 if (!aDicts[i])
305 OUString dicpath = aDNames[i] + ".dic";
306 OUString affpath = aDNames[i] + ".aff";
307 OUString dict;
308 OUString aff;
309 osl::FileBase::getSystemPathFromFileURL(dicpath,dict);
310 osl::FileBase::getSystemPathFromFileURL(affpath,aff);
311 OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding()));
312 OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding()));
314 #if defined(WNT)
315 // workaround for Windows specifc problem that the
316 // path length in calls to 'fopen' is limted to somewhat
317 // about 120+ characters which will usually be exceed when
318 // using dictionaries as extensions.
319 aTmpaff = Win_GetShortPathName( aff );
320 aTmpdict = Win_GetShortPathName( dict );
321 #endif
323 aDicts[i] = new Hunspell(aTmpaff.getStr(),aTmpdict.getStr());
324 aDEncs[i] = RTL_TEXTENCODING_DONTKNOW;
325 if (aDicts[i])
326 aDEncs[i] = getTextEncodingFromCharset(aDicts[i]->get_dic_encoding());
328 pMS = aDicts[i];
329 eEnc = aDEncs[i];
332 if (pMS)
334 // we don't want to work with a default text encoding since following incorrect
335 // results may occur only for specific text and thus may be hard to notice.
336 // Thus better always make a clean exit here if the text encoding is in question.
337 // Hopefully something not working at all will raise proper attention quickly. ;-)
338 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
339 if (eEnc == RTL_TEXTENCODING_DONTKNOW)
340 return -1;
342 OString aWrd(OU2ENC(nWord,eEnc));
343 int rVal = pMS->spell((char*)aWrd.getStr());
344 if (rVal != 1) {
345 if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) {
346 OUStringBuffer mBuf(nWord);
347 n = mBuf.getLength();
348 for (sal_Int32 ix=n-1; ix >= 0; ix--)
350 switch (mBuf[ix]) {
351 case 0xFB00: mBuf.remove(ix, 1); mBuf.insert(ix, "ff"); break;
352 case 0xFB01: mBuf.remove(ix, 1); mBuf.insert(ix, "fi"); break;
353 case 0xFB02: mBuf.remove(ix, 1); mBuf.insert(ix, "fl"); break;
354 case 0xFB03: mBuf.remove(ix, 1); mBuf.insert(ix, "ffi"); break;
355 case 0xFB04: mBuf.remove(ix, 1); mBuf.insert(ix, "ffl"); break;
356 case 0x200C:
357 case 0x200D: mBuf.remove(ix, 1); break;
360 OUString mWord(mBuf.makeStringAndClear());
361 OString bWrd(OU2ENC(mWord, eEnc));
362 rVal = pMS->spell((char*)bWrd.getStr());
363 if (rVal == 1) return -1;
365 nRes = SpellFailure::SPELLING_ERROR;
366 } else {
367 return -1;
369 pMS = NULL;
374 return nRes;
378 sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale,
379 const PropertyValues& rProperties )
380 throw(IllegalArgumentException, RuntimeException)
382 MutexGuard aGuard( GetLinguMutex() );
384 if (rLocale == Locale() || rWord.isEmpty())
385 return sal_True;
387 if (!hasLocale( rLocale ))
388 return sal_True;
390 // return sal_False to process SPELLML requests (they are longer than the header)
391 if (rWord.match(SPELLML_HEADER, 0) && (rWord.getLength() > 10)) return sal_False;
393 // Get property values to be used.
394 // These are be the default values set in the SN_LINGU_PROPERTIES
395 // PropertySet which are overridden by the supplied ones from the
396 // last argument.
397 // You'll probably like to use a simplier solution than the provided
398 // one using the PropertyHelper_Spell.
400 PropertyHelper_Spelling& rHelper = GetPropHelper();
401 rHelper.SetTmpPropVals( rProperties );
403 sal_Int16 nFailure = GetSpellFailure( rWord, rLocale );
404 if (nFailure != -1 && !rWord.match(SPELLML_HEADER, 0))
406 sal_Int16 nLang = LinguLocaleToLanguage( rLocale );
407 // postprocess result for errors that should be ignored
408 const bool bIgnoreError =
409 (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) ||
410 (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) ||
411 (!rHelper.IsSpellCapitalization() && nFailure == SpellFailure::CAPTION_ERROR);
412 if (bIgnoreError)
413 nFailure = -1;
416 return (nFailure == -1);
420 Reference< XSpellAlternatives >
421 SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale )
423 // Retrieves the return values for the 'spell' function call in case
424 // of a misspelled word.
425 // Especially it may give a list of suggested (correct) words:
427 Reference< XSpellAlternatives > xRes;
428 // note: mutex is held by higher up by spell which covers both
430 Hunspell* pMS = NULL;
431 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
433 // first handle smart quotes (single and double)
434 OUStringBuffer rBuf(rWord);
435 sal_Int32 n = rBuf.getLength();
436 sal_Unicode c;
437 for (sal_Int32 ix=0; ix < n; ix++)
439 c = rBuf[ix];
440 if ((c == 0x201C) || (c == 0x201D))
441 rBuf[ix] = (sal_Unicode)0x0022;
442 if ((c == 0x2018) || (c == 0x2019))
443 rBuf[ix] = (sal_Unicode)0x0027;
445 OUString nWord(rBuf.makeStringAndClear());
447 if (n)
449 sal_Int16 nLang = LinguLocaleToLanguage( rLocale );
450 int numsug = 0;
452 Sequence< OUString > aStr( 0 );
453 for (int i = 0; i < numdict; i++)
455 pMS = NULL;
456 eEnc = RTL_TEXTENCODING_DONTKNOW;
458 if (rLocale == aDLocs[i])
460 pMS = aDicts[i];
461 eEnc = aDEncs[i];
464 if (pMS)
466 char ** suglst = NULL;
467 OString aWrd(OU2ENC(nWord,eEnc));
468 int count = pMS->suggest(&suglst, (const char *) aWrd.getStr());
470 if (count)
472 aStr.realloc( numsug + count );
473 OUString *pStr = aStr.getArray();
474 for (int ii=0; ii < count; ++ii)
476 OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc);
477 pStr[numsug + ii] = cvtwrd;
479 pMS->free_list(&suglst, count);
480 numsug += count;
485 // now return an empty alternative for no suggestions or the list of alternatives if some found
486 String aTmp(rWord);
487 xRes = SpellAlternatives::CreateSpellAlternatives( aTmp, nLang, SpellFailure::SPELLING_ERROR, aStr );
488 return xRes;
490 return xRes;
494 Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell(
495 const OUString& rWord, const Locale& rLocale,
496 const PropertyValues& rProperties )
497 throw(IllegalArgumentException, RuntimeException)
499 MutexGuard aGuard( GetLinguMutex() );
501 if (rLocale == Locale() || rWord.isEmpty())
502 return NULL;
504 if (!hasLocale( rLocale ))
505 return NULL;
507 Reference< XSpellAlternatives > xAlt;
508 if (!isValid( rWord, rLocale, rProperties ))
510 xAlt = GetProposals( rWord, rLocale );
512 return xAlt;
516 Reference< XInterface > SAL_CALL SpellChecker_CreateInstance(
517 const Reference< XMultiServiceFactory > & /*rSMgr*/ )
518 throw(Exception)
521 Reference< XInterface > xService = (cppu::OWeakObject*) new SpellChecker;
522 return xService;
526 sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener(
527 const Reference< XLinguServiceEventListener >& rxLstnr )
528 throw(RuntimeException)
530 MutexGuard aGuard( GetLinguMutex() );
532 sal_Bool bRes = sal_False;
533 if (!bDisposing && rxLstnr.is())
535 bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
537 return bRes;
541 sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener(
542 const Reference< XLinguServiceEventListener >& rxLstnr )
543 throw(RuntimeException)
545 MutexGuard aGuard( GetLinguMutex() );
547 sal_Bool bRes = sal_False;
548 if (!bDisposing && rxLstnr.is())
550 bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
552 return bRes;
556 OUString SAL_CALL SpellChecker::getServiceDisplayName( const Locale& /*rLocale*/ )
557 throw(RuntimeException)
559 MutexGuard aGuard( GetLinguMutex() );
560 return OUString( "Hunspell SpellChecker" );
564 void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments )
565 throw(Exception, RuntimeException)
567 MutexGuard aGuard( GetLinguMutex() );
569 if (!pPropHelper)
571 sal_Int32 nLen = rArguments.getLength();
572 if (2 == nLen)
574 Reference< XLinguProperties > xPropSet;
575 rArguments.getConstArray()[0] >>= xPropSet;
576 //rArguments.getConstArray()[1] >>= xDicList;
578 //! Pointer allows for access of the non-UNO functions.
579 //! And the reference to the UNO-functions while increasing
580 //! the ref-count and will implicitly free the memory
581 //! when the object is not longer used.
582 pPropHelper = new PropertyHelper_Spelling( (XSpellChecker *) this, xPropSet );
583 pPropHelper->AddAsPropListener(); //! after a reference is established
585 else {
586 OSL_FAIL( "wrong number of arguments in sequence" );
592 void SAL_CALL SpellChecker::dispose()
593 throw(RuntimeException)
595 MutexGuard aGuard( GetLinguMutex() );
597 if (!bDisposing)
599 bDisposing = true;
600 EventObject aEvtObj( (XSpellChecker *) this );
601 aEvtListeners.disposeAndClear( aEvtObj );
602 if (pPropHelper)
604 pPropHelper->RemoveAsPropListener();
605 delete pPropHelper;
606 pPropHelper = NULL;
612 void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener )
613 throw(RuntimeException)
615 MutexGuard aGuard( GetLinguMutex() );
617 if (!bDisposing && rxListener.is())
618 aEvtListeners.addInterface( rxListener );
622 void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener )
623 throw(RuntimeException)
625 MutexGuard aGuard( GetLinguMutex() );
627 if (!bDisposing && rxListener.is())
628 aEvtListeners.removeInterface( rxListener );
632 ///////////////////////////////////////////////////////////////////////////
633 // Service specific part
636 OUString SAL_CALL SpellChecker::getImplementationName()
637 throw(RuntimeException)
639 MutexGuard aGuard( GetLinguMutex() );
641 return getImplementationName_Static();
645 sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName )
646 throw(RuntimeException)
648 MutexGuard aGuard( GetLinguMutex() );
650 Sequence< OUString > aSNL = getSupportedServiceNames();
651 const OUString * pArray = aSNL.getConstArray();
652 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
653 if( pArray[i] == ServiceName )
654 return sal_True;
655 return sal_False;
659 Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames()
660 throw(RuntimeException)
662 MutexGuard aGuard( GetLinguMutex() );
664 return getSupportedServiceNames_Static();
668 Sequence< OUString > SpellChecker::getSupportedServiceNames_Static()
669 throw()
671 MutexGuard aGuard( GetLinguMutex() );
673 Sequence< OUString > aSNS( 1 ); // auch mehr als 1 Service moeglich
674 aSNS.getArray()[0] = SN_SPELLCHECKER;
675 return aSNS;
678 void * SAL_CALL SpellChecker_getFactory( const sal_Char * pImplName,
679 XMultiServiceFactory * pServiceManager, void * )
681 void * pRet = 0;
682 if ( !SpellChecker::getImplementationName_Static().compareToAscii( pImplName ) )
684 Reference< XSingleServiceFactory > xFactory =
685 cppu::createOneInstanceFactory(
686 pServiceManager,
687 SpellChecker::getImplementationName_Static(),
688 SpellChecker_CreateInstance,
689 SpellChecker::getSupportedServiceNames_Static());
690 // acquire, because we return an interface pointer instead of a reference
691 xFactory->acquire();
692 pRet = xFactory.get();
694 return pRet;
698 ///////////////////////////////////////////////////////////////////////////
700 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */