bump product version to 4.2.0.1
[LibreOffice.git] / lingucomponent / source / spellcheck / spell / sspellimp.cxx
blobe3a79df0fe6648615cbc98de559192740d92989c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <com/sun/star/uno/Reference.h>
22 #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
24 #include <com/sun/star/linguistic2/SpellFailure.hpp>
25 #include <cppuhelper/factory.hxx>
26 #include <cppuhelper/supportsservice.hxx>
27 #include <com/sun/star/registry/XRegistryKey.hpp>
28 #include <tools/debug.hxx>
29 #include <osl/mutex.hxx>
31 #include <lingutil.hxx>
32 #include <hunspell.hxx>
33 #include <dictmgr.hxx>
34 #include <sspellimp.hxx>
36 #include <linguistic/lngprops.hxx>
37 #include <linguistic/spelldta.hxx>
38 #include <i18nlangtag/languagetag.hxx>
39 #include <unotools/pathoptions.hxx>
40 #include <unotools/lingucfg.hxx>
41 #include <unotools/useroptions.hxx>
42 #include <osl/file.hxx>
43 #include <rtl/ustrbuf.hxx>
44 #include <rtl/textenc.h>
46 #include <list>
47 #include <set>
48 #include <string.h>
50 using namespace utl;
51 using namespace osl;
52 using namespace com::sun::star;
53 using namespace com::sun::star::beans;
54 using namespace com::sun::star::lang;
55 using namespace com::sun::star::uno;
56 using namespace com::sun::star::linguistic2;
57 using namespace linguistic;
60 // XML-header of SPELLML queries
61 #define SPELLML_HEADER "<?xml?>"
63 ///////////////////////////////////////////////////////////////////////////
65 SpellChecker::SpellChecker() :
66 aDicts(NULL),
67 aDEncs(NULL),
68 aDLocs(NULL),
69 aDNames(NULL),
70 numdict(0),
71 aEvtListeners(GetLinguMutex()),
72 pPropHelper(NULL),
73 bDisposing(false)
77 SpellChecker::~SpellChecker()
79 if (aDicts)
81 for (int i = 0; i < numdict; ++i)
83 delete aDicts[i];
85 delete[] aDicts;
87 delete[] aDEncs;
88 delete[] aDLocs;
89 delete[] aDNames;
90 if (pPropHelper)
92 pPropHelper->RemoveAsPropListener();
93 delete pPropHelper;
97 PropertyHelper_Spelling & SpellChecker::GetPropHelper_Impl()
99 if (!pPropHelper)
101 Reference< XLinguProperties > xPropSet( GetLinguProperties(), UNO_QUERY );
103 pPropHelper = new PropertyHelper_Spelling( (XSpellChecker *) this, xPropSet );
104 pPropHelper->AddAsPropListener(); //! after a reference is established
106 return *pPropHelper;
110 Sequence< Locale > SAL_CALL SpellChecker::getLocales()
111 throw(RuntimeException)
113 MutexGuard aGuard( GetLinguMutex() );
115 // this routine should return the locales supported by the installed
116 // dictionaries.
118 if (!numdict)
120 SvtLinguConfig aLinguCfg;
122 // get list of extension dictionaries-to-use
123 // (or better speaking: the list of dictionaries using the
124 // new configuration entries).
125 std::list< SvtLinguConfigDictionaryEntry > aDics;
126 uno::Sequence< OUString > aFormatList;
127 aLinguCfg.GetSupportedDictionaryFormatsFor( "SpellCheckers",
128 "org.openoffice.lingu.MySpellSpellChecker", aFormatList );
129 sal_Int32 nLen = aFormatList.getLength();
130 for (sal_Int32 i = 0; i < nLen; ++i)
132 std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
133 aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
134 aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
137 //!! for compatibility with old dictionaries (the ones not using extensions
138 //!! or new configuration entries, but still using the dictionary.lst file)
139 //!! Get the list of old style spell checking dictionaries to use...
140 std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
141 GetOldStyleDics( "DICT" ) );
143 // to prefer dictionaries with configuration entries we will only
144 // use those old style dictionaries that add a language that
145 // is not yet supported by the list od new style dictionaries
146 MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
148 if (!aDics.empty())
150 // get supported locales from the dictionaries-to-use...
151 sal_Int32 k = 0;
152 std::set< OUString, lt_rtl_OUString > aLocaleNamesSet;
153 std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
154 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
156 uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
157 sal_Int32 nLen2 = aLocaleNames.getLength();
158 for (k = 0; k < nLen2; ++k)
160 aLocaleNamesSet.insert( aLocaleNames[k] );
163 // ... and add them to the resulting sequence
164 aSuppLocales.realloc( aLocaleNamesSet.size() );
165 std::set< OUString, lt_rtl_OUString >::const_iterator aItB;
166 k = 0;
167 for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
169 Locale aTmp( LanguageTag::convertToLocale( *aItB ));
170 aSuppLocales[k++] = aTmp;
173 //! For each dictionary and each locale we need a separate entry.
174 //! If this results in more than one dictionary per locale than (for now)
175 //! it is undefined which dictionary gets used.
176 //! In the future the implementation should support using several dictionaries
177 //! for one locale.
178 numdict = 0;
179 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
180 numdict = numdict + aDictIt->aLocaleNames.getLength();
182 // add dictionary information
183 aDicts = new Hunspell* [numdict];
184 aDEncs = new rtl_TextEncoding [numdict];
185 aDLocs = new Locale [numdict];
186 aDNames = new OUString [numdict];
187 k = 0;
188 for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
190 if (aDictIt->aLocaleNames.getLength() > 0 &&
191 aDictIt->aLocations.getLength() > 0)
193 uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
194 sal_Int32 nLocales = aLocaleNames.getLength();
196 // currently only one language per dictionary is supported in the actual implementation...
197 // Thus here we work-around this by adding the same dictionary several times.
198 // Once for each of it's supported locales.
199 for (sal_Int32 i = 0; i < nLocales; ++i)
201 aDicts[k] = NULL;
202 aDEncs[k] = RTL_TEXTENCODING_DONTKNOW;
203 aDLocs[k] = LanguageTag::convertToLocale( aLocaleNames[i] );
204 // also both files have to be in the same directory and the
205 // file names must only differ in the extension (.aff/.dic).
206 // Thus we use the first location only and strip the extension part.
207 OUString aLocation = aDictIt->aLocations[0];
208 sal_Int32 nPos = aLocation.lastIndexOf( '.' );
209 aLocation = aLocation.copy( 0, nPos );
210 aDNames[k] = aLocation;
212 ++k;
216 DBG_ASSERT( k == numdict, "index mismatch?" );
218 else
220 /* no dictionary found so register no dictionaries */
221 numdict = 0;
222 delete[] aDicts;
223 aDicts = NULL;
224 delete[] aDEncs;
225 aDEncs = NULL;
226 delete[] aDLocs;
227 aDLocs = NULL;
228 delete[] aDNames;
229 aDNames = NULL;
230 aSuppLocales.realloc(0);
234 return aSuppLocales;
238 sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& rLocale)
239 throw(RuntimeException)
241 MutexGuard aGuard( GetLinguMutex() );
243 sal_Bool bRes = sal_False;
244 if (!aSuppLocales.getLength())
245 getLocales();
247 const Locale *pLocale = aSuppLocales.getConstArray();
248 sal_Int32 nLen = aSuppLocales.getLength();
249 for (sal_Int32 i = 0; i < nLen; ++i)
251 if (rLocale == pLocale[i])
253 bRes = sal_True;
254 break;
257 return bRes;
261 sal_Int16 SpellChecker::GetSpellFailure( const OUString &rWord, const Locale &rLocale )
263 Hunspell * pMS = NULL;
264 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
266 // initialize a myspell object for each dictionary once
267 // (note: mutex is held higher up in isValid)
269 sal_Int16 nRes = -1;
271 // first handle smart quotes both single and double
272 OUStringBuffer rBuf(rWord);
273 sal_Int32 n = rBuf.getLength();
274 sal_Unicode c;
275 sal_Int32 extrachar = 0;
277 for (sal_Int32 ix=0; ix < n; ix++)
279 c = rBuf[ix];
280 if ((c == 0x201C) || (c == 0x201D))
281 rBuf[ix] = (sal_Unicode)0x0022;
282 else if ((c == 0x2018) || (c == 0x2019))
283 rBuf[ix] = (sal_Unicode)0x0027;
285 // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
286 // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
287 // set ICONV and IGNORE aff file options, if needed.)
289 else if ((c == 0x200C) || (c == 0x200D) ||
290 ((c >= 0xFB00) && (c <= 0xFB04)))
291 extrachar = 1;
293 OUString nWord(rBuf.makeStringAndClear());
295 if (n)
297 for (sal_Int32 i = 0; i < numdict; ++i)
299 pMS = NULL;
300 eEnc = RTL_TEXTENCODING_DONTKNOW;
302 if (rLocale == aDLocs[i])
304 if (!aDicts[i])
306 OUString dicpath = aDNames[i] + ".dic";
307 OUString affpath = aDNames[i] + ".aff";
308 OUString dict;
309 OUString aff;
310 osl::FileBase::getSystemPathFromFileURL(dicpath,dict);
311 osl::FileBase::getSystemPathFromFileURL(affpath,aff);
312 OString aTmpaff(OU2ENC(aff,osl_getThreadTextEncoding()));
313 OString aTmpdict(OU2ENC(dict,osl_getThreadTextEncoding()));
315 #if defined(WNT)
316 // workaround for Windows specifc problem that the
317 // path length in calls to 'fopen' is limted to somewhat
318 // about 120+ characters which will usually be exceed when
319 // using dictionaries as extensions.
320 aTmpaff = Win_GetShortPathName( aff );
321 aTmpdict = Win_GetShortPathName( dict );
322 #endif
324 aDicts[i] = new Hunspell(aTmpaff.getStr(),aTmpdict.getStr());
325 aDEncs[i] = RTL_TEXTENCODING_DONTKNOW;
326 if (aDicts[i])
327 aDEncs[i] = getTextEncodingFromCharset(aDicts[i]->get_dic_encoding());
329 pMS = aDicts[i];
330 eEnc = aDEncs[i];
333 if (pMS)
335 // we don't want to work with a default text encoding since following incorrect
336 // results may occur only for specific text and thus may be hard to notice.
337 // Thus better always make a clean exit here if the text encoding is in question.
338 // Hopefully something not working at all will raise proper attention quickly. ;-)
339 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
340 if (eEnc == RTL_TEXTENCODING_DONTKNOW)
341 return -1;
343 OString aWrd(OU2ENC(nWord,eEnc));
344 int rVal = pMS->spell((char*)aWrd.getStr());
345 if (rVal != 1) {
346 if (extrachar && (eEnc != RTL_TEXTENCODING_UTF8)) {
347 OUStringBuffer mBuf(nWord);
348 n = mBuf.getLength();
349 for (sal_Int32 ix=n-1; ix >= 0; ix--)
351 switch (mBuf[ix]) {
352 case 0xFB00: mBuf.remove(ix, 1); mBuf.insert(ix, "ff"); break;
353 case 0xFB01: mBuf.remove(ix, 1); mBuf.insert(ix, "fi"); break;
354 case 0xFB02: mBuf.remove(ix, 1); mBuf.insert(ix, "fl"); break;
355 case 0xFB03: mBuf.remove(ix, 1); mBuf.insert(ix, "ffi"); break;
356 case 0xFB04: mBuf.remove(ix, 1); mBuf.insert(ix, "ffl"); break;
357 case 0x200C:
358 case 0x200D: mBuf.remove(ix, 1); break;
361 OUString mWord(mBuf.makeStringAndClear());
362 OString bWrd(OU2ENC(mWord, eEnc));
363 rVal = pMS->spell((char*)bWrd.getStr());
364 if (rVal == 1) return -1;
366 nRes = SpellFailure::SPELLING_ERROR;
367 } else {
368 return -1;
370 pMS = NULL;
375 return nRes;
379 sal_Bool SAL_CALL SpellChecker::isValid( const OUString& rWord, const Locale& rLocale,
380 const PropertyValues& rProperties )
381 throw(IllegalArgumentException, RuntimeException)
383 MutexGuard aGuard( GetLinguMutex() );
385 if (rLocale == Locale() || rWord.isEmpty())
386 return sal_True;
388 if (!hasLocale( rLocale ))
389 return sal_True;
391 // return sal_False to process SPELLML requests (they are longer than the header)
392 if (rWord.match(SPELLML_HEADER, 0) && (rWord.getLength() > 10)) return sal_False;
394 // Get property values to be used.
395 // These are be the default values set in the SN_LINGU_PROPERTIES
396 // PropertySet which are overridden by the supplied ones from the
397 // last argument.
398 // You'll probably like to use a simplier solution than the provided
399 // one using the PropertyHelper_Spell.
401 PropertyHelper_Spelling& rHelper = GetPropHelper();
402 rHelper.SetTmpPropVals( rProperties );
404 sal_Int16 nFailure = GetSpellFailure( rWord, rLocale );
405 if (nFailure != -1 && !rWord.match(SPELLML_HEADER, 0))
407 sal_Int16 nLang = LinguLocaleToLanguage( rLocale );
408 // postprocess result for errors that should be ignored
409 const bool bIgnoreError =
410 (!rHelper.IsSpellUpperCase() && IsUpper( rWord, nLang )) ||
411 (!rHelper.IsSpellWithDigits() && HasDigits( rWord )) ||
412 (!rHelper.IsSpellCapitalization() && nFailure == SpellFailure::CAPTION_ERROR);
413 if (bIgnoreError)
414 nFailure = -1;
417 return (nFailure == -1);
421 Reference< XSpellAlternatives >
422 SpellChecker::GetProposals( const OUString &rWord, const Locale &rLocale )
424 // Retrieves the return values for the 'spell' function call in case
425 // of a misspelled word.
426 // Especially it may give a list of suggested (correct) words:
428 Reference< XSpellAlternatives > xRes;
429 // note: mutex is held by higher up by spell which covers both
431 Hunspell* pMS = NULL;
432 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
434 // first handle smart quotes (single and double)
435 OUStringBuffer rBuf(rWord);
436 sal_Int32 n = rBuf.getLength();
437 sal_Unicode c;
438 for (sal_Int32 ix=0; ix < n; ix++)
440 c = rBuf[ix];
441 if ((c == 0x201C) || (c == 0x201D))
442 rBuf[ix] = (sal_Unicode)0x0022;
443 if ((c == 0x2018) || (c == 0x2019))
444 rBuf[ix] = (sal_Unicode)0x0027;
446 OUString nWord(rBuf.makeStringAndClear());
448 if (n)
450 sal_Int16 nLang = LinguLocaleToLanguage( rLocale );
451 int numsug = 0;
453 Sequence< OUString > aStr( 0 );
454 for (int i = 0; i < numdict; i++)
456 pMS = NULL;
457 eEnc = RTL_TEXTENCODING_DONTKNOW;
459 if (rLocale == aDLocs[i])
461 pMS = aDicts[i];
462 eEnc = aDEncs[i];
465 if (pMS)
467 char ** suglst = NULL;
468 OString aWrd(OU2ENC(nWord,eEnc));
469 int count = pMS->suggest(&suglst, (const char *) aWrd.getStr());
471 if (count)
473 aStr.realloc( numsug + count );
474 OUString *pStr = aStr.getArray();
475 for (int ii=0; ii < count; ++ii)
477 OUString cvtwrd(suglst[ii],strlen(suglst[ii]),eEnc);
478 pStr[numsug + ii] = cvtwrd;
480 numsug += count;
483 pMS->free_list(&suglst, count);
487 // now return an empty alternative for no suggestions or the list of alternatives if some found
488 OUString aTmp(rWord);
489 xRes = SpellAlternatives::CreateSpellAlternatives( aTmp, nLang, SpellFailure::SPELLING_ERROR, aStr );
490 return xRes;
492 return xRes;
496 Reference< XSpellAlternatives > SAL_CALL SpellChecker::spell(
497 const OUString& rWord, const Locale& rLocale,
498 const PropertyValues& rProperties )
499 throw(IllegalArgumentException, RuntimeException)
501 MutexGuard aGuard( GetLinguMutex() );
503 if (rLocale == Locale() || rWord.isEmpty())
504 return NULL;
506 if (!hasLocale( rLocale ))
507 return NULL;
509 Reference< XSpellAlternatives > xAlt;
510 if (!isValid( rWord, rLocale, rProperties ))
512 xAlt = GetProposals( rWord, rLocale );
514 return xAlt;
518 Reference< XInterface > SAL_CALL SpellChecker_CreateInstance(
519 const Reference< XMultiServiceFactory > & /*rSMgr*/ )
520 throw(Exception)
523 Reference< XInterface > xService = (cppu::OWeakObject*) new SpellChecker;
524 return xService;
528 sal_Bool SAL_CALL SpellChecker::addLinguServiceEventListener(
529 const Reference< XLinguServiceEventListener >& rxLstnr )
530 throw(RuntimeException)
532 MutexGuard aGuard( GetLinguMutex() );
534 sal_Bool bRes = sal_False;
535 if (!bDisposing && rxLstnr.is())
537 bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
539 return bRes;
543 sal_Bool SAL_CALL SpellChecker::removeLinguServiceEventListener(
544 const Reference< XLinguServiceEventListener >& rxLstnr )
545 throw(RuntimeException)
547 MutexGuard aGuard( GetLinguMutex() );
549 sal_Bool bRes = sal_False;
550 if (!bDisposing && rxLstnr.is())
552 bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
554 return bRes;
558 OUString SAL_CALL SpellChecker::getServiceDisplayName( const Locale& /*rLocale*/ )
559 throw(RuntimeException)
561 MutexGuard aGuard( GetLinguMutex() );
562 return OUString( "Hunspell SpellChecker" );
566 void SAL_CALL SpellChecker::initialize( const Sequence< Any >& rArguments )
567 throw(Exception, RuntimeException)
569 MutexGuard aGuard( GetLinguMutex() );
571 if (!pPropHelper)
573 sal_Int32 nLen = rArguments.getLength();
574 if (2 == nLen)
576 Reference< XLinguProperties > xPropSet;
577 rArguments.getConstArray()[0] >>= xPropSet;
578 //rArguments.getConstArray()[1] >>= xDicList;
580 //! Pointer allows for access of the non-UNO functions.
581 //! And the reference to the UNO-functions while increasing
582 //! the ref-count and will implicitly free the memory
583 //! when the object is not longer used.
584 pPropHelper = new PropertyHelper_Spelling( (XSpellChecker *) this, xPropSet );
585 pPropHelper->AddAsPropListener(); //! after a reference is established
587 else {
588 OSL_FAIL( "wrong number of arguments in sequence" );
594 void SAL_CALL SpellChecker::dispose()
595 throw(RuntimeException)
597 MutexGuard aGuard( GetLinguMutex() );
599 if (!bDisposing)
601 bDisposing = true;
602 EventObject aEvtObj( (XSpellChecker *) this );
603 aEvtListeners.disposeAndClear( aEvtObj );
604 if (pPropHelper)
606 pPropHelper->RemoveAsPropListener();
607 delete pPropHelper;
608 pPropHelper = NULL;
614 void SAL_CALL SpellChecker::addEventListener( const Reference< XEventListener >& rxListener )
615 throw(RuntimeException)
617 MutexGuard aGuard( GetLinguMutex() );
619 if (!bDisposing && rxListener.is())
620 aEvtListeners.addInterface( rxListener );
624 void SAL_CALL SpellChecker::removeEventListener( const Reference< XEventListener >& rxListener )
625 throw(RuntimeException)
627 MutexGuard aGuard( GetLinguMutex() );
629 if (!bDisposing && rxListener.is())
630 aEvtListeners.removeInterface( rxListener );
634 // Service specific part
635 OUString SAL_CALL SpellChecker::getImplementationName()
636 throw(RuntimeException)
638 MutexGuard aGuard( GetLinguMutex() );
640 return getImplementationName_Static();
643 sal_Bool SAL_CALL SpellChecker::supportsService( const OUString& ServiceName )
644 throw(RuntimeException)
646 return cppu::supportsService(this, ServiceName);
649 Sequence< OUString > SAL_CALL SpellChecker::getSupportedServiceNames()
650 throw(RuntimeException)
652 MutexGuard aGuard( GetLinguMutex() );
654 return getSupportedServiceNames_Static();
657 Sequence< OUString > SpellChecker::getSupportedServiceNames_Static()
658 throw()
660 MutexGuard aGuard( GetLinguMutex() );
662 Sequence< OUString > aSNS( 1 ); // auch mehr als 1 Service moeglich
663 aSNS.getArray()[0] = SN_SPELLCHECKER;
664 return aSNS;
667 void * SAL_CALL SpellChecker_getFactory( const sal_Char * pImplName,
668 XMultiServiceFactory * pServiceManager, void * )
670 void * pRet = 0;
671 if ( SpellChecker::getImplementationName_Static().equalsAscii( pImplName ) )
673 Reference< XSingleServiceFactory > xFactory =
674 cppu::createOneInstanceFactory(
675 pServiceManager,
676 SpellChecker::getImplementationName_Static(),
677 SpellChecker_CreateInstance,
678 SpellChecker::getSupportedServiceNames_Static());
679 // acquire, because we return an interface pointer instead of a reference
680 xFactory->acquire();
681 pRet = xFactory.get();
683 return pRet;
687 ///////////////////////////////////////////////////////////////////////////
689 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */