1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <com/sun/star/uno/Reference.h>
22 #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
24 #include <com/sun/star/linguistic2/SpellFailure.hpp>
25 #include <cppuhelper/factory.hxx> // helper for factories
26 #include <com/sun/star/registry/XRegistryKey.hpp>
27 #include <tools/debug.hxx>
28 #include <osl/mutex.hxx>
30 #include <lingutil.hxx>
31 #include <hunspell.hxx>
32 #include <dictmgr.hxx>
33 #include <sspellimp.hxx>
35 #include <linguistic/lngprops.hxx>
36 #include <linguistic/spelldta.hxx>
37 #include <i18nlangtag/languagetag.hxx>
38 #include <unotools/pathoptions.hxx>
39 #include <unotools/lingucfg.hxx>
40 #include <unotools/useroptions.hxx>
41 #include <osl/file.hxx>
42 #include <rtl/ustrbuf.hxx>
43 #include <rtl/textenc.h>
51 using namespace com::sun::star
;
52 using namespace com::sun::star::beans
;
53 using namespace com::sun::star::lang
;
54 using namespace com::sun::star::uno
;
55 using namespace com::sun::star::linguistic2
;
56 using namespace linguistic
;
// Prefix that identifies a SPELLML query. isValid() tests for it with
// rWord.match(SPELLML_HEADER, 0).
59 // XML-header of SPELLML queries
60 #define SPELLML_HEADER "<?xml?>"
62 ///////////////////////////////////////////////////////////////////////////
// Constructor. The member initializer shown here builds aEvtListeners from
// the mutex returned by GetLinguMutex().
64 SpellChecker::SpellChecker() :
70 aEvtListeners(GetLinguMutex()),
// Destructor: walks over the numdict dictionary slots and unregisters
// pPropHelper as a property listener.
// NOTE(review): the loop presumably releases the per-dictionary objects
// created in getLocales()/GetSpellFailure() - confirm.
76 SpellChecker::~SpellChecker()
80 for (int i
= 0; i
< numdict
; ++i
)
91 pPropHelper
->RemoveAsPropListener();
// Creates the PropertyHelper_Spelling used by this spell checker.
// The helper is built from the linguistic properties returned by
// GetLinguProperties(), stored in pPropHelper and then registered as a
// property listener. The function returns a PropertyHelper_Spelling reference.
96 PropertyHelper_Spelling
& SpellChecker::GetPropHelper_Impl()
// query the linguistic property set that the helper will observe
100 Reference
< XLinguProperties
> xPropSet( GetLinguProperties(), UNO_QUERY
);
102 pPropHelper
= new PropertyHelper_Spelling( (XSpellChecker
*) this, xPropSet
);
103 pPropHelper
->AddAsPropListener(); //! after a reference is established
// Determines the locales for which spell checking dictionaries are available.
//
// With GetLinguMutex() held, the code
//  1. collects the active dictionaries for every format supported by
//     "org.openoffice.lingu.MySpellSpellChecker",
//  2. merges in the old-style dictionaries returned by GetOldStyleDics("DICT"),
//  3. builds the set of distinct locale names and stores the corresponding
//     Locale objects in aSuppLocales,
//  4. allocates the arrays aDicts, aDEncs, aDLocs and aDNames with numdict
//     entries and fills in encoding, locale and base file name per entry.
// If no dictionary is found, aSuppLocales is resized to 0.
// NOTE(review): presumably the returned sequence is aSuppLocales - confirm.
109 Sequence
< Locale
> SAL_CALL
SpellChecker::getLocales()
110 throw(RuntimeException
)
112 MutexGuard
aGuard( GetLinguMutex() );
114 // this routine should return the locales supported by the installed
119 SvtLinguConfig aLinguCfg
;
121 // get list of extension dictionaries-to-use
122 // (or better speaking: the list of dictionaries using the
123 // new configuration entries).
124 std::list
< SvtLinguConfigDictionaryEntry
> aDics
;
125 uno::Sequence
< OUString
> aFormatList
;
126 aLinguCfg
.GetSupportedDictionaryFormatsFor( "SpellCheckers",
127 "org.openoffice.lingu.MySpellSpellChecker", aFormatList
);
128 sal_Int32 nLen
= aFormatList
.getLength();
// append the active dictionaries of every supported format to aDics
129 for (sal_Int32 i
= 0; i
< nLen
; ++i
)
131 std::vector
< SvtLinguConfigDictionaryEntry
> aTmpDic(
132 aLinguCfg
.GetActiveDictionariesByFormat( aFormatList
[i
] ) );
133 aDics
.insert( aDics
.end(), aTmpDic
.begin(), aTmpDic
.end() );
136 //!! for compatibility with old dictionaries (the ones not using extensions
137 //!! or new configuration entries, but still using the dictionary.lst file)
138 //!! Get the list of old style spell checking dictionaries to use...
139 std::vector
< SvtLinguConfigDictionaryEntry
> aOldStyleDics(
140 GetOldStyleDics( "DICT" ) );
142 // to prefer dictionaries with configuration entries we will only
143 // use those old style dictionaries that add a language that
144 // is not yet supported by the list of new style dictionaries
145 MergeNewStyleDicsAndOldStyleDics( aDics
, aOldStyleDics
);
149 // get supported locales from the dictionaries-to-use...
151 std::set
< OUString
, lt_rtl_OUString
> aLocaleNamesSet
;
152 std::list
< SvtLinguConfigDictionaryEntry
>::const_iterator aDictIt
;
153 for (aDictIt
= aDics
.begin(); aDictIt
!= aDics
.end(); ++aDictIt
)
155 uno::Sequence
< OUString
> aLocaleNames( aDictIt
->aLocaleNames
);
156 sal_Int32 nLen2
= aLocaleNames
.getLength();
// the set removes duplicate locale names across dictionaries
157 for (k
= 0; k
< nLen2
; ++k
)
159 aLocaleNamesSet
.insert( aLocaleNames
[k
] );
162 // ... and add them to the resulting sequence
163 aSuppLocales
.realloc( aLocaleNamesSet
.size() );
164 std::set
< OUString
, lt_rtl_OUString
>::const_iterator aItB
;
166 for (aItB
= aLocaleNamesSet
.begin(); aItB
!= aLocaleNamesSet
.end(); ++aItB
)
// convert each locale name to a Locale via LanguageTag
168 Locale
aTmp( LanguageTag( *aItB
).getLocale());
169 aSuppLocales
[k
++] = aTmp
;
172 //! For each dictionary and each locale we need a separate entry.
173 //! If this results in more than one dictionary per locale then (for now)
174 //! it is undefined which dictionary gets used.
175 //! In the future the implementation should support using several dictionaries
// numdict counts one slot per (dictionary, locale name) pair
178 for (aDictIt
= aDics
.begin(); aDictIt
!= aDics
.end(); ++aDictIt
)
179 numdict
= numdict
+ aDictIt
->aLocaleNames
.getLength();
181 // add dictionary information
182 aDicts
= new Hunspell
* [numdict
];
183 aDEncs
= new rtl_TextEncoding
[numdict
];
184 aDLocs
= new Locale
[numdict
];
185 aDNames
= new OUString
[numdict
];
187 for (aDictIt
= aDics
.begin(); aDictIt
!= aDics
.end(); ++aDictIt
)
// only dictionaries that name at least one locale and one location are used
189 if (aDictIt
->aLocaleNames
.getLength() > 0 &&
190 aDictIt
->aLocations
.getLength() > 0)
192 uno::Sequence
< OUString
> aLocaleNames( aDictIt
->aLocaleNames
);
193 sal_Int32 nLocales
= aLocaleNames
.getLength();
195 // currently only one language per dictionary is supported in the actual implementation...
196 // Thus here we work-around this by adding the same dictionary several times.
197 // Once for each of its supported locales.
198 for (sal_Int32 i
= 0; i
< nLocales
; ++i
)
// the encoding is not known yet; GetSpellFailure() sets aDEncs[] from the
// dictionary's charset after it has created the Hunspell object
201 aDEncs
[k
] = RTL_TEXTENCODING_DONTKNOW
;
202 aDLocs
[k
] = LanguageTag( aLocaleNames
[i
] ).getLocale();
203 // also both files have to be in the same directory and the
204 // file names must only differ in the extension (.aff/.dic).
205 // Thus we use the first location only and strip the extension part.
206 OUString aLocation
= aDictIt
->aLocations
[0];
207 sal_Int32 nPos
= aLocation
.lastIndexOf( '.' );
208 aLocation
= aLocation
.copy( 0, nPos
);
209 aDNames
[k
] = aLocation
;
// sanity check: the running index k must have reached numdict
215 DBG_ASSERT( k
== numdict
, "index mismatch?" );
219 /* no dictionary found so register no dictionaries */
229 aSuppLocales
.realloc(0);
// Tells whether rLocale is one of the supported locales.
// With GetLinguMutex() held, rLocale is compared with each entry of
// aSuppLocales; bRes starts out as sal_False.
// NOTE(review): when aSuppLocales is still empty the code presumably calls
// getLocales() first to fill it - confirm.
237 sal_Bool SAL_CALL
SpellChecker::hasLocale(const Locale
& rLocale
)
238 throw(RuntimeException
)
240 MutexGuard
aGuard( GetLinguMutex() );
242 sal_Bool bRes
= sal_False
;
243 if (!aSuppLocales
.getLength())
246 sal_Int32 nLen
= aSuppLocales
.getLength();
// linear search through the supported locales
247 for (sal_Int32 i
= 0; i
< nLen
; ++i
)
249 const Locale
*pLocale
= aSuppLocales
.getConstArray();
250 if (rLocale
== pLocale
[i
])
// Checks rWord against the dictionaries whose locale equals rLocale.
//
// Visible steps:
//  - typographic double/single quotes in the word are replaced by the ASCII
//    characters 0x22 / 0x27,
//  - for a matching dictionary slot a Hunspell object is created from the
//    <name>.aff / <name>.dic pair and its text encoding is derived from the
//    dictionary's charset,
//  - the word is converted to that encoding and passed to Hunspell::spell(),
//  - for encodings other than UTF-8, and when extrachar is set, the word is
//    checked again with the ligatures U+FB00..U+FB04 replaced by their
//    letter sequences and U+200D removed.
// Returns -1 when that second check accepts the word; isValid() treats -1 as
// "no failure". Otherwise the result is set to SpellFailure::SPELLING_ERROR.
// NOTE(review): extrachar is presumably set when a ligature or ZWNJ/ZWJ
// character is found in the first loop - confirm.
260 sal_Int16
SpellChecker::GetSpellFailure( const OUString
&rWord
, const Locale
&rLocale
)
262 Hunspell
* pMS
= NULL
;
263 rtl_TextEncoding eEnc
= RTL_TEXTENCODING_DONTKNOW
;
265 // initialize a myspell object for each dictionary once
266 // (note: mutex is held higher up in isValid)
270 // first handle smart quotes both single and double
271 OUStringBuffer
rBuf(rWord
);
272 sal_Int32 n
= rBuf
.getLength();
274 sal_Int32 extrachar
= 0;
276 for (sal_Int32 ix
=0; ix
< n
; ix
++)
// U+201C / U+201D (curly double quotes) -> '"'
279 if ((c
== 0x201C) || (c
== 0x201D))
280 rBuf
[ix
] = (sal_Unicode
)0x0022;
// U+2018 / U+2019 (curly single quotes) -> '\''
281 else if ((c
== 0x2018) || (c
== 0x2019))
282 rBuf
[ix
] = (sal_Unicode
)0x0027;
284 // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
285 // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
286 // set ICONV and IGNORE aff file options, if needed.)
288 else if ((c
== 0x200C) || (c
== 0x200D) ||
289 ((c
>= 0xFB00) && (c
<= 0xFB04)))
292 OUString
nWord(rBuf
.makeStringAndClear());
// look at every dictionary slot; only those matching rLocale are used
296 for (sal_Int32 i
= 0; i
< numdict
; ++i
)
299 eEnc
= RTL_TEXTENCODING_DONTKNOW
;
301 if (rLocale
== aDLocs
[i
])
// aDNames[i] holds the location without extension (see getLocales)
305 OUString dicpath
= aDNames
[i
] + ".dic";
306 OUString affpath
= aDNames
[i
] + ".aff";
// turn the file URLs into system paths and encode them with the
// thread's text encoding for the Hunspell constructor
309 osl::FileBase::getSystemPathFromFileURL(dicpath
,dict
);
310 osl::FileBase::getSystemPathFromFileURL(affpath
,aff
);
311 OString
aTmpaff(OU2ENC(aff
,osl_getThreadTextEncoding()));
312 OString
aTmpdict(OU2ENC(dict
,osl_getThreadTextEncoding()));
315 // workaround for Windows specific problem that the
316 // path length in calls to 'fopen' is limited to somewhat
317 // about 120+ characters which will usually be exceeded when
318 // using dictionaries as extensions.
319 aTmpaff
= Win_GetShortPathName( aff
);
320 aTmpdict
= Win_GetShortPathName( dict
);
// create the Hunspell object for this slot and determine its encoding
// from the charset declared by the dictionary
323 aDicts
[i
] = new Hunspell(aTmpaff
.getStr(),aTmpdict
.getStr());
324 aDEncs
[i
] = RTL_TEXTENCODING_DONTKNOW
;
326 aDEncs
[i
] = getTextEncodingFromCharset(aDicts
[i
]->get_dic_encoding());
334 // we don't want to work with a default text encoding since following incorrect
335 // results may occur only for specific text and thus may be hard to notice.
336 // Thus better always make a clean exit here if the text encoding is in question.
337 // Hopefully something not working at all will raise proper attention quickly. ;-)
338 DBG_ASSERT( eEnc
!= RTL_TEXTENCODING_DONTKNOW
, "failed to get text encoding! (maybe incorrect encoding string in file)" );
339 if (eEnc
== RTL_TEXTENCODING_DONTKNOW
)
// convert the word to the dictionary encoding and ask Hunspell
342 OString
aWrd(OU2ENC(nWord
,eEnc
));
343 int rVal
= pMS
->spell((char*)aWrd
.getStr());
// second attempt for non-UTF-8 dictionaries: expand ligatures to plain
// letters (walking backwards so earlier indices stay valid)
345 if (extrachar
&& (eEnc
!= RTL_TEXTENCODING_UTF8
)) {
346 OUStringBuffer
mBuf(nWord
);
347 n
= mBuf
.getLength();
348 for (sal_Int32 ix
=n
-1; ix
>= 0; ix
--)
351 case 0xFB00: mBuf
.remove(ix
, 1); mBuf
.insert(ix
, "ff"); break;
352 case 0xFB01: mBuf
.remove(ix
, 1); mBuf
.insert(ix
, "fi"); break;
353 case 0xFB02: mBuf
.remove(ix
, 1); mBuf
.insert(ix
, "fl"); break;
354 case 0xFB03: mBuf
.remove(ix
, 1); mBuf
.insert(ix
, "ffi"); break;
355 case 0xFB04: mBuf
.remove(ix
, 1); mBuf
.insert(ix
, "ffl"); break;
357 case 0x200D: mBuf
.remove(ix
, 1); break;
360 OUString
mWord(mBuf
.makeStringAndClear());
361 OString
bWrd(OU2ENC(mWord
, eEnc
));
362 rVal
= pMS
->spell((char*)bWrd
.getStr());
// a result of 1 from spell() means the word was accepted
363 if (rVal
== 1) return -1;
365 nRes
= SpellFailure::SPELLING_ERROR
;
// Reports whether rWord is spelled correctly for rLocale.
//
// With GetLinguMutex() held the function
//  - tests for an empty locale or empty word and for an unsupported locale,
//  - returns sal_False for SPELLML requests (words starting with
//    SPELLML_HEADER that are longer than 10 characters),
//  - applies rProperties as temporary property values on the helper,
//  - calls GetSpellFailure() and, for a failure on a non-SPELLML word,
//    computes bIgnoreError from the upper-case, digits and capitalization
//    settings.
// The result is true exactly when nFailure is -1.
// NOTE(review): presumably nFailure is reset to -1 when bIgnoreError is true,
// and the early checks return without spell checking - confirm.
378 sal_Bool SAL_CALL
SpellChecker::isValid( const OUString
& rWord
, const Locale
& rLocale
,
379 const PropertyValues
& rProperties
)
380 throw(IllegalArgumentException
, RuntimeException
)
382 MutexGuard
aGuard( GetLinguMutex() );
384 if (rLocale
== Locale() || rWord
.isEmpty())
387 if (!hasLocale( rLocale
))
390 // return sal_False to process SPELLML requests (they are longer than the header)
391 if (rWord
.match(SPELLML_HEADER
, 0) && (rWord
.getLength() > 10)) return sal_False
;
393 // Get property values to be used.
394 // These are the default values set in the SN_LINGU_PROPERTIES
395 // PropertySet which are overridden by the supplied ones from the
397 // You'll probably like to use a simpler solution than the provided
398 // one using the PropertyHelper_Spell.
400 PropertyHelper_Spelling
& rHelper
= GetPropHelper();
401 rHelper
.SetTmpPropVals( rProperties
);
403 sal_Int16 nFailure
= GetSpellFailure( rWord
, rLocale
);
// post-processing applies only to real failures of non-SPELLML words
404 if (nFailure
!= -1 && !rWord
.match(SPELLML_HEADER
, 0))
406 sal_Int16 nLang
= LinguLocaleToLanguage( rLocale
);
407 // postprocess result for errors that should be ignored
408 const bool bIgnoreError
=
409 (!rHelper
.IsSpellUpperCase() && IsUpper( rWord
, nLang
)) ||
410 (!rHelper
.IsSpellWithDigits() && HasDigits( rWord
)) ||
411 (!rHelper
.IsSpellCapitalization() && nFailure
== SpellFailure::CAPTION_ERROR
);
// -1 is the "no failure" value of GetSpellFailure()
416 return (nFailure
== -1);
// Builds the XSpellAlternatives object for a misspelled word.
//
// Typographic quotes in rWord are replaced by their ASCII counterparts, then
// Hunspell::suggest() is called for the dictionary slots whose locale equals
// rLocale. Each suggestion is converted from the dictionary encoding to an
// OUString and appended to aStr; the Hunspell suggestion list is released
// with free_list(). The result is created through
// SpellAlternatives::CreateSpellAlternatives() with failure type
// SpellFailure::SPELLING_ERROR.
420 Reference
< XSpellAlternatives
>
421 SpellChecker::GetProposals( const OUString
&rWord
, const Locale
&rLocale
)
423 // Retrieves the return values for the 'spell' function call in case
424 // of a misspelled word.
425 // Especially it may give a list of suggested (correct) words:
427 Reference
< XSpellAlternatives
> xRes
;
428 // note: mutex is held higher up by spell, which covers both
430 Hunspell
* pMS
= NULL
;
431 rtl_TextEncoding eEnc
= RTL_TEXTENCODING_DONTKNOW
;
433 // first handle smart quotes (single and double)
434 OUStringBuffer
rBuf(rWord
);
435 sal_Int32 n
= rBuf
.getLength();
437 for (sal_Int32 ix
=0; ix
< n
; ix
++)
// U+201C / U+201D -> '"'
440 if ((c
== 0x201C) || (c
== 0x201D))
441 rBuf
[ix
] = (sal_Unicode
)0x0022;
// U+2018 / U+2019 -> '\''
442 if ((c
== 0x2018) || (c
== 0x2019))
443 rBuf
[ix
] = (sal_Unicode
)0x0027;
445 OUString
nWord(rBuf
.makeStringAndClear());
449 sal_Int16 nLang
= LinguLocaleToLanguage( rLocale
);
// aStr accumulates the suggestions; it starts out empty
452 Sequence
< OUString
> aStr( 0 );
453 for (int i
= 0; i
< numdict
; i
++)
456 eEnc
= RTL_TEXTENCODING_DONTKNOW
;
458 if (rLocale
== aDLocs
[i
])
// ask Hunspell for suggestions; suglst receives the list, count its size
466 char ** suglst
= NULL
;
467 OString
aWrd(OU2ENC(nWord
,eEnc
));
468 int count
= pMS
->suggest(&suglst
, (const char *) aWrd
.getStr());
// grow the result and append the suggestions after the first numsug entries
472 aStr
.realloc( numsug
+ count
);
473 OUString
*pStr
= aStr
.getArray();
474 for (int ii
=0; ii
< count
; ++ii
)
// convert from the dictionary's encoding back to an OUString
476 OUString
cvtwrd(suglst
[ii
],strlen(suglst
[ii
]),eEnc
);
477 pStr
[numsug
+ ii
] = cvtwrd
;
// hand the suggestion list back to Hunspell for release
479 pMS
->free_list(&suglst
, count
);
485 // now return an empty alternative for no suggestions or the list of alternatives if some found
487 xRes
= SpellAlternatives::CreateSpellAlternatives( aTmp
, nLang
, SpellFailure::SPELLING_ERROR
, aStr
);
// Spell checks rWord for rLocale and provides alternatives when it is wrong.
// With GetLinguMutex() held, the function tests for an empty locale/word and
// for an unsupported locale; if isValid() reports the word as invalid, xAlt is
// filled by GetProposals().
// NOTE(review): presumably xAlt (empty for a valid word) is what gets
// returned - confirm.
494 Reference
< XSpellAlternatives
> SAL_CALL
SpellChecker::spell(
495 const OUString
& rWord
, const Locale
& rLocale
,
496 const PropertyValues
& rProperties
)
497 throw(IllegalArgumentException
, RuntimeException
)
499 MutexGuard
aGuard( GetLinguMutex() );
501 if (rLocale
== Locale() || rWord
.isEmpty())
504 if (!hasLocale( rLocale
))
507 Reference
< XSpellAlternatives
> xAlt
;
// suggestions are only computed for words that failed the check
508 if (!isValid( rWord
, rLocale
, rProperties
))
510 xAlt
= GetProposals( rWord
, rLocale
);
// Factory function: creates a new SpellChecker and holds it in an XInterface
// reference (xService). The service manager argument is not used.
516 Reference
< XInterface
> SAL_CALL
SpellChecker_CreateInstance(
517 const Reference
< XMultiServiceFactory
> & /*rSMgr*/ )
// the cast to OWeakObject* selects the base through which XInterface is obtained
521 Reference
< XInterface
> xService
= (cppu::OWeakObject
*) new SpellChecker
;
// Registers rxLstnr as a linguistic service event listener.
// With GetLinguMutex() held, the request is forwarded to the property helper
// unless the object is being disposed or rxLstnr is empty; in those cases
// bRes keeps its initial value sal_False.
526 sal_Bool SAL_CALL
SpellChecker::addLinguServiceEventListener(
527 const Reference
< XLinguServiceEventListener
>& rxLstnr
)
528 throw(RuntimeException
)
530 MutexGuard
aGuard( GetLinguMutex() );
532 sal_Bool bRes
= sal_False
;
533 if (!bDisposing
&& rxLstnr
.is())
535 bRes
= GetPropHelper().addLinguServiceEventListener( rxLstnr
);
// Unregisters rxLstnr as a linguistic service event listener.
// With GetLinguMutex() held, the request is forwarded to the property helper
// unless the object is being disposed or rxLstnr is empty; in those cases
// bRes keeps its initial value sal_False.
541 sal_Bool SAL_CALL
SpellChecker::removeLinguServiceEventListener(
542 const Reference
< XLinguServiceEventListener
>& rxLstnr
)
543 throw(RuntimeException
)
545 MutexGuard
aGuard( GetLinguMutex() );
547 sal_Bool bRes
= sal_False
;
548 if (!bDisposing
&& rxLstnr
.is())
550 bRes
= GetPropHelper().removeLinguServiceEventListener( rxLstnr
);
// Returns the display name of this service. The locale argument is ignored;
// the fixed string "Hunspell SpellChecker" is returned for every locale.
556 OUString SAL_CALL
SpellChecker::getServiceDisplayName( const Locale
& /*rLocale*/ )
557 throw(RuntimeException
)
559 MutexGuard
aGuard( GetLinguMutex() );
560 return OUString( "Hunspell SpellChecker" );
// Initializes the spell checker from the argument sequence.
// With GetLinguMutex() held, the first argument is extracted as an
// XLinguProperties reference; a PropertyHelper_Spelling is created from it,
// stored in pPropHelper and registered as a property listener.
// OSL_FAIL reports a wrong number of arguments.
// NOTE(review): the conditions that guard the set-up (argument count, helper
// not yet created) are presumably checked via nLen and pPropHelper - confirm.
564 void SAL_CALL
SpellChecker::initialize( const Sequence
< Any
>& rArguments
)
565 throw(Exception
, RuntimeException
)
567 MutexGuard
aGuard( GetLinguMutex() );
571 sal_Int32 nLen
= rArguments
.getLength();
574 Reference
< XLinguProperties
> xPropSet
;
// argument 0 carries the linguistic property set
575 rArguments
.getConstArray()[0] >>= xPropSet
;
576 //rArguments.getConstArray()[1] >>= xDicList;
578 //! Pointer allows for access of the non-UNO functions.
579 //! And the reference to the UNO-functions while increasing
580 //! the ref-count and will implicitly free the memory
581 //! when the object is no longer used.
582 pPropHelper
= new PropertyHelper_Spelling( (XSpellChecker
*) this, xPropSet
);
583 pPropHelper
->AddAsPropListener(); //! after a reference is established
586 OSL_FAIL( "wrong number of arguments in sequence" );
// Disposes the component.
// With GetLinguMutex() held, all registered event listeners are notified and
// cleared via disposeAndClear(), using an EventObject whose source is this
// object, and pPropHelper is unregistered as a property listener.
592 void SAL_CALL
SpellChecker::dispose()
593 throw(RuntimeException
)
595 MutexGuard
aGuard( GetLinguMutex() );
600 EventObject
aEvtObj( (XSpellChecker
*) this );
601 aEvtListeners
.disposeAndClear( aEvtObj
);
604 pPropHelper
->RemoveAsPropListener();
// Adds rxListener to aEvtListeners.
// The request is ignored while the object is being disposed or when
// rxListener is an empty reference. Runs with GetLinguMutex() held.
612 void SAL_CALL
SpellChecker::addEventListener( const Reference
< XEventListener
>& rxListener
)
613 throw(RuntimeException
)
615 MutexGuard
aGuard( GetLinguMutex() );
617 if (!bDisposing
&& rxListener
.is())
618 aEvtListeners
.addInterface( rxListener
);
// Removes rxListener from aEvtListeners.
// The request is ignored while the object is being disposed or when
// rxListener is an empty reference. Runs with GetLinguMutex() held.
622 void SAL_CALL
SpellChecker::removeEventListener( const Reference
< XEventListener
>& rxListener
)
623 throw(RuntimeException
)
625 MutexGuard
aGuard( GetLinguMutex() );
627 if (!bDisposing
&& rxListener
.is())
628 aEvtListeners
.removeInterface( rxListener
);
632 ///////////////////////////////////////////////////////////////////////////
633 // Service specific part
// Returns the implementation name of this service by delegating to
// getImplementationName_Static(). Runs with GetLinguMutex() held.
636 OUString SAL_CALL
SpellChecker::getImplementationName()
637 throw(RuntimeException
)
639 MutexGuard
aGuard( GetLinguMutex() );
641 return getImplementationName_Static();
// Checks whether ServiceName is among the names returned by
// getSupportedServiceNames(), using a linear comparison over the sequence.
// Runs with GetLinguMutex() held.
645 sal_Bool SAL_CALL
SpellChecker::supportsService( const OUString
& ServiceName
)
646 throw(RuntimeException
)
648 MutexGuard
aGuard( GetLinguMutex() );
650 Sequence
< OUString
> aSNL
= getSupportedServiceNames();
651 const OUString
* pArray
= aSNL
.getConstArray();
652 for( sal_Int32 i
= 0; i
< aSNL
.getLength(); i
++ )
653 if( pArray
[i
] == ServiceName
)
// Returns the service names supported by this implementation by delegating
// to getSupportedServiceNames_Static(). Runs with GetLinguMutex() held.
659 Sequence
< OUString
> SAL_CALL
SpellChecker::getSupportedServiceNames()
660 throw(RuntimeException
)
662 MutexGuard
aGuard( GetLinguMutex() );
664 return getSupportedServiceNames_Static();
// Builds the list of supported service names: a one-element sequence whose
// only entry is SN_SPELLCHECKER. Runs with GetLinguMutex() held.
668 Sequence
< OUString
> SpellChecker::getSupportedServiceNames_Static()
671 MutexGuard
aGuard( GetLinguMutex() );
673 Sequence
< OUString
> aSNS( 1 ); // more than 1 service is possible as well
674 aSNS
.getArray()[0] = SN_SPELLCHECKER
;
// Component factory entry: when pImplName names this implementation, a
// one-instance factory for SpellChecker_CreateInstance is created and its
// interface pointer is stored in pRet.
// NOTE(review): presumably pRet starts as a null pointer and is returned, so
// a non-matching name yields null - confirm.
678 void * SAL_CALL
SpellChecker_getFactory( const sal_Char
* pImplName
,
679 XMultiServiceFactory
* pServiceManager
, void * )
// compareToAscii() yields 0 for equal strings, hence the negation
682 if ( !SpellChecker::getImplementationName_Static().compareToAscii( pImplName
) )
684 Reference
< XSingleServiceFactory
> xFactory
=
685 cppu::createOneInstanceFactory(
687 SpellChecker::getImplementationName_Static(),
688 SpellChecker_CreateInstance
,
689 SpellChecker::getSupportedServiceNames_Static());
690 // acquire, because we return an interface pointer instead of a reference
692 pRet
= xFactory
.get();
698 ///////////////////////////////////////////////////////////////////////////
700 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */