bump product version to 4.1.6.2
[LibreOffice.git] / lingucomponent / source / languageguessing / guesslang.cxx
blobd8777c6464aa5cb657734f37123ae744f9c6410e
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <iostream>
23 #include <tools/debug.hxx>
25 #include <sal/config.h>
26 #include <cppuhelper/factory.hxx>
27 #include <cppuhelper/implementationentry.hxx>
28 #include <cppuhelper/implbase2.hxx>
29 #include <tools/string.hxx>
31 #include <simpleguesser.hxx>
32 #include <guess.hxx>
34 #include <com/sun/star/registry/XRegistryKey.hpp>
35 #include <com/sun/star/lang/XServiceInfo.hpp>
36 #include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
37 #include <unotools/pathoptions.hxx>
38 #include <unotools/localfilehelper.hxx>
39 #include <osl/thread.h>
41 #include <sal/macros.h>
43 #ifdef SYSTEM_LIBEXTTEXTCAT
44 #include <libexttextcat/textcat.h>
45 #else
46 #include <textcat.h>
47 #endif
49 using namespace ::rtl;
50 using namespace ::osl;
51 using namespace ::cppu;
52 using namespace ::com::sun::star;
53 using namespace ::com::sun::star::uno;
54 using namespace ::com::sun::star::lang;
55 using namespace ::com::sun::star::linguistic2;
57 //==================================================================================================
59 #define SERVICENAME "com.sun.star.linguistic2.LanguageGuessing"
60 #define IMPLNAME "com.sun.star.lingu2.LanguageGuessing"
62 static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl()
64 Sequence<OUString> names(1);
65 names[0] = SERVICENAME;
66 return names;
69 static OUString getImplementationName_LangGuess_Impl()
71 return OUString( IMPLNAME );
74 static osl::Mutex & GetLangGuessMutex()
76 static osl::Mutex aMutex;
77 return aMutex;
81 class LangGuess_Impl :
82 public ::cppu::WeakImplHelper2<
83 XLanguageGuessing,
84 XServiceInfo >
86 SimpleGuesser m_aGuesser;
87 bool m_bInitialized;
88 css::uno::Reference< css::uno::XComponentContext > m_xContext;
90 LangGuess_Impl( const LangGuess_Impl & ); // not defined
91 LangGuess_Impl & operator =( const LangGuess_Impl & ); // not defined
93 virtual ~LangGuess_Impl() {}
94 void EnsureInitialized();
96 public:
97 explicit LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext);
99 // XServiceInfo implementation
100 virtual OUString SAL_CALL getImplementationName( ) throw(RuntimeException);
101 virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) throw(RuntimeException);
102 virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) throw(RuntimeException);
103 static Sequence< OUString > SAL_CALL getSupportedServiceNames_Static( );
105 // XLanguageGuessing implementation
106 virtual ::com::sun::star::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
107 virtual void SAL_CALL disableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
108 virtual void SAL_CALL enableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
109 virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getAvailableLanguages( ) throw (::com::sun::star::uno::RuntimeException);
110 virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getEnabledLanguages( ) throw (::com::sun::star::uno::RuntimeException);
111 virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getDisabledLanguages( ) throw (::com::sun::star::uno::RuntimeException);
113 // implementation specific
114 void SetFingerPrintsDB( const OUString &fileName ) throw (RuntimeException);
116 static const OUString & SAL_CALL getImplementationName_Static() throw();
120 //*************************************************************************
122 LangGuess_Impl::LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext) :
123 m_bInitialized( false ),
124 m_xContext( rxContext )
128 //*************************************************************************
130 void LangGuess_Impl::EnsureInitialized()
132 if (!m_bInitialized)
134 // set this to true at the very start to prevent loops because of
135 // implicitly called functions below
136 m_bInitialized = true;
138 // set default fingerprint path to where those get installed
139 OUString aPhysPath;
140 OUString aURL( SvtPathOptions().GetFingerprintPath() );
141 utl::LocalFileHelper::ConvertURLToPhysicalName( aURL, aPhysPath );
142 #ifdef WNT
143 aPhysPath = aPhysPath + OUString(static_cast<sal_Unicode>('\\'));
144 #else
145 aPhysPath = aPhysPath + OUString(static_cast<sal_Unicode>('/'));
146 #endif
148 SetFingerPrintsDB( aPhysPath );
150 #if !defined(EXTTEXTCAT_VERSION_MAJOR)
152 // disable currently not functional languages...
154 struct LangCountry
156 const char *pLang;
157 const char *pCountry;
159 LangCountry aDisable[] =
161 // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0
162 // which is the first with EXTTEXTCAT_VERSION_MAJOR defined
164 {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""},
165 {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""},
166 {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""}
168 sal_Int32 nNum = SAL_N_ELEMENTS(aDisable);
169 Sequence< Locale > aDisableSeq( nNum );
170 Locale *pDisableSeq = aDisableSeq.getArray();
171 for (sal_Int32 i = 0; i < nNum; ++i)
173 Locale aLocale;
174 aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
175 aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry );
176 pDisableSeq[i] = aLocale;
178 disableLanguages( aDisableSeq );
179 DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
180 #endif
184 //*************************************************************************
186 Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
187 const OUString& rText,
188 ::sal_Int32 nStartPos,
189 ::sal_Int32 nLen )
190 throw (lang::IllegalArgumentException, uno::RuntimeException)
192 osl::MutexGuard aGuard( GetLangGuessMutex() );
194 EnsureInitialized();
196 lang::Locale aRes;
197 if (nStartPos >=0 && nLen >= 0 && nStartPos + nLen <= rText.getLength())
199 OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
200 Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr());
201 aRes.Language = OUString::createFromAscii( g.GetLanguage().c_str() );
202 aRes.Country = OUString::createFromAscii( g.GetCountry().c_str() );
204 else
205 throw lang::IllegalArgumentException();
207 return aRes;
210 //*************************************************************************
211 #define DEFAULT_CONF_FILE_NAME "fpdb.conf"
213 void LangGuess_Impl::SetFingerPrintsDB(
214 const OUString &filePath )
215 throw (RuntimeException)
217 //! text encoding for file name / path needs to be in the same encoding the OS uses
218 OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
219 OString conf_file_name( DEFAULT_CONF_FILE_NAME );
220 OString conf_file_path(path);
221 conf_file_path += conf_file_name;
223 m_aGuesser.SetDBPath((const char*)conf_file_path.getStr(), (const char*)path.getStr());
226 //*************************************************************************
227 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( )
228 throw (uno::RuntimeException)
230 osl::MutexGuard aGuard( GetLangGuessMutex() );
232 EnsureInitialized();
234 Sequence< com::sun::star::lang::Locale > aRes;
235 vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
236 aRes.realloc(gs.size());
238 com::sun::star::lang::Locale *pRes = aRes.getArray();
240 for(size_t i = 0; i < gs.size() ; i++ ){
241 com::sun::star::lang::Locale current_aRes;
242 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
243 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
244 pRes[i] = current_aRes;
247 return aRes;
250 //*************************************************************************
251 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( )
252 throw (uno::RuntimeException)
254 osl::MutexGuard aGuard( GetLangGuessMutex() );
256 EnsureInitialized();
258 Sequence< com::sun::star::lang::Locale > aRes;
259 vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
260 aRes.realloc(gs.size());
262 com::sun::star::lang::Locale *pRes = aRes.getArray();
264 for(size_t i = 0; i < gs.size() ; i++ ){
265 com::sun::star::lang::Locale current_aRes;
266 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
267 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
268 pRes[i] = current_aRes;
271 return aRes;
274 //*************************************************************************
275 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( )
276 throw (uno::RuntimeException)
278 osl::MutexGuard aGuard( GetLangGuessMutex() );
280 EnsureInitialized();
282 Sequence< com::sun::star::lang::Locale > aRes;
283 vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
284 aRes.realloc(gs.size());
286 com::sun::star::lang::Locale *pRes = aRes.getArray();
288 for(size_t i = 0; i < gs.size() ; i++ ){
289 com::sun::star::lang::Locale current_aRes;
290 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
291 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
292 pRes[i] = current_aRes;
295 return aRes;
298 //*************************************************************************
299 void SAL_CALL LangGuess_Impl::disableLanguages(
300 const uno::Sequence< Locale >& rLanguages )
301 throw (lang::IllegalArgumentException, uno::RuntimeException)
303 osl::MutexGuard aGuard( GetLangGuessMutex() );
305 EnsureInitialized();
307 sal_Int32 nLanguages = rLanguages.getLength();
308 const Locale *pLanguages = rLanguages.getConstArray();
310 for (sal_Int32 i = 0; i < nLanguages; ++i)
312 string language;
314 OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
315 OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
317 language += l.getStr();
318 language += "-";
319 language += c.getStr();
320 m_aGuesser.DisableLanguage(language);
324 //*************************************************************************
325 void SAL_CALL LangGuess_Impl::enableLanguages(
326 const uno::Sequence< Locale >& rLanguages )
327 throw (lang::IllegalArgumentException, uno::RuntimeException)
329 osl::MutexGuard aGuard( GetLangGuessMutex() );
331 EnsureInitialized();
333 sal_Int32 nLanguages = rLanguages.getLength();
334 const Locale *pLanguages = rLanguages.getConstArray();
336 for (sal_Int32 i = 0; i < nLanguages; ++i)
338 string language;
340 OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
341 OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
343 language += l.getStr();
344 language += "-";
345 language += c.getStr();
346 m_aGuesser.EnableLanguage(language);
350 //*************************************************************************
351 OUString SAL_CALL LangGuess_Impl::getImplementationName( )
352 throw(RuntimeException)
354 osl::MutexGuard aGuard( GetLangGuessMutex() );
355 return OUString( IMPLNAME );
358 //*************************************************************************
359 sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
360 throw(RuntimeException)
362 osl::MutexGuard aGuard( GetLangGuessMutex() );
363 Sequence< OUString > aSNL = getSupportedServiceNames();
364 const OUString * pArray = aSNL.getArray();
365 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
366 if( pArray[i] == ServiceName )
367 return sal_True;
368 return sal_False;
371 //*************************************************************************
372 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( )
373 throw(RuntimeException)
375 osl::MutexGuard aGuard( GetLangGuessMutex() );
376 return getSupportedServiceNames_Static();
379 //*************************************************************************
380 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames_Static( )
382 OUString aName( SERVICENAME );
383 return Sequence< OUString >( &aName, 1 );
386 //*************************************************************************
390 * Function to create a new component instance; is needed by factory helper implementation.
391 * @param xMgr service manager to if the components needs other component instances
393 Reference< XInterface > SAL_CALL LangGuess_Impl_create(
394 Reference< XComponentContext > const & xContext )
395 SAL_THROW(())
397 return static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl(xContext) );
400 //##################################################################################################
401 //#### EXPORTED ### functions to allow for registration and creation of the UNO component
402 //##################################################################################################
404 static struct ::cppu::ImplementationEntry s_component_entries [] =
407 LangGuess_Impl_create, getImplementationName_LangGuess_Impl,
408 getSupportedServiceNames_LangGuess_Impl,
409 ::cppu::createSingleComponentFactory,
410 0, 0
412 { 0, 0, 0, 0, 0, 0 }
415 extern "C"
418 SAL_DLLPUBLIC_EXPORT void * SAL_CALL guesslang_component_getFactory(
419 sal_Char const * implName, lang::XMultiServiceFactory * xMgr,
420 registry::XRegistryKey * xRegistry )
422 return ::cppu::component_getFactoryHelper(
423 implName, xMgr, xRegistry, s_component_entries );
428 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */