bump product version to 6.3.0.0.beta1
[LibreOffice.git] / lingucomponent / source / languageguessing / guesslang.cxx
blob8e2fc213d089d0d6a0036648e7e72efcb732391f
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <iostream>
22 #include <osl/file.hxx>
23 #include <tools/debug.hxx>
25 #include <sal/config.h>
26 #include <cppuhelper/factory.hxx>
27 #include <cppuhelper/implementationentry.hxx>
28 #include <cppuhelper/implbase.hxx>
29 #include <cppuhelper/supportsservice.hxx>
31 #include "simpleguesser.hxx"
32 #include "guess.hxx"
34 #include <com/sun/star/lang/IllegalArgumentException.hpp>
35 #include <com/sun/star/lang/XServiceInfo.hpp>
36 #include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
37 #include <unotools/pathoptions.hxx>
38 #include <osl/thread.h>
40 #include <sal/macros.h>
42 #ifdef SYSTEM_LIBEXTTEXTCAT
43 #include <libexttextcat/textcat.h>
44 #else
45 #include <textcat.h>
46 #endif
48 using namespace ::osl;
49 using namespace ::cppu;
50 using namespace ::com::sun::star;
51 using namespace ::com::sun::star::uno;
52 using namespace ::com::sun::star::lang;
53 using namespace ::com::sun::star::linguistic2;
55 #define SERVICENAME "com.sun.star.linguistic2.LanguageGuessing"
56 #define IMPLNAME "com.sun.star.lingu2.LanguageGuessing"
58 static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl()
60 Sequence<OUString> names { SERVICENAME };
61 return names;
64 static OUString getImplementationName_LangGuess_Impl()
66 return OUString( IMPLNAME );
69 static osl::Mutex & GetLangGuessMutex()
71 static osl::Mutex aMutex;
72 return aMutex;
75 class LangGuess_Impl :
76 public ::cppu::WeakImplHelper<
77 XLanguageGuessing,
78 XServiceInfo >
80 SimpleGuesser m_aGuesser;
81 bool m_bInitialized;
83 virtual ~LangGuess_Impl() override {}
84 void EnsureInitialized();
86 public:
87 LangGuess_Impl();
88 LangGuess_Impl(const LangGuess_Impl&) = delete;
89 LangGuess_Impl& operator=(const LangGuess_Impl&) = delete;
91 // XServiceInfo implementation
92 virtual OUString SAL_CALL getImplementationName( ) override;
93 virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override;
94 virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override;
95 static Sequence< OUString > getSupportedServiceNames_Static( );
97 // XLanguageGuessing implementation
98 virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override;
99 virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
100 virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
101 virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( ) override;
102 virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( ) override;
103 virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( ) override;
105 // implementation specific
106 /// @throws RuntimeException
107 void SetFingerPrintsDB( const OUString &fileName );
110 LangGuess_Impl::LangGuess_Impl() :
111 m_bInitialized( false )
115 void LangGuess_Impl::EnsureInitialized()
117 if (!m_bInitialized)
119 // set this to true at the very start to prevent loops because of
120 // implicitly called functions below
121 m_bInitialized = true;
123 // set default fingerprint path to where those get installed
124 OUString aPhysPath;
125 OUString aURL( SvtPathOptions().GetFingerprintPath() );
126 osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath );
127 #ifdef _WIN32
128 aPhysPath += "\\";
129 #else
130 aPhysPath += "/";
131 #endif
133 SetFingerPrintsDB( aPhysPath );
135 #if !defined(EXTTEXTCAT_VERSION_MAJOR)
137 // disable currently not functional languages...
138 struct LangCountry
140 const char *pLang;
141 const char *pCountry;
143 LangCountry aDisable[] =
145 // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0
146 // which is the first with EXTTEXTCAT_VERSION_MAJOR defined
147 {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""},
148 {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""},
149 {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""}
151 sal_Int32 nNum = SAL_N_ELEMENTS(aDisable);
152 Sequence< Locale > aDisableSeq( nNum );
153 Locale *pDisableSeq = aDisableSeq.getArray();
154 for (sal_Int32 i = 0; i < nNum; ++i)
156 Locale aLocale;
157 aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
158 aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry );
159 pDisableSeq[i] = aLocale;
161 disableLanguages( aDisableSeq );
162 DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
163 #endif
167 Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
168 const OUString& rText,
169 ::sal_Int32 nStartPos,
170 ::sal_Int32 nLen )
172 osl::MutexGuard aGuard( GetLangGuessMutex() );
174 EnsureInitialized();
176 if (nStartPos < 0 || nLen < 0 || nStartPos + nLen > rText.getLength())
177 throw lang::IllegalArgumentException();
179 OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
180 Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr());
181 lang::Locale aRes;
182 aRes.Language = OUString::createFromAscii( g.GetLanguage().c_str() );
183 aRes.Country = OUString::createFromAscii( g.GetCountry().c_str() );
184 return aRes;
187 #define DEFAULT_CONF_FILE_NAME "fpdb.conf"
189 void LangGuess_Impl::SetFingerPrintsDB(
190 const OUString &filePath )
192 //! text encoding for file name / path needs to be in the same encoding the OS uses
193 OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
194 OString conf_file_path(path);
195 conf_file_path += DEFAULT_CONF_FILE_NAME;
197 m_aGuesser.SetDBPath(conf_file_path.getStr(), path.getStr());
200 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( )
202 osl::MutexGuard aGuard( GetLangGuessMutex() );
204 EnsureInitialized();
206 Sequence< css::lang::Locale > aRes;
207 vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
208 aRes.realloc(gs.size());
210 css::lang::Locale *pRes = aRes.getArray();
212 for(size_t i = 0; i < gs.size() ; i++ ){
213 css::lang::Locale current_aRes;
214 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
215 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
216 pRes[i] = current_aRes;
219 return aRes;
222 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( )
224 osl::MutexGuard aGuard( GetLangGuessMutex() );
226 EnsureInitialized();
228 Sequence< css::lang::Locale > aRes;
229 vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
230 aRes.realloc(gs.size());
232 css::lang::Locale *pRes = aRes.getArray();
234 for(size_t i = 0; i < gs.size() ; i++ ){
235 css::lang::Locale current_aRes;
236 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
237 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
238 pRes[i] = current_aRes;
241 return aRes;
244 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( )
246 osl::MutexGuard aGuard( GetLangGuessMutex() );
248 EnsureInitialized();
250 Sequence< css::lang::Locale > aRes;
251 vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
252 aRes.realloc(gs.size());
254 css::lang::Locale *pRes = aRes.getArray();
256 for(size_t i = 0; i < gs.size() ; i++ ){
257 css::lang::Locale current_aRes;
258 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
259 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
260 pRes[i] = current_aRes;
263 return aRes;
266 void SAL_CALL LangGuess_Impl::disableLanguages(
267 const uno::Sequence< Locale >& rLanguages )
269 osl::MutexGuard aGuard( GetLangGuessMutex() );
271 EnsureInitialized();
273 sal_Int32 nLanguages = rLanguages.getLength();
274 const Locale *pLanguages = rLanguages.getConstArray();
276 for (sal_Int32 i = 0; i < nLanguages; ++i)
278 string language;
280 OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
281 OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
283 language += l.getStr();
284 language += "-";
285 language += c.getStr();
286 m_aGuesser.DisableLanguage(language);
290 void SAL_CALL LangGuess_Impl::enableLanguages(
291 const uno::Sequence< Locale >& rLanguages )
293 osl::MutexGuard aGuard( GetLangGuessMutex() );
295 EnsureInitialized();
297 sal_Int32 nLanguages = rLanguages.getLength();
298 const Locale *pLanguages = rLanguages.getConstArray();
300 for (sal_Int32 i = 0; i < nLanguages; ++i)
302 string language;
304 OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
305 OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
307 language += l.getStr();
308 language += "-";
309 language += c.getStr();
310 m_aGuesser.EnableLanguage(language);
314 OUString SAL_CALL LangGuess_Impl::getImplementationName( )
316 return OUString( IMPLNAME );
319 sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
321 return cppu::supportsService(this, ServiceName);
324 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( )
326 return getSupportedServiceNames_Static();
329 Sequence<OUString> LangGuess_Impl::getSupportedServiceNames_Static( )
331 OUString aName( SERVICENAME );
332 return Sequence< OUString >( &aName, 1 );
336 * Function to create a new component instance; is needed by factory helper implementation.
337 * @param xMgr service manager to if the components needs other component instances
339 static Reference< XInterface > LangGuess_Impl_create(
340 Reference< XComponentContext > const & )
342 return static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl );
345 //#### EXPORTED ### functions to allow for registration and creation of the UNO component
346 static const struct ::cppu::ImplementationEntry s_component_entries [] =
349 LangGuess_Impl_create, getImplementationName_LangGuess_Impl,
350 getSupportedServiceNames_LangGuess_Impl,
351 ::cppu::createSingleComponentFactory,
352 nullptr, 0
354 { nullptr, nullptr, nullptr, nullptr, nullptr, 0 }
357 extern "C"
360 SAL_DLLPUBLIC_EXPORT void * guesslang_component_getFactory(
361 sal_Char const * implName, void * xMgr,
362 void * xRegistry )
364 return ::cppu::component_getFactoryHelper(
365 implName, xMgr, xRegistry, s_component_entries );
370 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */