1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
22 #include <string_view>
24 #include <osl/file.hxx>
25 #include <tools/debug.hxx>
27 #include <sal/config.h>
28 #include <cppuhelper/factory.hxx>
29 #include <cppuhelper/implbase.hxx>
30 #include <cppuhelper/supportsservice.hxx>
32 #include "simpleguesser.hxx"
35 #include <com/sun/star/lang/IllegalArgumentException.hpp>
36 #include <com/sun/star/lang/XServiceInfo.hpp>
37 #include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
38 #include <unotools/pathoptions.hxx>
39 #include <osl/thread.h>
41 #include <sal/macros.h>
43 #ifdef SYSTEM_LIBEXTTEXTCAT
44 #include <libexttextcat/textcat.h>
49 using namespace ::osl
;
50 using namespace ::cppu
;
51 using namespace ::com::sun::star
;
52 using namespace ::com::sun::star::uno
;
53 using namespace ::com::sun::star::lang
;
54 using namespace ::com::sun::star::linguistic2
;
// Accessor for the mutex that the LangGuess_Impl entry points in this file
// lock (via std::scoped_lock) before touching the guesser.
// aMutex is a function-local static, so every call refers to the same
// std::mutex object. Note that std::mutex is not recursive.
// (The return statement is not visible in this excerpt.)
56 static std::mutex
& GetLangGuessMutex()
58 static std::mutex aMutex
;
// UNO component implementing language guessing on top of SimpleGuesser.
// Derives from ::cppu::WeakImplHelper (its template arguments are not visible
// in this excerpt) and declares the XServiceInfo and XLanguageGuessing
// methods listed below. Instances are non-copyable.
64 class LangGuess_Impl
:
65 public ::cppu::WeakImplHelper
<
// Guesser object that the language-guessing methods of this class forward to.
69 SimpleGuesser m_aGuesser
;
72 virtual ~LangGuess_Impl() override
{}
// Lazy setup of m_aGuesser (fingerprint database path, and on builds without
// EXTTEXTCAT_VERSION_MAJOR the list of disabled languages).
73 void EnsureInitialized();
// Copy construction and copy assignment are deleted.
77 LangGuess_Impl(const LangGuess_Impl
&) = delete;
78 LangGuess_Impl
& operator=(const LangGuess_Impl
&) = delete;
80 // XServiceInfo implementation
81 virtual OUString SAL_CALL
getImplementationName( ) override
;
82 virtual sal_Bool SAL_CALL
supportsService( const OUString
& ServiceName
) override
;
83 virtual Sequence
< OUString
> SAL_CALL
getSupportedServiceNames( ) override
;
85 // XLanguageGuessing implementation
// guessPrimaryLanguage throws lang::IllegalArgumentException when
// nStartPos/nLen do not describe a range inside aText (see its definition).
86 virtual css::lang::Locale SAL_CALL
guessPrimaryLanguage( const OUString
& aText
, ::sal_Int32 nStartPos
, ::sal_Int32 nLen
) override
;
87 virtual void SAL_CALL
disableLanguages( const css::uno::Sequence
< css::lang::Locale
>& aLanguages
) override
;
88 virtual void SAL_CALL
enableLanguages( const css::uno::Sequence
< css::lang::Locale
>& aLanguages
) override
;
89 virtual css::uno::Sequence
< css::lang::Locale
> SAL_CALL
getAvailableLanguages( ) override
;
90 virtual css::uno::Sequence
< css::lang::Locale
> SAL_CALL
getEnabledLanguages( ) override
;
91 virtual css::uno::Sequence
< css::lang::Locale
> SAL_CALL
getDisabledLanguages( ) override
;
93 // implementation specific
// Points m_aGuesser at the fingerprint directory and its "fpdb.conf" file.
// NOTE(review): the parameter is called fileName here but filePath in the
// definition; the definition uses it as a directory path prefix.
94 /// @throws RuntimeException
95 void SetFingerPrintsDB( std::u16string_view fileName
);
// Constructs the component in the "not yet initialized" state; the flag is
// flipped to true by EnsureInitialized().
100 LangGuess_Impl::LangGuess_Impl() :
101 m_bInitialized( false )
// Performs the deferred setup of m_aGuesser:
//   1. marks the object as initialized,
//   2. converts the fingerprint path URL from SvtPathOptions to a system
//      path and hands it to SetFingerPrintsDB(),
//   3. when EXTTEXTCAT_VERSION_MAJOR is not defined, disables a fixed list
//      of languages and asserts that the disabled count matches.
// NOTE(review): presumably guarded by an early return when m_bInitialized is
// already true; that guard is not visible in this excerpt, so confirm.
105 void LangGuess_Impl::EnsureInitialized()
110 // set this to true at the very start to prevent loops because of
111 // implicitly called functions below
112 m_bInitialized
= true;
114 // set default fingerprint path to where those get installed
116 OUString
aURL( SvtPathOptions().GetFingerprintPath() );
// NOTE(review): the result of getSystemPathFromFileURL is not checked here;
// aPhysPath is passed on regardless of whether the conversion succeeded.
117 osl::FileBase::getSystemPathFromFileURL( aURL
, aPhysPath
);
124 SetFingerPrintsDB( aPhysPath
);
// Everything below is compiled only for libraries that do not define
// EXTTEXTCAT_VERSION_MAJOR.
126 #if !defined(EXTTEXTCAT_VERSION_MAJOR)
128 // disable currently not functional languages...
132 const char *pCountry
;
// Table of (language, country) pairs to disable; an empty country string
// means no country part.
134 LangCountry aDisable
[] =
136 // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0
137 // which is the first with EXTTEXTCAT_VERSION_MAJOR defined
138 {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""},
139 {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""},
140 {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""}
// Convert the table into a Sequence<Locale> with one entry per table row.
142 sal_Int32 nNum
= SAL_N_ELEMENTS(aDisable
);
143 Sequence
< Locale
> aDisableSeq( nNum
);
144 Locale
*pDisableSeq
= aDisableSeq
.getArray();
145 for (sal_Int32 i
= 0; i
< nNum
; ++i
)
148 aLocale
.Language
= OUString::createFromAscii( aDisable
[i
].pLang
);
149 aLocale
.Country
= OUString::createFromAscii( aDisable
[i
].pCountry
);
150 pDisableSeq
[i
] = aLocale
;
// NOTE(review): disableLanguages() and getDisabledLanguages() each lock
// GetLangGuessMutex(), which is a non-recursive std::mutex. If this function
// is reached while the caller already holds that mutex, these calls would
// self-deadlock. The callers are not visible in this excerpt, so confirm.
152 disableLanguages( aDisableSeq
);
153 DBG_ASSERT( nNum
== getDisabledLanguages().getLength(), "size mismatch" );
// Guesses the language of the sub-range [nStartPos, nStartPos + nLen) of rText.
// The range is converted to UTF-8 and passed to
// m_aGuesser.GuessPrimaryLanguage(); the language and country of the
// resulting Guess are copied into aRes.
// Holds GetLangGuessMutex() for the duration of the call.
// Throws lang::IllegalArgumentException when nStartPos or nLen is negative
// or the range extends past the end of rText.
// (The nLen parameter line, the declaration of aRes and the return statement
// are not visible in this excerpt.)
157 Locale SAL_CALL
LangGuess_Impl::guessPrimaryLanguage(
158 const OUString
& rText
,
159 ::sal_Int32 nStartPos
,
162 std::scoped_lock
aGuard( GetLangGuessMutex() );
// NOTE(review): nStartPos and nLen are both ::sal_Int32, so nStartPos + nLen
// can overflow (undefined behaviour for signed integers) when both are large,
// before the comparison with getLength() happens. Once both are known to be
// non-negative, `nLen > rText.getLength() - nStartPos` expresses the same
// bound without the addition; consider switching to that form.
166 if (nStartPos
< 0 || nLen
< 0 || nStartPos
+ nLen
> rText
.getLength())
167 throw lang::IllegalArgumentException();
// The guesser takes a C string, so the selected range is converted to UTF-8.
169 OString
o( OUStringToOString( rText
.subView(nStartPos
, nLen
), RTL_TEXTENCODING_UTF8
) );
170 Guess g
= m_aGuesser
.GuessPrimaryLanguage(o
.getStr());
172 aRes
.Language
= OUString::createFromAscii( g
.GetLanguage() );
173 aRes
.Country
= OUString::createFromAscii( g
.GetCountry() );
// File name of the fingerprint database configuration; it is appended to the
// fingerprint path in SetFingerPrintsDB().
177 #define DEFAULT_CONF_FILE_NAME "fpdb.conf"
// Tells m_aGuesser where the fingerprint database lives.
// filePath is converted to the thread's text encoding. The configuration
// file path is that string with DEFAULT_CONF_FILE_NAME appended, and both
// the configuration file path and the plain path are passed to SetDBPath().
// NOTE(review): the two parts are concatenated without a separator, so this
// presumably relies on filePath already ending in a path separator; confirm
// against the caller.
179 void LangGuess_Impl::SetFingerPrintsDB(
180 std::u16string_view filePath
)
182 //! text encoding for file name / path needs to be in the same encoding the OS uses
183 OString path
= OUStringToOString( filePath
, osl_getThreadTextEncoding() );
184 OString conf_file_path
= path
+ DEFAULT_CONF_FILE_NAME
;
186 m_aGuesser
.SetDBPath(conf_file_path
.getStr(), path
.getStr());
// Builds a Sequence<Locale> from m_aGuesser.GetAllManagedLanguages().
// Holds GetLangGuessMutex() while doing so. Each Guess contributes one
// Locale whose Language and Country come from GetLanguage()/GetCountry().
// (The return statement is not visible in this excerpt; aRes is the sequence
// being filled.)
189 uno::Sequence
< Locale
> SAL_CALL
LangGuess_Impl::getAvailableLanguages( )
191 std::scoped_lock
aGuard( GetLangGuessMutex() );
195 Sequence
< css::lang::Locale
> aRes
;
196 std::vector
<Guess
> gs
= m_aGuesser
.GetAllManagedLanguages();
// Size the sequence to the number of guesses, then fill it through the
// writable pointer obtained from getArray().
197 aRes
.realloc(gs
.size());
199 css::lang::Locale
*pRes
= aRes
.getArray();
201 for(size_t i
= 0; i
< gs
.size() ; i
++ ){
202 css::lang::Locale current_aRes
;
203 current_aRes
.Language
= OUString::createFromAscii( gs
[i
].GetLanguage() );
204 current_aRes
.Country
= OUString::createFromAscii( gs
[i
].GetCountry() );
205 pRes
[i
] = current_aRes
;
// Builds a Sequence<Locale> from m_aGuesser.GetAvailableLanguages().
// Note the naming: the guesser's "available" list backs the UNO "enabled"
// list, while getAvailableLanguages() uses GetAllManagedLanguages().
// Holds GetLangGuessMutex() while doing so.
// (The return statement is not visible in this excerpt; aRes is the sequence
// being filled.)
211 uno::Sequence
< Locale
> SAL_CALL
LangGuess_Impl::getEnabledLanguages( )
213 std::scoped_lock
aGuard( GetLangGuessMutex() );
217 Sequence
< css::lang::Locale
> aRes
;
218 std::vector
<Guess
> gs
= m_aGuesser
.GetAvailableLanguages();
// Size the sequence to the number of guesses, then fill it through the
// writable pointer obtained from getArray().
219 aRes
.realloc(gs
.size());
221 css::lang::Locale
*pRes
= aRes
.getArray();
223 for(size_t i
= 0; i
< gs
.size() ; i
++ ){
224 css::lang::Locale current_aRes
;
225 current_aRes
.Language
= OUString::createFromAscii( gs
[i
].GetLanguage() );
226 current_aRes
.Country
= OUString::createFromAscii( gs
[i
].GetCountry() );
227 pRes
[i
] = current_aRes
;
// Builds a Sequence<Locale> from m_aGuesser.GetUnavailableLanguages().
// Holds GetLangGuessMutex() while doing so. Each Guess contributes one
// Locale whose Language and Country come from GetLanguage()/GetCountry().
// (The return statement is not visible in this excerpt; aRes is the sequence
// being filled.)
233 uno::Sequence
< Locale
> SAL_CALL
LangGuess_Impl::getDisabledLanguages( )
235 std::scoped_lock
aGuard( GetLangGuessMutex() );
239 Sequence
< css::lang::Locale
> aRes
;
240 std::vector
<Guess
> gs
= m_aGuesser
.GetUnavailableLanguages();
// Size the sequence to the number of guesses, then fill it through the
// writable pointer obtained from getArray().
241 aRes
.realloc(gs
.size());
243 css::lang::Locale
*pRes
= aRes
.getArray();
245 for(size_t i
= 0; i
< gs
.size() ; i
++ ){
246 css::lang::Locale current_aRes
;
247 current_aRes
.Language
= OUString::createFromAscii( gs
[i
].GetLanguage() );
248 current_aRes
.Country
= OUString::createFromAscii( gs
[i
].GetCountry() );
249 pRes
[i
] = current_aRes
;
// Disables each locale in rLanguages in m_aGuesser.
// For every entry the Language and Country strings are converted to
// ASCII and appended, in that order, to a std::string that is passed to
// m_aGuesser.DisableLanguage(). Any separator inserted between the two parts
// is not visible in this excerpt.
// Holds GetLangGuessMutex() for the whole loop.
255 void SAL_CALL
LangGuess_Impl::disableLanguages(
256 const uno::Sequence
< Locale
>& rLanguages
)
258 std::scoped_lock
aGuard( GetLangGuessMutex() );
262 for (const Locale
& rLanguage
: rLanguages
)
264 std::string language
;
266 OString l
= OUStringToOString( rLanguage
.Language
, RTL_TEXTENCODING_ASCII_US
);
267 OString c
= OUStringToOString( rLanguage
.Country
, RTL_TEXTENCODING_ASCII_US
);
269 language
+= l
.getStr();
271 language
+= c
.getStr();
272 m_aGuesser
.DisableLanguage(language
);
// Enables each locale in rLanguages in m_aGuesser.
// For every entry the Language and Country strings are converted to
// ASCII and appended, in that order, to a std::string that is passed to
// m_aGuesser.EnableLanguage(). Any separator inserted between the two parts
// is not visible in this excerpt.
// Holds GetLangGuessMutex() for the whole loop.
276 void SAL_CALL
LangGuess_Impl::enableLanguages(
277 const uno::Sequence
< Locale
>& rLanguages
)
279 std::scoped_lock
aGuard( GetLangGuessMutex() );
283 for (const Locale
& rLanguage
: rLanguages
)
285 std::string language
;
287 OString l
= OUStringToOString( rLanguage
.Language
, RTL_TEXTENCODING_ASCII_US
);
288 OString c
= OUStringToOString( rLanguage
.Country
, RTL_TEXTENCODING_ASCII_US
);
290 language
+= l
.getStr();
292 language
+= c
.getStr();
293 m_aGuesser
.EnableLanguage(language
);
// XServiceInfo: returns this component's fixed implementation name.
// Note that the implementation name uses "lingu2" while the service name in
// getSupportedServiceNames() uses "linguistic2".
297 OUString SAL_CALL
LangGuess_Impl::getImplementationName( )
299 return "com.sun.star.lingu2.LanguageGuessing";
// XServiceInfo: delegates the check for ServiceName to the
// cppu::supportsService helper, passing this object.
302 sal_Bool SAL_CALL
LangGuess_Impl::supportsService( const OUString
& ServiceName
)
304 return cppu::supportsService(this, ServiceName
);
// XServiceInfo: returns a one-element sequence holding the single service
// name this component reports.
307 Sequence
<OUString
> SAL_CALL
LangGuess_Impl::getSupportedServiceNames( )
309 return { "com.sun.star.linguistic2.LanguageGuessing" };
// Exported C-linkage factory function for the component.
// Allocates a new LangGuess_Impl and returns the result of cppu::acquire()
// on it. Both parameters (component context and argument sequence) are
// unnamed and unused.
312 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
*
313 lingucomponent_LangGuess_get_implementation(
314 css::uno::XComponentContext
* , css::uno::Sequence
<css::uno::Any
> const&)
316 return cppu::acquire(new LangGuess_Impl());
320 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */