/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
21 #if !defined WIN32_LEAN_AND_MEAN
22 # define WIN32_LEAN_AND_MEAN
27 #include <osl/thread.h>
28 #include <osl/file.hxx>
29 #include <osl/process.h>
30 #include <tools/debug.hxx>
31 #include <tools/urlobj.hxx>
32 #include <i18nlangtag/languagetag.hxx>
33 #include <i18nlangtag/mslangid.hxx>
34 #include <unotools/bootstrap.hxx>
35 #include <unotools/lingucfg.hxx>
36 #include <unotools/pathoptions.hxx>
37 #include <rtl/bootstrap.hxx>
38 #include <rtl/ustring.hxx>
39 #include <rtl/string.hxx>
40 #include <rtl/tencinfo.h>
41 #include <linguistic/misc.hxx>
47 #include "lingutil.hxx"
49 #include <sal/macros.h>
51 using namespace ::com::sun::star
;
#if defined(_WIN32)
/**
 * Make sure a path carries the Win32 long-path prefix ("\\?\").
 *
 * @param rPathName  path to check
 * @return rPathName itself when it already starts with the prefix,
 *         otherwise the prefix followed by rPathName
 */
OString Win_AddLongPathPrefix( const OString &rPathName )
{
    constexpr OStringLiteral WIN32_LONG_PATH_PREFIX = "\\\\?\\";

    // Already prefixed: hand the path back untouched so that the prefix is
    // never applied twice.
    if (rPathName.match(WIN32_LONG_PATH_PREFIX))
        return rPathName;

    return WIN32_LONG_PATH_PREFIX + rPathName;
}
#endif //defined(_WIN32)
#if defined SYSTEM_DICTS || defined IOS
/**
 * Find old style dictionaries in one system directory.
 *
 * Every directory entry whose file name has the shape
 * <aSystemPrefix><locale><aSystemSuffix> is considered. The <locale> part is
 * converted to a language tag; if that tag is not yet contained in
 * aDicLangInUse it is recorded there and a dictionary entry is appended to
 * aRes.
 *
 * @param aSystemDir     directory to scan (handed to osl::Directory)
 * @param aFormatName    format name stored in every entry that is created
 * @param aSystemSuffix  required end of the file URL, e.g. ".dic"
 * @param aSystemPrefix  required start of the file name, may be empty
 * @param aDicLangInUse  in/out: language tags for which a dictionary has
 *                       already been chosen
 * @param aRes           in/out: receives the newly found dictionaries
 */
static void GetOldStyleDicsInDir(
    OUString const & aSystemDir, OUString const & aFormatName,
    std::u16string_view aSystemSuffix, std::u16string_view aSystemPrefix,
    std::set< OUString >& aDicLangInUse,
    std::vector< SvtLinguConfigDictionaryEntry >& aRes )
{
    osl::Directory aSystemDicts(aSystemDir);
    if (aSystemDicts.open() != osl::FileBase::E_None)
        return;

    // Work with signed lengths from here on so that the substring arithmetic
    // below cannot silently wrap around.
    const sal_Int32 nPrefixLen = static_cast<sal_Int32>(aSystemPrefix.size());
    const sal_Int32 nSuffixLen = static_cast<sal_Int32>(aSystemSuffix.size());

    osl::DirectoryItem aItem;
    osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL);
    while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
    {
        // Do not look at aFileStatus when it could not be filled for this
        // item; it might still describe the previous entry.
        if (aItem.getFileStatus(aFileStatus) != osl::FileBase::E_None)
            continue;

        const OUString sPath = aFileStatus.getFileURL();
        if (!sPath.endsWith(aSystemSuffix))
            continue;

        const sal_Int32 nStartIndex = sPath.lastIndexOf('/') + 1;
        if (!sPath.match(aSystemPrefix, nStartIndex))
            continue;

        // Prefix and suffix may overlap in a short file name (for example
        // "th_v2.dat" with prefix "th_" and suffix "_v2.dat"), which would
        // give a negative length. Such names, and names without any locale
        // part at all, are skipped.
        const sal_Int32 nChunkLen
            = sPath.getLength() - nSuffixLen - nStartIndex - nPrefixLen;
        if (nChunkLen <= 0)
            continue;
        OUString sChunk = sPath.copy(nStartIndex + nPrefixLen, nChunkLen);

        // We prefer (now) to use language tags.
        // Avoid feeding in the older LANG_REGION scheme to the BCP47
        // ctor as that triggers use of liblangtag and initializes its
        // database which we do not want during startup. Convert
        // instead.
        sChunk = sChunk.replace( '_', '-');

        // There's a known exception to the rule, the dreaded
        // hu_HU_u8.dic of the myspell-hu package, see
        // http://packages.debian.org/search?arch=any&searchon=contents&keywords=hu_HU_u8.dic
        // This was ignored because unknown in the old implementation,
        // truncate to the known locale and either insert because hu_HU
        // wasn't encountered yet, or skip because it was. It doesn't
        // really matter because the proper new-style hu_HU dictionary
        // will take precedence anyway if installed with a Hungarian
        // languagepack. Again, this is only to not pull in all
        // liblangtag and stuff during startup, the result would be
        // !isValidBcp47() and the dictionary ignored.
        if (sChunk == "hu-HU-u8")
            sChunk = "hu-HU";

        LanguageTag aLangTag(sChunk, true);
        if (!aLangTag.isValidBcp47())
            continue;

        // Thus we first get the language of the dictionary
        const OUString& aLocaleName(aLangTag.getBcp47());

        // Only the first dictionary found for a language is used.
        if (!aDicLangInUse.insert(aLocaleName).second)
            continue;

        // add the dictionary to the resulting vector
        SvtLinguConfigDictionaryEntry aDicEntry;
        aDicEntry.aLocations = { sPath };
        aDicEntry.aFormatName = aFormatName;
        if (aLocaleName == u"ar")
        {
            // A bare "ar" dictionary is registered for the bare tag and for
            // the region-qualified Arabic tags listed here.
            aDicEntry.aLocaleNames = {
                aLocaleName,
                u"ar-AE", u"ar-BH", u"ar-DJ", u"ar-DZ", u"ar-EG",
                u"ar-ER", u"ar-IL", u"ar-IQ", u"ar-JO", u"ar-KM",
                u"ar-KW", u"ar-LB", u"ar-LY", u"ar-MA", u"ar-MR",
                u"ar-OM", u"ar-PS", u"ar-QA", u"ar-SA", u"ar-SD",
                u"ar-SO", u"ar-SY", u"ar-TD", u"ar-TN", u"ar-YE"
            };
        }
        else
        {
            aDicEntry.aLocaleNames = { aLocaleName };
        }
        aRes.push_back( aDicEntry );
    }
}
#endif
143 // build list of old style dictionaries (not as extensions) to use.
144 // User installed dictionaries (the ones residing in the user paths)
145 // will get precedence over system installed ones for the same language.
146 std::vector
< SvtLinguConfigDictionaryEntry
> GetOldStyleDics( const char *pDicType
)
148 std::vector
< SvtLinguConfigDictionaryEntry
> aRes
;
153 OUString aFormatName
;
154 OUString aDicExtension
;
155 #if defined SYSTEM_DICTS || defined IOS
157 OUString aSystemPrefix
;
158 OUString aSystemSuffix
;
160 if (strcmp( pDicType
, "DICT" ) == 0)
162 aFormatName
= "DICT_SPELL";
163 aDicExtension
= ".dic";
165 aSystemDir
= DICT_SYSTEM_DIR
;
166 aSystemSuffix
= aDicExtension
;
168 aSystemDir
= "$BRAND_BASE_DIR/share/spell";
169 rtl::Bootstrap::expandMacros(aSystemDir
);
170 aSystemSuffix
= ".dic";
173 else if (strcmp( pDicType
, "HYPH" ) == 0)
175 aFormatName
= "DICT_HYPH";
176 aDicExtension
= ".dic";
178 aSystemDir
= HYPH_SYSTEM_DIR
;
179 aSystemPrefix
= "hyph_";
180 aSystemSuffix
= aDicExtension
;
183 else if (strcmp( pDicType
, "THES" ) == 0)
185 aFormatName
= "DICT_THES";
186 aDicExtension
= ".dat";
188 aSystemDir
= THES_SYSTEM_DIR
;
189 aSystemPrefix
= "th_";
190 aSystemSuffix
= "_v2.dat";
192 aSystemDir
= "$BRAND_BASE_DIR/share/thes";
193 rtl::Bootstrap::expandMacros(aSystemDir
);
194 aSystemPrefix
= "th_";
195 aSystemSuffix
= "_v2.dat";
199 if (aFormatName
.isEmpty() || aDicExtension
.isEmpty())
202 #if defined SYSTEM_DICTS || defined IOS
203 // set of languages to remember the language where it is already
204 // decided to make use of the dictionary.
205 std::set
< OUString
> aDicLangInUse
;
208 // follow the hunspell tool's example and check DICPATH for preferred dictionaries
209 rtl_uString
* pSearchPath
= nullptr;
210 osl_getEnvironment(OUString("DICPATH").pData
, &pSearchPath
);
214 OUString
aSearchPath(pSearchPath
);
215 rtl_uString_release(pSearchPath
);
217 sal_Int32 nIndex
= 0;
220 OUString
aSystem( aSearchPath
.getToken(0, ':', nIndex
) );
225 if (!utl::Bootstrap::getProcessWorkingDir(aCWD
))
227 if (osl::FileBase::getFileURLFromSystemPath(aSystem
, aRelative
)
228 != osl::FileBase::E_None
)
230 if (osl::FileBase::getAbsoluteFileURL(aCWD
, aRelative
, aAbsolute
)
231 != osl::FileBase::E_None
)
234 // GetOldStyleDicsInDir will make sure the dictionary is the right
235 // type based on its prefix, that way hyphen, mythes and regular
236 // dictionaries can live in one directory
237 GetOldStyleDicsInDir(aAbsolute
, aFormatName
, aSystemSuffix
,
238 aSystemPrefix
, aDicLangInUse
, aRes
);
240 while (nIndex
!= -1);
244 // load system directories last so that DICPATH prevails
245 GetOldStyleDicsInDir(aSystemDir
, aFormatName
, aSystemSuffix
, aSystemPrefix
,
246 aDicLangInUse
, aRes
);
252 void MergeNewStyleDicsAndOldStyleDics(
253 std::vector
< SvtLinguConfigDictionaryEntry
> &rNewStyleDics
,
254 const std::vector
< SvtLinguConfigDictionaryEntry
> &rOldStyleDics
)
256 // get list of languages supported by new style dictionaries
257 std::set
< OUString
> aNewStyleLanguages
;
258 for (auto const& newStyleDic
: rNewStyleDics
)
260 const uno::Sequence
< OUString
> aLocaleNames(newStyleDic
.aLocaleNames
);
261 sal_Int32 nLocaleNames
= aLocaleNames
.getLength();
262 for (sal_Int32 k
= 0; k
< nLocaleNames
; ++k
)
264 aNewStyleLanguages
.insert( aLocaleNames
[k
] );
268 // now check all old style dictionaries if they will add a not yet
269 // added language. If so add them to the resulting vector
270 for (auto const& oldStyleDic
: rOldStyleDics
)
272 sal_Int32 nOldStyleDics
= oldStyleDic
.aLocaleNames
.getLength();
274 // old style dics should only have one language listed...
275 DBG_ASSERT( nOldStyleDics
, "old style dictionary with more than one language found!");
276 if (nOldStyleDics
> 0)
278 if (linguistic::LinguIsUnspecified( oldStyleDic
.aLocaleNames
[0]))
280 OSL_FAIL( "old style dictionary with invalid language found!" );
284 // language not yet added?
285 if (aNewStyleLanguages
.find( oldStyleDic
.aLocaleNames
[0] ) == aNewStyleLanguages
.end())
286 rNewStyleDics
.push_back(oldStyleDic
);
290 OSL_FAIL( "old style dictionary with no language found!" );
295 rtl_TextEncoding
getTextEncodingFromCharset(const char* pCharset
)
297 // default result: used to indicate that we failed to get the proper encoding
298 rtl_TextEncoding eRet
= RTL_TEXTENCODING_DONTKNOW
;
302 eRet
= rtl_getTextEncodingFromMimeCharset(pCharset
);
303 if (eRet
== RTL_TEXTENCODING_DONTKNOW
)
304 eRet
= rtl_getTextEncodingFromUnixCharset(pCharset
);
305 if (eRet
== RTL_TEXTENCODING_DONTKNOW
)
307 if (strcmp("ISCII-DEVANAGARI", pCharset
) == 0)
308 eRet
= RTL_TEXTENCODING_ISCII_DEVANAGARI
;
314 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */