tdf#130857 qt weld: Support mail merge "Server Auth" dialog
[LibreOffice.git] / lingucomponent / source / lingutil / lingutil.cxx
blob21b04520e1220538b5c0e56f1126a1fb5bec5723
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #if defined(_WIN32)
21 #if !defined WIN32_LEAN_AND_MEAN
22 # define WIN32_LEAN_AND_MEAN
23 #endif
24 #include <windows.h>
25 #endif
27 #include <osl/diagnose.h>
28 #include <osl/thread.h>
29 #include <osl/file.hxx>
30 #include <osl/process.h>
31 #include <tools/debug.hxx>
32 #include <tools/urlobj.hxx>
33 #include <i18nlangtag/languagetag.hxx>
34 #include <i18nlangtag/mslangid.hxx>
35 #include <unotools/bootstrap.hxx>
36 #include <unotools/lingucfg.hxx>
37 #include <unotools/pathoptions.hxx>
38 #include <rtl/bootstrap.hxx>
39 #include <rtl/ustring.hxx>
40 #include <rtl/string.hxx>
41 #include <rtl/tencinfo.h>
42 #include <linguistic/misc.hxx>
44 #include <set>
45 #include <vector>
46 #include <string.h>
48 #include "lingutil.hxx"
50 #include <sal/macros.h>
52 using namespace ::com::sun::star;
54 #if defined(_WIN32)
55 OString Win_AddLongPathPrefix( const OString &rPathName )
57 constexpr OString WIN32_LONG_PATH_PREFIX = "\\\\?\\"_ostr;
58 if (!rPathName.match(WIN32_LONG_PATH_PREFIX)) return WIN32_LONG_PATH_PREFIX + rPathName;
59 return rPathName;
61 #endif //defined(_WIN32)
63 #if defined SYSTEM_DICTS || defined IOS
64 // find old style dictionaries in system directories
65 static void GetOldStyleDicsInDir(
66 OUString const & aSystemDir, OUString const & aFormatName,
67 std::u16string_view aSystemSuffix, std::u16string_view aSystemPrefix,
68 std::set< OUString >& aDicLangInUse,
69 std::vector< SvtLinguConfigDictionaryEntry >& aRes )
71 osl::Directory aSystemDicts(aSystemDir);
72 if (aSystemDicts.open() != osl::FileBase::E_None)
73 return;
75 osl::DirectoryItem aItem;
76 osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL);
77 while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
79 aItem.getFileStatus(aFileStatus);
80 OUString sPath = aFileStatus.getFileURL();
81 if (sPath.endsWith(aSystemSuffix))
83 sal_Int32 nStartIndex = sPath.lastIndexOf('/') + 1;
84 if (!sPath.match(aSystemPrefix, nStartIndex))
85 continue;
86 OUString sChunk = sPath.copy(nStartIndex + aSystemPrefix.size(),
87 sPath.getLength() - aSystemSuffix.size() -
88 nStartIndex - aSystemPrefix.size());
89 if (sChunk.isEmpty())
90 continue;
92 // We prefer (now) to use language tags.
93 // Avoid feeding in the older LANG_REGION scheme to the BCP47
94 // ctor as that triggers use of liblangtag and initializes its
95 // database which we do not want during startup. Convert
96 // instead.
97 sChunk = sChunk.replace( '_', '-');
99 // There's a known exception to the rule, the dreaded
100 // hu_HU_u8.dic of the myspell-hu package, see
101 // http://packages.debian.org/search?arch=any&searchon=contents&keywords=hu_HU_u8.dic
102 // This was ignored because unknown in the old implementation,
103 // truncate to the known locale and either insert because hu_HU
104 // wasn't encountered yet, or skip because it was. It doesn't
105 // really matter because the proper new-style hu_HU dictionary
106 // will take precedence anyway if installed with a Hungarian
107 // languagepack. Again, this is only to not pull in all
108 // liblangtag and stuff during startup, the result would be
109 // !isValidBcp47() and the dictionary ignored.
110 if (sChunk == "hu-HU-u8")
111 sChunk = "hu-HU";
113 LanguageTag aLangTag(sChunk, true);
114 if (!aLangTag.isValidBcp47())
115 continue;
117 // Thus we first get the language of the dictionary
118 const OUString& aLocaleName(aLangTag.getBcp47());
120 if (aDicLangInUse.insert(aLocaleName).second)
122 // add the dictionary to the resulting vector
123 SvtLinguConfigDictionaryEntry aDicEntry;
124 aDicEntry.aLocations = { sPath };
125 aDicEntry.aFormatName = aFormatName;
126 if (aLocaleName == u"ar")
127 aDicEntry.aLocaleNames = {
128 aLocaleName,
129 u"ar-AE"_ustr, u"ar-BH"_ustr, u"ar-DJ"_ustr, u"ar-DZ"_ustr, u"ar-EG"_ustr,
130 u"ar-ER"_ustr, u"ar-IL"_ustr, u"ar-IQ"_ustr, u"ar-JO"_ustr, u"ar-KM"_ustr,
131 u"ar-KW"_ustr, u"ar-LB"_ustr, u"ar-LY"_ustr, u"ar-MA"_ustr, u"ar-MR"_ustr,
132 u"ar-OM"_ustr, u"ar-PS"_ustr, u"ar-QA"_ustr, u"ar-SA"_ustr, u"ar-SD"_ustr,
133 u"ar-SO"_ustr, u"ar-SY"_ustr, u"ar-TD"_ustr, u"ar-TN"_ustr, u"ar-YE"_ustr
135 else
136 aDicEntry.aLocaleNames = { aLocaleName };
137 aRes.push_back( aDicEntry );
142 #endif
144 // build list of old style dictionaries (not as extensions) to use.
145 // User installed dictionaries (the ones residing in the user paths)
146 // will get precedence over system installed ones for the same language.
147 std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
149 std::vector< SvtLinguConfigDictionaryEntry > aRes;
151 if (!pDicType)
152 return aRes;
154 OUString aFormatName;
155 OUString aDicExtension;
156 #if defined SYSTEM_DICTS || defined IOS
157 OUString aSystemDir;
158 OUString aSystemPrefix;
159 OUString aSystemSuffix;
160 #endif
161 if (strcmp( pDicType, "DICT" ) == 0)
163 aFormatName = "DICT_SPELL";
164 aDicExtension = ".dic";
165 #ifdef SYSTEM_DICTS
166 aSystemDir = DICT_SYSTEM_DIR;
167 aSystemSuffix = aDicExtension;
168 #elif defined IOS
169 aSystemDir = "$BRAND_BASE_DIR/share/spell";
170 rtl::Bootstrap::expandMacros(aSystemDir);
171 aSystemSuffix = ".dic";
172 #endif
174 else if (strcmp( pDicType, "HYPH" ) == 0)
176 aFormatName = "DICT_HYPH";
177 aDicExtension = ".dic";
178 #ifdef SYSTEM_DICTS
179 aSystemDir = HYPH_SYSTEM_DIR;
180 aSystemPrefix = "hyph_";
181 aSystemSuffix = aDicExtension;
182 #endif
184 else if (strcmp( pDicType, "THES" ) == 0)
186 aFormatName = "DICT_THES";
187 aDicExtension = ".dat";
188 #ifdef SYSTEM_DICTS
189 aSystemDir = THES_SYSTEM_DIR;
190 aSystemPrefix = "th_";
191 aSystemSuffix = "_v2.dat";
192 #elif defined IOS
193 aSystemDir = "$BRAND_BASE_DIR/share/thes";
194 rtl::Bootstrap::expandMacros(aSystemDir);
195 aSystemPrefix = "th_";
196 aSystemSuffix = "_v2.dat";
197 #endif
200 if (aFormatName.isEmpty() || aDicExtension.isEmpty())
201 return aRes;
203 #if defined SYSTEM_DICTS || defined IOS
204 // set of languages to remember the language where it is already
205 // decided to make use of the dictionary.
206 std::set< OUString > aDicLangInUse;
208 #ifndef IOS
209 // follow the hunspell tool's example and check DICPATH for preferred dictionaries
210 rtl_uString * pSearchPath = nullptr;
211 osl_getEnvironment(u"DICPATH"_ustr.pData, &pSearchPath);
213 if (pSearchPath)
215 OUString aSearchPath(pSearchPath);
216 rtl_uString_release(pSearchPath);
218 sal_Int32 nIndex = 0;
221 OUString aSystem( aSearchPath.getToken(0, ':', nIndex) );
222 OUString aCWD;
223 OUString aRelative;
224 OUString aAbsolute;
226 if (!utl::Bootstrap::getProcessWorkingDir(aCWD))
227 continue;
228 if (osl::FileBase::getFileURLFromSystemPath(aSystem, aRelative)
229 != osl::FileBase::E_None)
230 continue;
231 if (osl::FileBase::getAbsoluteFileURL(aCWD, aRelative, aAbsolute)
232 != osl::FileBase::E_None)
233 continue;
235 // GetOldStyleDicsInDir will make sure the dictionary is the right
236 // type based on its prefix, that way hyphen, mythes and regular
237 // dictionaries can live in one directory
238 GetOldStyleDicsInDir(aAbsolute, aFormatName, aSystemSuffix,
239 aSystemPrefix, aDicLangInUse, aRes);
241 while (nIndex != -1);
243 #endif
245 // load system directories last so that DICPATH prevails
246 GetOldStyleDicsInDir(aSystemDir, aFormatName, aSystemSuffix, aSystemPrefix,
247 aDicLangInUse, aRes);
248 #endif
250 return aRes;
253 void MergeNewStyleDicsAndOldStyleDics(
254 std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
255 const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
257 // get list of languages supported by new style dictionaries
258 std::set< OUString > aNewStyleLanguages;
259 for (auto const& newStyleDic : rNewStyleDics)
261 const uno::Sequence< OUString > aLocaleNames(newStyleDic.aLocaleNames);
262 sal_Int32 nLocaleNames = aLocaleNames.getLength();
263 for (sal_Int32 k = 0; k < nLocaleNames; ++k)
265 aNewStyleLanguages.insert( aLocaleNames[k] );
269 // now check all old style dictionaries if they will add a not yet
270 // added language. If so add them to the resulting vector
271 for (auto const& oldStyleDic : rOldStyleDics)
273 sal_Int32 nOldStyleDics = oldStyleDic.aLocaleNames.getLength();
275 // old style dics should only have one language listed...
276 DBG_ASSERT( nOldStyleDics, "old style dictionary with more than one language found!");
277 if (nOldStyleDics > 0)
279 if (linguistic::LinguIsUnspecified( oldStyleDic.aLocaleNames[0]))
281 OSL_FAIL( "old style dictionary with invalid language found!" );
282 continue;
285 // language not yet added?
286 if (aNewStyleLanguages.find( oldStyleDic.aLocaleNames[0] ) == aNewStyleLanguages.end())
287 rNewStyleDics.push_back(oldStyleDic);
289 else
291 OSL_FAIL( "old style dictionary with no language found!" );
296 rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset)
298 // default result: used to indicate that we failed to get the proper encoding
299 rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
301 if (pCharset)
303 eRet = rtl_getTextEncodingFromMimeCharset(pCharset);
304 if (eRet == RTL_TEXTENCODING_DONTKNOW)
305 eRet = rtl_getTextEncodingFromUnixCharset(pCharset);
306 if (eRet == RTL_TEXTENCODING_DONTKNOW)
308 if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
309 eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI;
312 return eRet;
315 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */