Branch libreoffice-5-0-4
[LibreOffice.git] / lingucomponent / source / lingutil / lingutil.cxx
blobd0da501e22e4b2e28f0310649380c2631b3ba423
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #if defined(WNT)
21 #include <windows.h>
22 #endif
24 #include <osl/thread.h>
25 #include <osl/file.hxx>
26 #include <tools/debug.hxx>
27 #include <tools/urlobj.hxx>
28 #include <i18nlangtag/languagetag.hxx>
29 #include <i18nlangtag/mslangid.hxx>
30 #include <unotools/lingucfg.hxx>
31 #include <unotools/pathoptions.hxx>
32 #include <rtl/ustring.hxx>
33 #include <rtl/string.hxx>
34 #include <rtl/tencinfo.h>
35 #include <linguistic/misc.hxx>
37 #include <set>
38 #include <vector>
39 #include <string.h>
41 #include <lingutil.hxx>
43 #include <sal/macros.h>
45 using ::com::sun::star::lang::Locale;
46 using namespace ::com::sun::star;
48 #if defined(WNT)
49 OString Win_AddLongPathPrefix( const OString &rPathName )
51 #define WIN32_LONG_PATH_PREFIX "\\\\?\\"
52 if (!rPathName.match(WIN32_LONG_PATH_PREFIX)) return WIN32_LONG_PATH_PREFIX + rPathName;
53 return rPathName;
55 #endif //defined(WNT)
57 // build list of old style diuctionaries (not as extensions) to use.
58 // User installed dictionaries (the ones residing in the user paths)
59 // will get precedence over system installed ones for the same language.
60 std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
62 std::vector< SvtLinguConfigDictionaryEntry > aRes;
64 if (!pDicType)
65 return aRes;
67 OUString aFormatName;
68 OUString aDicExtension;
69 #ifdef SYSTEM_DICTS
70 OUString aSystemDir;
71 OUString aSystemPrefix;
72 OUString aSystemSuffix;
73 #endif
74 if (strcmp( pDicType, "DICT" ) == 0)
76 aFormatName = "DICT_SPELL";
77 aDicExtension = ".dic";
78 #ifdef SYSTEM_DICTS
79 aSystemDir = DICT_SYSTEM_DIR;
80 aSystemSuffix = aDicExtension;
81 #endif
83 else if (strcmp( pDicType, "HYPH" ) == 0)
85 aFormatName = "DICT_HYPH";
86 aDicExtension = ".dic";
87 #ifdef SYSTEM_DICTS
88 aSystemDir = HYPH_SYSTEM_DIR;
89 aSystemPrefix = "hyph_";
90 aSystemSuffix = aDicExtension;
91 #endif
93 else if (strcmp( pDicType, "THES" ) == 0)
95 aFormatName = "DICT_THES";
96 aDicExtension = ".dat";
97 #ifdef SYSTEM_DICTS
98 aSystemDir = THES_SYSTEM_DIR;
99 aSystemPrefix = "th_";
100 aSystemSuffix = "_v2.dat";
101 #endif
104 if (aFormatName.isEmpty() || aDicExtension.isEmpty())
105 return aRes;
107 #ifdef SYSTEM_DICTS
108 osl::Directory aSystemDicts(aSystemDir);
109 if (aSystemDicts.open() == osl::FileBase::E_None)
111 // set of languages to remember the language where it is already
112 // decided to make use of the dictionary.
113 std::set< OUString > aDicLangInUse;
115 osl::DirectoryItem aItem;
116 osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL);
117 while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
119 aItem.getFileStatus(aFileStatus);
120 OUString sPath = aFileStatus.getFileURL();
121 if (sPath.endsWith(aSystemSuffix))
123 sal_Int32 nStartIndex = sPath.lastIndexOf('/') + 1;
124 if (!sPath.match(aSystemPrefix, nStartIndex))
125 continue;
126 OUString sChunk = sPath.copy(nStartIndex + aSystemPrefix.getLength(),
127 sPath.getLength() - aSystemSuffix.getLength() -
128 nStartIndex - aSystemPrefix.getLength());
129 if (sChunk.isEmpty())
130 continue;
132 // We prefer (now) to use language tags.
133 // Avoid feeding in the older LANG_REGION scheme to the BCP47
134 // ctor as that triggers use of liblangtag and initializes its
135 // database which we do not want during startup. Convert
136 // instead.
137 sChunk = sChunk.replace( '_', '-');
139 // There's a known exception to the rule, the dreaded
140 // hu_HU_u8.dic of the myspell-hu package, see
141 // http://packages.debian.org/search?arch=any&searchon=contents&keywords=hu_HU_u8.dic
142 // This was ignored because unknown in the old implementation,
143 // truncate to the known locale and either insert because hu_HU
144 // wasn't encountered yet, or skip because it was. It doesn't
145 // really matter because the proper new-style hu_HU dictionary
146 // will take precedence anyway if installed with a Hungarian
147 // languagepack. Again, this is only to not pull in all
148 // liblangtag and stuff during startup, the result would be
149 // !isValidBcp47() and the dictionary ignored.
150 if (sChunk == "hu-HU-u8")
151 sChunk = "hu-HU";
153 LanguageTag aLangTag(sChunk, true);
154 if (!aLangTag.isValidBcp47())
155 continue;
157 // Thus we first get the language of the dictionary
158 OUString aLocaleName(aLangTag.getBcp47());
160 if (aDicLangInUse.insert(aLocaleName).second)
162 // add the dictionary to the resulting vector
163 SvtLinguConfigDictionaryEntry aDicEntry;
164 aDicEntry.aLocations.realloc(1);
165 aDicEntry.aLocaleNames.realloc(1);
166 aDicEntry.aLocations[0] = sPath;
167 aDicEntry.aFormatName = aFormatName;
168 aDicEntry.aLocaleNames[0] = aLocaleName;
169 aRes.push_back( aDicEntry );
174 #endif
176 return aRes;
179 void MergeNewStyleDicsAndOldStyleDics(
180 std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
181 const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
183 // get list of languages supported by new style dictionaries
184 std::set< OUString > aNewStyleLanguages;
185 std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt;
186 for (aIt = rNewStyleDics.begin() ; aIt != rNewStyleDics.end(); ++aIt)
188 const uno::Sequence< OUString > aLocaleNames( aIt->aLocaleNames );
189 sal_Int32 nLocaleNames = aLocaleNames.getLength();
190 for (sal_Int32 k = 0; k < nLocaleNames; ++k)
192 aNewStyleLanguages.insert( aLocaleNames[k] );
196 // now check all old style dictionaries if they will add a not yet
197 // added language. If so add them to the resulting vector
198 std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2;
199 for (aIt2 = rOldStyleDics.begin(); aIt2 != rOldStyleDics.end(); ++aIt2)
201 sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength();
203 // old style dics should only have one language listed...
204 DBG_ASSERT( nOldStyleDics, "old style dictionary with more than one language found!");
205 if (nOldStyleDics > 0)
207 if (linguistic::LinguIsUnspecified( aIt2->aLocaleNames[0]))
209 OSL_FAIL( "old style dictionary with invalid language found!" );
210 continue;
213 // language not yet added?
214 if (aNewStyleLanguages.find( aIt2->aLocaleNames[0] ) == aNewStyleLanguages.end())
215 rNewStyleDics.push_back( *aIt2 );
217 else
219 OSL_FAIL( "old style dictionary with no language found!" );
224 rtl_TextEncoding getTextEncodingFromCharset(const sal_Char* pCharset)
226 // default result: used to indicate that we failed to get the proper encoding
227 rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
229 if (pCharset)
231 eRet = rtl_getTextEncodingFromMimeCharset(pCharset);
232 if (eRet == RTL_TEXTENCODING_DONTKNOW)
233 eRet = rtl_getTextEncodingFromUnixCharset(pCharset);
234 if (eRet == RTL_TEXTENCODING_DONTKNOW)
236 if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
237 eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI;
240 return eRet;
243 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */