bump product version to 4.1.6.2
[LibreOffice.git] / lingucomponent / source / lingutil / lingutil.cxx
blobbd107fae82a5e9e7fed6cb06b38177b5c785046b
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #if defined(WNT)
22 #include <windows.h>
23 #endif
25 #include <osl/thread.h>
26 #include <osl/file.hxx>
27 #include <tools/debug.hxx>
28 #include <tools/urlobj.hxx>
29 #include <i18nlangtag/languagetag.hxx>
30 #include <i18nlangtag/mslangid.hxx>
31 #include <unotools/lingucfg.hxx>
32 #include <unotools/pathoptions.hxx>
33 #include <rtl/ustring.hxx>
34 #include <rtl/string.hxx>
35 #include <rtl/tencinfo.h>
36 #include <linguistic/misc.hxx>
38 #include <set>
39 #include <vector>
40 #include <string.h>
42 #include <lingutil.hxx>
44 #include <sal/macros.h>
47 using ::com::sun::star::lang::Locale;
48 using namespace ::com::sun::star;
50 #if defined(WNT)
51 OString Win_GetShortPathName( const OUString &rLongPathName )
53 OString aRes;
55 sal_Unicode aShortBuffer[1024] = {0};
56 sal_Int32 nShortBufSize = SAL_N_ELEMENTS( aShortBuffer );
58 // use the version of 'GetShortPathName' that can deal with Unicode...
59 sal_Int32 nShortLen = GetShortPathNameW(
60 reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
61 reinterpret_cast<LPWSTR>( aShortBuffer ),
62 nShortBufSize );
64 if (nShortLen < nShortBufSize) // conversion successful?
65 aRes = OString( OU2ENC( OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) );
66 else
67 OSL_FAIL( "Win_GetShortPathName: buffer to short" );
69 return aRes;
71 #endif //defined(WNT)
73 //////////////////////////////////////////////////////////////////////
75 // build list of old style diuctionaries (not as extensions) to use.
76 // User installed dictionaries (the ones residing in the user paths)
77 // will get precedence over system installed ones for the same language.
78 std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
80 std::vector< SvtLinguConfigDictionaryEntry > aRes;
82 if (!pDicType)
83 return aRes;
85 OUString aFormatName;
86 String aDicExtension;
87 #ifdef SYSTEM_DICTS
88 OUString aSystemDir;
89 OUString aSystemPrefix;
90 OUString aSystemSuffix;
91 #endif
92 if (strcmp( pDicType, "DICT" ) == 0)
94 aFormatName = "DICT_SPELL";
95 aDicExtension = ".dic";
96 #ifdef SYSTEM_DICTS
97 aSystemDir = DICT_SYSTEM_DIR;
98 aSystemSuffix = aDicExtension;
99 #endif
101 else if (strcmp( pDicType, "HYPH" ) == 0)
103 aFormatName = "DICT_HYPH";
104 aDicExtension = ".dic";
105 #ifdef SYSTEM_DICTS
106 aSystemDir = HYPH_SYSTEM_DIR;
107 aSystemPrefix = "hyph_";
108 aSystemSuffix = aDicExtension;
109 #endif
111 else if (strcmp( pDicType, "THES" ) == 0)
113 aFormatName = "DICT_THES";
114 aDicExtension = ".dat";
115 #ifdef SYSTEM_DICTS
116 aSystemDir = THES_SYSTEM_DIR;
117 aSystemPrefix = "th_";
118 aSystemSuffix = "_v2.dat";
119 #endif
123 if (aFormatName.isEmpty() || aDicExtension.Len() == 0)
124 return aRes;
126 #ifdef SYSTEM_DICTS
127 osl::Directory aSystemDicts(aSystemDir);
128 if (aSystemDicts.open() == osl::FileBase::E_None)
130 // set of languages to remember the language where it is already
131 // decided to make use of the dictionary.
132 std::set< OUString > aDicLangInUse;
134 osl::DirectoryItem aItem;
135 osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL);
136 while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
138 aItem.getFileStatus(aFileStatus);
139 OUString sPath = aFileStatus.getFileURL();
140 if (sPath.lastIndexOf(aSystemSuffix) == sPath.getLength()-aSystemSuffix.getLength())
142 sal_Int32 nStartIndex = sPath.lastIndexOf(sal_Unicode('/')) + 1;
143 if (!sPath.match(aSystemPrefix, nStartIndex))
144 continue;
145 OUString sChunk = sPath.copy(nStartIndex + aSystemPrefix.getLength(),
146 sPath.getLength() - aSystemSuffix.getLength() -
147 nStartIndex - aSystemPrefix.getLength());
148 if (sChunk.isEmpty())
149 continue;
150 // We prefer (now) to use language tags.
151 // Avoid feeding in the older LANG_REGION scheme to the BCP47
152 // ctor as that triggers use of liblangtag and initializes its
153 // database which we do not want during startup. Convert
154 // instead.
155 sal_Int32 nPos;
156 if (sChunk.indexOf('-') < 0 && ((nPos = sChunk.indexOf('_')) > 0))
157 sChunk = sChunk.replaceAt( nPos, 1, OUString('-'));
158 LanguageTag aLangTag(sChunk, true);
159 if (!aLangTag.isValidBcp47())
160 continue;
162 // Thus we first get the language of the dictionary
163 OUString aLocaleName(aLangTag.getBcp47());
165 if (aDicLangInUse.count(aLocaleName) == 0)
167 // remember the new language in use
168 aDicLangInUse.insert(aLocaleName);
170 // add the dictionary to the resulting vector
171 SvtLinguConfigDictionaryEntry aDicEntry;
172 aDicEntry.aLocations.realloc(1);
173 aDicEntry.aLocaleNames.realloc(1);
174 aDicEntry.aLocations[0] = sPath;
175 aDicEntry.aFormatName = aFormatName;
176 aDicEntry.aLocaleNames[0] = aLocaleName;
177 aRes.push_back( aDicEntry );
182 #endif
184 return aRes;
188 void MergeNewStyleDicsAndOldStyleDics(
189 std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
190 const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
192 // get list of languages supported by new style dictionaries
193 std::set< LanguageType > aNewStyleLanguages;
194 std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt;
195 for (aIt = rNewStyleDics.begin() ; aIt != rNewStyleDics.end(); ++aIt)
197 const uno::Sequence< OUString > aLocaleNames( aIt->aLocaleNames );
198 sal_Int32 nLocaleNames = aLocaleNames.getLength();
199 for (sal_Int32 k = 0; k < nLocaleNames; ++k)
201 LanguageType nLang = LanguageTag( aLocaleNames[k] ).getLanguageType();
202 aNewStyleLanguages.insert( nLang );
206 // now check all old style dictionaries if they will add a not yet
207 // added language. If so add them to the resulting vector
208 std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2;
209 for (aIt2 = rOldStyleDics.begin(); aIt2 != rOldStyleDics.end(); ++aIt2)
211 sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength();
213 // old style dics should only have one language listed...
214 DBG_ASSERT( nOldStyleDics, "old style dictionary with more then one language found!");
215 if (nOldStyleDics > 0)
217 LanguageType nLang = LanguageTag( aIt2->aLocaleNames[0] ).getLanguageType();
219 if (nLang == LANGUAGE_DONTKNOW || linguistic::LinguIsUnspecified( nLang))
221 OSL_FAIL( "old style dictionary with invalid language found!" );
222 continue;
225 // language not yet added?
226 if (aNewStyleLanguages.count( nLang ) == 0)
227 rNewStyleDics.push_back( *aIt2 );
229 else
231 OSL_FAIL( "old style dictionary with no language found!" );
237 rtl_TextEncoding getTextEncodingFromCharset(const sal_Char* pCharset)
239 // default result: used to indicate that we failed to get the proper encoding
240 rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
242 if (pCharset)
244 eRet = rtl_getTextEncodingFromMimeCharset(pCharset);
245 if (eRet == RTL_TEXTENCODING_DONTKNOW)
246 eRet = rtl_getTextEncodingFromUnixCharset(pCharset);
247 if (eRet == RTL_TEXTENCODING_DONTKNOW)
249 if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
250 eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI;
253 return eRet;
256 //////////////////////////////////////////////////////////////////////
258 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */