merged tag ooo/DEV300_m102
[LibreOffice.git] / lingucomponent / source / lingutil / lingutil.cxx
blob14253fca5cd301925f5d3432c66f8508f3ec3a35
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_lingucomponent.hxx"
31 #if defined(WNT)
32 #include <tools/prewin.h>
33 #endif
35 #if defined(WNT)
36 #include <Windows.h>
37 #endif
39 #if defined(WNT)
40 #include <tools/postwin.h>
41 #endif
44 #include <osl/thread.h>
45 #include <osl/file.hxx>
46 #include <tools/debug.hxx>
47 #include <tools/urlobj.hxx>
48 #include <i18npool/mslangid.hxx>
49 #include <unotools/lingucfg.hxx>
50 #include <unotools/pathoptions.hxx>
51 #include <rtl/ustring.hxx>
52 #include <rtl/string.hxx>
53 #include <rtl/tencinfo.h>
54 #include <linguistic/misc.hxx>
56 #include <set>
57 #include <vector>
58 #include <string.h>
60 #include <lingutil.hxx>
61 #include <dictmgr.hxx>
66 using ::com::sun::star::lang::Locale;
67 using namespace ::com::sun::star;
69 #if 0
70 //////////////////////////////////////////////////////////////////////
72 String GetDirectoryPathFromFileURL( const String &rFileURL )
74 // get file URL
75 INetURLObject aURLObj;
76 aURLObj.SetSmartProtocol( INET_PROT_FILE );
77 aURLObj.SetSmartURL( rFileURL );
78 aURLObj.removeSegment();
79 DBG_ASSERT( !aURLObj.HasError(), "invalid URL" );
80 String aRes = aURLObj.GetMainURL( INetURLObject::DECODE_TO_IURI );
81 return aRes;
83 #endif
85 #if defined(WNT)
86 rtl::OString Win_GetShortPathName( const rtl::OUString &rLongPathName )
88 rtl::OString aRes;
90 sal_Unicode aShortBuffer[1024] = {0};
91 sal_Int32 nShortBufSize = sizeof( aShortBuffer ) / sizeof( aShortBuffer[0] );
93 // use the version of 'GetShortPathName' that can deal with Unicode...
94 sal_Int32 nShortLen = GetShortPathNameW(
95 reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
96 reinterpret_cast<LPWSTR>( aShortBuffer ),
97 nShortBufSize );
99 if (nShortLen < nShortBufSize) // conversion successful?
100 aRes = rtl::OString( OU2ENC( rtl::OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) );
101 else
102 DBG_ERROR( "Win_GetShortPathName: buffer to short" );
104 return aRes;
106 #endif //defined(WNT)
108 //////////////////////////////////////////////////////////////////////
110 // build list of old style diuctionaries (not as extensions) to use.
111 // User installed dictionaries (the ones residing in the user paths)
112 // will get precedence over system installed ones for the same language.
113 std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
115 std::vector< SvtLinguConfigDictionaryEntry > aRes;
117 if (!pDicType)
118 return aRes;
120 rtl::OUString aFormatName;
121 String aDicExtension;
122 #ifdef SYSTEM_DICTS
123 rtl::OUString aSystemDir;
124 rtl::OUString aSystemPrefix;
125 rtl::OUString aSystemSuffix;
126 #endif
127 bool bSpell = false;
128 bool bHyph = false;
129 bool bThes = false;
130 if (strcmp( pDicType, "DICT" ) == 0)
132 aFormatName = A2OU("DICT_SPELL");
133 aDicExtension = String::CreateFromAscii( ".dic" );
134 #ifdef SYSTEM_DICTS
135 aSystemDir = A2OU( DICT_SYSTEM_DIR );
136 aSystemSuffix = aDicExtension;
137 #endif
138 bSpell = true;
140 else if (strcmp( pDicType, "HYPH" ) == 0)
142 aFormatName = A2OU("DICT_HYPH");
143 aDicExtension = String::CreateFromAscii( ".dic" );
144 #ifdef SYSTEM_DICTS
145 aSystemDir = A2OU( HYPH_SYSTEM_DIR );
146 aSystemPrefix = A2OU( "hyph_" );
147 aSystemSuffix = aDicExtension;
148 #endif
149 bHyph = true;
151 else if (strcmp( pDicType, "THES" ) == 0)
153 aFormatName = A2OU("DICT_THES");
154 aDicExtension = String::CreateFromAscii( ".dat" );
155 #ifdef SYSTEM_DICTS
156 aSystemDir = A2OU( THES_SYSTEM_DIR );
157 aSystemPrefix = A2OU( "th_" );
158 aSystemSuffix = A2OU( "_v2.dat" );
159 #endif
160 bThes = true;
164 if (aFormatName.getLength() == 0 || aDicExtension.Len() == 0)
165 return aRes;
167 // set of languages to remember the language where it is already
168 // decided to make use of the dictionary.
169 std::set< LanguageType > aDicLangInUse;
171 #ifdef SYSTEM_DICTS
172 osl::Directory aSystemDicts(aSystemDir);
173 if (aSystemDicts.open() == osl::FileBase::E_None)
175 osl::DirectoryItem aItem;
176 osl::FileStatus aFileStatus(FileStatusMask_FileURL);
177 while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
179 aItem.getFileStatus(aFileStatus);
180 rtl::OUString sPath = aFileStatus.getFileURL();
181 if (sPath.lastIndexOf(aSystemSuffix) == sPath.getLength()-aSystemSuffix.getLength())
183 sal_Int32 nStartIndex = sPath.lastIndexOf(sal_Unicode('/')) + 1;
184 if (!sPath.match(aSystemPrefix, nStartIndex))
185 continue;
186 rtl::OUString sChunk = sPath.copy(0, sPath.getLength() - aSystemSuffix.getLength());
187 sal_Int32 nIndex = nStartIndex + aSystemPrefix.getLength();
188 rtl::OUString sLang = sChunk.getToken( 0, '_', nIndex );
189 if (!sLang.getLength())
190 continue;
191 rtl::OUString sRegion;
192 if (nIndex != -1)
193 sRegion = sChunk.copy( nIndex, sChunk.getLength() - nIndex );
195 // Thus we first get the language of the dictionary
196 LanguageType nLang = MsLangId::convertIsoNamesToLanguage(
197 sLang, sRegion );
199 if (aDicLangInUse.count( nLang ) == 0)
201 // remember the new language in use
202 aDicLangInUse.insert( nLang );
204 // add the dictionary to the resulting vector
205 SvtLinguConfigDictionaryEntry aDicEntry;
206 aDicEntry.aLocations.realloc(1);
207 aDicEntry.aLocaleNames.realloc(1);
208 rtl::OUString aLocaleName( MsLangId::convertLanguageToIsoString( nLang ) );
209 aDicEntry.aLocations[0] = sPath;
210 aDicEntry.aFormatName = aFormatName;
211 aDicEntry.aLocaleNames[0] = aLocaleName;
212 aRes.push_back( aDicEntry );
218 #endif
220 return aRes;
224 void MergeNewStyleDicsAndOldStyleDics(
225 std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
226 const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
228 // get list of languages supported by new style dictionaries
229 std::set< LanguageType > aNewStyleLanguages;
230 std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt;
231 for (aIt = rNewStyleDics.begin() ; aIt != rNewStyleDics.end(); ++aIt)
233 const uno::Sequence< rtl::OUString > aLocaleNames( aIt->aLocaleNames );
234 sal_Int32 nLocaleNames = aLocaleNames.getLength();
235 for (sal_Int32 k = 0; k < nLocaleNames; ++k)
237 LanguageType nLang = MsLangId::convertIsoStringToLanguage( aLocaleNames[k] );
238 aNewStyleLanguages.insert( nLang );
242 // now check all old style dictionaries if they will add a not yet
243 // added language. If so add them to the resulting vector
244 std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2;
245 for (aIt2 = rOldStyleDics.begin(); aIt2 != rOldStyleDics.end(); ++aIt2)
247 sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength();
249 // old style dics should only have one language listed...
250 DBG_ASSERT( nOldStyleDics, "old style dictionary with more then one language found!");
251 if (nOldStyleDics > 0)
253 LanguageType nLang = MsLangId::convertIsoStringToLanguage( aIt2->aLocaleNames[0] );
255 if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_NONE)
257 DBG_ERROR( "old style dictionary with invalid language found!" );
258 continue;
261 // language not yet added?
262 if (aNewStyleLanguages.count( nLang ) == 0)
263 rNewStyleDics.push_back( *aIt2 );
265 else
267 DBG_ERROR( "old style dictionary with no language found!" );
273 rtl_TextEncoding getTextEncodingFromCharset(const sal_Char* pCharset)
275 // default result: used to indicate that we failed to get the proper encoding
276 rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
278 if (pCharset)
280 eRet = rtl_getTextEncodingFromMimeCharset(pCharset);
281 if (eRet == RTL_TEXTENCODING_DONTKNOW)
282 eRet = rtl_getTextEncodingFromUnixCharset(pCharset);
283 if (eRet == RTL_TEXTENCODING_DONTKNOW)
285 if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
286 eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI;
289 return eRet;
292 //////////////////////////////////////////////////////////////////////