1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <sal/config.h>
12 #include <string_view>
14 #include <unotools/wincodepage.hxx>
15 #include <rtl/textenc.h>
19 struct LangEncodingDef
21 const std::u16string_view msLangStr
;
22 rtl_TextEncoding meTextEncoding
;
25 // See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756
26 rtl_TextEncoding
impl_getWinTextEncodingFromLangStrANSI(const OUString
& sLanguage
)
28 static constexpr LangEncodingDef aLanguageTab
[] =
30 { u
"en", RTL_TEXTENCODING_MS_1252
}, // Most used -> first in list
31 { u
"th", RTL_TEXTENCODING_MS_874
},
32 { u
"ja", RTL_TEXTENCODING_MS_932
},
33 { u
"zh-cn", RTL_TEXTENCODING_MS_936
}, // Chinese (simplified) - must go before "zh"
34 { u
"ko", RTL_TEXTENCODING_MS_949
},
35 { u
"zh", RTL_TEXTENCODING_MS_950
}, // Chinese (traditional)
36 { u
"bs", RTL_TEXTENCODING_MS_1250
},
37 { u
"cs", RTL_TEXTENCODING_MS_1250
},
38 { u
"hr", RTL_TEXTENCODING_MS_1250
},
39 { u
"hu", RTL_TEXTENCODING_MS_1250
},
40 { u
"pl", RTL_TEXTENCODING_MS_1250
},
41 { u
"ro", RTL_TEXTENCODING_MS_1250
},
42 { u
"sk", RTL_TEXTENCODING_MS_1250
},
43 { u
"sl", RTL_TEXTENCODING_MS_1250
},
44 // { "sr", RTL_TEXTENCODING_MS_1250 },
45 { u
"sq", RTL_TEXTENCODING_MS_1250
},
46 { u
"be", RTL_TEXTENCODING_MS_1251
},
47 { u
"bg", RTL_TEXTENCODING_MS_1251
},
48 { u
"mk", RTL_TEXTENCODING_MS_1251
},
49 { u
"ru", RTL_TEXTENCODING_MS_1251
},
50 { u
"sr", RTL_TEXTENCODING_MS_1251
},
51 { u
"uk", RTL_TEXTENCODING_MS_1251
},
52 { u
"es", RTL_TEXTENCODING_MS_1252
},
53 { u
"el", RTL_TEXTENCODING_MS_1253
},
54 { u
"tr", RTL_TEXTENCODING_MS_1254
},
55 { u
"he", RTL_TEXTENCODING_MS_1255
},
56 { u
"ar", RTL_TEXTENCODING_MS_1256
},
57 { u
"et", RTL_TEXTENCODING_MS_1257
},
58 { u
"lt", RTL_TEXTENCODING_MS_1257
},
59 { u
"lv", RTL_TEXTENCODING_MS_1257
},
60 { u
"vi", RTL_TEXTENCODING_MS_1258
},
63 for (auto& def
: aLanguageTab
)
65 if (sLanguage
.startsWithIgnoreAsciiCase(def
.msLangStr
))
66 return def
.meTextEncoding
;
69 return RTL_TEXTENCODING_MS_1252
;
72 /* ----------------------------------------------------------------------- */
74 // See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756
75 // See http://shapelib.maptools.org/codepage.html
76 rtl_TextEncoding
impl_getWinTextEncodingFromLangStrOEM(const OUString
& sLanguage
)
78 static constexpr LangEncodingDef aLanguageTab
[] =
80 { u
"de", RTL_TEXTENCODING_IBM_437
}, // OEM United States
81 { u
"en-us", RTL_TEXTENCODING_IBM_437
}, // OEM United States
82 { u
"fi", RTL_TEXTENCODING_IBM_437
}, // OEM United States
83 { u
"fr-ca", RTL_TEXTENCODING_IBM_863
}, // OEM French Canadian; French Canadian (DOS)
84 { u
"fr", RTL_TEXTENCODING_IBM_437
}, // OEM United States
85 { u
"it", RTL_TEXTENCODING_IBM_437
}, // OEM United States
86 { u
"nl", RTL_TEXTENCODING_IBM_437
}, // OEM United States
87 { u
"sv", RTL_TEXTENCODING_IBM_437
}, // OEM United States
88 { u
"el", RTL_TEXTENCODING_IBM_737
}, // OEM Greek (formerly 437G); Greek (DOS)
89 { u
"et", RTL_TEXTENCODING_IBM_775
}, // OEM Baltic; Baltic (DOS)
90 { u
"lt", RTL_TEXTENCODING_IBM_775
}, // OEM Baltic; Baltic (DOS)
91 { u
"lv", RTL_TEXTENCODING_IBM_775
}, // OEM Baltic; Baltic (DOS)
92 { u
"en", RTL_TEXTENCODING_IBM_850
}, // OEM Multilingual Latin 1; Western European (DOS)
93 { u
"bs", RTL_TEXTENCODING_IBM_852
}, // OEM Latin 2; Central European (DOS)
94 { u
"cs", RTL_TEXTENCODING_IBM_852
}, // OEM Latin 2; Central European (DOS)
95 { u
"hr", RTL_TEXTENCODING_IBM_852
}, // OEM Latin 2; Central European (DOS)
96 { u
"hu", RTL_TEXTENCODING_IBM_852
}, // OEM Latin 2; Central European (DOS)
97 { u
"pl", RTL_TEXTENCODING_IBM_852
}, // OEM Latin 2; Central European (DOS)
98 { u
"ro", RTL_TEXTENCODING_IBM_852
}, // OEM Latin 2; Central European (DOS)
99 { u
"sk", RTL_TEXTENCODING_IBM_852
}, // OEM Latin 2; Central European (DOS)
100 { u
"sl", RTL_TEXTENCODING_IBM_852
}, // OEM Latin 2; Central European (DOS)
101 // { "sr", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS)
102 { u
"bg", RTL_TEXTENCODING_IBM_855
}, // OEM Cyrillic (primarily Russian)
103 { u
"mk", RTL_TEXTENCODING_IBM_855
}, // OEM Cyrillic (primarily Russian)
104 { u
"sr", RTL_TEXTENCODING_IBM_855
}, // OEM Cyrillic (primarily Russian)
105 { u
"tr", RTL_TEXTENCODING_IBM_857
}, // OEM Turkish; Turkish (DOS)
106 { u
"pt", RTL_TEXTENCODING_IBM_860
}, // OEM Portuguese; Portuguese (DOS)
107 { u
"is", RTL_TEXTENCODING_IBM_861
}, // OEM Icelandic; Icelandic (DOS)
108 { u
"he", RTL_TEXTENCODING_IBM_862
}, // OEM Hebrew; Hebrew (DOS)
109 { u
"ar", RTL_TEXTENCODING_IBM_864
}, // OEM Arabic; Arabic (864)
110 { u
"da", RTL_TEXTENCODING_IBM_865
}, // OEM Nordic; Nordic (DOS)
111 { u
"nn", RTL_TEXTENCODING_IBM_865
}, // OEM Nordic; Nordic (DOS)
112 { u
"be", RTL_TEXTENCODING_IBM_866
}, // OEM Russian; Cyrillic (DOS)
113 { u
"ru", RTL_TEXTENCODING_IBM_866
}, // OEM Russian; Cyrillic (DOS)
114 { u
"uk", RTL_TEXTENCODING_IBM_866
}, // OEM Russian; Cyrillic (DOS)
115 { u
"th", RTL_TEXTENCODING_MS_874
}, // ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
116 { u
"ja", RTL_TEXTENCODING_MS_932
}, // ANSI/OEM Japanese; Japanese (Shift-JIS)
117 { u
"zh-cn", RTL_TEXTENCODING_MS_936
}, // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
118 { u
"ko", RTL_TEXTENCODING_MS_949
}, // ANSI/OEM Korean (Unified Hangul Code)
119 { u
"zh", RTL_TEXTENCODING_MS_950
}, // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
120 { u
"vi", RTL_TEXTENCODING_MS_1258
}, // ANSI/OEM Vietnamese; Vietnamese (Windows)
123 for (auto& def
: aLanguageTab
)
125 if (sLanguage
.startsWithIgnoreAsciiCase(def
.msLangStr
))
126 return def
.meTextEncoding
;
129 return RTL_TEXTENCODING_IBM_850
;
134 rtl_TextEncoding
utl_getWinTextEncodingFromLangStr(const OUString
& sLanguage
, bool bOEM
)
137 impl_getWinTextEncodingFromLangStrOEM(sLanguage
) :
138 impl_getWinTextEncodingFromLangStrANSI(sLanguage
);
141 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */