1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: tcvtbyte.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
32 #include "rtl/textcvt.h"
34 /* ======================================================================= */
/* Number of code units reserved per entry for a replacement string; used
   below as the size of maReplaceChars and to size the temporary output
   buffer in ImplUnicodeToChar. */
36 #define IMPL_MAX_REPLACECHAR 5
/* Forward declaration: single-character replacement lookup for c. */
38 sal_uInt16
ImplGetReplaceChar(sal_Unicode c
);
/* Forward declaration: multi-character replacement lookup for c; yields a
   pointer to sal_uInt16 code units. */
40 sal_uInt16
const * ImplGetReplaceString(sal_Unicode c
);
42 /* ----------------------------------------------------------------------- */
/* Tail of the ImplReplaceCharData record type used by aImplRepCharTab.
   NOTE(review): the opening of the typedef is not visible in this extract;
   the lookup code also reads a member named mnUniChar from this type. */
/* Replacement code unit returned by ImplGetReplaceChar. */
47 sal_uInt16 mnReplaceChar
;
48 } ImplReplaceCharData
;
50 static ImplReplaceCharData
const aImplRepCharTab
[] =
52 { 0x00A0, 0x0020 }, /* NO-BREAK-SPACE */
53 { 0x00A1, 0x0021 }, /* INVERTED EXCLAMATION MARK */
54 { 0x00B7, 0x0045 }, /* MIDDLE DOT */
55 { 0x00BF, 0x003F }, /* INVERTED QUESTION MARK */
56 { 0x00D7, 0x002A }, /* MULTIPLIKATION SIGN */
57 { 0x00F7, 0x002F }, /* DIVISION SIGN */
58 { 0x2000, 0x0020 }, /* EN QUAD */
59 { 0x2001, 0x0020 }, /* EM QUAD */
60 { 0x2002, 0x0020 }, /* EN SPACE */
61 { 0x2003, 0x0020 }, /* EM SPACE */
62 { 0x2004, 0x0020 }, /* THREE-PER-EM SPACE */
63 { 0x2005, 0x0020 }, /* FOUR-PER-EM SPACE */
64 { 0x2006, 0x0020 }, /* SIX-PER-EM SPACE */
65 { 0x2007, 0x0020 }, /* FIGURE SPACE */
66 { 0x2008, 0x0020 }, /* PUNCTATION SPACE */
67 { 0x2009, 0x0020 }, /* THIN SPACE */
68 { 0x200A, 0x0020 }, /* HAIR SPACE */
69 { 0x2010, 0x002D }, /* HYPHEN */
70 { 0x2011, 0x002D }, /* NON-BREAKING HYPHEN */
71 { 0x2012, 0x002D }, /* FIGURE DASH */
72 { 0x2013, 0x002D }, /* EN DASH */
73 { 0x2014, 0x002D }, /* EM DASH */
74 { 0x2015, 0x002D }, /* HORIZONTAL BAR */
75 { 0x2018, 0x0027 }, /* LEFT SINGLE QUOTATION MARK */
76 { 0x2019, 0x0027 }, /* RIGHT SINGLE QUOTATION MARK */
77 { 0x201A, 0x002C }, /* SINGLE LOW-9 QUOTATION MARK */
78 { 0x201B, 0x0027 }, /* SINGLE HIGH-RESERVED-9 QUOTATION MARK */
79 { 0x201C, 0x0022 }, /* LEFT DOUBLE QUOTATION MARK */
80 { 0x201D, 0x0022 }, /* RIGHT DOUBLE QUOTATION MARK */
81 { 0x201E, 0x0022 }, /* DOUBLE LOW-9 QUOTATION MARK */
82 { 0x201F, 0x0022 }, /* DOUBLE HIGH-RESERVED-9 QUOTATION MARK */
83 { 0x2022, 0x002D }, /* BULLET */
84 { 0x2023, 0x002D }, /* TRIANGULAR BULLET */
85 { 0x2024, 0x002D }, /* ONE DOT LEADER */
86 { 0x2027, 0x002D }, /* HYPHENATION POINT */
87 { 0x2028, 0x000A }, /* LINE SEPARATOR */
88 { 0x2029, 0x000D }, /* PARAGRAPH SEPARATOR */
89 { 0x2032, 0x0027 }, /* PRIME */
90 { 0x2033, 0x0022 }, /* DOUBLE PRIME */
91 { 0x2035, 0x0027 }, /* RESERVED PRIME */
92 { 0x2036, 0x0022 }, /* RESERVED DOUBLE PRIME */
93 { 0x2039, 0x003C }, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
94 { 0x203A, 0x003E }, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
95 { 0x2043, 0x002D }, /* HYPHEN BULLET */
96 { 0x2044, 0x002F }, /* FRACTION SLASH */
97 { 0x2160, 0x0049 }, /* ROMAN NUMERAL ONE */
98 { 0x2164, 0x0056 }, /* ROMAN NUMERAL FIVE */
99 { 0x2169, 0x0058 }, /* ROMAN NUMERAL TEN */
100 { 0x216C, 0x004C }, /* ROMAN NUMERAL FIFTY */
101 { 0x216D, 0x0043 }, /* ROMAN NUMERAL ONE HUNDRED */
102 { 0x216E, 0x0044 }, /* ROMAN NUMERAL FIVE HUNDRED */
103 { 0x216F, 0x004D }, /* ROMAN NUMERAL ONE THOUSAND */
104 { 0x2170, 0x0069 }, /* SMALL ROMAN NUMERAL ONE */
105 { 0x2174, 0x0076 }, /* SMALL ROMAN NUMERAL FIVE */
106 { 0x2179, 0x0078 }, /* SMALL ROMAN NUMERAL TEN */
107 { 0x217C, 0x006C }, /* SMALL ROMAN NUMERAL FIFTY */
108 { 0x217D, 0x0063 }, /* SMALL ROMAN NUMERAL ONE HUNDRED */
109 { 0x217E, 0x0064 }, /* SMALL ROMAN NUMERAL FIVE HUNDRED */
110 { 0x217F, 0x006D }, /* SMALL ROMAN NUMERAL ONE THOUSAND */
111 { 0x2215, 0x002F }, /* DIVISION SLASH */
112 { 0x2217, 0x002A }, /* ASTERIX OPERATOR */
113 { 0xFF00, 0x0020 }, /* FULLWIDTH ASCII FORMS */
114 { 0xFF01, 0x0021 }, /* FULLWIDTH ASCII FORMS */
115 { 0xFF02, 0x0022 }, /* FULLWIDTH ASCII FORMS*/
116 { 0xFF03, 0x0023 }, /* FULLWIDTH ASCII FORMS */
117 { 0xFF04, 0x0024 }, /* FULLWIDTH ASCII FORMS*/
118 { 0xFF05, 0x0025 }, /* FULLWIDTH ASCII FORMS */
119 { 0xFF06, 0x0026 }, /* FULLWIDTH ASCII FORMS*/
120 { 0xFF07, 0x0027 }, /* FULLWIDTH ASCII FORMS */
121 { 0xFF08, 0x0028 }, /* FULLWIDTH ASCII FORMS*/
122 { 0xFF09, 0x0029 }, /* FULLWIDTH ASCII FORMS */
123 { 0xFF0A, 0x002A }, /* FULLWIDTH ASCII FORMS*/
124 { 0xFF0B, 0x002B }, /* FULLWIDTH ASCII FORMS */
125 { 0xFF0C, 0x002C }, /* FULLWIDTH ASCII FORMS*/
126 { 0xFF0D, 0x002D }, /* FULLWIDTH ASCII FORMS */
127 { 0xFF0E, 0x002E }, /* FULLWIDTH ASCII FORMS*/
128 { 0xFF0F, 0x002F }, /* FULLWIDTH ASCII FORMS */
129 { 0xFF10, 0x0030 }, /* FULLWIDTH ASCII FORMS */
130 { 0xFF11, 0x0031 }, /* FULLWIDTH ASCII FORMS */
131 { 0xFF12, 0x0032 }, /* FULLWIDTH ASCII FORMS*/
132 { 0xFF13, 0x0033 }, /* FULLWIDTH ASCII FORMS */
133 { 0xFF14, 0x0034 }, /* FULLWIDTH ASCII FORMS*/
134 { 0xFF15, 0x0035 }, /* FULLWIDTH ASCII FORMS */
135 { 0xFF16, 0x0036 }, /* FULLWIDTH ASCII FORMS*/
136 { 0xFF17, 0x0037 }, /* FULLWIDTH ASCII FORMS */
137 { 0xFF18, 0x0038 }, /* FULLWIDTH ASCII FORMS*/
138 { 0xFF19, 0x0039 }, /* FULLWIDTH ASCII FORMS */
139 { 0xFF1A, 0x003A }, /* FULLWIDTH ASCII FORMS*/
140 { 0xFF1B, 0x003B }, /* FULLWIDTH ASCII FORMS */
141 { 0xFF1C, 0x003C }, /* FULLWIDTH ASCII FORMS*/
142 { 0xFF1D, 0x003D }, /* FULLWIDTH ASCII FORMS */
143 { 0xFF1E, 0x003E }, /* FULLWIDTH ASCII FORMS*/
144 { 0xFF1F, 0x003F }, /* FULLWIDTH ASCII FORMS */
145 { 0xFF20, 0x0040 }, /* FULLWIDTH ASCII FORMS */
146 { 0xFF21, 0x0041 }, /* FULLWIDTH ASCII FORMS */
147 { 0xFF22, 0x0042 }, /* FULLWIDTH ASCII FORMS*/
148 { 0xFF23, 0x0043 }, /* FULLWIDTH ASCII FORMS */
149 { 0xFF24, 0x0044 }, /* FULLWIDTH ASCII FORMS*/
150 { 0xFF25, 0x0045 }, /* FULLWIDTH ASCII FORMS */
151 { 0xFF26, 0x0046 }, /* FULLWIDTH ASCII FORMS*/
152 { 0xFF27, 0x0047 }, /* FULLWIDTH ASCII FORMS */
153 { 0xFF28, 0x0048 }, /* FULLWIDTH ASCII FORMS*/
154 { 0xFF29, 0x0049 }, /* FULLWIDTH ASCII FORMS */
155 { 0xFF2A, 0x004A }, /* FULLWIDTH ASCII FORMS*/
156 { 0xFF2B, 0x004B }, /* FULLWIDTH ASCII FORMS */
157 { 0xFF2C, 0x004C }, /* FULLWIDTH ASCII FORMS*/
158 { 0xFF2D, 0x004D }, /* FULLWIDTH ASCII FORMS */
159 { 0xFF2E, 0x004E }, /* FULLWIDTH ASCII FORMS*/
160 { 0xFF2F, 0x004F }, /* FULLWIDTH ASCII FORMS */
161 { 0xFF30, 0x0050 }, /* FULLWIDTH ASCII FORMS */
162 { 0xFF31, 0x0051 }, /* FULLWIDTH ASCII FORMS */
163 { 0xFF32, 0x0052 }, /* FULLWIDTH ASCII FORMS*/
164 { 0xFF33, 0x0053 }, /* FULLWIDTH ASCII FORMS */
165 { 0xFF34, 0x0054 }, /* FULLWIDTH ASCII FORMS*/
166 { 0xFF35, 0x0055 }, /* FULLWIDTH ASCII FORMS */
167 { 0xFF36, 0x0056 }, /* FULLWIDTH ASCII FORMS*/
168 { 0xFF37, 0x0057 }, /* FULLWIDTH ASCII FORMS */
169 { 0xFF38, 0x0058 }, /* FULLWIDTH ASCII FORMS*/
170 { 0xFF39, 0x0059 }, /* FULLWIDTH ASCII FORMS */
171 { 0xFF3A, 0x005A }, /* FULLWIDTH ASCII FORMS*/
172 { 0xFF3B, 0x005B }, /* FULLWIDTH ASCII FORMS */
173 { 0xFF3C, 0x005C }, /* FULLWIDTH ASCII FORMS*/
174 { 0xFF3D, 0x005D }, /* FULLWIDTH ASCII FORMS */
175 { 0xFF3E, 0x005E }, /* FULLWIDTH ASCII FORMS*/
176 { 0xFF3F, 0x005F }, /* FULLWIDTH ASCII FORMS */
177 { 0xFF40, 0x0060 }, /* FULLWIDTH ASCII FORMS */
178 { 0xFF41, 0x0061 }, /* FULLWIDTH ASCII FORMS */
179 { 0xFF42, 0x0062 }, /* FULLWIDTH ASCII FORMS*/
180 { 0xFF43, 0x0063 }, /* FULLWIDTH ASCII FORMS */
181 { 0xFF44, 0x0064 }, /* FULLWIDTH ASCII FORMS*/
182 { 0xFF45, 0x0065 }, /* FULLWIDTH ASCII FORMS */
183 { 0xFF46, 0x0066 }, /* FULLWIDTH ASCII FORMS*/
184 { 0xFF47, 0x0067 }, /* FULLWIDTH ASCII FORMS */
185 { 0xFF48, 0x0068 }, /* FULLWIDTH ASCII FORMS*/
186 { 0xFF49, 0x0069 }, /* FULLWIDTH ASCII FORMS */
187 { 0xFF4A, 0x006A }, /* FULLWIDTH ASCII FORMS*/
188 { 0xFF4B, 0x006B }, /* FULLWIDTH ASCII FORMS */
189 { 0xFF4C, 0x006C }, /* FULLWIDTH ASCII FORMS*/
190 { 0xFF4D, 0x006D }, /* FULLWIDTH ASCII FORMS */
191 { 0xFF4E, 0x006E }, /* FULLWIDTH ASCII FORMS*/
192 { 0xFF4F, 0x006F }, /* FULLWIDTH ASCII FORMS */
193 { 0xFF50, 0x0070 }, /* FULLWIDTH ASCII FORMS */
194 { 0xFF51, 0x0071 }, /* FULLWIDTH ASCII FORMS */
195 { 0xFF52, 0x0072 }, /* FULLWIDTH ASCII FORMS*/
196 { 0xFF53, 0x0073 }, /* FULLWIDTH ASCII FORMS */
197 { 0xFF54, 0x0074 }, /* FULLWIDTH ASCII FORMS*/
198 { 0xFF55, 0x0075 }, /* FULLWIDTH ASCII FORMS */
199 { 0xFF56, 0x0076 }, /* FULLWIDTH ASCII FORMS*/
200 { 0xFF57, 0x0077 }, /* FULLWIDTH ASCII FORMS */
201 { 0xFF58, 0x0078 }, /* FULLWIDTH ASCII FORMS*/
202 { 0xFF59, 0x0079 }, /* FULLWIDTH ASCII FORMS */
203 { 0xFF5A, 0x007A }, /* FULLWIDTH ASCII FORMS*/
204 { 0xFF5B, 0x007B }, /* FULLWIDTH ASCII FORMS */
205 { 0xFF5C, 0x007C }, /* FULLWIDTH ASCII FORMS*/
206 { 0xFF5D, 0x007D }, /* FULLWIDTH ASCII FORMS */
207 { 0xFF5E, 0x007E }, /* FULLWIDTH ASCII FORMS*/
208 { 0xFF5F, 0x007F }, /* FULLWIDTH ASCII FORMS */
209 { 0xFF61, 0x3002 }, /* HALFWIDTH KATAKANA FORMS */
210 { 0xFF62, 0x300C }, /* HALFWIDTH KATAKANA FORMS */
211 { 0xFF63, 0x300D }, /* HALFWIDTH KATAKANA FORMS */
212 { 0xFF64, 0x3001 }, /* HALFWIDTH KATAKANA FORMS */
213 { 0xFF65, 0x30FB }, /* HALFWIDTH KATAKANA FORMS */
214 { 0xFF66, 0x30F2 }, /* HALFWIDTH KATAKANA FORMS */
215 { 0xFF67, 0x30A1 }, /* HALFWIDTH KATAKANA FORMS */
216 { 0xFF68, 0x30A3 }, /* HALFWIDTH KATAKANA FORMS */
217 { 0xFF69, 0x30A5 }, /* HALFWIDTH KATAKANA FORMS */
218 { 0xFF6A, 0x30A7 }, /* HALFWIDTH KATAKANA FORMS */
219 { 0xFF6B, 0x30A9 }, /* HALFWIDTH KATAKANA FORMS */
220 { 0xFF6C, 0x30E3 }, /* HALFWIDTH KATAKANA FORMS */
221 { 0xFF6D, 0x30E5 }, /* HALFWIDTH KATAKANA FORMS */
222 { 0xFF6E, 0x30E7 }, /* HALFWIDTH KATAKANA FORMS */
223 { 0xFF6F, 0x30C3 }, /* HALFWIDTH KATAKANA FORMS */
224 { 0xFF70, 0x30FC }, /* HALFWIDTH KATAKANA FORMS */
225 { 0xFF71, 0x30A2 }, /* HALFWIDTH KATAKANA FORMS */
226 { 0xFF72, 0x30A4 }, /* HALFWIDTH KATAKANA FORMS */
227 { 0xFF73, 0x30A6 }, /* HALFWIDTH KATAKANA FORMS */
228 { 0xFF74, 0x30A8 }, /* HALFWIDTH KATAKANA FORMS */
229 { 0xFF75, 0x30AA }, /* HALFWIDTH KATAKANA FORMS */
230 { 0xFF76, 0x30AB }, /* HALFWIDTH KATAKANA FORMS */
231 { 0xFF77, 0x30AD }, /* HALFWIDTH KATAKANA FORMS */
232 { 0xFF78, 0x30AF }, /* HALFWIDTH KATAKANA FORMS */
233 { 0xFF79, 0x30B1 }, /* HALFWIDTH KATAKANA FORMS */
234 { 0xFF7A, 0x30B3 }, /* HALFWIDTH KATAKANA FORMS */
235 { 0xFF7B, 0x30B5 }, /* HALFWIDTH KATAKANA FORMS */
236 { 0xFF7C, 0x30B7 }, /* HALFWIDTH KATAKANA FORMS */
237 { 0xFF7D, 0x30B9 }, /* HALFWIDTH KATAKANA FORMS */
238 { 0xFF7E, 0x30BB }, /* HALFWIDTH KATAKANA FORMS */
239 { 0xFF7F, 0x30BD }, /* HALFWIDTH KATAKANA FORMS */
240 { 0xFF80, 0x30BF }, /* HALFWIDTH KATAKANA FORMS */
241 { 0xFF81, 0x30C1 }, /* HALFWIDTH KATAKANA FORMS */
242 { 0xFF82, 0x30C4 }, /* HALFWIDTH KATAKANA FORMS */
243 { 0xFF83, 0x30C6 }, /* HALFWIDTH KATAKANA FORMS */
244 { 0xFF84, 0x30C8 }, /* HALFWIDTH KATAKANA FORMS */
245 { 0xFF85, 0x30CA }, /* HALFWIDTH KATAKANA FORMS */
246 { 0xFF86, 0x30CB }, /* HALFWIDTH KATAKANA FORMS */
247 { 0xFF87, 0x30CC }, /* HALFWIDTH KATAKANA FORMS */
248 { 0xFF88, 0x30CD }, /* HALFWIDTH KATAKANA FORMS */
249 { 0xFF89, 0x30CE }, /* HALFWIDTH KATAKANA FORMS */
250 { 0xFF8A, 0x30CF }, /* HALFWIDTH KATAKANA FORMS */
251 { 0xFF8B, 0x30D2 }, /* HALFWIDTH KATAKANA FORMS */
252 { 0xFF8C, 0x30D5 }, /* HALFWIDTH KATAKANA FORMS */
253 { 0xFF8D, 0x30D8 }, /* HALFWIDTH KATAKANA FORMS */
254 { 0xFF8E, 0x30DB }, /* HALFWIDTH KATAKANA FORMS */
255 { 0xFF8F, 0x30DE }, /* HALFWIDTH KATAKANA FORMS */
256 { 0xFF90, 0x30DF }, /* HALFWIDTH KATAKANA FORMS */
257 { 0xFF91, 0x30E0 }, /* HALFWIDTH KATAKANA FORMS */
258 { 0xFF92, 0x30E1 }, /* HALFWIDTH KATAKANA FORMS */
259 { 0xFF93, 0x30E2 }, /* HALFWIDTH KATAKANA FORMS */
260 { 0xFF94, 0x30E4 }, /* HALFWIDTH KATAKANA FORMS */
261 { 0xFF95, 0x30E6 }, /* HALFWIDTH KATAKANA FORMS */
262 { 0xFF96, 0x30E8 }, /* HALFWIDTH KATAKANA FORMS */
263 { 0xFF97, 0x30E9 }, /* HALFWIDTH KATAKANA FORMS */
264 { 0xFF98, 0x30EA }, /* HALFWIDTH KATAKANA FORMS */
265 { 0xFF99, 0x30EB }, /* HALFWIDTH KATAKANA FORMS */
266 { 0xFF9A, 0x30EC }, /* HALFWIDTH KATAKANA FORMS */
267 { 0xFF9B, 0x30ED }, /* HALFWIDTH KATAKANA FORMS */
268 { 0xFF9C, 0x30EF }, /* HALFWIDTH KATAKANA FORMS */
269 { 0xFF9D, 0x30F3 }, /* HALFWIDTH KATAKANA FORMS */
270 { 0xFF9E, 0x309B }, /* HALFWIDTH KATAKANA FORMS */
271 { 0xFF9F, 0x309C }, /* HALFWIDTH KATAKANA FORMS */
272 { 0xFFA0, 0x3164 }, /* HALFWIDTH HANGUL FORMS */
273 { 0xFFA1, 0x3131 }, /* HALFWIDTH HANGUL FORMS */
274 { 0xFFA2, 0x3132 }, /* HALFWIDTH HANGUL FORMS */
275 { 0xFFA3, 0x3133 }, /* HALFWIDTH HANGUL FORMS */
276 { 0xFFA4, 0x3134 }, /* HALFWIDTH HANGUL FORMS */
277 { 0xFFA5, 0x3135 }, /* HALFWIDTH HANGUL FORMS */
278 { 0xFFA6, 0x3136 }, /* HALFWIDTH HANGUL FORMS */
279 { 0xFFA7, 0x3137 }, /* HALFWIDTH HANGUL FORMS */
280 { 0xFFA8, 0x3138 }, /* HALFWIDTH HANGUL FORMS */
281 { 0xFFA9, 0x3139 }, /* HALFWIDTH HANGUL FORMS */
282 { 0xFFAA, 0x313A }, /* HALFWIDTH HANGUL FORMS */
283 { 0xFFAB, 0x313B }, /* HALFWIDTH HANGUL FORMS */
284 { 0xFFAC, 0x313C }, /* HALFWIDTH HANGUL FORMS */
285 { 0xFFAD, 0x313D }, /* HALFWIDTH HANGUL FORMS */
286 { 0xFFAE, 0x313E }, /* HALFWIDTH HANGUL FORMS */
287 { 0xFFAF, 0x313F }, /* HALFWIDTH HANGUL FORMS */
288 { 0xFFB0, 0x3140 }, /* HALFWIDTH HANGUL FORMS */
289 { 0xFFB1, 0x3141 }, /* HALFWIDTH HANGUL FORMS */
290 { 0xFFB2, 0x3142 }, /* HALFWIDTH HANGUL FORMS */
291 { 0xFFB3, 0x3143 }, /* HALFWIDTH HANGUL FORMS */
292 { 0xFFB4, 0x3144 }, /* HALFWIDTH HANGUL FORMS */
293 { 0xFFB5, 0x3145 }, /* HALFWIDTH HANGUL FORMS */
294 { 0xFFB6, 0x3146 }, /* HALFWIDTH HANGUL FORMS */
295 { 0xFFB7, 0x3147 }, /* HALFWIDTH HANGUL FORMS */
296 { 0xFFB8, 0x3148 }, /* HALFWIDTH HANGUL FORMS */
297 { 0xFFB9, 0x3149 }, /* HALFWIDTH HANGUL FORMS */
298 { 0xFFBA, 0x314A }, /* HALFWIDTH HANGUL FORMS */
299 { 0xFFBB, 0x314B }, /* HALFWIDTH HANGUL FORMS */
300 { 0xFFBC, 0x314C }, /* HALFWIDTH HANGUL FORMS */
301 { 0xFFBD, 0x314D }, /* HALFWIDTH HANGUL FORMS */
302 { 0xFFBE, 0x314E }, /* HALFWIDTH HANGUL FORMS */
303 { 0xFFC2, 0x314F }, /* HALFWIDTH HANGUL FORMS */
304 { 0xFFC3, 0x3150 }, /* HALFWIDTH HANGUL FORMS */
305 { 0xFFC4, 0x3151 }, /* HALFWIDTH HANGUL FORMS */
306 { 0xFFC5, 0x3152 }, /* HALFWIDTH HANGUL FORMS */
307 { 0xFFC6, 0x3153 }, /* HALFWIDTH HANGUL FORMS */
308 { 0xFFC7, 0x3154 }, /* HALFWIDTH HANGUL FORMS */
309 { 0xFFCA, 0x3155 }, /* HALFWIDTH HANGUL FORMS */
310 { 0xFFCB, 0x3156 }, /* HALFWIDTH HANGUL FORMS */
311 { 0xFFCC, 0x3157 }, /* HALFWIDTH HANGUL FORMS */
312 { 0xFFCD, 0x3158 }, /* HALFWIDTH HANGUL FORMS */
313 { 0xFFCE, 0x3159 }, /* HALFWIDTH HANGUL FORMS */
314 { 0xFFCF, 0x315A }, /* HALFWIDTH HANGUL FORMS */
315 { 0xFFD2, 0x315B }, /* HALFWIDTH HANGUL FORMS */
316 { 0xFFD3, 0x315C }, /* HALFWIDTH HANGUL FORMS */
317 { 0xFFD4, 0x315D }, /* HALFWIDTH HANGUL FORMS */
318 { 0xFFD5, 0x315E }, /* HALFWIDTH HANGUL FORMS */
319 { 0xFFD6, 0x315F }, /* HALFWIDTH HANGUL FORMS */
320 { 0xFFD7, 0x3160 }, /* HALFWIDTH HANGUL FORMS */
321 { 0xFFDA, 0x3161 }, /* HALFWIDTH HANGUL FORMS */
322 { 0xFFDB, 0x3162 }, /* HALFWIDTH HANGUL FORMS */
323 { 0xFFDC, 0x3163 }, /* HALFWIDTH HANGUL FORMS */
324 { 0xFFE0, 0x00A2 }, /* FULLWIDTH CENT SIGN */
325 { 0xFFE1, 0x00A3 }, /* FULLWIDTH POUND SIGN */
326 { 0xFFE2, 0x00AC }, /* FULLWIDTH NOT SIGN */
327 { 0xFFE3, 0x00AF }, /* FULLWIDTH MACRON */
328 { 0xFFE4, 0x00A6 }, /* FULLWIDTH BROKEN BAR */
329 { 0xFFE5, 0x00A5 }, /* FULLWIDTH YEN SIGN */
330 { 0xFFE6, 0x20A9 }, /* FULLWIDTH WON SIGN */
331 { 0xFFE8, 0x2502 }, /* HALFWIDTH FORMS LIGHT VERTICAL */
332 { 0xFFE9, 0x2190 }, /* HALFWIDTH LEFTWARDS ARROW */
333 { 0xFFEA, 0x2191 }, /* HALFWIDTH UPWARDS ARROW */
334 { 0xFFEB, 0x2192 }, /* HALFWIDTH RIGHTWARDS ARROW */
335 { 0xFFEC, 0x2193 }, /* HALFWIDTH DOWNWARDS ARROW */
336 { 0xFFED, 0x25A0 }, /* HALFWIDTH BLACK SQUARE */
337 { 0xFFEE, 0x25CB }, /* HALFWIDTH WHITE CIRCLE */
338 { 0xFFFD, 0x003F } /* REPLACEMENT CHARACTER */
/* Looks up c in aImplRepCharTab and returns the mnReplaceChar member of the
   entry it settles on.
   NOTE(review): this extract omits several lines of the function (the
   remaining local declarations, the loop header, the branch bodies and the
   final return), so the result for a character that is not in the table is
   not visible here. */
341 sal_uInt16
ImplGetReplaceChar( sal_Unicode c
)
346 sal_uInt16 nCompareChar
;
347 const ImplReplaceCharData
* pCharData
;
/* Index of the last table entry (element count minus one). */
350 nHigh
= (sizeof( aImplRepCharTab
)/sizeof( ImplReplaceCharData
))-1;
/* Probe the entry midway between nLow and nHigh. */
353 nMid
= (nLow
+nHigh
)/2;
354 pCharData
= aImplRepCharTab
+nMid
;
355 nCompareChar
= pCharData
->mnUniChar
;
/* c sorts before the probed key (branch body not visible in this extract). */
356 if ( c
< nCompareChar
)
/* c sorts after the probed key (branch body not visible in this extract). */
364 if ( c
> nCompareChar
)
/* Hand back the probed entry's replacement; presumably reached when c
   equals nCompareChar. */
367 return pCharData
->mnReplaceChar
;
/* Keep probing while the index interval is non-empty. */
370 while ( nLow
<= nHigh
);
375 /* ----------------------------------------------------------------------- */
/* Members of the ImplReplaceCharStrData record type used by
   aImplRepCharStrTab.
   NOTE(review): the opening of the typedef is not visible in this extract. */
/* Key: the code unit that ImplGetReplaceString compares against. */
379 sal_uInt16 mnUniChar
;
/* Replacement code units; the table initialisers pad unused slots with
   0x0000, and ImplUnicodeToChar stops reading at the first zero. */
380 sal_uInt16 maReplaceChars
[IMPL_MAX_REPLACECHAR
];
381 } ImplReplaceCharStrData
;
383 static ImplReplaceCharStrData
const aImplRepCharStrTab
[] =
385 { 0x00A9, { 0x0028, 0x0063, 0x0029, 0x0000, 0x0000 } }, /* COPYRIGHT SIGN */
386 { 0x00AB, { 0x003C, 0x003C, 0x0000, 0x0000, 0x0000 } }, /* LEFT-POINTING-DOUBLE ANGLE QUOTATION MARK */
387 { 0x0AE0, { 0x0028, 0x0072, 0x0029, 0x0000, 0x0000 } }, /* REGISTERED SIGN */
388 { 0x00BB, { 0x003E, 0x003E, 0x0000, 0x0000, 0x0000 } }, /* RIGHT-POINTING-DOUBLE ANGLE QUOTATION MARK */
389 { 0x00BC, { 0x0031, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE QUARTER */
390 { 0x00BD, { 0x0031, 0x002F, 0x0032, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE HALF */
391 { 0x00BE, { 0x0033, 0x002F, 0x0034, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE QUARTERS */
392 { 0x00C6, { 0x0041, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LETTER AE */
393 { 0x00E6, { 0x0061, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LETTER AE */
394 { 0x0152, { 0x004F, 0x0045, 0x0000, 0x0000, 0x0000 } }, /* LATIN CAPITAL LIGATURE OE */
395 { 0x0153, { 0x006F, 0x0065, 0x0000, 0x0000, 0x0000 } }, /* LATIN SMALL LIGATURE OE */
396 { 0x2025, { 0x002E, 0x002E, 0x0000, 0x0000, 0x0000 } }, /* TWO DOT LEADER */
397 { 0x2026, { 0x002E, 0x002E, 0x002E, 0x0000, 0x0000 } }, /* HORIZONTAL ELLIPSES */
398 { 0x2034, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* TRIPPLE PRIME */
399 { 0x2037, { 0x0027, 0x0027, 0x0027, 0x0000, 0x0000 } }, /* RESERVED TRIPPLE PRIME */
400 { 0x20AC, { 0x0045, 0x0055, 0x0052, 0x0000, 0x0000 } }, /* EURO SIGN */
401 { 0x2122, { 0x0028, 0x0074, 0x006D, 0x0029, 0x0000 } }, /* TRADE MARK SIGN */
402 { 0x2153, { 0x0031, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE THIRD */
403 { 0x2154, { 0x0032, 0x002F, 0x0033, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO THIRD */
404 { 0x2155, { 0x0031, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE FIFTH */
405 { 0x2156, { 0x0032, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION TWO FIFTH */
406 { 0x2157, { 0x0033, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE FIFTH */
407 { 0x2158, { 0x0034, 0x002F, 0x0035, 0x0000, 0x0000 } }, /* VULGAR FRACTION FOUR FIFTH */
408 { 0x2159, { 0x0031, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE SIXTH */
409 { 0x215A, { 0x0035, 0x002F, 0x0036, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE SIXTH */
410 { 0x215B, { 0x0031, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION ONE EIGHTH */
411 { 0x215C, { 0x0033, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION THREE EIGHTH */
412 { 0x215D, { 0x0035, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION FIVE EIGHTH */
413 { 0x215E, { 0x0037, 0x002F, 0x0038, 0x0000, 0x0000 } }, /* VULGAR FRACTION SEVEN EIGHTH */
414 { 0x215F, { 0x0031, 0x002F, 0x0000, 0x0000, 0x0000 } }, /* FRACTION NUMERATOR ONE */
415 { 0x2161, { 0x0049, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWO */
416 { 0x2162, { 0x0049, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL THREE */
417 { 0x2163, { 0x0049, 0x0056, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL FOUR */
418 { 0x2165, { 0x0056, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SIX */
419 { 0x2166, { 0x0056, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL SEVEN */
420 { 0x2168, { 0x0056, 0x0049, 0x0049, 0x0049, 0x0000 } }, /* ROMAN NUMERAL EIGHT */
421 { 0x2169, { 0x0049, 0x0058, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL NINE */
422 { 0x216A, { 0x0058, 0x0049, 0x0000, 0x0000, 0x0000 } }, /* ROMAN NUMERAL ELEVEN */
423 { 0x216B, { 0x0058, 0x0049, 0x0049, 0x0000, 0x0000 } }, /* ROMAN NUMERAL TWELVE */
424 { 0x2171, { 0x0069, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL TWO */
425 { 0x2172, { 0x0069, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL THREE */
426 { 0x2173, { 0x0069, 0x0076, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL FOUR */
427 { 0x2175, { 0x0076, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SIX */
428 { 0x2176, { 0x0076, 0x0069, 0x0069, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL SEVEN */
429 { 0x2178, { 0x0076, 0x0069, 0x0069, 0x0069, 0x0000 } }, /* SMALL ROMAN NUMERAL EIGHT */
430 { 0x2179, { 0x0069, 0x0078, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL NINE */
431 { 0x217A, { 0x0078, 0x0069, 0x0000, 0x0000, 0x0000 } }, /* SMALL ROMAN NUMERAL ELEVEN */
432 { 0x217B, { 0x0058, 0x0069, 0x0069, 0x0000, 0x0000 } } /* SMALL ROMAN NUMERAL TWELVE */
/* Looks up c in aImplRepCharStrTab and returns a pointer to the
   maReplaceChars array of the entry it settles on.
   NOTE(review): this extract omits several lines of the function (the
   remaining local declarations, the loop header, the branch bodies and the
   final return), so the result for a character that is not in the table is
   not visible here. */
435 const sal_uInt16
* ImplGetReplaceString( sal_Unicode c
)
440 sal_uInt16 nCompareChar
;
441 const ImplReplaceCharStrData
* pCharData
;
/* Index of the last table entry (element count minus one). */
444 nHigh
= (sizeof( aImplRepCharStrTab
)/sizeof( ImplReplaceCharStrData
))-1;
/* Probe the entry midway between nLow and nHigh. */
447 nMid
= (nLow
+nHigh
)/2;
448 pCharData
= aImplRepCharStrTab
+nMid
;
449 nCompareChar
= pCharData
->mnUniChar
;
/* c sorts before the probed key (branch body not visible in this extract). */
450 if ( c
< nCompareChar
)
/* c sorts after the probed key (branch body not visible in this extract). */
458 if ( c
> nCompareChar
)
/* Hand back the probed entry's replacement string; presumably reached when
   c equals nCompareChar. */
461 return pCharData
->maReplaceChars
;
/* Keep probing while the index interval is non-empty. */
464 while ( nLow
<= nHigh
);
469 /* ======================================================================= */
/* Converts bytes from pSrcBuf into sal_Unicode values in pDestBuf for the
   symbol encoding: each byte is stored either unchanged or offset by 0xF000.
   pData, pContext and nFlags are unused.  On return *pSrcCvtBytes is nSrcBytes
   minus the unread remainder of the source, and the return value is
   nDestChars minus the unused remainder of the destination.
   NOTE(review): several lines are missing from this extract (one parameter
   line, the declaration of c, the condition choosing between the two stores,
   the pointer increments and the exit from the loop on a full buffer). */
471 sal_Size
ImplSymbolToUnicode( const ImplTextConverterData
* pData
,
473 const sal_Char
* pSrcBuf
, sal_Size nSrcBytes
,
474 sal_Unicode
* pDestBuf
, sal_Size nDestChars
,
475 sal_uInt32 nFlags
, sal_uInt32
* pInfo
,
476 sal_Size
* pSrcCvtBytes
)
479 sal_Unicode
* pEndDestBuf
;
480 const sal_Char
* pEndSrcBuf
;
482 (void) pData
; /* unused */
483 (void) pContext
; /* unused */
484 (void) nFlags
; /* unused */
/* One-past-the-end markers for both buffers. */
487 pEndDestBuf
= pDestBuf
+nDestChars
;
488 pEndSrcBuf
= pSrcBuf
+nSrcBytes
;
489 while ( pSrcBuf
< pEndSrcBuf
)
/* Destination exhausted: report error + buffer-too-small through *pInfo. */
491 if ( pDestBuf
== pEndDestBuf
)
493 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
497 /* 0-31 (all Control-Character get the same Unicode value) */
/* Read the byte as unsigned so values above 0x7F do not become negative. */
498 c
= (sal_uChar
)*pSrcBuf
;
/* Store the byte value unchanged (per the comment above, presumably the
   0-31 case; the selecting condition is not visible). */
500 *pDestBuf
= (sal_Unicode
)c
;
/* Otherwise store the byte value shifted up by 0xF000. */
502 *pDestBuf
= ((sal_Unicode
)c
)+0xF000;
/* Source units consumed = total minus what is left before pEndSrcBuf. */
507 *pSrcCvtBytes
= nSrcBytes
- (pEndSrcBuf
-pSrcBuf
);
/* Destination units used = total minus what is left before pEndDestBuf. */
508 return (nDestChars
- (pEndDestBuf
-pDestBuf
));
511 /* ----------------------------------------------------------------------- */
/* Converts sal_Unicode values from pSrcBuf into bytes in pDestBuf for the
   symbol encoding.  Values in 0xF000..0xF0FF are stored with 0xF000
   subtracted, values up to 0x00FF are stored unchanged; other values go
   through replacement / undefined-character handling.  pContext is unused.
   On return *pSrcCvtChars is nSrcChars minus the unread remainder of the
   source, and the return value is nDestBytes minus the unused remainder of
   the destination.
   NOTE(review): several lines are missing from this extract (one parameter
   line, the declaration and loading of c, the pointer increments, most of
   the fallback branch and the remaining arguments of
   ImplHandleUndefinedUnicodeToTextChar). */
513 sal_Size
ImplUnicodeToSymbol( const ImplTextConverterData
* pData
,
515 const sal_Unicode
* pSrcBuf
, sal_Size nSrcChars
,
516 sal_Char
* pDestBuf
, sal_Size nDestBytes
,
517 sal_uInt32 nFlags
, sal_uInt32
* pInfo
,
518 sal_Size
* pSrcCvtChars
)
521 sal_Char
* pEndDestBuf
;
522 const sal_Unicode
* pEndSrcBuf
;
524 (void) pContext
; /* unused */
/* One-past-the-end markers for both buffers. */
527 pEndDestBuf
= pDestBuf
+nDestBytes
;
528 pEndSrcBuf
= pSrcBuf
+nSrcChars
;
529 while ( pSrcBuf
< pEndSrcBuf
)
/* Destination exhausted: report error + buffer-too-small through *pInfo. */
531 if ( pDestBuf
== pEndDestBuf
)
533 *pInfo
|= RTL_UNICODETOTEXT_INFO_ERROR
| RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Values in the 0xF000..0xF0FF range map back to a byte by removing the
   0xF000 offset. */
538 if ( (c
>= 0xF000) && (c
<= 0xF0FF) )
540 *pDestBuf
= (sal_Char
)(sal_uChar
)(c
-0xF000);
544 // Normally 0x001F, but in many cases also symbol characters
545 // are stored in the first 256 bytes, so that we don't change
/* Values up to 0x00FF are stored as the same byte value. */
547 else if ( c
<= 0x00FF )
549 *pDestBuf
= (sal_Char
)(sal_uChar
)c
;
/* Caller asked for undefined characters to be replaced (the replacement
   code itself is not visible in this extract). */
555 if ( nFlags
& RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
)
558 /* Only ascii characters < 0x1F */
561 /* Handle undefined and surrogates characters */
562 /* (all surrogates characters are undefined) */
563 if (!ImplHandleUndefinedUnicodeToTextChar(pData
,
/* Source units consumed = total minus what is left before pEndSrcBuf. */
574 *pSrcCvtChars
= nSrcChars
- (pEndSrcBuf
-pSrcBuf
);
/* Destination bytes used = total minus what is left before pEndDestBuf. */
575 return (nDestBytes
- (pEndDestBuf
-pDestBuf
));
578 /* ======================================================================= */
/* Converts bytes from pSrcBuf into sal_Unicode values in pDestBuf using the
   byte-conversion tables reached through pData (viewed as
   ImplByteConvertData).  A byte inside one of the two [start, end] ranges is
   translated through the matching table; handling of other bytes depends on
   the RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_* bits of nFlags.  pContext is
   unused.  On return *pSrcCvtBytes is nSrcBytes minus the unread remainder
   of the source, and the return value is nDestChars minus the unused
   remainder of the destination.
   NOTE(review): several lines are missing from this extract (one parameter
   line, declarations of c and cConv, the conditions around the
   undefined-character handling, the store into pDestBuf and the pointer
   increments). */
580 sal_Size
ImplCharToUnicode( const ImplTextConverterData
* pData
,
582 const sal_Char
* pSrcBuf
, sal_Size nSrcBytes
,
583 sal_Unicode
* pDestBuf
, sal_Size nDestChars
,
584 sal_uInt32 nFlags
, sal_uInt32
* pInfo
,
585 sal_Size
* pSrcCvtBytes
)
589 const ImplByteConvertData
* pConvertData
= (const ImplByteConvertData
*)pData
;
590 sal_Unicode
* pEndDestBuf
;
591 const sal_Char
* pEndSrcBuf
;
593 (void) pContext
; /* unused */
/* One-past-the-end markers for both buffers. */
596 pEndDestBuf
= pDestBuf
+nDestChars
;
597 pEndSrcBuf
= pSrcBuf
+nSrcBytes
;
598 while ( pSrcBuf
< pEndSrcBuf
)
/* Read the byte as unsigned so it can be used as a table index. */
600 c
= (sal_uChar
)*pSrcBuf
;
/* First table: covers bytes mnToUniStart1..mnToUniEnd1, indexed relative to
   the range start. */
605 if ( (c
>= pConvertData
->mnToUniStart1
) && (c
<= pConvertData
->mnToUniEnd1
) )
606 cConv
= pConvertData
->mpToUniTab1
[c
-pConvertData
->mnToUniStart1
];
/* Second table: covers bytes mnToUniStart2..mnToUniEnd2. */
607 else if ( (c
>= pConvertData
->mnToUniStart2
) && (c
<= pConvertData
->mnToUniEnd2
) )
608 cConv
= pConvertData
->mpToUniTab2
[c
-pConvertData
->mnToUniStart2
];
/* Flag that an undefined byte was met (the enclosing condition is not
   visible in this extract). */
613 *pInfo
|= RTL_TEXTTOUNICODE_INFO_UNDEFINED
;
/* "Error" policy: additionally raise the error flag. */
614 if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK
) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
)
616 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
;
/* "Ignore" policy (branch body not visible in this extract). */
619 else if ( (nFlags
& RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK
) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE
)
/* Otherwise obtain a stand-in value from ImplGetUndefinedUnicodeChar. */
625 cConv
= ImplGetUndefinedUnicodeChar(c
, nFlags
);
/* Destination exhausted: report error + buffer-too-small through *pInfo. */
629 if ( pDestBuf
== pEndDestBuf
)
631 *pInfo
|= RTL_TEXTTOUNICODE_INFO_ERROR
| RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* Source units consumed = total minus what is left before pEndSrcBuf. */
640 *pSrcCvtBytes
= nSrcBytes
- (pEndSrcBuf
-pSrcBuf
);
/* Destination units used = total minus what is left before pEndDestBuf. */
641 return (nDestChars
- (pEndDestBuf
-pDestBuf
));
644 /* ----------------------------------------------------------------------- */
646 // Writes 0--2 characters to dest:
/* Converts one Unicode value c to the byte encoding described by
   pConvertData, storing the result in dest[0] (and dest[1] when the extended
   table entry has a non-zero mnChar2).  Lookup order as visible here: direct
   store, first range table, second range table, then a halving search of the
   extended table mpToCharExTab.
   NOTE(review): the return statements and several conditions are missing
   from this extract, so the exact return values and the condition guarding
   the direct store are not visible. */
647 static int ImplConvertUnicodeCharToChar(
648 const ImplByteConvertData
* pConvertData
, sal_Unicode c
, sal_Char
* dest
)
650 const ImplUniCharTabData
* pToCharExTab
;
/* Direct store of c as a byte (guarding condition not visible). */
654 dest
[0] = (sal_Char
)c
;
/* First range table: covers mnToCharStart1..mnToCharEnd1, indexed relative
   to the range start. */
657 if ( (c
>= pConvertData
->mnToCharStart1
) && (c
<= pConvertData
->mnToCharEnd1
) )
659 dest
[0] = (sal_Char
)pConvertData
->mpToCharTab1
[c
-pConvertData
->mnToCharStart1
];
/* Second range table: covers mnToCharStart2..mnToCharEnd2. */
663 else if ( (c
>= pConvertData
->mnToCharStart2
) && (c
<= pConvertData
->mnToCharEnd2
) )
665 dest
[0] = (sal_Char
)pConvertData
->mpToCharTab2
[c
-pConvertData
->mnToCharStart2
];
/* Extended table of individual mappings. */
669 pToCharExTab
= pConvertData
->mpToCharExTab
;
675 sal_uInt16 nCompareChar
;
676 const ImplUniCharTabData
* pCharExData
;
/* Index of the last extended-table entry. */
679 nHigh
= pConvertData
->mnToCharExCount
-1;
/* Probe the entry midway between nLow and nHigh. */
682 nMid
= (nLow
+nHigh
)/2;
683 pCharExData
= pToCharExTab
+nMid
;
684 nCompareChar
= pCharExData
->mnUniChar
;
/* c sorts before the probed key (branch body not visible in this extract). */
685 if ( c
< nCompareChar
)
/* c sorts after the probed key (branch body not visible in this extract). */
693 if ( c
> nCompareChar
)
/* Emit the entry's first byte. */
697 dest
[0] = (sal_Char
)pCharExData
->mnChar
;
/* A zero mnChar2 marks a single-byte mapping (branch body not visible). */
698 if ( pCharExData
->mnChar2
== 0 )
/* Otherwise emit the second byte as well. */
702 dest
[1] = (sal_Char
)pCharExData
->mnChar2
;
/* Keep probing while the index interval is non-empty. */
708 while ( nLow
<= nHigh
);
713 /* ----------------------------------------------------------------------- */
/* Converts sal_Unicode values from pSrcBuf into bytes in pDestBuf using the
   tables reached through pData (viewed as ImplByteConvertData).  Each
   character is first converted into aTempBuf via
   ImplConvertUnicodeCharToChar; depending on nFlags the single-character
   replacement (ImplGetReplaceChar) and the replacement string
   (ImplGetReplaceString) are tried, and ImplHandleUndefinedUnicodeToTextChar
   is consulted for undefined characters.  The n staged bytes are then copied
   to pDestBuf if they fit.  pContext is unused.  On return *pSrcCvtChars is
   nSrcChars minus the unread remainder of the source, and the return value
   is nDestBytes minus the unused remainder of the destination.
   NOTE(review): many lines are missing from this extract (one parameter
   line, declarations of c, cTemp, i and n, the conditions linking the
   fallback stages, loop bookkeeping and the remaining arguments of
   ImplHandleUndefinedUnicodeToTextChar). */
715 sal_Size
ImplUnicodeToChar( const ImplTextConverterData
* pData
,
717 const sal_Unicode
* pSrcBuf
, sal_Size nSrcChars
,
718 sal_Char
* pDestBuf
, sal_Size nDestBytes
,
719 sal_uInt32 nFlags
, sal_uInt32
* pInfo
,
720 sal_Size
* pSrcCvtChars
)
723 const ImplByteConvertData
* pConvertData
= (const ImplByteConvertData
*)pData
;
724 sal_Char
* pEndDestBuf
;
725 const sal_Unicode
* pEndSrcBuf
;
/* Staging buffer for the bytes produced for one source character. */
729 sal_Char aTempBuf
[IMPL_MAX_REPLACECHAR
+2];
730 const sal_uInt16
* pReplace
;
732 (void) pContext
; /* unused */
/* One-past-the-end markers for both buffers. */
735 pEndDestBuf
= pDestBuf
+nDestBytes
;
736 pEndSrcBuf
= pSrcBuf
+nSrcChars
;
737 while ( pSrcBuf
< pEndSrcBuf
)
/* Direct store of c as a byte (guarding condition not visible). */
742 aTempBuf
[0] = (sal_Char
)c
;
/* Regular table-driven conversion of c. */
747 n
= ImplConvertUnicodeCharToChar( pConvertData
, c
, aTempBuf
);
/* Single-character replacement requested: look up a substitute for c and
   convert that instead. */
751 if ( nFlags
& RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
)
753 cTemp
= ImplGetReplaceChar( c
);
755 n
= ImplConvertUnicodeCharToChar(
756 pConvertData
, cTemp
, aTempBuf
);
/* Replacement string requested: convert its code units one after another,
   appending each result at offset n of aTempBuf. */
761 if ( nFlags
& RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR
)
763 pReplace
= ImplGetReplaceString( c
);
/* Stop at the terminating zero or once IMPL_MAX_REPLACECHAR bytes are staged. */
766 while ( *pReplace
&& (n
< IMPL_MAX_REPLACECHAR
) )
768 i
= ImplConvertUnicodeCharToChar(
769 pConvertData
, *pReplace
, aTempBuf
+ n
);
781 /* Handle undefined and surrogates characters */
782 /* (all surrogates characters are undefined) */
785 if (ImplHandleUndefinedUnicodeToTextChar(pData
,
/* Not enough room left for the staged bytes: report error +
   buffer-too-small through *pInfo. */
800 if ( pEndDestBuf
- pDestBuf
< n
)
802 *pInfo
|= RTL_UNICODETOTEXT_INFO_ERROR
| RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Copy the staged bytes to the destination, advancing pDestBuf. */
806 for ( i
= 0; i
< n
; ++i
)
807 *pDestBuf
++ = aTempBuf
[i
];
/* Source units consumed = total minus what is left before pEndSrcBuf. */
811 *pSrcCvtChars
= nSrcChars
- (pEndSrcBuf
-pSrcBuf
);
/* Destination bytes used = total minus what is left before pEndDestBuf. */
812 return (nDestBytes
- (pEndDestBuf
-pDestBuf
));