Update ooo320-m1
[ooovba.git] / sal / textenc / convertiso2022kr.c
blob01ff49bb715a59ff7ed3fd3cc3c027df89709fa2
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: convertiso2022kr.c,v $
10 * $Revision: 1.7 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "convertiso2022kr.h"
32 #include "context.h"
33 #include "converter.h"
34 #include "tenchelp.h"
35 #include "unichars.h"
36 #include "rtl/alloc.h"
37 #include "rtl/textcvt.h"
38 #include "sal/types.h"
/*
 * Decoder states for the ISO-2022-KR -> Unicode direction.
 *
 * The order is important: the converter treats every state that compares
 * greater than IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001 as "input ended in the
 * middle of a sequence".
 */
typedef enum
{
    /* Shifted in: bytes below 0x80 are passed through unchanged. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII = 0,
    /* Shifted out (after SO): expecting the first byte of a KS X 1001 pair. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001 = 1,
    /* First byte of a KS X 1001 pair seen: expecting the second byte. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2 = 2,
    /* ESC seen: expecting '$'. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC = 3,
    /* ESC '$' seen: expecting ')'. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR = 4,
    /* ESC '$' ')' seen: expecting 'C'. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN = 5
} ImplIso2022KrToUnicodeState;
50 typedef struct
52 ImplIso2022KrToUnicodeState m_eState;
53 sal_uInt32 m_nRow;
54 } ImplIso2022KrToUnicodeContext;
/*
 * Output-side shift state for the Unicode -> ISO-2022-KR direction.
 */
typedef enum
{
    /* Nothing written yet: the ESC $ ) C designator is still outstanding. */
    IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE = 0,
    /* Designator written, shifted in: single bytes are ASCII. */
    IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII = 1,
    /* SO written: output is in two-byte KS X 1001 mode until SI. */
    IMPL_UNICODE_TO_ISO_2022_KR_SET_1001 = 2
} ImplUnicodeToIso2022KrSet;
63 typedef struct
65 sal_Unicode m_nHighSurrogate;
66 ImplUnicodeToIso2022KrSet m_eSet;
67 } ImplUnicodeToIso2022KrContext;
69 void * ImplCreateIso2022KrToUnicodeContext(void)
71 void * pContext
72 = rtl_allocateMemory(sizeof (ImplIso2022KrToUnicodeContext));
73 ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState
74 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
75 return pContext;
78 void ImplResetIso2022KrToUnicodeContext(void * pContext)
80 if (pContext)
81 ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState
82 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
85 sal_Size ImplConvertIso2022KrToUnicode(ImplTextConverterData const * pData,
86 void * pContext,
87 sal_Char const * pSrcBuf,
88 sal_Size nSrcBytes,
89 sal_Unicode * pDestBuf,
90 sal_Size nDestChars,
91 sal_uInt32 nFlags,
92 sal_uInt32 * pInfo,
93 sal_Size * pSrcCvtBytes)
95 ImplDBCSToUniLeadTab const * pKsX1001Data
96 = ((ImplIso2022KrConverterData const *) pData)->
97 m_pKsX1001ToUnicodeData;
98 ImplIso2022KrToUnicodeState eState
99 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
100 sal_uInt32 nRow = 0;
101 sal_uInt32 nInfo = 0;
102 sal_Size nConverted = 0;
103 sal_Unicode * pDestBufPtr = pDestBuf;
104 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
106 if (pContext)
108 eState = ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState;
109 nRow = ((ImplIso2022KrToUnicodeContext *) pContext)->m_nRow;
112 for (; nConverted < nSrcBytes; ++nConverted)
114 sal_Bool bUndefined = sal_True;
115 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
116 switch (eState)
118 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII:
119 if (nChar == 0x0E) /* SO */
120 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001;
121 else if (nChar == 0x1B) /* ESC */
122 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC;
123 else if (nChar < 0x80)
124 if (pDestBufPtr != pDestBufEnd)
125 *pDestBufPtr++ = (sal_Unicode) nChar;
126 else
127 goto no_output;
128 else
130 bUndefined = sal_False;
131 goto bad_input;
133 break;
135 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001:
136 if (nChar == 0x0F) /* SI */
137 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
138 else if (nChar >= 0x21 && nChar <= 0x7E)
140 nRow = nChar + 0x80;
141 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2;
143 else
145 bUndefined = sal_False;
146 goto bad_input;
148 break;
150 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2:
151 if (nChar >= 0x21 && nChar <= 0x7E)
153 sal_uInt16 nUnicode = 0;
154 sal_uInt32 nFirst = pKsX1001Data[nRow].mnTrailStart;
155 nChar += 0x80;
156 if (nChar >= nFirst && nChar <= pKsX1001Data[nRow].mnTrailEnd)
157 nUnicode = pKsX1001Data[nRow].
158 mpToUniTrailTab[nChar - nFirst];
159 if (nUnicode != 0)
160 if (pDestBufPtr != pDestBufEnd)
162 *pDestBufPtr++ = (sal_Unicode) nUnicode;
163 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001;
165 else
166 goto no_output;
167 else
168 goto bad_input;
170 else
172 bUndefined = sal_False;
173 goto bad_input;
175 break;
177 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC:
178 if (nChar == 0x24) /* $ */
179 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR;
180 else
182 bUndefined = sal_False;
183 goto bad_input;
185 break;
187 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR:
188 if (nChar == 0x29) /* ) */
189 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN;
190 else
192 bUndefined = sal_False;
193 goto bad_input;
195 break;
197 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN:
198 if (nChar == 0x43) /* C */
199 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
200 else
202 bUndefined = sal_False;
203 goto bad_input;
205 break;
207 continue;
209 bad_input:
210 switch (ImplHandleBadInputTextToUnicodeConversion(
211 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
212 &nInfo))
214 case IMPL_BAD_INPUT_STOP:
215 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
216 break;
218 case IMPL_BAD_INPUT_CONTINUE:
219 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
220 continue;
222 case IMPL_BAD_INPUT_NO_OUTPUT:
223 goto no_output;
225 break;
227 no_output:
228 --pSrcBuf;
229 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
230 break;
233 if (eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
234 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
235 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
236 == 0)
238 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
239 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
240 else
241 switch (ImplHandleBadInputTextToUnicodeConversion(
242 sal_False, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
243 &nInfo))
245 case IMPL_BAD_INPUT_STOP:
246 case IMPL_BAD_INPUT_CONTINUE:
247 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
248 break;
250 case IMPL_BAD_INPUT_NO_OUTPUT:
251 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
252 break;
256 if (pContext)
258 ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState = eState;
259 ((ImplIso2022KrToUnicodeContext *) pContext)->m_nRow = nRow;
261 if (pInfo)
262 *pInfo = nInfo;
263 if (pSrcCvtBytes)
264 *pSrcCvtBytes = nConverted;
266 return pDestBufPtr - pDestBuf;
269 void * ImplCreateUnicodeToIso2022KrContext(void)
271 void * pContext
272 = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022KrContext));
273 ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate = 0;
274 ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet
275 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
276 return pContext;
279 void ImplResetUnicodeToIso2022KrContext(void * pContext)
281 if (pContext)
283 ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate = 0;
284 ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet
285 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
289 sal_Size ImplConvertUnicodeToIso2022Kr(ImplTextConverterData const * pData,
290 void * pContext,
291 sal_Unicode const * pSrcBuf,
292 sal_Size nSrcChars,
293 sal_Char * pDestBuf,
294 sal_Size nDestBytes,
295 sal_uInt32 nFlags,
296 sal_uInt32 * pInfo,
297 sal_Size * pSrcCvtChars)
299 ImplUniToDBCSHighTab const * pKsX1001Data
300 = ((ImplIso2022KrConverterData const *) pData)->
301 m_pUnicodeToKsX1001Data;
302 sal_Unicode nHighSurrogate = 0;
303 ImplUnicodeToIso2022KrSet eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
304 sal_uInt32 nInfo = 0;
305 sal_Size nConverted = 0;
306 sal_Char * pDestBufPtr = pDestBuf;
307 sal_Char * pDestBufEnd = pDestBuf + nDestBytes;
308 sal_Bool bWritten;
310 if (pContext)
312 nHighSurrogate
313 = ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate;
314 eSet = ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet;
317 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE)
319 if (pDestBufEnd - pDestBufPtr >= 4)
321 *pDestBufPtr++ = 0x1B; /* ESC */
322 *pDestBufPtr++ = 0x24; /* $ */
323 *pDestBufPtr++ = 0x29; /* ) */
324 *pDestBufPtr++ = 0x43; /* C */
325 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
327 else
328 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
331 if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
332 for (; nConverted < nSrcChars; ++nConverted)
334 sal_Bool bUndefined = sal_True;
335 sal_uInt32 nChar = *pSrcBuf++;
336 if (nHighSurrogate == 0)
338 if (ImplIsHighSurrogate(nChar))
340 nHighSurrogate = (sal_Unicode) nChar;
341 continue;
344 else if (ImplIsLowSurrogate(nChar))
345 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
346 else
348 bUndefined = sal_False;
349 goto bad_input;
352 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
354 bUndefined = sal_False;
355 goto bad_input;
358 if (nChar == 0x0A || nChar == 0x0D) /* LF, CR */
360 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001)
362 if (pDestBufPtr != pDestBufEnd)
364 *pDestBufPtr++ = 0x0F; /* SI */
365 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
367 else
368 goto no_output;
370 if (pDestBufPtr != pDestBufEnd)
371 *pDestBufPtr++ = (sal_Char) nChar;
372 else
373 goto no_output;
375 else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B)
376 goto bad_input;
377 else if (nChar < 0x80)
379 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001)
381 if (pDestBufPtr != pDestBufEnd)
383 *pDestBufPtr++ = 0x0F; /* SI */
384 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
386 else
387 goto no_output;
389 if (pDestBufPtr != pDestBufEnd)
390 *pDestBufPtr++ = (sal_Char) nChar;
391 else
392 goto no_output;
394 else
396 sal_uInt16 nBytes = 0;
397 sal_uInt32 nIndex1 = nChar >> 8;
398 if (nIndex1 < 0x100)
400 sal_uInt32 nIndex2 = nChar & 0xFF;
401 sal_uInt32 nFirst = pKsX1001Data[nIndex1].mnLowStart;
402 if (nIndex2 >= nFirst
403 && nIndex2 <= pKsX1001Data[nIndex1].mnLowEnd)
404 nBytes = pKsX1001Data[nIndex1].
405 mpToUniTrailTab[nIndex2 - nFirst];
407 if (nBytes != 0)
409 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII)
411 if (pDestBufPtr != pDestBufEnd)
413 *pDestBufPtr++ = 0x0E; /* SO */
414 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_1001;
416 else
417 goto no_output;
419 if (pDestBufEnd - pDestBufPtr >= 2)
421 *pDestBufPtr++ = (sal_Char) ((nBytes >> 8) & 0x7F);
422 *pDestBufPtr++ = (sal_Char) (nBytes & 0x7F);
424 else
425 goto no_output;
427 else
428 goto bad_input;
430 nHighSurrogate = 0;
431 continue;
433 bad_input:
434 switch (ImplHandleBadInputUnicodeToTextConversion(
435 bUndefined,
436 nChar,
437 nFlags,
438 &pDestBufPtr,
439 pDestBufEnd,
440 &nInfo,
441 "\x0F", /* SI */
442 eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 0 : 1,
443 &bWritten))
445 case IMPL_BAD_INPUT_STOP:
446 nHighSurrogate = 0;
447 break;
449 case IMPL_BAD_INPUT_CONTINUE:
450 if (bWritten)
451 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
452 nHighSurrogate = 0;
453 continue;
455 case IMPL_BAD_INPUT_NO_OUTPUT:
456 goto no_output;
458 break;
460 no_output:
461 --pSrcBuf;
462 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
463 break;
466 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
467 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
468 == 0)
470 sal_Bool bFlush = sal_True;
471 if (nHighSurrogate != 0)
473 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
474 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
475 else
476 switch (ImplHandleBadInputUnicodeToTextConversion(
477 sal_False,
479 nFlags,
480 &pDestBufPtr,
481 pDestBufEnd,
482 &nInfo,
483 "\x0F", /* SI */
484 eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ?
485 0 : 1,
486 &bWritten))
488 case IMPL_BAD_INPUT_STOP:
489 nHighSurrogate = 0;
490 bFlush = sal_False;
491 break;
493 case IMPL_BAD_INPUT_CONTINUE:
494 if (bWritten)
495 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
496 nHighSurrogate = 0;
497 break;
499 case IMPL_BAD_INPUT_NO_OUTPUT:
500 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
501 break;
504 if (bFlush
505 && eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
506 && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
508 if (pDestBufPtr != pDestBufEnd)
510 *pDestBufPtr++ = 0x0F; /* SI */
511 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
513 else
514 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
518 if (pContext)
520 ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate
521 = nHighSurrogate;
522 ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet = eSet;
524 if (pInfo)
525 *pInfo = nInfo;
526 if (pSrcCvtChars)
527 *pSrcCvtChars = nConverted;
529 return pDestBufPtr - pDestBuf;