merged tag ooo/DEV300_m102
[LibreOffice.git] / sal / textenc / convertiso2022kr.c
blobc2bbee3196a8dca6237b53b3c7ff3bd6960e298d
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 #include "convertiso2022kr.h"
29 #include "context.h"
30 #include "converter.h"
31 #include "tenchelp.h"
32 #include "unichars.h"
33 #include "rtl/alloc.h"
34 #include "rtl/textcvt.h"
35 #include "sal/types.h"
/* Decoder states of the ISO-2022-KR to Unicode conversion.
 *
 * The order is important: ImplConvertIso2022KrToUnicode treats every state
 * that compares greater than IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001 as being
 * in the middle of a multi-byte unit (a double-byte character or an escape
 * sequence) when the input ends.
 */
typedef enum /* order is important: */
{
    /* shifted in (SI); bytes below 0x80 are copied through unchanged */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII,
    /* shifted out (SO); the next byte is the lead byte of a KS X 1001 pair */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001,
    /* lead byte seen; the next byte is the trail byte of the pair */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2,
    /* ESC seen */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC,
    /* ESC $ seen */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR,
    /* ESC $ ) seen */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
} ImplIso2022KrToUnicodeState;
47 typedef struct
49 ImplIso2022KrToUnicodeState m_eState;
50 sal_uInt32 m_nRow;
51 } ImplIso2022KrToUnicodeContext;
/* Output-side shift state of the Unicode to ISO-2022-KR conversion. */
typedef enum
{
    /* nothing written yet; the ESC $ ) C header is still outstanding */
    IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE,
    /* header written and shifted in (SI): single bytes are being emitted */
    IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII,
    /* shifted out (SO): KS X 1001 byte pairs are being emitted */
    IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
} ImplUnicodeToIso2022KrSet;
60 typedef struct
62 sal_Unicode m_nHighSurrogate;
63 ImplUnicodeToIso2022KrSet m_eSet;
64 } ImplUnicodeToIso2022KrContext;
66 void * ImplCreateIso2022KrToUnicodeContext(void)
68 void * pContext
69 = rtl_allocateMemory(sizeof (ImplIso2022KrToUnicodeContext));
70 ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState
71 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
72 return pContext;
75 void ImplResetIso2022KrToUnicodeContext(void * pContext)
77 if (pContext)
78 ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState
79 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
82 sal_Size ImplConvertIso2022KrToUnicode(ImplTextConverterData const * pData,
83 void * pContext,
84 sal_Char const * pSrcBuf,
85 sal_Size nSrcBytes,
86 sal_Unicode * pDestBuf,
87 sal_Size nDestChars,
88 sal_uInt32 nFlags,
89 sal_uInt32 * pInfo,
90 sal_Size * pSrcCvtBytes)
92 ImplDBCSToUniLeadTab const * pKsX1001Data
93 = ((ImplIso2022KrConverterData const *) pData)->
94 m_pKsX1001ToUnicodeData;
95 ImplIso2022KrToUnicodeState eState
96 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
97 sal_uInt32 nRow = 0;
98 sal_uInt32 nInfo = 0;
99 sal_Size nConverted = 0;
100 sal_Unicode * pDestBufPtr = pDestBuf;
101 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
103 if (pContext)
105 eState = ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState;
106 nRow = ((ImplIso2022KrToUnicodeContext *) pContext)->m_nRow;
109 for (; nConverted < nSrcBytes; ++nConverted)
111 sal_Bool bUndefined = sal_True;
112 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
113 switch (eState)
115 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII:
116 if (nChar == 0x0E) /* SO */
117 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001;
118 else if (nChar == 0x1B) /* ESC */
119 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC;
120 else if (nChar < 0x80)
121 if (pDestBufPtr != pDestBufEnd)
122 *pDestBufPtr++ = (sal_Unicode) nChar;
123 else
124 goto no_output;
125 else
127 bUndefined = sal_False;
128 goto bad_input;
130 break;
132 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001:
133 if (nChar == 0x0F) /* SI */
134 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
135 else if (nChar >= 0x21 && nChar <= 0x7E)
137 nRow = nChar + 0x80;
138 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2;
140 else
142 bUndefined = sal_False;
143 goto bad_input;
145 break;
147 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2:
148 if (nChar >= 0x21 && nChar <= 0x7E)
150 sal_uInt16 nUnicode = 0;
151 sal_uInt32 nFirst = pKsX1001Data[nRow].mnTrailStart;
152 nChar += 0x80;
153 if (nChar >= nFirst && nChar <= pKsX1001Data[nRow].mnTrailEnd)
154 nUnicode = pKsX1001Data[nRow].
155 mpToUniTrailTab[nChar - nFirst];
156 if (nUnicode != 0)
157 if (pDestBufPtr != pDestBufEnd)
159 *pDestBufPtr++ = (sal_Unicode) nUnicode;
160 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001;
162 else
163 goto no_output;
164 else
165 goto bad_input;
167 else
169 bUndefined = sal_False;
170 goto bad_input;
172 break;
174 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC:
175 if (nChar == 0x24) /* $ */
176 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR;
177 else
179 bUndefined = sal_False;
180 goto bad_input;
182 break;
184 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR:
185 if (nChar == 0x29) /* ) */
186 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN;
187 else
189 bUndefined = sal_False;
190 goto bad_input;
192 break;
194 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN:
195 if (nChar == 0x43) /* C */
196 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
197 else
199 bUndefined = sal_False;
200 goto bad_input;
202 break;
204 continue;
206 bad_input:
207 switch (ImplHandleBadInputTextToUnicodeConversion(
208 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
209 &nInfo))
211 case IMPL_BAD_INPUT_STOP:
212 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
213 break;
215 case IMPL_BAD_INPUT_CONTINUE:
216 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
217 continue;
219 case IMPL_BAD_INPUT_NO_OUTPUT:
220 goto no_output;
222 break;
224 no_output:
225 --pSrcBuf;
226 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
227 break;
230 if (eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
231 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
232 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
233 == 0)
235 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
236 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
237 else
238 switch (ImplHandleBadInputTextToUnicodeConversion(
239 sal_False, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
240 &nInfo))
242 case IMPL_BAD_INPUT_STOP:
243 case IMPL_BAD_INPUT_CONTINUE:
244 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
245 break;
247 case IMPL_BAD_INPUT_NO_OUTPUT:
248 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
249 break;
253 if (pContext)
255 ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState = eState;
256 ((ImplIso2022KrToUnicodeContext *) pContext)->m_nRow = nRow;
258 if (pInfo)
259 *pInfo = nInfo;
260 if (pSrcCvtBytes)
261 *pSrcCvtBytes = nConverted;
263 return pDestBufPtr - pDestBuf;
266 void * ImplCreateUnicodeToIso2022KrContext(void)
268 void * pContext
269 = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022KrContext));
270 ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate = 0;
271 ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet
272 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
273 return pContext;
276 void ImplResetUnicodeToIso2022KrContext(void * pContext)
278 if (pContext)
280 ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate = 0;
281 ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet
282 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
286 sal_Size ImplConvertUnicodeToIso2022Kr(ImplTextConverterData const * pData,
287 void * pContext,
288 sal_Unicode const * pSrcBuf,
289 sal_Size nSrcChars,
290 sal_Char * pDestBuf,
291 sal_Size nDestBytes,
292 sal_uInt32 nFlags,
293 sal_uInt32 * pInfo,
294 sal_Size * pSrcCvtChars)
296 ImplUniToDBCSHighTab const * pKsX1001Data
297 = ((ImplIso2022KrConverterData const *) pData)->
298 m_pUnicodeToKsX1001Data;
299 sal_Unicode nHighSurrogate = 0;
300 ImplUnicodeToIso2022KrSet eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
301 sal_uInt32 nInfo = 0;
302 sal_Size nConverted = 0;
303 sal_Char * pDestBufPtr = pDestBuf;
304 sal_Char * pDestBufEnd = pDestBuf + nDestBytes;
305 sal_Bool bWritten;
307 if (pContext)
309 nHighSurrogate
310 = ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate;
311 eSet = ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet;
314 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE)
316 if (pDestBufEnd - pDestBufPtr >= 4)
318 *pDestBufPtr++ = 0x1B; /* ESC */
319 *pDestBufPtr++ = 0x24; /* $ */
320 *pDestBufPtr++ = 0x29; /* ) */
321 *pDestBufPtr++ = 0x43; /* C */
322 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
324 else
325 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
328 if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
329 for (; nConverted < nSrcChars; ++nConverted)
331 sal_Bool bUndefined = sal_True;
332 sal_uInt32 nChar = *pSrcBuf++;
333 if (nHighSurrogate == 0)
335 if (ImplIsHighSurrogate(nChar))
337 nHighSurrogate = (sal_Unicode) nChar;
338 continue;
341 else if (ImplIsLowSurrogate(nChar))
342 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
343 else
345 bUndefined = sal_False;
346 goto bad_input;
349 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
351 bUndefined = sal_False;
352 goto bad_input;
355 if (nChar == 0x0A || nChar == 0x0D) /* LF, CR */
357 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001)
359 if (pDestBufPtr != pDestBufEnd)
361 *pDestBufPtr++ = 0x0F; /* SI */
362 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
364 else
365 goto no_output;
367 if (pDestBufPtr != pDestBufEnd)
368 *pDestBufPtr++ = (sal_Char) nChar;
369 else
370 goto no_output;
372 else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B)
373 goto bad_input;
374 else if (nChar < 0x80)
376 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001)
378 if (pDestBufPtr != pDestBufEnd)
380 *pDestBufPtr++ = 0x0F; /* SI */
381 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
383 else
384 goto no_output;
386 if (pDestBufPtr != pDestBufEnd)
387 *pDestBufPtr++ = (sal_Char) nChar;
388 else
389 goto no_output;
391 else
393 sal_uInt16 nBytes = 0;
394 sal_uInt32 nIndex1 = nChar >> 8;
395 if (nIndex1 < 0x100)
397 sal_uInt32 nIndex2 = nChar & 0xFF;
398 sal_uInt32 nFirst = pKsX1001Data[nIndex1].mnLowStart;
399 if (nIndex2 >= nFirst
400 && nIndex2 <= pKsX1001Data[nIndex1].mnLowEnd)
401 nBytes = pKsX1001Data[nIndex1].
402 mpToUniTrailTab[nIndex2 - nFirst];
404 if (nBytes != 0)
406 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII)
408 if (pDestBufPtr != pDestBufEnd)
410 *pDestBufPtr++ = 0x0E; /* SO */
411 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_1001;
413 else
414 goto no_output;
416 if (pDestBufEnd - pDestBufPtr >= 2)
418 *pDestBufPtr++ = (sal_Char) ((nBytes >> 8) & 0x7F);
419 *pDestBufPtr++ = (sal_Char) (nBytes & 0x7F);
421 else
422 goto no_output;
424 else
425 goto bad_input;
427 nHighSurrogate = 0;
428 continue;
430 bad_input:
431 switch (ImplHandleBadInputUnicodeToTextConversion(
432 bUndefined,
433 nChar,
434 nFlags,
435 &pDestBufPtr,
436 pDestBufEnd,
437 &nInfo,
438 "\x0F", /* SI */
439 eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 0 : 1,
440 &bWritten))
442 case IMPL_BAD_INPUT_STOP:
443 nHighSurrogate = 0;
444 break;
446 case IMPL_BAD_INPUT_CONTINUE:
447 if (bWritten)
448 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
449 nHighSurrogate = 0;
450 continue;
452 case IMPL_BAD_INPUT_NO_OUTPUT:
453 goto no_output;
455 break;
457 no_output:
458 --pSrcBuf;
459 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
460 break;
463 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
464 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
465 == 0)
467 sal_Bool bFlush = sal_True;
468 if (nHighSurrogate != 0)
470 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
471 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
472 else
473 switch (ImplHandleBadInputUnicodeToTextConversion(
474 sal_False,
476 nFlags,
477 &pDestBufPtr,
478 pDestBufEnd,
479 &nInfo,
480 "\x0F", /* SI */
481 eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ?
482 0 : 1,
483 &bWritten))
485 case IMPL_BAD_INPUT_STOP:
486 nHighSurrogate = 0;
487 bFlush = sal_False;
488 break;
490 case IMPL_BAD_INPUT_CONTINUE:
491 if (bWritten)
492 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
493 nHighSurrogate = 0;
494 break;
496 case IMPL_BAD_INPUT_NO_OUTPUT:
497 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
498 break;
501 if (bFlush
502 && eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
503 && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
505 if (pDestBufPtr != pDestBufEnd)
507 *pDestBufPtr++ = 0x0F; /* SI */
508 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
510 else
511 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
515 if (pContext)
517 ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate
518 = nHighSurrogate;
519 ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet = eSet;
521 if (pInfo)
522 *pInfo = nInfo;
523 if (pSrcCvtChars)
524 *pSrcCvtChars = nConverted;
526 return pDestBufPtr - pDestBuf;