1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: convertiso2022kr.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "convertiso2022kr.h"
33 #include "converter.h"
36 #include "rtl/alloc.h"
37 #include "rtl/textcvt.h"
38 #include "sal/types.h"
/*
 * States of the ISO-2022-KR to Unicode decoder.
 *
 * The order is important: after the input is exhausted the converter tests
 * `eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001` to find out whether the
 * input stopped in the middle of a two-byte character or of an escape
 * sequence, so every such "incomplete" state has to be listed after
 * IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001.
 */
typedef enum /* order is important: */
{
    /* Single-byte text; this is the state a fresh or reset context is in. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII,
    /* Entered on SO (0x0E); waiting for the first byte of a character. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001,
    /* First byte seen; waiting for the second byte of the character. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2,
    /* ESC (0x1B) seen. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC,
    /* ESC $ seen. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR,
    /* ESC $ ) seen; a following C completes the sequence. */
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
} ImplIso2022KrToUnicodeState;
52 ImplIso2022KrToUnicodeState m_eState
;
54 } ImplIso2022KrToUnicodeContext
;
/*
 * Output character set currently selected by the Unicode to ISO-2022-KR
 * encoder.
 */
typedef enum
{
    /* Nothing written yet; the ESC $ ) C designator is still to be emitted. */
    IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE,
    /* Single-byte output (after the designator or after SI). */
    IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII,
    /* Two-byte KS X 1001 output (after SO). */
    IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
} ImplUnicodeToIso2022KrSet;
65 sal_Unicode m_nHighSurrogate
;
66 ImplUnicodeToIso2022KrSet m_eSet
;
67 } ImplUnicodeToIso2022KrContext
;
69 void * ImplCreateIso2022KrToUnicodeContext(void)
72 = rtl_allocateMemory(sizeof (ImplIso2022KrToUnicodeContext
));
73 ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_eState
74 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
78 void ImplResetIso2022KrToUnicodeContext(void * pContext
)
81 ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_eState
82 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
/*
 * Converts ISO-2022-KR encoded bytes from pSrcBuf into UTF-16 code units
 * written to pDestBuf.
 *
 * The decoder is a state machine. In the ASCII state bytes below 0x80 are
 * copied through; SO (0x0E) and SI (0x0F) switch to and from the two-byte
 * KS X 1001 state; the sequence ESC $ ) C is consumed without producing
 * output. The decoder state and the current row are loaded from pContext
 * and stored back before returning.
 *
 * Returns the number of code units written (pDestBufPtr - pDestBuf); the
 * number of source bytes consumed is stored through pSrcCvtBytes.
 *
 * NOTE(review): this listing omits some lines of the function (several
 * parameter declarations, the switch heads and the statement labels), so
 * the comments below describe only what is visible.
 */
85 sal_Size
ImplConvertIso2022KrToUnicode(ImplTextConverterData
const * pData
,
87 sal_Char
const * pSrcBuf
,
89 sal_Unicode
* pDestBuf
,
93 sal_Size
* pSrcCvtBytes
)
/* KS X 1001 lead-byte table taken from the converter data. */
95 ImplDBCSToUniLeadTab
const * pKsX1001Data
96 = ((ImplIso2022KrConverterData
const *) pData
)->
97 m_pKsX1001ToUnicodeData
;
98 ImplIso2022KrToUnicodeState eState
99 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
101 sal_uInt32 nInfo
= 0;
102 sal_Size nConverted
= 0;
103 sal_Unicode
* pDestBufPtr
= pDestBuf
;
104 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
/* Resume from the state and row saved in the context. */
108 eState
= ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_eState
;
109 nRow
= ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_nRow
;
/* Consume the source one byte at a time. */
112 for (; nConverted
< nSrcBytes
; ++nConverted
)
114 sal_Bool bUndefined
= sal_True
;
115 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
/*
 * ASCII state: SO enters the KS X 1001 state, ESC starts an escape
 * sequence, any other byte below 0x80 is copied to the destination if
 * there is room.
 */
118 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
:
119 if (nChar
== 0x0E) /* SO */
120 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
121 else if (nChar
== 0x1B) /* ESC */
122 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
;
123 else if (nChar
< 0x80)
124 if (pDestBufPtr
!= pDestBufEnd
)
125 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
130 bUndefined
= sal_False
;
/*
 * KS X 1001 state: SI returns to ASCII; a byte in 0x21..0x7E is accepted
 * as the first byte of a two-byte character.
 */
135 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
:
136 if (nChar
== 0x0F) /* SI */
137 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
138 else if (nChar
>= 0x21 && nChar
<= 0x7E)
141 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
;
145 bUndefined
= sal_False
;
/*
 * Second byte of a two-byte character: the row's trail table is consulted
 * only when the byte lies within [mnTrailStart, mnTrailEnd]; otherwise
 * nUnicode keeps its initial value 0.
 */
150 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
:
151 if (nChar
>= 0x21 && nChar
<= 0x7E)
153 sal_uInt16 nUnicode
= 0;
154 sal_uInt32 nFirst
= pKsX1001Data
[nRow
].mnTrailStart
;
156 if (nChar
>= nFirst
&& nChar
<= pKsX1001Data
[nRow
].mnTrailEnd
)
157 nUnicode
= pKsX1001Data
[nRow
].
158 mpToUniTrailTab
[nChar
- nFirst
];
/* Store the character if there is room and go back to expecting a first byte. */
160 if (pDestBufPtr
!= pDestBufEnd
)
162 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
163 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
172 bUndefined
= sal_False
;
/*
 * Escape-sequence states: each state accepts exactly one byte of the
 * sequence ESC $ ) C, and the final C leaves the decoder in the ASCII
 * state.
 */
177 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
:
178 if (nChar
== 0x24) /* $ */
179 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
;
182 bUndefined
= sal_False
;
187 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
:
188 if (nChar
== 0x29) /* ) */
189 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
;
192 bUndefined
= sal_False
;
197 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
:
198 if (nChar
== 0x43) /* C */
199 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
202 bUndefined
= sal_False
;
/*
 * Bad-input handling is delegated to the shared helper; both the STOP and
 * the CONTINUE outcome reset the decoder to the ASCII state.
 */
210 switch (ImplHandleBadInputTextToUnicodeConversion(
211 bUndefined
, sal_True
, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
214 case IMPL_BAD_INPUT_STOP
:
215 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
218 case IMPL_BAD_INPUT_CONTINUE
:
219 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
222 case IMPL_BAD_INPUT_NO_OUTPUT
:
/* Destination buffer is full: flag it for the caller. */
229 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/*
 * After the loop: a state beyond STATE_1001 means the input ended inside a
 * two-byte character or an escape sequence. The condition also looks at
 * the ERROR and DESTBUFFERTOSMALL bits of nInfo.
 */
233 if (eState
> IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
234 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
235 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
/* Without FLUSH the caller is told that more source bytes are needed. */
238 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
239 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
/* The truncated input is handed to the bad-input helper (as "not undefined"). */
241 switch (ImplHandleBadInputTextToUnicodeConversion(
242 sal_False
, sal_True
, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
245 case IMPL_BAD_INPUT_STOP
:
246 case IMPL_BAD_INPUT_CONTINUE
:
247 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
250 case IMPL_BAD_INPUT_NO_OUTPUT
:
251 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* Save the decoder state and row back into the context. */
258 ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_eState
= eState
;
259 ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_nRow
= nRow
;
/* Report the number of source bytes consumed. */
264 *pSrcCvtBytes
= nConverted
;
/* Number of UTF-16 code units written to pDestBuf. */
266 return pDestBufPtr
- pDestBuf
;
269 void * ImplCreateUnicodeToIso2022KrContext(void)
272 = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022KrContext
));
273 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_nHighSurrogate
= 0;
274 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_eSet
275 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
279 void ImplResetUnicodeToIso2022KrContext(void * pContext
)
283 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_nHighSurrogate
= 0;
284 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_eSet
285 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
/*
 * Converts UTF-16 code units from pSrcBuf into ISO-2022-KR bytes written to
 * pDestBuf.
 *
 * While no output set has been selected yet (SET_NONE) the four-byte
 * designator ESC $ ) C is written first. Characters below 0x80 are written
 * as single bytes, preceded by SI (0x0F) if the KS X 1001 set is active;
 * other characters are looked up in the Unicode to KS X 1001 table and
 * written as two 7-bit bytes, preceded by SO (0x0E) if the ASCII set is
 * active. The pending high surrogate and the active set are loaded from
 * pContext and stored back before returning.
 *
 * Returns the number of bytes written (pDestBufPtr - pDestBuf); the number
 * of source code units consumed is stored through pSrcCvtChars.
 *
 * NOTE(review): this listing omits some lines of the function (several
 * parameter declarations, else branches, helper arguments and the statement
 * labels), so the comments below describe only what is visible.
 */
289 sal_Size
ImplConvertUnicodeToIso2022Kr(ImplTextConverterData
const * pData
,
291 sal_Unicode
const * pSrcBuf
,
297 sal_Size
* pSrcCvtChars
)
/* Unicode to KS X 1001 table taken from the converter data. */
299 ImplUniToDBCSHighTab
const * pKsX1001Data
300 = ((ImplIso2022KrConverterData
const *) pData
)->
301 m_pUnicodeToKsX1001Data
;
302 sal_Unicode nHighSurrogate
= 0;
303 ImplUnicodeToIso2022KrSet eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
304 sal_uInt32 nInfo
= 0;
305 sal_Size nConverted
= 0;
306 sal_Char
* pDestBufPtr
= pDestBuf
;
307 sal_Char
* pDestBufEnd
= pDestBuf
+ nDestBytes
;
/* Resume with the pending surrogate and active set saved in the context. */
313 = ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_nHighSurrogate
;
314 eSet
= ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_eSet
;
/*
 * Nothing has been written with this context yet: emit the designator
 * ESC $ ) C, which needs four free bytes, and continue in the ASCII set.
 */
317 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
)
319 if (pDestBufEnd
- pDestBufPtr
>= 4)
321 *pDestBufPtr
++ = 0x1B; /* ESC */
322 *pDestBufPtr
++ = 0x24; /* $ */
323 *pDestBufPtr
++ = 0x29; /* ) */
324 *pDestBufPtr
++ = 0x43; /* C */
325 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
328 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Convert the source only if the destination has not already been found too small. */
331 if ((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0)
332 for (; nConverted
< nSrcChars
; ++nConverted
)
334 sal_Bool bUndefined
= sal_True
;
335 sal_uInt32 nChar
= *pSrcBuf
++;
/*
 * Surrogate handling: a high surrogate is remembered when none is pending;
 * a low surrogate following a pending high surrogate is combined with it.
 */
336 if (nHighSurrogate
== 0)
338 if (ImplIsHighSurrogate(nChar
))
340 nHighSurrogate
= (sal_Unicode
) nChar
;
344 else if (ImplIsLowSurrogate(nChar
))
345 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
348 bUndefined
= sal_False
;
/* Low surrogates (at this point) and noncharacters are marked as not "undefined". */
352 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
354 bUndefined
= sal_False
;
/*
 * Line ends: if the KS X 1001 set is active, SI is written first and the
 * set switches to ASCII; then the CR or LF itself is written.
 */
358 if (nChar
== 0x0A || nChar
== 0x0D) /* LF, CR */
360 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
362 if (pDestBufPtr
!= pDestBufEnd
)
364 *pDestBufPtr
++ = 0x0F; /* SI */
365 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
370 if (pDestBufPtr
!= pDestBufEnd
)
371 *pDestBufPtr
++ = (sal_Char
) nChar
;
/*
 * SO, SI and ESC get a branch of their own.
 * NOTE(review): the body of that branch is not visible in this listing.
 */
375 else if (nChar
== 0x0E || nChar
== 0x0F || nChar
== 0x1B)
/* Other characters below 0x80: switch back to ASCII with SI if needed, then write the byte. */
377 else if (nChar
< 0x80)
379 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
381 if (pDestBufPtr
!= pDestBufEnd
)
383 *pDestBufPtr
++ = 0x0F; /* SI */
384 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
389 if (pDestBufPtr
!= pDestBufEnd
)
390 *pDestBufPtr
++ = (sal_Char
) nChar
;
/*
 * Everything else: the table is indexed by the high byte of the character
 * and, within [mnLowStart, mnLowEnd], by its low byte; nBytes keeps its
 * initial value 0 when the low byte is outside that range.
 */
396 sal_uInt16 nBytes
= 0;
397 sal_uInt32 nIndex1
= nChar
>> 8;
400 sal_uInt32 nIndex2
= nChar
& 0xFF;
401 sal_uInt32 nFirst
= pKsX1001Data
[nIndex1
].mnLowStart
;
402 if (nIndex2
>= nFirst
403 && nIndex2
<= pKsX1001Data
[nIndex1
].mnLowEnd
)
404 nBytes
= pKsX1001Data
[nIndex1
].
405 mpToUniTrailTab
[nIndex2
- nFirst
];
/* Switch to the KS X 1001 set with SO if the ASCII set is active. */
409 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
)
411 if (pDestBufPtr
!= pDestBufEnd
)
413 *pDestBufPtr
++ = 0x0E; /* SO */
414 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
;
/* Write both bytes of the character with the top bit masked off; needs two free bytes. */
419 if (pDestBufEnd
- pDestBufPtr
>= 2)
421 *pDestBufPtr
++ = (sal_Char
) ((nBytes
>> 8) & 0x7F);
422 *pDestBufPtr
++ = (sal_Char
) (nBytes
& 0x7F);
/*
 * Bad-input handling is delegated to the shared helper; the visible
 * argument is 0 when the ASCII set is active and 1 otherwise.
 */
434 switch (ImplHandleBadInputUnicodeToTextConversion(
442 eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
? 0 : 1,
445 case IMPL_BAD_INPUT_STOP
:
449 case IMPL_BAD_INPUT_CONTINUE
:
451 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
455 case IMPL_BAD_INPUT_NO_OUTPUT
:
/* Destination buffer is full: flag it for the caller. */
462 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* After the loop: end-of-input handling, conditional on the ERROR and DESTBUFFERTOSMALL bits of nInfo. */
466 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
467 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
470 sal_Bool bFlush
= sal_True
;
/*
 * A high surrogate is still pending at the end of the input.
 * NOTE(review): SRCBUFFERTOSMALL is reported here when FLUSH is set,
 * whereas ImplConvertIso2022KrToUnicode reports it when FLUSH is clear --
 * confirm that this asymmetry is intended.
 */
471 if (nHighSurrogate
!= 0)
473 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
474 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
476 switch (ImplHandleBadInputUnicodeToTextConversion(
484 eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
?
488 case IMPL_BAD_INPUT_STOP
:
493 case IMPL_BAD_INPUT_CONTINUE
:
495 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
499 case IMPL_BAD_INPUT_NO_OUTPUT
:
500 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/*
 * When flushing with the KS X 1001 set still active, close the output with
 * SI so that it ends in the ASCII set; if there is no room, flag the
 * destination as too small.
 */
505 && eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
506 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
508 if (pDestBufPtr
!= pDestBufEnd
)
510 *pDestBufPtr
++ = 0x0F; /* SI */
511 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
514 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Save the pending surrogate and the active set back into the context. */
520 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_nHighSurrogate
522 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_eSet
= eSet
;
/* Report the number of source code units consumed. */
527 *pSrcCvtChars
= nConverted
;
/* Number of bytes written to pDestBuf. */
529 return pDestBufPtr
- pDestBuf
;