1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 #include "convertiso2022kr.h"
30 #include "converter.h"
33 #include "rtl/alloc.h"
34 #include "rtl/textcvt.h"
35 #include "sal/types.h"
/* States of the ISO-2022-KR -> Unicode decoder used by
   ImplConvertIso2022KrToUnicode.  The end-of-input check in that function
   compares the state with `>` against ..._STATE_1001, so the enumerators
   must keep this order. */
37 typedef enum /* order is important: */
/* Single-byte mode; selected by SI (0x0F) and after a complete ESC $ ) C. */
39 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
,
/* Double-byte mode, selected by SO (0x0E). */
40 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
,
/* Double-byte mode after a first byte in 0x21..0x7E has been read. */
41 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
,
/* ESC (0x1B) has been read. */
42 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
,
/* ESC followed by '$' (0x24) has been read. */
43 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
,
/* ESC '$' followed by ')' (0x29) has been read; 'C' (0x43) completes it. */
44 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
45 } ImplIso2022KrToUnicodeState
;
/* Decoder context that carries the state machine position between calls of
   ImplConvertIso2022KrToUnicode.
   NOTE(review): the converter also reads and writes an m_nRow member of this
   type; its declaration is not visible in this excerpt. */
49 ImplIso2022KrToUnicodeState m_eState
;
51 } ImplIso2022KrToUnicodeContext
;
/* Character set the Unicode -> ISO-2022-KR encoder currently has selected. */
/* Initial value; the encoder writes ESC $ ) C before leaving this state. */
55 IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
,
/* Single-byte output; selected after the header and after writing SI. */
56 IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
,
/* Double-byte output; selected after writing SO. */
57 IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
58 } ImplUnicodeToIso2022KrSet
;
/* Encoder context kept between calls of ImplConvertUnicodeToIso2022Kr. */
/* Pending high surrogate; 0 means none is pending. */
62 sal_Unicode m_nHighSurrogate
;
/* Character set that was selected when the previous call returned. */
63 ImplUnicodeToIso2022KrSet m_eSet
;
64 } ImplUnicodeToIso2022KrContext
;
/* Allocates a decoder context with rtl_allocateMemory and puts it into the
   ASCII state.
   NOTE(review): no check of the allocation result and no initialisation of
   m_nRow is visible in this excerpt -- confirm against the full file. */
66 void * ImplCreateIso2022KrToUnicodeContext(void)
69 = rtl_allocateMemory(sizeof (ImplIso2022KrToUnicodeContext
));
70 ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_eState
71 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
/* Puts an existing decoder context back into the ASCII state.
   pContext is treated as an ImplIso2022KrToUnicodeContext. */
75 void ImplResetIso2022KrToUnicodeContext(void * pContext
)
78 ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_eState
79 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
/* Decodes ISO-2022-KR bytes from pSrcBuf into sal_Unicode units in pDestBuf.
   The decoder is a byte-driven state machine (ImplIso2022KrToUnicodeState)
   whose position is loaded from and stored back into the context, so a
   sequence may be split across calls.
   The number of source bytes consumed is stored in *pSrcCvtBytes, and the
   return value is the number of sal_Unicode units written to pDestBuf.
   NOTE(review): the body also uses pContext, nSrcBytes, nDestChars, nFlags,
   nRow and nInfo; their declarations are not visible in this excerpt. */
82 sal_Size
ImplConvertIso2022KrToUnicode(ImplTextConverterData
const * pData
,
84 sal_Char
const * pSrcBuf
,
86 sal_Unicode
* pDestBuf
,
90 sal_Size
* pSrcCvtBytes
)
/* Double-byte lookup table, taken from the converter data and indexed by
   nRow further down. */
92 ImplDBCSToUniLeadTab
const * pKsX1001Data
93 = ((ImplIso2022KrConverterData
const *) pData
)->
94 m_pKsX1001ToUnicodeData
;
95 ImplIso2022KrToUnicodeState eState
96 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
/* nConverted counts consumed source bytes; pDestBufPtr is the write cursor
   and pDestBufEnd the end of the destination buffer. */
99 sal_Size nConverted
= 0;
100 sal_Unicode
* pDestBufPtr
= pDestBuf
;
101 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
/* Pick up the state and row stored in the context. */
105 eState
= ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_eState
;
106 nRow
= ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_nRow
;
/* Main loop: one source byte per iteration, read as an unsigned value. */
109 for (; nConverted
< nSrcBytes
; ++nConverted
)
111 sal_Bool bUndefined
= sal_True
;
112 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
/* ASCII state: SO selects double-byte mode, ESC starts an escape sequence,
   and bytes below 0x80 are copied through when the destination has room. */
115 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
:
116 if (nChar
== 0x0E) /* SO */
117 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
118 else if (nChar
== 0x1B) /* ESC */
119 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
;
120 else if (nChar
< 0x80)
121 if (pDestBufPtr
!= pDestBufEnd
)
122 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
/* NOTE(review): presumably the path for a byte none of the tests above
   accept; the surrounding control flow is not visible in this excerpt. */
127 bUndefined
= sal_False
;
/* Double-byte state, first byte: SI returns to ASCII, a byte in 0x21..0x7E
   moves on to the second-byte state. */
132 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
:
133 if (nChar
== 0x0F) /* SI */
134 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
135 else if (nChar
>= 0x21 && nChar
<= 0x7E)
138 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
;
142 bUndefined
= sal_False
;
/* Double-byte state, second byte: it must be in 0x21..0x7E.  The row entry
   pKsX1001Data[nRow] gives the valid trail range and the translation table;
   nUnicode stays 0 when the byte is outside that range. */
147 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
:
148 if (nChar
>= 0x21 && nChar
<= 0x7E)
150 sal_uInt16 nUnicode
= 0;
151 sal_uInt32 nFirst
= pKsX1001Data
[nRow
].mnTrailStart
;
153 if (nChar
>= nFirst
&& nChar
<= pKsX1001Data
[nRow
].mnTrailEnd
)
154 nUnicode
= pKsX1001Data
[nRow
].
155 mpToUniTrailTab
[nChar
- nFirst
];
/* When the destination has room, store the unit and go back to expecting a
   first byte. */
157 if (pDestBufPtr
!= pDestBufEnd
)
159 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
160 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
169 bUndefined
= sal_False
;
/* Escape sequence recognition, one byte per state: ESC '$' ')' 'C'.
   A complete sequence leaves the decoder in the ASCII state. */
174 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
:
175 if (nChar
== 0x24) /* $ */
176 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
;
179 bUndefined
= sal_False
;
184 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
:
185 if (nChar
== 0x29) /* ) */
186 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
;
189 bUndefined
= sal_False
;
194 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
:
195 if (nChar
== 0x43) /* C */
196 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
199 bUndefined
= sal_False
;
/* Bad input: the shared handler is called with bUndefined and nFlags, and
   its result selects one of the cases below.  Both STOP and CONTINUE put
   the decoder back into the ASCII state. */
207 switch (ImplHandleBadInputTextToUnicodeConversion(
208 bUndefined
, sal_True
, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
211 case IMPL_BAD_INPUT_STOP
:
212 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
215 case IMPL_BAD_INPUT_CONTINUE
:
216 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
219 case IMPL_BAD_INPUT_NO_OUTPUT
:
/* Record that the destination buffer was too small. */
226 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* End of input: every state after ..._STATE_1001 in the enum is the middle
   of a double-byte character or of an escape sequence.  The test also
   involves the ERROR and DESTBUFFERTOSMALL bits of nInfo (the comparison
   itself is not visible in this excerpt). */
230 if (eState
> IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
231 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
232 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
/* Without the FLUSH flag the condition is reported as SRCBUFFERTOSMALL. */
235 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
236 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
/* The same bad-input handler is called again, this time with sal_False as
   its first argument. */
238 switch (ImplHandleBadInputTextToUnicodeConversion(
239 sal_False
, sal_True
, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
242 case IMPL_BAD_INPUT_STOP
:
243 case IMPL_BAD_INPUT_CONTINUE
:
244 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
247 case IMPL_BAD_INPUT_NO_OUTPUT
:
248 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* Store the state and row back into the context. */
255 ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_eState
= eState
;
256 ((ImplIso2022KrToUnicodeContext
*) pContext
)->m_nRow
= nRow
;
/* Report the number of source bytes consumed. */
261 *pSrcCvtBytes
= nConverted
;
/* Number of sal_Unicode units written. */
263 return pDestBufPtr
- pDestBuf
;
/* Allocates an encoder context with rtl_allocateMemory, with no pending
   high surrogate and no character set selected yet (SET_NONE).
   NOTE(review): no check of the allocation result is visible in this
   excerpt -- confirm against the full file. */
266 void * ImplCreateUnicodeToIso2022KrContext(void)
269 = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022KrContext
));
270 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_nHighSurrogate
= 0;
271 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_eSet
272 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
/* Puts an existing encoder context back into its initial condition: no
   pending high surrogate and SET_NONE, so the next conversion writes the
   ESC $ ) C header again.
   pContext is treated as an ImplUnicodeToIso2022KrContext. */
276 void ImplResetUnicodeToIso2022KrContext(void * pContext
)
280 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_nHighSurrogate
= 0;
281 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_eSet
282 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
/* Encodes sal_Unicode units from pSrcBuf as ISO-2022-KR bytes in pDestBuf.
   The selected character set and a pending high surrogate are loaded from
   and stored back into the context, so input may be split across calls.
   The number of source units consumed is stored in *pSrcCvtChars, and the
   return value is the number of bytes written to pDestBuf.
   NOTE(review): the body also uses pContext, pDestBuf, nSrcChars,
   nDestBytes and nFlags; their declarations are not visible in this
   excerpt. */
286 sal_Size
ImplConvertUnicodeToIso2022Kr(ImplTextConverterData
const * pData
,
288 sal_Unicode
const * pSrcBuf
,
294 sal_Size
* pSrcCvtChars
)
/* Unicode -> double-byte lookup table, taken from the converter data and
   indexed by the high byte of the character further down. */
296 ImplUniToDBCSHighTab
const * pKsX1001Data
297 = ((ImplIso2022KrConverterData
const *) pData
)->
298 m_pUnicodeToKsX1001Data
;
299 sal_Unicode nHighSurrogate
= 0;
300 ImplUnicodeToIso2022KrSet eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
301 sal_uInt32 nInfo
= 0;
302 sal_Size nConverted
= 0;
303 sal_Char
* pDestBufPtr
= pDestBuf
;
304 sal_Char
* pDestBufEnd
= pDestBuf
+ nDestBytes
;
/* Pick up the pending surrogate and selected set stored in the context. */
310 = ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_nHighSurrogate
;
311 eSet
= ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_eSet
;
/* Nothing selected yet: write the four-byte header ESC $ ) C when at least
   four bytes are free, otherwise report DESTBUFFERTOSMALL. */
314 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
)
316 if (pDestBufEnd
- pDestBufPtr
>= 4)
318 *pDestBufPtr
++ = 0x1B; /* ESC */
319 *pDestBufPtr
++ = 0x24; /* $ */
320 *pDestBufPtr
++ = 0x29; /* ) */
321 *pDestBufPtr
++ = 0x43; /* C */
322 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
325 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Main loop, entered only when the header step did not run out of space:
   one source unit per iteration. */
328 if ((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0)
329 for (; nConverted
< nSrcChars
; ++nConverted
)
331 sal_Bool bUndefined
= sal_True
;
332 sal_uInt32 nChar
= *pSrcBuf
++;
/* Surrogates: with none pending, a high surrogate is remembered; with one
   pending, a following low surrogate is combined with it into nChar. */
333 if (nHighSurrogate
== 0)
335 if (ImplIsHighSurrogate(nChar
))
337 nHighSurrogate
= (sal_Unicode
) nChar
;
341 else if (ImplIsLowSurrogate(nChar
))
342 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
345 bUndefined
= sal_False
;
/* Low surrogates and noncharacters are singled out here. */
349 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
351 bUndefined
= sal_False
;
/* LF and CR: when the double-byte set is selected, SI is written first and
   the encoder switches to ASCII; the character is then written as one
   byte.  Each write is guarded by a room check. */
355 if (nChar
== 0x0A || nChar
== 0x0D) /* LF, CR */
357 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
359 if (pDestBufPtr
!= pDestBufEnd
)
361 *pDestBufPtr
++ = 0x0F; /* SI */
362 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
367 if (pDestBufPtr
!= pDestBufEnd
)
368 *pDestBufPtr
++ = (sal_Char
) nChar
;
/* SO, SI and ESC are tested separately from the other characters below
   0x80; what this branch does is not visible in this excerpt. */
372 else if (nChar
== 0x0E || nChar
== 0x0F || nChar
== 0x1B)
/* Other characters below 0x80: same SI-then-single-byte handling as for
   LF and CR above. */
374 else if (nChar
< 0x80)
376 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
378 if (pDestBufPtr
!= pDestBufEnd
)
380 *pDestBufPtr
++ = 0x0F; /* SI */
381 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
386 if (pDestBufPtr
!= pDestBufEnd
)
387 *pDestBufPtr
++ = (sal_Char
) nChar
;
/* Table lookup: the table is indexed by nChar >> 8, and the low byte must
   lie in that entry's [mnLowStart, mnLowEnd] range.  nBytes stays 0 when
   it does not.  Despite its name, the mpToUniTrailTab value is used below
   as the two-byte output code. */
393 sal_uInt16 nBytes
= 0;
394 sal_uInt32 nIndex1
= nChar
>> 8;
397 sal_uInt32 nIndex2
= nChar
& 0xFF;
398 sal_uInt32 nFirst
= pKsX1001Data
[nIndex1
].mnLowStart
;
399 if (nIndex2
>= nFirst
400 && nIndex2
<= pKsX1001Data
[nIndex1
].mnLowEnd
)
401 nBytes
= pKsX1001Data
[nIndex1
].
402 mpToUniTrailTab
[nIndex2
- nFirst
];
/* Write SO first when the ASCII set is still selected. */
406 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
)
408 if (pDestBufPtr
!= pDestBufEnd
)
410 *pDestBufPtr
++ = 0x0E; /* SO */
411 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
;
/* Write the code as two bytes, high byte first, each masked to 7 bits;
   this needs two free bytes. */
416 if (pDestBufEnd
- pDestBufPtr
>= 2)
418 *pDestBufPtr
++ = (sal_Char
) ((nBytes
>> 8) & 0x7F);
419 *pDestBufPtr
++ = (sal_Char
) (nBytes
& 0x7F);
/* Bad input: the shared handler is called and its result selects one of
   the cases below.  One of its arguments is 0 when the ASCII set is
   selected and 1 otherwise.  After CONTINUE the encoder is in the ASCII
   set. */
431 switch (ImplHandleBadInputUnicodeToTextConversion(
439 eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
? 0 : 1,
442 case IMPL_BAD_INPUT_STOP
:
446 case IMPL_BAD_INPUT_CONTINUE
:
448 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
452 case IMPL_BAD_INPUT_NO_OUTPUT
:
/* Record that the destination buffer was too small. */
459 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* End-of-input handling.  The test involves the ERROR and DESTBUFFERTOSMALL
   bits of nInfo (the comparison itself is not visible in this excerpt). */
463 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
464 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
467 sal_Bool bFlush
= sal_True
;
/* A high surrogate is still waiting for its low surrogate.
   NOTE(review): SRCBUFFERTOSMALL is set here when the FLUSH flag IS set
   (`!= 0`), whereas the matching check in ImplConvertIso2022KrToUnicode
   sets it when FLUSH is NOT set (`== 0`) -- confirm which is intended. */
468 if (nHighSurrogate
!= 0)
470 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
471 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
/* The same bad-input handler is called again; its cases mirror the ones in
   the main loop. */
473 switch (ImplHandleBadInputUnicodeToTextConversion(
481 eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
?
485 case IMPL_BAD_INPUT_STOP
:
490 case IMPL_BAD_INPUT_CONTINUE
:
492 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
496 case IMPL_BAD_INPUT_NO_OUTPUT
:
497 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Final flush: with the double-byte set selected and the FLUSH flag set,
   write a closing SI so the output ends in the ASCII set.  The condition
   has a leading term that is not visible in this excerpt. */
502 && eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
503 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
505 if (pDestBufPtr
!= pDestBufEnd
)
507 *pDestBufPtr
++ = 0x0F; /* SI */
508 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
511 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Store the encoder state back into the context. */
517 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_nHighSurrogate
519 ((ImplUnicodeToIso2022KrContext
*) pContext
)->m_eSet
= eSet
;
/* Report the number of source units consumed. */
524 *pSrcCvtChars
= nConverted
;
/* Number of bytes written. */
526 return pDestBufPtr
- pDestBuf
;