1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <rtl/textcvt.h>
23 #include <sal/types.h>
25 #include "converter.hxx"
26 #include "convertiso2022kr.hxx"
27 #include "tenchelp.hxx"
28 #include "unichars.hxx"
// States of the ISO-2022-KR to Unicode decoder (ImplConvertIso2022KrToUnicode).
// The decoder compares states with '>' against ..._STATE_1001 to detect input
// that ended in the middle of a character or escape sequence, so the
// enumerators must keep this relative order.
32 enum ImplIso2022KrToUnicodeState
// order is important:
// Single-byte mode; selected initially, by SI (0x0F) and after ESC $ ) C.
34 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
,
// Double-byte mode, waiting for a first byte; selected by SO (0x0E).
35 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
,
// Double-byte mode, first byte (0x21..0x7E) seen, waiting for the second.
36 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
,
// ESC (0x1B) seen.
37 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
,
// ESC followed by '$' (0x24) seen.
38 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
,
// ESC '$' followed by ')' (0x29) seen; a following 'C' (0x43) ends the sequence.
39 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
// Decoder state that ImplConvertIso2022KrToUnicode loads at entry and stores
// back at exit, so that a conversion can be continued across calls.
// NOTE(review): the converter also reads and writes an m_nRow member on this
// context; its declaration is not visible in this listing.
42 struct ImplIso2022KrToUnicodeContext
// Current decoder state.
44 ImplIso2022KrToUnicodeState m_eState
;
// Character set currently selected in the output produced by
// ImplConvertUnicodeToIso2022Kr.
48 enum ImplUnicodeToIso2022KrSet
// Nothing written yet: the encoder first emits the ESC $ ) C header.
50 IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
,
// Single-byte set active (after the header or after SI, 0x0F).
51 IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
,
// Double-byte set active (after SO, 0x0E).
52 IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
// Encoder state that ImplConvertUnicodeToIso2022Kr loads at entry and stores
// back at exit, so that a conversion can be continued across calls.
55 struct ImplUnicodeToIso2022KrContext
// Pending high surrogate from the previous source character; 0 means none.
57 sal_Unicode m_nHighSurrogate
;
// Character set currently selected in the output stream.
58 ImplUnicodeToIso2022KrSet m_eSet
;
// Allocates a decoder context with operator new and puts it into the ASCII
// state.  The matching delete is in ImplDestroyIso2022KrToUnicodeContext.
63 void * ImplCreateIso2022KrToUnicodeContext()
65 ImplIso2022KrToUnicodeContext
* pContext
=
66 new ImplIso2022KrToUnicodeContext
;
// A new context starts in the ASCII state.
67 pContext
->m_eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Puts an existing decoder context back into the ASCII state.
// pContext is the untyped context pointer; it is cast back to
// ImplIso2022KrToUnicodeContext before use.
71 void ImplResetIso2022KrToUnicodeContext(void * pContext
)
// Only m_eState is written here.
74 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
75 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Frees a decoder context.  The untyped pointer is cast back to
// ImplIso2022KrToUnicodeContext so that delete sees the type that
// ImplCreateIso2022KrToUnicodeContext allocated with new.
78 void ImplDestroyIso2022KrToUnicodeContext(void * pContext
)
80 delete static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
);
// Decodes ISO-2022-KR bytes into sal_Unicode code units with a byte-driven
// state machine (see ImplIso2022KrToUnicodeState).
//
// Parameters visible in this listing:
//   pData         - cast to ImplIso2022KrConverterData; supplies the
//                   m_pKsX1001ToUnicodeData lookup table.
//   pDestBuf      - start of the output buffer.
//   pSrcCvtBytes  - receives nConverted, the number of source bytes consumed.
// NOTE(review): the body also uses pContext, pSrcBuf, nSrcBytes, nDestChars,
// nFlags, nInfo and nRow; their declarations are not visible in this listing.
//
// Returns pDestBufPtr - pDestBuf, i.e. the number of code units written.
83 sal_Size
ImplConvertIso2022KrToUnicode(void const * pData
,
87 sal_Unicode
* pDestBuf
,
91 sal_Size
* pSrcCvtBytes
)
// Table indexed by nRow below; each entry carries mnTrailStart, mnTrailEnd
// and mpToUniTrailTab.
93 ImplDBCSToUniLeadTab
const * pKsX1001Data
94 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
95 m_pKsX1001ToUnicodeData
;
// Local working copy of the decoder state; defaults to ASCII.
96 ImplIso2022KrToUnicodeState eState
97 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Number of source bytes processed so far (the loop counter).
100 sal_Size nConverted
= 0;
// Write cursor and one-past-the-end of the output buffer.
101 sal_Unicode
* pDestBufPtr
= pDestBuf
;
102 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Resume from the state saved in the context by a previous call.
106 eState
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
;
107 nRow
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
;
// Process the source one byte at a time.
110 for (; nConverted
< nSrcBytes
; ++nConverted
)
112 bool bUndefined
= true;
// Read the next byte as an unsigned value and advance the source pointer.
113 sal_uInt32 nChar
= *reinterpret_cast<unsigned char const *>(pSrcBuf
++);
// ASCII state: SO switches to the double-byte set, ESC starts an escape
// sequence, and any other byte below 0x80 is copied through unchanged if
// there is room in the output buffer.
116 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
:
117 if (nChar
== 0x0E) // SO
118 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
119 else if (nChar
== 0x1B) // ESC
120 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
;
121 else if (nChar
< 0x80)
122 if (pDestBufPtr
!= pDestBufEnd
)
123 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nChar
);
// Double-byte state, first byte: SI returns to ASCII; a byte in 0x21..0x7E
// moves on to the second-byte state.
133 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
:
134 if (nChar
== 0x0F) // SI
135 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
136 else if (nChar
>= 0x21 && nChar
<= 0x7E)
139 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
;
// Double-byte state, second byte: look the byte up in the trail table of
// row nRow.  nUnicode stays 0 when nChar lies outside
// [mnTrailStart, mnTrailEnd] of that row.
148 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
:
149 if (nChar
>= 0x21 && nChar
<= 0x7E)
151 sal_uInt16 nUnicode
= 0;
152 sal_uInt32 nFirst
= pKsX1001Data
[nRow
].mnTrailStart
;
154 if (nChar
>= nFirst
&& nChar
<= pKsX1001Data
[nRow
].mnTrailEnd
)
155 nUnicode
= pKsX1001Data
[nRow
].
156 mpToUniTrailTab
[nChar
- nFirst
];
// Emit the code unit if there is room, then wait for the next first byte.
158 if (pDestBufPtr
!= pDestBufEnd
)
160 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nUnicode
);
161 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
// Escape sequence recognition: the visible branches accept
// ESC $ ) C (0x1B 0x24 0x29 0x43) one byte per state and then return to the
// ASCII state.
175 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
:
176 if (nChar
== 0x24) // $
177 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
;
185 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
:
186 if (nChar
== 0x29) // )
187 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
;
195 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
:
196 if (nChar
== 0x43) // C
197 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Bad input inside the loop: delegate to the shared helper and act on its
// result.  Both the STOP and the CONTINUE outcome reset the state to ASCII.
208 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
209 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
212 case sal::detail::textenc::BAD_INPUT_STOP
:
213 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
216 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
217 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
220 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer was too small.
227 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// After the loop: a state beyond ..._STATE_1001 means the input ended in the
// middle of a double-byte character or escape sequence.
231 if (eState
> IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
232 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
233 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
))
// Without the FLUSH flag the caller is told that more source bytes are needed.
236 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
237 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
;
// Second call to the bad-input helper, this time with bUndefined == false.
239 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
240 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
// STOP and CONTINUE share the reset to ASCII.
243 case sal::detail::textenc::BAD_INPUT_STOP
:
244 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
245 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
248 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
249 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// Save the decoder state so that a later call can continue from here.
256 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
= eState
;
257 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report the number of source bytes consumed.
262 *pSrcCvtBytes
= nConverted
;
// Number of sal_Unicode code units written to pDestBuf.
264 return pDestBufPtr
- pDestBuf
;
// Allocates an encoder context with operator new and initialises it with no
// pending high surrogate and no character set selected.  The matching delete
// is in ImplDestroyUnicodeToIso2022KrContext.
267 void * ImplCreateUnicodeToIso2022KrContext()
269 ImplUnicodeToIso2022KrContext
* pContext
=
270 new ImplUnicodeToIso2022KrContext
;
// 0 means that no high surrogate is pending.
271 pContext
->m_nHighSurrogate
= 0;
// SET_NONE makes the encoder write the ESC $ ) C header first.
272 pContext
->m_eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
// Puts an existing encoder context back into the values that
// ImplCreateUnicodeToIso2022KrContext assigns: no pending high surrogate and
// no character set selected.  pContext is the untyped context pointer.
276 void ImplResetUnicodeToIso2022KrContext(void * pContext
)
280 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
= 0;
281 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
282 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
// Frees an encoder context.  The untyped pointer is cast back to
// ImplUnicodeToIso2022KrContext so that delete sees the type that
// ImplCreateUnicodeToIso2022KrContext allocated with new.
286 void ImplDestroyUnicodeToIso2022KrContext(void * pContext
)
288 delete static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
);
// Encodes sal_Unicode code units as ISO-2022-KR bytes.  The output starts
// with the four-byte header ESC $ ) C; afterwards SO (0x0E) and SI (0x0F) are
// written whenever the output has to switch between the double-byte and the
// single-byte set.
//
// Parameters visible in this listing:
//   pData         - cast to ImplIso2022KrConverterData; supplies the
//                   m_pUnicodeToKsX1001Data lookup table.
//   pSrcBuf       - source characters, read one per loop iteration.
//   pSrcCvtChars  - receives nConverted, the number of source chars consumed.
// NOTE(review): the body also uses pContext, nSrcChars, pDestBuf, nDestBytes
// and nFlags; their declarations are not visible in this listing.
//
// Returns pDestBufPtr - pDestBuf, i.e. the number of bytes written.
291 sal_Size
ImplConvertUnicodeToIso2022Kr(void const * pData
,
293 sal_Unicode
const * pSrcBuf
,
299 sal_Size
* pSrcCvtChars
)
// Table indexed by the high byte of a character below; each entry carries
// mnLowStart, mnLowEnd and mpToUniTrailTab.
301 ImplUniToDBCSHighTab
const * pKsX1001Data
302 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
303 m_pUnicodeToKsX1001Data
;
// Local working copies of the encoder state, with the same defaults as a
// freshly created context.
304 sal_Unicode nHighSurrogate
= 0;
305 ImplUnicodeToIso2022KrSet eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
// Accumulated RTL_UNICODETOTEXT_INFO_* bits.
306 sal_uInt32 nInfo
= 0;
// Number of source characters processed so far (the loop counter).
307 sal_Size nConverted
= 0;
// Write cursor and one-past-the-end of the output buffer.
308 char * pDestBufPtr
= pDestBuf
;
309 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Resume from the state saved in the context by a previous call.
315 = static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
;
316 eSet
= static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
;
// Nothing written yet: emit the header ESC $ ) C.  All four bytes must fit,
// otherwise the destination buffer is reported as too small.
319 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
)
321 if (pDestBufEnd
- pDestBufPtr
>= 4)
323 *pDestBufPtr
++ = 0x1B; // ESC
324 *pDestBufPtr
++ = 0x24; // $
325 *pDestBufPtr
++ = 0x29; // )
326 *pDestBufPtr
++ = 0x43; // C
327 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
330 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Convert characters only if the header did not overflow the buffer.
333 if ((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0)
334 for (; nConverted
< nSrcChars
; ++nConverted
)
336 bool bUndefined
= true;
// Read the next source character and advance the source pointer.
337 sal_uInt32 nChar
= *pSrcBuf
++;
// Surrogate handling: with no high surrogate pending, a high surrogate is
// remembered in nHighSurrogate; otherwise a low surrogate is combined with
// the pending high surrogate into one code point.
338 if (nHighSurrogate
== 0)
340 if (ImplIsHighSurrogate(nChar
))
342 nHighSurrogate
= static_cast<sal_Unicode
>(nChar
);
346 else if (ImplIsLowSurrogate(nChar
))
347 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
// Test for a lone low surrogate or a noncharacter (the branch body is not
// visible in this listing).
354 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
// Line breaks: if the double-byte set is active, first switch back with SI,
// then write the LF/CR byte itself.
360 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
362 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
364 if (pDestBufPtr
!= pDestBufEnd
)
366 *pDestBufPtr
++ = 0x0F; // SI
367 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
372 if (pDestBufPtr
!= pDestBufEnd
)
373 *pDestBufPtr
++ = static_cast< char >(nChar
);
// SO, SI and ESC are tested separately from the other characters below 0x80
// (the branch body is not visible in this listing).
377 else if (nChar
== 0x0E || nChar
== 0x0F || nChar
== 0x1B)
// Other characters below 0x80: switch to the single-byte set with SI if
// necessary, then write the character as one byte.
379 else if (nChar
< 0x80)
381 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
383 if (pDestBufPtr
!= pDestBufEnd
)
385 *pDestBufPtr
++ = 0x0F; // SI
386 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
391 if (pDestBufPtr
!= pDestBufEnd
)
392 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Everything else: look the character up in the table, using nChar >> 8 as
// the entry index and the low byte as the offset within the entry.  nBytes
// stays 0 when the low byte lies outside [mnLowStart, mnLowEnd].
398 sal_uInt16 nBytes
= 0;
399 sal_uInt32 nIndex1
= nChar
>> 8;
402 sal_uInt32 nIndex2
= nChar
& 0xFF;
403 sal_uInt32 nFirst
= pKsX1001Data
[nIndex1
].mnLowStart
;
404 if (nIndex2
>= nFirst
405 && nIndex2
<= pKsX1001Data
[nIndex1
].mnLowEnd
)
406 nBytes
= pKsX1001Data
[nIndex1
].
407 mpToUniTrailTab
[nIndex2
- nFirst
];
// Switch to the double-byte set with SO if the single-byte set is active.
411 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
)
413 if (pDestBufPtr
!= pDestBufEnd
)
415 *pDestBufPtr
++ = 0x0E; // SO
416 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
;
// Write high and low byte of nBytes, each masked with 0x7F so that the top
// bit is clear; both bytes must fit.
421 if (pDestBufEnd
- pDestBufPtr
>= 2)
423 *pDestBufPtr
++ = static_cast< char >((nBytes
>> 8) & 0x7F);
424 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0x7F);
// Bad input inside the loop: delegate to the shared helper.  It is handed
// the SI byte with a length of 0 when the single-byte set is already active
// and 1 otherwise (presumably a prefix to write before any replacement
// output - confirm against the helper's declaration).
436 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
437 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
438 &nInfo
, "\x0F" /* SI */,
439 eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
? 0 : 1,
442 case sal::detail::textenc::BAD_INPUT_STOP
:
446 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
// On CONTINUE the single-byte set is recorded as active.
448 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
452 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer was too small.
459 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// After the loop: handling of the end of the input, based on the error and
// buffer-too-small bits collected in nInfo.
463 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
464 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// A high surrogate is still pending at the end of the source buffer.
468 if (nHighSurrogate
!= 0)
// With the FLUSH flag set, the source buffer is reported as too small.
470 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
471 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
// Second call to the bad-input helper, this time with bUndefined == false
// and character value 0.
473 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
474 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
476 (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
480 case sal::detail::textenc::BAD_INPUT_STOP
:
485 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
487 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
491 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
492 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// When flushing while the double-byte set is active, write a final SI so
// that the output ends in the single-byte set; if there is no room for it,
// report the destination buffer as too small.
497 && eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
498 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
500 if (pDestBufPtr
!= pDestBufEnd
)
502 *pDestBufPtr
++ = 0x0F; // SI
503 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
506 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Save the encoder state so that a later call can continue from here.
512 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
514 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
= eSet
;
// Report the number of source characters consumed.
519 *pSrcCvtChars
= nConverted
;
// Number of bytes written to pDestBuf.
521 return pDestBufPtr
- pDestBuf
;
524 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */