1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "converter.hxx"
29 #include "convertiso2022kr.hxx"
30 #include "tenchelp.hxx"
31 #include "unichars.hxx"
// States of the ISO-2022-KR to Unicode decoder (see
// ImplConvertIso2022KrToUnicode for the transitions between them).
35 enum ImplIso2022KrToUnicodeState
// order is important:
// the decoder tests "eState > ..._STATE_1001" at the end of the input, so
// every enumerator listed after _1001 is treated by that test as an
// unfinished character or escape sequence.
// Initial state; also the state after SI or a completed ESC $ ) C.
37 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
,
// Entered on SO (0x0E); waiting for the first byte of a two-byte character.
38 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
,
// First byte (0x21..0x7E) seen; waiting for the second byte.
39 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
,
// ESC (0x1B) seen.
40 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
,
// ESC $ seen.
41 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
,
// ESC $ ) seen; a following 'C' (0x43) completes the sequence.
42 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
// Decoder state that is carried from one ImplConvertIso2022KrToUnicode call
// to the next.
// NOTE(review): the converter also reads and writes an m_nRow member through
// this type; its declaration is not visible in this copy - confirm against
// the full source.
45 struct ImplIso2022KrToUnicodeContext
// State the decoder was in when the previous call finished.
47 ImplIso2022KrToUnicodeState m_eState
;
// Character set the Unicode to ISO-2022-KR encoder is currently writing in.
51 enum ImplUnicodeToIso2022KrSet
// Initial value: the encoder still has to write the ESC $ ) C header.
53 IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
,
// Header written, or SI emitted: single bytes are written as they are.
54 IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
,
// SO emitted: characters are written as two 7-bit bytes.
55 IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
// Encoder state that is carried from one ImplConvertUnicodeToIso2022Kr call
// to the next.
58 struct ImplUnicodeToIso2022KrContext
// High surrogate seen at the end of the previous call, or 0 if none pending.
60 sal_Unicode m_nHighSurrogate
;
// Character set that was active when the previous call finished.
61 ImplUnicodeToIso2022KrSet m_eSet
;
// Allocates a decoder context with new and puts it into the ASCII state.
// The matching delete is in ImplDestroyIso2022KrToUnicodeContext.
// NOTE(review): only m_eState is initialised in the visible lines, and the
// return statement is not visible in this copy (presumably the new context
// is returned as void *) - confirm against the full source.
66 void * ImplCreateIso2022KrToUnicodeContext()
68 ImplIso2022KrToUnicodeContext
* pContext
=
69 new ImplIso2022KrToUnicodeContext
;
70 pContext
->m_eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Puts an existing decoder context back into the ASCII state.
// pContext is cast to ImplIso2022KrToUnicodeContext *.
// NOTE(review): no null check is visible in this copy - confirm whether the
// caller may pass a null context.
74 void ImplResetIso2022KrToUnicodeContext(void * pContext
)
77 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
78 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Deletes a decoder context.  The pointer is cast back to its concrete type
// first so that delete is applied to an ImplIso2022KrToUnicodeContext and
// not to a void *.
81 void ImplDestroyIso2022KrToUnicodeContext(void * pContext
)
83 delete static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
);
// Converts ISO-2022-KR encoded bytes to UTF-16 code units.
//
// pData        - cast to ImplIso2022KrConverterData const * to reach the
//                m_pKsX1001ToUnicodeData lookup table.
// pDestBuf     - start of the output buffer.
// pSrcCvtBytes - receives nConverted, the count of source bytes consumed.
// Returns the number of code units written (pDestBufPtr - pDestBuf).
//
// NOTE(review): the body also uses pContext, pSrcBuf, nSrcBytes, nDestChars,
// nFlags and nRow; their declarations, along with several branches and
// labels, are not visible in this copy - confirm against the full source.
86 sal_Size
ImplConvertIso2022KrToUnicode(void const * pData
,
90 sal_Unicode
* pDestBuf
,
94 sal_Size
* pSrcCvtBytes
)
// Table indexed by nRow below; each entry supplies mnTrailStart, mnTrailEnd
// and mpToUniTrailTab.
96 ImplDBCSToUniLeadTab
const * pKsX1001Data
97 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
98 m_pKsX1001ToUnicodeData
;
// Default starting state; replaced below by the state saved in the context.
99 ImplIso2022KrToUnicodeState eState
100 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
102 sal_uInt32 nInfo
= 0;
103 sal_Size nConverted
= 0;
104 sal_Unicode
* pDestBufPtr
= pDestBuf
;
105 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Source offset just past the last fully handled character; nConverted is
// rewound to it on some bad-input stops.
106 sal_Size startOfCurrentChar
= 0;
// Resume from the state saved by a previous call (presumably guarded by a
// null check on pContext that is not visible here).
110 eState
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
;
111 nRow
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
;
// Main loop: one source byte is read per iteration.
114 for (; nConverted
< nSrcBytes
; ++nConverted
)
// Handed to the bad-input handler further down.
116 bool bUndefined
= true;
117 sal_uInt32 nChar
= *reinterpret_cast<unsigned char const *>(pSrcBuf
++);
// ASCII state: SO switches to KS X 1001, ESC starts an escape sequence, and
// any other byte below 0x80 is copied to the output unchanged if there is
// room for it.
120 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
:
121 if (nChar
== 0x0E) // SO
122 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
123 else if (nChar
== 0x1B) // ESC
124 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
;
125 else if (nChar
< 0x80)
126 if (pDestBufPtr
!= pDestBufEnd
) {
127 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nChar
);
128 startOfCurrentChar
= nConverted
+ 1;
// KS X 1001 state: SI returns to ASCII; a byte in 0x21..0x7E is the first
// byte of a two-byte character and moves on to the _1001_2 state.
138 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
:
139 if (nChar
== 0x0F) // SI
140 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
141 else if (nChar
>= 0x21 && nChar
<= 0x7E)
144 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
;
// Second byte of a two-byte character: look it up in the table row selected
// by nRow.
153 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
:
154 if (nChar
>= 0x21 && nChar
<= 0x7E)
// Stays 0 when the byte lies outside the row's mnTrailStart..mnTrailEnd
// range.
156 sal_uInt16 nUnicode
= 0;
157 sal_uInt32 nFirst
= pKsX1001Data
[nRow
].mnTrailStart
;
159 if (nChar
>= nFirst
&& nChar
<= pKsX1001Data
[nRow
].mnTrailEnd
)
160 nUnicode
= pKsX1001Data
[nRow
].
161 mpToUniTrailTab
[nChar
- nFirst
];
// Write the character and go back to waiting for a first byte.
163 if (pDestBufPtr
!= pDestBufEnd
)
165 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nUnicode
);
166 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
167 startOfCurrentChar
= nConverted
+ 1;
// The next three states recognise the sequence ESC $ ) C one byte at a time;
// the final 'C' puts the decoder back into the ASCII state.
181 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
:
182 if (nChar
== 0x24) // $
183 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
;
191 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
:
192 if (nChar
== 0x29) // )
193 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
;
201 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
:
202 if (nChar
== 0x43) // C
203 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Bad input: hand it to the shared handler and act on its verdict.
214 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
215 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
// Stop: the decoder falls back to the ASCII state.
218 case sal::detail::textenc::BAD_INPUT_STOP
:
219 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
220 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0) {
223 nConverted
= startOfCurrentChar
;
// Continue: fall back to ASCII and start the next character after this byte.
227 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
228 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
229 startOfCurrentChar
= nConverted
+ 1;
232 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Destination is full.
239 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// After the loop: a state past _1001 means the input ended inside a two-byte
// character or an escape sequence.
243 if (eState
> IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
244 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
245 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
))
// Without the FLUSH flag only SRCBUFFERTOOSMALL is reported (presumably
// because the rest of the sequence may arrive with the next call).
248 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
249 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
;
// The truncated sequence is reported to the bad-input handler.
251 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
252 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
255 case sal::detail::textenc::BAD_INPUT_STOP
:
256 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) != 0) {
257 nConverted
= startOfCurrentChar
;
260 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
261 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
264 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
265 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// Save the decoder state for the next call.
272 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
= eState
;
273 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report how many source bytes were consumed.
278 *pSrcCvtBytes
= nConverted
;
280 return pDestBufPtr
- pDestBuf
;
// Allocates an encoder context with new: no pending high surrogate and no
// character set selected yet (SET_NONE).  The matching delete is in
// ImplDestroyUnicodeToIso2022KrContext.
// NOTE(review): the return statement is not visible in this copy (presumably
// the new context is returned as void *) - confirm against the full source.
283 void * ImplCreateUnicodeToIso2022KrContext()
285 ImplUnicodeToIso2022KrContext
* pContext
=
286 new ImplUnicodeToIso2022KrContext
;
287 pContext
->m_nHighSurrogate
= 0;
288 pContext
->m_eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
// Puts an existing encoder context back into the state that
// ImplCreateUnicodeToIso2022KrContext sets up: no pending high surrogate and
// SET_NONE.
// NOTE(review): no null check is visible in this copy - confirm whether the
// caller may pass a null context.
292 void ImplResetUnicodeToIso2022KrContext(void * pContext
)
296 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
= 0;
297 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
298 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
// Deletes an encoder context.  The pointer is cast back to its concrete type
// first so that delete is applied to an ImplUnicodeToIso2022KrContext and
// not to a void *.
302 void ImplDestroyUnicodeToIso2022KrContext(void * pContext
)
304 delete static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
);
// Converts UTF-16 code units to ISO-2022-KR encoded bytes.
//
// pData        - cast to ImplIso2022KrConverterData const * to reach the
//                m_pUnicodeToKsX1001Data lookup table.
// pSrcBuf      - input code units.
// pSrcCvtChars - receives nConverted, the count of source units consumed.
// Returns the number of bytes written (pDestBufPtr - pDestBuf).
//
// NOTE(review): the body also uses pContext, pDestBuf, nSrcChars, nDestBytes
// and nFlags; their declarations, along with several branches and labels,
// are not visible in this copy - confirm against the full source.
307 sal_Size
ImplConvertUnicodeToIso2022Kr(void const * pData
,
309 sal_Unicode
const * pSrcBuf
,
315 sal_Size
* pSrcCvtChars
)
// Table indexed by the high byte of a character; each entry supplies
// mnLowStart, mnLowEnd and mpToUniTrailTab.
317 ImplUniToDBCSHighTab
const * pKsX1001Data
318 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
319 m_pUnicodeToKsX1001Data
;
// Defaults; replaced below by the values saved in the context.
320 sal_Unicode nHighSurrogate
= 0;
321 ImplUnicodeToIso2022KrSet eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
322 sal_uInt32 nInfo
= 0;
323 sal_Size nConverted
= 0;
324 char * pDestBufPtr
= pDestBuf
;
325 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Resume from the state saved by a previous call (presumably guarded by a
// null check on pContext that is not visible here).
331 = static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
;
332 eSet
= static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
;
// Nothing written yet: start the output with the four-byte header ESC $ ) C.
335 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
)
337 if (pDestBufEnd
- pDestBufPtr
>= 4)
339 *pDestBufPtr
++ = 0x1B; // ESC
340 *pDestBufPtr
++ = 0x24; // $
341 *pDestBufPtr
++ = 0x29; // )
342 *pDestBufPtr
++ = 0x43; // C
343 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
// Flagged on this path (presumably the else branch, when fewer than four
// bytes are free).
346 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Main loop, run only if the destination has not already been found too
// small; one source code unit is read per iteration.
349 if ((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0)
350 for (; nConverted
< nSrcChars
; ++nConverted
)
// Handed to the bad-input handler further down.
352 bool bUndefined
= true;
353 sal_uInt32 nChar
= *pSrcBuf
++;
// Surrogates: a high surrogate is remembered until the next unit arrives; a
// low surrogate that follows one is combined with it into a single code
// point.
354 if (nHighSurrogate
== 0)
356 if (rtl::isHighSurrogate(nChar
))
358 nHighSurrogate
= static_cast<sal_Unicode
>(nChar
);
// Low surrogate with no high surrogate pending (handling not visible here).
361 else if (rtl::isLowSurrogate(nChar
))
367 else if (rtl::isLowSurrogate(nChar
))
368 nChar
= rtl::combineSurrogates(nHighSurrogate
, nChar
);
375 assert(rtl::isUnicodeScalarValue(nChar
));
// Line breaks: if KS X 1001 is active, SI is written first, then the
// character itself as one byte.
377 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
379 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
381 if (pDestBufPtr
!= pDestBufEnd
)
383 *pDestBufPtr
++ = 0x0F; // SI
384 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
389 if (pDestBufPtr
!= pDestBufEnd
)
390 *pDestBufPtr
++ = static_cast< char >(nChar
);
// SO, SI and ESC get a branch of their own (its body is not visible here;
// presumably they are rejected as bad input - TODO confirm).
394 else if (nChar
== 0x0E || nChar
== 0x0F || nChar
== 0x1B)
// Other characters below 0x80: switch back to ASCII with SI if necessary,
// then write the character as one byte.
396 else if (nChar
< 0x80)
398 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
400 if (pDestBufPtr
!= pDestBufEnd
)
402 *pDestBufPtr
++ = 0x0F; // SI
403 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
408 if (pDestBufPtr
!= pDestBufEnd
)
409 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Everything else: look the character up in the table, using its high byte
// as the row and its low byte as the offset within the row.  nBytes stays 0
// when the low byte lies outside the row's mnLowStart..mnLowEnd range.
415 sal_uInt16 nBytes
= 0;
416 sal_uInt32 nIndex1
= nChar
>> 8;
419 sal_uInt32 nIndex2
= nChar
& 0xFF;
420 sal_uInt32 nFirst
= pKsX1001Data
[nIndex1
].mnLowStart
;
421 if (nIndex2
>= nFirst
422 && nIndex2
<= pKsX1001Data
[nIndex1
].mnLowEnd
)
423 nBytes
= pKsX1001Data
[nIndex1
].
424 mpToUniTrailTab
[nIndex2
- nFirst
];
// Switch to KS X 1001 with SO if ASCII is active.
428 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
)
430 if (pDestBufPtr
!= pDestBufEnd
)
432 *pDestBufPtr
++ = 0x0E; // SO
433 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
;
// Write the two bytes of nBytes, each masked down to seven bits.
438 if (pDestBufEnd
- pDestBufPtr
>= 2)
440 *pDestBufPtr
++ = static_cast< char >((nBytes
>> 8) & 0x7F);
441 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0x7F);
// Bad input: hand it to the shared handler.  The handler is given the SI
// byte as a string, with length 1 unless ASCII is already active.
453 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
454 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
455 &nInfo
, "\x0F" /* SI */,
456 eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
? 0 : 1,
459 case sal::detail::textenc::BAD_INPUT_STOP
:
// Continue: ASCII is the active set from here on.
463 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
465 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
469 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Destination is full.
476 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// After the loop: end-of-input handling, conditioned on the ERROR and
// DESTBUFFERTOSMALL flags (the rest of the condition is not visible here).
480 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
481 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// A high surrogate is still waiting for its low surrogate.
485 if (nHighSurrogate
!= 0)
// NOTE(review): SRCBUFFERTOSMALL is set here when FLUSH *is* set, whereas
// the decoder sets its counterpart when FLUSH is *not* set - confirm that
// this asymmetry is intended.
487 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
488 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
// The unpaired surrogate is reported to the bad-input handler.
490 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
491 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
493 (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
497 case sal::detail::textenc::BAD_INPUT_STOP
:
502 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
504 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
508 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
509 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// On FLUSH with KS X 1001 still active, a final SI is written so that the
// output ends in ASCII.
514 && eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
515 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
517 if (pDestBufPtr
!= pDestBufEnd
)
519 *pDestBufPtr
++ = 0x0F; // SI
520 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
523 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Save the encoder state for the next call.
529 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
531 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
= eSet
;
// Report how many source units were consumed.
536 *pSrcCvtChars
= nConverted
;
538 return pDestBufPtr
- pDestBuf
;
541 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */