1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "converter.hxx"
29 #include "convertiso2022kr.hxx"
30 #include "tenchelp.hxx"
// Decoder states for the ISO-2022-KR to Unicode direction.
// The converter compares states with '>' (eState > ..._STATE_1001) after its
// main loop to detect input that stopped in the middle of a sequence, so the
// relative order of the enumerators below must be preserved.
34 enum ImplIso2022KrToUnicodeState
// order is important:
// Plain single-byte mode; also the state a new or reset context starts in.
36 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
,
// Double-byte mode, waiting for a first byte; entered on SO (0x0E), left on
// SI (0x0F).
37 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
,
// Double-byte mode, first byte (0x21..0x7E) seen, waiting for the second.
38 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
,
// ESC (0x1B) seen in ASCII mode.
39 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
,
// ESC followed by '$' (0x24) seen.
40 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
,
// ESC '$' ')' seen; a following 'C' (0x43) returns the decoder to ASCII.
41 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
// State carried between calls of the ISO-2022-KR to Unicode converter.
// NOTE(review): the converter also reads and writes an m_nRow member through
// this type, but no such member is visible in this listing -- confirm the
// full declaration against the complete file.
44 struct ImplIso2022KrToUnicodeContext
// Current decoder state; set to ..._STATE_ASCII on creation and on reset.
46 ImplIso2022KrToUnicodeState m_eState
;
// Output mode of the Unicode to ISO-2022-KR encoder.
50 enum ImplUnicodeToIso2022KrSet
// Nothing written yet: the encoder first emits the four bytes ESC $ ) C and
// then moves to ..._SET_ASCII.  New and reset contexts start here.
52 IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
,
// Single-byte output; SO (0x0E) is written before switching to ..._SET_1001.
53 IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
,
// Double-byte output; SI (0x0F) is written before switching back to ASCII.
54 IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
// State carried between calls of the Unicode to ISO-2022-KR encoder.
57 struct ImplUnicodeToIso2022KrContext
// High surrogate waiting for its low partner; the encoder treats 0 as
// "none pending".
59 sal_Unicode m_nHighSurrogate
;
// Output mode the encoder is currently in.
60 ImplUnicodeToIso2022KrSet m_eSet
;
// Allocates a decoder context with 'new' and puts it in the ASCII state.
// The function is declared to return void *; the matching release function
// is ImplDestroyIso2022KrToUnicodeContext, which deletes the object.
// NOTE(review): the return statement is not visible in this listing.
65 void * ImplCreateIso2022KrToUnicodeContext()
67 ImplIso2022KrToUnicodeContext
* pContext
=
68 new ImplIso2022KrToUnicodeContext
;
69 pContext
->m_eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Puts an existing decoder context back into the ASCII state.
// pContext is cast to ImplIso2022KrToUnicodeContext *, so it must point to an
// object of that type.
73 void ImplResetIso2022KrToUnicodeContext(void * pContext
)
76 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
77 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Deletes a decoder context.  pContext is cast to
// ImplIso2022KrToUnicodeContext * before the delete, so it must have been
// allocated as that type.
80 void ImplDestroyIso2022KrToUnicodeContext(void * pContext
)
82 delete static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
);
// Converts a run of ISO-2022-KR bytes to UTF-16 code units.
//
// pData        - cast to ImplIso2022KrConverterData const *; supplies the
//                m_pKsX1001ToUnicodeData lookup table.
// pDestBuf     - output buffer for the converted sal_Unicode units.
// pSrcCvtBytes - receives the number of source bytes consumed.
// Returns the number of units written (pDestBufPtr - pDestBuf).
//
// NOTE(review): the body also uses pContext, pSrcBuf, nSrcBytes, nDestChars,
// nFlags and nRow, whose declarations are not visible in this listing --
// confirm the full signature against the complete file.
85 sal_Size
ImplConvertIso2022KrToUnicode(void const * pData
,
89 sal_Unicode
* pDestBuf
,
93 sal_Size
* pSrcCvtBytes
)
// Per-row double-byte tables, indexed by nRow in the ..._1001_2 state.
95 ImplDBCSToUniLeadTab
const * pKsX1001Data
96 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
97 m_pKsX1001ToUnicodeData
;
98 ImplIso2022KrToUnicodeState eState
99 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Accumulates RTL_TEXTTOUNICODE_INFO_* bits.
101 sal_uInt32 nInfo
= 0;
// Number of source bytes consumed so far; also the loop counter.
102 sal_Size nConverted
= 0;
103 sal_Unicode
* pDestBufPtr
= pDestBuf
;
104 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Source offset just past the last fully converted character; the bad-input
// handling below can rewind nConverted to this value.
105 sal_Size startOfCurrentChar
= 0;
// Resume from the state saved in the context.
109 eState
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
;
110 nRow
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
;
// Main loop: one source byte per iteration.
113 for (; nConverted
< nSrcBytes
; ++nConverted
)
// Passed as the first argument to the bad-input helper further down.
115 bool bUndefined
= true;
// Read the byte as unsigned and advance the source pointer.
116 sal_uInt32 nChar
= *reinterpret_cast<unsigned char const *>(pSrcBuf
++);
// ASCII mode: SO enters double-byte mode, ESC starts an escape sequence, and
// any other byte below 0x80 is copied through when there is room.
119 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
:
120 if (nChar
== 0x0E) // SO
121 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
122 else if (nChar
== 0x1B) // ESC
123 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
;
124 else if (nChar
< 0x80)
125 if (pDestBufPtr
!= pDestBufEnd
) {
126 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nChar
);
127 startOfCurrentChar
= nConverted
+ 1;
// Double-byte mode, first byte: SI returns to ASCII; a byte in 0x21..0x7E
// moves on to wait for the second byte.
137 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
:
138 if (nChar
== 0x0F) // SI
139 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
140 else if (nChar
>= 0x21 && nChar
<= 0x7E)
143 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
;
// Double-byte mode, second byte: look the byte up in the table of row nRow.
152 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
:
153 if (nChar
>= 0x21 && nChar
<= 0x7E)
// Stays 0 when the byte is outside the row's [mnTrailStart, mnTrailEnd] range.
155 sal_uInt16 nUnicode
= 0;
156 sal_uInt32 nFirst
= pKsX1001Data
[nRow
].mnTrailStart
;
158 if (nChar
>= nFirst
&& nChar
<= pKsX1001Data
[nRow
].mnTrailEnd
)
159 nUnicode
= pKsX1001Data
[nRow
].
160 mpToUniTrailTab
[nChar
- nFirst
];
// Store the result if there is room, then go back to waiting for a first byte.
162 if (pDestBufPtr
!= pDestBufEnd
)
164 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nUnicode
);
165 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
166 startOfCurrentChar
= nConverted
+ 1;
// Escape sequence: the bytes '$', ')' and 'C' are accepted in that order;
// the final 'C' returns the decoder to ASCII mode.
180 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
:
181 if (nChar
== 0x24) // $
182 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
;
190 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
:
191 if (nChar
== 0x29) // )
192 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
;
200 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
:
201 if (nChar
== 0x43) // C
202 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Bad-input handling inside the loop: the helper's result selects one of the
// three cases below.
213 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
214 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
// STOP: fall back to ASCII and adjust nConverted depending on the FLUSH flag.
// NOTE(review): source lines 220-221 are missing from this listing, so which
// branch performs the rewind cannot be confirmed here.
217 case sal::detail::textenc::BAD_INPUT_STOP
:
218 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
219 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0) {
222 nConverted
= startOfCurrentChar
;
// CONTINUE: fall back to ASCII and treat the offending byte as consumed.
226 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
227 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
228 startOfCurrentChar
= nConverted
+ 1;
231 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Destination exhausted: record it in nInfo.
238 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// After the loop: a state greater than ..._1001 means the input ended inside
// a double-byte character or an escape sequence.  This is tested together
// with the ERROR / DESTBUFFERTOOSMALL bits of nInfo.
242 if (eState
> IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
243 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
244 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
))
// Without FLUSH more input may follow, so only report a short source buffer.
247 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
248 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
;
// Otherwise the truncated sequence goes through the bad-input helper.
250 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
251 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
254 case sal::detail::textenc::BAD_INPUT_STOP
:
255 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) != 0) {
256 nConverted
= startOfCurrentChar
;
259 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
260 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
263 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
264 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// Save the decoder state so that a later call can resume.
271 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
= eState
;
272 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report how many source bytes were consumed.
277 *pSrcCvtBytes
= nConverted
;
// Number of sal_Unicode units written to pDestBuf.
279 return pDestBufPtr
- pDestBuf
;
// Allocates an encoder context with 'new', with no pending high surrogate
// and the output mode set to ..._SET_NONE.
// The function is declared to return void *; the matching release function
// is ImplDestroyUnicodeToIso2022KrContext, which deletes the object.
// NOTE(review): the return statement is not visible in this listing.
282 void * ImplCreateUnicodeToIso2022KrContext()
284 ImplUnicodeToIso2022KrContext
* pContext
=
285 new ImplUnicodeToIso2022KrContext
;
286 pContext
->m_nHighSurrogate
= 0;
287 pContext
->m_eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
// Returns an encoder context to its freshly created values: no pending high
// surrogate and output mode ..._SET_NONE.
// pContext is cast to ImplUnicodeToIso2022KrContext *, so it must point to an
// object of that type.
291 void ImplResetUnicodeToIso2022KrContext(void * pContext
)
295 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
= 0;
296 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
297 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
// Deletes an encoder context.  pContext is cast to
// ImplUnicodeToIso2022KrContext * before the delete, so it must have been
// allocated as that type.
301 void ImplDestroyUnicodeToIso2022KrContext(void * pContext
)
303 delete static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
);
// Converts a run of UTF-16 code units to ISO-2022-KR bytes.
//
// pData        - cast to ImplIso2022KrConverterData const *; supplies the
//                m_pUnicodeToKsX1001Data lookup table.
// pSrcBuf      - input sal_Unicode units.
// pSrcCvtChars - receives the number of source units consumed.
// Returns the number of bytes written (pDestBufPtr - pDestBuf).
//
// NOTE(review): the body also uses pContext, nSrcChars, pDestBuf, nDestBytes
// and nFlags, whose declarations are not visible in this listing -- confirm
// the full signature against the complete file.
306 sal_Size
ImplConvertUnicodeToIso2022Kr(void const * pData
,
308 sal_Unicode
const * pSrcBuf
,
314 sal_Size
* pSrcCvtChars
)
// Table indexed by the high byte of the character (nChar >> 8).
316 ImplUniToDBCSHighTab
const * pKsX1001Data
317 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
318 m_pUnicodeToKsX1001Data
;
// 0 means no high surrogate is waiting for its partner.
319 sal_Unicode nHighSurrogate
= 0;
320 ImplUnicodeToIso2022KrSet eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
// Accumulates RTL_UNICODETOTEXT_INFO_* bits.
321 sal_uInt32 nInfo
= 0;
// Number of source units consumed so far; also the loop counter.
322 sal_Size nConverted
= 0;
323 char * pDestBufPtr
= pDestBuf
;
324 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Resume with the pending surrogate and output mode saved in the context.
330 = static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
;
331 eSet
= static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
;
// Nothing written yet: emit the four bytes ESC $ ) C if at least four bytes
// of room remain, otherwise flag the destination as too small.
334 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
)
336 if (pDestBufEnd
- pDestBufPtr
>= 4)
338 *pDestBufPtr
++ = 0x1B; // ESC
339 *pDestBufPtr
++ = 0x24; // $
340 *pDestBufPtr
++ = 0x29; // )
341 *pDestBufPtr
++ = 0x43; // C
342 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
345 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Main loop, entered only when the destination was not already too small.
348 if ((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0)
349 for (; nConverted
< nSrcChars
; ++nConverted
)
// Passed as the first argument to the bad-input helper further down.
351 bool bUndefined
= true;
352 sal_uInt32 nChar
= *pSrcBuf
++;
// Surrogate handling: with nothing pending, a high surrogate is remembered
// for the next unit; with one pending, a following low surrogate is combined
// with it into a single code point.
353 if (nHighSurrogate
== 0)
355 if (rtl::isHighSurrogate(nChar
))
357 nHighSurrogate
= static_cast<sal_Unicode
>(nChar
);
360 else if (rtl::isLowSurrogate(nChar
))
366 else if (rtl::isLowSurrogate(nChar
))
367 nChar
= rtl::combineSurrogates(nHighSurrogate
, nChar
);
// From here on nChar is expected to be a Unicode scalar value.
374 assert(rtl::isUnicodeScalarValue(nChar
));
// LF / CR: if currently in double-byte mode, write SI and switch to ASCII
// first; then write the character itself as one byte.
376 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
378 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
380 if (pDestBufPtr
!= pDestBufEnd
)
382 *pDestBufPtr
++ = 0x0F; // SI
383 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
388 if (pDestBufPtr
!= pDestBufEnd
)
389 *pDestBufPtr
++ = static_cast< char >(nChar
);
// 0x0E, 0x0F and 0x1B (written above with the comments SO, SI and ESC) get
// their own branch.
// NOTE(review): the body of this branch is not visible in this listing.
393 else if (nChar
== 0x0E || nChar
== 0x0F || nChar
== 0x1B)
// Any other character below 0x80: same handling as LF / CR above.
395 else if (nChar
< 0x80)
397 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
399 if (pDestBufPtr
!= pDestBufEnd
)
401 *pDestBufPtr
++ = 0x0F; // SI
402 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
407 if (pDestBufPtr
!= pDestBufEnd
)
408 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Everything else: look the character up in the table, using its high byte
// as the entry index and its low byte as the offset within that entry.
// nBytes stays 0 when the low byte is outside [mnLowStart, mnLowEnd].
414 sal_uInt16 nBytes
= 0;
415 sal_uInt32 nIndex1
= nChar
>> 8;
418 sal_uInt32 nIndex2
= nChar
& 0xFF;
419 sal_uInt32 nFirst
= pKsX1001Data
[nIndex1
].mnLowStart
;
420 if (nIndex2
>= nFirst
421 && nIndex2
<= pKsX1001Data
[nIndex1
].mnLowEnd
)
422 nBytes
= pKsX1001Data
[nIndex1
].
423 mpToUniTrailTab
[nIndex2
- nFirst
];
// If currently in ASCII mode, write SO and switch to double-byte mode.
427 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
)
429 if (pDestBufPtr
!= pDestBufEnd
)
431 *pDestBufPtr
++ = 0x0E; // SO
432 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
;
// A double-byte character needs two bytes of room; each byte is masked to
// seven bits before it is written.
437 if (pDestBufEnd
- pDestBufPtr
>= 2)
439 *pDestBufPtr
++ = static_cast< char >((nBytes
>> 8) & 0x7F);
440 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0x7F);
// Bad-input handling inside the loop.  The helper is given the one-byte
// string SI together with 0 when the encoder is already in ASCII mode and
// 1 otherwise.
452 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
453 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
454 &nInfo
, "\x0F" /* SI */,
455 eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
? 0 : 1,
458 case sal::detail::textenc::BAD_INPUT_STOP
:
// CONTINUE: the encoder is in ASCII mode afterwards.
462 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
464 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
468 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Destination exhausted: record it in nInfo.
475 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// After the loop: the ERROR / DESTBUFFERTOSMALL bits of nInfo are tested
// before the end-of-input handling below.
479 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
480 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// A high surrogate is still waiting for its partner.
484 if (nHighSurrogate
!= 0)
// NOTE(review): SRCBUFFERTOSMALL is reported here when FLUSH *is* set, while
// the ISO-2022-KR to Unicode converter in this file reports the analogous
// condition when FLUSH is *not* set -- confirm that this test is intended.
486 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
487 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
489 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
490 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
492 (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
496 case sal::detail::textenc::BAD_INPUT_STOP
:
501 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
503 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
507 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
508 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Final flush: if still in double-byte mode and FLUSH is set, write SI so
// the output ends in ASCII mode, or flag the destination as too small.
513 && eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
514 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
516 if (pDestBufPtr
!= pDestBufEnd
)
518 *pDestBufPtr
++ = 0x0F; // SI
519 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
522 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Save the encoder state so that a later call can resume.
528 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
530 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
= eSet
;
// Report how many source units were consumed.
535 *pSrcCvtChars
= nConverted
;
// Number of bytes written to pDestBuf.
537 return pDestBufPtr
- pDestBuf
;
540 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */