/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "sal/config.h"
22 #include "rtl/textcvt.h"
23 #include "sal/types.h"
25 #include "context.hxx"
26 #include "converter.hxx"
27 #include "convertiso2022kr.hxx"
28 #include "tenchelp.hxx"
29 #include "unichars.hxx"
// Decoder state for the ISO-2022-KR -> Unicode direction.
//
// order is important: the converter tests
// `eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001` to detect that the input
// stopped in the middle of a two-byte character or an escape sequence, so
// every "incomplete" state must be listed after ..._STATE_1001.
enum ImplIso2022KrToUnicodeState
{
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII,             // single-byte mode (after SI)
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001,              // double-byte mode (after SO)
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2,            // first of two bytes seen
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC,               // seen ESC
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR,        // seen ESC $
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN  // seen ESC $ )
};
43 struct ImplIso2022KrToUnicodeContext
45 ImplIso2022KrToUnicodeState m_eState
;
// Encoder state for the Unicode -> ISO-2022-KR direction: which character
// set is currently shifted in on the output side.
enum ImplUnicodeToIso2022KrSet
{
    IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE,   // ESC $ ) C designator not written yet
    IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII,  // single-byte output (after SI)
    IMPL_UNICODE_TO_ISO_2022_KR_SET_1001    // double-byte output (after SO)
};
56 struct ImplUnicodeToIso2022KrContext
58 sal_Unicode m_nHighSurrogate
;
59 ImplUnicodeToIso2022KrSet m_eSet
;
64 void * ImplCreateIso2022KrToUnicodeContext()
66 ImplIso2022KrToUnicodeContext
* pContext
=
67 new ImplIso2022KrToUnicodeContext
;
68 pContext
->m_eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
72 void ImplResetIso2022KrToUnicodeContext(void * pContext
)
75 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
76 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
79 void ImplDestroyIso2022KrToUnicodeContext(void * pContext
)
81 delete static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
);
// Converts ISO-2022-KR bytes to sal_Unicode code units with a small state
// machine (eState), writing through pDestBufPtr up to pDestBufEnd.
//
// Behaviour visible in this listing:
//  - ASCII state: 0x0E (SO) enters the KS X 1001 state, 0x1B (ESC) starts an
//    escape sequence, any other byte below 0x80 is copied to the output.
//  - KS X 1001 state: 0x0F (SI) returns to ASCII; a byte in 0x21..0x7E moves
//    on to the second-byte state.
//  - Second-byte state: a byte in 0x21..0x7E is looked up in
//    pKsX1001Data[nRow] (bounded by mnTrailStart/mnTrailEnd) and the result
//    is written to the output, returning to the KS X 1001 state.
//  - ESC / ESC $ / ESC $ ) states accept '$', ')' and 'C' in turn; the
//    completed sequence ESC $ ) C returns to the ASCII state.
//
// The number of consumed source bytes (nConverted) is stored through
// pSrcCvtBytes, and the number of code units written
// (pDestBufPtr - pDestBuf) is returned.
//
// NOTE(review): pContext, pSrcBuf, nSrcBytes, nDestChars, nFlags and nRow are
// used below without a visible declaration, and no braces, `switch (eState)`,
// `break` or jump statements are visible. Presumably lines were lost when
// this listing was produced; confirm control flow against the canonical file.
84 sal_Size
ImplConvertIso2022KrToUnicode(void const * pData
,
88 sal_Unicode
* pDestBuf
,
92 sal_Size
* pSrcCvtBytes
)
// KS X 1001 -> Unicode lookup table taken from the converter data.
94 ImplDBCSToUniLeadTab
const * pKsX1001Data
95 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
96 m_pKsX1001ToUnicodeData
;
97 ImplIso2022KrToUnicodeState eState
98 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
100 sal_uInt32 nInfo
= 0;
101 sal_Size nConverted
= 0;
102 sal_Unicode
* pDestBufPtr
= pDestBuf
;
103 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Resume from the state saved in the context by a previous call.
107 eState
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
;
108 nRow
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
;
// Main loop: one source byte per iteration.
111 for (; nConverted
< nSrcBytes
; ++nConverted
)
113 bool bUndefined
= true;
// Read the next byte as an unsigned value and advance the source pointer.
114 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
117 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
:
118 if (nChar
== 0x0E) // SO
119 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
120 else if (nChar
== 0x1B) // ESC
121 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
;
122 else if (nChar
< 0x80)
123 if (pDestBufPtr
!= pDestBufEnd
)
124 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
134 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
:
135 if (nChar
== 0x0F) // SI
136 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
137 else if (nChar
>= 0x21 && nChar
<= 0x7E)
140 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
;
149 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
:
150 if (nChar
>= 0x21 && nChar
<= 0x7E)
// nUnicode stays 0 unless the trail byte falls inside the row's table range.
152 sal_uInt16 nUnicode
= 0;
153 sal_uInt32 nFirst
= pKsX1001Data
[nRow
].mnTrailStart
;
155 if (nChar
>= nFirst
&& nChar
<= pKsX1001Data
[nRow
].mnTrailEnd
)
156 nUnicode
= pKsX1001Data
[nRow
].
157 mpToUniTrailTab
[nChar
- nFirst
];
159 if (pDestBufPtr
!= pDestBufEnd
)
161 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
162 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
176 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
:
177 if (nChar
== 0x24) // $
178 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
;
186 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
:
187 if (nChar
== 0x29) // )
188 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
;
196 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
:
197 if (nChar
== 0x43) // C
198 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Bad-input handling: the shared helper decides how to proceed; both the
// STOP and CONTINUE outcomes reset the decoder to the ASCII state.
209 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
210 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
213 case sal::detail::textenc::BAD_INPUT_STOP
:
214 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
217 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
218 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
221 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Report that the destination buffer has no room left.
228 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// After the loop: eState > ..._STATE_1001 holds for the second-byte and
// escape states (this relies on the enum order), i.e. the input ended inside
// a multi-byte unit.
// NOTE(review): the comparison applied to the masked nInfo is not visible.
232 if (eState
> IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
233 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
234 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
// Without the FLUSH flag, report that more source bytes are needed.
237 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
238 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
240 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
241 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
244 case sal::detail::textenc::BAD_INPUT_STOP
:
245 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
246 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
249 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
250 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// Save the decoder state back into the context for the next call.
257 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
= eState
;
258 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report how many source bytes were consumed.
263 *pSrcCvtBytes
= nConverted
;
// Number of sal_Unicode code units written to pDestBuf.
265 return pDestBufPtr
- pDestBuf
;
268 void * ImplCreateUnicodeToIso2022KrContext()
270 ImplUnicodeToIso2022KrContext
* pContext
=
271 new ImplUnicodeToIso2022KrContext
;
272 pContext
->m_nHighSurrogate
= 0;
273 pContext
->m_eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
277 void ImplResetUnicodeToIso2022KrContext(void * pContext
)
281 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
= 0;
282 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
283 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
287 void ImplDestroyUnicodeToIso2022KrContext(void * pContext
)
289 delete static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
);
// Converts sal_Unicode code units to ISO-2022-KR bytes, tracking the
// shifted-in output set in eSet and a pending high surrogate in
// nHighSurrogate.
//
// Behaviour visible in this listing:
//  - While eSet is ..._SET_NONE, the four-byte designator ESC $ ) C is
//    written first (needs at least 4 free bytes), then eSet becomes ASCII.
//  - A high surrogate is remembered; a following low surrogate is combined
//    with it via ImplCombineSurrogates.
//  - LF/CR and other characters below 0x80 are written as single bytes,
//    preceded by SI (0x0F) if the 1001 set is currently shifted in.
//  - Other characters are looked up in pKsX1001Data[nChar >> 8] by their low
//    byte; SO (0x0E) is written first if the ASCII set is shifted in, then
//    two bytes, each masked with 0x7F.
//  - With the FLUSH flag set and the 1001 set still shifted in, a final SI
//    is written.
//
// The number of consumed source characters (nConverted) is stored through
// pSrcCvtChars, and the number of bytes written (pDestBufPtr - pDestBuf) is
// returned.
//
// NOTE(review): pContext, pDestBuf, nSrcChars, nDestBytes and nFlags are used
// below without a visible declaration, and no braces, `else`, `break` or
// jump statements are visible. Presumably lines were lost when this listing
// was produced; confirm control flow against the canonical file.
292 sal_Size
ImplConvertUnicodeToIso2022Kr(void const * pData
,
294 sal_Unicode
const * pSrcBuf
,
300 sal_Size
* pSrcCvtChars
)
// Unicode -> KS X 1001 lookup table taken from the converter data.
302 ImplUniToDBCSHighTab
const * pKsX1001Data
303 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
304 m_pUnicodeToKsX1001Data
;
305 sal_Unicode nHighSurrogate
= 0;
306 ImplUnicodeToIso2022KrSet eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
307 sal_uInt32 nInfo
= 0;
308 sal_Size nConverted
= 0;
309 char * pDestBufPtr
= pDestBuf
;
310 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Resume from the state saved in the context by a previous call.
// NOTE(review): the left-hand side of the first assignment is not visible.
316 = static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
;
317 eSet
= static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
;
// Nothing designated yet: emit the ESC $ ) C designator once.
320 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
)
322 if (pDestBufEnd
- pDestBufPtr
>= 4)
324 *pDestBufPtr
++ = 0x1B; // ESC
325 *pDestBufPtr
++ = 0x24; // $
326 *pDestBufPtr
++ = 0x29; // )
327 *pDestBufPtr
++ = 0x43; // C
328 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
// Presumably the branch taken when fewer than 4 bytes are free - confirm.
331 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Main loop, entered only if the destination has not already overflowed.
334 if ((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0)
335 for (; nConverted
< nSrcChars
; ++nConverted
)
337 bool bUndefined
= true;
338 sal_uInt32 nChar
= *pSrcBuf
++;
// Surrogate handling: remember a high surrogate, or combine a low surrogate
// with the remembered one.
339 if (nHighSurrogate
== 0)
341 if (ImplIsHighSurrogate(nChar
))
343 nHighSurrogate
= (sal_Unicode
) nChar
;
347 else if (ImplIsLowSurrogate(nChar
))
348 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
355 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
// Line breaks: shift back to the ASCII set before writing the byte.
361 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
363 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
365 if (pDestBufPtr
!= pDestBufEnd
)
367 *pDestBufPtr
++ = 0x0F; // SI
368 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
373 if (pDestBufPtr
!= pDestBufEnd
)
374 *pDestBufPtr
++ = static_cast< char >(nChar
);
// SO, SI and ESC are singled out here; the body of this branch is not visible.
378 else if (nChar
== 0x0E || nChar
== 0x0F || nChar
== 0x1B)
// Other single-byte characters: shift to ASCII if needed, then write the byte.
380 else if (nChar
< 0x80)
382 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
384 if (pDestBufPtr
!= pDestBufEnd
)
386 *pDestBufPtr
++ = 0x0F; // SI
387 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
392 if (pDestBufPtr
!= pDestBufEnd
)
393 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Table lookup: the high byte of nChar selects the table entry, the low byte
// selects the slot within [mnLowStart, mnLowEnd]; nBytes stays 0 otherwise.
399 sal_uInt16 nBytes
= 0;
400 sal_uInt32 nIndex1
= nChar
>> 8;
403 sal_uInt32 nIndex2
= nChar
& 0xFF;
404 sal_uInt32 nFirst
= pKsX1001Data
[nIndex1
].mnLowStart
;
405 if (nIndex2
>= nFirst
406 && nIndex2
<= pKsX1001Data
[nIndex1
].mnLowEnd
)
407 nBytes
= pKsX1001Data
[nIndex1
].
408 mpToUniTrailTab
[nIndex2
- nFirst
];
// Shift to the 1001 set if the ASCII set is currently active.
412 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
)
414 if (pDestBufPtr
!= pDestBufEnd
)
416 *pDestBufPtr
++ = 0x0E; // SO
417 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
;
// Write the two-byte code, each byte masked with 0x7F.
422 if (pDestBufEnd
- pDestBufPtr
>= 2)
424 *pDestBufPtr
++ = static_cast< char >((nBytes
>> 8) & 0x7F);
425 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0x7F);
// Bad-input handling: the helper is handed an SI prefix, with length 0 when
// the ASCII set is already active and 1 otherwise.
437 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
438 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
439 &nInfo
, "\x0F" /* SI */,
440 eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
? 0 : 1,
443 case sal::detail::textenc::BAD_INPUT_STOP
:
447 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
449 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
453 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
460 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// After the loop: handling of a high surrogate left pending at end of input.
// NOTE(review): the comparison applied to the masked nInfo is not visible.
464 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
465 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
469 if (nHighSurrogate
!= 0)
// NOTE(review): ImplConvertIso2022KrToUnicode tests its FLUSH flag with == 0
// before reporting SRCBUFFERTOSMALL; here the test is != 0. Confirm that
// this asymmetry is intended.
471 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
472 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
474 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
475 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
477 (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
481 case sal::detail::textenc::BAD_INPUT_STOP
:
486 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
488 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
492 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
493 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Final flush: with the 1001 set still shifted in and FLUSH requested, write
// a closing SI so the output ends in the ASCII set.
498 && eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
499 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
501 if (pDestBufPtr
!= pDestBufEnd
)
503 *pDestBufPtr
++ = 0x0F; // SI
504 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
507 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Save the encoder state back into the context for the next call.
// NOTE(review): the right-hand side of the first assignment is not visible.
513 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
515 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
= eSet
;
// Report how many source characters were consumed.
520 *pSrcCvtChars
= nConverted
;
// Number of bytes written to pDestBuf.
522 return pDestBufPtr
- pDestBuf
;
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */