1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "sal/config.h"
31 #include "rtl/textcvt.h"
32 #include "sal/types.h"
34 #include "context.hxx"
35 #include "converter.hxx"
36 #include "convertiso2022kr.hxx"
37 #include "tenchelp.hxx"
38 #include "unichars.hxx"
/// States of the ISO-2022-KR to Unicode decoder.
///
/// The numeric order is significant: after the main loop the decoder tests
/// `eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001` to recognise that the
/// input stopped part-way through a double-byte character or an escape
/// sequence.  Every "in the middle of something" state must therefore be
/// listed after IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001.
enum ImplIso2022KrToUnicodeState // order is important:
{
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII,            // single-byte mode (initial, or after SI)
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001,             // after SO, waiting for a first byte
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2,           // first byte seen, waiting for the second
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC,              // seen ESC
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR,       // seen ESC $
    IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN // seen ESC $ )
};
52 struct ImplIso2022KrToUnicodeContext
54 ImplIso2022KrToUnicodeState m_eState
;
/// Character set the ISO-2022-KR encoder currently has selected in its
/// output stream.
enum ImplUnicodeToIso2022KrSet
{
    IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE,  // nothing written yet; the ESC $ ) C header is still due
    IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII, // single-byte mode (header written, or after SI)
    IMPL_UNICODE_TO_ISO_2022_KR_SET_1001   // double-byte KS X 1001 mode (after SO)
};
65 struct ImplUnicodeToIso2022KrContext
67 sal_Unicode m_nHighSurrogate
;
68 ImplUnicodeToIso2022KrSet m_eSet
;
73 void * ImplCreateIso2022KrToUnicodeContext()
75 ImplIso2022KrToUnicodeContext
* pContext
=
76 new ImplIso2022KrToUnicodeContext
;
77 pContext
->m_eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
81 void ImplResetIso2022KrToUnicodeContext(void * pContext
)
84 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
85 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
88 void ImplDestroyIso2022KrToUnicodeContext(void * pContext
)
90 delete static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
);
// Converts ISO-2022-KR encoded bytes to Unicode.
//
// The decoder is a byte-driven state machine (see
// ImplIso2022KrToUnicodeState): single-byte ASCII, the two bytes of a
// KS X 1001 character, and the three steps of the "ESC $ ) C" designator.
// Its state is loaded from, and written back to, pContext so that a sequence
// may continue in a later call.  The value returned is the number of Unicode
// code units written (pDestBufPtr - pDestBuf); the number of source bytes
// consumed is stored through pSrcCvtBytes.
//
// NOTE(review): this listing is incomplete -- several parameter
// declarations, braces, the switch over eState, else-branches and jump
// targets are not visible.  The comments below describe only the statements
// that are shown.
93 sal_Size
ImplConvertIso2022KrToUnicode(void const * pData
,
97 sal_Unicode
* pDestBuf
,
101 sal_Size
* pSrcCvtBytes
)
// KS X 1001 lookup table, taken from the converter data; indexed by nRow.
103 ImplDBCSToUniLeadTab
const * pKsX1001Data
104 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
105 m_pKsX1001ToUnicodeData
;
// Working copies of the decoder state and of the output cursor/limit.
106 ImplIso2022KrToUnicodeState eState
107 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
109 sal_uInt32 nInfo
= 0;
110 sal_Size nConverted
= 0;
111 sal_Unicode
* pDestBufPtr
= pDestBuf
;
112 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Resume from the state saved in the context by the previous call.
116 eState
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
;
117 nRow
= static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
;
// Consume the source one byte at a time; nConverted counts the bytes taken.
120 for (; nConverted
< nSrcBytes
; ++nConverted
)
122 bool bUndefined
= true;
123 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
// ASCII mode: SO selects KS X 1001, ESC starts an escape sequence, and any
// other byte below 0x80 is copied through if the destination has room.
126 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
:
127 if (nChar
== 0x0E) // SO
128 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
129 else if (nChar
== 0x1B) // ESC
130 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
;
131 else if (nChar
< 0x80)
132 if (pDestBufPtr
!= pDestBufEnd
)
133 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
// KS X 1001 mode, first byte: SI returns to ASCII; a byte in 0x21..0x7E
// advances to the second-byte state.
143 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
:
144 if (nChar
== 0x0F) // SI
145 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
146 else if (nChar
>= 0x21 && nChar
<= 0x7E)
149 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
;
// KS X 1001 mode, second byte: look the byte up in the trail table of row
// nRow (only when it lies inside that row's mnTrailStart..mnTrailEnd range),
// write the result if there is room, and go back to expecting a first byte.
// nUnicode stays 0 when the byte is outside the row's range.
158 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2
:
159 if (nChar
>= 0x21 && nChar
<= 0x7E)
161 sal_uInt16 nUnicode
= 0;
162 sal_uInt32 nFirst
= pKsX1001Data
[nRow
].mnTrailStart
;
164 if (nChar
>= nFirst
&& nChar
<= pKsX1001Data
[nRow
].mnTrailEnd
)
165 nUnicode
= pKsX1001Data
[nRow
].
166 mpToUniTrailTab
[nChar
- nFirst
];
168 if (pDestBufPtr
!= pDestBufEnd
)
170 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
171 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
;
// Escape sequence recognition: ESC, then '$', then ')', then 'C'.  Once the
// full designator has been read the decoder is back in ASCII mode.
185 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC
:
186 if (nChar
== 0x24) // $
187 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
;
195 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR
:
196 if (nChar
== 0x29) // )
197 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
;
205 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
:
206 if (nChar
== 0x43) // C
207 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
// Bad input is delegated to the shared helper.  Both the STOP and the
// CONTINUE outcome put the decoder back into ASCII mode.
// NOTE(review): how control reaches this point, and what follows each case
// label, is not visible in this listing.
218 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
219 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
222 case sal::detail::textenc::BAD_INPUT_STOP
:
223 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
226 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
227 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
230 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer was too small.
237 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// After the loop: a state beyond STATE_1001 means the input ended inside a
// double-byte character or an escape sequence.
// NOTE(review): the comparison applied to the masked nInfo value is not
// visible here.
241 if (eState
> IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
242 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
243 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
// Without the FLUSH flag more input may follow, so only report that the
// source buffer was too small.
246 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
247 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
// The truncated sequence is handed to the bad-input helper (presumably the
// else-branch of the FLUSH test above -- the branch keyword is not visible).
249 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
250 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
253 case sal::detail::textenc::BAD_INPUT_STOP
:
254 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
255 eState
= IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII
;
258 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
259 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// Save the decoder state for the next call.
266 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_eState
= eState
;
267 static_cast< ImplIso2022KrToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report the number of source bytes consumed.
272 *pSrcCvtBytes
= nConverted
;
// Number of Unicode code units written to the destination buffer.
274 return pDestBufPtr
- pDestBuf
;
277 void * ImplCreateUnicodeToIso2022KrContext()
279 ImplUnicodeToIso2022KrContext
* pContext
=
280 new ImplUnicodeToIso2022KrContext
;
281 pContext
->m_nHighSurrogate
= 0;
282 pContext
->m_eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
286 void ImplResetUnicodeToIso2022KrContext(void * pContext
)
290 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
= 0;
291 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
292 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
296 void ImplDestroyUnicodeToIso2022KrContext(void * pContext
)
298 delete static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
);
// Converts UTF-16 code units to ISO-2022-KR bytes.
//
// Output starts with the four-byte designator ESC $ ) C; afterwards the
// encoder switches between single-byte ASCII and double-byte KS X 1001 with
// SI (0x0F) and SO (0x0E).  The currently selected set and a pending high
// surrogate are loaded from, and written back to, pContext.  The value
// returned is the number of bytes written (pDestBufPtr - pDestBuf); the
// number of source code units consumed is stored through pSrcCvtChars.
//
// NOTE(review): this listing is incomplete -- several parameter
// declarations, braces, else-branches and jump targets are not visible.  The
// comments below describe only the statements that are shown.
301 sal_Size
ImplConvertUnicodeToIso2022Kr(void const * pData
,
303 sal_Unicode
const * pSrcBuf
,
309 sal_Size
* pSrcCvtChars
)
// Unicode -> KS X 1001 lookup table, indexed by the high byte of the
// character.
311 ImplUniToDBCSHighTab
const * pKsX1001Data
312 = static_cast< ImplIso2022KrConverterData
const * >(pData
)->
313 m_pUnicodeToKsX1001Data
;
// Working copies of the encoder state and of the output cursor/limit.
314 sal_Unicode nHighSurrogate
= 0;
315 ImplUnicodeToIso2022KrSet eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
;
316 sal_uInt32 nInfo
= 0;
317 sal_Size nConverted
= 0;
318 char * pDestBufPtr
= pDestBuf
;
319 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Resume from the state saved in the context by the previous call.
// NOTE(review): the left-hand side of the first assignment is not visible;
// presumably it is nHighSurrogate.
325 = static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
;
326 eSet
= static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
;
// Nothing has been written to this stream yet: emit the designator
// ESC $ ) C first.  All four bytes must fit; the DESTBUFFERTOSMALL
// assignment further down is presumably the else-branch of the size test.
329 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE
)
331 if (pDestBufEnd
- pDestBufPtr
>= 4)
333 *pDestBufPtr
++ = 0x1B; // ESC
334 *pDestBufPtr
++ = 0x24; // $
335 *pDestBufPtr
++ = 0x29; // )
336 *pDestBufPtr
++ = 0x43; // C
337 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
340 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Convert the source only if the destination has not already run out of
// space; nConverted counts the UTF-16 code units taken.
343 if ((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0)
344 for (; nConverted
< nSrcChars
; ++nConverted
)
346 bool bUndefined
= true;
347 sal_uInt32 nChar
= *pSrcBuf
++;
// Surrogate handling: with no high surrogate pending, a high surrogate is
// remembered; a low surrogate is combined with the remembered high
// surrogate into one code point.
348 if (nHighSurrogate
== 0)
350 if (ImplIsHighSurrogate(nChar
))
352 nHighSurrogate
= (sal_Unicode
) nChar
;
356 else if (ImplIsLowSurrogate(nChar
))
357 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
// Low surrogates and noncharacters are singled out here.
// NOTE(review): the action taken for them is not visible in this listing.
364 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
// LF and CR: leave KS X 1001 mode first (SI) if it is active, then write
// the byte itself.
370 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
372 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
374 if (pDestBufPtr
!= pDestBufEnd
)
376 *pDestBufPtr
++ = 0x0F; // SI
377 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
382 if (pDestBufPtr
!= pDestBufEnd
)
383 *pDestBufPtr
++ = static_cast< char >(nChar
);
// SO, SI and ESC are the encoding's own control bytes and are tested
// separately.
// NOTE(review): what is done with them is not visible in this listing.
387 else if (nChar
== 0x0E || nChar
== 0x0F || nChar
== 0x1B)
// Any other character below 0x80: switch back to ASCII with SI if needed,
// then write the byte.
389 else if (nChar
< 0x80)
391 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
)
393 if (pDestBufPtr
!= pDestBufEnd
)
395 *pDestBufPtr
++ = 0x0F; // SI
396 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
401 if (pDestBufPtr
!= pDestBufEnd
)
402 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Everything else is looked up in the KS X 1001 table: the high byte of the
// character selects the table entry, the low byte indexes that entry's
// mnLowStart..mnLowEnd range.  nBytes stays 0 when the low byte is outside
// the range.
408 sal_uInt16 nBytes
= 0;
409 sal_uInt32 nIndex1
= nChar
>> 8;
412 sal_uInt32 nIndex2
= nChar
& 0xFF;
413 sal_uInt32 nFirst
= pKsX1001Data
[nIndex1
].mnLowStart
;
414 if (nIndex2
>= nFirst
415 && nIndex2
<= pKsX1001Data
[nIndex1
].mnLowEnd
)
416 nBytes
= pKsX1001Data
[nIndex1
].
417 mpToUniTrailTab
[nIndex2
- nFirst
];
// Enter KS X 1001 mode with SO if the stream is currently in ASCII mode.
421 if (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
)
423 if (pDestBufPtr
!= pDestBufEnd
)
425 *pDestBufPtr
++ = 0x0E; // SO
426 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
;
// Write the two bytes of the character, each masked to 7 bits; both must
// fit in the destination.
431 if (pDestBufEnd
- pDestBufPtr
>= 2)
433 *pDestBufPtr
++ = static_cast< char >((nBytes
>> 8) & 0x7F);
434 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0x7F);
// Bad input is delegated to the shared helper.  It is given an SI byte as
// prefix, with length 0 when the stream is already in ASCII mode and 1
// otherwise; after the CONTINUE outcome the stream is treated as being in
// ASCII mode.
// NOTE(review): how control reaches this point, and what follows the STOP
// and NO_OUTPUT labels, is not visible in this listing.
446 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
447 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
448 &nInfo
, "\x0F" /* SI */,
449 eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
? 0 : 1,
452 case sal::detail::textenc::BAD_INPUT_STOP
:
456 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
458 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
462 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer was too small.
469 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// After the loop: end-of-input handling, guarded by a test on the error and
// buffer-too-small bits of nInfo.
// NOTE(review): the comparison applied to the masked nInfo value is not
// visible here.
473 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
474 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// A high surrogate is still waiting for its partner.
// NOTE(review): SRCBUFFERTOSMALL is set here when FLUSH *is* set, whereas
// ImplConvertIso2022KrToUnicode sets it when FLUSH is *not* set -- the two
// tests have opposite sense; confirm which one is intended.
478 if (nHighSurrogate
!= 0)
480 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
481 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
// The dangling surrogate is handed to the bad-input helper (presumably the
// else-branch of the FLUSH test above -- the branch keyword is not visible).
483 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
484 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
486 (eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
490 case sal::detail::textenc::BAD_INPUT_STOP
:
495 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
497 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
501 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
502 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// On flush while still in KS X 1001 mode, close the stream with a final SI
// so that it ends in ASCII mode; if there is no room for it, report the
// destination as too small.
// NOTE(review): the first operand of this condition is not visible.
507 && eSet
== IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
508 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
510 if (pDestBufPtr
!= pDestBufEnd
)
512 *pDestBufPtr
++ = 0x0F; // SI
513 eSet
= IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
;
516 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Save the encoder state for the next call.
// NOTE(review): the right-hand side of the first assignment is not visible.
522 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_nHighSurrogate
524 static_cast< ImplUnicodeToIso2022KrContext
* >(pContext
)->m_eSet
= eSet
;
// Report the number of source code units consumed.
529 *pSrcCvtChars
= nConverted
;
// Number of bytes written to the destination buffer.
531 return pDestBufPtr
- pDestBuf
;
534 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */