1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "sal/config.h"
22 #include "rtl/textcvt.h"
23 #include "sal/types.h"
25 #include "context.hxx"
26 #include "converter.hxx"
27 #include "converteuctw.hxx"
28 #include "tenchelp.hxx"
29 #include "unichars.hxx"
/** Decoder states of the EUC-TW to Unicode conversion.

    The state records how much of a multi-byte sequence has been read so
    far.  It is stored in the conversion context between calls, so a
    sequence that is split across two source buffers can be resumed.
 */
enum ImplEucTwToUnicodeState
{
    IMPL_EUC_TW_TO_UNICODE_STATE_0,   // no sequence pending
    IMPL_EUC_TW_TO_UNICODE_STATE_1,   // first byte 0xA1--0xFE read
    IMPL_EUC_TW_TO_UNICODE_STATE_2_1, // 0x8E read
    IMPL_EUC_TW_TO_UNICODE_STATE_2_2, // 0x8E and plane byte 0xA1--0xB0 read
    IMPL_EUC_TW_TO_UNICODE_STATE_2_3  // 0x8E, plane byte and one more byte read
};
42 struct ImplEucTwToUnicodeContext
44 ImplEucTwToUnicodeState m_eState
;
45 sal_Int32 m_nPlane
; // 0--15
46 sal_Int32 m_nRow
; // 0--93
51 void * ImplCreateEucTwToUnicodeContext()
53 ImplEucTwToUnicodeContext
* pContext
= new ImplEucTwToUnicodeContext
;
54 pContext
->m_eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
58 void ImplResetEucTwToUnicodeContext(void * pContext
)
61 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_eState
62 = IMPL_EUC_TW_TO_UNICODE_STATE_0
;
65 void ImplDestroyEucTwToUnicodeContext(void * pContext
)
67 delete static_cast< ImplEucTwToUnicodeContext
* >(pContext
);
/* Converts EUC-TW encoded bytes to UTF-16 code units.
 *
 * What the visible code does:
 *  - pData is cast to ImplEucTwConverterData and supplies three lookup
 *    tables (code units, row offsets, plane offsets).
 *  - The decoder state (m_eState, m_nPlane, m_nRow) is loaded from pContext
 *    before the loop and stored back after it.
 *  - The source is consumed one byte at a time.  In state 0 a byte in
 *    0xA1--0xFE moves to STATE_1 and the byte 0x8E moves to STATE_2_1; in
 *    STATE_2_1 a byte in 0xA1--0xB0 selects the plane (byte - 0xA1).
 *  - A completed sequence is looked up through plane offset, row offset and
 *    a [first, last] range; the table value 0xFFFF and the plane offset -1
 *    are tested for explicitly.  A high surrogate result needs room for two
 *    output code units.
 *  - Malformed or unmapped input goes through
 *    sal::detail::textenc::handleBadInputTextToUnicodeConversion.
 *  - *pSrcCvtBytes receives the number of source bytes consumed, and the
 *    return value is the number of code units written (pDestBufPtr -
 *    pDestBuf).
 *
 * NOTE(review): this copy of the function is incomplete.  Only pData,
 * pDestBuf and pSrcCvtBytes appear in the visible parameter list, although
 * the body also uses pContext, pSrcBuf, nSrcBytes, nDestChars and nFlags,
 * and several braces, declarations, conditions and jumps are absent.
 * Restore the missing lines from the upstream file before relying on it.
 */
70 sal_Size
ImplConvertEucTwToUnicode(void const * pData
,
74 sal_Unicode
* pDestBuf
,
78 sal_Size
* pSrcCvtBytes
)
// Lookup tables taken from the converter data.
80 sal_uInt16
const * pCns116431992Data
81 = static_cast< ImplEucTwConverterData
const * >(pData
)->
82 m_pCns116431992ToUnicodeData
;
83 sal_Int32
const * pCns116431992RowOffsets
84 = static_cast< ImplEucTwConverterData
const * >(pData
)->
85 m_pCns116431992ToUnicodeRowOffsets
;
86 sal_Int32
const * pCns116431992PlaneOffsets
87 = static_cast< ImplEucTwConverterData
const * >(pData
)->
88 m_pCns116431992ToUnicodePlaneOffsets
;
89 ImplEucTwToUnicodeState eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
93 sal_Size nConverted
= 0;
94 sal_Unicode
* pDestBufPtr
= pDestBuf
;
95 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Resume from the state stored in the context.
99 eState
= static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_eState
;
100 nPlane
= static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nPlane
;
101 nRow
= static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nRow
;
// Consume the source one byte at a time; nConverted counts consumed bytes.
104 for (; nConverted
< nSrcBytes
; ++nConverted
)
106 bool bUndefined
= true;
107 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
110 case IMPL_EUC_TW_TO_UNICODE_STATE_0
:
112 if (pDestBufPtr
!= pDestBufEnd
)
113 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
116 else if (nChar
>= 0xA1 && nChar
<= 0xFE)
119 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_1
;
121 else if (nChar
== 0x8E)
122 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_1
;
130 case IMPL_EUC_TW_TO_UNICODE_STATE_1
:
131 if (nChar
>= 0xA1 && nChar
<= 0xFE)
143 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1
:
144 if (nChar
>= 0xA1 && nChar
<= 0xB0)
146 nPlane
= nChar
- 0xA1;
147 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_2
;
156 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2
:
157 if (nChar
>= 0xA1 && nChar
<= 0xFE)
160 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_3
;
169 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3
:
170 if (nChar
>= 0xA1 && nChar
<= 0xFE)
// Table lookup: plane offset, then row offset, then a [first, last] range
// word (low byte = first, high byte = last), then the code unit itself.
183 sal_Int32 nPlaneOffset
= pCns116431992PlaneOffsets
[nPlane
];
184 if (nPlaneOffset
== -1)
189 = pCns116431992RowOffsets
[nPlaneOffset
+ nRow
];
194 sal_uInt32 nFirstLast
= pCns116431992Data
[nOffset
++];
195 sal_uInt32 nFirst
= nFirstLast
& 0xFF;
196 sal_uInt32 nLast
= nFirstLast
>> 8;
198 if (nChar
>= nFirst
&& nChar
<= nLast
)
201 = pCns116431992Data
[nOffset
+ (nChar
- nFirst
)];
202 if (nUnicode
== 0xFFFF)
// A high surrogate is written together with a second code unit that is read
// from the table section following the current [first, last] range.
204 else if (ImplIsHighSurrogate(nUnicode
))
205 if (pDestBufEnd
- pDestBufPtr
>= 2)
207 nOffset
+= nLast
- nFirst
+ 1;
208 nFirst
= pCns116431992Data
[nOffset
++];
209 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
213 nOffset
+ (nChar
- nFirst
)];
218 if (pDestBufPtr
!= pDestBufEnd
)
219 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
225 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
232 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
233 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
236 case sal::detail::textenc::BAD_INPUT_STOP
:
237 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
240 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
241 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
244 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
251 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// The source ended inside a multi-byte sequence: without the FLUSH flag this
// is reported as SRCBUFFERTOSMALL, otherwise the bad-input handler is called.
255 if (eState
!= IMPL_EUC_TW_TO_UNICODE_STATE_0
256 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
257 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
260 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
261 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
263 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
264 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
267 case sal::detail::textenc::BAD_INPUT_STOP
:
268 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
269 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
272 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
273 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// Store the decoder state back into the context for the next call.
280 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_eState
= eState
;
281 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nPlane
= nPlane
;
282 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
287 *pSrcCvtBytes
= nConverted
;
289 return pDestBufPtr
- pDestBuf
;
/* Converts UTF-16 code units to EUC-TW encoded bytes.
 *
 * What the visible code does:
 *  - pData is cast to ImplEucTwConverterData and supplies three lookup
 *    tables (byte data, page offsets, plane offsets).
 *  - A pending high surrogate is taken from the m_nHighSurrogate member of
 *    pContext (cast to ImplUnicodeToTextContext); the same member is
 *    accessed again after the loop.
 *  - Each source code unit is either remembered as a high surrogate,
 *    combined with the pending one via ImplCombineSurrogates, or used as is.
 *    Low surrogates and noncharacters are tested for separately.
 *  - The character is looked up through plane offset (nChar >> 16), page
 *    offset ((nChar & 0xFF00) >> 8) and a [first, last] range.  Table
 *    entries are three bytes wide: a plane followed by two bytes, each of
 *    which is written as 0xA0 + value.
 *  - Output needs 2 bytes for plane 1 and 4 bytes otherwise; the extra two
 *    bytes are 0x8E and 0xA0 + plane.
 *  - Unconvertible input goes through
 *    sal::detail::textenc::handleBadInputUnicodeToTextConversion.
 *  - *pSrcCvtChars receives the number of source code units consumed, and
 *    the return value is the number of bytes written (pDestBufPtr -
 *    pDestBuf).
 *
 * NOTE(review): this copy of the function is incomplete.  Only pData,
 * pSrcBuf and pSrcCvtChars appear in the visible parameter list, although
 * the body also uses pContext, nSrcChars, pDestBuf, nDestBytes and nFlags,
 * and several braces, declarations, conditions and jumps are absent.
 * Restore the missing lines from the upstream file before relying on it.
 */
292 sal_Size
ImplConvertUnicodeToEucTw(void const * pData
,
294 sal_Unicode
const * pSrcBuf
,
300 sal_Size
* pSrcCvtChars
)
// Lookup tables taken from the converter data.
302 sal_uInt8
const * pCns116431992Data
303 = static_cast< ImplEucTwConverterData
const * >(pData
)->
304 m_pUnicodeToCns116431992Data
;
305 sal_Int32
const * pCns116431992PageOffsets
306 = static_cast< ImplEucTwConverterData
const * >(pData
)->
307 m_pUnicodeToCns116431992PageOffsets
;
308 sal_Int32
const * pCns116431992PlaneOffsets
309 = static_cast< ImplEucTwConverterData
const * >(pData
)->
310 m_pUnicodeToCns116431992PlaneOffsets
;
311 sal_Unicode nHighSurrogate
= 0;
312 sal_uInt32 nInfo
= 0;
313 sal_Size nConverted
= 0;
314 char * pDestBufPtr
= pDestBuf
;
315 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
319 = ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
;
// Consume the source one code unit at a time; nConverted counts consumed units.
321 for (; nConverted
< nSrcChars
; ++nConverted
)
323 bool bUndefined
= true;
324 sal_uInt32 nChar
= *pSrcBuf
++;
325 if (nHighSurrogate
== 0)
327 if (ImplIsHighSurrogate(nChar
))
329 nHighSurrogate
= (sal_Unicode
) nChar
;
333 else if (ImplIsLowSurrogate(nChar
))
334 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
341 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
348 if (pDestBufPtr
!= pDestBufEnd
)
349 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Table lookup: plane offset, then page offset, then a [first, last] range,
// then a three-byte entry (plane, byte, byte).
354 sal_Int32 nOffset
= pCns116431992PlaneOffsets
[nChar
>> 16];
361 = pCns116431992PageOffsets
[nOffset
+ ((nChar
& 0xFF00) >> 8)];
364 nFirst
= pCns116431992Data
[nOffset
++];
365 nLast
= pCns116431992Data
[nOffset
++];
367 if (nChar
< nFirst
|| nChar
> nLast
)
369 nOffset
+= 3 * (nChar
- nFirst
);
370 nPlane
= pCns116431992Data
[nOffset
++];
// Plane 1 needs two output bytes, any other plane needs four.
373 if (pDestBufEnd
- pDestBufPtr
< (nPlane
== 1 ? 2 : 4))
// The guard around the next two writes is not visible in this copy;
// presumably they happen only for planes other than 1, which would match the
// 2-versus-4 byte size check above.
377 *pDestBufPtr
++ = static_cast< char >(static_cast< unsigned char >(0x8E));
378 *pDestBufPtr
++ = static_cast< char >(0xA0 + nPlane
);
380 *pDestBufPtr
++ = static_cast< char >(0xA0 + pCns116431992Data
[nOffset
++]);
381 *pDestBufPtr
++ = static_cast< char >(0xA0 + pCns116431992Data
[nOffset
]);
387 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
388 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
389 &nInfo
, NULL
, 0, NULL
))
391 case sal::detail::textenc::BAD_INPUT_STOP
:
395 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
399 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
406 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// The source ended with a high surrogate still pending.
// NOTE(review): SRCBUFFERTOSMALL is set here when FLUSH *is* set ("!= 0"),
// whereas ImplConvertEucTwToUnicode sets it when FLUSH is *not* set
// ("== 0").  Confirm that the opposite polarity is intended.
410 if (nHighSurrogate
!= 0
411 && (nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
412 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
415 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
416 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
418 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
419 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
422 case sal::detail::textenc::BAD_INPUT_STOP
:
423 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
427 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
428 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// The pending-surrogate member of the context is accessed here; the rest of
// the statement is not visible in this copy (presumably it stores
// nHighSurrogate back for the next call).
434 ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
439 *pSrcCvtChars
= nConverted
;
441 return pDestBufPtr
- pDestBuf
;
444 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */