1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "context.hxx"
29 #include "converter.hxx"
30 #include "converteuctw.hxx"
31 #include "tenchelp.hxx"
32 #include "unichars.hxx"
// Decoder states used by ImplConvertEucTwToUnicode; each one has its own
// `case` label in that function.
//   STATE_0   - state a new or reset context starts in; a byte in
//               0xA1..0xFE moves to STATE_1, the byte 0x8E moves to STATE_2_1.
//   STATE_1   - checks that the next byte lies in 0xA1..0xFE.
//   STATE_2_1 - a byte in 0xA1..0xB0 selects the plane (byte - 0xA1) and
//               moves to STATE_2_2.
//   STATE_2_2 - a byte in 0xA1..0xFE moves to STATE_2_3.
//   STATE_2_3 - checks that the next byte lies in 0xA1..0xFE.
36 enum ImplEucTwToUnicodeState
38 IMPL_EUC_TW_TO_UNICODE_STATE_0
,
39 IMPL_EUC_TW_TO_UNICODE_STATE_1
,
40 IMPL_EUC_TW_TO_UNICODE_STATE_2_1
,
41 IMPL_EUC_TW_TO_UNICODE_STATE_2_2
,
42 IMPL_EUC_TW_TO_UNICODE_STATE_2_3
// Decoder state that ImplConvertEucTwToUnicode reads from the context before
// its conversion loop and writes back to the context after it.
45 struct ImplEucTwToUnicodeContext
// Current state of the decoder state machine.
47 ImplEucTwToUnicodeState m_eState
;
// Plane value saved from / restored into the converter's nPlane local.
48 sal_Int32 m_nPlane
; // 0--15
// Row value saved from / restored into the converter's nRow local.
49 sal_Int32 m_nRow
; // 0--93
// Allocates an ImplEucTwToUnicodeContext with `new` and puts it into
// IMPL_EUC_TW_TO_UNICODE_STATE_0.
// m_nPlane and m_nRow are not assigned in the lines visible here.
// NOTE(review): the return statement is not visible in this view; presumably
// the new context is handed back as void * - confirm.
54 void * ImplCreateEucTwToUnicodeContext()
56 ImplEucTwToUnicodeContext
* pContext
= new ImplEucTwToUnicodeContext
;
57 pContext
->m_eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
// Resets a decoder context: casts pContext to ImplEucTwToUnicodeContext and
// sets its m_eState back to IMPL_EUC_TW_TO_UNICODE_STATE_0.
// m_nPlane and m_nRow are left untouched by the visible lines.
// NOTE(review): any guard around the assignment (e.g. a null check) is not
// visible in this view.
61 void ImplResetEucTwToUnicodeContext(void * pContext
)
64 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_eState
65 = IMPL_EUC_TW_TO_UNICODE_STATE_0
;
// Destroys a decoder context: casts pContext to ImplEucTwToUnicodeContext *
// so that `delete` is applied to the same type that the create function
// allocates with `new`.
68 void ImplDestroyEucTwToUnicodeContext(void * pContext
)
70 delete static_cast< ImplEucTwToUnicodeContext
* >(pContext
);
// Decodes a buffer of bytes into sal_Unicode units stored in pDestBuf, using
// the m_pCns116431992ToUnicode* tables of the ImplEucTwConverterData passed
// as pData and a per-byte state machine (ImplEucTwToUnicodeState).
// Decoder state is loaded from pContext before the loop and stored back
// afterwards.
// Returns the number of sal_Unicode units written (pDestBufPtr - pDestBuf) and
// stores the number of consumed source bytes in *pSrcCvtBytes.
// NOTE(review): several parameters and statements of this function are not
// visible in this view; the comments below describe only the visible lines.
73 sal_Size
ImplConvertEucTwToUnicode(void const * pData
,
77 sal_Unicode
* pDestBuf
,
81 sal_Size
* pSrcCvtBytes
)
// Lookup tables taken from the converter data.
83 sal_uInt16
const * pCns116431992Data
84 = static_cast< ImplEucTwConverterData
const * >(pData
)->
85 m_pCns116431992ToUnicodeData
;
86 sal_Int32
const * pCns116431992RowOffsets
87 = static_cast< ImplEucTwConverterData
const * >(pData
)->
88 m_pCns116431992ToUnicodeRowOffsets
;
89 sal_Int32
const * pCns116431992PlaneOffsets
90 = static_cast< ImplEucTwConverterData
const * >(pData
)->
91 m_pCns116431992ToUnicodePlaneOffsets
;
92 ImplEucTwToUnicodeState eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
// nConverted counts consumed source bytes; it is the loop counter below.
96 sal_Size nConverted
= 0;
97 sal_Unicode
* pDestBufPtr
= pDestBuf
;
98 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Index of the first source byte of the character currently being decoded;
// used to rewind nConverted in the BAD_INPUT_STOP handling.
99 sal_Size startOfCurrentChar
= 0;
// Resume from the state saved in the context.
103 eState
= static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_eState
;
104 nPlane
= static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nPlane
;
105 nRow
= static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nRow
;
// One source byte is read per iteration.
108 for (; nConverted
< nSrcBytes
; ++nConverted
)
110 bool bUndefined
= true;
// Read the byte as unsigned so the range checks below work on 0..0xFF.
111 sal_uInt32 nChar
= *reinterpret_cast<unsigned char const *>(pSrcBuf
++);
114 case IMPL_EUC_TW_TO_UNICODE_STATE_0
:
// The byte is emitted unchanged as one sal_Unicode when the destination has
// room. NOTE(review): the condition selecting this branch is not visible
// here; presumably it is the single-byte case - confirm.
116 if (pDestBufPtr
!= pDestBufEnd
) {
117 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nChar
);
118 startOfCurrentChar
= nConverted
+ 1;
// A byte in 0xA1..0xFE is followed by STATE_1.
121 else if (nChar
>= 0xA1 && nChar
<= 0xFE)
124 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_1
;
// The byte 0x8E is followed by STATE_2_1.
126 else if (nChar
== 0x8E)
127 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_1
;
135 case IMPL_EUC_TW_TO_UNICODE_STATE_1
:
136 if (nChar
>= 0xA1 && nChar
<= 0xFE)
148 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1
:
// The byte after 0x8E selects the plane: 0xA1..0xB0 maps to 0..15.
149 if (nChar
>= 0xA1 && nChar
<= 0xB0)
151 nPlane
= nChar
- 0xA1;
152 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_2
;
161 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2
:
162 if (nChar
>= 0xA1 && nChar
<= 0xFE)
165 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_3
;
174 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3
:
175 if (nChar
>= 0xA1 && nChar
<= 0xFE)
// Table lookup: the plane offset is tested against -1 before it is added to
// nRow to index the row-offset table.
188 sal_Int32 nPlaneOffset
= pCns116431992PlaneOffsets
[nPlane
];
189 if (nPlaneOffset
== -1)
194 = pCns116431992RowOffsets
[nPlaneOffset
+ nRow
];
// The first data word of a row packs the covered range: low byte is the first
// covered value, high byte is the last.
199 sal_uInt32 nFirstLast
= pCns116431992Data
[nOffset
++];
200 sal_uInt32 nFirst
= nFirstLast
& 0xFF;
201 sal_uInt32 nLast
= nFirstLast
>> 8;
203 if (nChar
>= nFirst
&& nChar
<= nLast
)
206 = pCns116431992Data
[nOffset
+ (nChar
- nFirst
)];
// The table value 0xFFFF is tested separately from real code units.
207 if (nUnicode
== 0xFFFF)
// A high surrogate needs room for two output units. The second unit is read
// from a second table that starts after the (nLast - nFirst + 1) entries of
// the first one and begins with its own nFirst word.
209 else if (rtl::isHighSurrogate(nUnicode
))
210 if (pDestBufEnd
- pDestBufPtr
>= 2)
212 nOffset
+= nLast
- nFirst
+ 1;
213 nFirst
= pCns116431992Data
[nOffset
++];
214 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nUnicode
);
216 = static_cast<sal_Unicode
>(pCns116431992Data
[
217 nOffset
+ (nChar
- nFirst
)]);
218 startOfCurrentChar
= nConverted
+ 1;
// Otherwise a single output unit is written when there is room.
223 if (pDestBufPtr
!= pDestBufEnd
) {
224 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nUnicode
);
225 startOfCurrentChar
= nConverted
+ 1;
231 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
// Bad-input handling: the shared helper decides, based on nFlags, whether to
// stop, continue, or report that nothing could be written.
238 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
239 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
// STOP: the decoder state is reset. nConverted is set depending on the FLUSH
// flag; only the rewind to startOfCurrentChar is visible here.
242 case sal::detail::textenc::BAD_INPUT_STOP
:
243 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
244 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0) {
247 nConverted
= startOfCurrentChar
;
// CONTINUE: reset the state; the next byte starts a new character.
251 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
252 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
253 startOfCurrentChar
= nConverted
+ 1;
256 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer was too small.
263 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// After the loop: handling for input that ended in the middle of a character
// (eState is not STATE_0).
// NOTE(review): the comparison applied to the masked nInfo value is not
// visible in this view.
267 if (eState
!= IMPL_EUC_TW_TO_UNICODE_STATE_0
268 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
269 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
))
// Without FLUSH, report that the source buffer ended too early.
272 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
273 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
;
// The bad-input helper is invoked here with bUndefined = false.
275 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
276 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
279 case sal::detail::textenc::BAD_INPUT_STOP
:
280 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) != 0) {
281 nConverted
= startOfCurrentChar
;
284 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
285 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
288 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
289 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// Save the decoder state so that a later call can resume from it.
296 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_eState
= eState
;
297 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nPlane
= nPlane
;
298 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report the number of consumed source bytes.
303 *pSrcCvtBytes
= nConverted
;
// Number of sal_Unicode units written.
305 return pDestBufPtr
- pDestBuf
;
// Encodes sal_Unicode units from pSrcBuf into bytes stored in pDestBuf, using
// the m_pUnicodeToCns116431992* tables of the ImplEucTwConverterData passed
// as pData. A pending high surrogate is tracked in nHighSurrogate and is
// exchanged with the ImplUnicodeToTextContext behind pContext.
// Returns the number of bytes written (pDestBufPtr - pDestBuf) and stores the
// number of consumed source units in *pSrcCvtChars.
// NOTE(review): several parameters and statements of this function are not
// visible in this view; the comments below describe only the visible lines.
308 sal_Size
ImplConvertUnicodeToEucTw(void const * pData
,
310 sal_Unicode
const * pSrcBuf
,
316 sal_Size
* pSrcCvtChars
)
// Lookup tables taken from the converter data.
318 sal_uInt8
const * pCns116431992Data
319 = static_cast< ImplEucTwConverterData
const * >(pData
)->
320 m_pUnicodeToCns116431992Data
;
321 sal_Int32
const * pCns116431992PageOffsets
322 = static_cast< ImplEucTwConverterData
const * >(pData
)->
323 m_pUnicodeToCns116431992PageOffsets
;
324 sal_Int32
const * pCns116431992PlaneOffsets
325 = static_cast< ImplEucTwConverterData
const * >(pData
)->
326 m_pUnicodeToCns116431992PlaneOffsets
;
// 0 means that no high surrogate is pending.
327 sal_Unicode nHighSurrogate
= 0;
328 sal_uInt32 nInfo
= 0;
329 sal_Size nConverted
= 0;
330 char * pDestBufPtr
= pDestBuf
;
331 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Value read from the context's m_nHighSurrogate (the assignment target is
// not visible in this view).
335 = static_cast<ImplUnicodeToTextContext
*>(pContext
)->m_nHighSurrogate
;
// One source unit is read per iteration.
337 for (; nConverted
< nSrcChars
; ++nConverted
)
339 bool bUndefined
= true;
340 sal_uInt32 nChar
= *pSrcBuf
++;
// No high surrogate pending: a high surrogate is remembered, and a low
// surrogate is tested for separately.
341 if (nHighSurrogate
== 0)
343 if (rtl::isHighSurrogate(nChar
))
345 nHighSurrogate
= static_cast<sal_Unicode
>(nChar
);
348 else if (rtl::isLowSurrogate(nChar
))
// High surrogate pending: a following low surrogate is combined with it into
// a single code point.
354 else if (rtl::isLowSurrogate(nChar
))
355 nChar
= rtl::combineSurrogates(nHighSurrogate
, nChar
);
362 assert(rtl::isUnicodeScalarValue(nChar
));
// nChar is written as a single byte when the destination has room.
// NOTE(review): the condition selecting this branch is not visible here;
// presumably it is the single-byte case - confirm.
365 if (pDestBufPtr
!= pDestBufEnd
)
366 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Table lookup: bits 16 and above of nChar index the plane-offset table, and
// bits 8..15 are added to that offset to index the page-offset table.
371 sal_Int32 nOffset
= pCns116431992PlaneOffsets
[nChar
>> 16];
378 = pCns116431992PageOffsets
[nOffset
+ ((nChar
& 0xFF00) >> 8)];
// A page starts with two bytes giving the first and last covered value.
381 nFirst
= pCns116431992Data
[nOffset
++];
382 nLast
= pCns116431992Data
[nOffset
++];
384 if (nChar
< nFirst
|| nChar
> nLast
)
// Each table entry is three bytes: the plane, followed by the two values that
// are written below with 0xA0 added.
386 nOffset
+= 3 * (nChar
- nFirst
);
387 nPlane
= pCns116431992Data
[nOffset
++];
// Plane 1 needs 2 output bytes, every other plane needs 4.
390 if (pDestBufEnd
- pDestBufPtr
< (nPlane
== 1 ? 2 : 4))
// Prefix bytes: 0x8E followed by 0xA0 + plane.
// NOTE(review): the guard around these two writes is not visible here;
// presumably they are skipped for plane 1 - confirm.
394 *pDestBufPtr
++ = static_cast< char >(static_cast< unsigned char >(0x8E));
395 *pDestBufPtr
++ = static_cast< char >(0xA0 + nPlane
);
// The two remaining bytes of the table entry, each offset by 0xA0.
397 *pDestBufPtr
++ = static_cast< char >(0xA0 + pCns116431992Data
[nOffset
++]);
398 *pDestBufPtr
++ = static_cast< char >(0xA0 + pCns116431992Data
[nOffset
]);
// Bad-input handling: the shared helper decides, based on nFlags, whether to
// stop, continue, or report that nothing could be written.
404 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
405 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
406 &nInfo
, nullptr, 0, nullptr))
408 case sal::detail::textenc::BAD_INPUT_STOP
:
412 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
416 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer was too small.
423 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// After the loop: handling for input that ended with a high surrogate still
// pending.
// NOTE(review): the comparison applied to the masked nInfo value is not
// visible in this view.
427 if (nHighSurrogate
!= 0
428 && (nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
429 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// As written, SRCBUFFERTOSMALL is reported when the FLUSH flag is set.
432 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
433 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
// The bad-input helper is invoked here with bUndefined = false and character 0.
435 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
436 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
437 nullptr, 0, nullptr))
439 case sal::detail::textenc::BAD_INPUT_STOP
:
440 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
444 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
445 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// The context's m_nHighSurrogate is assigned here (the right-hand side is not
// visible in this view).
451 static_cast<ImplUnicodeToTextContext
*>(pContext
)->m_nHighSurrogate
// Report the number of consumed source units.
456 *pSrcCvtChars
= nConverted
;
// Number of bytes written.
458 return pDestBufPtr
- pDestBuf
;
461 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */