1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "sal/config.h"
31 #include "rtl/textcvt.h"
32 #include "sal/types.h"
34 #include "context.hxx"
35 #include "converter.hxx"
36 #include "converteuctw.hxx"
37 #include "tenchelp.hxx"
38 #include "unichars.hxx"
/** Decoder states of the EUC-TW to Unicode converter.

    Each value records how far into a multi-byte sequence the converter is.
    The per-state notes describe the transitions that are visible in
    ImplConvertEucTwToUnicode.
 */
enum ImplEucTwToUnicodeState
{
    IMPL_EUC_TW_TO_UNICODE_STATE_0,   // between characters; set on create/reset
    IMPL_EUC_TW_TO_UNICODE_STATE_1,   // entered from STATE_0 on a byte in 0xA1..0xFE
    IMPL_EUC_TW_TO_UNICODE_STATE_2_1, // entered from STATE_0 on the byte 0x8E
    IMPL_EUC_TW_TO_UNICODE_STATE_2_2, // entered from STATE_2_1 on a plane byte in 0xA1..0xB0
    IMPL_EUC_TW_TO_UNICODE_STATE_2_3  // entered from STATE_2_2 on a byte in 0xA1..0xFE
};
51 struct ImplEucTwToUnicodeContext
53 ImplEucTwToUnicodeState m_eState
;
54 sal_Int32 m_nPlane
; // 0--15
55 sal_Int32 m_nRow
; // 0--93
60 void * ImplCreateEucTwToUnicodeContext()
62 ImplEucTwToUnicodeContext
* pContext
= new ImplEucTwToUnicodeContext
;
63 pContext
->m_eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
67 void ImplResetEucTwToUnicodeContext(void * pContext
)
70 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_eState
71 = IMPL_EUC_TW_TO_UNICODE_STATE_0
;
74 void ImplDestroyEucTwToUnicodeContext(void * pContext
)
76 delete static_cast< ImplEucTwToUnicodeContext
* >(pContext
);
/* Converts EUC-TW encoded bytes into sal_Unicode units.
 *
 * NOTE(review): this listing is visibly incomplete -- braces, several
 * parameters, the switch/else scaffolding and some assignments are not
 * shown -- so the notes here describe only what the visible fragments do.
 *
 * What the visible code establishes:
 *  - pData is cast to ImplEucTwConverterData to obtain three tables: the
 *    ToUnicode data table, the row offsets and the plane offsets.
 *  - eState, nPlane and nRow are loaded from the ImplEucTwToUnicodeContext
 *    behind pContext before the loop and stored back into it afterwards.
 *  - The loop consumes one source byte per iteration; nConverted counts the
 *    consumed bytes and is reported through *pSrcCvtBytes.
 *  - The return value is the number of sal_Unicode units written
 *    (pDestBufPtr - pDestBuf).
 */
79 sal_Size
ImplConvertEucTwToUnicode(void const * pData
,
83 sal_Unicode
* pDestBuf
,
87 sal_Size
* pSrcCvtBytes
)
89 sal_uInt16
const * pCns116431992Data
90 = static_cast< ImplEucTwConverterData
const * >(pData
)->
91 m_pCns116431992ToUnicodeData
;
92 sal_Int32
const * pCns116431992RowOffsets
93 = static_cast< ImplEucTwConverterData
const * >(pData
)->
94 m_pCns116431992ToUnicodeRowOffsets
;
95 sal_Int32
const * pCns116431992PlaneOffsets
96 = static_cast< ImplEucTwConverterData
const * >(pData
)->
97 m_pCns116431992ToUnicodePlaneOffsets
;
98 ImplEucTwToUnicodeState eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
101 sal_uInt32 nInfo
= 0;
102 sal_Size nConverted
= 0;
103 sal_Unicode
* pDestBufPtr
= pDestBuf
;
104 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
/* Resume from the state saved by a previous call. */
108 eState
= static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_eState
;
109 nPlane
= static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nPlane
;
110 nRow
= static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nRow
;
/* One source byte is read (as an unsigned value) per iteration. */
113 for (; nConverted
< nSrcBytes
; ++nConverted
)
115 bool bUndefined
= true;
116 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
119 case IMPL_EUC_TW_TO_UNICODE_STATE_0
:
/* The byte is copied through unchanged if the output buffer has room. */
121 if (pDestBufPtr
!= pDestBufEnd
)
122 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
/* A byte in 0xA1..0xFE moves the decoder to STATE_1. */
125 else if (nChar
>= 0xA1 && nChar
<= 0xFE)
128 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_1
;
/* 0x8E starts the longer form: STATE_2_1 -> STATE_2_2 -> STATE_2_3. */
130 else if (nChar
== 0x8E)
131 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_1
;
139 case IMPL_EUC_TW_TO_UNICODE_STATE_1
:
140 if (nChar
>= 0xA1 && nChar
<= 0xFE)
152 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1
:
/* Plane byte: 0xA1..0xB0 maps onto plane indices 0..15. */
153 if (nChar
>= 0xA1 && nChar
<= 0xB0)
155 nPlane
= nChar
- 0xA1;
156 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_2
;
165 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2
:
166 if (nChar
>= 0xA1 && nChar
<= 0xFE)
169 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_2_3
;
178 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3
:
179 if (nChar
>= 0xA1 && nChar
<= 0xFE)
/* Table lookup: plane offset first (-1 is tested for explicitly), then the
   row offset at nPlaneOffset + nRow. */
192 sal_Int32 nPlaneOffset
= pCns116431992PlaneOffsets
[nPlane
];
193 if (nPlaneOffset
== -1)
198 = pCns116431992RowOffsets
[nPlaneOffset
+ nRow
];
/* The first data entry packs the covered range: low 8 bits = first value,
   the bits above = last value. */
203 sal_uInt32 nFirstLast
= pCns116431992Data
[nOffset
++];
204 sal_uInt32 nFirst
= nFirstLast
& 0xFF;
205 sal_uInt32 nLast
= nFirstLast
>> 8;
207 if (nChar
>= nFirst
&& nChar
<= nLast
)
210 = pCns116431992Data
[nOffset
+ (nChar
- nFirst
)];
/* 0xFFFF is tested for explicitly; the branch body is not visible here. */
211 if (nUnicode
== 0xFFFF)
/* A high surrogate entry needs room for two output units; a second lookup
   is made after skipping the nLast - nFirst + 1 entries just indexed. */
213 else if (ImplIsHighSurrogate(nUnicode
))
214 if (pDestBufEnd
- pDestBufPtr
>= 2)
216 nOffset
+= nLast
- nFirst
+ 1;
217 nFirst
= pCns116431992Data
[nOffset
++];
218 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
222 nOffset
+ (nChar
- nFirst
)];
227 if (pDestBufPtr
!= pDestBufEnd
)
228 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
234 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
/* Bad input is delegated to the shared helper; both STOP and CONTINUE put
   the decoder back into STATE_0. */
241 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
242 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
245 case sal::detail::textenc::BAD_INPUT_STOP
:
246 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
249 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
250 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
253 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
260 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* After the loop: a sequence is still pending (eState != STATE_0).  Without
   the FLUSH flag this is reported as SRCBUFFERTOSMALL. */
264 if (eState
!= IMPL_EUC_TW_TO_UNICODE_STATE_0
265 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
266 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
269 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
270 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
272 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
273 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
276 case sal::detail::textenc::BAD_INPUT_STOP
:
277 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
278 eState
= IMPL_EUC_TW_TO_UNICODE_STATE_0
;
281 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
282 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* Save the decoder state so that the next call can resume. */
289 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_eState
= eState
;
290 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nPlane
= nPlane
;
291 static_cast< ImplEucTwToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
296 *pSrcCvtBytes
= nConverted
;
298 return pDestBufPtr
- pDestBuf
;
/* Converts sal_Unicode units into EUC-TW encoded bytes.
 *
 * NOTE(review): this listing is visibly incomplete -- braces, several
 * parameters, else/goto scaffolding and some assignments are not shown --
 * so the notes here describe only what the visible fragments do.
 *
 * What the visible code establishes:
 *  - pData is cast to ImplEucTwConverterData to obtain three tables: the
 *    UnicodeTo data table, the page offsets and the plane offsets.
 *  - m_nHighSurrogate of the ImplUnicodeToTextContext behind pContext is read
 *    before the loop and referenced again after it (the assignment on that
 *    trailing fragment is not visible).
 *  - The loop consumes one sal_Unicode per iteration; nConverted counts them
 *    and is reported through *pSrcCvtChars.
 *  - The return value is the number of bytes written (pDestBufPtr - pDestBuf).
 */
301 sal_Size
ImplConvertUnicodeToEucTw(void const * pData
,
303 sal_Unicode
const * pSrcBuf
,
309 sal_Size
* pSrcCvtChars
)
311 sal_uInt8
const * pCns116431992Data
312 = static_cast< ImplEucTwConverterData
const * >(pData
)->
313 m_pUnicodeToCns116431992Data
;
314 sal_Int32
const * pCns116431992PageOffsets
315 = static_cast< ImplEucTwConverterData
const * >(pData
)->
316 m_pUnicodeToCns116431992PageOffsets
;
317 sal_Int32
const * pCns116431992PlaneOffsets
318 = static_cast< ImplEucTwConverterData
const * >(pData
)->
319 m_pUnicodeToCns116431992PlaneOffsets
;
320 sal_Unicode nHighSurrogate
= 0;
321 sal_uInt32 nInfo
= 0;
322 sal_Size nConverted
= 0;
323 char * pDestBufPtr
= pDestBuf
;
324 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
328 = ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
;
/* One source unit is read per iteration. */
330 for (; nConverted
< nSrcChars
; ++nConverted
)
332 bool bUndefined
= true;
333 sal_uInt32 nChar
= *pSrcBuf
++;
/* Surrogate handling: with nothing pending, a high surrogate is remembered;
   otherwise a low surrogate is combined with the pending high one. */
334 if (nHighSurrogate
== 0)
336 if (ImplIsHighSurrogate(nChar
))
338 nHighSurrogate
= (sal_Unicode
) nChar
;
342 else if (ImplIsLowSurrogate(nChar
))
343 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
350 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
/* Single-byte output path: nChar is narrowed to one char if there is room. */
357 if (pDestBufPtr
!= pDestBufEnd
)
358 *pDestBufPtr
++ = static_cast< char >(nChar
);
/* Table lookup: plane offsets are indexed by the bits above bit 15, page
   offsets by bits 8..15 of nChar. */
363 sal_Int32 nOffset
= pCns116431992PlaneOffsets
[nChar
>> 16];
370 = pCns116431992PageOffsets
[nOffset
+ ((nChar
& 0xFF00) >> 8)];
/* Two leading data bytes give the first and last value covered. */
373 nFirst
= pCns116431992Data
[nOffset
++];
374 nLast
= pCns116431992Data
[nOffset
++];
376 if (nChar
< nFirst
|| nChar
> nLast
)
/* Entries are three bytes wide: a plane byte followed by the two bytes that
   are each written as 0xA0 + value below. */
378 nOffset
+= 3 * (nChar
- nFirst
);
379 nPlane
= pCns116431992Data
[nOffset
++];
/* Plane 1 needs two output bytes, any other plane needs four. */
382 if (pDestBufEnd
- pDestBufPtr
< (nPlane
== 1 ? 2 : 4))
/* Prefix of the longer form: 0x8E followed by 0xA0 + nPlane. */
386 *pDestBufPtr
++ = static_cast< char >(static_cast< unsigned char >(0x8E));
387 *pDestBufPtr
++ = static_cast< char >(0xA0 + nPlane
);
389 *pDestBufPtr
++ = static_cast< char >(0xA0 + pCns116431992Data
[nOffset
++]);
390 *pDestBufPtr
++ = static_cast< char >(0xA0 + pCns116431992Data
[nOffset
]);
/* Bad input is delegated to the shared helper. */
396 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
397 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
398 &nInfo
, NULL
, 0, NULL
))
400 case sal::detail::textenc::BAD_INPUT_STOP
:
404 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
408 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
415 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* After the loop: a high surrogate is still pending.
   NOTE(review): SRCBUFFERTOSMALL is set here when FLUSH *is* set (!= 0),
   whereas the mirrored check in ImplConvertEucTwToUnicode sets it when FLUSH
   is *not* set (== 0) -- confirm which polarity is intended. */
419 if (nHighSurrogate
!= 0
420 && (nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
421 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
424 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
425 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
427 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
428 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
431 case sal::detail::textenc::BAD_INPUT_STOP
:
432 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
436 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
437 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
443 ((ImplUnicodeToTextContext
*) pContext
)->m_nHighSurrogate
448 *pSrcCvtChars
= nConverted
;
450 return pDestBufPtr
- pDestBuf
;
453 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */