1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: convertiso2022jp.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "convertiso2022jp.h"
33 #include "converter.h"
36 #include "rtl/alloc.h"
37 #include "rtl/textcvt.h"
38 #include "sal/types.h"
/*
 * Decoder states for the ISO-2022-JP -> Unicode direction.
 *
 * The numeric order is significant: the converter tests
 * "eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208" to recognise the states
 * in which the input stopped in the middle of a character or of an escape
 * sequence, so every such state must sort after ..._STATE_0208.  The values
 * are spelled out to make that dependency explicit; they equal the implicit
 * values of the original declaration.
 */
typedef enum
{
    /* Character-set states: a complete character may start here. */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII = 0,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN = 1,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208 = 2,

    /* Intermediate states: more input is required. */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2 = 3,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC = 4,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN = 5,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR = 6
} ImplIso2022JpToUnicodeState;
/*
 * Context kept between calls for the ISO-2022-JP -> Unicode direction.
 * m_eState is the decoder state: ImplConvertIso2022JpToUnicode loads it
 * before its conversion loop and stores it back afterwards.
 *
 * NOTE(review): this listing has dropped lines; the struct opener and the
 * m_nRow member that the converter also loads and stores are not visible
 * here.
 */
53 ImplIso2022JpToUnicodeState m_eState
;
55 } ImplIso2022JpToUnicodeContext
;
/*
 * Context kept between calls for the Unicode -> ISO-2022-JP direction.
 * m_nHighSurrogate holds a high surrogate seen at the end of one call so it
 * can be combined with a low surrogate in the next; 0 means none pending
 * (the converter tests "nHighSurrogate == 0").
 *
 * NOTE(review): this listing has dropped lines; the struct opener and the
 * m_b0208 member that the converter also loads and stores are not visible
 * here.
 */
59 sal_Unicode m_nHighSurrogate
;
61 } ImplUnicodeToIso2022JpContext
;
63 void * ImplCreateIso2022JpToUnicodeContext(void)
66 = rtl_allocateMemory(sizeof (ImplIso2022JpToUnicodeContext
));
67 ((ImplIso2022JpToUnicodeContext
*) pContext
)->m_eState
68 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
72 void ImplResetIso2022JpToUnicodeContext(void * pContext
)
75 ((ImplIso2022JpToUnicodeContext
*) pContext
)->m_eState
76 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
/*
 * ImplConvertIso2022JpToUnicode: decode ISO-2022-JP bytes from pSrcBuf into
 * sal_Unicode units in pDestBuf.
 *
 * NOTE(review): this listing is incomplete.  The embedded original line
 * numbers skip (79 -> 81 -> 83 -> 87 in the parameter list alone), so some
 * parameters, braces and branch conditions are not visible.  The comments
 * below describe only what the visible lines establish.
 *
 * - eState and nRow are loaded from the ImplIso2022JpToUnicodeContext behind
 *   pContext before the loop and written back after it.
 * - Each source byte is dispatched on eState (see the per-case notes).
 * - *pSrcCvtBytes receives nConverted, the number of source bytes consumed.
 * - The return value is pDestBufPtr - pDestBuf, i.e. the number of
 *   sal_Unicode units written.
 */
79 sal_Size
ImplConvertIso2022JpToUnicode(ImplTextConverterData
const * pData
,
81 sal_Char
const * pSrcBuf
,
83 sal_Unicode
* pDestBuf
,
87 sal_Size
* pSrcCvtBytes
)
89 ImplDBCSToUniLeadTab
const * pJisX0208Data
90 = ((ImplIso2022JpConverterData
const *) pData
)->
91 m_pJisX0208ToUnicodeData
;
92 ImplIso2022JpToUnicodeState eState
93 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
96 sal_Size nConverted
= 0;
97 sal_Unicode
* pDestBufPtr
= pDestBuf
;
98 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
/* Resume from the state saved in the context (any guard around these two
   loads is on a line not shown in this listing). */
102 eState
= ((ImplIso2022JpToUnicodeContext
*) pContext
)->m_eState
;
103 nRow
= ((ImplIso2022JpToUnicodeContext
*) pContext
)->m_nRow
;
/* One source byte per iteration; nConverted counts the bytes consumed. */
106 for (; nConverted
< nSrcBytes
; ++nConverted
)
108 sal_Bool bUndefined
= sal_True
;
109 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
/* ASCII: ESC opens an escape sequence; a byte below 0x80 is copied to the
   output unchanged when there is room in the destination buffer. */
112 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
:
113 if (nChar
== 0x1B) /* ESC */
114 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
115 else if (nChar
< 0x80)
116 if (pDestBufPtr
!= pDestBufEnd
)
117 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
122 bUndefined
= sal_False
;
/* JIS Roman: as ASCII, except that two codes are replaced by YEN SIGN
   (0xA5) and MACRON (0xAF) before output.  The tests selecting those two
   codes are on lines not shown here. */
127 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
:
128 if (nChar
== 0x1B) /* ESC */
129 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
130 else if (nChar
< 0x80)
131 if (pDestBufPtr
!= pDestBufEnd
)
136 nChar
= 0xA5; /* YEN SIGN */
140 nChar
= 0xAF; /* MACRON */
143 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
149 bUndefined
= sal_False
;
/* JIS X 0208, first byte: a byte in 0x21..0x7E advances to the second-byte
   state.  NOTE(review): nRow is presumably derived from this byte on a line
   not shown -- confirm against the full file. */
154 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
:
155 if (nChar
== 0x1B) /* ESC */
156 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
157 else if (nChar
>= 0x21 && nChar
<= 0x7E)
160 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
;
164 bUndefined
= sal_False
;
/* JIS X 0208, second byte: if the byte lies inside the trail range of
   pJisX0208Data[nRow], nUnicode is read from that row's mpToUniTrailTab;
   otherwise it keeps its initial value 0.  After a character is written the
   state returns to the first-byte state. */
169 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
:
170 if (nChar
>= 0x21 && nChar
<= 0x7E)
172 sal_uInt16 nUnicode
= 0;
173 sal_uInt32 nFirst
= pJisX0208Data
[nRow
].mnTrailStart
;
175 && nChar
<= pJisX0208Data
[nRow
].mnTrailEnd
)
176 nUnicode
= pJisX0208Data
[nRow
].
177 mpToUniTrailTab
[nChar
- nFirst
];
179 if (pDestBufPtr
!= pDestBufEnd
)
181 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
182 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
191 bUndefined
= sal_False
;
/* After ESC: the next byte selects ESC_DOLLAR or ESC_LPAREN.  The byte
   comparisons themselves are on lines not shown in this listing. */
196 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
:
200 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
;
204 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
;
208 bUndefined
= sal_False
;
/* After "ESC (": the final byte selects ASCII or JIS Roman (comparisons not
   shown). */
213 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
:
217 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
221 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
;
225 bUndefined
= sal_False
;
/* After "ESC $": the final byte selects JIS X 0208 (comparison not shown). */
230 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
:
235 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
239 bUndefined
= sal_False
;
/* Bad-input handling: bUndefined (sal_True unless one of the branches above
   cleared it) is passed as the first argument of the handler.  Both the
   STOP and the CONTINUE outcome reset the decoder to the ASCII state. */
247 switch (ImplHandleBadInputTextToUnicodeConversion(
248 bUndefined
, sal_True
, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
251 case IMPL_BAD_INPUT_STOP
:
252 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
255 case IMPL_BAD_INPUT_CONTINUE
:
256 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
259 case IMPL_BAD_INPUT_NO_OUTPUT
:
266 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* End of input.  "eState > ..._STATE_0208" relies on the enum order: it is
   true for the second-byte state and for all ESC states, i.e. the input
   stopped inside a character or escape sequence.  Without the FLUSH flag
   this is reported as SRCBUFFERTOSMALL; otherwise it is treated as bad
   input.  (The comparison applied to the nInfo mask is on a hidden line.) */
270 if (eState
> IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
271 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
272 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
275 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
276 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
278 switch (ImplHandleBadInputTextToUnicodeConversion(
279 sal_False
, sal_True
, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
282 case IMPL_BAD_INPUT_STOP
:
283 case IMPL_BAD_INPUT_CONTINUE
:
284 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
287 case IMPL_BAD_INPUT_NO_OUTPUT
:
288 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
/* Save the decoder state for the next call. */
295 ((ImplIso2022JpToUnicodeContext
*) pContext
)->m_eState
= eState
;
296 ((ImplIso2022JpToUnicodeContext
*) pContext
)->m_nRow
= nRow
;
/* Report consumed source bytes and return the number of units written. */
301 *pSrcCvtBytes
= nConverted
;
303 return pDestBufPtr
- pDestBuf
;
306 void * ImplCreateUnicodeToIso2022JpContext(void)
309 = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022JpContext
));
310 ((ImplUnicodeToIso2022JpContext
*) pContext
)->m_nHighSurrogate
= 0;
311 ((ImplUnicodeToIso2022JpContext
*) pContext
)->m_b0208
= sal_False
;
315 void ImplResetUnicodeToIso2022JpContext(void * pContext
)
319 ((ImplUnicodeToIso2022JpContext
*) pContext
)->m_nHighSurrogate
= 0;
320 ((ImplUnicodeToIso2022JpContext
*) pContext
)->m_b0208
= sal_False
;
/*
 * ImplConvertUnicodeToIso2022Jp: encode sal_Unicode units from pSrcBuf as
 * ISO-2022-JP bytes in pDestBuf.
 *
 * NOTE(review): this listing is incomplete.  The embedded original line
 * numbers skip (324 -> 326 -> 332 in the parameter list alone), so some
 * parameters, braces and branch conditions are not visible.  The comments
 * below describe only what the visible lines establish.
 *
 * - nHighSurrogate and b0208 are loaded from the
 *   ImplUnicodeToIso2022JpContext behind pContext before the loop and
 *   written back after it.
 * - "ESC ( B" (0x1B 0x28 0x42) and "ESC $ B" (0x1B 0x24 0x42) are written
 *   as character-set switches; each needs 3 free destination bytes.
 * - *pSrcCvtChars receives nConverted, the number of source units consumed.
 * - The return value is pDestBufPtr - pDestBuf, i.e. the number of bytes
 *   written.
 */
324 sal_Size
ImplConvertUnicodeToIso2022Jp(ImplTextConverterData
const * pData
,
326 sal_Unicode
const * pSrcBuf
,
332 sal_Size
* pSrcCvtChars
)
334 ImplUniToDBCSHighTab
const * pJisX0208Data
335 = ((ImplIso2022JpConverterData
const *) pData
)->
336 m_pUnicodeToJisX0208Data
;
337 sal_Unicode nHighSurrogate
= 0;
338 sal_Bool b0208
= sal_False
;
339 sal_uInt32 nInfo
= 0;
340 sal_Size nConverted
= 0;
341 sal_Char
* pDestBufPtr
= pDestBuf
;
342 sal_Char
* pDestBufEnd
= pDestBuf
+ nDestBytes
;
/* Resume from the state saved in the context (any guard around these two
   loads is on a line not shown in this listing). */
348 = ((ImplUnicodeToIso2022JpContext
*) pContext
)->m_nHighSurrogate
;
349 b0208
= ((ImplUnicodeToIso2022JpContext
*) pContext
)->m_b0208
;
/* One source unit per iteration; nConverted counts the units consumed. */
352 for (; nConverted
< nSrcChars
; ++nConverted
)
354 sal_Bool bUndefined
= sal_True
;
355 sal_uInt32 nChar
= *pSrcBuf
++;
/* Surrogate handling: with nothing pending, a high surrogate is remembered
   in nHighSurrogate; with one pending, a following low surrogate is
   combined with it into a single code point. */
356 if (nHighSurrogate
== 0)
358 if (ImplIsHighSurrogate(nChar
))
360 nHighSurrogate
= (sal_Unicode
) nChar
;
364 else if (ImplIsLowSurrogate(nChar
))
365 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
368 bUndefined
= sal_False
;
/* A lone low surrogate or a noncharacter clears bUndefined before the
   (hidden) jump to the bad-input path. */
372 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
374 bUndefined
= sal_False
;
/* LF / CR: "ESC ( B" is written, then the character itself.
   NOTE(review): the condition guarding the escape sequence is not visible;
   presumably it is emitted only while b0208 is set -- confirm. */
378 if (nChar
== 0x0A || nChar
== 0x0D) /* LF, CR */
382 if (pDestBufEnd
- pDestBufPtr
>= 3)
384 *pDestBufPtr
++ = 0x1B; /* ESC */
385 *pDestBufPtr
++ = 0x28; /* ( */
386 *pDestBufPtr
++ = 0x42; /* B */
392 if (pDestBufPtr
!= pDestBufEnd
)
393 *pDestBufPtr
++ = (sal_Char
) nChar
;
/* ESC (0x1B) gets its own branch, whose body is not visible here; other
   characters below 0x80 follow the same "ESC ( B" + single byte pattern as
   the line-ending case above. */
397 else if (nChar
== 0x1B)
399 else if (nChar
< 0x80)
403 if (pDestBufEnd
- pDestBufPtr
>= 3)
405 *pDestBufPtr
++ = 0x1B; /* ESC */
406 *pDestBufPtr
++ = 0x28; /* ( */
407 *pDestBufPtr
++ = 0x42; /* B */
413 if (pDestBufPtr
!= pDestBufEnd
)
414 *pDestBufPtr
++ = (sal_Char
) nChar
;
/* Everything else: look up a two-byte code.  The high byte of nChar selects
   the table row, the low byte (if inside [mnLowStart, mnLowEnd]) the entry.
   Despite its name, mpToUniTrailTab here supplies nBytes, the value that is
   later written out as two destination bytes.  nBytes stays 0 when the low
   byte is out of range. */
420 sal_uInt16 nBytes
= 0;
421 sal_uInt32 nIndex1
= nChar
>> 8;
424 sal_uInt32 nIndex2
= nChar
& 0xFF;
425 sal_uInt32 nFirst
= pJisX0208Data
[nIndex1
].mnLowStart
;
426 if (nIndex2
>= nFirst
427 && nIndex2
<= pJisX0208Data
[nIndex1
].mnLowEnd
)
429 nBytes
= pJisX0208Data
[nIndex1
].
430 mpToUniTrailTab
[nIndex2
- nFirst
];
432 /* For some reason, the tables in tcvtjp4.tab do not
433 include these two conversions: */
436 case 0xA5: /* YEN SIGN */
440 case 0xAF: /* MACRON */
/* Switch to JIS X 0208 with "ESC $ B" (guard condition not visible), then
   write nBytes high byte first. */
450 if (pDestBufEnd
- pDestBufPtr
>= 3)
452 *pDestBufPtr
++ = 0x1B; /* ESC */
453 *pDestBufPtr
++ = 0x24; /* $ */
454 *pDestBufPtr
++ = 0x42; /* B */
460 if (pDestBufEnd
- pDestBufPtr
>= 2)
462 *pDestBufPtr
++ = (sal_Char
) (nBytes
>> 8);
463 *pDestBufPtr
++ = (sal_Char
) (nBytes
& 0xFF);
/* Bad-input handling; the handler's arguments and the bodies of the three
   outcomes are on lines not shown in this listing. */
475 switch (ImplHandleBadInputUnicodeToTextConversion(
486 case IMPL_BAD_INPUT_STOP
:
490 case IMPL_BAD_INPUT_CONTINUE
:
496 case IMPL_BAD_INPUT_NO_OUTPUT
:
503 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* End of input (the comparison applied to the nInfo mask is on a hidden
   line). */
507 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
508 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
511 sal_Bool bFlush
= sal_True
;
/* NOTE(review): with a high surrogate still pending, SRCBUFFERTOSMALL is
   set when the FLUSH flag IS set ("!= 0").  ImplConvertIso2022JpToUnicode
   in this file does the opposite for truncated input ("== 0", i.e. only
   when NOT flushing).  The asymmetry looks unintended -- confirm which
   polarity is correct before relying on it. */
512 if (nHighSurrogate
!= 0)
514 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
515 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
517 switch (ImplHandleBadInputUnicodeToTextConversion(
528 case IMPL_BAD_INPUT_STOP
:
533 case IMPL_BAD_INPUT_CONTINUE
:
539 case IMPL_BAD_INPUT_NO_OUTPUT
:
540 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* When flushing (the first operands of this condition are not visible), a
   final "ESC ( B" is written; if fewer than 3 bytes are free,
   DESTBUFFERTOSMALL is reported instead. */
546 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
548 if (pDestBufEnd
- pDestBufPtr
>= 3)
550 *pDestBufPtr
++ = 0x1B; /* ESC */
551 *pDestBufPtr
++ = 0x28; /* ( */
552 *pDestBufPtr
++ = 0x42; /* B */
556 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
/* Save the encoder state for the next call. */
562 ((ImplUnicodeToIso2022JpContext
*) pContext
)->m_nHighSurrogate
564 ((ImplUnicodeToIso2022JpContext
*) pContext
)->m_b0208
= b0208
;
/* Report consumed source units and return the number of bytes written. */
569 *pSrcCvtChars
= nConverted
;
571 return pDestBufPtr
- pDestBuf
;