1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "sal/config.h"
31 #include "rtl/textcvt.h"
32 #include "sal/types.h"
34 #include "context.hxx"
35 #include "converter.hxx"
36 #include "convertiso2022jp.hxx"
37 #include "tenchelp.hxx"
38 #include "unichars.hxx"
// States of the ISO-2022-JP -> Unicode decoder state machine.
42 enum ImplIso2022JpToUnicodeState
// order is important:
// The decoder tests `eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208` after
// its main loop, so every state that sits in the middle of a sequence (second
// byte of a two-byte character, or inside an escape sequence) has to be listed
// after STATE_0208, and the "between characters" states before it.
44 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
,
45 IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
,
46 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
,
47 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
,
48 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
,
49 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
,
50 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
// Decoder state that ImplConvertIso2022JpToUnicode loads on entry and stores
// back before returning.
// NOTE(review): the converter also reads and writes an m_nRow member on this
// context; its declaration is not visible in this excerpt.
53 struct ImplIso2022JpToUnicodeContext
// Current position in the decoder state machine.
55 ImplIso2022JpToUnicodeState m_eState
;
// Encoder state that ImplConvertUnicodeToIso2022Jp loads on entry and stores
// back before returning.
// NOTE(review): the create/reset functions and the converter also use an
// m_b0208 member on this context; its declaration is not visible in this
// excerpt.
59 struct ImplUnicodeToIso2022JpContext
// Pending UTF-16 high surrogate, or 0 when none is pending (create/reset set
// it to 0 and the encoder tests it against 0).
61 sal_Unicode m_nHighSurrogate
;
// Allocates a decoder context with `new` and puts it into the initial ASCII
// state.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the new context is handed back as void * -- confirm.
67 void * ImplCreateIso2022JpToUnicodeContext()
69 ImplIso2022JpToUnicodeContext
* pContext
=
70 new ImplIso2022JpToUnicodeContext
;
71 pContext
->m_eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
// Puts an existing decoder context back into the initial ASCII state.
// pContext is cast to ImplIso2022JpToUnicodeContext * without any check
// visible in this excerpt.
75 void ImplResetIso2022JpToUnicodeContext(void * pContext
)
78 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
79 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
// Releases a decoder context.  The pointer is cast back to its concrete type
// before `delete`, matching the `new ImplIso2022JpToUnicodeContext` in the
// create function.
82 void ImplDestroyIso2022JpToUnicodeContext(void * pContext
)
84 delete static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
);
// Decodes ISO-2022-JP bytes into sal_Unicode units with a byte-at-a-time state
// machine over ImplIso2022JpToUnicodeState.
//
// pData         cast to ImplIso2022JpConverterData const *; supplies the
//               JIS X 0208 lead/trail lookup table.
// pDestBuf      start of the output buffer (the end is pDestBuf + nDestChars).
// pSrcCvtBytes  receives the number of source bytes consumed.
// Returns the number of sal_Unicode units written (pDestBufPtr - pDestBuf).
//
// NOTE(review): part of the parameter list (pContext, pSrcBuf, nSrcBytes,
// nDestChars, nFlags, ...), the declaration of nRow, the switch headers and
// several branches are not visible in this excerpt.
87 sal_Size
ImplConvertIso2022JpToUnicode(void const * pData
,
91 sal_Unicode
* pDestBuf
,
95 sal_Size
* pSrcCvtBytes
)
// Table indexed by row (nRow) that maps a trail byte to a Unicode value.
97 ImplDBCSToUniLeadTab
const * pJisX0208Data
98 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
99 m_pJisX0208ToUnicodeData
;
// Defaults used unless overwritten from the context below.
100 ImplIso2022JpToUnicodeState eState
101 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
103 sal_uInt32 nInfo
= 0;
104 sal_Size nConverted
= 0;
105 sal_Unicode
* pDestBufPtr
= pDestBuf
;
106 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Load the state and row stored in the context; both are written back to the
// context at the end of this function.
110 eState
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
;
111 nRow
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
;
// Main loop: one source byte per iteration; nConverted counts consumed bytes.
114 for (; nConverted
< nSrcBytes
; ++nConverted
)
116 bool bUndefined
= true;
// Read the byte as unsigned so the range comparisons below behave as intended.
117 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
// ASCII: ESC opens an escape sequence; bytes below 0x80 are copied unchanged
// when there is room in the destination.
120 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
:
121 if (nChar
== 0x1B) // ESC
122 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
123 else if (nChar
< 0x80)
124 if (pDestBufPtr
!= pDestBufEnd
)
125 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
// JIS Roman: same handling as ASCII, except that 0x5C is emitted as YEN SIGN
// and one further byte (its case label is not visible here) as MACRON.
135 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
:
136 if (nChar
== 0x1B) // ESC
137 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
138 else if (nChar
< 0x80)
139 if (pDestBufPtr
!= pDestBufEnd
)
143 case 0x5C: // REVERSE SOLIDUS (\)
144 nChar
= 0xA5; // YEN SIGN
148 nChar
= 0xAF; // MACRON
151 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
// JIS X 0208, first byte: a byte in 0x21..0x7E moves on to the second-byte
// state (presumably after recording the row in nRow -- not visible here).
162 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
:
163 if (nChar
== 0x1B) // ESC
164 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
165 else if (nChar
>= 0x21 && nChar
<= 0x7E)
168 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
;
// JIS X 0208, second byte: look the byte up in the row's trail table if it
// lies within [mnTrailStart, mnTrailEnd]; nUnicode stays 0 otherwise.
177 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
:
178 if (nChar
>= 0x21 && nChar
<= 0x7E)
180 sal_uInt16 nUnicode
= 0;
181 sal_uInt32 nFirst
= pJisX0208Data
[nRow
].mnTrailStart
;
183 && nChar
<= pJisX0208Data
[nRow
].mnTrailEnd
)
184 nUnicode
= pJisX0208Data
[nRow
].
185 mpToUniTrailTab
[nChar
- nFirst
];
187 if (pDestBufPtr
!= pDestBufEnd
)
// Emit the character and return to the first-byte state.
189 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
190 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
// After ESC: the next byte selects the ESC_DOLLAR or ESC_LPAREN state (the
// byte tests themselves are not visible in this excerpt).
204 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
:
208 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
;
212 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
;
// After ESC ( : the final byte switches to ASCII or JIS Roman.
221 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
:
225 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
229 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
;
// After ESC $ : the final byte switches to JIS X 0208.
238 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
:
243 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
// Bad-input path: delegate the policy decision to the shared helper.  Both the
// STOP and the CONTINUE outcome reset the decoder to ASCII.
255 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
256 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
259 case sal::detail::textenc::BAD_INPUT_STOP
:
260 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
263 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
264 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
267 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
274 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// End-of-input check: a state greater than STATE_0208 means the input stopped
// in the middle of a two-byte character or an escape sequence.
// NOTE(review): the comparison applied to the masked nInfo value is not
// visible here; presumably this only runs when no error was flagged.
278 if (eState
> IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
279 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
280 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
// Without FLUSH more input may follow, so only report that the source was too
// small; the bad-input handling below presumably covers the FLUSH case.
283 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
284 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
286 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
287 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
290 case sal::detail::textenc::BAD_INPUT_STOP
:
291 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
292 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
295 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
296 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// Store the decoder state back into the context.
303 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
= eState
;
304 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report how many source bytes were consumed.
309 *pSrcCvtBytes
= nConverted
;
// Number of sal_Unicode units written.
311 return pDestBufPtr
- pDestBuf
;
// Allocates an encoder context with `new`, with no pending high surrogate and
// m_b0208 cleared.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the new context is handed back as void * -- confirm.
314 void * ImplCreateUnicodeToIso2022JpContext()
316 ImplUnicodeToIso2022JpContext
* pContext
=
317 new ImplUnicodeToIso2022JpContext
;
318 pContext
->m_nHighSurrogate
= 0;
319 pContext
->m_b0208
= false;
// Puts an existing encoder context back into the same state the create
// function establishes: no pending high surrogate, m_b0208 false.
// pContext is cast to ImplUnicodeToIso2022JpContext * without any check
// visible in this excerpt.
323 void ImplResetUnicodeToIso2022JpContext(void * pContext
)
327 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
= 0;
328 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= false;
// Releases an encoder context.  The pointer is cast back to its concrete type
// before `delete`, matching the `new ImplUnicodeToIso2022JpContext` in the
// create function.
332 void ImplDestroyUnicodeToIso2022JpContext(void * pContext
)
334 delete static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
);
// Encodes UTF-16 code units (sal_Unicode) as ISO-2022-JP bytes.
//
// pData         cast to ImplIso2022JpConverterData const *; supplies the
//               Unicode -> JIS X 0208 lookup table.
// pSrcBuf       source UTF-16 code units.
// pSrcCvtChars  receives the number of source code units consumed.
// Returns the number of bytes written (pDestBufPtr - pDestBuf).
//
// The code writes ESC $ B before two-byte output and ESC ( B before
// single-byte output; b0208 appears to record which of the two sets is
// currently selected (its declaration and assignments are not visible here).
//
// NOTE(review): part of the parameter list (pContext, nSrcChars, pDestBuf,
// nDestBytes, nFlags, ...), the declarations of b0208 and bWritten, and a
// number of branches are not visible in this excerpt.
337 sal_Size
ImplConvertUnicodeToIso2022Jp(void const * pData
,
339 sal_Unicode
const * pSrcBuf
,
345 sal_Size
* pSrcCvtChars
)
// Table indexed by the high byte of the code point.
347 ImplUniToDBCSHighTab
const * pJisX0208Data
348 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
349 m_pUnicodeToJisX0208Data
;
350 sal_Unicode nHighSurrogate
= 0;
352 sal_uInt32 nInfo
= 0;
353 sal_Size nConverted
= 0;
354 char * pDestBufPtr
= pDestBuf
;
355 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Load the pending surrogate and the b0208 flag stored in the context; both
// are written back to the context at the end of this function.
361 = static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
;
362 b0208
= static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
;
// Main loop: one UTF-16 code unit per iteration.
365 for (; nConverted
< nSrcChars
; ++nConverted
)
367 bool bUndefined
= true;
368 sal_uInt32 nChar
= *pSrcBuf
++;
// Surrogate handling: with nothing pending, a high surrogate is remembered;
// with one pending, a following low surrogate is combined with it into a
// single code point.
369 if (nHighSurrogate
== 0)
371 if (ImplIsHighSurrogate(nChar
))
373 nHighSurrogate
= (sal_Unicode
) nChar
;
377 else if (ImplIsLowSurrogate(nChar
))
378 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
// A low surrogate remaining at this point, or a noncharacter, gets separate
// treatment (the branch body is not visible; presumably the bad-input path).
385 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
// LF / CR: write ESC ( B when at least 3 bytes are free (the enclosing
// condition, presumably a b0208 test, is not visible), then the byte itself.
391 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
395 if (pDestBufEnd
- pDestBufPtr
>= 3)
397 *pDestBufPtr
++ = 0x1B; // ESC
398 *pDestBufPtr
++ = 0x28; // (
399 *pDestBufPtr
++ = 0x42; // B
405 if (pDestBufPtr
!= pDestBufEnd
)
406 *pDestBufPtr
++ = static_cast< char >(nChar
);
// A literal ESC in the input is singled out (branch body not visible).
410 else if (nChar
== 0x1B)
// Other code points below 0x80: same ESC ( B + single byte output as above.
412 else if (nChar
< 0x80)
416 if (pDestBufEnd
- pDestBufPtr
>= 3)
418 *pDestBufPtr
++ = 0x1B; // ESC
419 *pDestBufPtr
++ = 0x28; // (
420 *pDestBufPtr
++ = 0x42; // B
426 if (pDestBufPtr
!= pDestBufEnd
)
427 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Everything else: two-level table lookup.  nIndex1 is the code point shifted
// right by 8 and selects the table entry; nIndex2 (low byte) must lie within
// [mnLowStart, mnLowEnd] of that entry, otherwise nBytes stays 0.
433 sal_uInt16 nBytes
= 0;
434 sal_uInt32 nIndex1
= nChar
>> 8;
437 sal_uInt32 nIndex2
= nChar
& 0xFF;
438 sal_uInt32 nFirst
= pJisX0208Data
[nIndex1
].mnLowStart
;
439 if (nIndex2
>= nFirst
440 && nIndex2
<= pJisX0208Data
[nIndex1
].mnLowEnd
)
442 nBytes
= pJisX0208Data
[nIndex1
].
443 mpToUniTrailTab
[nIndex2
- nFirst
];
// For some reason, the tables in tcvtjp4.tab do not
// include these two conversions:
449 case 0xA5: // YEN SIGN
// Write ESC $ B when at least 3 bytes are free (the enclosing condition is
// not visible).
463 if (pDestBufEnd
- pDestBufPtr
>= 3)
465 *pDestBufPtr
++ = 0x1B; // ESC
466 *pDestBufPtr
++ = 0x24; // $
467 *pDestBufPtr
++ = 0x42; // B
// Emit the two-byte code, high byte first.
473 if (pDestBufEnd
- pDestBufPtr
>= 2)
475 *pDestBufPtr
++ = static_cast< char >(nBytes
>> 8);
476 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0xFF);
// Bad-input path: delegate to the shared helper.  It is given "\x1B(B" with a
// length of 3 when b0208 is set and 0 otherwise.
488 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
489 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
490 &nInfo
, "\x1B(B", b0208
? 3 : 0, &bWritten
))
492 case sal::detail::textenc::BAD_INPUT_STOP
:
496 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
502 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
509 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// End-of-input handling.
// NOTE(review): the comparison applied to the masked nInfo value is not
// visible here; presumably this only runs when no error was flagged.
513 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
514 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// A high surrogate is still pending at the end of the input.
518 if (nHighSurrogate
!= 0)
// NOTE(review): SRCBUFFERTOSMALL is reported here when FLUSH *is* set, whereas
// ImplConvertIso2022JpToUnicode reports it when FLUSH is *not* set -- confirm
// that this asymmetry is intended.
520 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
521 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
523 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
524 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
525 "\x1B(B", b0208
? 3 : 0, &bWritten
))
527 case sal::detail::textenc::BAD_INPUT_STOP
:
532 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
538 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
539 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// On FLUSH (the first part of this condition is not visible), write a final
// ESC ( B if 3 bytes are free; otherwise flag the destination as too small.
545 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
547 if (pDestBufEnd
- pDestBufPtr
>= 3)
549 *pDestBufPtr
++ = 0x1B; // ESC
550 *pDestBufPtr
++ = 0x28; // (
551 *pDestBufPtr
++ = 0x42; // B
555 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Store the encoder state back into the context.
561 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
563 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= b0208
;
// Report how many source code units were consumed.
568 *pSrcCvtChars
= nConverted
;
// Number of bytes written.
570 return pDestBufPtr
- pDestBuf
;
573 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */