1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "converter.hxx"
29 #include "convertiso2022jp.hxx"
30 #include "tenchelp.hxx"
// States of the ISO-2022-JP-to-Unicode decoder.  The current value is kept
// in ImplIso2022JpToUnicodeContext::m_eState between conversion calls.
34 enum ImplIso2022JpToUnicodeState
// order is important:
// ImplConvertIso2022JpToUnicode tests "eState > ..._STATE_0208" once its loop
// has finished, so that test selects exactly the states listed after
// STATE_0208 below (the second-byte state and the three ESC states).
36 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
,
37 IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
,
38 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
,
39 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
,
40 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
,
41 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
,
42 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
// Decoder context handed around as void *.  ImplConvertIso2022JpToUnicode
// reads it on entry and writes it back before returning.
45 struct ImplIso2022JpToUnicodeContext
// Decoder state to resume from on the next call.
47 ImplIso2022JpToUnicodeState m_eState
;
// Encoder context handed around as void *.  ImplConvertUnicodeToIso2022Jp
// reads it on entry and writes it back before returning.
51 struct ImplUnicodeToIso2022JpContext
// High surrogate seen at the end of a previous call; the create and reset
// functions set it to 0, and the encoder treats 0 as "none pending".
53 sal_Unicode m_nHighSurrogate
;
// Allocates a decoder context with `new` and puts it into the ASCII state.
// ImplDestroyIso2022JpToUnicodeContext is the function that deletes such a
// context.
59 void * ImplCreateIso2022JpToUnicodeContext()
61 ImplIso2022JpToUnicodeContext
* pContext
=
62 new ImplIso2022JpToUnicodeContext
;
// A fresh context starts in the ASCII state.
63 pContext
->m_eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
// Puts the decoder context back into the ASCII state.
// pContext is interpreted as an ImplIso2022JpToUnicodeContext *.
67 void ImplResetIso2022JpToUnicodeContext(void * pContext
)
70 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
71 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
// Deletes a decoder context.  The pointer is cast back to
// ImplIso2022JpToUnicodeContext * so that `delete` sees the real type.
74 void ImplDestroyIso2022JpToUnicodeContext(void * pContext
)
76 delete static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
);
// Decodes ISO-2022-JP bytes into sal_Unicode code units.
//
// pData        - an ImplIso2022JpConverterData; its m_pJisX0208ToUnicodeData
//                table is used for the two-byte lookups.
// pDestBuf     - output buffer; nDestChars code units are available.
// pSrcCvtBytes - receives the number of source bytes consumed (nConverted).
//
// Returns the number of code units written (pDestBufPtr - pDestBuf).
// The decoder state and the pending row are taken from, and written back to,
// the ImplIso2022JpToUnicodeContext passed as pContext.
79 sal_Size
ImplConvertIso2022JpToUnicode(void const * pData
,
83 sal_Unicode
* pDestBuf
,
87 sal_Size
* pSrcCvtBytes
)
// Lead-byte table, indexed by row (first byte of a two-byte character).
89 ImplDBCSToUniLeadTab
const * pJisX0208Data
90 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
91 m_pJisX0208ToUnicodeData
;
// Default state; replaced below by the state saved in the context.
92 ImplIso2022JpToUnicodeState eState
93 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
96 sal_Size nConverted
= 0;
97 sal_Unicode
* pDestBufPtr
= pDestBuf
;
98 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Source offset at which the character currently being decoded began; the
// error paths below rewind nConverted to this value.
99 sal_Size startOfCurrentChar
= 0;
// Resume with the state and row saved in the context.
103 eState
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
;
104 nRow
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
;
// One source byte is consumed per iteration.
107 for (; nConverted
< nSrcBytes
; ++nConverted
)
109 bool bUndefined
= true;
110 sal_uInt32 nChar
= *reinterpret_cast<unsigned char const *>(pSrcBuf
++);
// ASCII: ESC enters the escape state; any other byte below 0x80 is copied
// to the output unchanged if there is room.
113 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
:
114 if (nChar
== 0x1B) // ESC
115 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
116 else if (nChar
< 0x80)
117 if (pDestBufPtr
!= pDestBufEnd
) {
118 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nChar
);
// The next character starts after the byte just consumed.
119 startOfCurrentChar
= nConverted
+ 1;
// JIS Roman: handled like ASCII, except for the substitutions below
// (0x5C becomes U+00A5 YEN SIGN, and U+00AF MACRON is substituted as well).
129 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
:
130 if (nChar
== 0x1B) // ESC
131 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
132 else if (nChar
< 0x80)
133 if (pDestBufPtr
!= pDestBufEnd
)
137 case 0x5C: // REVERSE SOLIDUS (\)
138 nChar
= 0xA5; // YEN SIGN
142 nChar
= 0xAF; // MACRON
145 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nChar
);
146 startOfCurrentChar
= nConverted
+ 1;
// JIS X 0208, first byte: a byte in 0x21..0x7E moves on to the second-byte
// state; ESC enters the escape state.
157 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
:
158 if (nChar
== 0x1B) // ESC
159 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
160 else if (nChar
>= 0x21 && nChar
<= 0x7E)
163 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
;
// JIS X 0208, second byte: look the byte up in the trail table of row nRow.
172 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
:
173 if (nChar
>= 0x21 && nChar
<= 0x7E)
// nUnicode stays 0 unless the table lookup below assigns it.
175 sal_uInt16 nUnicode
= 0;
176 sal_uInt32 nFirst
= pJisX0208Data
[nRow
].mnTrailStart
;
178 && nChar
<= pJisX0208Data
[nRow
].mnTrailEnd
)
// The trail table is indexed relative to mnTrailStart.
179 nUnicode
= pJisX0208Data
[nRow
].
180 mpToUniTrailTab
[nChar
- nFirst
];
182 if (pDestBufPtr
!= pDestBufEnd
)
184 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nUnicode
);
// Character complete: expect a first byte again.
185 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
186 startOfCurrentChar
= nConverted
+ 1;
// After ESC: the next byte selects the ESC_DOLLAR or the ESC_LPAREN state.
200 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
:
204 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
;
208 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
;
// After ESC_LPAREN: the final byte selects ASCII or JIS Roman.
217 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
:
221 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
225 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
;
// After ESC_DOLLAR: the final byte selects JIS X 0208.
234 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
:
239 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
// Bad input inside the loop: the shared helper decides, based on nFlags,
// whether to stop, to continue, or to report that nothing could be written.
251 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
252 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
// Stop: fall back to ASCII; the FLUSH flag decides how nConverted is
// adjusted, one outcome being a rewind to the start of the current character.
255 case sal::detail::textenc::BAD_INPUT_STOP
:
256 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
257 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0) {
260 nConverted
= startOfCurrentChar
;
// Continue: resume in ASCII, with a new character starting at the next byte.
264 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
265 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
266 startOfCurrentChar
= nConverted
+ 1;
269 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Report that the destination buffer is too small.
276 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// After the loop: handle input that ended in one of the states ordered after
// STATE_0208 (a pending second byte or an unfinished escape sequence).  The
// error and buffer-too-small bits of nInfo take part in this test.
280 if (eState
> IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
281 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
282 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
))
// Without FLUSH, report that the source buffer is too small.
285 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
286 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
;
// Otherwise treat the truncated sequence as bad input (first argument false).
288 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
289 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
292 case sal::detail::textenc::BAD_INPUT_STOP
:
293 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) != 0) {
// Rewind so that the incomplete character is not counted as consumed.
294 nConverted
= startOfCurrentChar
;
297 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
298 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
301 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
302 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// Save the state and the pending row in the context for the next call.
309 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
= eState
;
310 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report the number of source bytes consumed.
315 *pSrcCvtBytes
= nConverted
;
// Number of code units written to pDestBuf.
317 return pDestBufPtr
- pDestBuf
;
// Allocates an encoder context with `new`, with no pending high surrogate
// and the 0208 flag cleared.  ImplDestroyUnicodeToIso2022JpContext is the
// function that deletes such a context.
320 void * ImplCreateUnicodeToIso2022JpContext()
322 ImplUnicodeToIso2022JpContext
* pContext
=
323 new ImplUnicodeToIso2022JpContext
;
// 0 means that no high surrogate is pending.
324 pContext
->m_nHighSurrogate
= 0;
325 pContext
->m_b0208
= false;
// Puts the encoder context back into the state a newly created one has:
// no pending high surrogate and the 0208 flag cleared.
// pContext is interpreted as an ImplUnicodeToIso2022JpContext *.
329 void ImplResetUnicodeToIso2022JpContext(void * pContext
)
333 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
= 0;
334 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= false;
// Deletes an encoder context.  The pointer is cast back to
// ImplUnicodeToIso2022JpContext * so that `delete` sees the real type.
338 void ImplDestroyUnicodeToIso2022JpContext(void * pContext
)
340 delete static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
);
// Encodes sal_Unicode code units as ISO-2022-JP bytes.
//
// pData        - an ImplIso2022JpConverterData; its m_pUnicodeToJisX0208Data
//                table is used for the two-byte lookups.
// pSrcBuf      - input code units.
// pSrcCvtChars - receives the number of source code units consumed
//                (nConverted).
//
// Returns the number of bytes written (pDestBufPtr - pDestBuf).
// A pending high surrogate and the b0208 flag are taken from, and written
// back to, the ImplUnicodeToIso2022JpContext passed as pContext.
343 sal_Size
ImplConvertUnicodeToIso2022Jp(void const * pData
,
345 sal_Unicode
const * pSrcBuf
,
351 sal_Size
* pSrcCvtChars
)
// Table indexed by the high byte of the code point.
353 ImplUniToDBCSHighTab
const * pJisX0208Data
354 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
355 m_pUnicodeToJisX0208Data
;
// 0 means that no high surrogate is pending.
356 sal_Unicode nHighSurrogate
= 0;
358 sal_uInt32 nInfo
= 0;
359 sal_Size nConverted
= 0;
360 char * pDestBufPtr
= pDestBuf
;
361 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Resume with the surrogate and 0208 flag saved in the context.
367 = static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
;
368 b0208
= static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
;
// One source code unit is consumed per iteration.
371 for (; nConverted
< nSrcChars
; ++nConverted
)
373 bool bUndefined
= true;
374 sal_uInt32 nChar
= *pSrcBuf
++;
// No surrogate pending: a high surrogate is remembered for the next unit.
375 if (nHighSurrogate
== 0)
377 if (rtl::isHighSurrogate(nChar
))
379 nHighSurrogate
= static_cast<sal_Unicode
>(nChar
);
382 else if (rtl::isLowSurrogate(nChar
))
// Surrogate pending and a low surrogate follows: combine both into one
// code point.
388 else if (rtl::isLowSurrogate(nChar
))
389 nChar
= rtl::combineSurrogates(nHighSurrogate
, nChar
);
396 assert(rtl::isUnicodeScalarValue(nChar
));
// Line ends: the three bytes ESC ( B are written when at least 3 bytes are
// free, followed by the character itself as a single byte.
// NOTE(review): ESC ( B presumably switches the output back to ASCII -
// confirm against the b0208 handling.
398 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
402 if (pDestBufEnd
- pDestBufPtr
>= 3)
404 *pDestBufPtr
++ = 0x1B; // ESC
405 *pDestBufPtr
++ = 0x28; // (
406 *pDestBufPtr
++ = 0x42; // B
412 if (pDestBufPtr
!= pDestBufEnd
)
413 *pDestBufPtr
++ = static_cast< char >(nChar
);
// ESC (0x1B) is tested before the general "below 0x80" branch.
417 else if (nChar
== 0x1B)
// Other characters below 0x80: same output pattern as for the line ends.
419 else if (nChar
< 0x80)
423 if (pDestBufEnd
- pDestBufPtr
>= 3)
425 *pDestBufPtr
++ = 0x1B; // ESC
426 *pDestBufPtr
++ = 0x28; // (
427 *pDestBufPtr
++ = 0x42; // B
433 if (pDestBufPtr
!= pDestBufEnd
)
434 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Everything else: look up a two-byte code.  nBytes stays 0 unless the table
// lookup below assigns it.
440 sal_uInt16 nBytes
= 0;
// The high byte selects the table entry.
441 sal_uInt32 nIndex1
= nChar
>> 8;
// The low byte must lie in the entry's [mnLowStart, mnLowEnd] range.
444 sal_uInt32 nIndex2
= nChar
& 0xFF;
445 sal_uInt32 nFirst
= pJisX0208Data
[nIndex1
].mnLowStart
;
446 if (nIndex2
>= nFirst
447 && nIndex2
<= pJisX0208Data
[nIndex1
].mnLowEnd
)
// The per-entry table is indexed relative to mnLowStart.
449 nBytes
= pJisX0208Data
[nIndex1
].
450 mpToUniTrailTab
[nIndex2
- nFirst
];
452 // For some reason, the tables in tcvtjp4.tab do not
453 // include these two conversions:
456 case 0xA5: // YEN SIGN
// The three bytes ESC $ B are written when at least 3 bytes are free.
470 if (pDestBufEnd
- pDestBufPtr
>= 3)
472 *pDestBufPtr
++ = 0x1B; // ESC
473 *pDestBufPtr
++ = 0x24; // $
474 *pDestBufPtr
++ = 0x42; // B
// The two-byte code is written high byte first.
480 if (pDestBufEnd
- pDestBufPtr
>= 2)
482 *pDestBufPtr
++ = static_cast< char >(nBytes
>> 8);
483 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0xFF);
// Bad input inside the loop: the shared helper is handed the sequence
// "\x1B(B" with length 3 when b0208 is set and length 0 otherwise.
495 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
496 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
497 &nInfo
, "\x1B(B", b0208
? 3 : 0, &bWritten
))
499 case sal::detail::textenc::BAD_INPUT_STOP
:
503 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
509 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Report that the destination buffer is too small.
516 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// After the loop: the error and buffer-too-small bits of nInfo are tested
// before the end-of-input handling below.
520 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
521 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// The input ended while a high surrogate was still pending.
525 if (nHighSurrogate
!= 0)
// With FLUSH set, the source-buffer-too-small bit is reported.
527 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
528 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
// Otherwise the lone surrogate is handed to the bad-input helper
// (first argument false, character 0).
530 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
531 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
532 "\x1B(B", b0208
? 3 : 0, &bWritten
))
534 case sal::detail::textenc::BAD_INPUT_STOP
:
539 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
545 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
546 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// On FLUSH: write the three bytes ESC ( B when at least 3 bytes are free;
// otherwise report that the destination buffer is too small.
552 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
554 if (pDestBufEnd
- pDestBufPtr
>= 3)
556 *pDestBufPtr
++ = 0x1B; // ESC
557 *pDestBufPtr
++ = 0x28; // (
558 *pDestBufPtr
++ = 0x42; // B
562 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Save the pending surrogate and the 0208 flag in the context for the next
// call.
568 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
570 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= b0208
;
// Report the number of source code units consumed.
575 *pSrcCvtChars
= nConverted
;
// Number of bytes written to pDestBuf.
577 return pDestBufPtr
- pDestBuf
;
580 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */