1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "converter.hxx"
29 #include "convertiso2022jp.hxx"
30 #include "tenchelp.hxx"
31 #include "unichars.hxx"
// Decoder states for the ISO-2022-JP to Unicode conversion in this file.
// ImplConvertIso2022JpToUnicode compares its current state against
// IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208 with '>' once its loop has finished,
// so the position of every enumerator relative to that one is significant.
35 enum ImplIso2022JpToUnicodeState
// order is important:
37 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
,
38 IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
,
39 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
,
// The enumerators from here on are the ones that compare greater than
// ..._STATE_0208 in the converter's post-loop check.
40 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
,
41 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
,
42 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
,
43 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
// Decoder state carried between calls of ImplConvertIso2022JpToUnicode.
// The converter reads m_eState from this context before its loop and writes
// it back afterwards.  It also reads and writes an m_nRow member through this
// type; that member's declaration is not visible in this excerpt.
46 struct ImplIso2022JpToUnicodeContext
48 ImplIso2022JpToUnicodeState m_eState
;
// Encoder state carried between calls of ImplConvertUnicodeToIso2022Jp.
// m_nHighSurrogate is set to 0 on creation/reset and is copied into the
// converter's nHighSurrogate local on entry.  The create/reset/convert
// functions also access an m_b0208 member through this type; its declaration
// is not visible in this excerpt.
52 struct ImplUnicodeToIso2022JpContext
54 sal_Unicode m_nHighSurrogate
;
// Allocates a new ImplIso2022JpToUnicodeContext with `new` and puts it into
// the ASCII state.  The matching release is the `delete` in
// ImplDestroyIso2022JpToUnicodeContext.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the new context is returned as void * -- confirm.
60 void * ImplCreateIso2022JpToUnicodeContext()
62 ImplIso2022JpToUnicodeContext
* pContext
=
63 new ImplIso2022JpToUnicodeContext
;
// Same initial state that ImplResetIso2022JpToUnicodeContext restores.
64 pContext
->m_eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
// Puts an existing decoder context back into the ASCII state.
// pContext is cast to ImplIso2022JpToUnicodeContext *, so it has to point to
// an object of that type.
// NOTE(review): whether a null check guards the assignment is not visible in
// this excerpt -- confirm before passing a null context.
68 void ImplResetIso2022JpToUnicodeContext(void * pContext
)
71 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
72 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
// Releases a decoder context: pContext is cast back to
// ImplIso2022JpToUnicodeContext * and deleted, which pairs with the `new` in
// ImplCreateIso2022JpToUnicodeContext.
75 void ImplDestroyIso2022JpToUnicodeContext(void * pContext
)
77 delete static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
);
// Converts ISO-2022-JP encoded bytes into sal_Unicode values.
//
// As far as this excerpt shows, the function
//  - takes the JIS X 0208 lead table from pData
//    (ImplIso2022JpConverterData::m_pJisX0208ToUnicodeData);
//  - resumes from the m_eState / m_nRow values stored in pContext and stores
//    the final values back there before returning;
//  - reads one source byte per loop iteration and handles it according to
//    the current eState;
//  - hands bad input to
//    sal::detail::textenc::handleBadInputTextToUnicodeConversion;
//  - writes the loop counter nConverted to *pSrcCvtBytes and returns
//    pDestBufPtr - pDestBuf, i.e. the number of sal_Unicode values written.
//
// NOTE(review): pContext, pSrcBuf, nSrcBytes, nDestChars, nFlags, nRow and
// nInfo are used below but their declarations are not visible in this
// excerpt (presumably further parameters and locals) -- confirm against the
// full file.  The same holds for the switch/break/else/brace lines that
// connect the fragments below.
80 sal_Size
ImplConvertIso2022JpToUnicode(void const * pData
,
84 sal_Unicode
* pDestBuf
,
88 sal_Size
* pSrcCvtBytes
)
90 ImplDBCSToUniLeadTab
const * pJisX0208Data
91 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
92 m_pJisX0208ToUnicodeData
;
93 ImplIso2022JpToUnicodeState eState
94 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
97 sal_Size nConverted
= 0;
98 sal_Unicode
* pDestBufPtr
= pDestBuf
;
// One-past-the-end of the destination buffer; compared against pDestBufPtr
// before every write below.
99 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Set to nConverted + 1 after each emitted character; nConverted is reset to
// this value in the BAD_INPUT_STOP handling further down.
100 sal_Size startOfCurrentChar
= 0;
// Resume from the state saved in the context.
104 eState
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
;
105 nRow
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
;
// Main loop: nConverted counts source bytes; each iteration fetches one byte
// (as unsigned char) into nChar and advances pSrcBuf.
108 for (; nConverted
< nSrcBytes
; ++nConverted
)
110 bool bUndefined
= true;
111 sal_uInt32 nChar
= *reinterpret_cast<unsigned char const *>(pSrcBuf
++);
// ASCII state: ESC switches to the ESC state; a byte below 0x80 is written
// unchanged if the destination still has room.
114 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
:
115 if (nChar
== 0x1B) // ESC
116 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
117 else if (nChar
< 0x80)
118 if (pDestBufPtr
!= pDestBufEnd
) {
119 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nChar
);
120 startOfCurrentChar
= nConverted
+ 1;
// JIS Roman state: like ASCII, except that 0x5C is replaced by 0xA5 (YEN
// SIGN) and one further value by 0xAF (MACRON) before the write; the case
// label for the MACRON replacement is not visible in this excerpt.
130 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
:
131 if (nChar
== 0x1B) // ESC
132 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
133 else if (nChar
< 0x80)
134 if (pDestBufPtr
!= pDestBufEnd
)
138 case 0x5C: // REVERSE SOLIDUS (\)
139 nChar
= 0xA5; // YEN SIGN
143 nChar
= 0xAF; // MACRON
146 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nChar
);
147 startOfCurrentChar
= nConverted
+ 1;
// 0208 state: a byte in 0x21..0x7E moves on to the 0208_2 state.
// (Presumably nRow is derived from this byte on a line not shown -- confirm.)
158 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
:
159 if (nChar
== 0x1B) // ESC
160 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
161 else if (nChar
>= 0x21 && nChar
<= 0x7E)
164 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
;
// 0208_2 state: a byte in 0x21..0x7E is looked up in row nRow of the table.
// nUnicode stays 0 unless the byte is within the row's mnTrailStart..mnTrailEnd
// range (the lower-bound half of that test is on a line not shown here).
173 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
:
174 if (nChar
>= 0x21 && nChar
<= 0x7E)
176 sal_uInt16 nUnicode
= 0;
177 sal_uInt32 nFirst
= pJisX0208Data
[nRow
].mnTrailStart
;
179 && nChar
<= pJisX0208Data
[nRow
].mnTrailEnd
)
180 nUnicode
= pJisX0208Data
[nRow
].
181 mpToUniTrailTab
[nChar
- nFirst
];
// After a successful write the decoder returns to the 0208 state.
183 if (pDestBufPtr
!= pDestBufEnd
)
185 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nUnicode
);
186 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
187 startOfCurrentChar
= nConverted
+ 1;
// Escape-sequence states.  The byte tests that choose between the targets are
// not visible in this excerpt; only the resulting transitions are:
//   ESC        -> ESC_DOLLAR or ESC_LPAREN
//   ESC_LPAREN -> ASCII or JIS_ROMAN
//   ESC_DOLLAR -> 0208
201 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
:
205 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
;
209 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
;
218 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
:
222 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
226 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
;
235 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
:
240 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
// Bad-input handling inside the loop; bUndefined is passed through to the
// helper.  Both the STOP and CONTINUE results reset the decoder to ASCII.
// NOTE(review): the branch structure around the FLUSH test below is only
// partly visible, so which branch rewinds nConverted to startOfCurrentChar
// cannot be told from this excerpt -- confirm against the full file.
252 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
253 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
256 case sal::detail::textenc::BAD_INPUT_STOP
:
257 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
258 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0) {
261 nConverted
= startOfCurrentChar
;
265 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
266 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
267 startOfCurrentChar
= nConverted
+ 1;
270 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
277 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// Post-loop check: taken when the state is one of those ordered after
// ..._STATE_0208, combined with a test of the ERROR / DESTBUFFERTOOSMALL bits
// in nInfo (the comparison operand of that test is not visible here).
// Without the FLUSH flag, SRCBUFFERTOOSMALL is reported; a second bad-input
// call follows (presumably as the else branch -- confirm).
281 if (eState
> IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
282 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
283 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
))
286 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
287 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
;
289 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
290 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
293 case sal::detail::textenc::BAD_INPUT_STOP
:
294 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) != 0) {
295 nConverted
= startOfCurrentChar
;
298 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
299 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
302 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
303 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// Save the decoder state so that a later call can resume from it.
310 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
= eState
;
311 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report the value of nConverted through pSrcCvtBytes; the return value is
// the number of sal_Unicode values written to pDestBuf.
316 *pSrcCvtBytes
= nConverted
;
318 return pDestBufPtr
- pDestBuf
;
// Allocates a new ImplUnicodeToIso2022JpContext with `new` and initialises
// it with m_nHighSurrogate = 0 and m_b0208 = false.  The matching release is
// the `delete` in ImplDestroyUnicodeToIso2022JpContext.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the new context is returned as void * -- confirm.
321 void * ImplCreateUnicodeToIso2022JpContext()
323 ImplUnicodeToIso2022JpContext
* pContext
=
324 new ImplUnicodeToIso2022JpContext
;
// Same initial values that ImplResetUnicodeToIso2022JpContext restores.
325 pContext
->m_nHighSurrogate
= 0;
326 pContext
->m_b0208
= false;
// Puts an existing encoder context back into its initial state:
// m_nHighSurrogate = 0 and m_b0208 = false.
// pContext is cast to ImplUnicodeToIso2022JpContext *, so it has to point to
// an object of that type.
// NOTE(review): whether a null check guards the assignments is not visible
// in this excerpt -- confirm before passing a null context.
330 void ImplResetUnicodeToIso2022JpContext(void * pContext
)
334 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
= 0;
335 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= false;
// Releases an encoder context: pContext is cast back to
// ImplUnicodeToIso2022JpContext * and deleted, which pairs with the `new` in
// ImplCreateUnicodeToIso2022JpContext.
339 void ImplDestroyUnicodeToIso2022JpContext(void * pContext
)
341 delete static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
);
// Converts sal_Unicode input into ISO-2022-JP bytes.
//
// As far as this excerpt shows, the function
//  - takes the Unicode-to-JIS X 0208 table from pData
//    (ImplIso2022JpConverterData::m_pUnicodeToJisX0208Data);
//  - resumes from the m_nHighSurrogate / m_b0208 values stored in pContext
//    and stores b0208 (and presumably nHighSurrogate) back before returning;
//  - reads one sal_Unicode per loop iteration, combining a pending high
//    surrogate with a following low surrogate via rtl::combineSurrogates;
//  - writes the three-byte sequences ESC ( B and ESC $ B as well as the
//    character bytes themselves, each guarded by a room check against
//    pDestBufEnd;
//  - hands bad input to
//    sal::detail::textenc::handleBadInputUnicodeToTextConversion;
//  - writes the loop counter nConverted to *pSrcCvtChars and returns
//    pDestBufPtr - pDestBuf, i.e. the number of bytes written.
//
// NOTE(review): pContext, pDestBuf, nDestBytes, nSrcChars, nFlags, b0208 and
// bWritten are used below but their declarations are not visible in this
// excerpt (presumably further parameters and locals) -- confirm against the
// full file.  The same holds for the else/break/brace lines that connect the
// fragments below.
344 sal_Size
ImplConvertUnicodeToIso2022Jp(void const * pData
,
346 sal_Unicode
const * pSrcBuf
,
352 sal_Size
* pSrcCvtChars
)
354 ImplUniToDBCSHighTab
const * pJisX0208Data
355 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
356 m_pUnicodeToJisX0208Data
;
357 sal_Unicode nHighSurrogate
= 0;
359 sal_uInt32 nInfo
= 0;
360 sal_Size nConverted
= 0;
361 char * pDestBufPtr
= pDestBuf
;
// One-past-the-end of the destination buffer; all room checks below are
// made against it.
362 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Resume from the values saved in the context (the left-hand side of the
// first assignment is on a line not shown; presumably nHighSurrogate).
368 = static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
;
369 b0208
= static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
;
// Main loop: nConverted counts source characters; each iteration fetches one
// sal_Unicode into nChar and advances pSrcBuf.
372 for (; nConverted
< nSrcChars
; ++nConverted
)
374 bool bUndefined
= true;
375 sal_uInt32 nChar
= *pSrcBuf
++;
// Surrogate handling: with no pending high surrogate, a high surrogate is
// remembered in nHighSurrogate and a low surrogate gets separate treatment
// (not shown here); with one pending, a low surrogate is combined with it.
376 if (nHighSurrogate
== 0)
378 if (rtl::isHighSurrogate(nChar
))
380 nHighSurrogate
= static_cast<sal_Unicode
>(nChar
);
383 else if (rtl::isLowSurrogate(nChar
))
389 else if (rtl::isLowSurrogate(nChar
))
390 nChar
= rtl::combineSurrogates(nHighSurrogate
, nChar
);
397 assert(rtl::isUnicodeScalarValue(nChar
));
// LF / CR: ESC ( B is written when at least three bytes of room remain, and
// the character itself when at least one byte remains.  The condition that
// selects the escape write (presumably b0208) and the handling of a full
// buffer are not visible in this excerpt.
399 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
403 if (pDestBufEnd
- pDestBufPtr
>= 3)
405 *pDestBufPtr
++ = 0x1B; // ESC
406 *pDestBufPtr
++ = 0x28; // (
407 *pDestBufPtr
++ = 0x42; // B
413 if (pDestBufPtr
!= pDestBufEnd
)
414 *pDestBufPtr
++ = static_cast< char >(nChar
);
// ESC (0x1B) in the input has its own branch; its body is not visible here.
418 else if (nChar
== 0x1B)
// Other characters below 0x80: same pattern as for LF / CR above.
420 else if (nChar
< 0x80)
424 if (pDestBufEnd
- pDestBufPtr
>= 3)
426 *pDestBufPtr
++ = 0x1B; // ESC
427 *pDestBufPtr
++ = 0x28; // (
428 *pDestBufPtr
++ = 0x42; // B
434 if (pDestBufPtr
!= pDestBufEnd
)
435 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Table lookup: the high part of nChar (nChar >> 8) selects a table entry,
// the low byte must lie within that entry's mnLowStart..mnLowEnd range;
// nBytes stays 0 otherwise.
// NOTE(review): no bound check on nIndex1 is visible in this excerpt, while
// nChar can exceed 0xFFFF after surrogate combination -- confirm that the
// full file guards the table access.
441 sal_uInt16 nBytes
= 0;
442 sal_uInt32 nIndex1
= nChar
>> 8;
445 sal_uInt32 nIndex2
= nChar
& 0xFF;
446 sal_uInt32 nFirst
= pJisX0208Data
[nIndex1
].mnLowStart
;
447 if (nIndex2
>= nFirst
448 && nIndex2
<= pJisX0208Data
[nIndex1
].mnLowEnd
)
450 nBytes
= pJisX0208Data
[nIndex1
].
451 mpToUniTrailTab
[nIndex2
- nFirst
];
453 // For some reason, the tables in tcvtjp4.tab do not
454 // include these two conversions:
457 case 0xA5: // YEN SIGN
// ESC $ B is written when at least three bytes of room remain (the
// condition selecting this write is not visible here), followed by the two
// bytes of nBytes, high byte first, when at least two bytes remain.
471 if (pDestBufEnd
- pDestBufPtr
>= 3)
473 *pDestBufPtr
++ = 0x1B; // ESC
474 *pDestBufPtr
++ = 0x24; // $
475 *pDestBufPtr
++ = 0x42; // B
481 if (pDestBufEnd
- pDestBufPtr
>= 2)
483 *pDestBufPtr
++ = static_cast< char >(nBytes
>> 8);
484 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0xFF);
// Bad-input handling inside the loop.  The helper is given the ESC ( B
// sequence with length 3 when b0208 is set and length 0 otherwise; the
// bodies of the STOP and CONTINUE cases are not visible in this excerpt.
496 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
497 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
498 &nInfo
, "\x1B(B", b0208
? 3 : 0, &bWritten
))
500 case sal::detail::textenc::BAD_INPUT_STOP
:
504 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
510 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
517 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Post-loop handling, depending on the ERROR / DESTBUFFERTOSMALL bits in
// nInfo (the comparison operand of that test is not visible here).
521 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
522 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// A high surrogate is still pending at the end of the input.
// NOTE(review): SRCBUFFERTOSMALL is set here when FLUSH *is* set, whereas
// ImplConvertIso2022JpToUnicode sets its SRCBUFFERTOOSMALL flag when FLUSH is
// *not* set -- confirm that this difference is intended.
526 if (nHighSurrogate
!= 0)
528 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
529 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
531 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
532 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
533 "\x1B(B", b0208
? 3 : 0, &bWritten
))
535 case sal::detail::textenc::BAD_INPUT_STOP
:
540 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
546 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
547 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// With FLUSH set (further conditions are on a line not shown), ESC ( B is
// written if three bytes of room remain; the DESTBUFFERTOSMALL assignment
// after it is presumably the no-room branch -- confirm.
553 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
555 if (pDestBufEnd
- pDestBufPtr
>= 3)
557 *pDestBufPtr
++ = 0x1B; // ESC
558 *pDestBufPtr
++ = 0x28; // (
559 *pDestBufPtr
++ = 0x42; // B
563 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Save the encoder state so that a later call can resume from it (the
// right-hand side of the first assignment is on a line not shown).
569 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
571 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= b0208
;
// Report the value of nConverted through pSrcCvtChars; the return value is
// the number of bytes written to pDestBuf.
576 *pSrcCvtChars
= nConverted
;
578 return pDestBufPtr
- pDestBuf
;
581 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */