/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "sal/config.h"
22 #include "rtl/textcvt.h"
23 #include "sal/types.h"
25 #include "converter.hxx"
26 #include "convertiso2022jp.hxx"
27 #include "tenchelp.hxx"
28 #include "unichars.hxx"
// Decoder states of the ISO-2022-JP to Unicode converter.
//
// Order is important: after the conversion loop the decoder tests
// "eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208" to find out whether the
// input stopped in the middle of a double-byte character or an escape
// sequence, so all such "incomplete" states must be listed after ..._0208.
enum ImplIso2022JpToUnicodeState
{
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
};
43 struct ImplIso2022JpToUnicodeContext
45 ImplIso2022JpToUnicodeState m_eState
;
49 struct ImplUnicodeToIso2022JpContext
51 sal_Unicode m_nHighSurrogate
;
57 void * ImplCreateIso2022JpToUnicodeContext()
59 ImplIso2022JpToUnicodeContext
* pContext
=
60 new ImplIso2022JpToUnicodeContext
;
61 pContext
->m_eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
65 void ImplResetIso2022JpToUnicodeContext(void * pContext
)
68 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
69 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
72 void ImplDestroyIso2022JpToUnicodeContext(void * pContext
)
74 delete static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
);
// Converts a run of ISO-2022-JP bytes into UTF-16 code units.
//
// pData is cast to ImplIso2022JpConverterData to obtain the JIS X 0208 lead
// table.  pContext is cast to ImplIso2022JpToUnicodeContext; the decoder
// state and row are loaded from it on entry and stored back before returning.
// Output is written to pDestBuf, which has room for nDestChars code units.
// *pSrcCvtBytes receives the number of source bytes consumed, and the return
// value is the number of code units written (pDestBufPtr - pDestBuf).
//
// NOTE(review): this listing omits part of the parameter list, the switch
// headers, the else/goto/break lines and any guard around the pContext
// accesses.  The comments below describe only the statements that are
// visible here -- confirm details against the complete source.
77 sal_Size
ImplConvertIso2022JpToUnicode(void const * pData
,
81 sal_Unicode
* pDestBuf
,
85 sal_Size
* pSrcCvtBytes
)
87 ImplDBCSToUniLeadTab
const * pJisX0208Data
88 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
89 m_pJisX0208ToUnicodeData
;
90 ImplIso2022JpToUnicodeState eState
91 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
94 sal_Size nConverted
= 0;
95 sal_Unicode
* pDestBufPtr
= pDestBuf
;
96 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Resume from the state and row saved by a previous call.
100 eState
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
;
101 nRow
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
;
// Main loop: one source byte per iteration, read as unsigned char.
104 for (; nConverted
< nSrcBytes
; ++nConverted
)
106 bool bUndefined
= true;
107 sal_uInt32 nChar
= *reinterpret_cast<unsigned char const *>(pSrcBuf
++);
// ASCII: ESC starts an escape sequence; bytes below 0x80 are copied
// unchanged when the destination still has room.
110 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
:
111 if (nChar
== 0x1B) // ESC
112 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
113 else if (nChar
< 0x80)
114 if (pDestBufPtr
!= pDestBufEnd
)
115 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
// JIS Roman: like ASCII, except that 0x5C is emitted as 0xA5 and one further
// byte (its case label is not visible in this listing) as 0xAF.
125 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
:
126 if (nChar
== 0x1B) // ESC
127 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
128 else if (nChar
< 0x80)
129 if (pDestBufPtr
!= pDestBufEnd
)
133 case 0x5C: // REVERSE SOLIDUS (\)
134 nChar
= 0xA5; // YEN SIGN
138 nChar
= 0xAF; // MACRON
141 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
// First byte of a JIS X 0208 pair: a byte in 0x21..0x7E moves the decoder on
// to the second-byte state; ESC starts an escape sequence.
152 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
:
153 if (nChar
== 0x1B) // ESC
154 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
155 else if (nChar
>= 0x21 && nChar
<= 0x7E)
158 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
;
// Second byte of a pair: a byte in 0x21..0x7E that lies inside the row's
// mnTrailStart..mnTrailEnd range is looked up in the row's trail table; the
// result is written when there is room and the decoder returns to the
// first-byte state.
167 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
:
168 if (nChar
>= 0x21 && nChar
<= 0x7E)
170 sal_uInt16 nUnicode
= 0;
171 sal_uInt32 nFirst
= pJisX0208Data
[nRow
].mnTrailStart
;
173 && nChar
<= pJisX0208Data
[nRow
].mnTrailEnd
)
174 nUnicode
= pJisX0208Data
[nRow
].
175 mpToUniTrailTab
[nChar
- nFirst
];
177 if (pDestBufPtr
!= pDestBufEnd
)
179 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
180 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
// Escape-sequence states.  ESC leads to ESC_DOLLAR or ESC_LPAREN, ESC_LPAREN
// selects ASCII or JIS Roman, and ESC_DOLLAR selects JIS X 0208.  The byte
// values that trigger each transition are not visible in this listing.
194 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
:
198 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
;
202 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
;
211 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
:
215 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
219 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
;
228 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
:
233 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
// Bad-input handling: both the STOP and the CONTINUE outcome put the decoder
// back into the ASCII state.
245 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
246 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
249 case sal::detail::textenc::BAD_INPUT_STOP
:
250 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
253 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
254 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
257 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer is too small.
264 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// After the loop: a state that orders after ..._0208 means the input stopped
// inside a double-byte character or an escape sequence.  Without the FLUSH
// flag this is reported as SRCBUFFERTOSMALL; the bad-input call that follows
// is presumably the alternative branch (the else line is not visible).
268 if (eState
> IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
269 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
270 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
273 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
274 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
276 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
277 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
280 case sal::detail::textenc::BAD_INPUT_STOP
:
281 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
282 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
285 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
286 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// Store the decoder state and row for the next call.
293 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
= eState
;
294 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report the number of consumed source bytes and return the number of code
// units written.
299 *pSrcCvtBytes
= nConverted
;
301 return pDestBufPtr
- pDestBuf
;
304 void * ImplCreateUnicodeToIso2022JpContext()
306 ImplUnicodeToIso2022JpContext
* pContext
=
307 new ImplUnicodeToIso2022JpContext
;
308 pContext
->m_nHighSurrogate
= 0;
309 pContext
->m_b0208
= false;
313 void ImplResetUnicodeToIso2022JpContext(void * pContext
)
317 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
= 0;
318 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= false;
322 void ImplDestroyUnicodeToIso2022JpContext(void * pContext
)
324 delete static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
);
// Converts UTF-16 code units into ISO-2022-JP bytes.
//
// pData is cast to ImplIso2022JpConverterData to obtain the Unicode to
// JIS X 0208 table.  pContext is cast to ImplUnicodeToIso2022JpContext; the
// pending high surrogate and the b0208 flag are loaded from it on entry and
// stored back before returning.  Output is written to pDestBuf, which has room
// for nDestBytes bytes.  *pSrcCvtChars receives the number of source code
// units consumed, and the return value is the number of bytes written
// (pDestBufPtr - pDestBuf).
//
// NOTE(review): this listing omits part of the parameter list, several
// conditions, the else/goto/break lines and any guard around the pContext
// accesses.  The comments below describe only the statements that are
// visible here -- confirm details against the complete source.
327 sal_Size
ImplConvertUnicodeToIso2022Jp(void const * pData
,
329 sal_Unicode
const * pSrcBuf
,
335 sal_Size
* pSrcCvtChars
)
337 ImplUniToDBCSHighTab
const * pJisX0208Data
338 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
339 m_pUnicodeToJisX0208Data
;
340 sal_Unicode nHighSurrogate
= 0;
342 sal_uInt32 nInfo
= 0;
343 sal_Size nConverted
= 0;
344 char * pDestBufPtr
= pDestBuf
;
345 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Resume with the values saved by a previous call.
351 = static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
;
352 b0208
= static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
;
// Main loop: one UTF-16 code unit per iteration.
355 for (; nConverted
< nSrcChars
; ++nConverted
)
357 bool bUndefined
= true;
358 sal_uInt32 nChar
= *pSrcBuf
++;
// Surrogate pairing: with no high surrogate pending, a high surrogate is
// remembered; otherwise a low surrogate is combined with the pending one.
359 if (nHighSurrogate
== 0)
361 if (ImplIsHighSurrogate(nChar
))
363 nHighSurrogate
= (sal_Unicode
) nChar
;
367 else if (ImplIsLowSurrogate(nChar
))
368 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
// Low surrogates and noncharacters are singled out here (the branch body is
// not visible in this listing).
375 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
// LF / CR: ESC ( B is written when at least 3 bytes are free (the condition
// guarding this is not visible here), then the character itself when there
// is room.
381 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
385 if (pDestBufEnd
- pDestBufPtr
>= 3)
387 *pDestBufPtr
++ = 0x1B; // ESC
388 *pDestBufPtr
++ = 0x28; // (
389 *pDestBufPtr
++ = 0x42; // B
395 if (pDestBufPtr
!= pDestBufEnd
)
396 *pDestBufPtr
++ = static_cast< char >(nChar
);
// ESC (0x1B) has a branch of its own (body not visible in this listing).
400 else if (nChar
== 0x1B)
// Other characters below 0x80: the same ESC ( B plus single-byte output as
// for LF / CR above.
402 else if (nChar
< 0x80)
406 if (pDestBufEnd
- pDestBufPtr
>= 3)
408 *pDestBufPtr
++ = 0x1B; // ESC
409 *pDestBufPtr
++ = 0x28; // (
410 *pDestBufPtr
++ = 0x42; // B
416 if (pDestBufPtr
!= pDestBufEnd
)
417 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Everything else: two-level table lookup keyed by nChar >> 8 and
// nChar & 0xFF; the low byte must lie within mnLowStart..mnLowEnd of the
// selected entry.
423 sal_uInt16 nBytes
= 0;
424 sal_uInt32 nIndex1
= nChar
>> 8;
427 sal_uInt32 nIndex2
= nChar
& 0xFF;
428 sal_uInt32 nFirst
= pJisX0208Data
[nIndex1
].mnLowStart
;
429 if (nIndex2
>= nFirst
430 && nIndex2
<= pJisX0208Data
[nIndex1
].mnLowEnd
)
432 nBytes
= pJisX0208Data
[nIndex1
].
433 mpToUniTrailTab
[nIndex2
- nFirst
];
435 // For some reason, the tables in tcvtjp4.tab do not
436 // include these two conversions:
439 case 0xA5: // YEN SIGN
// ESC $ B is written when at least 3 bytes are free; the two bytes of nBytes
// follow, high byte first, when at least 2 bytes are free.
453 if (pDestBufEnd
- pDestBufPtr
>= 3)
455 *pDestBufPtr
++ = 0x1B; // ESC
456 *pDestBufPtr
++ = 0x24; // $
457 *pDestBufPtr
++ = 0x42; // B
463 if (pDestBufEnd
- pDestBufPtr
>= 2)
465 *pDestBufPtr
++ = static_cast< char >(nBytes
>> 8);
466 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0xFF);
// Bad input: the string "\x1B(B" is passed to the handler with length 3 while
// b0208 is set and with length 0 otherwise.
478 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
479 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
480 &nInfo
, "\x1B(B", b0208
? 3 : 0, &bWritten
))
482 case sal::detail::textenc::BAD_INPUT_STOP
:
486 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
492 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer is too small.
499 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// After the loop: deal with a high surrogate that is still pending.
503 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
504 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
508 if (nHighSurrogate
!= 0)
// NOTE(review): SRCBUFFERTOSMALL is set here when the FLUSH flag IS set
// ("!= 0"), whereas the bytes-to-Unicode converter in this file sets it when
// FLUSH is NOT set ("== 0") -- confirm that this difference is intended.
510 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
511 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
513 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
514 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
515 "\x1B(B", b0208
? 3 : 0, &bWritten
))
517 case sal::detail::textenc::BAD_INPUT_STOP
:
522 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
528 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
529 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// With the FLUSH flag set (the rest of the condition is not visible here),
// ESC ( B is written when at least 3 bytes are free; DESTBUFFERTOSMALL is
// presumably the alternative when they are not.
535 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
537 if (pDestBufEnd
- pDestBufPtr
>= 3)
539 *pDestBufPtr
++ = 0x1B; // ESC
540 *pDestBufPtr
++ = 0x28; // (
541 *pDestBufPtr
++ = 0x42; // B
545 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Store the encoder state for the next call.
551 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
553 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= b0208
;
// Report the number of consumed code units and return the number of bytes
// written.
558 *pSrcCvtChars
= nConverted
;
560 return pDestBufPtr
- pDestBuf
;
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */