1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "sal/config.h"
22 #include "rtl/textcvt.h"
23 #include "sal/types.h"
25 #include "context.hxx"
26 #include "converter.hxx"
27 #include "convertiso2022jp.hxx"
28 #include "tenchelp.hxx"
29 #include "unichars.hxx"
// States of the ISO-2022-JP -> Unicode decoder implemented by
// ImplConvertIso2022JpToUnicode.  The first three states are the
// "resting" states in which a new character may start; the remaining ones
// are entered after the first byte of a two-byte character (0208_2) or
// part of an escape sequence (ESC, ESC_LPAREN, ESC_DOLLAR) has been read.
33 enum ImplIso2022JpToUnicodeState
// order is important:
// (after its main loop the converter tests
// "eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208" to detect input that
// ended in one of the later states, so those states must stay listed after
// IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208)
35 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
,
36 IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
,
37 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
,
38 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
,
39 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
,
40 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
,
41 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
// Decoder state that ImplConvertIso2022JpToUnicode loads from, and stores
// back to, the context object it is handed.
// NOTE(review): the converter also accesses a member m_nRow of this
// struct; its declaration is not visible in this excerpt.
44 struct ImplIso2022JpToUnicodeContext
// Current state of the decoder state machine.
46 ImplIso2022JpToUnicodeState m_eState
;
// Encoder state that ImplConvertUnicodeToIso2022Jp loads from, and stores
// back to, the context object it is handed.
// NOTE(review): the encoder also accesses a member m_b0208 of this
// struct; its declaration is not visible in this excerpt.
50 struct ImplUnicodeToIso2022JpContext
// Pending high surrogate seen at the end of earlier input; 0 means none
// (the create/reset functions initialise it to 0).
52 sal_Unicode m_nHighSurrogate
;
// Allocates a new ImplIso2022JpToUnicodeContext with `new` and puts it
// into the initial ASCII state.  The object is presumably handed back to
// the caller as the opaque void * result (the return statement is not
// visible in this excerpt) and is released again with
// ImplDestroyIso2022JpToUnicodeContext.
58 void * ImplCreateIso2022JpToUnicodeContext()
60 ImplIso2022JpToUnicodeContext
* pContext
=
61 new ImplIso2022JpToUnicodeContext
;
// A fresh decoder starts out in the ASCII state.
62 pContext
->m_eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
// Puts an existing decoder context back into the initial ASCII state.
// pContext is treated as a pointer to an ImplIso2022JpToUnicodeContext.
// NOTE(review): whether a null pContext is tolerated cannot be seen in
// this excerpt -- confirm against the full source.
66 void ImplResetIso2022JpToUnicodeContext(void * pContext
)
69 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
70 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
// Releases a decoder context.  pContext is cast back to
// ImplIso2022JpToUnicodeContext * before `delete`, matching the `new` in
// ImplCreateIso2022JpToUnicodeContext.
73 void ImplDestroyIso2022JpToUnicodeContext(void * pContext
)
75 delete static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
);
// Converts a run of ISO-2022-JP bytes to sal_Unicode code units.
//
// pData         converter data; cast to ImplIso2022JpConverterData to
//               reach the JIS X 0208 table m_pJisX0208ToUnicodeData
// pContext      ImplIso2022JpToUnicodeContext from which eState/nRow are
//               loaded and to which they are stored back
// pSrcBuf, nSrcBytes    input bytes (declarations not visible here)
// pDestBuf, nDestChars  output buffer; writes are guarded by comparisons
//               against pDestBuf + nDestChars
// nFlags        RTL_TEXTTOUNICODE_FLAGS_* bits; forwarded to the bad-input
//               helper and tested for ..._FLUSH at the end of input
// pSrcCvtBytes  receives the number of source bytes consumed
//
// Returns the number of sal_Unicode units written
// (pDestBufPtr - pDestBuf).
//
// NOTE(review): several parameters, the handling of a possibly null
// pContext, and the place where nInfo is reported to the caller are not
// visible in this excerpt.
78 sal_Size
ImplConvertIso2022JpToUnicode(void const * pData
,
82 sal_Unicode
* pDestBuf
,
86 sal_Size
* pSrcCvtBytes
)
// Lead-byte indexed lookup table for JIS X 0208 characters.
88 ImplDBCSToUniLeadTab
const * pJisX0208Data
89 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
90 m_pJisX0208ToUnicodeData
;
// Default state; replaced below by the state saved in the context.
91 ImplIso2022JpToUnicodeState eState
92 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
// nConverted counts the source bytes consumed so far.
95 sal_Size nConverted
= 0;
96 sal_Unicode
* pDestBufPtr
= pDestBuf
;
97 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// Resume with the state left behind by the previous call.
101 eState
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
;
102 nRow
= static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
;
// Main loop: one source byte is fetched and dispatched per iteration.
105 for (; nConverted
< nSrcBytes
; ++nConverted
)
// bUndefined is the first argument later passed to the bad-input helper.
107 bool bUndefined
= true;
108 sal_uInt32 nChar
= *(sal_uChar
const *) pSrcBuf
++;
// ASCII state: ESC starts an escape sequence, bytes below 0x80 are
// copied unchanged if there is room in the destination buffer.
111 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
:
112 if (nChar
== 0x1B) // ESC
113 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
114 else if (nChar
< 0x80)
115 if (pDestBufPtr
!= pDestBufEnd
)
116 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
// JIS Roman state: like ASCII, except that the codes handled below are
// remapped (0x5C -> U+00A5; a second code, whose case label is not
// visible here, -> U+00AF).
126 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
:
127 if (nChar
== 0x1B) // ESC
128 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
129 else if (nChar
< 0x80)
130 if (pDestBufPtr
!= pDestBufEnd
)
134 case 0x5C: // REVERSE SOLIDUS (\)
135 nChar
= 0xA5; // YEN SIGN
139 nChar
= 0xAF; // MACRON
142 *pDestBufPtr
++ = (sal_Unicode
) nChar
;
// JIS X 0208 state, first byte: a byte in 0x21..0x7E moves the decoder to
// the second-byte state (nRow is presumably set from it on a line not
// visible here).
153 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
:
154 if (nChar
== 0x1B) // ESC
155 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
;
156 else if (nChar
>= 0x21 && nChar
<= 0x7E)
159 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
;
// JIS X 0208 state, second byte: look the pair up in row nRow of the
// table.  nUnicode stays 0 when nChar lies outside the row's
// mnTrailStart..mnTrailEnd range.
168 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2
:
169 if (nChar
>= 0x21 && nChar
<= 0x7E)
171 sal_uInt16 nUnicode
= 0;
172 sal_uInt32 nFirst
= pJisX0208Data
[nRow
].mnTrailStart
;
174 && nChar
<= pJisX0208Data
[nRow
].mnTrailEnd
)
175 nUnicode
= pJisX0208Data
[nRow
].
176 mpToUniTrailTab
[nChar
- nFirst
];
178 if (pDestBufPtr
!= pDestBufEnd
)
// Emit the character and expect the first byte of the next pair.
180 *pDestBufPtr
++ = (sal_Unicode
) nUnicode
;
181 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
// After ESC: continue into one of the two longer escape-sequence states
// (the bytes selecting them are not visible here; presumably '$' and '(').
195 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC
:
199 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
;
203 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
;
// After ESC '(' : the final byte selects ASCII or JIS Roman.
212 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN
:
216 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
220 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN
;
// After ESC '$' : the final byte selects JIS X 0208.
229 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
:
234 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
;
// Bad input: the shared helper decides how to proceed.  Both the STOP and
// the CONTINUE outcome reset the decoder to the ASCII state.
246 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
247 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
250 case sal::detail::textenc::BAD_INPUT_STOP
:
251 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
254 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
255 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
258 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Not enough room in the destination buffer.
265 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// End of input reached in the middle of a two-byte character or escape
// sequence (any state listed after ..._0208 in the enum).  This branch is
// presumably taken only when neither ERROR nor DESTBUFFERTOSMALL is set
// (the comparison closing the condition is not visible here).
269 if (eState
> IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
270 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
271 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
))
// Without FLUSH more input may follow: only report that the source was
// too small.  Otherwise the truncated sequence is treated as bad input.
274 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
275 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
;
277 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
278 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
281 case sal::detail::textenc::BAD_INPUT_STOP
:
282 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
283 eState
= IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII
;
286 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
287 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
;
// Save the decoder state for the next call.
294 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_eState
= eState
;
295 static_cast< ImplIso2022JpToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report the number of consumed source bytes and of written code units.
300 *pSrcCvtBytes
= nConverted
;
302 return pDestBufPtr
- pDestBuf
;
// Allocates a new ImplUnicodeToIso2022JpContext with `new` and initialises
// it: no pending high surrogate, m_b0208 cleared.  The object is
// presumably handed back to the caller as the opaque void * result (the
// return statement is not visible in this excerpt) and is released again
// with ImplDestroyUnicodeToIso2022JpContext.
305 void * ImplCreateUnicodeToIso2022JpContext()
307 ImplUnicodeToIso2022JpContext
* pContext
=
308 new ImplUnicodeToIso2022JpContext
;
309 pContext
->m_nHighSurrogate
= 0;
310 pContext
->m_b0208
= false;
// Puts an existing encoder context back into its initial state: no
// pending high surrogate, m_b0208 cleared.  pContext is treated as a
// pointer to an ImplUnicodeToIso2022JpContext.
// NOTE(review): whether a null pContext is tolerated cannot be seen in
// this excerpt -- confirm against the full source.
314 void ImplResetUnicodeToIso2022JpContext(void * pContext
)
318 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
= 0;
319 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= false;
// Releases an encoder context.  pContext is cast back to
// ImplUnicodeToIso2022JpContext * before `delete`, matching the `new` in
// ImplCreateUnicodeToIso2022JpContext.
323 void ImplDestroyUnicodeToIso2022JpContext(void * pContext
)
325 delete static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
);
// Converts a run of sal_Unicode code units to ISO-2022-JP bytes.
//
// pData         converter data; cast to ImplIso2022JpConverterData to
//               reach the table m_pUnicodeToJisX0208Data
// pContext      ImplUnicodeToIso2022JpContext from which nHighSurrogate
//               and b0208 are loaded and to which they are stored back
// pSrcBuf, nSrcChars    input code units
// pDestBuf, nDestBytes  output buffer; every write is preceded by a check
//               of the remaining space (pDestBufEnd - pDestBufPtr)
// nFlags        RTL_UNICODETOTEXT_FLAGS_* bits; forwarded to the bad-input
//               helper and tested for ..._FLUSH at the end of input
// pSrcCvtChars  receives the number of source code units consumed
//
// Returns the number of bytes written (pDestBufPtr - pDestBuf).
//
// b0208 tracks which character set is currently designated in the output:
// presumably true after ESC $ B (JIS X 0208) has been written and false
// after ESC ( B (ASCII); the assignments to it are not visible here.
//
// NOTE(review): several parameters, the handling of a possibly null
// pContext, and the place where nInfo is reported to the caller are not
// visible in this excerpt.
328 sal_Size
ImplConvertUnicodeToIso2022Jp(void const * pData
,
330 sal_Unicode
const * pSrcBuf
,
336 sal_Size
* pSrcCvtChars
)
// Lookup table indexed by the high byte of the Unicode code point.
338 ImplUniToDBCSHighTab
const * pJisX0208Data
339 = static_cast< ImplIso2022JpConverterData
const * >(pData
)->
340 m_pUnicodeToJisX0208Data
;
341 sal_Unicode nHighSurrogate
= 0;
343 sal_uInt32 nInfo
= 0;
// nConverted counts the source code units consumed so far.
344 sal_Size nConverted
= 0;
345 char * pDestBufPtr
= pDestBuf
;
346 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Resume with the state left behind by the previous call.
352 = static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
;
353 b0208
= static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
;
// Main loop: one source code unit is fetched per iteration.
356 for (; nConverted
< nSrcChars
; ++nConverted
)
// bUndefined is the first argument later passed to the bad-input helper.
358 bool bUndefined
= true;
359 sal_uInt32 nChar
= *pSrcBuf
++;
// Surrogate handling: a high surrogate is remembered until the next code
// unit; a following low surrogate is combined with it into one code point.
360 if (nHighSurrogate
== 0)
362 if (ImplIsHighSurrogate(nChar
))
364 nHighSurrogate
= (sal_Unicode
) nChar
;
368 else if (ImplIsLowSurrogate(nChar
))
369 nChar
= ImplCombineSurrogates(nHighSurrogate
, nChar
);
// A low surrogate still present at this point, or a noncharacter, is
// singled out here (presumably routed to the bad-input handling below).
376 if (ImplIsLowSurrogate(nChar
) || ImplIsNoncharacter(nChar
))
// LF and CR: the three bytes ESC ( B are written first when there is room
// for them (presumably only while b0208 is set -- that test is not visible
// here), then the character itself.
382 if (nChar
== 0x0A || nChar
== 0x0D) // LF, CR
386 if (pDestBufEnd
- pDestBufPtr
>= 3)
388 *pDestBufPtr
++ = 0x1B; // ESC
389 *pDestBufPtr
++ = 0x28; // (
390 *pDestBufPtr
++ = 0x42; // B
396 if (pDestBufPtr
!= pDestBufEnd
)
397 *pDestBufPtr
++ = static_cast< char >(nChar
);
// ESC gets a branch of its own, separate from the other characters below
// 0x80 (the action taken is not visible here).
401 else if (nChar
== 0x1B)
// Other characters below 0x80: same ESC ( B handling as for LF/CR, then
// the character is written as a single byte.
403 else if (nChar
< 0x80)
407 if (pDestBufEnd
- pDestBufPtr
>= 3)
409 *pDestBufPtr
++ = 0x1B; // ESC
410 *pDestBufPtr
++ = 0x28; // (
411 *pDestBufPtr
++ = 0x42; // B
417 if (pDestBufPtr
!= pDestBufEnd
)
418 *pDestBufPtr
++ = static_cast< char >(nChar
);
// Everything else: table lookup.  nIndex1 (high bits) selects the table
// row, nIndex2 (low byte) the entry within the row's
// mnLowStart..mnLowEnd range; nBytes stays 0 if no entry is found.
424 sal_uInt16 nBytes
= 0;
425 sal_uInt32 nIndex1
= nChar
>> 8;
428 sal_uInt32 nIndex2
= nChar
& 0xFF;
429 sal_uInt32 nFirst
= pJisX0208Data
[nIndex1
].mnLowStart
;
430 if (nIndex2
>= nFirst
431 && nIndex2
<= pJisX0208Data
[nIndex1
].mnLowEnd
)
// NOTE(review): the member is called mpToUniTrailTab although the value
// read from it is used as the two output bytes for a Unicode code point.
433 nBytes
= pJisX0208Data
[nIndex1
].
434 mpToUniTrailTab
[nIndex2
- nFirst
];
436 // For some reason, the tables in tcvtjp4.tab do not
437 // include these two conversions:
440 case 0xA5: // YEN SIGN
// The three bytes ESC $ B are written when there is room for them
// (presumably only while b0208 is not yet set -- that test is not visible
// here).
454 if (pDestBufEnd
- pDestBufPtr
>= 3)
456 *pDestBufPtr
++ = 0x1B; // ESC
457 *pDestBufPtr
++ = 0x24; // $
458 *pDestBufPtr
++ = 0x42; // B
// The character itself: high byte first, then low byte.
464 if (pDestBufEnd
- pDestBufPtr
>= 2)
466 *pDestBufPtr
++ = static_cast< char >(nBytes
>> 8);
467 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0xFF);
// Bad input: the shared helper decides how to proceed.  It is given the
// string ESC ( B together with a length of 3 while b0208 is set and 0
// otherwise.
479 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
480 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
481 &nInfo
, "\x1B(B", b0208
? 3 : 0, &bWritten
))
483 case sal::detail::textenc::BAD_INPUT_STOP
:
487 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
493 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Not enough room in the destination buffer.
500 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// End-of-input handling, presumably entered only when neither ERROR nor
// DESTBUFFERTOSMALL is set (the comparison closing the condition is not
// visible here).
504 if ((nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
505 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// The input ended after a high surrogate without its low surrogate.
509 if (nHighSurrogate
!= 0)
// NOTE(review): SRCBUFFERTOSMALL is set here when FLUSH *is* set, whereas
// ImplConvertIso2022JpToUnicode sets it when FLUSH is *not* set
// ("== 0").  Confirm that this asymmetry is intended.
511 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
512 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
514 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
515 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
516 "\x1B(B", b0208
? 3 : 0, &bWritten
))
518 case sal::detail::textenc::BAD_INPUT_STOP
:
523 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
529 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
530 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// On FLUSH (combined with a further condition that is not visible here)
// the output is terminated with ESC ( B; if the three bytes do not fit,
// DESTBUFFERTOSMALL is reported instead.
536 && (nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
538 if (pDestBufEnd
- pDestBufPtr
>= 3)
540 *pDestBufPtr
++ = 0x1B; // ESC
541 *pDestBufPtr
++ = 0x28; // (
542 *pDestBufPtr
++ = 0x42; // B
546 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Save the encoder state for the next call.
552 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_nHighSurrogate
554 static_cast< ImplUnicodeToIso2022JpContext
* >(pContext
)->m_b0208
= b0208
;
// Report the number of consumed code units and of written bytes.
559 *pSrcCvtChars
= nConverted
;
561 return pDestBufPtr
- pDestBuf
;
564 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */