Version 4.2.0.1, tag libreoffice-4.2.0.1
[LibreOffice.git] / sal / textenc / convertiso2022jp.cxx
blobb7230424fa1506b7c0a0e719194d5a951510d370
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "sal/config.h"
22 #include "rtl/textcvt.h"
23 #include "sal/types.h"
25 #include "context.hxx"
26 #include "converter.hxx"
27 #include "convertiso2022jp.hxx"
28 #include "tenchelp.hxx"
29 #include "unichars.hxx"
31 namespace {
33 enum ImplIso2022JpToUnicodeState // order is important:
35 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII,
36 IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN,
37 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208,
38 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2,
39 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC,
40 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN,
41 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
44 struct ImplIso2022JpToUnicodeContext
46 ImplIso2022JpToUnicodeState m_eState;
47 sal_uInt32 m_nRow;
50 struct ImplUnicodeToIso2022JpContext
52 sal_Unicode m_nHighSurrogate;
53 bool m_b0208;
58 void * ImplCreateIso2022JpToUnicodeContext()
60 ImplIso2022JpToUnicodeContext * pContext =
61 new ImplIso2022JpToUnicodeContext;
62 pContext->m_eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
63 return pContext;
66 void ImplResetIso2022JpToUnicodeContext(void * pContext)
68 if (pContext)
69 static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState
70 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
73 void ImplDestroyIso2022JpToUnicodeContext(void * pContext)
75 delete static_cast< ImplIso2022JpToUnicodeContext * >(pContext);
78 sal_Size ImplConvertIso2022JpToUnicode(void const * pData,
79 void * pContext,
80 char const * pSrcBuf,
81 sal_Size nSrcBytes,
82 sal_Unicode * pDestBuf,
83 sal_Size nDestChars,
84 sal_uInt32 nFlags,
85 sal_uInt32 * pInfo,
86 sal_Size * pSrcCvtBytes)
88 ImplDBCSToUniLeadTab const * pJisX0208Data
89 = static_cast< ImplIso2022JpConverterData const * >(pData)->
90 m_pJisX0208ToUnicodeData;
91 ImplIso2022JpToUnicodeState eState
92 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
93 sal_uInt32 nRow = 0;
94 sal_uInt32 nInfo = 0;
95 sal_Size nConverted = 0;
96 sal_Unicode * pDestBufPtr = pDestBuf;
97 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
99 if (pContext)
101 eState = static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState;
102 nRow = static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_nRow;
105 for (; nConverted < nSrcBytes; ++nConverted)
107 bool bUndefined = true;
108 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
109 switch (eState)
111 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII:
112 if (nChar == 0x1B) // ESC
113 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
114 else if (nChar < 0x80)
115 if (pDestBufPtr != pDestBufEnd)
116 *pDestBufPtr++ = (sal_Unicode) nChar;
117 else
118 goto no_output;
119 else
121 bUndefined = false;
122 goto bad_input;
124 break;
126 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN:
127 if (nChar == 0x1B) // ESC
128 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
129 else if (nChar < 0x80)
130 if (pDestBufPtr != pDestBufEnd)
132 switch (nChar)
134 case 0x5C: // REVERSE SOLIDUS (\)
135 nChar = 0xA5; // YEN SIGN
136 break;
138 case 0x7E: // ~
139 nChar = 0xAF; // MACRON
140 break;
142 *pDestBufPtr++ = (sal_Unicode) nChar;
144 else
145 goto no_output;
146 else
148 bUndefined = false;
149 goto bad_input;
151 break;
153 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208:
154 if (nChar == 0x1B) // ESC
155 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
156 else if (nChar >= 0x21 && nChar <= 0x7E)
158 nRow = nChar;
159 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2;
161 else
163 bUndefined = false;
164 goto bad_input;
166 break;
168 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2:
169 if (nChar >= 0x21 && nChar <= 0x7E)
171 sal_uInt16 nUnicode = 0;
172 sal_uInt32 nFirst = pJisX0208Data[nRow].mnTrailStart;
173 if (nChar >= nFirst
174 && nChar <= pJisX0208Data[nRow].mnTrailEnd)
175 nUnicode = pJisX0208Data[nRow].
176 mpToUniTrailTab[nChar - nFirst];
177 if (nUnicode != 0)
178 if (pDestBufPtr != pDestBufEnd)
180 *pDestBufPtr++ = (sal_Unicode) nUnicode;
181 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
183 else
184 goto no_output;
185 else
186 goto bad_input;
188 else
190 bUndefined = false;
191 goto bad_input;
193 break;
195 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC:
196 switch (nChar)
198 case 0x24: // $
199 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR;
200 break;
202 case 0x28: // (
203 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN;
204 break;
206 default:
207 bUndefined = false;
208 goto bad_input;
210 break;
212 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN:
213 switch (nChar)
215 case 0x42: // A
216 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
217 break;
219 case 0x4A: // J
220 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN;
221 break;
223 default:
224 bUndefined = false;
225 goto bad_input;
227 break;
229 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR:
230 switch (nChar)
232 case 0x40: // @
233 case 0x42: // B
234 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
235 break;
237 default:
238 bUndefined = false;
239 goto bad_input;
241 break;
243 continue;
245 bad_input:
246 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
247 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
248 &nInfo))
250 case sal::detail::textenc::BAD_INPUT_STOP:
251 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
252 break;
254 case sal::detail::textenc::BAD_INPUT_CONTINUE:
255 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
256 continue;
258 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
259 goto no_output;
261 break;
263 no_output:
264 --pSrcBuf;
265 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
266 break;
269 if (eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
270 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
271 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
272 == 0)
274 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
275 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
276 else
277 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
278 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
279 &nInfo))
281 case sal::detail::textenc::BAD_INPUT_STOP:
282 case sal::detail::textenc::BAD_INPUT_CONTINUE:
283 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
284 break;
286 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
287 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
288 break;
292 if (pContext)
294 static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState = eState;
295 static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_nRow = nRow;
297 if (pInfo)
298 *pInfo = nInfo;
299 if (pSrcCvtBytes)
300 *pSrcCvtBytes = nConverted;
302 return pDestBufPtr - pDestBuf;
305 void * ImplCreateUnicodeToIso2022JpContext()
307 ImplUnicodeToIso2022JpContext * pContext =
308 new ImplUnicodeToIso2022JpContext;
309 pContext->m_nHighSurrogate = 0;
310 pContext->m_b0208 = false;
311 return pContext;
314 void ImplResetUnicodeToIso2022JpContext(void * pContext)
316 if (pContext)
318 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate = 0;
319 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208 = false;
323 void ImplDestroyUnicodeToIso2022JpContext(void * pContext)
325 delete static_cast< ImplUnicodeToIso2022JpContext * >(pContext);
328 sal_Size ImplConvertUnicodeToIso2022Jp(void const * pData,
329 void * pContext,
330 sal_Unicode const * pSrcBuf,
331 sal_Size nSrcChars,
332 char * pDestBuf,
333 sal_Size nDestBytes,
334 sal_uInt32 nFlags,
335 sal_uInt32 * pInfo,
336 sal_Size * pSrcCvtChars)
338 ImplUniToDBCSHighTab const * pJisX0208Data
339 = static_cast< ImplIso2022JpConverterData const * >(pData)->
340 m_pUnicodeToJisX0208Data;
341 sal_Unicode nHighSurrogate = 0;
342 bool b0208 = false;
343 sal_uInt32 nInfo = 0;
344 sal_Size nConverted = 0;
345 char * pDestBufPtr = pDestBuf;
346 char * pDestBufEnd = pDestBuf + nDestBytes;
347 bool bWritten;
349 if (pContext)
351 nHighSurrogate
352 = static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate;
353 b0208 = static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208;
356 for (; nConverted < nSrcChars; ++nConverted)
358 bool bUndefined = true;
359 sal_uInt32 nChar = *pSrcBuf++;
360 if (nHighSurrogate == 0)
362 if (ImplIsHighSurrogate(nChar))
364 nHighSurrogate = (sal_Unicode) nChar;
365 continue;
368 else if (ImplIsLowSurrogate(nChar))
369 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
370 else
372 bUndefined = false;
373 goto bad_input;
376 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
378 bUndefined = false;
379 goto bad_input;
382 if (nChar == 0x0A || nChar == 0x0D) // LF, CR
384 if (b0208)
386 if (pDestBufEnd - pDestBufPtr >= 3)
388 *pDestBufPtr++ = 0x1B; // ESC
389 *pDestBufPtr++ = 0x28; // (
390 *pDestBufPtr++ = 0x42; // B
391 b0208 = false;
393 else
394 goto no_output;
396 if (pDestBufPtr != pDestBufEnd)
397 *pDestBufPtr++ = static_cast< char >(nChar);
398 else
399 goto no_output;
401 else if (nChar == 0x1B)
402 goto bad_input;
403 else if (nChar < 0x80)
405 if (b0208)
407 if (pDestBufEnd - pDestBufPtr >= 3)
409 *pDestBufPtr++ = 0x1B; // ESC
410 *pDestBufPtr++ = 0x28; // (
411 *pDestBufPtr++ = 0x42; // B
412 b0208 = false;
414 else
415 goto no_output;
417 if (pDestBufPtr != pDestBufEnd)
418 *pDestBufPtr++ = static_cast< char >(nChar);
419 else
420 goto no_output;
422 else
424 sal_uInt16 nBytes = 0;
425 sal_uInt32 nIndex1 = nChar >> 8;
426 if (nIndex1 < 0x100)
428 sal_uInt32 nIndex2 = nChar & 0xFF;
429 sal_uInt32 nFirst = pJisX0208Data[nIndex1].mnLowStart;
430 if (nIndex2 >= nFirst
431 && nIndex2 <= pJisX0208Data[nIndex1].mnLowEnd)
433 nBytes = pJisX0208Data[nIndex1].
434 mpToUniTrailTab[nIndex2 - nFirst];
435 if (nBytes == 0)
436 // For some reason, the tables in tcvtjp4.tab do not
437 // include these two conversions:
438 switch (nChar)
440 case 0xA5: // YEN SIGN
441 nBytes = 0x216F;
442 break;
444 case 0xAF: // MACRON
445 nBytes = 0x2131;
446 break;
450 if (nBytes != 0)
452 if (!b0208)
454 if (pDestBufEnd - pDestBufPtr >= 3)
456 *pDestBufPtr++ = 0x1B; // ESC
457 *pDestBufPtr++ = 0x24; // $
458 *pDestBufPtr++ = 0x42; // B
459 b0208 = true;
461 else
462 goto no_output;
464 if (pDestBufEnd - pDestBufPtr >= 2)
466 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
467 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
469 else
470 goto no_output;
472 else
473 goto bad_input;
475 nHighSurrogate = 0;
476 continue;
478 bad_input:
479 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
480 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
481 &nInfo, "\x1B(B", b0208 ? 3 : 0, &bWritten))
483 case sal::detail::textenc::BAD_INPUT_STOP:
484 nHighSurrogate = 0;
485 break;
487 case sal::detail::textenc::BAD_INPUT_CONTINUE:
488 if (bWritten)
489 b0208 = false;
490 nHighSurrogate = 0;
491 continue;
493 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
494 goto no_output;
496 break;
498 no_output:
499 --pSrcBuf;
500 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
501 break;
504 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
505 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
506 == 0)
508 bool bFlush = true;
509 if (nHighSurrogate != 0)
511 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
512 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
513 else
514 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
515 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
516 "\x1B(B", b0208 ? 3 : 0, &bWritten))
518 case sal::detail::textenc::BAD_INPUT_STOP:
519 nHighSurrogate = 0;
520 bFlush = false;
521 break;
523 case sal::detail::textenc::BAD_INPUT_CONTINUE:
524 if (bWritten)
525 b0208 = false;
526 nHighSurrogate = 0;
527 break;
529 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
530 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
531 break;
534 if (bFlush
535 && b0208
536 && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
538 if (pDestBufEnd - pDestBufPtr >= 3)
540 *pDestBufPtr++ = 0x1B; // ESC
541 *pDestBufPtr++ = 0x28; // (
542 *pDestBufPtr++ = 0x42; // B
543 b0208 = false;
545 else
546 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
550 if (pContext)
552 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate
553 = nHighSurrogate;
554 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208 = b0208;
556 if (pInfo)
557 *pInfo = nInfo;
558 if (pSrcCvtChars)
559 *pSrcCvtChars = nConverted;
561 return pDestBufPtr - pDestBuf;
564 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */