// Extracted from the LibreOffice.git web view: sal/textenc/convertiso2022jp.cxx
// (blob bb89509e2e6930830d151496b26a44c33934f90a; page title "bump product version to 5.0.4.1")
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

20 #include "sal/config.h"
22 #include "rtl/textcvt.h"
23 #include "sal/types.h"
25 #include "converter.hxx"
26 #include "convertiso2022jp.hxx"
27 #include "tenchelp.hxx"
28 #include "unichars.hxx"
30 namespace {
32 enum ImplIso2022JpToUnicodeState // order is important:
34 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII,
35 IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN,
36 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208,
37 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2,
38 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC,
39 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN,
40 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
43 struct ImplIso2022JpToUnicodeContext
45 ImplIso2022JpToUnicodeState m_eState;
46 sal_uInt32 m_nRow;
49 struct ImplUnicodeToIso2022JpContext
51 sal_Unicode m_nHighSurrogate;
52 bool m_b0208;
57 void * ImplCreateIso2022JpToUnicodeContext()
59 ImplIso2022JpToUnicodeContext * pContext =
60 new ImplIso2022JpToUnicodeContext;
61 pContext->m_eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
62 return pContext;
65 void ImplResetIso2022JpToUnicodeContext(void * pContext)
67 if (pContext)
68 static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState
69 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
72 void ImplDestroyIso2022JpToUnicodeContext(void * pContext)
74 delete static_cast< ImplIso2022JpToUnicodeContext * >(pContext);
77 sal_Size ImplConvertIso2022JpToUnicode(void const * pData,
78 void * pContext,
79 char const * pSrcBuf,
80 sal_Size nSrcBytes,
81 sal_Unicode * pDestBuf,
82 sal_Size nDestChars,
83 sal_uInt32 nFlags,
84 sal_uInt32 * pInfo,
85 sal_Size * pSrcCvtBytes)
87 ImplDBCSToUniLeadTab const * pJisX0208Data
88 = static_cast< ImplIso2022JpConverterData const * >(pData)->
89 m_pJisX0208ToUnicodeData;
90 ImplIso2022JpToUnicodeState eState
91 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
92 sal_uInt32 nRow = 0;
93 sal_uInt32 nInfo = 0;
94 sal_Size nConverted = 0;
95 sal_Unicode * pDestBufPtr = pDestBuf;
96 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
98 if (pContext)
100 eState = static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState;
101 nRow = static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_nRow;
104 for (; nConverted < nSrcBytes; ++nConverted)
106 bool bUndefined = true;
107 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
108 switch (eState)
110 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII:
111 if (nChar == 0x1B) // ESC
112 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
113 else if (nChar < 0x80)
114 if (pDestBufPtr != pDestBufEnd)
115 *pDestBufPtr++ = (sal_Unicode) nChar;
116 else
117 goto no_output;
118 else
120 bUndefined = false;
121 goto bad_input;
123 break;
125 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN:
126 if (nChar == 0x1B) // ESC
127 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
128 else if (nChar < 0x80)
129 if (pDestBufPtr != pDestBufEnd)
131 switch (nChar)
133 case 0x5C: // REVERSE SOLIDUS (\)
134 nChar = 0xA5; // YEN SIGN
135 break;
137 case 0x7E: // ~
138 nChar = 0xAF; // MACRON
139 break;
141 *pDestBufPtr++ = (sal_Unicode) nChar;
143 else
144 goto no_output;
145 else
147 bUndefined = false;
148 goto bad_input;
150 break;
152 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208:
153 if (nChar == 0x1B) // ESC
154 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
155 else if (nChar >= 0x21 && nChar <= 0x7E)
157 nRow = nChar;
158 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2;
160 else
162 bUndefined = false;
163 goto bad_input;
165 break;
167 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2:
168 if (nChar >= 0x21 && nChar <= 0x7E)
170 sal_uInt16 nUnicode = 0;
171 sal_uInt32 nFirst = pJisX0208Data[nRow].mnTrailStart;
172 if (nChar >= nFirst
173 && nChar <= pJisX0208Data[nRow].mnTrailEnd)
174 nUnicode = pJisX0208Data[nRow].
175 mpToUniTrailTab[nChar - nFirst];
176 if (nUnicode != 0)
177 if (pDestBufPtr != pDestBufEnd)
179 *pDestBufPtr++ = (sal_Unicode) nUnicode;
180 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
182 else
183 goto no_output;
184 else
185 goto bad_input;
187 else
189 bUndefined = false;
190 goto bad_input;
192 break;
194 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC:
195 switch (nChar)
197 case 0x24: // $
198 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR;
199 break;
201 case 0x28: // (
202 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN;
203 break;
205 default:
206 bUndefined = false;
207 goto bad_input;
209 break;
211 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN:
212 switch (nChar)
214 case 0x42: // A
215 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
216 break;
218 case 0x4A: // J
219 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN;
220 break;
222 default:
223 bUndefined = false;
224 goto bad_input;
226 break;
228 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR:
229 switch (nChar)
231 case 0x40: // @
232 case 0x42: // B
233 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
234 break;
236 default:
237 bUndefined = false;
238 goto bad_input;
240 break;
242 continue;
244 bad_input:
245 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
246 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
247 &nInfo))
249 case sal::detail::textenc::BAD_INPUT_STOP:
250 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
251 break;
253 case sal::detail::textenc::BAD_INPUT_CONTINUE:
254 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
255 continue;
257 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
258 goto no_output;
260 break;
262 no_output:
263 --pSrcBuf;
264 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
265 break;
268 if (eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
269 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
270 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
271 == 0)
273 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
274 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
275 else
276 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
277 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
278 &nInfo))
280 case sal::detail::textenc::BAD_INPUT_STOP:
281 case sal::detail::textenc::BAD_INPUT_CONTINUE:
282 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
283 break;
285 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
286 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
287 break;
291 if (pContext)
293 static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState = eState;
294 static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_nRow = nRow;
296 if (pInfo)
297 *pInfo = nInfo;
298 if (pSrcCvtBytes)
299 *pSrcCvtBytes = nConverted;
301 return pDestBufPtr - pDestBuf;
304 void * ImplCreateUnicodeToIso2022JpContext()
306 ImplUnicodeToIso2022JpContext * pContext =
307 new ImplUnicodeToIso2022JpContext;
308 pContext->m_nHighSurrogate = 0;
309 pContext->m_b0208 = false;
310 return pContext;
313 void ImplResetUnicodeToIso2022JpContext(void * pContext)
315 if (pContext)
317 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate = 0;
318 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208 = false;
322 void ImplDestroyUnicodeToIso2022JpContext(void * pContext)
324 delete static_cast< ImplUnicodeToIso2022JpContext * >(pContext);
327 sal_Size ImplConvertUnicodeToIso2022Jp(void const * pData,
328 void * pContext,
329 sal_Unicode const * pSrcBuf,
330 sal_Size nSrcChars,
331 char * pDestBuf,
332 sal_Size nDestBytes,
333 sal_uInt32 nFlags,
334 sal_uInt32 * pInfo,
335 sal_Size * pSrcCvtChars)
337 ImplUniToDBCSHighTab const * pJisX0208Data
338 = static_cast< ImplIso2022JpConverterData const * >(pData)->
339 m_pUnicodeToJisX0208Data;
340 sal_Unicode nHighSurrogate = 0;
341 bool b0208 = false;
342 sal_uInt32 nInfo = 0;
343 sal_Size nConverted = 0;
344 char * pDestBufPtr = pDestBuf;
345 char * pDestBufEnd = pDestBuf + nDestBytes;
346 bool bWritten;
348 if (pContext)
350 nHighSurrogate
351 = static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate;
352 b0208 = static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208;
355 for (; nConverted < nSrcChars; ++nConverted)
357 bool bUndefined = true;
358 sal_uInt32 nChar = *pSrcBuf++;
359 if (nHighSurrogate == 0)
361 if (ImplIsHighSurrogate(nChar))
363 nHighSurrogate = (sal_Unicode) nChar;
364 continue;
367 else if (ImplIsLowSurrogate(nChar))
368 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
369 else
371 bUndefined = false;
372 goto bad_input;
375 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
377 bUndefined = false;
378 goto bad_input;
381 if (nChar == 0x0A || nChar == 0x0D) // LF, CR
383 if (b0208)
385 if (pDestBufEnd - pDestBufPtr >= 3)
387 *pDestBufPtr++ = 0x1B; // ESC
388 *pDestBufPtr++ = 0x28; // (
389 *pDestBufPtr++ = 0x42; // B
390 b0208 = false;
392 else
393 goto no_output;
395 if (pDestBufPtr != pDestBufEnd)
396 *pDestBufPtr++ = static_cast< char >(nChar);
397 else
398 goto no_output;
400 else if (nChar == 0x1B)
401 goto bad_input;
402 else if (nChar < 0x80)
404 if (b0208)
406 if (pDestBufEnd - pDestBufPtr >= 3)
408 *pDestBufPtr++ = 0x1B; // ESC
409 *pDestBufPtr++ = 0x28; // (
410 *pDestBufPtr++ = 0x42; // B
411 b0208 = false;
413 else
414 goto no_output;
416 if (pDestBufPtr != pDestBufEnd)
417 *pDestBufPtr++ = static_cast< char >(nChar);
418 else
419 goto no_output;
421 else
423 sal_uInt16 nBytes = 0;
424 sal_uInt32 nIndex1 = nChar >> 8;
425 if (nIndex1 < 0x100)
427 sal_uInt32 nIndex2 = nChar & 0xFF;
428 sal_uInt32 nFirst = pJisX0208Data[nIndex1].mnLowStart;
429 if (nIndex2 >= nFirst
430 && nIndex2 <= pJisX0208Data[nIndex1].mnLowEnd)
432 nBytes = pJisX0208Data[nIndex1].
433 mpToUniTrailTab[nIndex2 - nFirst];
434 if (nBytes == 0)
435 // For some reason, the tables in tcvtjp4.tab do not
436 // include these two conversions:
437 switch (nChar)
439 case 0xA5: // YEN SIGN
440 nBytes = 0x216F;
441 break;
443 case 0xAF: // MACRON
444 nBytes = 0x2131;
445 break;
449 if (nBytes != 0)
451 if (!b0208)
453 if (pDestBufEnd - pDestBufPtr >= 3)
455 *pDestBufPtr++ = 0x1B; // ESC
456 *pDestBufPtr++ = 0x24; // $
457 *pDestBufPtr++ = 0x42; // B
458 b0208 = true;
460 else
461 goto no_output;
463 if (pDestBufEnd - pDestBufPtr >= 2)
465 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
466 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
468 else
469 goto no_output;
471 else
472 goto bad_input;
474 nHighSurrogate = 0;
475 continue;
477 bad_input:
478 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
479 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
480 &nInfo, "\x1B(B", b0208 ? 3 : 0, &bWritten))
482 case sal::detail::textenc::BAD_INPUT_STOP:
483 nHighSurrogate = 0;
484 break;
486 case sal::detail::textenc::BAD_INPUT_CONTINUE:
487 if (bWritten)
488 b0208 = false;
489 nHighSurrogate = 0;
490 continue;
492 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
493 goto no_output;
495 break;
497 no_output:
498 --pSrcBuf;
499 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
500 break;
503 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
504 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
505 == 0)
507 bool bFlush = true;
508 if (nHighSurrogate != 0)
510 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
511 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
512 else
513 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
514 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
515 "\x1B(B", b0208 ? 3 : 0, &bWritten))
517 case sal::detail::textenc::BAD_INPUT_STOP:
518 nHighSurrogate = 0;
519 bFlush = false;
520 break;
522 case sal::detail::textenc::BAD_INPUT_CONTINUE:
523 if (bWritten)
524 b0208 = false;
525 nHighSurrogate = 0;
526 break;
528 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
529 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
530 break;
533 if (bFlush
534 && b0208
535 && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
537 if (pDestBufEnd - pDestBufPtr >= 3)
539 *pDestBufPtr++ = 0x1B; // ESC
540 *pDestBufPtr++ = 0x28; // (
541 *pDestBufPtr++ = 0x42; // B
542 b0208 = false;
544 else
545 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
549 if (pContext)
551 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate
552 = nHighSurrogate;
553 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208 = b0208;
555 if (pInfo)
556 *pInfo = nInfo;
557 if (pSrcCvtChars)
558 *pSrcCvtChars = nConverted;
560 return pDestBufPtr - pDestBuf;
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */