Bump for 3.6-28
[LibreOffice.git] / sal / textenc / convertiso2022jp.cxx
blob98ede65d062095d2a5db86895cc01cae190793bd
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "sal/config.h"
31 #include "rtl/textcvt.h"
32 #include "sal/types.h"
34 #include "context.hxx"
35 #include "converter.hxx"
36 #include "convertiso2022jp.hxx"
37 #include "tenchelp.hxx"
38 #include "unichars.hxx"
40 namespace {
42 enum ImplIso2022JpToUnicodeState // order is important:
44 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII,
45 IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN,
46 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208,
47 IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2,
48 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC,
49 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN,
50 IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR
53 struct ImplIso2022JpToUnicodeContext
55 ImplIso2022JpToUnicodeState m_eState;
56 sal_uInt32 m_nRow;
59 struct ImplUnicodeToIso2022JpContext
61 sal_Unicode m_nHighSurrogate;
62 bool m_b0208;
67 void * ImplCreateIso2022JpToUnicodeContext()
69 ImplIso2022JpToUnicodeContext * pContext =
70 new ImplIso2022JpToUnicodeContext;
71 pContext->m_eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
72 return pContext;
75 void ImplResetIso2022JpToUnicodeContext(void * pContext)
77 if (pContext)
78 static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState
79 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
82 void ImplDestroyIso2022JpToUnicodeContext(void * pContext)
84 delete static_cast< ImplIso2022JpToUnicodeContext * >(pContext);
87 sal_Size ImplConvertIso2022JpToUnicode(void const * pData,
88 void * pContext,
89 char const * pSrcBuf,
90 sal_Size nSrcBytes,
91 sal_Unicode * pDestBuf,
92 sal_Size nDestChars,
93 sal_uInt32 nFlags,
94 sal_uInt32 * pInfo,
95 sal_Size * pSrcCvtBytes)
97 ImplDBCSToUniLeadTab const * pJisX0208Data
98 = static_cast< ImplIso2022JpConverterData const * >(pData)->
99 m_pJisX0208ToUnicodeData;
100 ImplIso2022JpToUnicodeState eState
101 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
102 sal_uInt32 nRow = 0;
103 sal_uInt32 nInfo = 0;
104 sal_Size nConverted = 0;
105 sal_Unicode * pDestBufPtr = pDestBuf;
106 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
108 if (pContext)
110 eState = static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState;
111 nRow = static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_nRow;
114 for (; nConverted < nSrcBytes; ++nConverted)
116 bool bUndefined = true;
117 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
118 switch (eState)
120 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII:
121 if (nChar == 0x1B) // ESC
122 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
123 else if (nChar < 0x80)
124 if (pDestBufPtr != pDestBufEnd)
125 *pDestBufPtr++ = (sal_Unicode) nChar;
126 else
127 goto no_output;
128 else
130 bUndefined = false;
131 goto bad_input;
133 break;
135 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN:
136 if (nChar == 0x1B) // ESC
137 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
138 else if (nChar < 0x80)
139 if (pDestBufPtr != pDestBufEnd)
141 switch (nChar)
143 case 0x5C: // REVERSE SOLIDUS (\)
144 nChar = 0xA5; // YEN SIGN
145 break;
147 case 0x7E: // ~
148 nChar = 0xAF; // MACRON
149 break;
151 *pDestBufPtr++ = (sal_Unicode) nChar;
153 else
154 goto no_output;
155 else
157 bUndefined = false;
158 goto bad_input;
160 break;
162 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208:
163 if (nChar == 0x1B) // ESC
164 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
165 else if (nChar >= 0x21 && nChar <= 0x7E)
167 nRow = nChar;
168 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2;
170 else
172 bUndefined = false;
173 goto bad_input;
175 break;
177 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2:
178 if (nChar >= 0x21 && nChar <= 0x7E)
180 sal_uInt16 nUnicode = 0;
181 sal_uInt32 nFirst = pJisX0208Data[nRow].mnTrailStart;
182 if (nChar >= nFirst
183 && nChar <= pJisX0208Data[nRow].mnTrailEnd)
184 nUnicode = pJisX0208Data[nRow].
185 mpToUniTrailTab[nChar - nFirst];
186 if (nUnicode != 0)
187 if (pDestBufPtr != pDestBufEnd)
189 *pDestBufPtr++ = (sal_Unicode) nUnicode;
190 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
192 else
193 goto no_output;
194 else
195 goto bad_input;
197 else
199 bUndefined = false;
200 goto bad_input;
202 break;
204 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC:
205 switch (nChar)
207 case 0x24: // $
208 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR;
209 break;
211 case 0x28: // (
212 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN;
213 break;
215 default:
216 bUndefined = false;
217 goto bad_input;
219 break;
221 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN:
222 switch (nChar)
224 case 0x42: // A
225 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
226 break;
228 case 0x4A: // J
229 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN;
230 break;
232 default:
233 bUndefined = false;
234 goto bad_input;
236 break;
238 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR:
239 switch (nChar)
241 case 0x40: // @
242 case 0x42: // B
243 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
244 break;
246 default:
247 bUndefined = false;
248 goto bad_input;
250 break;
252 continue;
254 bad_input:
255 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
256 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
257 &nInfo))
259 case sal::detail::textenc::BAD_INPUT_STOP:
260 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
261 break;
263 case sal::detail::textenc::BAD_INPUT_CONTINUE:
264 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
265 continue;
267 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
268 goto no_output;
270 break;
272 no_output:
273 --pSrcBuf;
274 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
275 break;
278 if (eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
279 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
280 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
281 == 0)
283 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
284 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
285 else
286 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
287 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
288 &nInfo))
290 case sal::detail::textenc::BAD_INPUT_STOP:
291 case sal::detail::textenc::BAD_INPUT_CONTINUE:
292 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
293 break;
295 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
296 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
297 break;
301 if (pContext)
303 static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_eState = eState;
304 static_cast< ImplIso2022JpToUnicodeContext * >(pContext)->m_nRow = nRow;
306 if (pInfo)
307 *pInfo = nInfo;
308 if (pSrcCvtBytes)
309 *pSrcCvtBytes = nConverted;
311 return pDestBufPtr - pDestBuf;
314 void * ImplCreateUnicodeToIso2022JpContext()
316 ImplUnicodeToIso2022JpContext * pContext =
317 new ImplUnicodeToIso2022JpContext;
318 pContext->m_nHighSurrogate = 0;
319 pContext->m_b0208 = false;
320 return pContext;
323 void ImplResetUnicodeToIso2022JpContext(void * pContext)
325 if (pContext)
327 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate = 0;
328 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208 = false;
332 void ImplDestroyUnicodeToIso2022JpContext(void * pContext)
334 delete static_cast< ImplUnicodeToIso2022JpContext * >(pContext);
337 sal_Size ImplConvertUnicodeToIso2022Jp(void const * pData,
338 void * pContext,
339 sal_Unicode const * pSrcBuf,
340 sal_Size nSrcChars,
341 char * pDestBuf,
342 sal_Size nDestBytes,
343 sal_uInt32 nFlags,
344 sal_uInt32 * pInfo,
345 sal_Size * pSrcCvtChars)
347 ImplUniToDBCSHighTab const * pJisX0208Data
348 = static_cast< ImplIso2022JpConverterData const * >(pData)->
349 m_pUnicodeToJisX0208Data;
350 sal_Unicode nHighSurrogate = 0;
351 bool b0208 = false;
352 sal_uInt32 nInfo = 0;
353 sal_Size nConverted = 0;
354 char * pDestBufPtr = pDestBuf;
355 char * pDestBufEnd = pDestBuf + nDestBytes;
356 bool bWritten;
358 if (pContext)
360 nHighSurrogate
361 = static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate;
362 b0208 = static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208;
365 for (; nConverted < nSrcChars; ++nConverted)
367 bool bUndefined = true;
368 sal_uInt32 nChar = *pSrcBuf++;
369 if (nHighSurrogate == 0)
371 if (ImplIsHighSurrogate(nChar))
373 nHighSurrogate = (sal_Unicode) nChar;
374 continue;
377 else if (ImplIsLowSurrogate(nChar))
378 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
379 else
381 bUndefined = false;
382 goto bad_input;
385 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
387 bUndefined = false;
388 goto bad_input;
391 if (nChar == 0x0A || nChar == 0x0D) // LF, CR
393 if (b0208)
395 if (pDestBufEnd - pDestBufPtr >= 3)
397 *pDestBufPtr++ = 0x1B; // ESC
398 *pDestBufPtr++ = 0x28; // (
399 *pDestBufPtr++ = 0x42; // B
400 b0208 = false;
402 else
403 goto no_output;
405 if (pDestBufPtr != pDestBufEnd)
406 *pDestBufPtr++ = static_cast< char >(nChar);
407 else
408 goto no_output;
410 else if (nChar == 0x1B)
411 goto bad_input;
412 else if (nChar < 0x80)
414 if (b0208)
416 if (pDestBufEnd - pDestBufPtr >= 3)
418 *pDestBufPtr++ = 0x1B; // ESC
419 *pDestBufPtr++ = 0x28; // (
420 *pDestBufPtr++ = 0x42; // B
421 b0208 = false;
423 else
424 goto no_output;
426 if (pDestBufPtr != pDestBufEnd)
427 *pDestBufPtr++ = static_cast< char >(nChar);
428 else
429 goto no_output;
431 else
433 sal_uInt16 nBytes = 0;
434 sal_uInt32 nIndex1 = nChar >> 8;
435 if (nIndex1 < 0x100)
437 sal_uInt32 nIndex2 = nChar & 0xFF;
438 sal_uInt32 nFirst = pJisX0208Data[nIndex1].mnLowStart;
439 if (nIndex2 >= nFirst
440 && nIndex2 <= pJisX0208Data[nIndex1].mnLowEnd)
442 nBytes = pJisX0208Data[nIndex1].
443 mpToUniTrailTab[nIndex2 - nFirst];
444 if (nBytes == 0)
445 // For some reason, the tables in tcvtjp4.tab do not
446 // include these two conversions:
447 switch (nChar)
449 case 0xA5: // YEN SIGN
450 nBytes = 0x216F;
451 break;
453 case 0xAF: // MACRON
454 nBytes = 0x2131;
455 break;
459 if (nBytes != 0)
461 if (!b0208)
463 if (pDestBufEnd - pDestBufPtr >= 3)
465 *pDestBufPtr++ = 0x1B; // ESC
466 *pDestBufPtr++ = 0x24; // $
467 *pDestBufPtr++ = 0x42; // B
468 b0208 = true;
470 else
471 goto no_output;
473 if (pDestBufEnd - pDestBufPtr >= 2)
475 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
476 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
478 else
479 goto no_output;
481 else
482 goto bad_input;
484 nHighSurrogate = 0;
485 continue;
487 bad_input:
488 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
489 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
490 &nInfo, "\x1B(B", b0208 ? 3 : 0, &bWritten))
492 case sal::detail::textenc::BAD_INPUT_STOP:
493 nHighSurrogate = 0;
494 break;
496 case sal::detail::textenc::BAD_INPUT_CONTINUE:
497 if (bWritten)
498 b0208 = false;
499 nHighSurrogate = 0;
500 continue;
502 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
503 goto no_output;
505 break;
507 no_output:
508 --pSrcBuf;
509 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
510 break;
513 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
514 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
515 == 0)
517 bool bFlush = true;
518 if (nHighSurrogate != 0)
520 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
521 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
522 else
523 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
524 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
525 "\x1B(B", b0208 ? 3 : 0, &bWritten))
527 case sal::detail::textenc::BAD_INPUT_STOP:
528 nHighSurrogate = 0;
529 bFlush = false;
530 break;
532 case sal::detail::textenc::BAD_INPUT_CONTINUE:
533 if (bWritten)
534 b0208 = false;
535 nHighSurrogate = 0;
536 break;
538 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
539 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
540 break;
543 if (bFlush
544 && b0208
545 && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
547 if (pDestBufEnd - pDestBufPtr >= 3)
549 *pDestBufPtr++ = 0x1B; // ESC
550 *pDestBufPtr++ = 0x28; // (
551 *pDestBufPtr++ = 0x42; // B
552 b0208 = false;
554 else
555 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
559 if (pContext)
561 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_nHighSurrogate
562 = nHighSurrogate;
563 static_cast< ImplUnicodeToIso2022JpContext * >(pContext)->m_b0208 = b0208;
565 if (pInfo)
566 *pInfo = nInfo;
567 if (pSrcCvtChars)
568 *pSrcCvtChars = nConverted;
570 return pDestBufPtr - pDestBuf;
573 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */