Update ooo320-m1
[ooovba.git] / sal / textenc / convertiso2022jp.c
blob20cf5286038e1913de27e316ec73e99b7ad1f001
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: convertiso2022jp.c,v $
10 * $Revision: 1.10 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #include "convertiso2022jp.h"
32 #include "context.h"
33 #include "converter.h"
34 #include "tenchelp.h"
35 #include "unichars.h"
36 #include "rtl/alloc.h"
37 #include "rtl/textcvt.h"
38 #include "sal/types.h"
/* Decoder states for the ISO-2022-JP to Unicode conversion.
 *
 * Order is important: ImplConvertIso2022JpToUnicode treats every state that
 * compares greater than IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208 as "in the
 * middle of a multi-byte unit" (second byte of a JIS X 0208 pair pending, or
 * an escape sequence only partially read).
 */
typedef enum
{
    /* Complete-character states (nothing pending): */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII,      /* after ESC ( B */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN,  /* after ESC ( J */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208,       /* after ESC $ @ or ESC $ B */

    /* Incomplete states (more input is needed): */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2,     /* first byte of a pair read */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC,        /* ESC read */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN, /* ESC ( read */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR  /* ESC $ read */
} ImplIso2022JpToUnicodeState;
51 typedef struct
53 ImplIso2022JpToUnicodeState m_eState;
54 sal_uInt32 m_nRow;
55 } ImplIso2022JpToUnicodeContext;
57 typedef struct
59 sal_Unicode m_nHighSurrogate;
60 sal_Bool m_b0208;
61 } ImplUnicodeToIso2022JpContext;
63 void * ImplCreateIso2022JpToUnicodeContext(void)
65 void * pContext
66 = rtl_allocateMemory(sizeof (ImplIso2022JpToUnicodeContext));
67 ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState
68 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
69 return pContext;
72 void ImplResetIso2022JpToUnicodeContext(void * pContext)
74 if (pContext)
75 ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState
76 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
79 sal_Size ImplConvertIso2022JpToUnicode(ImplTextConverterData const * pData,
80 void * pContext,
81 sal_Char const * pSrcBuf,
82 sal_Size nSrcBytes,
83 sal_Unicode * pDestBuf,
84 sal_Size nDestChars,
85 sal_uInt32 nFlags,
86 sal_uInt32 * pInfo,
87 sal_Size * pSrcCvtBytes)
89 ImplDBCSToUniLeadTab const * pJisX0208Data
90 = ((ImplIso2022JpConverterData const *) pData)->
91 m_pJisX0208ToUnicodeData;
92 ImplIso2022JpToUnicodeState eState
93 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
94 sal_uInt32 nRow = 0;
95 sal_uInt32 nInfo = 0;
96 sal_Size nConverted = 0;
97 sal_Unicode * pDestBufPtr = pDestBuf;
98 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
100 if (pContext)
102 eState = ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState;
103 nRow = ((ImplIso2022JpToUnicodeContext *) pContext)->m_nRow;
106 for (; nConverted < nSrcBytes; ++nConverted)
108 sal_Bool bUndefined = sal_True;
109 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
110 switch (eState)
112 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII:
113 if (nChar == 0x1B) /* ESC */
114 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
115 else if (nChar < 0x80)
116 if (pDestBufPtr != pDestBufEnd)
117 *pDestBufPtr++ = (sal_Unicode) nChar;
118 else
119 goto no_output;
120 else
122 bUndefined = sal_False;
123 goto bad_input;
125 break;
127 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN:
128 if (nChar == 0x1B) /* ESC */
129 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
130 else if (nChar < 0x80)
131 if (pDestBufPtr != pDestBufEnd)
133 switch (nChar)
135 case 0x5C: /* \ */
136 nChar = 0xA5; /* YEN SIGN */
137 break;
139 case 0x7E: /* ~ */
140 nChar = 0xAF; /* MACRON */
141 break;
143 *pDestBufPtr++ = (sal_Unicode) nChar;
145 else
146 goto no_output;
147 else
149 bUndefined = sal_False;
150 goto bad_input;
152 break;
154 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208:
155 if (nChar == 0x1B) /* ESC */
156 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
157 else if (nChar >= 0x21 && nChar <= 0x7E)
159 nRow = nChar;
160 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2;
162 else
164 bUndefined = sal_False;
165 goto bad_input;
167 break;
169 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2:
170 if (nChar >= 0x21 && nChar <= 0x7E)
172 sal_uInt16 nUnicode = 0;
173 sal_uInt32 nFirst = pJisX0208Data[nRow].mnTrailStart;
174 if (nChar >= nFirst
175 && nChar <= pJisX0208Data[nRow].mnTrailEnd)
176 nUnicode = pJisX0208Data[nRow].
177 mpToUniTrailTab[nChar - nFirst];
178 if (nUnicode != 0)
179 if (pDestBufPtr != pDestBufEnd)
181 *pDestBufPtr++ = (sal_Unicode) nUnicode;
182 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
184 else
185 goto no_output;
186 else
187 goto bad_input;
189 else
191 bUndefined = sal_False;
192 goto bad_input;
194 break;
196 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC:
197 switch (nChar)
199 case 0x24: /* $ */
200 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR;
201 break;
203 case 0x28: /* ( */
204 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN;
205 break;
207 default:
208 bUndefined = sal_False;
209 goto bad_input;
211 break;
213 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN:
214 switch (nChar)
216 case 0x42: /* A */
217 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
218 break;
220 case 0x4A: /* J */
221 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN;
222 break;
224 default:
225 bUndefined = sal_False;
226 goto bad_input;
228 break;
230 case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR:
231 switch (nChar)
233 case 0x40: /* @ */
234 case 0x42: /* B */
235 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
236 break;
238 default:
239 bUndefined = sal_False;
240 goto bad_input;
242 break;
244 continue;
246 bad_input:
247 switch (ImplHandleBadInputTextToUnicodeConversion(
248 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
249 &nInfo))
251 case IMPL_BAD_INPUT_STOP:
252 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
253 break;
255 case IMPL_BAD_INPUT_CONTINUE:
256 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
257 continue;
259 case IMPL_BAD_INPUT_NO_OUTPUT:
260 goto no_output;
262 break;
264 no_output:
265 --pSrcBuf;
266 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
267 break;
270 if (eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
271 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
272 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
273 == 0)
275 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
276 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
277 else
278 switch (ImplHandleBadInputTextToUnicodeConversion(
279 sal_False, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
280 &nInfo))
282 case IMPL_BAD_INPUT_STOP:
283 case IMPL_BAD_INPUT_CONTINUE:
284 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
285 break;
287 case IMPL_BAD_INPUT_NO_OUTPUT:
288 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
289 break;
293 if (pContext)
295 ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState = eState;
296 ((ImplIso2022JpToUnicodeContext *) pContext)->m_nRow = nRow;
298 if (pInfo)
299 *pInfo = nInfo;
300 if (pSrcCvtBytes)
301 *pSrcCvtBytes = nConverted;
303 return pDestBufPtr - pDestBuf;
306 void * ImplCreateUnicodeToIso2022JpContext(void)
308 void * pContext
309 = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022JpContext));
310 ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate = 0;
311 ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = sal_False;
312 return pContext;
315 void ImplResetUnicodeToIso2022JpContext(void * pContext)
317 if (pContext)
319 ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate = 0;
320 ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = sal_False;
324 sal_Size ImplConvertUnicodeToIso2022Jp(ImplTextConverterData const * pData,
325 void * pContext,
326 sal_Unicode const * pSrcBuf,
327 sal_Size nSrcChars,
328 sal_Char * pDestBuf,
329 sal_Size nDestBytes,
330 sal_uInt32 nFlags,
331 sal_uInt32 * pInfo,
332 sal_Size * pSrcCvtChars)
334 ImplUniToDBCSHighTab const * pJisX0208Data
335 = ((ImplIso2022JpConverterData const *) pData)->
336 m_pUnicodeToJisX0208Data;
337 sal_Unicode nHighSurrogate = 0;
338 sal_Bool b0208 = sal_False;
339 sal_uInt32 nInfo = 0;
340 sal_Size nConverted = 0;
341 sal_Char * pDestBufPtr = pDestBuf;
342 sal_Char * pDestBufEnd = pDestBuf + nDestBytes;
343 sal_Bool bWritten;
345 if (pContext)
347 nHighSurrogate
348 = ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate;
349 b0208 = ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208;
352 for (; nConverted < nSrcChars; ++nConverted)
354 sal_Bool bUndefined = sal_True;
355 sal_uInt32 nChar = *pSrcBuf++;
356 if (nHighSurrogate == 0)
358 if (ImplIsHighSurrogate(nChar))
360 nHighSurrogate = (sal_Unicode) nChar;
361 continue;
364 else if (ImplIsLowSurrogate(nChar))
365 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
366 else
368 bUndefined = sal_False;
369 goto bad_input;
372 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
374 bUndefined = sal_False;
375 goto bad_input;
378 if (nChar == 0x0A || nChar == 0x0D) /* LF, CR */
380 if (b0208)
382 if (pDestBufEnd - pDestBufPtr >= 3)
384 *pDestBufPtr++ = 0x1B; /* ESC */
385 *pDestBufPtr++ = 0x28; /* ( */
386 *pDestBufPtr++ = 0x42; /* B */
387 b0208 = sal_False;
389 else
390 goto no_output;
392 if (pDestBufPtr != pDestBufEnd)
393 *pDestBufPtr++ = (sal_Char) nChar;
394 else
395 goto no_output;
397 else if (nChar == 0x1B)
398 goto bad_input;
399 else if (nChar < 0x80)
401 if (b0208)
403 if (pDestBufEnd - pDestBufPtr >= 3)
405 *pDestBufPtr++ = 0x1B; /* ESC */
406 *pDestBufPtr++ = 0x28; /* ( */
407 *pDestBufPtr++ = 0x42; /* B */
408 b0208 = sal_False;
410 else
411 goto no_output;
413 if (pDestBufPtr != pDestBufEnd)
414 *pDestBufPtr++ = (sal_Char) nChar;
415 else
416 goto no_output;
418 else
420 sal_uInt16 nBytes = 0;
421 sal_uInt32 nIndex1 = nChar >> 8;
422 if (nIndex1 < 0x100)
424 sal_uInt32 nIndex2 = nChar & 0xFF;
425 sal_uInt32 nFirst = pJisX0208Data[nIndex1].mnLowStart;
426 if (nIndex2 >= nFirst
427 && nIndex2 <= pJisX0208Data[nIndex1].mnLowEnd)
429 nBytes = pJisX0208Data[nIndex1].
430 mpToUniTrailTab[nIndex2 - nFirst];
431 if (nBytes == 0)
432 /* For some reason, the tables in tcvtjp4.tab do not
433 include these two conversions: */
434 switch (nChar)
436 case 0xA5: /* YEN SIGN */
437 nBytes = 0x216F;
438 break;
440 case 0xAF: /* MACRON */
441 nBytes = 0x2131;
442 break;
446 if (nBytes != 0)
448 if (!b0208)
450 if (pDestBufEnd - pDestBufPtr >= 3)
452 *pDestBufPtr++ = 0x1B; /* ESC */
453 *pDestBufPtr++ = 0x24; /* $ */
454 *pDestBufPtr++ = 0x42; /* B */
455 b0208 = sal_True;
457 else
458 goto no_output;
460 if (pDestBufEnd - pDestBufPtr >= 2)
462 *pDestBufPtr++ = (sal_Char) (nBytes >> 8);
463 *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF);
465 else
466 goto no_output;
468 else
469 goto bad_input;
471 nHighSurrogate = 0;
472 continue;
474 bad_input:
475 switch (ImplHandleBadInputUnicodeToTextConversion(
476 bUndefined,
477 nChar,
478 nFlags,
479 &pDestBufPtr,
480 pDestBufEnd,
481 &nInfo,
482 "\x1B(B",
483 b0208 ? 3 : 0,
484 &bWritten))
486 case IMPL_BAD_INPUT_STOP:
487 nHighSurrogate = 0;
488 break;
490 case IMPL_BAD_INPUT_CONTINUE:
491 if (bWritten)
492 b0208 = sal_False;
493 nHighSurrogate = 0;
494 continue;
496 case IMPL_BAD_INPUT_NO_OUTPUT:
497 goto no_output;
499 break;
501 no_output:
502 --pSrcBuf;
503 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
504 break;
507 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
508 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
509 == 0)
511 sal_Bool bFlush = sal_True;
512 if (nHighSurrogate != 0)
514 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
515 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
516 else
517 switch (ImplHandleBadInputUnicodeToTextConversion(
518 sal_False,
520 nFlags,
521 &pDestBufPtr,
522 pDestBufEnd,
523 &nInfo,
524 "\x1B(B",
525 b0208 ? 3 : 0,
526 &bWritten))
528 case IMPL_BAD_INPUT_STOP:
529 nHighSurrogate = 0;
530 bFlush = sal_False;
531 break;
533 case IMPL_BAD_INPUT_CONTINUE:
534 if (bWritten)
535 b0208 = sal_False;
536 nHighSurrogate = 0;
537 break;
539 case IMPL_BAD_INPUT_NO_OUTPUT:
540 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
541 break;
544 if (bFlush
545 && b0208
546 && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
548 if (pDestBufEnd - pDestBufPtr >= 3)
550 *pDestBufPtr++ = 0x1B; /* ESC */
551 *pDestBufPtr++ = 0x28; /* ( */
552 *pDestBufPtr++ = 0x42; /* B */
553 b0208 = sal_False;
555 else
556 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
560 if (pContext)
562 ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate
563 = nHighSurrogate;
564 ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = b0208;
566 if (pInfo)
567 *pInfo = nInfo;
568 if (pSrcCvtChars)
569 *pSrcCvtChars = nConverted;
571 return pDestBufPtr - pDestBuf;