bump product version to 5.0.4.1
[LibreOffice.git] / sal / textenc / convertiso2022kr.cxx
blob687e8dfc3db1af84295118226e01720d5cfb5aed
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "sal/config.h"
22 #include "rtl/textcvt.h"
23 #include "sal/types.h"
25 #include "converter.hxx"
26 #include "convertiso2022kr.hxx"
27 #include "tenchelp.hxx"
28 #include "unichars.hxx"
30 namespace {
32 enum ImplIso2022KrToUnicodeState // order is important:
34 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII,
35 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001,
36 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2,
37 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC,
38 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR,
39 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
42 struct ImplIso2022KrToUnicodeContext
44 ImplIso2022KrToUnicodeState m_eState;
45 sal_uInt32 m_nRow;
48 enum ImplUnicodeToIso2022KrSet
50 IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE,
51 IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII,
52 IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
55 struct ImplUnicodeToIso2022KrContext
57 sal_Unicode m_nHighSurrogate;
58 ImplUnicodeToIso2022KrSet m_eSet;
63 void * ImplCreateIso2022KrToUnicodeContext()
65 ImplIso2022KrToUnicodeContext * pContext =
66 new ImplIso2022KrToUnicodeContext;
67 pContext->m_eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
68 return pContext;
71 void ImplResetIso2022KrToUnicodeContext(void * pContext)
73 if (pContext)
74 static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_eState
75 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
78 void ImplDestroyIso2022KrToUnicodeContext(void * pContext)
80 delete static_cast< ImplIso2022KrToUnicodeContext * >(pContext);
83 sal_Size ImplConvertIso2022KrToUnicode(void const * pData,
84 void * pContext,
85 char const * pSrcBuf,
86 sal_Size nSrcBytes,
87 sal_Unicode * pDestBuf,
88 sal_Size nDestChars,
89 sal_uInt32 nFlags,
90 sal_uInt32 * pInfo,
91 sal_Size * pSrcCvtBytes)
93 ImplDBCSToUniLeadTab const * pKsX1001Data
94 = static_cast< ImplIso2022KrConverterData const * >(pData)->
95 m_pKsX1001ToUnicodeData;
96 ImplIso2022KrToUnicodeState eState
97 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
98 sal_uInt32 nRow = 0;
99 sal_uInt32 nInfo = 0;
100 sal_Size nConverted = 0;
101 sal_Unicode * pDestBufPtr = pDestBuf;
102 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
104 if (pContext)
106 eState = static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_eState;
107 nRow = static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_nRow;
110 for (; nConverted < nSrcBytes; ++nConverted)
112 bool bUndefined = true;
113 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
114 switch (eState)
116 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII:
117 if (nChar == 0x0E) // SO
118 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001;
119 else if (nChar == 0x1B) // ESC
120 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC;
121 else if (nChar < 0x80)
122 if (pDestBufPtr != pDestBufEnd)
123 *pDestBufPtr++ = (sal_Unicode) nChar;
124 else
125 goto no_output;
126 else
128 bUndefined = false;
129 goto bad_input;
131 break;
133 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001:
134 if (nChar == 0x0F) // SI
135 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
136 else if (nChar >= 0x21 && nChar <= 0x7E)
138 nRow = nChar + 0x80;
139 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2;
141 else
143 bUndefined = false;
144 goto bad_input;
146 break;
148 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2:
149 if (nChar >= 0x21 && nChar <= 0x7E)
151 sal_uInt16 nUnicode = 0;
152 sal_uInt32 nFirst = pKsX1001Data[nRow].mnTrailStart;
153 nChar += 0x80;
154 if (nChar >= nFirst && nChar <= pKsX1001Data[nRow].mnTrailEnd)
155 nUnicode = pKsX1001Data[nRow].
156 mpToUniTrailTab[nChar - nFirst];
157 if (nUnicode != 0)
158 if (pDestBufPtr != pDestBufEnd)
160 *pDestBufPtr++ = (sal_Unicode) nUnicode;
161 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001;
163 else
164 goto no_output;
165 else
166 goto bad_input;
168 else
170 bUndefined = false;
171 goto bad_input;
173 break;
175 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC:
176 if (nChar == 0x24) // $
177 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR;
178 else
180 bUndefined = false;
181 goto bad_input;
183 break;
185 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR:
186 if (nChar == 0x29) // )
187 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN;
188 else
190 bUndefined = false;
191 goto bad_input;
193 break;
195 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN:
196 if (nChar == 0x43) // C
197 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
198 else
200 bUndefined = false;
201 goto bad_input;
203 break;
205 continue;
207 bad_input:
208 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
209 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
210 &nInfo))
212 case sal::detail::textenc::BAD_INPUT_STOP:
213 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
214 break;
216 case sal::detail::textenc::BAD_INPUT_CONTINUE:
217 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
218 continue;
220 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
221 goto no_output;
223 break;
225 no_output:
226 --pSrcBuf;
227 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
228 break;
231 if (eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
232 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
233 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
234 == 0)
236 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
237 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
238 else
239 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
240 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
241 &nInfo))
243 case sal::detail::textenc::BAD_INPUT_STOP:
244 case sal::detail::textenc::BAD_INPUT_CONTINUE:
245 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
246 break;
248 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
249 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
250 break;
254 if (pContext)
256 static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_eState = eState;
257 static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_nRow = nRow;
259 if (pInfo)
260 *pInfo = nInfo;
261 if (pSrcCvtBytes)
262 *pSrcCvtBytes = nConverted;
264 return pDestBufPtr - pDestBuf;
267 void * ImplCreateUnicodeToIso2022KrContext()
269 ImplUnicodeToIso2022KrContext * pContext =
270 new ImplUnicodeToIso2022KrContext;
271 pContext->m_nHighSurrogate = 0;
272 pContext->m_eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
273 return pContext;
276 void ImplResetUnicodeToIso2022KrContext(void * pContext)
278 if (pContext)
280 static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_nHighSurrogate = 0;
281 static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_eSet
282 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
286 void ImplDestroyUnicodeToIso2022KrContext(void * pContext)
288 delete static_cast< ImplUnicodeToIso2022KrContext * >(pContext);
291 sal_Size ImplConvertUnicodeToIso2022Kr(void const * pData,
292 void * pContext,
293 sal_Unicode const * pSrcBuf,
294 sal_Size nSrcChars,
295 char * pDestBuf,
296 sal_Size nDestBytes,
297 sal_uInt32 nFlags,
298 sal_uInt32 * pInfo,
299 sal_Size * pSrcCvtChars)
301 ImplUniToDBCSHighTab const * pKsX1001Data
302 = static_cast< ImplIso2022KrConverterData const * >(pData)->
303 m_pUnicodeToKsX1001Data;
304 sal_Unicode nHighSurrogate = 0;
305 ImplUnicodeToIso2022KrSet eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
306 sal_uInt32 nInfo = 0;
307 sal_Size nConverted = 0;
308 char * pDestBufPtr = pDestBuf;
309 char * pDestBufEnd = pDestBuf + nDestBytes;
310 bool bWritten;
312 if (pContext)
314 nHighSurrogate
315 = static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_nHighSurrogate;
316 eSet = static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_eSet;
319 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE)
321 if (pDestBufEnd - pDestBufPtr >= 4)
323 *pDestBufPtr++ = 0x1B; // ESC
324 *pDestBufPtr++ = 0x24; // $
325 *pDestBufPtr++ = 0x29; // )
326 *pDestBufPtr++ = 0x43; // C
327 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
329 else
330 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
333 if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
334 for (; nConverted < nSrcChars; ++nConverted)
336 bool bUndefined = true;
337 sal_uInt32 nChar = *pSrcBuf++;
338 if (nHighSurrogate == 0)
340 if (ImplIsHighSurrogate(nChar))
342 nHighSurrogate = (sal_Unicode) nChar;
343 continue;
346 else if (ImplIsLowSurrogate(nChar))
347 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
348 else
350 bUndefined = false;
351 goto bad_input;
354 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
356 bUndefined = false;
357 goto bad_input;
360 if (nChar == 0x0A || nChar == 0x0D) // LF, CR
362 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001)
364 if (pDestBufPtr != pDestBufEnd)
366 *pDestBufPtr++ = 0x0F; // SI
367 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
369 else
370 goto no_output;
372 if (pDestBufPtr != pDestBufEnd)
373 *pDestBufPtr++ = static_cast< char >(nChar);
374 else
375 goto no_output;
377 else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B)
378 goto bad_input;
379 else if (nChar < 0x80)
381 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001)
383 if (pDestBufPtr != pDestBufEnd)
385 *pDestBufPtr++ = 0x0F; // SI
386 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
388 else
389 goto no_output;
391 if (pDestBufPtr != pDestBufEnd)
392 *pDestBufPtr++ = static_cast< char >(nChar);
393 else
394 goto no_output;
396 else
398 sal_uInt16 nBytes = 0;
399 sal_uInt32 nIndex1 = nChar >> 8;
400 if (nIndex1 < 0x100)
402 sal_uInt32 nIndex2 = nChar & 0xFF;
403 sal_uInt32 nFirst = pKsX1001Data[nIndex1].mnLowStart;
404 if (nIndex2 >= nFirst
405 && nIndex2 <= pKsX1001Data[nIndex1].mnLowEnd)
406 nBytes = pKsX1001Data[nIndex1].
407 mpToUniTrailTab[nIndex2 - nFirst];
409 if (nBytes != 0)
411 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII)
413 if (pDestBufPtr != pDestBufEnd)
415 *pDestBufPtr++ = 0x0E; // SO
416 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_1001;
418 else
419 goto no_output;
421 if (pDestBufEnd - pDestBufPtr >= 2)
423 *pDestBufPtr++ = static_cast< char >((nBytes >> 8) & 0x7F);
424 *pDestBufPtr++ = static_cast< char >(nBytes & 0x7F);
426 else
427 goto no_output;
429 else
430 goto bad_input;
432 nHighSurrogate = 0;
433 continue;
435 bad_input:
436 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
437 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
438 &nInfo, "\x0F" /* SI */,
439 eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 0 : 1,
440 &bWritten))
442 case sal::detail::textenc::BAD_INPUT_STOP:
443 nHighSurrogate = 0;
444 break;
446 case sal::detail::textenc::BAD_INPUT_CONTINUE:
447 if (bWritten)
448 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
449 nHighSurrogate = 0;
450 continue;
452 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
453 goto no_output;
455 break;
457 no_output:
458 --pSrcBuf;
459 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
460 break;
463 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
464 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
465 == 0)
467 bool bFlush = true;
468 if (nHighSurrogate != 0)
470 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
471 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
472 else
473 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
474 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
475 "\x0F" /* SI */,
476 (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
477 ? 0 : 1),
478 &bWritten))
480 case sal::detail::textenc::BAD_INPUT_STOP:
481 nHighSurrogate = 0;
482 bFlush = false;
483 break;
485 case sal::detail::textenc::BAD_INPUT_CONTINUE:
486 if (bWritten)
487 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
488 nHighSurrogate = 0;
489 break;
491 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
492 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
493 break;
496 if (bFlush
497 && eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
498 && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
500 if (pDestBufPtr != pDestBufEnd)
502 *pDestBufPtr++ = 0x0F; // SI
503 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
505 else
506 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
510 if (pContext)
512 static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_nHighSurrogate
513 = nHighSurrogate;
514 static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_eSet = eSet;
516 if (pInfo)
517 *pInfo = nInfo;
518 if (pSrcCvtChars)
519 *pSrcCvtChars = nConverted;
521 return pDestBufPtr - pDestBuf;
524 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */