// Version 4.2.0.1, tag libreoffice-4.2.0.1
// [LibreOffice.git] / sal / textenc / convertiso2022kr.cxx
// blob 50e209b5649e53aaedd9e53af83ed76cc449d317
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "sal/config.h"
22 #include "rtl/textcvt.h"
23 #include "sal/types.h"
25 #include "context.hxx"
26 #include "converter.hxx"
27 #include "convertiso2022kr.hxx"
28 #include "tenchelp.hxx"
29 #include "unichars.hxx"
31 namespace {
33 enum ImplIso2022KrToUnicodeState // order is important:
35 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII,
36 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001,
37 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2,
38 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC,
39 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR,
40 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN
43 struct ImplIso2022KrToUnicodeContext
45 ImplIso2022KrToUnicodeState m_eState;
46 sal_uInt32 m_nRow;
49 enum ImplUnicodeToIso2022KrSet
51 IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE,
52 IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII,
53 IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
56 struct ImplUnicodeToIso2022KrContext
58 sal_Unicode m_nHighSurrogate;
59 ImplUnicodeToIso2022KrSet m_eSet;
64 void * ImplCreateIso2022KrToUnicodeContext()
66 ImplIso2022KrToUnicodeContext * pContext =
67 new ImplIso2022KrToUnicodeContext;
68 pContext->m_eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
69 return pContext;
72 void ImplResetIso2022KrToUnicodeContext(void * pContext)
74 if (pContext)
75 static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_eState
76 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
79 void ImplDestroyIso2022KrToUnicodeContext(void * pContext)
81 delete static_cast< ImplIso2022KrToUnicodeContext * >(pContext);
84 sal_Size ImplConvertIso2022KrToUnicode(void const * pData,
85 void * pContext,
86 char const * pSrcBuf,
87 sal_Size nSrcBytes,
88 sal_Unicode * pDestBuf,
89 sal_Size nDestChars,
90 sal_uInt32 nFlags,
91 sal_uInt32 * pInfo,
92 sal_Size * pSrcCvtBytes)
94 ImplDBCSToUniLeadTab const * pKsX1001Data
95 = static_cast< ImplIso2022KrConverterData const * >(pData)->
96 m_pKsX1001ToUnicodeData;
97 ImplIso2022KrToUnicodeState eState
98 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
99 sal_uInt32 nRow = 0;
100 sal_uInt32 nInfo = 0;
101 sal_Size nConverted = 0;
102 sal_Unicode * pDestBufPtr = pDestBuf;
103 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
105 if (pContext)
107 eState = static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_eState;
108 nRow = static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_nRow;
111 for (; nConverted < nSrcBytes; ++nConverted)
113 bool bUndefined = true;
114 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
115 switch (eState)
117 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII:
118 if (nChar == 0x0E) // SO
119 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001;
120 else if (nChar == 0x1B) // ESC
121 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC;
122 else if (nChar < 0x80)
123 if (pDestBufPtr != pDestBufEnd)
124 *pDestBufPtr++ = (sal_Unicode) nChar;
125 else
126 goto no_output;
127 else
129 bUndefined = false;
130 goto bad_input;
132 break;
134 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001:
135 if (nChar == 0x0F) // SI
136 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
137 else if (nChar >= 0x21 && nChar <= 0x7E)
139 nRow = nChar + 0x80;
140 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2;
142 else
144 bUndefined = false;
145 goto bad_input;
147 break;
149 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2:
150 if (nChar >= 0x21 && nChar <= 0x7E)
152 sal_uInt16 nUnicode = 0;
153 sal_uInt32 nFirst = pKsX1001Data[nRow].mnTrailStart;
154 nChar += 0x80;
155 if (nChar >= nFirst && nChar <= pKsX1001Data[nRow].mnTrailEnd)
156 nUnicode = pKsX1001Data[nRow].
157 mpToUniTrailTab[nChar - nFirst];
158 if (nUnicode != 0)
159 if (pDestBufPtr != pDestBufEnd)
161 *pDestBufPtr++ = (sal_Unicode) nUnicode;
162 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001;
164 else
165 goto no_output;
166 else
167 goto bad_input;
169 else
171 bUndefined = false;
172 goto bad_input;
174 break;
176 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC:
177 if (nChar == 0x24) // $
178 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR;
179 else
181 bUndefined = false;
182 goto bad_input;
184 break;
186 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR:
187 if (nChar == 0x29) // )
188 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN;
189 else
191 bUndefined = false;
192 goto bad_input;
194 break;
196 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN:
197 if (nChar == 0x43) // C
198 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
199 else
201 bUndefined = false;
202 goto bad_input;
204 break;
206 continue;
208 bad_input:
209 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
210 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
211 &nInfo))
213 case sal::detail::textenc::BAD_INPUT_STOP:
214 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
215 break;
217 case sal::detail::textenc::BAD_INPUT_CONTINUE:
218 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
219 continue;
221 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
222 goto no_output;
224 break;
226 no_output:
227 --pSrcBuf;
228 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
229 break;
232 if (eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001
233 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
234 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
235 == 0)
237 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
238 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
239 else
240 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
241 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
242 &nInfo))
244 case sal::detail::textenc::BAD_INPUT_STOP:
245 case sal::detail::textenc::BAD_INPUT_CONTINUE:
246 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII;
247 break;
249 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
250 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
251 break;
255 if (pContext)
257 static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_eState = eState;
258 static_cast< ImplIso2022KrToUnicodeContext * >(pContext)->m_nRow = nRow;
260 if (pInfo)
261 *pInfo = nInfo;
262 if (pSrcCvtBytes)
263 *pSrcCvtBytes = nConverted;
265 return pDestBufPtr - pDestBuf;
268 void * ImplCreateUnicodeToIso2022KrContext()
270 ImplUnicodeToIso2022KrContext * pContext =
271 new ImplUnicodeToIso2022KrContext;
272 pContext->m_nHighSurrogate = 0;
273 pContext->m_eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
274 return pContext;
277 void ImplResetUnicodeToIso2022KrContext(void * pContext)
279 if (pContext)
281 static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_nHighSurrogate = 0;
282 static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_eSet
283 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
287 void ImplDestroyUnicodeToIso2022KrContext(void * pContext)
289 delete static_cast< ImplUnicodeToIso2022KrContext * >(pContext);
292 sal_Size ImplConvertUnicodeToIso2022Kr(void const * pData,
293 void * pContext,
294 sal_Unicode const * pSrcBuf,
295 sal_Size nSrcChars,
296 char * pDestBuf,
297 sal_Size nDestBytes,
298 sal_uInt32 nFlags,
299 sal_uInt32 * pInfo,
300 sal_Size * pSrcCvtChars)
302 ImplUniToDBCSHighTab const * pKsX1001Data
303 = static_cast< ImplIso2022KrConverterData const * >(pData)->
304 m_pUnicodeToKsX1001Data;
305 sal_Unicode nHighSurrogate = 0;
306 ImplUnicodeToIso2022KrSet eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE;
307 sal_uInt32 nInfo = 0;
308 sal_Size nConverted = 0;
309 char * pDestBufPtr = pDestBuf;
310 char * pDestBufEnd = pDestBuf + nDestBytes;
311 bool bWritten;
313 if (pContext)
315 nHighSurrogate
316 = static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_nHighSurrogate;
317 eSet = static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_eSet;
320 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE)
322 if (pDestBufEnd - pDestBufPtr >= 4)
324 *pDestBufPtr++ = 0x1B; // ESC
325 *pDestBufPtr++ = 0x24; // $
326 *pDestBufPtr++ = 0x29; // )
327 *pDestBufPtr++ = 0x43; // C
328 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
330 else
331 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
334 if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
335 for (; nConverted < nSrcChars; ++nConverted)
337 bool bUndefined = true;
338 sal_uInt32 nChar = *pSrcBuf++;
339 if (nHighSurrogate == 0)
341 if (ImplIsHighSurrogate(nChar))
343 nHighSurrogate = (sal_Unicode) nChar;
344 continue;
347 else if (ImplIsLowSurrogate(nChar))
348 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
349 else
351 bUndefined = false;
352 goto bad_input;
355 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
357 bUndefined = false;
358 goto bad_input;
361 if (nChar == 0x0A || nChar == 0x0D) // LF, CR
363 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001)
365 if (pDestBufPtr != pDestBufEnd)
367 *pDestBufPtr++ = 0x0F; // SI
368 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
370 else
371 goto no_output;
373 if (pDestBufPtr != pDestBufEnd)
374 *pDestBufPtr++ = static_cast< char >(nChar);
375 else
376 goto no_output;
378 else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B)
379 goto bad_input;
380 else if (nChar < 0x80)
382 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001)
384 if (pDestBufPtr != pDestBufEnd)
386 *pDestBufPtr++ = 0x0F; // SI
387 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
389 else
390 goto no_output;
392 if (pDestBufPtr != pDestBufEnd)
393 *pDestBufPtr++ = static_cast< char >(nChar);
394 else
395 goto no_output;
397 else
399 sal_uInt16 nBytes = 0;
400 sal_uInt32 nIndex1 = nChar >> 8;
401 if (nIndex1 < 0x100)
403 sal_uInt32 nIndex2 = nChar & 0xFF;
404 sal_uInt32 nFirst = pKsX1001Data[nIndex1].mnLowStart;
405 if (nIndex2 >= nFirst
406 && nIndex2 <= pKsX1001Data[nIndex1].mnLowEnd)
407 nBytes = pKsX1001Data[nIndex1].
408 mpToUniTrailTab[nIndex2 - nFirst];
410 if (nBytes != 0)
412 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII)
414 if (pDestBufPtr != pDestBufEnd)
416 *pDestBufPtr++ = 0x0E; // SO
417 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_1001;
419 else
420 goto no_output;
422 if (pDestBufEnd - pDestBufPtr >= 2)
424 *pDestBufPtr++ = static_cast< char >((nBytes >> 8) & 0x7F);
425 *pDestBufPtr++ = static_cast< char >(nBytes & 0x7F);
427 else
428 goto no_output;
430 else
431 goto bad_input;
433 nHighSurrogate = 0;
434 continue;
436 bad_input:
437 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
438 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
439 &nInfo, "\x0F" /* SI */,
440 eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 0 : 1,
441 &bWritten))
443 case sal::detail::textenc::BAD_INPUT_STOP:
444 nHighSurrogate = 0;
445 break;
447 case sal::detail::textenc::BAD_INPUT_CONTINUE:
448 if (bWritten)
449 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
450 nHighSurrogate = 0;
451 continue;
453 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
454 goto no_output;
456 break;
458 no_output:
459 --pSrcBuf;
460 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
461 break;
464 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
465 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
466 == 0)
468 bool bFlush = true;
469 if (nHighSurrogate != 0)
471 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
472 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
473 else
474 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
475 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
476 "\x0F" /* SI */,
477 (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII
478 ? 0 : 1),
479 &bWritten))
481 case sal::detail::textenc::BAD_INPUT_STOP:
482 nHighSurrogate = 0;
483 bFlush = false;
484 break;
486 case sal::detail::textenc::BAD_INPUT_CONTINUE:
487 if (bWritten)
488 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
489 nHighSurrogate = 0;
490 break;
492 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
493 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
494 break;
497 if (bFlush
498 && eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001
499 && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
501 if (pDestBufPtr != pDestBufEnd)
503 *pDestBufPtr++ = 0x0F; // SI
504 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII;
506 else
507 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
511 if (pContext)
513 static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_nHighSurrogate
514 = nHighSurrogate;
515 static_cast< ImplUnicodeToIso2022KrContext * >(pContext)->m_eSet = eSet;
517 if (pInfo)
518 *pInfo = nInfo;
519 if (pSrcCvtChars)
520 *pSrcCvtChars = nConverted;
522 return pDestBufPtr - pDestBuf;
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */