1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include "converter.hxx"
11 #include "unichars.hxx"
12 #include "convertisciidevangari.hxx"
13 #include "convertsinglebytetobmpunicode.hxx"
14 #include <rtl/textcvt.h>
16 using namespace sal::detail::textenc
;
17 using namespace rtl::textenc
;
// Decoder state for the ISCII Devanagari -> Unicode direction.
19 struct IsciiDevanagariToUnicode
// Previous input byte as recorded by convert(); convert() compares it with
// 0xE8 (halant) to recognise the two-byte sequences E8 E8 and E8 E9.
21 sal_uInt8 m_cPrevChar
;
// NOTE(review): the constructor body is not visible in this excerpt.
22 IsciiDevanagariToUnicode()
// Converts nSrcBytes bytes from pSrcBuf into at most nDestChars UTF-16 code
// units in pDestBuf; the definition returns the number of code units written
// and stores the number of consumed source bytes in *pSrcCvtBytes.
30 sal_Size
convert(char const* pSrcBuf
, sal_Size nSrcBytes
,
31 sal_Unicode
* pDestBuf
, sal_Size nDestChars
, sal_uInt32 nFlags
,
32 sal_uInt32
* pInfo
, sal_Size
* pSrcCvtBytes
);
// Encoder state for the Unicode -> ISCII Devanagari direction.
35 struct UnicodeToIsciiDevanagari
// Previous Unicode character; convert() compares it with U+094D (virama) to
// recognise the sequences 094D 200C and 094D 200D.
37 sal_Unicode m_cPrevChar
;
// Pending high surrogate carried over between convert() calls (0 when none);
// convert() loads it on entry and stores it back before returning.
38 sal_Unicode m_cHighSurrogate
;
// NOTE(review): the constructor body is not visible in this excerpt.
39 UnicodeToIsciiDevanagari()
// Converts nSrcChars UTF-16 code units from pSrcBuf into at most nDestBytes
// bytes in pDestBuf; the definition returns the number of bytes written and
// stores the number of consumed source code units in *pSrcCvtChars.
49 sal_Size
convert(sal_Unicode
const* pSrcBuf
, sal_Size nSrcChars
,
50 char* pDestBuf
, sal_Size nDestBytes
, sal_uInt32 nFlags
,
51 sal_uInt32
* pInfo
, sal_Size
* pSrcCvtChars
);
// Byte -> Unicode lookup table, indexed by the ISCII input byte (0x00..0xFF).
// Entries 0x00..0x7F map to the identical code point; 0xFFFF marks a byte
// with no direct mapping (IsciiDevanagariToUnicode::convert treats a looked-up
// value of 0xffff as undefined input). Each row below holds eight entries.
54 static const sal_Unicode IsciiDevanagariMap
[256] =
56 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
57 0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
58 0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
59 0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
60 0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
61 0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
62 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
63 0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
64 0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
65 0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
66 0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
67 0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
68 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
69 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
70 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
71 0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
// 0x80..0xA0: no mapping.
72 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
73 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
74 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
75 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
// 0xA0 onwards: Devanagari block (U+09xx) code points.
76 0xFFFF,0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,
77 0x0909,0x090A,0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,
78 0x0913,0x0914,0x0911,0x0915,0x0916,0x0917,0x0918,0x0919,
79 0x091A,0x091B,0x091C,0x091D,0x091E,0x091F,0x0920,0x0921,
80 0x0922,0x0923,0x0924,0x0925,0x0926,0x0927,0x0928,0x0929,
81 0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,0x095F,0x0930,
82 0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,0x0938,
83 0x0939,0xFFFF,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
84 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,
85 0x094D,0x093C,0x0964,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
86 0xFFFF,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
87 0x096D,0x096E,0x096F,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF
// Decodes ISCII Devanagari bytes from pSrcBuf into UTF-16 code units.
//   pSrcBuf / nSrcBytes   - input bytes and their count
//   pDestBuf / nDestChars - output buffer and its capacity in code units
//   nFlags                - passed to handleBadInputTextToUnicodeConversion
//   pInfo                 - status output; nInfo collects bits such as
//                           RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
//                           (NOTE(review): the store to *pInfo is not visible
//                           in this excerpt)
//   pSrcCvtBytes          - receives the number of source bytes consumed
// Returns the number of code units written (pDestBufPtr - pDestBuf).
90 sal_Size
IsciiDevanagariToUnicode::convert(
91 char const* pSrcBuf
, sal_Size nSrcBytes
,
92 sal_Unicode
* pDestBuf
, sal_Size nDestChars
, sal_uInt32 nFlags
,
93 sal_uInt32
* pInfo
, sal_Size
* pSrcCvtBytes
)
96 sal_Size nConverted
= 0;
97 sal_Unicode
* pDestBufPtr
= pDestBuf
;
98 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
100 while (nConverted
< nSrcBytes
)
// Stop condition: no room left for another output code unit.
102 if (pDestBufPtr
== pDestBufEnd
)
104 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
108 sal_Unicode cChar
= sal_Unicode();
109 sal_uInt8 nIn
= static_cast<sal_uInt8
>(pSrcBuf
[nConverted
]);
// Look-ahead byte, or 0 when nIn is the last input byte. The guard must be
// "nConverted + 1 < nSrcBytes": the previous form "nConverted < nSrcBytes + 1"
// is always true inside this loop and read pSrcBuf[nSrcBytes], one byte past
// the end of the input, on the final iteration.
110 sal_uInt8 nNext
= nConverted
+ 1 < nSrcBytes
? static_cast<sal_uInt8
>(pSrcBuf
[nConverted
+1]) : 0;
112 bool bDouble
= false;
113 //halant + halant E8 E8 -> halant + ZWNJ 094D 200C
114 //halant + nukta E8 E9 -> halant + ZWJ 094D 200D
115 if (m_cPrevChar
== 0xE8 && nIn
== 0xE8)
120 else if (m_cPrevChar
== 0xE8 && nIn
== 0xE9)
// Current byte followed by nukta (0xE9): handled as a two-byte combination.
125 else if (nNext
== 0xE9)
// Default path: direct table lookup of the input byte.
184 cChar
= IsciiDevanagariMap
[nIn
];
// 0xFFFF in the table marks a byte without a mapping.
186 bool bUndefined
= cChar
== 0xffff;
190 BadInputConversionAction eAction
= handleBadInputTextToUnicodeConversion(
191 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
193 if (eAction
== BAD_INPUT_CONTINUE
)
195 if (eAction
== BAD_INPUT_STOP
)
197 assert(eAction
== BAD_INPUT_NO_OUTPUT
);
198 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
205 *pDestBufPtr
++ = cChar
;
// Remember the byte for the halant checks above; cleared when bNormal is false.
206 m_cPrevChar
= bNormal
? nIn
: 0;
212 *pSrcCvtBytes
= nConverted
;
214 return pDestBufPtr
- pDestBuf
;
// Unicode -> ISCII range table used by UnicodeToIsciiDevanagari::convert.
// Each entry is { first code point, (last - first) of the run, ISCII byte for
// the first code point }; a code point c inside a run encodes to
// byte + (c - first). Entries are listed in ascending code-point order.
217 BmpUnicodeToSingleByteRange
const unicodeToISCIIEncoding
[] =
219 { 0x0000, 0x007F - 0x0000, 0x00 }, { 0x0901, 0x0903 - 0x0901, 0xA1 },
220 { 0x0905, 0x090B - 0x0905, 0xA4 }, { 0x090D, 0x090D - 0x090D, 0xAE },
221 { 0x090E, 0x0910 - 0x090E, 0xAB }, { 0x0911, 0x0911 - 0x0911, 0xB2 },
222 { 0x0912, 0x0914 - 0x0912, 0xAF }, { 0x0915, 0x092F - 0x0915, 0xB3 },
223 { 0x0930, 0x0939 - 0x0930, 0xCF }, { 0x093C, 0x093C - 0x093C, 0xE9 },
224 { 0x093E, 0x0943 - 0x093E, 0xDA }, { 0x0945, 0x0945 - 0x0945, 0xE3 },
225 { 0x0946, 0x0948 - 0x0946, 0xE0 }, { 0x0949, 0x0949 - 0x0949, 0xE7 },
226 { 0x094A, 0x094C - 0x094A, 0xE4 }, { 0x094D, 0x094D - 0x094D, 0xE8 },
227 { 0x095F, 0x095F - 0x095F, 0xCE }, { 0x0964, 0x0964 - 0x0964, 0xEA },
228 { 0x0966, 0x096F - 0x0966, 0xF1 }
// Encodes UTF-16 code units from pSrcBuf as ISCII Devanagari bytes.
//   pSrcBuf / nSrcChars   - input code units and their count
//   pDestBuf / nDestBytes - output buffer and its capacity in bytes
//   nFlags                - conversion flags; RTL_UNICODETOTEXT_FLAGS_FLUSH is
//                           tested when a high surrogate is still pending
//   pInfo                 - status output (NOTE(review): the store to *pInfo
//                           is not visible in this excerpt)
//   pSrcCvtChars          - receives the number of source code units consumed
// Returns the number of bytes written (pDestBufPtr - pDestBuf).
231 sal_Size
UnicodeToIsciiDevanagari::convert(sal_Unicode
const* pSrcBuf
, sal_Size nSrcChars
,
232 char* pDestBuf
, sal_Size nDestBytes
, sal_uInt32 nFlags
,
233 sal_uInt32
* pInfo
, sal_Size
* pSrcCvtChars
)
235 size_t const entries
= SAL_N_ELEMENTS(unicodeToISCIIEncoding
);
236 BmpUnicodeToSingleByteRange
const * ranges
= unicodeToISCIIEncoding
;
// Work on a local copy of the pending high surrogate; written back at the end.
238 sal_Unicode cHighSurrogate
= m_cHighSurrogate
;
239 sal_uInt32 nInfo
= 0;
240 sal_Size nConverted
= 0;
241 sal_Char
* pDestBufPtr
= pDestBuf
;
242 sal_Char
* pDestBufEnd
= pDestBuf
+ nDestBytes
;
243 for (; nConverted
< nSrcChars
; ++nConverted
)
245 bool bUndefined
= true;
246 sal_uInt32 c
= *pSrcBuf
++;
247 sal_Char cSpecialChar
= 0;
// Surrogate handling: remember a high surrogate, or combine a low surrogate
// with the remembered high one into a single code point.
248 if (cHighSurrogate
== 0)
250 if (ImplIsHighSurrogate(c
))
252 cHighSurrogate
= static_cast< sal_Unicode
>(c
);
256 else if (ImplIsLowSurrogate(c
))
258 c
= ImplCombineSurrogates(cHighSurrogate
, c
);
265 if (ImplIsLowSurrogate(c
) || ImplIsNoncharacter(c
))
271 //halant + ZWNJ 094D 200C -> halant + halant E8 E8
272 //halant + ZWJ 094D 200D -> halant + nukta E8 E9
273 if (m_cPrevChar
== 0x094D && c
== 0x200C)
274 cSpecialChar
= '\xE8';
275 else if (m_cPrevChar
== 0x094D && c
== 0x200D)
276 cSpecialChar
= '\xE9';
// Single-byte special output: needs room for one byte.
279 if (pDestBufEnd
- pDestBufPtr
< 1)
281 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
284 *pDestBufPtr
++ = cSpecialChar
;
// The assignments below pick an ISCII base byte that is then written followed
// by 0xE9 (see the two-byte output further down). The conditions that select
// each byte are not visible in this excerpt.
291 cSpecialChar
= '\xA1';
294 cSpecialChar
= '\xA6';
297 cSpecialChar
= '\xA7';
300 cSpecialChar
= '\xAA';
303 cSpecialChar
= '\xB3';
306 cSpecialChar
= '\xB4';
309 cSpecialChar
= '\xB5';
312 cSpecialChar
= '\xBA';
315 cSpecialChar
= '\xBF';
318 cSpecialChar
= '\xC0';
321 cSpecialChar
= '\xC9';
324 cSpecialChar
= '\xDB';
327 cSpecialChar
= '\xDC';
330 cSpecialChar
= '\xDF';
333 cSpecialChar
= '\xEA';
// Two-byte special output (base byte + 0xE9): needs room for two bytes.
340 if (pDestBufEnd
- pDestBufPtr
< 2)
342 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
345 *pDestBufPtr
++ = cSpecialChar
;
346 *pDestBufPtr
++ = '\xE9';
351 // Linearly searching through the ranges is probably fastest, assuming
352 // that most converted characters belong to the ASCII subset:
353 for (size_t i
= 0; i
< entries
; ++i
)
355 if (c
< ranges
[i
].unicode
)
// c lies inside run i: emit the run's base byte plus the offset into the run.
359 if (c
<= sal::static_int_cast
< sal_uInt32
>(
360 ranges
[i
].unicode
+ ranges
[i
].range
))
362 if (pDestBufEnd
- pDestBufPtr
< 1)
366 *pDestBufPtr
++ = static_cast< sal_Char
>(
367 ranges
[i
].byte
+ (c
- ranges
[i
].unicode
));
// Unmappable or invalid input: let the shared helper decide how to proceed.
377 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
378 bUndefined
, c
, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
, nullptr,
381 case sal::detail::textenc::BAD_INPUT_STOP
:
385 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
389 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
395 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// After the loop: a high surrogate is still pending.
399 if (cHighSurrogate
!= 0
401 & (RTL_UNICODETOTEXT_INFO_ERROR
402 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
405 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
407 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
411 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
412 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
, nullptr,
415 case sal::detail::textenc::BAD_INPUT_STOP
:
416 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
420 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
421 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Persist the surrogate state for the next call.
426 m_cHighSurrogate
= cHighSurrogate
;
430 *pSrcCvtChars
= nConverted
;
432 return pDestBufPtr
- pDestBuf
;
// Free-function entry point for ISCII Devanagari -> Unicode conversion.
// The first (unnamed) parameter is ignored; pContext is cast to
// IsciiDevanagariToUnicode* and all remaining arguments are forwarded
// unchanged to its convert() member, whose result is returned.
435 sal_Size
ImplConvertIsciiDevanagariToUnicode(void const*,
436 void* pContext
, char const* pSrcBuf
, sal_Size nSrcBytes
,
437 sal_Unicode
* pDestBuf
, sal_Size nDestChars
, sal_uInt32 nFlags
,
438 sal_uInt32
* pInfo
, sal_Size
* pSrcCvtBytes
)
440 IsciiDevanagariToUnicode
*pCtx
=
441 static_cast<IsciiDevanagariToUnicode
*>(pContext
);
442 return pCtx
->convert(pSrcBuf
, nSrcBytes
, pDestBuf
, nDestChars
, nFlags
,
443 pInfo
, pSrcCvtBytes
);
// Free-function entry point for Unicode -> ISCII Devanagari conversion.
// The first (unnamed) parameter is ignored; pContext is cast to
// UnicodeToIsciiDevanagari* and all remaining arguments are forwarded
// unchanged to its convert() member, whose result is returned.
446 sal_Size
ImplConvertUnicodeToIsciiDevanagari(void const*,
447 void * pContext
, sal_Unicode
const * pSrcBuf
, sal_Size nSrcChars
,
448 char * pDestBuf
, sal_Size nDestBytes
, sal_uInt32 nFlags
,
449 sal_uInt32
* pInfo
, sal_Size
* pSrcCvtChars
)
451 UnicodeToIsciiDevanagari
*pCtx
=
452 static_cast<UnicodeToIsciiDevanagari
*>(pContext
);
453 return pCtx
->convert(pSrcBuf
, nSrcChars
,
454 pDestBuf
, nDestBytes
, nFlags
, pInfo
, pSrcCvtChars
);
// Allocates a new IsciiDevanagariToUnicode decoder state with new and returns
// it as an opaque void pointer; the caller owns the returned object.
457 void *ImplCreateIsciiDevanagariToUnicodeContext()
459 return new IsciiDevanagariToUnicode
;
// Takes an opaque context pointer and casts it back to
// IsciiDevanagariToUnicode*.
// NOTE(review): the statement that uses pCtx is not visible in this excerpt;
// presumably it deletes the object - confirm against the full file.
462 void ImplDestroyIsciiDevanagariToUnicodeContext(void * pContext
)
464 IsciiDevanagariToUnicode
*pCtx
=
465 static_cast<IsciiDevanagariToUnicode
*>(pContext
);
// Takes an opaque context pointer and casts it back to
// IsciiDevanagariToUnicode*.
// NOTE(review): the statement that uses pCtx is not visible in this excerpt;
// presumably it resets the decoder state - confirm against the full file.
469 void ImplResetIsciiDevanagariToUnicodeContext(void * pContext
)
471 IsciiDevanagariToUnicode
*pCtx
=
472 static_cast<IsciiDevanagariToUnicode
*>(pContext
);
// Allocates a new UnicodeToIsciiDevanagari encoder state with new and returns
// it as an opaque void pointer; the caller owns the returned object.
476 void *ImplCreateUnicodeToIsciiDevanagariContext()
478 return new UnicodeToIsciiDevanagari
;
// Takes an opaque context pointer and casts it back to
// UnicodeToIsciiDevanagari*.
// NOTE(review): the statement that uses pCtx is not visible in this excerpt;
// presumably it resets the encoder state - confirm against the full file.
481 void ImplResetUnicodeToIsciiDevanagariContext(void * pContext
)
483 UnicodeToIsciiDevanagari
*pCtx
=
484 static_cast<UnicodeToIsciiDevanagari
*>(pContext
);
// Takes an opaque context pointer and casts it back to
// UnicodeToIsciiDevanagari*.
// NOTE(review): the statement that uses pCtx is not visible in this excerpt;
// presumably it deletes the object - confirm against the full file.
488 void ImplDestroyUnicodeToIsciiDevanagariContext(void * pContext
)
490 UnicodeToIsciiDevanagari
*pCtx
=
491 static_cast<UnicodeToIsciiDevanagari
*>(pContext
);
495 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */