Version 6.4.0.3, tag libreoffice-6.4.0.3
[LibreOffice.git] / sal / textenc / convertsinglebytetobmpunicode.cxx
blob6dc7891a30012a808da604d47225c4464d0b7ff8
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <sal/config.h>
22 #include <cassert>
23 #include <cstddef>
25 #include <rtl/character.hxx>
26 #include <rtl/textcvt.h>
27 #include <sal/types.h>
29 #include "context.hxx"
30 #include "converter.hxx"
31 #include "convertsinglebytetobmpunicode.hxx"
32 #include "unichars.hxx"
34 sal_Size rtl_textenc_convertSingleByteToBmpUnicode(
35 void const * data, SAL_UNUSED_PARAMETER void *, sal_Char const * srcBuf,
36 sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars,
37 sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes)
39 sal_Unicode const * map = static_cast<
40 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
41 data)->byteToUnicode;
42 sal_uInt32 infoFlags = 0;
43 sal_Size converted = 0;
44 sal_Unicode * destBufPtr = destBuf;
45 sal_Unicode * destBufEnd = destBuf + destChars;
46 for (; converted < srcBytes; ++converted) {
47 sal_Char b = *srcBuf++;
48 sal_Unicode c = map[static_cast< sal_uInt8 >(b)];
49 if (c == 0xFFFF) {
50 goto bad_input;
52 if (destBufEnd - destBufPtr < 1) {
53 goto no_output;
55 *destBufPtr++ = c;
56 continue;
57 bad_input:
58 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
59 true/*undefined*/, false, b, flags, &destBufPtr, destBufEnd,
60 &infoFlags))
62 case sal::detail::textenc::BAD_INPUT_STOP:
63 if ((flags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
64 ++converted;
66 break;
68 case sal::detail::textenc::BAD_INPUT_CONTINUE:
69 continue;
71 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
72 goto no_output;
74 break;
75 no_output:
76 --srcBuf;
77 infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
78 break;
80 if (info != nullptr) {
81 *info = infoFlags;
83 if (srcCvtBytes != nullptr) {
84 *srcCvtBytes = converted;
86 return destBufPtr - destBuf;
89 sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
90 void const * data, void * context,
91 sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf,
92 sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info,
93 sal_Size * srcCvtChars)
95 std::size_t entries = static_cast<
96 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
97 data)->unicodeToByteEntries;
98 rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast<
99 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
100 data)->unicodeToByte;
101 sal_Unicode highSurrogate = 0;
102 sal_uInt32 infoFlags = 0;
103 sal_Size converted = 0;
104 sal_Char * destBufPtr = destBuf;
105 sal_Char * destBufEnd = destBuf + destBytes;
106 if (context != nullptr) {
107 highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)->
108 m_nHighSurrogate;
110 for (; converted < srcChars; ++converted) {
111 bool undefined = true;
112 sal_uInt32 c = *srcBuf++;
113 if (highSurrogate == 0) {
114 if (ImplIsHighSurrogate(c)) {
115 highSurrogate = static_cast< sal_Unicode >(c);
116 continue;
118 else if (ImplIsLowSurrogate(c))
120 undefined = false;
121 goto bad_input;
123 } else if (ImplIsLowSurrogate(c)) {
124 c = ImplCombineSurrogates(highSurrogate, c);
125 } else {
126 undefined = false;
127 goto bad_input;
129 assert(rtl::isUnicodeScalarValue(c));
130 // Linearly searching through the ranges if probably fastest, assuming
131 // that most converted characters belong to the ASCII subset:
132 for (std::size_t i = 0; i < entries; ++i) {
133 if (c < ranges[i].unicode) {
134 break;
136 if (c <= sal::static_int_cast< sal_uInt32 >(
137 ranges[i].unicode + ranges[i].range))
139 if (destBufEnd - destBufPtr < 1) {
140 goto no_output;
142 *destBufPtr++ = static_cast< sal_Char >(
143 ranges[i].byte + (c - ranges[i].unicode));
144 goto done;
147 goto bad_input;
148 done:
149 highSurrogate = 0;
150 continue;
151 bad_input:
152 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
153 undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, nullptr,
154 0, nullptr))
156 case sal::detail::textenc::BAD_INPUT_STOP:
157 highSurrogate = 0;
158 break;
160 case sal::detail::textenc::BAD_INPUT_CONTINUE:
161 highSurrogate = 0;
162 continue;
164 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
165 goto no_output;
167 break;
168 no_output:
169 --srcBuf;
170 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
171 break;
173 if (highSurrogate != 0
174 && ((infoFlags
175 & (RTL_UNICODETOTEXT_INFO_ERROR
176 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
177 == 0))
179 if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) {
180 infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
181 } else {
182 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
183 false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, nullptr,
184 0, nullptr))
186 case sal::detail::textenc::BAD_INPUT_STOP:
187 case sal::detail::textenc::BAD_INPUT_CONTINUE:
188 highSurrogate = 0;
189 break;
191 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
192 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
193 break;
197 if (context != nullptr) {
198 static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate
199 = highSurrogate;
201 if (info != nullptr) {
202 *info = infoFlags;
204 if (srcCvtChars != nullptr) {
205 *srcCvtChars = converted;
207 return destBufPtr - destBuf;
210 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */