bump product version to 5.0.4.1
[LibreOffice.git] / sal / textenc / convertsinglebytetobmpunicode.cxx
blob8464b82328c20f294dd6a89f58be358c9743a318
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed
 * with this work for additional information regarding copyright
 * ownership. The ASF licenses this file to you under the Apache
 * License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "sal/config.h"
22 #include <cstddef>
24 #include "rtl/textcvt.h"
25 #include "sal/types.h"
27 #include "context.hxx"
28 #include "converter.hxx"
29 #include "convertsinglebytetobmpunicode.hxx"
30 #include "unichars.hxx"
32 sal_Size rtl_textenc_convertSingleByteToBmpUnicode(
33 void const * data, SAL_UNUSED_PARAMETER void *, sal_Char const * srcBuf,
34 sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars,
35 sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes)
37 sal_Unicode const * map = static_cast<
38 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
39 data)->byteToUnicode;
40 sal_uInt32 infoFlags = 0;
41 sal_Size converted = 0;
42 sal_Unicode * destBufPtr = destBuf;
43 sal_Unicode * destBufEnd = destBuf + destChars;
44 for (; converted < srcBytes; ++converted) {
45 bool undefined = true;
46 sal_Char b = *srcBuf++;
47 sal_Unicode c = map[static_cast< sal_uInt8 >(b)];
48 if (c == 0xFFFF) {
49 goto bad_input;
51 if (destBufEnd - destBufPtr < 1) {
52 goto no_output;
54 *destBufPtr++ = c;
55 continue;
56 bad_input:
57 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
58 undefined, false, b, flags, &destBufPtr, destBufEnd,
59 &infoFlags))
61 case sal::detail::textenc::BAD_INPUT_STOP:
62 break;
64 case sal::detail::textenc::BAD_INPUT_CONTINUE:
65 continue;
67 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
68 goto no_output;
70 break;
71 no_output:
72 --srcBuf;
73 infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
74 break;
76 if (info != 0) {
77 *info = infoFlags;
79 if (srcCvtBytes != 0) {
80 *srcCvtBytes = converted;
82 return destBufPtr - destBuf;
85 sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
86 void const * data, void * context,
87 sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf,
88 sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info,
89 sal_Size * srcCvtChars)
91 std::size_t entries = static_cast<
92 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
93 data)->unicodeToByteEntries;
94 rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast<
95 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
96 data)->unicodeToByte;
97 sal_Unicode highSurrogate = 0;
98 sal_uInt32 infoFlags = 0;
99 sal_Size converted = 0;
100 sal_Char * destBufPtr = destBuf;
101 sal_Char * destBufEnd = destBuf + destBytes;
102 if (context != 0) {
103 highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)->
104 m_nHighSurrogate;
106 for (; converted < srcChars; ++converted) {
107 bool undefined = true;
108 sal_uInt32 c = *srcBuf++;
109 if (highSurrogate == 0) {
110 if (ImplIsHighSurrogate(c)) {
111 highSurrogate = static_cast< sal_Unicode >(c);
112 continue;
114 } else if (ImplIsLowSurrogate(c)) {
115 c = ImplCombineSurrogates(highSurrogate, c);
116 } else {
117 undefined = false;
118 goto bad_input;
120 if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) {
121 undefined = false;
122 goto bad_input;
124 // Linearly searching through the ranges if probably fastest, assuming
125 // that most converted characters belong to the ASCII subset:
126 for (std::size_t i = 0; i < entries; ++i) {
127 if (c < ranges[i].unicode) {
128 break;
129 } else if (c <= sal::static_int_cast< sal_uInt32 >(
130 ranges[i].unicode + ranges[i].range))
132 if (destBufEnd - destBufPtr < 1) {
133 goto no_output;
135 *destBufPtr++ = static_cast< sal_Char >(
136 ranges[i].byte + (c - ranges[i].unicode));
137 goto done;
140 goto bad_input;
141 done:
142 highSurrogate = 0;
143 continue;
144 bad_input:
145 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
146 undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
147 0, 0))
149 case sal::detail::textenc::BAD_INPUT_STOP:
150 highSurrogate = 0;
151 break;
153 case sal::detail::textenc::BAD_INPUT_CONTINUE:
154 highSurrogate = 0;
155 continue;
157 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
158 goto no_output;
160 break;
161 no_output:
162 --srcBuf;
163 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
164 break;
166 if (highSurrogate != 0
167 && ((infoFlags
168 & (RTL_UNICODETOTEXT_INFO_ERROR
169 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
170 == 0))
172 if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) {
173 infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
174 } else {
175 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
176 false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
177 0, 0))
179 case sal::detail::textenc::BAD_INPUT_STOP:
180 case sal::detail::textenc::BAD_INPUT_CONTINUE:
181 highSurrogate = 0;
182 break;
184 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
185 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
186 break;
190 if (context != 0) {
191 static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate
192 = highSurrogate;
194 if (info != 0) {
195 *info = infoFlags;
197 if (srcCvtChars != 0) {
198 *srcCvtChars = converted;
200 return destBufPtr - destBuf;
203 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */