Version 4.2.0.1, tag libreoffice-4.2.0.1
[LibreOffice.git] / sal / textenc / convertsinglebytetobmpunicode.cxx
blobfb522877cf03946a8ca32d9859e5075040c628bd
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "sal/config.h"
22 #include <cstddef>
24 #include "osl/diagnose.h"
25 #include "rtl/textcvt.h"
26 #include "sal/types.h"
28 #include "context.hxx"
29 #include "converter.hxx"
30 #include "convertsinglebytetobmpunicode.hxx"
31 #include "unichars.hxx"
33 sal_Size rtl_textenc_convertSingleByteToBmpUnicode(
34 void const * data, SAL_UNUSED_PARAMETER void *, sal_Char const * srcBuf,
35 sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars,
36 sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes)
38 sal_Unicode const * map = static_cast<
39 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
40 data)->byteToUnicode;
41 sal_uInt32 infoFlags = 0;
42 sal_Size converted = 0;
43 sal_Unicode * destBufPtr = destBuf;
44 sal_Unicode * destBufEnd = destBuf + destChars;
45 for (; converted < srcBytes; ++converted) {
46 bool undefined = true;
47 sal_Char b = *srcBuf++;
48 sal_Unicode c = map[static_cast< sal_uInt8 >(b)];
49 if (c == 0xFFFF) {
50 goto bad_input;
52 if (destBufEnd - destBufPtr < 1) {
53 goto no_output;
55 *destBufPtr++ = c;
56 continue;
57 bad_input:
58 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
59 undefined, false, b, flags, &destBufPtr, destBufEnd,
60 &infoFlags))
62 case sal::detail::textenc::BAD_INPUT_STOP:
63 break;
65 case sal::detail::textenc::BAD_INPUT_CONTINUE:
66 continue;
68 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
69 goto no_output;
71 break;
72 no_output:
73 --srcBuf;
74 infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
75 break;
77 if (info != 0) {
78 *info = infoFlags;
80 if (srcCvtBytes != 0) {
81 *srcCvtBytes = converted;
83 return destBufPtr - destBuf;
86 sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
87 void const * data, void * context,
88 sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf,
89 sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info,
90 sal_Size * srcCvtChars)
92 std::size_t entries = static_cast<
93 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
94 data)->unicodeToByteEntries;
95 rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast<
96 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
97 data)->unicodeToByte;
98 sal_Unicode highSurrogate = 0;
99 sal_uInt32 infoFlags = 0;
100 sal_Size converted = 0;
101 sal_Char * destBufPtr = destBuf;
102 sal_Char * destBufEnd = destBuf + destBytes;
103 if (context != 0) {
104 highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)->
105 m_nHighSurrogate;
107 for (; converted < srcChars; ++converted) {
108 bool undefined = true;
109 sal_uInt32 c = *srcBuf++;
110 if (highSurrogate == 0) {
111 if (ImplIsHighSurrogate(c)) {
112 highSurrogate = static_cast< sal_Unicode >(c);
113 continue;
115 } else if (ImplIsLowSurrogate(c)) {
116 c = ImplCombineSurrogates(highSurrogate, c);
117 } else {
118 undefined = false;
119 goto bad_input;
121 if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) {
122 undefined = false;
123 goto bad_input;
125 // Linearly searching through the ranges if probably fastest, assuming
126 // that most converted characters belong to the ASCII subset:
127 for (std::size_t i = 0; i < entries; ++i) {
128 if (c < ranges[i].unicode) {
129 break;
130 } else if (c <= sal::static_int_cast< sal_uInt32 >(
131 ranges[i].unicode + ranges[i].range))
133 if (destBufEnd - destBufPtr < 1) {
134 goto no_output;
136 *destBufPtr++ = static_cast< sal_Char >(
137 ranges[i].byte + (c - ranges[i].unicode));
138 goto done;
141 goto bad_input;
142 done:
143 highSurrogate = 0;
144 continue;
145 bad_input:
146 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
147 undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
148 0, 0))
150 case sal::detail::textenc::BAD_INPUT_STOP:
151 highSurrogate = 0;
152 break;
154 case sal::detail::textenc::BAD_INPUT_CONTINUE:
155 highSurrogate = 0;
156 continue;
158 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
159 goto no_output;
161 break;
162 no_output:
163 --srcBuf;
164 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
165 break;
167 if (highSurrogate != 0
168 && ((infoFlags
169 & (RTL_UNICODETOTEXT_INFO_ERROR
170 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
171 == 0))
173 if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) {
174 infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
175 } else {
176 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
177 false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
178 0, 0))
180 case sal::detail::textenc::BAD_INPUT_STOP:
181 case sal::detail::textenc::BAD_INPUT_CONTINUE:
182 highSurrogate = 0;
183 break;
185 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
186 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
187 break;
191 if (context != 0) {
192 static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate
193 = highSurrogate;
195 if (info != 0) {
196 *info = infoFlags;
198 if (srcCvtChars != 0) {
199 *srcCvtChars = converted;
201 return destBufPtr - destBuf;
204 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */