Update ooo320-m1
[ooovba.git] / sal / textenc / convertsinglebytetobmpunicode.cxx
blob2c04250c620a3da713ef13ac6310966fda349ed1
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: convertsinglebytetobmpunicode.cxx,v $
10 * $Revision: 1.6 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_sal.hxx"
34 #include "context.h"
35 #include "converter.h"
36 #include "convertsinglebytetobmpunicode.hxx"
37 #include "unichars.h"
39 #include "osl/diagnose.h"
40 #include "rtl/textcvt.h"
41 #include "sal/types.h"
43 #include <cstddef>
45 sal_Size rtl_textenc_convertSingleByteToBmpUnicode(
46 ImplTextConverterData const * data, void *, sal_Char const * srcBuf,
47 sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars,
48 sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes)
50 sal_Unicode const * map = static_cast<
51 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
52 data)->byteToUnicode;
53 sal_uInt32 infoFlags = 0;
54 sal_Size converted = 0;
55 sal_Unicode * destBufPtr = destBuf;
56 sal_Unicode * destBufEnd = destBuf + destChars;
57 for (; converted < srcBytes; ++converted) {
58 bool undefined = true;
59 sal_Char b = *srcBuf++;
60 sal_Unicode c = map[static_cast< sal_uInt8 >(b)];
61 if (c == 0xFFFF) {
62 goto bad_input;
64 if (destBufEnd - destBufPtr < 1) {
65 goto no_output;
67 *destBufPtr++ = c;
68 continue;
69 bad_input:
70 switch (ImplHandleBadInputTextToUnicodeConversion(
71 undefined, false, b, flags, &destBufPtr, destBufEnd,
72 &infoFlags))
74 case IMPL_BAD_INPUT_STOP:
75 break;
77 case IMPL_BAD_INPUT_CONTINUE:
78 continue;
80 case IMPL_BAD_INPUT_NO_OUTPUT:
81 goto no_output;
83 break;
84 no_output:
85 --srcBuf;
86 infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
87 break;
89 if (info != 0) {
90 *info = infoFlags;
92 if (srcCvtBytes != 0) {
93 *srcCvtBytes = converted;
95 return destBufPtr - destBuf;
98 sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
99 ImplTextConverterData const * data, void * context,
100 sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf,
101 sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info,
102 sal_Size * srcCvtChars)
104 std::size_t entries = static_cast<
105 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
106 data)->unicodeToByteEntries;
107 rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast<
108 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
109 data)->unicodeToByte;
110 sal_Unicode highSurrogate = 0;
111 sal_uInt32 infoFlags = 0;
112 sal_Size converted = 0;
113 sal_Char * destBufPtr = destBuf;
114 sal_Char * destBufEnd = destBuf + destBytes;
115 if (context != 0) {
116 highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)->
117 m_nHighSurrogate;
119 for (; converted < srcChars; ++converted) {
120 bool undefined = true;
121 sal_uInt32 c = *srcBuf++;
122 if (highSurrogate == 0) {
123 if (ImplIsHighSurrogate(c)) {
124 highSurrogate = static_cast< sal_Unicode >(c);
125 continue;
127 } else if (ImplIsLowSurrogate(c)) {
128 c = ImplCombineSurrogates(highSurrogate, c);
129 } else {
130 undefined = false;
131 goto bad_input;
133 if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) {
134 undefined = false;
135 goto bad_input;
137 // Linearly searching through the ranges if probably fastest, assuming
138 // that most converted characters belong to the ASCII subset:
139 for (std::size_t i = 0; i < entries; ++i) {
140 if (c < ranges[i].unicode) {
141 break;
142 } else if (c <= sal::static_int_cast< sal_uInt32 >(
143 ranges[i].unicode + ranges[i].range))
145 if (destBufEnd - destBufPtr < 1) {
146 goto no_output;
148 *destBufPtr++ = static_cast< sal_Char >(
149 ranges[i].byte + (c - ranges[i].unicode));
150 goto done;
153 goto bad_input;
154 done:
155 highSurrogate = 0;
156 continue;
157 bad_input:
158 switch (ImplHandleBadInputUnicodeToTextConversion(
159 undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
160 0, 0))
162 case IMPL_BAD_INPUT_STOP:
163 highSurrogate = 0;
164 break;
166 case IMPL_BAD_INPUT_CONTINUE:
167 highSurrogate = 0;
168 continue;
170 case IMPL_BAD_INPUT_NO_OUTPUT:
171 goto no_output;
173 break;
174 no_output:
175 --srcBuf;
176 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
177 break;
179 if (highSurrogate != 0
180 && ((infoFlags
181 & (RTL_UNICODETOTEXT_INFO_ERROR
182 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
183 == 0))
185 if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) {
186 infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
187 } else {
188 switch (ImplHandleBadInputUnicodeToTextConversion(
189 false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
190 0, 0))
192 case IMPL_BAD_INPUT_STOP:
193 case IMPL_BAD_INPUT_CONTINUE:
194 highSurrogate = 0;
195 break;
197 case IMPL_BAD_INPUT_NO_OUTPUT:
198 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
199 break;
203 if (context != 0) {
204 static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate
205 = highSurrogate;
207 if (info != 0) {
208 *info = infoFlags;
210 if (srcCvtChars != 0) {
211 *srcCvtChars = converted;
213 return destBufPtr - destBuf;