1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "ppapi/shared_impl/private/ppb_char_set_shared.h"
9 #include "base/i18n/icu_string_conversions.h"
10 #include "ppapi/c/dev/ppb_memory_dev.h"
11 #include "ppapi/thunk/thunk.h"
12 #include "third_party/icu/source/common/unicode/ucnv.h"
13 #include "third_party/icu/source/common/unicode/ucnv_cb.h"
14 #include "third_party/icu/source/common/unicode/ucnv_err.h"
15 #include "third_party/icu/source/common/unicode/ustring.h"
// Maps a deprecated PP_CharSet_ConversionError value onto the matching
// PP_CharSet_Trusted_ConversionError value:
//   SKIP -> SKIP, SUBSTITUTE -> SUBSTITUTE, FAIL -> FAIL.
// NOTE(review): the switch header, any default label and the closing braces
// are not visible in this view of the file, so the handling of out-of-range
// input values cannot be confirmed from here.
21 PP_CharSet_Trusted_ConversionError
DeprecatedToConversionError(
22 PP_CharSet_ConversionError on_error
) {
24 case PP_CHARSET_CONVERSIONERROR_SKIP
:
25 return PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP
;
26 case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE
:
27 return PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
;
28 case PP_CHARSET_CONVERSIONERROR_FAIL
:
30 return PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL
;
34 // Converts the given PP error handling behavior to the version in base,
35 // placing the result in |*result| and returning true on success. Returns false
36 // if the enum is invalid.
//
// |on_error| is the trusted PPAPI error-handling mode to translate.
// |result| receives the base::OnStringConversionError::Type value with the
// same name (FAIL, SKIP or SUBSTITUTE); it is written through unconditionally
// in each visible case, so callers must pass a valid pointer.
// NOTE(review): the return statements and the invalid-enum branch described
// above are not visible in this view of the file.
37 bool PPToBaseConversionError(PP_CharSet_Trusted_ConversionError on_error
,
38 base::OnStringConversionError::Type
* result
) {
40 case PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL
:
41 *result
= base::OnStringConversionError::FAIL
;
43 case PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP
:
44 *result
= base::OnStringConversionError::SKIP
;
46 case PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
:
47 *result
= base::OnStringConversionError::SUBSTITUTE
;
57 // The "substitution" behavior of this function does not match the
58 // implementation in base, so we partially duplicate the code from
59 // icu_string_conversions.cc with the correct error handling setup required
60 // by the PPAPI interface.
//
// Deprecated entry point: converts |utf16| into |output_char_set| and hands
// back a freshly allocated, null-terminated buffer.
//
// The work is done in two passes over UTF16ToCharSet():
//   1. with a NULL output buffer, to learn the required length;
//   2. into a buffer of |required_length + 1| bytes obtained from the
//      PPB_Memory_Dev 0.1 thunk's MemAlloc.
// If the second pass reports PP_FALSE the buffer is released with MemFree.
// On success |*output_length| is set to the converted length, which excludes
// the terminating null.
// NOTE(review): the value returned on the failure and success paths is not
// visible in this view of the file.
61 char* PPB_CharSet_Shared::UTF16ToCharSetDeprecated(
62 const uint16_t* utf16
,
64 const char* output_char_set
,
65 PP_CharSet_ConversionError deprecated_on_error
,
66 uint32_t* output_length
) {
// Translate the deprecated enum into the trusted one used by UTF16ToCharSet.
68 PP_CharSet_Trusted_ConversionError on_error
= DeprecatedToConversionError(
71 // Compute required length.
72 uint32_t required_length
= 0;
// The PP_Bool result of this sizing pass is discarded; only
// |required_length| is consumed below.
73 UTF16ToCharSet(utf16
, utf16_len
, output_char_set
, on_error
, NULL
,
76 // Our output is null terminated, so need one more byte.
// NOTE(review): no NULL check on the MemAlloc result is visible here before
// the buffer is passed on and written to -- confirm against the full file.
77 char* ret_buf
= static_cast<char*>(
78 thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(required_length
+ 1));
80 // Do the conversion into the buffer.
81 PP_Bool result
= UTF16ToCharSet(utf16
, utf16_len
, output_char_set
, on_error
,
82 ret_buf
, &required_length
);
83 if (result
== PP_FALSE
) {
// Conversion failed: release the buffer allocated above.
84 thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf
);
87 ret_buf
[required_length
] = 0; // Null terminate.
88 *output_length
= required_length
;
// Converts |utf16_len| UTF-16 code units starting at |utf16| into the
// character set named by |output_char_set| using an ICU converter.
//
// |on_error| selects the ICU from-Unicode callback that is installed:
// STOP for FAIL, SKIP for SKIP, SUBSTITUTE for SUBSTITUTE.
// |output_buffer| may be NULL, in which case a capacity of 0 is passed to
// ucnv_fromUChars. Otherwise the incoming |*output_length| is used as the
// buffer capacity.
// |*output_length| is overwritten with the value returned by
// ucnv_fromUChars.
// NOTE(review): several lines of this function (the bodies of the early-exit
// branches, the switch header and the return statements) are not visible in
// this view of the file, so the exact return values are not documented here.
93 PP_Bool
PPB_CharSet_Shared::UTF16ToCharSet(
94 const uint16_t utf16
[],
96 const char* output_char_set
,
97 PP_CharSet_Trusted_ConversionError on_error
,
99 uint32_t* output_length
) {
// Reject missing required pointers.
100 if (!utf16
|| !output_char_set
|| !output_length
) {
// Open an ICU converter for the requested destination character set.
105 UErrorCode status
= U_ZERO_ERROR
;
106 UConverter
* converter
= ucnv_open(output_char_set
, &status
);
107 if (!U_SUCCESS(status
)) {
112 // Setup our error handler.
// NOTE(review): |on_error| is declared as PP_CharSet_Trusted_ConversionError
// but the case labels below use the PP_CHARSET_CONVERSIONERROR_* enumerators
// of the deprecated enum. Presumably the numeric values coincide -- confirm,
// or switch to the PP_CHARSET_TRUSTED_CONVERSIONERROR_* names.
114 case PP_CHARSET_CONVERSIONERROR_FAIL
:
115 ucnv_setFromUCallBack(converter
, UCNV_FROM_U_CALLBACK_STOP
, 0,
116 NULL
, NULL
, &status
);
118 case PP_CHARSET_CONVERSIONERROR_SKIP
:
119 ucnv_setFromUCallBack(converter
, UCNV_FROM_U_CALLBACK_SKIP
, 0,
120 NULL
, NULL
, &status
);
122 case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE
: {
123 // ICU sets the substitution char for some character sets (like latin1)
124 // to be the ASCII "substitution character" (26). We want to use '?'
125 // instead for backwards-compat with Windows behavior.
126 char subst_chars
[32];
127 int8_t subst_chars_len
= 32;
128 ucnv_getSubstChars(converter
, subst_chars
, &subst_chars_len
, &status
);
// Only override when the converter's substitution sequence is exactly the
// single byte 26.
129 if (subst_chars_len
== 1 && subst_chars
[0] == 26) {
130 // Override to the question mark character if possible. When using
131 // setSubstString, the input is a Unicode character. The function will
132 // try to convert it to the destination character set and fail if that
133 // can not be converted to the destination character set.
135 // We just ignore any failure. If the dest char set has no
136 // representation for '?', then we'll just stick to the ICU default
137 // substitution character.
// A separate status variable is used so that a failure here does not
// affect |status|.
138 UErrorCode subst_status
= U_ZERO_ERROR
;
139 UChar question_mark
= '?';
140 ucnv_setSubstString(converter
, &question_mark
, 1, &subst_status
);
143 ucnv_setFromUCallBack(converter
, UCNV_FROM_U_CALLBACK_SUBSTITUTE
, 0,
144 NULL
, NULL
, &status
);
// NOTE(review): this close presumably belongs to an early-exit branch (e.g.
// an unrecognised |on_error| value); the surrounding lines are not visible.
149 ucnv_close(converter
);
153 // ucnv_fromUChars returns required size not including terminating null.
154 *output_length
= static_cast<uint32_t>(ucnv_fromUChars(
155 converter
, output_buffer
, output_buffer
? *output_length
: 0,
156 reinterpret_cast<const UChar
*>(utf16
), utf16_len
, &status
));
// The converter is no longer needed once the conversion call has returned.
158 ucnv_close(converter
);
159 if (status
== U_BUFFER_OVERFLOW_ERROR
) {
160 // Don't treat this as a fatal error since we need to return the string
163 } else if (!U_SUCCESS(status
)) {
// Deprecated entry point: converts |input| from |input_char_set| to UTF-16
// and hands back a freshly allocated, null-terminated buffer.
//
// The work is done in two passes over CharSetToUTF16():
//   1. with a NULL output buffer, to learn the required length;
//   2. into a buffer of |required_length + 1| uint16_t elements obtained from
//      the PPB_Memory_Dev 0.1 thunk's MemAlloc.
// If the second pass reports PP_FALSE the buffer is released with MemFree.
// On success |*output_length| is set to the converted length in uint16_t
// elements, which excludes the terminating null.
// NOTE(review): the value returned on the failure and success paths is not
// visible in this view of the file.
171 uint16_t* PPB_CharSet_Shared::CharSetToUTF16Deprecated(
174 const char* input_char_set
,
175 PP_CharSet_ConversionError deprecated_on_error
,
176 uint32_t* output_length
) {
// Translate the deprecated enum into the trusted one used by CharSetToUTF16.
178 PP_CharSet_Trusted_ConversionError on_error
= DeprecatedToConversionError(
179 deprecated_on_error
);
181 // Compute required length.
182 uint32_t required_length
= 0;
// The PP_Bool result of this sizing pass is discarded; only
// |required_length| is consumed below.
183 CharSetToUTF16(input
, input_len
, input_char_set
, on_error
, NULL
,
186 // Our output is null terminated, so need room for one more uint16_t.
// NOTE(review): no NULL check on the MemAlloc result is visible here before
// the buffer is passed on and written to -- confirm against the full file.
187 uint16_t* ret_buf
= static_cast<uint16_t*>(
188 thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(
189 (required_length
+ 1) * sizeof(uint16_t)));
191 // Do the conversion into the buffer.
192 PP_Bool result
= CharSetToUTF16(input
, input_len
, input_char_set
, on_error
,
193 ret_buf
, &required_length
);
194 if (result
== PP_FALSE
) {
// Conversion failed: release the buffer allocated above.
195 thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf
);
198 ret_buf
[required_length
] = 0; // Null terminate.
199 *output_length
= required_length
;
// Converts |input_len| bytes at |input|, encoded in |input_char_set|, to
// UTF-16 by delegating to base::CodepageToUTF16().
//
// |on_error| is translated with PPToBaseConversionError(); an unrecognised
// value zeroes |*output_utf16_length| and yields PP_FALSE.
// |output_buffer| receives a copy of the converted text, limited by the
// smaller of the incoming |*output_utf16_length| and the converted size.
// |*output_utf16_length| is finally set to the full converted size, and is
// set to 0 on each visible failure path.
// NOTE(review): the tail of this function (remainder of the memcpy argument
// list, the final return and the closing brace) is not visible in this view
// of the file.
203 PP_Bool
PPB_CharSet_Shared::CharSetToUTF16(
206 const char* input_char_set
,
207 PP_CharSet_Trusted_ConversionError on_error
,
208 uint16_t* output_buffer
,
209 uint32_t* output_utf16_length
) {
// NOTE(review): this guard is also taken when |output_utf16_length| itself is
// NULL, and the statement that follows then writes through that NULL pointer.
// The store should be skipped when the pointer is NULL.
210 if (!input
|| !input_char_set
|| !output_utf16_length
) {
211 *output_utf16_length
= 0;
215 base::OnStringConversionError::Type base_on_error
;
216 if (!PPToBaseConversionError(on_error
, &base_on_error
)) {
217 *output_utf16_length
= 0;
218 return PP_FALSE
; // Invalid enum value.
221 // We can convert this call to the implementation in base to avoid code
222 // duplication, although this does introduce an extra copy of the data.
223 base::string16 output
;
// The input bytes are copied into a temporary std::string for the call.
224 if (!base::CodepageToUTF16(std::string(input
, input_len
), input_char_set
,
225 base_on_error
, &output
)) {
226 *output_utf16_length
= 0;
// Copy no more than the caller-supplied capacity.
// NOTE(review): whether |output_buffer| is checked for NULL before this copy,
// and how the element count is scaled to bytes, is not visible here.
231 memcpy(output_buffer
, output
.c_str(),
232 std::min(*output_utf16_length
, static_cast<uint32_t>(output
.size()))
// Report the full converted length, which may exceed what was copied above.
235 *output_utf16_length
= static_cast<uint32_t>(output
.size());