1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_string_util.h"
7 #include "base/i18n/i18n_constants.h"
8 #include "base/i18n/icu_string_conversions.h"
9 #include "base/strings/string_util.h"
10 #include "third_party/icu/source/common/unicode/ucnv.h"
14 bool ConvertToUtf8(const std::string
& text
, const char* charset
,
15 std::string
* output
) {
18 UErrorCode err
= U_ZERO_ERROR
;
19 UConverter
* converter(ucnv_open(charset
, &err
));
23 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8.
24 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes
25 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a
27 size_t output_length
= text
.length() * 3 + 1;
28 char* buf
= WriteInto(output
, output_length
);
29 output_length
= ucnv_toAlgorithmic(UCNV_UTF8
, converter
, buf
, output_length
,
30 text
.data(), text
.length(), &err
);
31 ucnv_close(converter
);
37 output
->resize(output_length
);
41 bool ConvertToUtf8AndNormalize(const std::string
& text
, const char* charset
,
42 std::string
* output
) {
43 return base::ConvertToUtf8AndNormalize(text
, charset
, output
);
46 bool ConvertLatin1ToUtf8AndNormalize(const std::string
& text
,
47 std::string
* output
) {
48 return net::ConvertToUtf8AndNormalize(text
, base::kCodepageLatin1
, output
);
51 bool ConvertToUTF16(const std::string
& text
, const char* charset
,
52 base::string16
* output
) {
53 return base::CodepageToUTF16(text
, charset
,
54 base::OnStringConversionError::FAIL
, output
);
57 bool ConvertLatin1ToUTF16(const std::string
& text
, base::string16
* output
) {
58 return base::CodepageToUTF16(text
, base::kCodepageLatin1
,
59 base::OnStringConversionError::FAIL
, output
);