1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_string_util.h"
7 #include "base/i18n/i18n_constants.h"
8 #include "base/i18n/icu_string_conversions.h"
9 #include "base/strings/string_util.h"
10 #include "third_party/icu/source/common/unicode/ucnv.h"
14 const char* const kCharsetLatin1
= base::kCodepageLatin1
;
16 bool ConvertToUtf8(const std::string
& text
, const char* charset
,
17 std::string
* output
) {
20 UErrorCode err
= U_ZERO_ERROR
;
21 UConverter
* converter(ucnv_open(charset
, &err
));
25 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8.
26 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes
27 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a
29 size_t output_length
= text
.length() * 3 + 1;
30 char* buf
= base::WriteInto(output
, output_length
);
31 output_length
= ucnv_toAlgorithmic(UCNV_UTF8
, converter
, buf
, output_length
,
32 text
.data(), text
.length(), &err
);
33 ucnv_close(converter
);
39 output
->resize(output_length
);
43 bool ConvertToUtf8AndNormalize(const std::string
& text
, const char* charset
,
44 std::string
* output
) {
45 return base::ConvertToUtf8AndNormalize(text
, charset
, output
);
48 bool ConvertToUTF16(const std::string
& text
, const char* charset
,
49 base::string16
* output
) {
50 return base::CodepageToUTF16(text
, charset
,
51 base::OnStringConversionError::FAIL
, output
);
54 bool ConvertToUTF16WithSubstitutions(const std::string
& text
,
56 base::string16
* output
) {
57 return base::CodepageToUTF16(text
, charset
,
58 base::OnStringConversionError::SUBSTITUTE
,