1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "cpp/src/util/canonicalize_string.h"
7 #include "base/logging.h"
8 #include "cpp/include/libaddressinput/util/scoped_ptr.h"
9 #include "third_party/icu/source/i18n/unicode/coll.h"
12 namespace addressinput
{
16 class ChromeStringCanonicalizer
: public StringCanonicalizer
{
18 ChromeStringCanonicalizer()
19 : error_code_(U_ZERO_ERROR
),
20 collator_(icu::Collator::createInstance(error_code_
)) {
21 collator_
->setStrength(icu::Collator::PRIMARY
);
22 DCHECK(U_SUCCESS(error_code_
));
25 virtual ~ChromeStringCanonicalizer() {}
27 // StringCanonicalizer implementation.
28 virtual std::string
CanonicalizeString(const std::string
& original
) {
29 // Returns a canonical version of the string that can be used for comparing
30 // strings regardless of diacritics and capitalization.
31 // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
32 // CanonicalizeString("Texas") == CanonicalizeString("teXas");
33 // CanonicalizeString("Texas") != CanonicalizeString("California");
35 // The output is not human-readable.
36 // CanonicalizeString("Texas") != "Texas";
37 icu::UnicodeString
icu_str(
38 original
.c_str(), static_cast<int32_t>(original
.length()));
39 int32_t buffer_size
= collator_
->getSortKey(icu_str
, NULL
, 0);
40 scoped_ptr
<uint8_t[]> buffer(new uint8_t[buffer_size
]);
43 collator_
->getSortKey(icu_str
, buffer
.get(), buffer_size
);
44 DCHECK_EQ(buffer_size
, filled_size
);
45 return std::string(reinterpret_cast<const char*>(buffer
.get()));
49 UErrorCode error_code_
;
50 scoped_ptr
<icu::Collator
> collator_
;
52 DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer
);
58 scoped_ptr
<StringCanonicalizer
> StringCanonicalizer::Build() {
59 return scoped_ptr
<StringCanonicalizer
>(new ChromeStringCanonicalizer
);
62 } // namespace addressinput