1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
#include "third_party/libaddressinput/chromium/input_suggester.h"

#include <stdint.h>

#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "base/logging.h"
#include "third_party/libaddressinput/chromium/trie.h"
#include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
#include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
#include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
#include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
20 using ::i18n::addressinput::AddressData
;
21 using ::i18n::addressinput::AddressField
;
22 using ::i18n::addressinput::BuildCallback
;
23 using ::i18n::addressinput::FieldProblemMap
;
24 using ::i18n::addressinput::PreloadSupplier
;
25 using ::i18n::addressinput::RegionData
;
26 using ::i18n::addressinput::RegionDataBuilder
;
28 using ::i18n::addressinput::ADMIN_AREA
;
29 using ::i18n::addressinput::COUNTRY
;
30 using ::i18n::addressinput::DEPENDENT_LOCALITY
;
31 using ::i18n::addressinput::LOCALITY
;
32 using ::i18n::addressinput::POSTAL_CODE
;
34 using ::i18n::addressinput::INVALID_FORMAT
;
35 using ::i18n::addressinput::MISMATCHING_VALUE
;
// Initial size for the buffer used in the canonicalizer. The canonicalizer
// grows the buffer on demand when a sort key does not fit.
static const size_t kInitialBufferSize = 32;

42 // A region and its metadata useful for constructing a suggestion.
45 // Builds a suggestion of |region_to_suggest|. Does not take ownership of
46 // |region_to_suggest|, which should not be NULL.
47 Suggestion(const RegionData
* region_to_suggest
,
48 AddressField matching_address_field
,
49 bool region_key_matches
)
50 : region_to_suggest(region_to_suggest
),
51 matching_address_field(matching_address_field
),
52 region_key_matches(region_key_matches
) {
53 DCHECK(region_to_suggest
);
58 // The region that should be suggested. For example, if the region is ("CA",
59 // "California"), then either "CA" or "California" should be suggested.
60 const RegionData
* region_to_suggest
;
62 // The field in the address for which the suggestion should be made. For
63 // example, ADMIN_AREA in US means the suggestion should be made for the field
65 AddressField matching_address_field
;
67 // True if the key of the region matches user input (the name may or may not
68 // match). "CA" should be suggested for a ("CA", "California") region.
70 // False if only the name of the region matches user input (the key does not
71 // match). "California" should be suggested for a ("CA", "California") region.
72 bool region_key_matches
;
75 // Suggestions for an address. Contains lists of suggestions for administrative
76 // area, locality, and dependent locality fields of an address.
77 class AddressSuggestions
{
79 AddressSuggestions() {}
80 ~AddressSuggestions() {}
82 // Marks all regions at |address_field| level as matching user input.
83 void AllRegionsMatchForField(AddressField address_field
) {
84 all_regions_match_input_
.insert(address_field
);
87 // Marks given regions at |address_field| level as matching user input. The
88 // |regions_match_key| parameter contains the regions that match user input by
89 // their keys. The |regions_match_name| parameter contains the regions that
90 // match user input by their names.
92 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
93 // DEPENDENT_LOCALITY.
94 bool AddRegions(AddressField address_field
,
95 const std::set
<const RegionData
*>& regions_match_key
,
96 const std::set
<const RegionData
*>& regions_match_name
) {
97 DCHECK(address_field
>= ADMIN_AREA
);
98 DCHECK(address_field
<= DEPENDENT_LOCALITY
);
100 AddressField parent_address_field
=
101 static_cast<AddressField
>(address_field
- 1);
103 bool all_parents_match
=
104 parent_address_field
== COUNTRY
||
105 all_regions_match_input_
.find(parent_address_field
) !=
106 all_regions_match_input_
.end();
108 // Cannot build |address_field| level suggestions if there are no matches in
109 // |parent_address_field| level regions.
110 const RegionsMatchInput
* parents
= NULL
;
111 if (address_field
> ADMIN_AREA
&& !all_parents_match
) {
112 parents
= ®ions_match_input_
[parent_address_field
];
113 if (parents
->keys
.empty() && parents
->names
.empty())
117 RegionsMatchInput
* regions
= NULL
;
118 if (address_field
< DEPENDENT_LOCALITY
)
119 regions
= ®ions_match_input_
[address_field
];
121 std::vector
<Suggestion
>* suggestions
= &suggestions_
[address_field
];
122 bool added_suggestions
= false;
124 // Iterate over both |regions_match_key| and |regions_match_name| and build
125 // Suggestion objects based on the given RegionData objects. Advance either
126 // one iterator at a time (if they point to different data) or both
127 // iterators at once (if they point to the same data).
128 for (std::set
<const RegionData
*>::const_iterator
129 key_it
= regions_match_key
.begin(),
130 name_it
= regions_match_name
.begin();
131 key_it
!= regions_match_key
.end() ||
132 name_it
!= regions_match_name
.end();) {
133 const RegionData
* key_region
=
134 key_it
!= regions_match_key
.end() ? *key_it
: NULL
;
135 const RegionData
* name_region
=
136 name_it
!= regions_match_name
.end() ? *name_it
: NULL
;
138 // Regions that do not have a parent that also matches input will not
139 // become suggestions.
140 bool key_region_has_parent
=
142 (parents
&& !parents
->keys
.empty() && key_region
&&
143 parents
->keys
.find(&key_region
->parent()) != parents
->keys
.end());
144 bool name_region_has_parent
=
146 (parents
&& !parents
->names
.empty() && name_region
&&
147 parents
->names
.find(&name_region
->parent()) != parents
->names
.end());
149 if (name_region
&& (!key_region
|| name_region
< key_region
)) {
150 if (name_region_has_parent
) {
151 suggestions
->push_back(Suggestion(name_region
, address_field
, false));
152 added_suggestions
= true;
154 regions
->names
.insert(name_region
);
158 } else if (key_region
&& (!name_region
|| key_region
< name_region
)) {
159 if (key_region_has_parent
) {
160 suggestions
->push_back(Suggestion(key_region
, address_field
, true));
161 added_suggestions
= true;
163 regions
->keys
.insert(key_region
);
168 if (key_region_has_parent
) {
169 suggestions
->push_back(Suggestion(key_region
, address_field
, true));
170 added_suggestions
= true;
172 regions
->keys
.insert(key_region
);
173 regions
->names
.insert(name_region
);
182 return added_suggestions
;
185 // Swaps the suggestions for the smallest sub-region into |suggestions|.
186 // |this| is not usable after this call due to using the swap() operation.
188 // The |suggestions| parameter should not be NULL.
189 void SwapSmallestSubRegionSuggestions(std::vector
<Suggestion
>* suggestions
) {
191 for (int i
= DEPENDENT_LOCALITY
; i
>= ADMIN_AREA
; --i
) {
192 std::vector
<Suggestion
>* result
=
193 &suggestions_
[static_cast<AddressField
>(i
)];
194 if (!result
->empty()) {
195 suggestions
->swap(*result
);
202 // The sets of non-owned regions used for looking up regions that match user
203 // input by keys and names.
204 struct RegionsMatchInput
{
205 std::set
<const RegionData
*> keys
;
206 std::set
<const RegionData
*> names
;
209 // The regions that match user input at ADMIN_AREA and LOCALITY levels.
210 std::map
<AddressField
, RegionsMatchInput
> regions_match_input_
;
212 // The set of fields for which all regions match user input. Used to avoid
213 // storing a long list in |regions_match_input_| and later looking it up
215 std::set
<AddressField
> all_regions_match_input_
;
217 // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
218 std::map
<AddressField
, std::vector
<Suggestion
> > suggestions_
;
220 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions
);
225 InputSuggester::StringCanonicalizer::StringCanonicalizer()
226 : buffer_(kInitialBufferSize
, 0) {
227 UErrorCode error_code
= U_ZERO_ERROR
;
229 icu::Collator::createInstance(icu::Locale::getRoot(), error_code
));
230 DCHECK(U_SUCCESS(error_code
));
231 collator_
->setStrength(icu::Collator::PRIMARY
);
234 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {}
236 const std::vector
<uint8_t>& InputSuggester::StringCanonicalizer::Canonicalize(
237 const std::string
& original
) const {
238 DCHECK(!original
.empty());
240 icu::UnicodeString
icu_str(original
.c_str(),
241 static_cast<int32_t>(original
.length()));
242 int32_t sort_key_size
=
243 collator_
->getSortKey(icu_str
, &buffer_
[0], buffer_size());
244 DCHECK_LT(0, sort_key_size
);
246 if (sort_key_size
> buffer_size()) {
247 buffer_
.resize(sort_key_size
* 2, 0);
248 sort_key_size
= collator_
->getSortKey(icu_str
, &buffer_
[0], buffer_size());
249 DCHECK_LT(0, sort_key_size
);
250 DCHECK_GT(buffer_size(), sort_key_size
);
256 int32_t InputSuggester::StringCanonicalizer::buffer_size() const {
257 return static_cast<int32_t>(buffer_
.size());
260 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
261 // region name or key.
262 class InputSuggester::SubRegionData
{
265 : initialized_(false),
266 smallest_region_size_(COUNTRY
),
267 canonicalizer_(NULL
) {}
271 bool is_initialized() const { return initialized_
; }
273 // Adds the sub-regions of |country_region| into tries. Uses
274 // |shared_canonicalizer| for case and diacritic insensitive lookup of the
275 // sub-regions. Should be called at most once.
276 void Initialize(const RegionData
& country_region
,
277 const StringCanonicalizer
& shared_canonicalizer
) {
278 DCHECK(!initialized_
);
279 DCHECK(!country_region
.has_parent());
282 canonicalizer_
= &shared_canonicalizer
;
284 if (!country_region
.sub_regions().empty())
285 AddSubRegionsOf(country_region
, COUNTRY
);
288 // Adds the suggestions for |user_input| into |suggestions| when user is
289 // typing in |focused_field|.
290 void BuildSuggestions(const AddressData
& user_input
,
291 AddressField focused_field
,
292 std::vector
<Suggestion
>* suggestions
) {
293 DCHECK(initialized_
);
295 // Do not suggest anything if there's no suggestion data for the focused
297 if (focused_field
!= POSTAL_CODE
&& smallest_region_size_
< focused_field
)
300 // Non-owned regions that match a field value by region key.
301 std::set
<const RegionData
*> regions_match_key
;
303 // Non-owned regions that match a field value by region name.
304 std::set
<const RegionData
*> regions_match_name
;
306 AddressSuggestions address_suggestions
;
307 for (int i
= ADMIN_AREA
; i
<= focused_field
&& i
<= DEPENDENT_LOCALITY
;
309 AddressField address_field
= static_cast<AddressField
>(i
);
310 AddressField parent_address_field
= static_cast<AddressField
>(i
- 1);
312 const std::string
& field_value
= user_input
.GetFieldValue(address_field
);
313 const std::string
& parent_field_value
=
314 user_input
.GetFieldValue(parent_address_field
);
316 if (field_value
.empty() &&
317 (address_field
== ADMIN_AREA
|| parent_field_value
.empty())) {
318 address_suggestions
.AllRegionsMatchForField(address_field
);
322 if (field_value
.empty()) {
323 DCHECK_NE(address_field
, focused_field
);
327 regions_match_key
.clear();
328 regions_match_name
.clear();
330 const FieldTries
& field_tries
= field_tries_
[address_field
];
332 const std::vector
<uint8_t>& canonicalized_value
=
333 canonicalizer_
->Canonicalize(field_value
);
335 field_tries
.keys
.FindDataForKeyPrefix(canonicalized_value
,
337 field_tries
.names
.FindDataForKeyPrefix(canonicalized_value
,
338 ®ions_match_name
);
340 bool added_suggestions
= address_suggestions
.AddRegions(
341 address_field
, regions_match_key
, regions_match_name
);
343 // Do not suggest anything if the focused field does not have suggestions.
344 if (address_field
== focused_field
&& !added_suggestions
)
348 address_suggestions
.SwapSmallestSubRegionSuggestions(suggestions
);
352 // The tries to lookup regions for a specific field by keys and names. For
353 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
354 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
355 // struct is uncopyable due to Trie objects being uncopyable.
357 Trie
<const RegionData
*> keys
;
358 Trie
<const RegionData
*> names
;
361 // Adds the sub-regions of |parent_region| into tries.
362 void AddSubRegionsOf(const RegionData
& parent_region
,
363 AddressField parent_field
) {
364 DCHECK(!parent_region
.sub_regions().empty());
366 AddressField address_field
= static_cast<AddressField
>(parent_field
+ 1);
367 DCHECK(address_field
>= ADMIN_AREA
);
368 DCHECK(address_field
<= DEPENDENT_LOCALITY
);
370 FieldTries
* field_tries
= &field_tries_
[address_field
];
371 for (std::vector
<const RegionData
*>::const_iterator it
=
372 parent_region
.sub_regions().begin();
373 it
!= parent_region
.sub_regions().end();
375 const RegionData
* region
= *it
;
377 DCHECK(!region
->key().empty());
378 DCHECK(!region
->name().empty());
380 field_tries
->keys
.AddDataForKey(
381 canonicalizer_
->Canonicalize(region
->key()), region
);
383 field_tries
->names
.AddDataForKey(
384 canonicalizer_
->Canonicalize(region
->name()), region
);
386 if (smallest_region_size_
< address_field
)
387 smallest_region_size_
= address_field
;
389 if (!region
->sub_regions().empty())
390 AddSubRegionsOf(*region
, address_field
);
394 // True after Initialize() has been called.
397 // The tries to lookup regions for ADMIN_AREA, LOCALITY, and
398 // DEPENDENT_LOCALITY.
399 std::map
<AddressField
, FieldTries
> field_tries_
;
401 // The smallest size of a sub-region that has data. For example, this is
402 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
403 AddressField smallest_region_size_
;
405 // A shared instance of string canonicalizer for case and diacritic comparison
406 // of region keys and names.
407 const StringCanonicalizer
* canonicalizer_
;
410 InputSuggester::InputSuggester(PreloadSupplier
* supplier
)
411 : region_data_builder_(supplier
),
412 input_helper_(supplier
),
413 validator_(supplier
),
414 validated_(BuildCallback(this, &InputSuggester::Validated
)) {}
416 InputSuggester::~InputSuggester() {}
418 void InputSuggester::GetSuggestions(const AddressData
& user_input
,
419 AddressField focused_field
,
420 size_t suggestions_limit
,
421 std::vector
<AddressData
>* suggestions
) {
423 DCHECK(focused_field
== POSTAL_CODE
||
424 (focused_field
>= ADMIN_AREA
&& focused_field
<= DEPENDENT_LOCALITY
));
426 AddressData address_copy
= user_input
;
428 // Do not suggest anything if the user input is empty.
429 if (address_copy
.IsFieldEmpty(focused_field
))
432 if (focused_field
== POSTAL_CODE
) {
433 // Do not suggest anything if the user is typing an invalid postal code.
434 FieldProblemMap problems
;
435 FieldProblemMap filter
;
436 filter
.insert(std::make_pair(POSTAL_CODE
, INVALID_FORMAT
));
437 validator_
.Validate(address_copy
,
438 true, // Allow postal office boxes.
439 false, // Do not require recipient name.
443 if (!problems
.empty())
446 // Fill in the sub-regions based on the postal code.
447 input_helper_
.FillAddress(&address_copy
);
450 // Lazily initialize the mapping from COUNTRY level regions to all of their
451 // sub-regions with metadata for generating suggestions.
452 std::string unused_best_language
;
453 const RegionData
& region_data
=
454 region_data_builder_
.Build(address_copy
.region_code
,
455 address_copy
.language_code
,
456 &unused_best_language
);
457 SubRegionData
* sub_region_data
= &sub_regions_
[®ion_data
];
458 if (!sub_region_data
->is_initialized())
459 sub_region_data
->Initialize(region_data
, canonicalizer_
);
461 // Build the list of regions that match |address_copy| when the user is typing
462 // in the |focused_field|.
463 std::vector
<Suggestion
> suggested_regions
;
464 sub_region_data
->BuildSuggestions(
465 address_copy
, focused_field
, &suggested_regions
);
467 FieldProblemMap problems
;
468 FieldProblemMap filter
;
469 filter
.insert(std::make_pair(POSTAL_CODE
, MISMATCHING_VALUE
));
471 // Generate suggestions based on the regions.
472 for (std::vector
<Suggestion
>::const_iterator suggested_region_it
=
473 suggested_regions
.begin();
474 suggested_region_it
!= suggested_regions
.end();
475 ++suggested_region_it
) {
477 address
.region_code
= address_copy
.region_code
;
478 address
.postal_code
= address_copy
.postal_code
;
480 // Traverse the tree of regions from the smallest |region_to_suggest| to the
481 // country-wide "root" of the tree. Use the region names or keys found at
482 // each of the levels of the tree to build the |address| to suggest.
483 AddressField address_field
= suggested_region_it
->matching_address_field
;
484 for (const RegionData
* region
= suggested_region_it
->region_to_suggest
;
485 region
->has_parent();
486 region
= ®ion
->parent()) {
487 address
.SetFieldValue(address_field
,
488 suggested_region_it
->region_key_matches
491 address_field
= static_cast<AddressField
>(address_field
- 1);
494 // Do not suggest an address with a mismatching postal code.
496 validator_
.Validate(address
,
497 true, // Allow postal office boxes.
498 false, // Do not require recipient name.
502 if (!problems
.empty())
505 // Do not add more suggestions than |suggestions_limit|.
506 if (suggestions
->size() >= suggestions_limit
) {
507 suggestions
->clear();
511 suggestions
->push_back(address
);
515 void InputSuggester::Validated(bool success
,
517 const FieldProblemMap
&) {
521 } // namespace autofill