Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / libaddressinput / chromium / input_suggester.cc
blob5e6c7e769e6c420b05c3844712ee730f448316db
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
7 #include <cstddef>
8 #include <set>
9 #include <utility>
11 #include "base/logging.h"
12 #include "third_party/libaddressinput/chromium/trie.h"
13 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
14 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
15 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
16 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
18 namespace autofill {
20 using ::i18n::addressinput::AddressData;
21 using ::i18n::addressinput::AddressField;
22 using ::i18n::addressinput::BuildCallback;
23 using ::i18n::addressinput::FieldProblemMap;
24 using ::i18n::addressinput::PreloadSupplier;
25 using ::i18n::addressinput::RegionData;
26 using ::i18n::addressinput::RegionDataBuilder;
28 using ::i18n::addressinput::ADMIN_AREA;
29 using ::i18n::addressinput::COUNTRY;
30 using ::i18n::addressinput::DEPENDENT_LOCALITY;
31 using ::i18n::addressinput::LOCALITY;
32 using ::i18n::addressinput::POSTAL_CODE;
34 using ::i18n::addressinput::INVALID_FORMAT;
35 using ::i18n::addressinput::MISMATCHING_VALUE;
37 namespace {
39 // Initial size for the buffer used in the canonicalizer.
40 static const size_t kInitialBufferSize = 32;
42 // A region and its metadata useful for constructing a suggestion.
43 struct Suggestion {
44 public:
45 // Builds a suggestion of |region_to_suggest|. Does not take ownership of
46 // |region_to_suggest|, which should not be NULL.
47 Suggestion(const RegionData* region_to_suggest,
48 AddressField matching_address_field,
49 bool region_key_matches)
50 : region_to_suggest(region_to_suggest),
51 matching_address_field(matching_address_field),
52 region_key_matches(region_key_matches) {
53 DCHECK(region_to_suggest);
56 ~Suggestion() {}
58 // The region that should be suggested. For example, if the region is ("CA",
59 // "California"), then either "CA" or "California" should be suggested.
60 const RegionData* region_to_suggest;
62 // The field in the address for which the suggestion should be made. For
63 // example, ADMIN_AREA in US means the suggestion should be made for the field
64 // labeled "State".
65 AddressField matching_address_field;
67 // True if the key of the region matches user input (the name may or may not
68 // match). "CA" should be suggested for a ("CA", "California") region.
70 // False if only the name of the region matches user input (the key does not
71 // match). "California" should be suggested for a ("CA", "California") region.
72 bool region_key_matches;
75 // Suggestions for an address. Contains lists of suggestions for administrative
76 // area, locality, and dependent locality fields of an address.
77 class AddressSuggestions {
78 public:
79 AddressSuggestions() {}
80 ~AddressSuggestions() {}
82 // Marks all regions at |address_field| level as matching user input.
83 void AllRegionsMatchForField(AddressField address_field) {
84 all_regions_match_input_.insert(address_field);
87 // Marks given regions at |address_field| level as matching user input. The
88 // |regions_match_key| parameter contains the regions that match user input by
89 // their keys. The |regions_match_name| parameter contains the regions that
90 // match user input by their names.
92 // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
93 // DEPENDENT_LOCALITY.
94 bool AddRegions(AddressField address_field,
95 const std::set<const RegionData*>& regions_match_key,
96 const std::set<const RegionData*>& regions_match_name) {
97 DCHECK(address_field >= ADMIN_AREA);
98 DCHECK(address_field <= DEPENDENT_LOCALITY);
100 AddressField parent_address_field =
101 static_cast<AddressField>(address_field - 1);
103 bool all_parents_match =
104 parent_address_field == COUNTRY ||
105 all_regions_match_input_.find(parent_address_field) !=
106 all_regions_match_input_.end();
108 // Cannot build |address_field| level suggestions if there are no matches in
109 // |parent_address_field| level regions.
110 const RegionsMatchInput* parents = NULL;
111 if (address_field > ADMIN_AREA && !all_parents_match) {
112 parents = &regions_match_input_[parent_address_field];
113 if (parents->keys.empty() && parents->names.empty())
114 return false;
117 RegionsMatchInput* regions = NULL;
118 if (address_field < DEPENDENT_LOCALITY)
119 regions = &regions_match_input_[address_field];
121 std::vector<Suggestion>* suggestions = &suggestions_[address_field];
122 bool added_suggestions = false;
124 // Iterate over both |regions_match_key| and |regions_match_name| and build
125 // Suggestion objects based on the given RegionData objects. Advance either
126 // one iterator at a time (if they point to different data) or both
127 // iterators at once (if they point to the same data).
128 for (std::set<const RegionData*>::const_iterator
129 key_it = regions_match_key.begin(),
130 name_it = regions_match_name.begin();
131 key_it != regions_match_key.end() ||
132 name_it != regions_match_name.end();) {
133 const RegionData* key_region =
134 key_it != regions_match_key.end() ? *key_it : NULL;
135 const RegionData* name_region =
136 name_it != regions_match_name.end() ? *name_it : NULL;
138 // Regions that do not have a parent that also matches input will not
139 // become suggestions.
140 bool key_region_has_parent =
141 all_parents_match ||
142 (parents && !parents->keys.empty() && key_region &&
143 parents->keys.find(&key_region->parent()) != parents->keys.end());
144 bool name_region_has_parent =
145 all_parents_match ||
146 (parents && !parents->names.empty() && name_region &&
147 parents->names.find(&name_region->parent()) != parents->names.end());
149 if (name_region && (!key_region || name_region < key_region)) {
150 if (name_region_has_parent) {
151 suggestions->push_back(Suggestion(name_region, address_field, false));
152 added_suggestions = true;
153 if (regions)
154 regions->names.insert(name_region);
157 ++name_it;
158 } else if (key_region && (!name_region || key_region < name_region)) {
159 if (key_region_has_parent) {
160 suggestions->push_back(Suggestion(key_region, address_field, true));
161 added_suggestions = true;
162 if (regions)
163 regions->keys.insert(key_region);
166 ++key_it;
167 } else {
168 if (key_region_has_parent) {
169 suggestions->push_back(Suggestion(key_region, address_field, true));
170 added_suggestions = true;
171 if (regions) {
172 regions->keys.insert(key_region);
173 regions->names.insert(name_region);
177 ++key_it;
178 ++name_it;
182 return added_suggestions;
185 // Swaps the suggestions for the smallest sub-region into |suggestions|.
186 // |this| is not usable after this call due to using the swap() operation.
188 // The |suggestions| parameter should not be NULL.
189 void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) {
190 DCHECK(suggestions);
191 for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
192 std::vector<Suggestion>* result =
193 &suggestions_[static_cast<AddressField>(i)];
194 if (!result->empty()) {
195 suggestions->swap(*result);
196 return;
201 private:
202 // The sets of non-owned regions used for looking up regions that match user
203 // input by keys and names.
204 struct RegionsMatchInput {
205 std::set<const RegionData*> keys;
206 std::set<const RegionData*> names;
209 // The regions that match user input at ADMIN_AREA and LOCALITY levels.
210 std::map<AddressField, RegionsMatchInput> regions_match_input_;
212 // The set of fields for which all regions match user input. Used to avoid
213 // storing a long list in |regions_match_input_| and later looking it up
214 // there.
215 std::set<AddressField> all_regions_match_input_;
217 // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
218 std::map<AddressField, std::vector<Suggestion> > suggestions_;
220 DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
223 } // namespace
225 InputSuggester::StringCanonicalizer::StringCanonicalizer()
226 : buffer_(kInitialBufferSize, 0) {
227 UErrorCode error_code = U_ZERO_ERROR;
228 collator_.reset(
229 icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
230 DCHECK(U_SUCCESS(error_code));
231 collator_->setStrength(icu::Collator::PRIMARY);
234 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {}
236 const std::vector<uint8_t>& InputSuggester::StringCanonicalizer::Canonicalize(
237 const std::string& original) const {
238 DCHECK(!original.empty());
240 icu::UnicodeString icu_str(original.c_str(),
241 static_cast<int32_t>(original.length()));
242 int32_t sort_key_size =
243 collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
244 DCHECK_LT(0, sort_key_size);
246 if (sort_key_size > buffer_size()) {
247 buffer_.resize(sort_key_size * 2, 0);
248 sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
249 DCHECK_LT(0, sort_key_size);
250 DCHECK_GT(buffer_size(), sort_key_size);
253 return buffer_;
256 int32_t InputSuggester::StringCanonicalizer::buffer_size() const {
257 return static_cast<int32_t>(buffer_.size());
260 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
261 // region name or key.
262 class InputSuggester::SubRegionData {
263 public:
264 SubRegionData()
265 : initialized_(false),
266 smallest_region_size_(COUNTRY),
267 canonicalizer_(NULL) {}
269 ~SubRegionData() {}
271 bool is_initialized() const { return initialized_; }
273 // Adds the sub-regions of |country_region| into tries. Uses
274 // |shared_canonicalizer| for case and diacritic insensitive lookup of the
275 // sub-regions. Should be called at most once.
276 void Initialize(const RegionData& country_region,
277 const StringCanonicalizer& shared_canonicalizer) {
278 DCHECK(!initialized_);
279 DCHECK(!country_region.has_parent());
281 initialized_ = true;
282 canonicalizer_ = &shared_canonicalizer;
284 if (!country_region.sub_regions().empty())
285 AddSubRegionsOf(country_region, COUNTRY);
288 // Adds the suggestions for |user_input| into |suggestions| when user is
289 // typing in |focused_field|.
290 void BuildSuggestions(const AddressData& user_input,
291 AddressField focused_field,
292 std::vector<Suggestion>* suggestions) {
293 DCHECK(initialized_);
295 // Do not suggest anything if there's no suggestion data for the focused
296 // field.
297 if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
298 return;
300 // Non-owned regions that match a field value by region key.
301 std::set<const RegionData*> regions_match_key;
303 // Non-owned regions that match a field value by region name.
304 std::set<const RegionData*> regions_match_name;
306 AddressSuggestions address_suggestions;
307 for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
308 ++i) {
309 AddressField address_field = static_cast<AddressField>(i);
310 AddressField parent_address_field = static_cast<AddressField>(i - 1);
312 const std::string& field_value = user_input.GetFieldValue(address_field);
313 const std::string& parent_field_value =
314 user_input.GetFieldValue(parent_address_field);
316 if (field_value.empty() &&
317 (address_field == ADMIN_AREA || parent_field_value.empty())) {
318 address_suggestions.AllRegionsMatchForField(address_field);
319 continue;
322 if (field_value.empty()) {
323 DCHECK_NE(address_field, focused_field);
324 continue;
327 regions_match_key.clear();
328 regions_match_name.clear();
330 const FieldTries& field_tries = field_tries_[address_field];
332 const std::vector<uint8_t>& canonicalized_value =
333 canonicalizer_->Canonicalize(field_value);
335 field_tries.keys.FindDataForKeyPrefix(canonicalized_value,
336 &regions_match_key);
337 field_tries.names.FindDataForKeyPrefix(canonicalized_value,
338 &regions_match_name);
340 bool added_suggestions = address_suggestions.AddRegions(
341 address_field, regions_match_key, regions_match_name);
343 // Do not suggest anything if the focused field does not have suggestions.
344 if (address_field == focused_field && !added_suggestions)
345 return;
348 address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
351 private:
352 // The tries to lookup regions for a specific field by keys and names. For
353 // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
354 // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
355 // struct is uncopyable due to Trie objects being uncopyable.
356 struct FieldTries {
357 Trie<const RegionData*> keys;
358 Trie<const RegionData*> names;
361 // Adds the sub-regions of |parent_region| into tries.
362 void AddSubRegionsOf(const RegionData& parent_region,
363 AddressField parent_field) {
364 DCHECK(!parent_region.sub_regions().empty());
366 AddressField address_field = static_cast<AddressField>(parent_field + 1);
367 DCHECK(address_field >= ADMIN_AREA);
368 DCHECK(address_field <= DEPENDENT_LOCALITY);
370 FieldTries* field_tries = &field_tries_[address_field];
371 for (std::vector<const RegionData*>::const_iterator it =
372 parent_region.sub_regions().begin();
373 it != parent_region.sub_regions().end();
374 ++it) {
375 const RegionData* region = *it;
376 DCHECK(region);
377 DCHECK(!region->key().empty());
378 DCHECK(!region->name().empty());
380 field_tries->keys.AddDataForKey(
381 canonicalizer_->Canonicalize(region->key()), region);
383 field_tries->names.AddDataForKey(
384 canonicalizer_->Canonicalize(region->name()), region);
386 if (smallest_region_size_ < address_field)
387 smallest_region_size_ = address_field;
389 if (!region->sub_regions().empty())
390 AddSubRegionsOf(*region, address_field);
394 // True after Initialize() has been called.
395 bool initialized_;
397 // The tries to lookup regions for ADMIN_AREA, LOCALITY, and
398 // DEPENDENT_LOCALITY.
399 std::map<AddressField, FieldTries> field_tries_;
401 // The smallest size of a sub-region that has data. For example, this is
402 // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
403 AddressField smallest_region_size_;
405 // A shared instance of string canonicalizer for case and diacritic comparison
406 // of region keys and names.
407 const StringCanonicalizer* canonicalizer_;
410 InputSuggester::InputSuggester(PreloadSupplier* supplier)
411 : region_data_builder_(supplier),
412 input_helper_(supplier),
413 validator_(supplier),
414 validated_(BuildCallback(this, &InputSuggester::Validated)) {}
416 InputSuggester::~InputSuggester() {}
418 void InputSuggester::GetSuggestions(const AddressData& user_input,
419 AddressField focused_field,
420 size_t suggestions_limit,
421 std::vector<AddressData>* suggestions) {
422 DCHECK(suggestions);
423 DCHECK(focused_field == POSTAL_CODE ||
424 (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
426 AddressData address_copy = user_input;
428 // Do not suggest anything if the user input is empty.
429 if (address_copy.IsFieldEmpty(focused_field))
430 return;
432 if (focused_field == POSTAL_CODE) {
433 // Do not suggest anything if the user is typing an invalid postal code.
434 FieldProblemMap problems;
435 FieldProblemMap filter;
436 filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
437 validator_.Validate(address_copy,
438 true, // Allow postal office boxes.
439 false, // Do not require recipient name.
440 &filter,
441 &problems,
442 *validated_);
443 if (!problems.empty())
444 return;
446 // Fill in the sub-regions based on the postal code.
447 input_helper_.FillAddress(&address_copy);
450 // Lazily initialize the mapping from COUNTRY level regions to all of their
451 // sub-regions with metadata for generating suggestions.
452 std::string unused_best_language;
453 const RegionData& region_data =
454 region_data_builder_.Build(address_copy.region_code,
455 address_copy.language_code,
456 &unused_best_language);
457 SubRegionData* sub_region_data = &sub_regions_[&region_data];
458 if (!sub_region_data->is_initialized())
459 sub_region_data->Initialize(region_data, canonicalizer_);
461 // Build the list of regions that match |address_copy| when the user is typing
462 // in the |focused_field|.
463 std::vector<Suggestion> suggested_regions;
464 sub_region_data->BuildSuggestions(
465 address_copy, focused_field, &suggested_regions);
467 FieldProblemMap problems;
468 FieldProblemMap filter;
469 filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
471 // Generate suggestions based on the regions.
472 for (std::vector<Suggestion>::const_iterator suggested_region_it =
473 suggested_regions.begin();
474 suggested_region_it != suggested_regions.end();
475 ++suggested_region_it) {
476 AddressData address;
477 address.region_code = address_copy.region_code;
478 address.postal_code = address_copy.postal_code;
480 // Traverse the tree of regions from the smallest |region_to_suggest| to the
481 // country-wide "root" of the tree. Use the region names or keys found at
482 // each of the levels of the tree to build the |address| to suggest.
483 AddressField address_field = suggested_region_it->matching_address_field;
484 for (const RegionData* region = suggested_region_it->region_to_suggest;
485 region->has_parent();
486 region = &region->parent()) {
487 address.SetFieldValue(address_field,
488 suggested_region_it->region_key_matches
489 ? region->key()
490 : region->name());
491 address_field = static_cast<AddressField>(address_field - 1);
494 // Do not suggest an address with a mismatching postal code.
495 problems.clear();
496 validator_.Validate(address,
497 true, // Allow postal office boxes.
498 false, // Do not require recipient name.
499 &filter,
500 &problems,
501 *validated_);
502 if (!problems.empty())
503 continue;
505 // Do not add more suggestions than |suggestions_limit|.
506 if (suggestions->size() >= suggestions_limit) {
507 suggestions->clear();
508 return;
511 suggestions->push_back(address);
515 void InputSuggester::Validated(bool success,
516 const AddressData&,
517 const FieldProblemMap&) {
518 DCHECK(success);
521 } // namespace autofill