1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "ui/app_list/search/term_break_iterator.h"
7 #include "base/i18n/char_iterator.h"
8 #include "base/logging.h"
9 #include "base/strings/string_util.h"
10 #include "third_party/icu/source/common/unicode/uchar.h"
// Constructs a term break iterator over |word|.
//
// The one member initializer visible here creates |iter_| as a newly
// allocated base::i18n::UTF16CharIterator that is given the address of
// |word|.
// NOTE(review): since the address of the caller's string is passed through,
// |word| presumably has to outlive this object -- confirm against
// UTF16CharIterator.
// NOTE(review): the remaining member initializers and the constructor body
// are not visible in this view.
14 TermBreakIterator::TermBreakIterator(const base::string16
& word
)
18 iter_(new base::i18n::UTF16CharIterator(&word
)),
// Destructor. The body is intentionally empty; nothing is released
// explicitly here.
22 TermBreakIterator::~TermBreakIterator() {}
// Steps |iter_| through |word_| one character at a time. Each character is
// classified with GetNewState() and the (current state, new state) pair is
// looked up in the kBoundary table to decide whether it marks a term
// boundary. Afterwards |pos_| is set to the array position of |iter_|.
//
// Returns true when |prev_| differs from |pos_| or when |iter_| has not yet
// reached its end.
//
// NOTE(review): the statements that consume |is_boundary| and that update
// |state_| and |prev_| are not visible in this view; presumably the loop
// stops at the first boundary -- confirm against the full file.
24 bool TermBreakIterator::Advance() {
25 // 2D matrix that defines term boundaries. Each row represents the current
26 // state. Each column represents the new state from the input char. Cells
27 // with a true value represent a term boundary.
28 const bool kBoundary
[][STATE_LAST
] = {
29 // START NUMBER UPPER LOWER CHAR
30 { false, false, false, false, false }, // START
31 { false, false, true, true, true }, // NUMBER
32 { false, true, false, false, true }, // UPPER
33 { false, true, true, false, true }, // LOWER
34 { false, true, true, true, false }, // CHAR
37 while (iter_
->Advance()) {
// Classify the character at the iterator's current array position.
38 const State new_state
= GetNewState(word_
[iter_
->array_pos()]);
// Row is the current state, column is the state of the new character.
39 const bool is_boundary
= kBoundary
[state_
][new_state
];
// Record where the iterator currently stands.
46 pos_
= iter_
->array_pos();
// True if the position moved or the iterator still has input left.
48 return prev_
!= pos_
|| !iter_
->end();
// Returns the piece of |word_| that starts at |prev_| and is
// |pos_| - |prev_| code units long.
//
// DCHECKs that neither |prev_| nor |pos_| equals npos.
// NOTE(review): presumably this means Advance() must have been called
// first -- confirm against the constructor's initial values.
51 const base::string16
TermBreakIterator::GetCurrentTerm() const {
52 DCHECK(prev_
!= npos
&& pos_
!= npos
);
53 return word_
.substr(prev_
, pos_
- prev_
);
// Classifies |ch| into a State.
//
// The first check groups ASCII digits together with '.' and ','. Otherwise
// the character is tested with ICU's u_isUUppercase() and u_isULowercase().
// A character reported as both upper and lower case is treated as a
// programming error and hits NOTREACHED().
//
// NOTE(review): none of the return statements are visible in this view;
// presumably they map onto the NUMBER / UPPER / LOWER / CHAR states named in
// the kBoundary table of Advance() -- confirm against the full file.
56 TermBreakIterator::State
TermBreakIterator::GetNewState(base::char16 ch
) {
57 if (IsAsciiDigit(ch
) || ch
== '.' || ch
== ',')
// "!!" collapses ICU's UBool result to a plain bool.
60 const bool is_upper
= !!u_isUUppercase(ch
);
61 const bool is_lower
= !!u_isULowercase(ch
);
// Both flags being set is not expected.
63 if (is_upper
&& is_lower
) {
64 NOTREACHED() << "Invalid state for ch=" << ch
;
76 } // namespace app_list