1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/break_iterator.h"
7 #include "base/logging.h"
8 #include "third_party/icu/source/common/unicode/ubrk.h"
9 #include "third_party/icu/source/common/unicode/uchar.h"
10 #include "third_party/icu/source/common/unicode/ustring.h"
// Sentinel "no position" value: -1 converted to size_t, i.e. the largest value
// a size_t can hold.
const size_t npos = static_cast<size_t>(-1);
// Constructs a BreakIterator from the text |str| and the segmentation mode
// |break_type|. The visible part of the initializer list stores |break_type|
// in break_type_.
// NOTE(review): the remaining member initializers and the constructor body are
// not visible in this view -- confirm against the full file.
17 BreakIterator::BreakIterator(const string16
& str
, BreakType break_type
)
20 break_type_(break_type
),
// Destructor: closes the ICU break iterator held in iter_ with ubrk_close().
// iter_ is cast to UBreakIterator* before the call.
// NOTE(review): any guard around the call (e.g. a null check on iter_) is not
// visible in this view -- confirm against the full file.
25 BreakIterator::~BreakIterator() {
27 ubrk_close(static_cast<UBreakIterator
*>(iter_
));
// Creates the underlying ICU break iterator for string_.
//
// Translates break_type_ into an ICU UBreakIteratorType (UBRK_CHARACTER,
// UBRK_WORD or UBRK_LINE are the values assigned here), opens an iterator over
// string_'s data and size with ubrk_open(), stores it in iter_, and finally
// positions it with ubrk_first().
//
// Two failure paths hit NOTREACHED(): an "invalid break_type_" branch inside
// the switch, and a U_FAILURE(status) check after ubrk_open().
//
// Returns a bool.
// NOTE(review): the case labels, the return statements and the final
// ubrk_open() argument are not visible in this view; which BreakType values
// map to which ICU type, and the exact return values, must be confirmed
// against the full file.
30 bool BreakIterator::Init() {
31 UErrorCode status
= U_ZERO_ERROR
;
32 UBreakIteratorType break_type
;
33 switch (break_type_
) {
35 break_type
= UBRK_CHARACTER
;
38 break_type
= UBRK_WORD
;
42 break_type
= UBRK_LINE
;
45 NOTREACHED() << "invalid break_type_";
// NULL is passed as ubrk_open()'s second argument; the text is given as a
// pointer plus a length narrowed to int32_t.
48 iter_
= ubrk_open(break_type
, NULL
,
49 string_
.data(), static_cast<int32_t>(string_
.size()),
51 if (U_FAILURE(status
)) {
52 NOTREACHED() << "ubrk_open failed";
55 // Move the iterator to the beginning of the string.
56 ubrk_first(static_cast<UBreakIterator
*>(iter_
));
// Moves the iterator forward to the next boundary and records it in pos_.
//
// Two strategies are visible inside the switch on break_type_:
//  * A single step: call ubrk_next() once, test the result against UBRK_DONE,
//    and otherwise store it in pos_.
//  * A loop: call ubrk_next(), store the result in pos_, and read the rule
//    status with ubrk_getRuleStatus(); the loop repeats while the status lies
//    in [UBRK_LINE_SOFT, UBRK_LINE_SOFT_LIMIT), so boundaries with a soft-line
//    status are stepped over. After the loop, the condition
//    pos == UBRK_DONE && prev_ == pos_ is checked.
// Any other break_type_ reaches NOTREACHED() with "invalid break_type_".
//
// Returns a bool.
// NOTE(review): the declarations of |pos| and |status|, the update of prev_,
// the case labels, the loop header and every return statement are not visible
// in this view. Note also that on UBRK_DONE the loop still writes
// static_cast<size_t>(pos) into pos_ before the post-loop check -- confirm the
// intended end-of-text handling against the full file.
60 bool BreakIterator::Advance() {
64 switch (break_type_
) {
68 pos
= ubrk_next(static_cast<UBreakIterator
*>(iter_
));
69 if (pos
== UBRK_DONE
) {
73 pos_
= static_cast<size_t>(pos
);
77 pos
= ubrk_next(static_cast<UBreakIterator
*>(iter_
));
80 pos_
= static_cast<size_t>(pos
);
81 status
= ubrk_getRuleStatus(static_cast<UBreakIterator
*>(iter_
));
82 } while (status
>= UBRK_LINE_SOFT
&& status
< UBRK_LINE_SOFT_LIMIT
);
83 if (pos
== UBRK_DONE
&& prev_
== pos_
) {
89 NOTREACHED() << "invalid break_type_";
// Returns true only when this iterator is in BREAK_WORD mode and the rule
// status ICU reports for the iterator's current state is something other than
// UBRK_WORD_NONE. The rule status is queried unconditionally, before the
// break_type_ comparison.
94 bool BreakIterator::IsWord() const {
95 int32_t status
= ubrk_getRuleStatus(static_cast<UBreakIterator
*>(iter_
));
96 return (break_type_
== BREAK_WORD
&& status
!= UBRK_WORD_NONE
);
// Reports whether |position| is a boundary whose rule status is not
// UBRK_WORD_NONE.
//
// |position| is narrowed to int32_t and passed to ubrk_isBoundary(); the rule
// status is then read from the same iterator with ubrk_getRuleStatus(). The
// result is true only if both checks succeed.
//
// NOTE(review): the statement executed when break_type_ != BREAK_WORD is not
// visible in this view (presumably an early return -- confirm). Although the
// method is const, ubrk_isBoundary() is called on the shared iter_; check the
// ICU documentation for its effect on the iterator's current position.
99 bool BreakIterator::IsEndOfWord(size_t position
) const {
100 if (break_type_
!= BREAK_WORD
)
103 UBreakIterator
* iter
= static_cast<UBreakIterator
*>(iter_
);
104 UBool boundary
= ubrk_isBoundary(iter
, static_cast<int32_t>(position
));
105 int32_t status
= ubrk_getRuleStatus(iter
);
// !! normalizes the ICU UBool to a C++ bool before combining.
106 return (!!boundary
&& status
!= UBRK_WORD_NONE
);
// Reports whether |position| is a boundary and the rule status read afterwards
// (|next_status|) is not UBRK_WORD_NONE.
//
// |position| is narrowed to int32_t and passed to ubrk_isBoundary(); the
// result is combined with a ubrk_getRuleStatus() query on the same iterator.
//
// NOTE(review): the statement executed when break_type_ != BREAK_WORD is not
// visible in this view (presumably an early return -- confirm). A statement
// also appears to be missing between the ubrk_isBoundary() call and the
// ubrk_getRuleStatus() call; the name |next_status| suggests the iterator is
// advanced first, but that cannot be confirmed from here.
109 bool BreakIterator::IsStartOfWord(size_t position
) const {
110 if (break_type_
!= BREAK_WORD
)
113 UBreakIterator
* iter
= static_cast<UBreakIterator
*>(iter_
);
114 UBool boundary
= ubrk_isBoundary(iter
, static_cast<int32_t>(position
));
116 int32_t next_status
= ubrk_getRuleStatus(iter
);
// !! normalizes the ICU UBool to a C++ bool before combining.
117 return (!!boundary
&& next_status
!= UBRK_WORD_NONE
);
// Returns a copy of the segment of string_ that starts at prev_ and is
// pos_ - prev_ characters long, i.e. the half-open range [prev_, pos_).
// DCHECKs that neither prev_ nor pos_ equals npos before taking the substring.
120 string16
BreakIterator::GetString() const {
121 DCHECK(prev_
!= npos
&& pos_
!= npos
);
122 return string_
.substr(prev_
, pos_
- prev_
);