1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This implementation doesn't use ICU. The ICU macros are oriented towards
6 // character-at-a-time processing, whereas byte-at-a-time processing is easier
7 // with streaming input.
9 #include "base/i18n/streaming_utf8_validator.h"
11 #include "base/i18n/utf8_validator_tables.h"
12 #include "base/logging.h"
// Returns the entry of internal::kUtf8ValidatorTables at index |offset|.
// AddBytes() calls this to read the shift amount stored at a state's index
// and then to read the next state.
17 uint8
StateTableLookup(uint8 offset
) {
18 // Skip the bounds check on non-debug builds so that it isn't necessary to set
19 // LOGGING_IS_OFFICIAL_BUILD just to do a performance test.
20 if (logging::DEBUG_MODE
)
// Debug-only guard: |offset| must be less than the table size.
21 DCHECK_LT(offset
, internal::kUtf8ValidatorTablesSize
);
22 return internal::kUtf8ValidatorTables
[offset
];
// Runs the bytes in [data, data + size) through the table-driven validator,
// one byte at a time, and reports the resulting state. From the visible part
// of the return expression: a final state of 0 yields VALID_ENDPOINT;
// otherwise the state is compared against
// internal::I18N_UTF8_VALIDATOR_INVALID_INDEX.
//
// NOTE(review): the line numbers embedded in this copy skip 28, 31, 34-35,
// 37-38, 45-46 and 49-52, so parts of this function are not visible here: the
// |size| parameter, the declaration of |state|, part of the ASCII branch, the
// end of the loop and the tail of the return expression. Restore them from
// the original file before relying on this copy.
27 StreamingUtf8Validator::State
StreamingUtf8Validator::AddBytes(const char* data
,
29 // Copy |state_| into a local variable so that the compiler doesn't have to be
30 // careful of aliasing.
// Visit every input byte in order.
32 for (const char* p
= data
; p
!= data
+ size
; ++p
) {
// Top bit clear: this byte is an ASCII character.
33 if ((*p
& 0x80) == 0) {
// NOTE(review): the statements between the test above and this assignment
// are missing from this copy. Presumably an ASCII byte is only an error
// while a multi-byte sequence is still pending (state != 0) -- confirm
// against the original file.
36 state
= internal::I18N_UTF8_VALIDATOR_INVALID_INDEX
;
// The table entry at index |state| is the shift to apply to this byte.
39 const uint8 shift_amount
= StateTableLookup(state
);
// Clear the top bit, then discard the low |shift_amount| bits.
40 const uint8 shifted_char
= (*p
& 0x7F) >> shift_amount
;
// The next state is read from the entry (shifted_char + 1) past |state|;
// the entry at |state| itself holds the shift amount read above.
41 state
= StateTableLookup(state
+ shifted_char
+ 1);
42 // State may be INVALID here, but this code is optimised for the case of
43 // valid UTF-8 and it is more efficient (by about 2%) to not attempt an
44 // early loop exit unless we hit an ASCII character.
// Translate the final table index into the public State value. The results
// for the second comparison are not visible in this copy.
47 return state
== 0 ? VALID_ENDPOINT
48 : state
== internal::I18N_UTF8_VALIDATOR_INVALID_INDEX
// NOTE(review): only the opening line of Reset() is present in this copy; its
// body and closing brace are not visible (the embedded numbering jumps from
// 53 to 57). Presumably it returns the validator to its initial state --
// confirm against the original file.
53 void StreamingUtf8Validator::Reset() {
57 bool StreamingUtf8Validator::Validate(const std::string
& string
) {
58 return StreamingUtf8Validator().AddBytes(string
.data(), string
.size()) ==