Don't preload rarely seen large images
[chromium-blink-merge.git] / third_party / cld / encodings / compact_lang_det / win / cld_utf8statetable.h
blobb2b332879107e5043ad8a1e876e219609f1e9f96
1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UTF8STATETABLE_H_
6 #define ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UTF8STATETABLE_H_
8 #if !defined(CLD_WINDOWS)
10 #include "util/utf8/utf8statetable.h"
12 #else
14 #include "encodings/compact_lang_det/win/cld_basictypes.h"
16 // These four-byte entries compactly encode how many bytes 0..255 to delete
17 // in making a string replacement, how many bytes to add 0..255, and the offset
18 // 0..64k-1 of the replacement string in remap_string.
19 struct RemapEntry {
20 uint8 delete_bytes;
21 uint8 add_bytes;
22 uint16 bytes_offset;
// Exit type codes for state tables with one-byte entries. All but the first
// get stuffed into one-byte table entries; they occupy 240..255, the top of
// the unsigned byte range. The first (239) is only generated by executable
// code. To distinguish from next-state entries, these must be contiguous and
// all <= kExitNone.
// Enumerators after the first rely on implicit +1 numbering, which keeps the
// range contiguous by construction; do not insert values in the middle.
typedef enum {
  kExitDstSpaceFull = 239,  // 239, generated by code only
  kExitIllegalStructure,    // 240
  kExitOK,                  // 241
  kExitReject,              // 242
  kExitReplace1,            // 243
  kExitReplace2,            // 244
  kExitReplace3,            // 245
  kExitReplace21,           // 246
  kExitReplace31,           // 247
  kExitReplace32,           // 248
  kExitReplaceOffset1,      // 249
  kExitReplaceOffset2,      // 250
  kExitReplace1S0,          // 251
  kExitSpecial,             // 252
  kExitDoAgain,             // 253
  kExitRejectAlt,           // 254
  kExitNone                 // 255
} ExitReason;
// Exit type codes for state tables with two-byte entries; same ordering as
// ExitReason, shifted to the bottom of the signed 16-bit range. All but the
// first fit in a signed two-byte entry (-32768..-32753); the first (-32769)
// does not fit in 16 bits.
// NOTE(review): presumably, like its one-byte counterpart, the first code is
// only generated by executable code -- confirm against the implementation.
typedef enum {
  kExitDstSpaceFull_2 = -32769,  // -32769, outside the signed 16-bit range
  kExitIllegalStructure_2,       // -32768
  kExitOK_2,                     // -32767
  kExitReject_2,                 // -32766
  kExitReplace1_2,               // -32765
  kExitReplace2_2,               // -32764
  kExitReplace3_2,               // -32763
  kExitReplace21_2,              // -32762
  kExitReplace31_2,              // -32761
  kExitReplace32_2,              // -32760
  kExitReplaceOffset1_2,         // -32759
  kExitReplaceOffset2_2,         // -32758
  kExitReplace1S0_2,             // -32757
  kExitSpecial_2,                // -32756
  kExitDoAgain_2,                // -32755
  kExitRejectAlt_2,              // -32754
  kExitNone_2                    // -32753
} ExitReason_2;
69 // This struct represents one entire state table. The three initialized byte
70 // areas are state_table, remap_base, and remap_string. state0 and state0_size
71 // give the byte offset and length within state_table of the initial state --
72 // table lookups are expected to start and end in this state, but for
73 // truncated UTF-8 strings, may end in a different state. These allow a quick
74 // test for that condition. entry_shift is 8 for tables subscripted by a full
75 // byte value and 6 for space-optimized tables subscripted by only six
76 // significant bits in UTF-8 continuation bytes.
77 typedef struct {
78 const uint32 state0;
79 const uint32 state0_size;
80 const uint32 total_size;
81 const int max_expand;
82 const int entry_shift;
83 const int bytes_per_entry;
84 const uint32 losub;
85 const uint32 hiadd;
86 const uint8* state_table;
87 const RemapEntry* remap_base;
88 const uint8* remap_string;
89 const uint8* fast_state;
90 } UTF8StateMachineObj;
92 // Near-duplicate declaration for tables with two-byte entries
93 typedef struct {
94 const uint32 state0;
95 const uint32 state0_size;
96 const uint32 total_size;
97 const int max_expand;
98 const int entry_shift;
99 const int bytes_per_entry;
100 const uint32 losub;
101 const uint32 hiadd;
102 const signed short* state_table;
103 const RemapEntry* remap_base;
104 const uint8* remap_string;
105 const uint8* fast_state;
106 } UTF8StateMachineObj_2;
109 typedef UTF8StateMachineObj UTF8PropObj;
110 typedef UTF8StateMachineObj UTF8ScanObj;
111 typedef UTF8StateMachineObj_2 UTF8PropObj_2;
114 // Look up property of one UTF-8 character and advance over it
115 // Return 0 if input length is zero
116 // Return 0 and advance one byte if input is ill-formed
117 uint8 UTF8GenericProperty(const UTF8PropObj* st,
118 const uint8** src,
119 int* srclen);
121 // BigOneByte versions are needed for tables > 240 states, but most
122 // won't need the TwoByte versions.
124 // Look up property of one UTF-8 character and advance over it
125 // Return 0 if input length is zero
126 // Return 0 and advance one byte if input is ill-formed
127 uint8 UTF8GenericPropertyBigOneByte(const UTF8PropObj* st,
128 const uint8** src,
129 int* srclen);
131 // Scan a UTF-8 stringpiece based on a state table.
132 // Always scan complete UTF-8 characters
133 // Set number of bytes scanned. Return reason for exiting
134 int UTF8GenericScan(const UTF8ScanObj* st,
135 const uint8* str,
136 const int len,
137 int* bytes_consumed);
139 #endif
141 #endif // ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UTF8STATETABLE_H_