Re-submission of https://codereview.chromium.org/1041213003/
[chromium-blink-merge.git] / third_party / brotli / dec / transform.h
blobcd9e1b5b72a2c4aa89175439dc6f5e4f696787a7
/* Copyright 2013 Google Inc. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

   Transformations on dictionary words.
*/
18 #ifndef BROTLI_DEC_TRANSFORM_H_
19 #define BROTLI_DEC_TRANSFORM_H_
21 #include <stdio.h>
22 #include <ctype.h>
23 #include "./types.h"
25 #if defined(__cplusplus) || defined(c_plusplus)
26 extern "C" {
27 #endif
/* Kind of edit applied to a dictionary word between a transform's prefix
   and suffix.

   The numeric values are load-bearing and must not be renumbered:
   TransformDictionaryWord() uses the value of kOmitLastN (== N) directly as
   the number of trailing bytes to drop, and derives the number of leading
   bytes to skip for kOmitFirstN as (value - (kOmitFirst1 - 1)), i.e. N. */
enum WordTransformType {
  kIdentity       = 0,   /* copy the word unchanged */
  kOmitLast1      = 1,   /* drop the last N bytes of the word */
  kOmitLast2      = 2,
  kOmitLast3      = 3,
  kOmitLast4      = 4,
  kOmitLast5      = 5,
  kOmitLast6      = 6,
  kOmitLast7      = 7,
  kOmitLast8      = 8,
  kOmitLast9      = 9,
  kUppercaseFirst = 10,  /* run ToUpperCase() once, on the first character */
  kUppercaseAll   = 11,  /* run ToUpperCase() over the whole word */
  kOmitFirst1     = 12,  /* skip the first N bytes of the word */
  kOmitFirst2     = 13,
  kOmitFirst3     = 14,
  kOmitFirst4     = 15,
  kOmitFirst5     = 16,
  kOmitFirst6     = 17,
  kOmitFirst7     = 18,
  kOmitFirst8     = 19,
  kOmitFirst9     = 20
};
53 typedef struct {
54 const char* prefix;
55 enum WordTransformType transform;
56 const char* suffix;
57 } Transform;
59 static const Transform kTransforms[] = {
60 { "", kIdentity, "" },
61 { "", kIdentity, " " },
62 { " ", kIdentity, " " },
63 { "", kOmitFirst1, "" },
64 { "", kUppercaseFirst, " " },
65 { "", kIdentity, " the " },
66 { " ", kIdentity, "" },
67 { "s ", kIdentity, " " },
68 { "", kIdentity, " of " },
69 { "", kUppercaseFirst, "" },
70 { "", kIdentity, " and " },
71 { "", kOmitFirst2, "" },
72 { "", kOmitLast1, "" },
73 { ", ", kIdentity, " " },
74 { "", kIdentity, ", " },
75 { " ", kUppercaseFirst, " " },
76 { "", kIdentity, " in " },
77 { "", kIdentity, " to " },
78 { "e ", kIdentity, " " },
79 { "", kIdentity, "\"" },
80 { "", kIdentity, "." },
81 { "", kIdentity, "\">" },
82 { "", kIdentity, "\n" },
83 { "", kOmitLast3, "" },
84 { "", kIdentity, "]" },
85 { "", kIdentity, " for " },
86 { "", kOmitFirst3, "" },
87 { "", kOmitLast2, "" },
88 { "", kIdentity, " a " },
89 { "", kIdentity, " that " },
90 { " ", kUppercaseFirst, "" },
91 { "", kIdentity, ". " },
92 { ".", kIdentity, "" },
93 { " ", kIdentity, ", " },
94 { "", kOmitFirst4, "" },
95 { "", kIdentity, " with " },
96 { "", kIdentity, "'" },
97 { "", kIdentity, " from " },
98 { "", kIdentity, " by " },
99 { "", kOmitFirst5, "" },
100 { "", kOmitFirst6, "" },
101 { " the ", kIdentity, "" },
102 { "", kOmitLast4, "" },
103 { "", kIdentity, ". The " },
104 { "", kUppercaseAll, "" },
105 { "", kIdentity, " on " },
106 { "", kIdentity, " as " },
107 { "", kIdentity, " is " },
108 { "", kOmitLast7, "" },
109 { "", kOmitLast1, "ing " },
110 { "", kIdentity, "\n\t" },
111 { "", kIdentity, ":" },
112 { " ", kIdentity, ". " },
113 { "", kIdentity, "ed " },
114 { "", kOmitFirst9, "" },
115 { "", kOmitFirst7, "" },
116 { "", kOmitLast6, "" },
117 { "", kIdentity, "(" },
118 { "", kUppercaseFirst, ", " },
119 { "", kOmitLast8, "" },
120 { "", kIdentity, " at " },
121 { "", kIdentity, "ly " },
122 { " the ", kIdentity, " of " },
123 { "", kOmitLast5, "" },
124 { "", kOmitLast9, "" },
125 { " ", kUppercaseFirst, ", " },
126 { "", kUppercaseFirst, "\"" },
127 { ".", kIdentity, "(" },
128 { "", kUppercaseAll, " " },
129 { "", kUppercaseFirst, "\">" },
130 { "", kIdentity, "=\"" },
131 { " ", kIdentity, "." },
132 { ".com/", kIdentity, "" },
133 { " the ", kIdentity, " of the " },
134 { "", kUppercaseFirst, "'" },
135 { "", kIdentity, ". This " },
136 { "", kIdentity, "," },
137 { ".", kIdentity, " " },
138 { "", kUppercaseFirst, "(" },
139 { "", kUppercaseFirst, "." },
140 { "", kIdentity, " not " },
141 { " ", kIdentity, "=\"" },
142 { "", kIdentity, "er " },
143 { " ", kUppercaseAll, " " },
144 { "", kIdentity, "al " },
145 { " ", kUppercaseAll, "" },
146 { "", kIdentity, "='" },
147 { "", kUppercaseAll, "\"" },
148 { "", kUppercaseFirst, ". " },
149 { " ", kIdentity, "(" },
150 { "", kIdentity, "ful " },
151 { " ", kUppercaseFirst, ". " },
152 { "", kIdentity, "ive " },
153 { "", kIdentity, "less " },
154 { "", kUppercaseAll, "'" },
155 { "", kIdentity, "est " },
156 { " ", kUppercaseFirst, "." },
157 { "", kUppercaseAll, "\">" },
158 { " ", kIdentity, "='" },
159 { "", kUppercaseFirst, "," },
160 { "", kIdentity, "ize " },
161 { "", kUppercaseAll, "." },
162 { "\xc2\xa0", kIdentity, "" },
163 { " ", kIdentity, "," },
164 { "", kUppercaseFirst, "=\"" },
165 { "", kUppercaseAll, "=\"" },
166 { "", kIdentity, "ous " },
167 { "", kUppercaseAll, ", " },
168 { "", kUppercaseFirst, "='" },
169 { " ", kUppercaseFirst, "," },
170 { " ", kUppercaseAll, "=\"" },
171 { " ", kUppercaseAll, ", " },
172 { "", kUppercaseAll, "," },
173 { "", kUppercaseAll, "(" },
174 { "", kUppercaseAll, ". " },
175 { " ", kUppercaseAll, "." },
176 { "", kUppercaseAll, "='" },
177 { " ", kUppercaseAll, ". " },
178 { " ", kUppercaseFirst, "=\"" },
179 { " ", kUppercaseAll, "='" },
180 { " ", kUppercaseFirst, "='" },
183 static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]);
/* Applies the simplified "uppercase" model to the character starting at p,
   modifying it in place, and returns how many bytes that character is taken
   to occupy (1, 2 or 3) so the caller can step to the next one.

   - p[0] < 0xc0: treated as a single byte.  ASCII 'a'..'z' is mapped to
     'A'..'Z' by flipping bit 5; every other value is left unchanged.
   - 0xc0 <= p[0] < 0xe0: treated as a two-byte sequence; p[1] is XORed
     with 32.
   - p[0] >= 0xe0: treated as a three-byte sequence; p[2] is XORed with 5.

   No length is passed in, so p[1] / p[2] are written without any bounds
   check: the caller must make sure those bytes exist whenever p[0] is
   0xc0 or above. */
static int ToUpperCase(uint8_t *p) {
  if (p[0] < 0xc0) {
    if (p[0] >= 'a' && p[0] <= 'z') {
      p[0] ^= 32;
    }
    return 1;
  }
  /* An overly simplified uppercasing model for utf-8. */
  if (p[0] < 0xe0) {
    p[1] ^= 32;
    return 2;
  }
  /* An arbitrary transform for three byte characters. */
  p[2] ^= 5;
  return 3;
}
202 static BROTLI_INLINE int TransformDictionaryWord(
203 uint8_t* dst, const uint8_t* word, int len, int transform) {
204 const char* prefix = kTransforms[transform].prefix;
205 const char* suffix = kTransforms[transform].suffix;
206 const int t = kTransforms[transform].transform;
207 int skip = t < kOmitFirst1 ? 0 : t - (kOmitFirst1 - 1);
208 int idx = 0;
209 int i = 0;
210 uint8_t* uppercase;
211 if (skip > len) {
212 skip = len;
214 while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
215 word += skip;
216 len -= skip;
217 if (t <= kOmitLast9) {
218 len -= t;
220 while (i < len) { dst[idx++] = word[i++]; }
221 uppercase = &dst[idx - len];
222 if (t == kUppercaseFirst) {
223 ToUpperCase(uppercase);
224 } else if (t == kUppercaseAll) {
225 while (len > 0) {
226 int step = ToUpperCase(uppercase);
227 uppercase += step;
228 len -= step;
231 while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
232 return idx;
235 #if defined(__cplusplus) || defined(c_plusplus)
236 } /* extern "C" */
237 #endif
239 #endif /* BROTLI_DEC_TRANSFORM_H_ */