1 /* Copyright 2013 Google Inc. All Rights Reserved.
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
16 /* Transformations on dictionary words. */
18 #ifndef BROTLI_DEC_TRANSFORM_H_
19 #define BROTLI_DEC_TRANSFORM_H_
25 #if defined(__cplusplus) || defined(c_plusplus)
/* Kinds of transformation that can be applied to a dictionary word.
   NOTE(review): the enumerator list is missing from this excerpt. The rest of
   the file uses names such as kIdentity, kOmitFirstN, kOmitLastN,
   kUppercaseFirst and kUppercaseAll, and TransformDictionaryWord compares
   values numerically against kOmitFirst1 and kOmitLast9, so the relative
   order of the enumerators matters -- confirm against the full header before
   changing any value. */
29 enum WordTransformType
{
/* Transformation kind of one table entry; TransformDictionaryWord reads it as
   kTransforms[transform].transform.
   NOTE(review): the enclosing struct declaration (with the prefix and suffix
   members used elsewhere in this file) is not visible in this excerpt. */
55 enum WordTransformType transform
;
/* Table of word transformations, indexed by the `transform` argument of
   TransformDictionaryWord. Each entry holds two strings and a
   WordTransformType; TransformDictionaryWord reads them through the members
   .prefix, .transform and .suffix (presumably in that order -- the struct
   declaration is not visible here, TODO confirm). The position of an entry in
   this table is its transform index, so entries must not be reordered.
   NOTE(review): the closing of this initializer is not visible in this
   excerpt. */
59 static const Transform kTransforms
[] = {
60 { "", kIdentity
, "" },
61 { "", kIdentity
, " " },
62 { " ", kIdentity
, " " },
63 { "", kOmitFirst1
, "" },
64 { "", kUppercaseFirst
, " " },
65 { "", kIdentity
, " the " },
66 { " ", kIdentity
, "" },
67 { "s ", kIdentity
, " " },
68 { "", kIdentity
, " of " },
69 { "", kUppercaseFirst
, "" },
70 { "", kIdentity
, " and " },
71 { "", kOmitFirst2
, "" },
72 { "", kOmitLast1
, "" },
73 { ", ", kIdentity
, " " },
74 { "", kIdentity
, ", " },
75 { " ", kUppercaseFirst
, " " },
76 { "", kIdentity
, " in " },
77 { "", kIdentity
, " to " },
78 { "e ", kIdentity
, " " },
79 { "", kIdentity
, "\"" },
80 { "", kIdentity
, "." },
81 { "", kIdentity
, "\">" },
82 { "", kIdentity
, "\n" },
83 { "", kOmitLast3
, "" },
84 { "", kIdentity
, "]" },
85 { "", kIdentity
, " for " },
86 { "", kOmitFirst3
, "" },
87 { "", kOmitLast2
, "" },
88 { "", kIdentity
, " a " },
89 { "", kIdentity
, " that " },
90 { " ", kUppercaseFirst
, "" },
91 { "", kIdentity
, ". " },
92 { ".", kIdentity
, "" },
93 { " ", kIdentity
, ", " },
94 { "", kOmitFirst4
, "" },
95 { "", kIdentity
, " with " },
96 { "", kIdentity
, "'" },
97 { "", kIdentity
, " from " },
98 { "", kIdentity
, " by " },
99 { "", kOmitFirst5
, "" },
100 { "", kOmitFirst6
, "" },
101 { " the ", kIdentity
, "" },
102 { "", kOmitLast4
, "" },
103 { "", kIdentity
, ". The " },
104 { "", kUppercaseAll
, "" },
105 { "", kIdentity
, " on " },
106 { "", kIdentity
, " as " },
107 { "", kIdentity
, " is " },
108 { "", kOmitLast7
, "" },
109 { "", kOmitLast1
, "ing " },
110 { "", kIdentity
, "\n\t" },
111 { "", kIdentity
, ":" },
112 { " ", kIdentity
, ". " },
113 { "", kIdentity
, "ed " },
114 { "", kOmitFirst9
, "" },
115 { "", kOmitFirst7
, "" },
116 { "", kOmitLast6
, "" },
117 { "", kIdentity
, "(" },
118 { "", kUppercaseFirst
, ", " },
119 { "", kOmitLast8
, "" },
120 { "", kIdentity
, " at " },
121 { "", kIdentity
, "ly " },
122 { " the ", kIdentity
, " of " },
123 { "", kOmitLast5
, "" },
124 { "", kOmitLast9
, "" },
125 { " ", kUppercaseFirst
, ", " },
126 { "", kUppercaseFirst
, "\"" },
127 { ".", kIdentity
, "(" },
128 { "", kUppercaseAll
, " " },
129 { "", kUppercaseFirst
, "\">" },
130 { "", kIdentity
, "=\"" },
131 { " ", kIdentity
, "." },
132 { ".com/", kIdentity
, "" },
133 { " the ", kIdentity
, " of the " },
134 { "", kUppercaseFirst
, "'" },
135 { "", kIdentity
, ". This " },
136 { "", kIdentity
, "," },
137 { ".", kIdentity
, " " },
138 { "", kUppercaseFirst
, "(" },
139 { "", kUppercaseFirst
, "." },
140 { "", kIdentity
, " not " },
141 { " ", kIdentity
, "=\"" },
142 { "", kIdentity
, "er " },
143 { " ", kUppercaseAll
, " " },
144 { "", kIdentity
, "al " },
145 { " ", kUppercaseAll
, "" },
146 { "", kIdentity
, "='" },
147 { "", kUppercaseAll
, "\"" },
148 { "", kUppercaseFirst
, ". " },
149 { " ", kIdentity
, "(" },
150 { "", kIdentity
, "ful " },
151 { " ", kUppercaseFirst
, ". " },
152 { "", kIdentity
, "ive " },
153 { "", kIdentity
, "less " },
154 { "", kUppercaseAll
, "'" },
155 { "", kIdentity
, "est " },
156 { " ", kUppercaseFirst
, "." },
157 { "", kUppercaseAll
, "\">" },
158 { " ", kIdentity
, "='" },
159 { "", kUppercaseFirst
, "," },
160 { "", kIdentity
, "ize " },
161 { "", kUppercaseAll
, "." },
/* "\xc2\xa0" is the two-byte UTF-8 encoding of U+00A0 (no-break space). */
162 { "\xc2\xa0", kIdentity
, "" },
163 { " ", kIdentity
, "," },
164 { "", kUppercaseFirst
, "=\"" },
165 { "", kUppercaseAll
, "=\"" },
166 { "", kIdentity
, "ous " },
167 { "", kUppercaseAll
, ", " },
168 { "", kUppercaseFirst
, "='" },
169 { " ", kUppercaseFirst
, "," },
170 { " ", kUppercaseAll
, "=\"" },
171 { " ", kUppercaseAll
, ", " },
172 { "", kUppercaseAll
, "," },
173 { "", kUppercaseAll
, "(" },
174 { "", kUppercaseAll
, ". " },
175 { " ", kUppercaseAll
, "." },
176 { "", kUppercaseAll
, "='" },
177 { " ", kUppercaseAll
, ". " },
178 { " ", kUppercaseFirst
, "=\"" },
179 { " ", kUppercaseAll
, "='" },
180 { " ", kUppercaseFirst
, "='" },
/* Number of entries in kTransforms, derived from the array's size at compile
   time so it stays in sync when entries are added or removed. */
183 static const int kNumTransforms
= sizeof(kTransforms
) / sizeof(kTransforms
[0]);
/* Uppercasing helper for the character that starts at p. The int it returns
   is stored by TransformDictionaryWord in a variable named `step`.
   NOTE(review): most of the body is missing from this excerpt. Only the ASCII
   'a'..'z' range test and two comments survive, so the exact bytes modified
   and the values returned for one-, two- and three-byte characters cannot be
   confirmed from here -- check the full header. */
185 static int ToUpperCase(uint8_t *p
) {
/* Detects an ASCII lowercase letter in the first byte. */
187 if (p
[0] >= 'a' && p
[0] <= 'z') {
192 /* An overly simplified uppercasing model for utf-8. */
197 /* An arbitrary transform for three byte characters. */
/* Writes a transformed dictionary word into dst: first the prefix of the
   selected transform, then bytes copied from word, then the suffix.

   dst       - output buffer; no bounds check on it appears in the visible code.
   word, len - source bytes and their count.
   transform - index into kTransforms; it is not range-checked in the visible
               code, so callers presumably keep it below kNumTransforms.

   NOTE(review): several statements are missing from this excerpt, including
   the declarations of idx, i and uppercase, every use of skip, the body of
   the kOmitLast9 branch, the loop around the kUppercaseAll case and the
   return statement. Comments below describe only what is visible. */
202 static BROTLI_INLINE
int TransformDictionaryWord(
203 uint8_t* dst
, const uint8_t* word
, int len
, int transform
) {
204 const char* prefix
= kTransforms
[transform
].prefix
;
205 const char* suffix
= kTransforms
[transform
].suffix
;
206 const int t
= kTransforms
[transform
].transform
;
/* skip is 0 for every type below kOmitFirst1; otherwise it is
   t - kOmitFirst1 + 1, i.e. 1 for kOmitFirst1, 2 for the next value, and so
   on. Presumably the count of leading word bytes to drop -- its use is not
   visible here. */
207 int skip
= t
< kOmitFirst1
? 0 : t
- (kOmitFirst1
- 1);
/* Emit the NUL-terminated prefix (the terminator itself is not copied). */
214 while (*prefix
) { dst
[idx
++] = (uint8_t)*prefix
++; }
/* Branch for types up to and including kOmitLast9; its body is not visible. */
217 if (t
<= kOmitLast9
) {
/* Copy word bytes to dst until i reaches len. */
220 while (i
< len
) { dst
[idx
++] = word
[i
++]; }
/* Point len bytes back from the current write position; this is the start of
   the bytes just copied, assuming i started at 0 (TODO confirm). */
221 uppercase
= &dst
[idx
- len
];
222 if (t
== kUppercaseFirst
) {
/* A single ToUpperCase call; its return value is ignored here. */
223 ToUpperCase(uppercase
);
224 } else if (t
== kUppercaseAll
) {
/* The returned step is presumably used to advance through the copied bytes;
   the surrounding loop is not visible. */
226 int step
= ToUpperCase(uppercase
);
/* Emit the NUL-terminated suffix (the terminator itself is not copied). */
231 while (*suffix
) { dst
[idx
++] = (uint8_t)*suffix
++; }
235 #if defined(__cplusplus) || defined(c_plusplus)
239 #endif /* BROTLI_DEC_TRANSFORM_H_ */