/* $OpenBSD: aes_x86core.c,v 1.8 2015/02/10 09:46:30 miod Exp $ */
/**
 * rijndael-alg-fst.c
 *
 * @version 3.0 (December 2000)
 *
 * Optimised ANSI C code for the Rijndael cipher (now AES)
 *
 * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
 * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
 * @author Paulo Barreto <paulo.barreto@terra.com.br>
 *
 * This code is hereby placed in the public domain.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This is experimental x86[_64] derivative. It assumes little-endian
 * byte order and expects CPU to sustain unaligned memory references.
 * It is used as playground for cache-time attack mitigations and
 * serves as reference C implementation for x86[_64] assembler.
 *
 *					<appro@fy.chalmers.se>
 */

#ifndef AES_DEBUG
# ifndef NDEBUG
#  define NDEBUG
# endif
#endif

#include <stdlib.h>
#include <openssl/aes.h>
#include "aes_locl.h"

/*
 * These two parameters control which table, 256-byte or 2KB, is
 * referenced in outer and respectively inner rounds.
 */
#define AES_COMPACT_IN_OUTER_ROUNDS
#ifdef AES_COMPACT_IN_OUTER_ROUNDS
/* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
 * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further*
 * by factor of ~2. */
# undef AES_COMPACT_IN_INNER_ROUNDS
#endif
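
/*
 * The AES_COMPACT_* paths select the 256-byte Te4/Td4 S-box tables in
 * place of the 2KB Te/Td tables.  The intent, per the cache-time attack
 * note at the top of the file, appears to be that a table spanning only a
 * handful of cache lines (and touched wholesale by prefetch256() below)
 * gives an attacker much less to observe through data-cache timing than a
 * 2KB table indexed with key-dependent bytes, at the performance cost
 * quoted above.
 */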
#if 1
static void
prefetch256(const void *table)
{
	volatile unsigned long *t = (void *)table, ret;
	unsigned long sum;
	int i;

	/* 32 is common least cache-line size */
	for (sum = 0, i = 0; i < 256/sizeof(t[0]); i += 32 / sizeof(t[0]))
		sum ^= t[i];

	ret = sum;
}
#else
# define prefetch256(t)
#endif

#undef GETU32
#define GETU32(p) (*((u32*)(p)))

#if defined(_LP64)
typedef unsigned long u64;
#define U64(C) C##UL
#else
typedef unsigned long long u64;
#define U64(C) C##ULL
#endif

#undef ROTATE
#if defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#  define ROTATE(a,n)	({ unsigned int ret;	\
			asm (			\
			"roll %1,%0"		\
			: "=r"(ret)		\
			: "I"(n), "0"(a)	\
			: "cc");		\
			ret;			\
		})
# endif
#endif
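
/*
 * ROTATE(a,n) is a 32-bit rotate left by n bits, done with the x86 "roll"
 * instruction when gcc inline asm is available.  Where ROTATE stays
 * undefined, the code below falls back to the equivalent shift pairs,
 * e.g. (x << 8) ^ (x >> 24) in place of ROTATE(x, 8).
 */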
/*
Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
Te0[x] = S [x].[02, 01, 01, 03];
Te1[x] = S [x].[03, 02, 01, 01];
Te2[x] = S [x].[01, 03, 02, 01];
Te3[x] = S [x].[01, 01, 03, 02];
*/
#define Te0 (u32)((u64*)((u8*)Te+0))
#define Te1 (u32)((u64*)((u8*)Te+3))
#define Te2 (u32)((u64*)((u8*)Te+2))
#define Te3 (u32)((u64*)((u8*)Te+1))

/*
Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
Td0[x] = Si[x].[0e, 09, 0d, 0b];
Td1[x] = Si[x].[0b, 0e, 09, 0d];
Td2[x] = Si[x].[0d, 0b, 0e, 09];
Td3[x] = Si[x].[09, 0d, 0b, 0e];
Td4[x] = Si[x].[01];
*/
#define Td0 (u32)((u64*)((u8*)Td+0))
#define Td1 (u32)((u64*)((u8*)Td+3))
#define Td2 (u32)((u64*)((u8*)Td+2))
#define Td3 (u32)((u64*)((u8*)Td+1))
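
/*
 * Each 64-bit Te[x]/Td[x] entry stores the corresponding 4-byte column
 * twice in a row (see the patterns above), so the four byte-rotated views
 * Te0..Te3 and Td0..Td3 are simply 32-bit reads at byte offsets 0..3 into
 * the same entry: the (u32) cast keeps the low, i.e. lowest-addressed,
 * four bytes of the unaligned 8-byte load.  This halves the table
 * footprint compared with four separately rotated 1KB tables and relies
 * on the little-endian, unaligned-load assumptions stated at the top of
 * the file.
 */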

static const u64 Te[256] = {
127 U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
128 U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
129 U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
130 U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
131 U64(0x5030306050303060), U64(0x0301010203010102),
132 U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
133 U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
134 U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
135 U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
136 U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
137 U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
138 U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
139 U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
140 U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
141 U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
142 U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
143 U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
144 U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
145 U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
146 U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
147 U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
148 U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
149 U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
150 U64(0x5331316253313162), U64(0x3f15152a3f15152a),
151 U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
152 U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
153 U64(0x2818183028181830), U64(0xa1969637a1969637),
154 U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
155 U64(0x0907070e0907070e), U64(0x3612122436121224),
156 U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
157 U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
158 U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
159 U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
160 U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
161 U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
162 U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
163 U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
164 U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
165 U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
166 U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
167 U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
168 U64(0x0000000000000000), U64(0x2cededc12cededc1),
169 U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
170 U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
171 U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
172 U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
173 U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
174 U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
175 U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
176 U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
177 U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
178 U64(0x5533336655333366), U64(0x9485851194858511),
179 U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
180 U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
181 U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
182 U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
183 U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
184 U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
185 U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
186 U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
187 U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
188 U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
189 U64(0x3010102030101020), U64(0x1affffe51affffe5),
190 U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
191 U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
192 U64(0x3513132635131326), U64(0x2fececc32fececc3),
193 U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
194 U64(0xcc444488cc444488), U64(0x3917172e3917172e),
195 U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
196 U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
197 U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
198 U64(0x2b1919322b191932), U64(0x957373e6957373e6),
199 U64(0xa06060c0a06060c0), U64(0x9881811998818119),
200 U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
201 U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
202 U64(0xab90903bab90903b), U64(0x8388880b8388880b),
203 U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
204 U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
205 U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
206 U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
207 U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
208 U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
209 U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
210 U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
211 U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
212 U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
213 U64(0xa8919139a8919139), U64(0xa4959531a4959531),
214 U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
215 U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
216 U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
217 U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
218 U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
219 U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
220 U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
221 U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
222 U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
223 U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
224 U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
225 U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
226 U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
227 U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
228 U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
229 U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
230 U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
231 U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
232 U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
233 U64(0xd8484890d8484890), U64(0x0503030605030306),
234 U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
235 U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
236 U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
237 U64(0x9186861791868617), U64(0x58c1c19958c1c199),
238 U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
239 U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
240 U64(0xb398982bb398982b), U64(0x3311112233111122),
241 U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
242 U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
243 U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
244 U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
245 U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
246 U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
247 U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
248 U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
249 U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
250 U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
251 U64(0xc3414182c3414182), U64(0xb0999929b0999929),
252 U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
253 U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
	U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
};

static const u8 Te4[256] = {
258 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
259 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
260 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
261 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
262 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
263 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
264 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
265 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
266 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
267 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
268 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
269 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
270 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
271 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
272 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
273 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
274 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
275 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
276 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
277 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
278 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
279 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
280 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
281 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
282 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
283 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
284 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
285 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
286 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
287 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
288 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
	0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
};

static const u64 Td[256] = {
293 U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
294 U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
295 U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
296 U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
297 U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
298 U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
299 U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
300 U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
301 U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
302 U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
303 U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
304 U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
305 U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
306 U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
307 U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
308 U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
309 U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
310 U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
311 U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
312 U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
313 U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
314 U64(0x6033519760335197), U64(0x457f5362457f5362),
315 U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
316 U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
317 U64(0x5868487058684870), U64(0x19fd458f19fd458f),
318 U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
319 U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
320 U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
321 U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
322 U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
323 U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
324 U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
325 U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
326 U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
327 U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
328 U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
329 U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
330 U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
331 U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
332 U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
333 U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
334 U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
335 U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
336 U64(0x6fd406046fd40604), U64(0xff155060ff155060),
337 U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
338 U64(0xcc434089cc434089), U64(0x779ed967779ed967),
339 U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
340 U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
341 U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
342 U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
343 U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
344 U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
345 U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
346 U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
347 U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
348 U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
349 U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
350 U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
351 U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
352 U64(0x694b775a694b775a), U64(0x161a121c161a121c),
353 U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
354 U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
355 U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
356 U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
357 U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
358 U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
359 U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
360 U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
361 U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
362 U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
363 U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
364 U64(0x4022971340229713), U64(0x2011c6842011c684),
365 U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
366 U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
367 U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
368 U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
369 U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
370 U64(0xfa489411fa489411), U64(0x2264e9472264e947),
371 U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
372 U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
373 U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
374 U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
375 U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
376 U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
377 U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
378 U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
379 U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
380 U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
381 U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
382 U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
383 U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
384 U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
385 U64(0x097826cd097826cd), U64(0xf418596ef418596e),
386 U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
387 U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
388 U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
389 U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
390 U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
391 U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
392 U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
393 U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
394 U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
395 U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
396 U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
397 U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
398 U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
399 U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
400 U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
401 U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
402 U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
403 U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
404 U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
405 U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
406 U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
407 U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
408 U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
409 U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
410 U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
411 U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
412 U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
413 U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
414 U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
415 U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
416 U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
417 U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
418 U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
419 U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
	U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
};

static const u8 Td4[256] = {
423 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
424 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
425 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
426 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
427 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
428 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
429 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
430 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
431 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
432 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
433 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
434 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
435 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
436 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
437 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
438 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
439 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
440 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
441 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
442 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
443 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
444 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
445 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
446 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
447 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
448 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
449 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
450 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
451 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
452 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
453 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
	0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
};

static const u32 rcon[] = {
	0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
	0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
	0x0000001bU, 0x00000036U,
	/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
};

/**
 * Expand the cipher key into the encryption key schedule.
 */
int
AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
{
	u32 *rk;
	int i = 0;
	u32 temp;

	if (!userKey || !key)
		return -1;
	if (bits != 128 && bits != 192 && bits != 256)
		return -2;

	rk = key->rd_key;

	if (bits == 128)
		key->rounds = 10;
	else if (bits == 192)
		key->rounds = 12;
	else
		key->rounds = 14;

	rk[0] = GETU32(userKey);
	rk[1] = GETU32(userKey + 4);
	rk[2] = GETU32(userKey + 8);
	rk[3] = GETU32(userKey + 12);
	if (bits == 128) {
		while (1) {
			temp = rk[3];
			rk[4] = rk[0] ^
			    (Te4[(temp >> 8) & 0xff]) ^
			    (Te4[(temp >> 16) & 0xff] << 8) ^
			    (Te4[(temp >> 24)] << 16) ^
			    (Te4[(temp) & 0xff] << 24) ^
			    rcon[i];
			rk[5] = rk[1] ^ rk[4];
			rk[6] = rk[2] ^ rk[5];
			rk[7] = rk[3] ^ rk[6];
			if (++i == 10) {
				return 0;
			}
			rk += 4;
		}
	}
	rk[4] = GETU32(userKey + 16);
	rk[5] = GETU32(userKey + 20);
	if (bits == 192) {
		while (1) {
			temp = rk[5];
			rk[6] = rk[0] ^
			    (Te4[(temp >> 8) & 0xff]) ^
			    (Te4[(temp >> 16) & 0xff] << 8) ^
			    (Te4[(temp >> 24)] << 16) ^
			    (Te4[(temp) & 0xff] << 24) ^
			    rcon[i];
			rk[7] = rk[1] ^ rk[6];
			rk[8] = rk[2] ^ rk[7];
			rk[9] = rk[3] ^ rk[8];
			if (++i == 8) {
				return 0;
			}
			rk[10] = rk[4] ^ rk[9];
			rk[11] = rk[5] ^ rk[10];
			rk += 6;
		}
	}
	rk[6] = GETU32(userKey + 24);
	rk[7] = GETU32(userKey + 28);
	if (bits == 256) {
		while (1) {
			temp = rk[7];
			rk[8] = rk[0] ^
			    (Te4[(temp >> 8) & 0xff]) ^
			    (Te4[(temp >> 16) & 0xff] << 8) ^
			    (Te4[(temp >> 24)] << 16) ^
			    (Te4[(temp) & 0xff] << 24) ^
			    rcon[i];
			rk[9] = rk[1] ^ rk[8];
			rk[10] = rk[2] ^ rk[9];
			rk[11] = rk[3] ^ rk[10];
			if (++i == 7) {
				return 0;
			}
			temp = rk[11];
			rk[12] = rk[4] ^
			    (Te4[(temp) & 0xff]) ^
			    (Te4[(temp >> 8) & 0xff] << 8) ^
			    (Te4[(temp >> 16) & 0xff] << 16) ^
			    (Te4[(temp >> 24)] << 24);
			rk[13] = rk[5] ^ rk[12];
			rk[14] = rk[6] ^ rk[13];
			rk[15] = rk[7] ^ rk[14];

			rk += 8;
		}
	}
	return 0;
}
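
/*
 * Typical use of this low-level block API (sketch only; a real caller
 * picks these names up from <openssl/aes.h> and layers a cipher mode and
 * error handling on top):
 *
 *	AES_KEY ks;
 *	unsigned char pt[16], ct[16];
 *
 *	if (AES_set_encrypt_key(key_bytes, 128, &ks) != 0)
 *		...reject the key...
 *	AES_encrypt(pt, ct, &ks);
 *
 *	if (AES_set_decrypt_key(key_bytes, 128, &ks) != 0)
 *		...reject the key...
 *	AES_decrypt(ct, pt, &ks);
 */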

/**
 * Expand the cipher key into the decryption key schedule.
 */
int
AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
{
	u32 *rk;
	int i, j, status;
	u32 temp;

	/* first, start with an encryption schedule */
	status = AES_set_encrypt_key(userKey, bits, key);
	if (status < 0)
		return status;

	rk = key->rd_key;

	/* invert the order of the round keys: */
	for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
		temp = rk[i];
		rk[i] = rk[j];
		rk[j] = temp;
		temp = rk[i + 1];
		rk[i + 1] = rk[j + 1];
		rk[j + 1] = temp;
		temp = rk[i + 2];
		rk[i + 2] = rk[j + 2];
		rk[j + 2] = temp;
		temp = rk[i + 3];
		rk[i + 3] = rk[j + 3];
		rk[j + 3] = temp;
	}
	/* apply the inverse MixColumn transform to all round keys but the first and the last: */
	for (i = 1; i < (key->rounds); i++) {
		rk += 4;
#if 1
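		/*
		 * The bit-twiddling below doubles all four bytes of a round
		 * key word in GF(2^8) at once: m keeps the top bit of each
		 * byte, (m - (m >> 7)) turns every 0x80 byte into 0x7f, and
		 * that mask selects the 0x1b reduction constant exactly for
		 * the bytes that overflowed.  tp2/tp4/tp8 are thus 2x/4x/8x,
		 * and tp9/tpb/tpd/tpe the 09/0b/0d/0e multiples needed for
		 * the inverse MixColumn of the word.
		 */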
		for (j = 0; j < 4; j++) {
			u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;

			tp1 = rk[j];
			m = tp1 & 0x80808080;
			tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
			    ((m - (m >> 7)) & 0x1b1b1b1b);
			m = tp2 & 0x80808080;
			tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
			    ((m - (m >> 7)) & 0x1b1b1b1b);
			m = tp4 & 0x80808080;
			tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
			    ((m - (m >> 7)) & 0x1b1b1b1b);
			tp9 = tp8 ^ tp1;
			tpb = tp9 ^ tp2;
			tpd = tp9 ^ tp4;
			tpe = tp8 ^ tp4 ^ tp2;
#if defined(ROTATE)
			rk[j] = tpe ^ ROTATE(tpd, 16) ^
			    ROTATE(tp9, 8) ^ ROTATE(tpb, 24);
#else
			rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
			    (tp9 >> 24) ^ (tp9 << 8) ^
			    (tpb >> 8) ^ (tpb << 24);
#endif
		}
#else
		rk[0] =
		    Td0[Te2[(rk[0]) & 0xff] & 0xff] ^
		    Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^
		    Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
		    Td3[Te2[(rk[0] >> 24)] & 0xff];
		rk[1] =
		    Td0[Te2[(rk[1]) & 0xff] & 0xff] ^
		    Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^
		    Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
		    Td3[Te2[(rk[1] >> 24)] & 0xff];
		rk[2] =
		    Td0[Te2[(rk[2]) & 0xff] & 0xff] ^
		    Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^
		    Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
		    Td3[Te2[(rk[2] >> 24)] & 0xff];
		rk[3] =
		    Td0[Te2[(rk[3]) & 0xff] & 0xff] ^
		    Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^
		    Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
		    Td3[Te2[(rk[3] >> 24)] & 0xff];
#endif
	}
	return 0;
}

/*
 * Encrypt a single block
 * in and out can overlap
 */
void
AES_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
{
	const u32 *rk;
	u32 s0, s1, s2, s3, t[4];
	int r;

	rk = key->rd_key;

	/*
	 * map byte array block to cipher state
	 * and add initial round key:
	 */
	s0 = GETU32(in) ^ rk[0];
	s1 = GETU32(in + 4) ^ rk[1];
	s2 = GETU32(in + 8) ^ rk[2];
	s3 = GETU32(in + 12) ^ rk[3];

#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
	prefetch256(Te4);

	t[0] = Te4[(s0) & 0xff] ^
	    Te4[(s1 >> 8) & 0xff] << 8 ^
	    Te4[(s2 >> 16) & 0xff] << 16 ^
	    Te4[(s3 >> 24)] << 24;
	t[1] = Te4[(s1) & 0xff] ^
	    Te4[(s2 >> 8) & 0xff] << 8 ^
	    Te4[(s3 >> 16) & 0xff] << 16 ^
	    Te4[(s0 >> 24)] << 24;
	t[2] = Te4[(s2) & 0xff] ^
	    Te4[(s3 >> 8) & 0xff] << 8 ^
	    Te4[(s0 >> 16) & 0xff] << 16 ^
	    Te4[(s1 >> 24)] << 24;
	t[3] = Te4[(s3) & 0xff] ^
	    Te4[(s0 >> 8) & 0xff] << 8 ^
	    Te4[(s1 >> 16) & 0xff] << 16 ^
	    Te4[(s2 >> 24)] << 24;

	/* now do the linear transform using words */
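	/*
	 * r2 below is 2x of every byte of t[i] (same top-bit trick as in
	 * AES_set_decrypt_key); combining it with rotated copies of r0 and
	 * r2 yields 02*x ^ 03*x' ^ x'' ^ x''' per byte, i.e. one MixColumns
	 * column, which is then XORed with the round key word.
	 */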
	{
		int i;
		u32 r0, r1, r2;

		for (i = 0; i < 4; i++) {
			r0 = t[i];
			r1 = r0 & 0x80808080;
			r2 = ((r0 & 0x7f7f7f7f) << 1) ^
			    ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
#if defined(ROTATE)
			t[i] = r2 ^ ROTATE(r2, 24) ^ ROTATE(r0, 24) ^
			    ROTATE(r0, 16) ^ ROTATE(r0, 8);
#else
			t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
			    (r0 << 16) ^ (r0 >> 16) ^ (r0 << 8) ^ (r0 >> 24);
#endif
			t[i] ^= rk[4 + i];
		}
	}
#else
	t[0] = Te0[(s0) & 0xff] ^
	    Te1[(s1 >> 8) & 0xff] ^
	    Te2[(s2 >> 16) & 0xff] ^
	    Te3[(s3 >> 24)] ^
	    rk[4];
	t[1] = Te0[(s1) & 0xff] ^
	    Te1[(s2 >> 8) & 0xff] ^
	    Te2[(s3 >> 16) & 0xff] ^
	    Te3[(s0 >> 24)] ^
	    rk[5];
	t[2] = Te0[(s2) & 0xff] ^
	    Te1[(s3 >> 8) & 0xff] ^
	    Te2[(s0 >> 16) & 0xff] ^
	    Te3[(s1 >> 24)] ^
	    rk[6];
	t[3] = Te0[(s3) & 0xff] ^
	    Te1[(s0 >> 8) & 0xff] ^
	    Te2[(s1 >> 16) & 0xff] ^
	    Te3[(s2 >> 24)] ^
	    rk[7];
#endif
	s0 = t[0];
	s1 = t[1];
	s2 = t[2];
	s3 = t[3];
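
	/*
	 * key->rounds is 10, 12 or 14, so the loop below runs the 8, 10 or
	 * 12 middle rounds; the first round was done above and the final
	 * round (no MixColumns) follows the loop.
	 */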
	/*
	 * Nr - 2 full rounds:
	 */
	for (rk += 8, r = key->rounds - 2; r > 0; rk += 4, r--) {
#if defined(AES_COMPACT_IN_INNER_ROUNDS)
		t[0] = Te4[(s0) & 0xff] ^
		    Te4[(s1 >> 8) & 0xff] << 8 ^
		    Te4[(s2 >> 16) & 0xff] << 16 ^
		    Te4[(s3 >> 24)] << 24;
		t[1] = Te4[(s1) & 0xff] ^
		    Te4[(s2 >> 8) & 0xff] << 8 ^
		    Te4[(s3 >> 16) & 0xff] << 16 ^
		    Te4[(s0 >> 24)] << 24;
		t[2] = Te4[(s2) & 0xff] ^
		    Te4[(s3 >> 8) & 0xff] << 8 ^
		    Te4[(s0 >> 16) & 0xff] << 16 ^
		    Te4[(s1 >> 24)] << 24;
		t[3] = Te4[(s3) & 0xff] ^
		    Te4[(s0 >> 8) & 0xff] << 8 ^
		    Te4[(s1 >> 16) & 0xff] << 16 ^
		    Te4[(s2 >> 24)] << 24;

		/* now do the linear transform using words */
		{
			int i;
			u32 r0, r1, r2;

			for (i = 0; i < 4; i++) {
				r0 = t[i];
				r1 = r0 & 0x80808080;
				r2 = ((r0 & 0x7f7f7f7f) << 1) ^
				    ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
#if defined(ROTATE)
				t[i] = r2 ^ ROTATE(r2, 24) ^ ROTATE(r0, 24) ^
				    ROTATE(r0, 16) ^ ROTATE(r0, 8);
#else
				t[i] = r2 ^ ((r2 ^ r0) << 24) ^
				    ((r2 ^ r0) >> 8) ^
				    (r0 << 16) ^ (r0 >> 16) ^
				    (r0 << 8) ^ (r0 >> 24);
#endif
				t[i] ^= rk[i];
			}
		}
#else
		t[0] = Te0[(s0) & 0xff] ^
		    Te1[(s1 >> 8) & 0xff] ^
		    Te2[(s2 >> 16) & 0xff] ^
		    Te3[(s3 >> 24)] ^
		    rk[0];
		t[1] = Te0[(s1) & 0xff] ^
		    Te1[(s2 >> 8) & 0xff] ^
		    Te2[(s3 >> 16) & 0xff] ^
		    Te3[(s0 >> 24)] ^
		    rk[1];
		t[2] = Te0[(s2) & 0xff] ^
		    Te1[(s3 >> 8) & 0xff] ^
		    Te2[(s0 >> 16) & 0xff] ^
		    Te3[(s1 >> 24)] ^
		    rk[2];
		t[3] = Te0[(s3) & 0xff] ^
		    Te1[(s0 >> 8) & 0xff] ^
		    Te2[(s1 >> 16) & 0xff] ^
		    Te3[(s2 >> 24)] ^
		    rk[3];
#endif
		s0 = t[0];
		s1 = t[1];
		s2 = t[2];
		s3 = t[3];
	}
	/*
	 * apply last round and
	 * map cipher state to byte array block:
	 */
#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
	prefetch256(Te4);

	*(u32*)(out + 0) =
	    Te4[(s0) & 0xff] ^
	    Te4[(s1 >> 8) & 0xff] << 8 ^
	    Te4[(s2 >> 16) & 0xff] << 16 ^
	    Te4[(s3 >> 24)] << 24 ^
	    rk[0];
	*(u32*)(out + 4) =
	    Te4[(s1) & 0xff] ^
	    Te4[(s2 >> 8) & 0xff] << 8 ^
	    Te4[(s3 >> 16) & 0xff] << 16 ^
	    Te4[(s0 >> 24)] << 24 ^
	    rk[1];
	*(u32*)(out + 8) =
	    Te4[(s2) & 0xff] ^
	    Te4[(s3 >> 8) & 0xff] << 8 ^
	    Te4[(s0 >> 16) & 0xff] << 16 ^
	    Te4[(s1 >> 24)] << 24 ^
	    rk[2];
	*(u32*)(out + 12) =
	    Te4[(s3) & 0xff] ^
	    Te4[(s0 >> 8) & 0xff] << 8 ^
	    Te4[(s1 >> 16) & 0xff] << 16 ^
	    Te4[(s2 >> 24)] << 24 ^
	    rk[3];
#else
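	/*
	 * Last round without the 256-byte table: this path has no plain
	 * S-box table, so each S[x] byte is fished out of the big Te
	 * entries instead.  Te2[x], Te3[x], Te0[x] and Te1[x] all contain
	 * S[x] at some byte position (the 01 columns in the comment above
	 * the tables), and the 0x000000ff/0x0000ff00/... masks keep exactly
	 * that byte, already shifted into its output position.
	 */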
	*(u32*)(out + 0) =
	    (Te2[(s0) & 0xff] & 0x000000ffU) ^
	    (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^
	    (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
	    (Te1[(s3 >> 24)] & 0xff000000U) ^
	    rk[0];
	*(u32*)(out + 4) =
	    (Te2[(s1) & 0xff] & 0x000000ffU) ^
	    (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^
	    (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
	    (Te1[(s0 >> 24)] & 0xff000000U) ^
	    rk[1];
	*(u32*)(out + 8) =
	    (Te2[(s2) & 0xff] & 0x000000ffU) ^
	    (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^
	    (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
	    (Te1[(s1 >> 24)] & 0xff000000U) ^
	    rk[2];
	*(u32*)(out + 12) =
	    (Te2[(s3) & 0xff] & 0x000000ffU) ^
	    (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^
	    (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
	    (Te1[(s2 >> 24)] & 0xff000000U) ^
	    rk[3];
#endif
}

/*
 * Decrypt a single block
 * in and out can overlap
 */
void
AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
{
	const u32 *rk;
	u32 s0, s1, s2, s3, t[4];
	int r;

	rk = key->rd_key;

	/*
	 * map byte array block to cipher state
	 * and add initial round key:
	 */
	s0 = GETU32(in) ^ rk[0];
	s1 = GETU32(in + 4) ^ rk[1];
	s2 = GETU32(in + 8) ^ rk[2];
	s3 = GETU32(in + 12) ^ rk[3];

#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
	prefetch256(Td4);

	t[0] = Td4[(s0) & 0xff] ^
	    Td4[(s3 >> 8) & 0xff] << 8 ^
	    Td4[(s2 >> 16) & 0xff] << 16 ^
	    Td4[(s1 >> 24)] << 24;
	t[1] = Td4[(s1) & 0xff] ^
	    Td4[(s0 >> 8) & 0xff] << 8 ^
	    Td4[(s3 >> 16) & 0xff] << 16 ^
	    Td4[(s2 >> 24)] << 24;
	t[2] = Td4[(s2) & 0xff] ^
	    Td4[(s1 >> 8) & 0xff] << 8 ^
	    Td4[(s0 >> 16) & 0xff] << 16 ^
	    Td4[(s3 >> 24)] << 24;
	t[3] = Td4[(s3) & 0xff] ^
	    Td4[(s2 >> 8) & 0xff] << 8 ^
	    Td4[(s1 >> 16) & 0xff] << 16 ^
	    Td4[(s0 >> 24)] << 24;

	/* now do the linear transform using words */
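	/*
	 * Same pattern as the encrypt path, but with the full inverse
	 * MixColumn matrix: tp2/tp4/tp8 are the 2x/4x/8x GF(2^8) multiples
	 * of every byte in the word, and tpe plus rotated copies of
	 * tpd/tp9/tpb reconstruct the 0e,0b,0d,09 row for each output byte.
	 */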
	{
		int i;
		u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;

		for (i = 0; i < 4; i++) {
			tp1 = t[i];
			m = tp1 & 0x80808080;
			tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
			    ((m - (m >> 7)) & 0x1b1b1b1b);
			m = tp2 & 0x80808080;
			tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
			    ((m - (m >> 7)) & 0x1b1b1b1b);
			m = tp4 & 0x80808080;
			tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
			    ((m - (m >> 7)) & 0x1b1b1b1b);
			tp9 = tp8 ^ tp1;
			tpb = tp9 ^ tp2;
			tpd = tp9 ^ tp4;
			tpe = tp8 ^ tp4 ^ tp2;
#if defined(ROTATE)
			t[i] = tpe ^ ROTATE(tpd, 16) ^
			    ROTATE(tp9, 8) ^ ROTATE(tpb, 24);
#else
			t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
			    (tp9 >> 24) ^ (tp9 << 8) ^ (tpb >> 8) ^ (tpb << 24);
#endif
			t[i] ^= rk[4 + i];
		}
	}
#else
	t[0] = Td0[(s0) & 0xff] ^
	    Td1[(s3 >> 8) & 0xff] ^
	    Td2[(s2 >> 16) & 0xff] ^
	    Td3[(s1 >> 24)] ^
	    rk[4];
	t[1] = Td0[(s1) & 0xff] ^
	    Td1[(s0 >> 8) & 0xff] ^
	    Td2[(s3 >> 16) & 0xff] ^
	    Td3[(s2 >> 24)] ^
	    rk[5];
	t[2] = Td0[(s2) & 0xff] ^
	    Td1[(s1 >> 8) & 0xff] ^
	    Td2[(s0 >> 16) & 0xff] ^
	    Td3[(s3 >> 24)] ^
	    rk[6];
	t[3] = Td0[(s3) & 0xff] ^
	    Td1[(s2 >> 8) & 0xff] ^
	    Td2[(s1 >> 16) & 0xff] ^
	    Td3[(s0 >> 24)] ^
	    rk[7];
#endif
	s0 = t[0];
	s1 = t[1];
	s2 = t[2];
	s3 = t[3];

	/*
	 * Nr - 2 full rounds:
	 */
	for (rk += 8, r = key->rounds - 2; r > 0; rk += 4, r--) {
#if defined(AES_COMPACT_IN_INNER_ROUNDS)
		t[0] = Td4[(s0) & 0xff] ^
		    Td4[(s3 >> 8) & 0xff] << 8 ^
		    Td4[(s2 >> 16) & 0xff] << 16 ^
		    Td4[(s1 >> 24)] << 24;
		t[1] = Td4[(s1) & 0xff] ^
		    Td4[(s0 >> 8) & 0xff] << 8 ^
		    Td4[(s3 >> 16) & 0xff] << 16 ^
		    Td4[(s2 >> 24)] << 24;
		t[2] = Td4[(s2) & 0xff] ^
		    Td4[(s1 >> 8) & 0xff] << 8 ^
		    Td4[(s0 >> 16) & 0xff] << 16 ^
		    Td4[(s3 >> 24)] << 24;
		t[3] = Td4[(s3) & 0xff] ^
		    Td4[(s2 >> 8) & 0xff] << 8 ^
		    Td4[(s1 >> 16) & 0xff] << 16 ^
		    Td4[(s0 >> 24)] << 24;

		/* now do the linear transform using words */
		{
			int i;
			u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;

			for (i = 0; i < 4; i++) {
				tp1 = t[i];
				m = tp1 & 0x80808080;
				tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
				    ((m - (m >> 7)) & 0x1b1b1b1b);
				m = tp2 & 0x80808080;
				tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
				    ((m - (m >> 7)) & 0x1b1b1b1b);
				m = tp4 & 0x80808080;
				tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
				    ((m - (m >> 7)) & 0x1b1b1b1b);
				tp9 = tp8 ^ tp1;
				tpb = tp9 ^ tp2;
				tpd = tp9 ^ tp4;
				tpe = tp8 ^ tp4 ^ tp2;
#if defined(ROTATE)
				t[i] = tpe ^ ROTATE(tpd, 16) ^
				    ROTATE(tp9, 8) ^ ROTATE(tpb, 24);
#else
				t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
				    (tp9 >> 24) ^ (tp9 << 8) ^
				    (tpb >> 8) ^ (tpb << 24);
#endif
				t[i] ^= rk[i];
			}
		}
#else
		t[0] = Td0[(s0) & 0xff] ^
		    Td1[(s3 >> 8) & 0xff] ^
		    Td2[(s2 >> 16) & 0xff] ^
		    Td3[(s1 >> 24)] ^
		    rk[0];
		t[1] = Td0[(s1) & 0xff] ^
		    Td1[(s0 >> 8) & 0xff] ^
		    Td2[(s3 >> 16) & 0xff] ^
		    Td3[(s2 >> 24)] ^
		    rk[1];
		t[2] = Td0[(s2) & 0xff] ^
		    Td1[(s1 >> 8) & 0xff] ^
		    Td2[(s0 >> 16) & 0xff] ^
		    Td3[(s3 >> 24)] ^
		    rk[2];
		t[3] = Td0[(s3) & 0xff] ^
		    Td1[(s2 >> 8) & 0xff] ^
		    Td2[(s1 >> 16) & 0xff] ^
		    Td3[(s0 >> 24)] ^
		    rk[3];
#endif
		s0 = t[0];
		s1 = t[1];
		s2 = t[2];
		s3 = t[3];
	}
	/*
	 * apply last round and
	 * map cipher state to byte array block:
	 */
	prefetch256(Td4);

	*(u32*)(out + 0) =
	    (Td4[(s0) & 0xff]) ^
	    (Td4[(s3 >> 8) & 0xff] << 8) ^
	    (Td4[(s2 >> 16) & 0xff] << 16) ^
	    (Td4[(s1 >> 24)] << 24) ^
	    rk[0];
	*(u32*)(out + 4) =
	    (Td4[(s1) & 0xff]) ^
	    (Td4[(s0 >> 8) & 0xff] << 8) ^
	    (Td4[(s3 >> 16) & 0xff] << 16) ^
	    (Td4[(s2 >> 24)] << 24) ^
	    rk[1];
	*(u32*)(out + 8) =
	    (Td4[(s2) & 0xff]) ^
	    (Td4[(s1 >> 8) & 0xff] << 8) ^
	    (Td4[(s0 >> 16) & 0xff] << 16) ^
	    (Td4[(s3 >> 24)] << 24) ^
	    rk[2];
	*(u32*)(out + 12) =
	    (Td4[(s3) & 0xff]) ^
	    (Td4[(s2 >> 8) & 0xff] << 8) ^
	    (Td4[(s1 >> 16) & 0xff] << 16) ^
	    (Td4[(s0 >> 24)] << 24) ^
	    rk[3];
}