1 /////////////////////////////////////////////////////////////////////////
2 // $Id: aes.cc,v 1.4 2008/09/25 19:19:40 sshwarts Exp $
3 /////////////////////////////////////////////////////////////////////////
5 // Copyright (c) 2008 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 /////////////////////////////////////////////////////////////////////////
24 #define NEED_CPU_REG_SHORTCUTS 1
27 #define LOG_THIS BX_CPU_THIS_PTR
32 // XMM - Byte Representation of a 128-bit AES State
// 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
37 // --+-+-+-+-+-+-+-+-+-+-+-+-+-+-+--
38 // P O N M L K J I H G F E D C B A
41 // XMM - Matrix Representation of a 128-bit AES State
43 // | A E I M | | S(0,0) S(0,1) S(0,2) S(0,3) | | S(0) S(4) S(8) S(C) |
44 // | B F J N | = | S(1,0) S(1,1) S(1,2) S(1,3) | = | S(1) S(5) S(9) S(D) |
45 // | C G K O | | S(2,0) S(2,1) S(2,2) S(2,3) | | S(2) S(6) S(A) S(E) |
// | D H L P |    | S(3,0) S(3,1) S(3,2) S(3,3) |    | S(3) S(7) S(B) S(F) |
50 // AES ShiftRows transformation
52 // | A E I M | | A E I M |
53 // | B F J N | => | F J N B |
54 // | C G K O | | K O C G |
55 // | D H L P | | P D H L |
58 BX_CPP_INLINE
void AES_ShiftRows(BxPackedXmmRegister
&state
)
60 BxPackedXmmRegister tmp
= state
;
62 state
.xmmubyte(0x0) = tmp
.xmmubyte(0x0); // A => A
63 state
.xmmubyte(0x1) = tmp
.xmmubyte(0x5);
64 state
.xmmubyte(0x2) = tmp
.xmmubyte(0xA);
65 state
.xmmubyte(0x3) = tmp
.xmmubyte(0xF);
66 state
.xmmubyte(0x4) = tmp
.xmmubyte(0x4); // E => E
67 state
.xmmubyte(0x5) = tmp
.xmmubyte(0x9);
68 state
.xmmubyte(0x6) = tmp
.xmmubyte(0xE);
69 state
.xmmubyte(0x7) = tmp
.xmmubyte(0x3);
70 state
.xmmubyte(0x8) = tmp
.xmmubyte(0x8); // I => I
71 state
.xmmubyte(0x9) = tmp
.xmmubyte(0xD);
72 state
.xmmubyte(0xA) = tmp
.xmmubyte(0x2);
73 state
.xmmubyte(0xB) = tmp
.xmmubyte(0x7);
74 state
.xmmubyte(0xC) = tmp
.xmmubyte(0xC); // M => M
75 state
.xmmubyte(0xD) = tmp
.xmmubyte(0x1);
76 state
.xmmubyte(0xE) = tmp
.xmmubyte(0x6);
77 state
.xmmubyte(0xF) = tmp
.xmmubyte(0xB);
81 // AES InverseShiftRows transformation
83 // | A E I M | | A E I M |
84 // | B F J N | => | N B F J |
85 // | C G K O | | K O C G |
86 // | D H L P | | H L P D |
89 BX_CPP_INLINE
void AES_InverseShiftRows(BxPackedXmmRegister
&state
)
91 BxPackedXmmRegister tmp
= state
;
93 state
.xmmubyte(0x0) = tmp
.xmmubyte(0x0); // A => A
94 state
.xmmubyte(0x1) = tmp
.xmmubyte(0xD);
95 state
.xmmubyte(0x2) = tmp
.xmmubyte(0xA);
96 state
.xmmubyte(0x3) = tmp
.xmmubyte(0x7);
97 state
.xmmubyte(0x4) = tmp
.xmmubyte(0x4); // E => E
98 state
.xmmubyte(0x5) = tmp
.xmmubyte(0x1);
99 state
.xmmubyte(0x6) = tmp
.xmmubyte(0xE);
100 state
.xmmubyte(0x7) = tmp
.xmmubyte(0xB);
101 state
.xmmubyte(0x8) = tmp
.xmmubyte(0x8); // I => I
102 state
.xmmubyte(0x9) = tmp
.xmmubyte(0x5);
103 state
.xmmubyte(0xA) = tmp
.xmmubyte(0x2);
104 state
.xmmubyte(0xB) = tmp
.xmmubyte(0xF);
105 state
.xmmubyte(0xC) = tmp
.xmmubyte(0xC); // M => M
106 state
.xmmubyte(0xD) = tmp
.xmmubyte(0x9);
107 state
.xmmubyte(0xE) = tmp
.xmmubyte(0x6);
108 state
.xmmubyte(0xF) = tmp
.xmmubyte(0x3);
111 static Bit8u sbox_transformation
[256] = {
112 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
113 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
114 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
115 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
116 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
117 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
118 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
119 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
120 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
121 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
122 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
123 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
124 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
125 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
126 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
127 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
128 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
129 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
130 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
131 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
132 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
133 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
134 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
135 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
136 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
137 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
138 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
139 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
140 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
141 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
142 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
143 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
146 static Bit8u inverse_sbox_transformation
[256] = {
147 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
148 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
149 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
150 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
151 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
152 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
153 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
154 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
155 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
156 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
157 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
158 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
159 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
160 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
161 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
162 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
163 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
164 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
165 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
166 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
167 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
168 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
169 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
170 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
171 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
172 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
173 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
174 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
175 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
176 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
177 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
178 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
181 BX_CPP_INLINE
void AES_SubstituteBytes(BxPackedXmmRegister
&state
)
183 for (int i
=0; i
<16; i
++)
184 state
.xmmubyte(i
) = sbox_transformation
[state
.xmmubyte(i
)];
187 BX_CPP_INLINE
void AES_InverseSubstituteBytes(BxPackedXmmRegister
&state
)
189 for (int i
=0; i
<16; i
++)
190 state
.xmmubyte(i
) = inverse_sbox_transformation
[state
.xmmubyte(i
)];
/*
 * Galois Field multiplication of a by b, modulo m.
 * Just like arithmetic multiplication, except that additions and
 * subtractions are replaced by XOR.
 * The code was taken from: http://www.darkside.com.au/ice/index.html
 */
200 BX_CPP_INLINE
unsigned gf_mul(unsigned a
, unsigned b
)
202 unsigned res
= 0, m
= 0x11b;
218 #define AES_STATE(s,a,b) (s.xmmubyte((b)*4+(a)))
220 static void AES_MixColumns(BxPackedXmmRegister
&state
)
222 BxPackedXmmRegister tmp
= state
;
224 for(int j
=0; j
<4; j
++) {
225 AES_STATE(state
, 0, j
) = gf_mul(0x2, AES_STATE(tmp
, 0, j
)) ^
226 gf_mul(0x3, AES_STATE(tmp
, 1, j
)) ^
227 AES_STATE(tmp
, 2, j
) ^
228 AES_STATE(tmp
, 3, j
);
230 AES_STATE(state
, 1, j
) = AES_STATE(tmp
, 0, j
) ^
231 gf_mul(0x2, AES_STATE(tmp
, 1, j
)) ^
232 gf_mul(0x3, AES_STATE(tmp
, 2, j
)) ^
233 AES_STATE(tmp
, 3, j
);
235 AES_STATE(state
, 2, j
) = AES_STATE(tmp
, 0, j
) ^
236 AES_STATE(tmp
, 1, j
) ^
237 gf_mul(0x2, AES_STATE(tmp
, 2, j
)) ^
238 gf_mul(0x3, AES_STATE(tmp
, 3, j
));
240 AES_STATE(state
, 3, j
) = gf_mul(0x3, AES_STATE(tmp
, 0, j
)) ^
241 AES_STATE(tmp
, 1, j
) ^
242 AES_STATE(tmp
, 2, j
) ^
243 gf_mul(0x2, AES_STATE(tmp
, 3, j
));
247 static void AES_InverseMixColumns(BxPackedXmmRegister
&state
)
249 BxPackedXmmRegister tmp
= state
;
251 for(int j
=0; j
<4; j
++) {
252 AES_STATE(state
, 0, j
) = gf_mul(0xE, AES_STATE(tmp
, 0, j
)) ^
253 gf_mul(0xB, AES_STATE(tmp
, 1, j
)) ^
254 gf_mul(0xD, AES_STATE(tmp
, 2, j
)) ^
255 gf_mul(0x9, AES_STATE(tmp
, 3, j
));
257 AES_STATE(state
, 1, j
) = gf_mul(0x9, AES_STATE(tmp
, 0, j
)) ^
258 gf_mul(0xE, AES_STATE(tmp
, 1, j
)) ^
259 gf_mul(0xB, AES_STATE(tmp
, 2, j
)) ^
260 gf_mul(0xD, AES_STATE(tmp
, 3, j
));
262 AES_STATE(state
, 2, j
) = gf_mul(0xD, AES_STATE(tmp
, 0, j
)) ^
263 gf_mul(0x9, AES_STATE(tmp
, 1, j
)) ^
264 gf_mul(0xE, AES_STATE(tmp
, 2, j
)) ^
265 gf_mul(0xB, AES_STATE(tmp
, 3, j
));
267 AES_STATE(state
, 3, j
) = gf_mul(0xB, AES_STATE(tmp
, 0, j
)) ^
268 gf_mul(0xD, AES_STATE(tmp
, 1, j
)) ^
269 gf_mul(0x9, AES_STATE(tmp
, 2, j
)) ^
270 gf_mul(0xE, AES_STATE(tmp
, 3, j
));
274 BX_CPP_INLINE Bit32u
AES_SubWord(Bit32u x
)
276 Bit8u b0
= sbox_transformation
[(x
) & 0xff];
277 Bit8u b1
= sbox_transformation
[(x
>>8) & 0xff];
278 Bit8u b2
= sbox_transformation
[(x
>>16) & 0xff];
279 Bit8u b3
= sbox_transformation
[(x
>>24) & 0xff];
281 return b0
| ((Bit32u
)(b1
) << 8) |
282 ((Bit32u
)(b2
) << 16) | ((Bit32u
)(b3
) << 24);
285 BX_CPP_INLINE Bit32u
AES_RotWord(Bit32u x
)
287 return (x
>> 8) | (x
<< 24);
293 void BX_CPP_AttrRegparmN(1) BX_CPU_C::AESIMC_VdqWdq(bxInstruction_c
*i
)
296 BX_CPU_THIS_PTR
prepareSSE();
298 BxPackedXmmRegister op
;
300 /* op is a register or memory reference */
302 op
= BX_READ_XMM_REG(i
->rm());
305 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
306 /* pointer, segment address pair */
307 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
310 AES_InverseMixColumns(op
);
312 BX_WRITE_XMM_REG(i
->nnn(), op
);
314 BX_INFO(("AESIMC_VdqWdq: required AES support, use --enable-aes option"));
315 exception(BX_UD_EXCEPTION
, 0, 0);
320 void BX_CPP_AttrRegparmN(1) BX_CPU_C::AESENC_VdqWdq(bxInstruction_c
*i
)
323 BX_CPU_THIS_PTR
prepareSSE();
325 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
327 /* op is a register or memory reference */
329 op2
= BX_READ_XMM_REG(i
->rm());
332 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
333 /* pointer, segment address pair */
334 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
338 AES_SubstituteBytes(op1
);
341 op1
.xmm64u(0) ^= op2
.xmm64u(0);
342 op1
.xmm64u(1) ^= op2
.xmm64u(1);
344 BX_WRITE_XMM_REG(i
->nnn(), op1
);
346 BX_INFO(("AESENC_VdqWdq: required AES support, use --enable-aes option"));
347 exception(BX_UD_EXCEPTION
, 0, 0);
352 void BX_CPP_AttrRegparmN(1) BX_CPU_C::AESENCLAST_VdqWdq(bxInstruction_c
*i
)
355 BX_CPU_THIS_PTR
prepareSSE();
357 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
359 /* op is a register or memory reference */
361 op2
= BX_READ_XMM_REG(i
->rm());
364 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
365 /* pointer, segment address pair */
366 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
370 AES_SubstituteBytes(op1
);
372 op1
.xmm64u(0) ^= op2
.xmm64u(0);
373 op1
.xmm64u(1) ^= op2
.xmm64u(1);
375 BX_WRITE_XMM_REG(i
->nnn(), op1
);
377 BX_INFO(("AESENCLAST_VdqWdq: required AES support, use --enable-aes option"));
378 exception(BX_UD_EXCEPTION
, 0, 0);
383 void BX_CPP_AttrRegparmN(1) BX_CPU_C::AESDEC_VdqWdq(bxInstruction_c
*i
)
386 BX_CPU_THIS_PTR
prepareSSE();
388 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
390 /* op is a register or memory reference */
392 op2
= BX_READ_XMM_REG(i
->rm());
395 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
396 /* pointer, segment address pair */
397 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
400 AES_InverseShiftRows(op1
);
401 AES_InverseSubstituteBytes(op1
);
402 AES_InverseMixColumns(op1
);
404 op1
.xmm64u(0) ^= op2
.xmm64u(0);
405 op1
.xmm64u(1) ^= op2
.xmm64u(1);
407 BX_WRITE_XMM_REG(i
->nnn(), op1
);
409 BX_INFO(("AESDEC_VdqWdq: required AES support, use --enable-aes option"));
410 exception(BX_UD_EXCEPTION
, 0, 0);
415 void BX_CPP_AttrRegparmN(1) BX_CPU_C::AESDECLAST_VdqWdq(bxInstruction_c
*i
)
418 BX_CPU_THIS_PTR
prepareSSE();
420 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
422 /* op is a register or memory reference */
424 op2
= BX_READ_XMM_REG(i
->rm());
427 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
428 /* pointer, segment address pair */
429 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
432 AES_InverseShiftRows(op1
);
433 AES_InverseSubstituteBytes(op1
);
435 op1
.xmm64u(0) ^= op2
.xmm64u(0);
436 op1
.xmm64u(1) ^= op2
.xmm64u(1);
438 BX_WRITE_XMM_REG(i
->nnn(), op1
);
440 BX_INFO(("AESDECLAST_VdqWdq: required AES support, use --enable-aes option"));
441 exception(BX_UD_EXCEPTION
, 0, 0);
446 void BX_CPP_AttrRegparmN(1) BX_CPU_C::AESKEYGENASSIST_VdqWdqIb(bxInstruction_c
*i
)
449 BX_CPU_THIS_PTR
prepareSSE();
451 BxPackedXmmRegister op
, result
;
453 /* op is a register or memory reference */
455 op
= BX_READ_XMM_REG(i
->rm());
458 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
459 /* pointer, segment address pair */
460 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op
);
463 Bit32u rcon32
= i
->Ib();
465 result
.xmm32u(0) = AES_SubWord(op
.xmm32u(1));
466 result
.xmm32u(1) = AES_RotWord(result
.xmm32u(0)) ^ rcon32
;
467 result
.xmm32u(2) = AES_SubWord(op
.xmm32u(3));
468 result
.xmm32u(3) = AES_RotWord(result
.xmm32u(2)) ^ rcon32
;
470 BX_WRITE_XMM_REG(i
->nnn(), result
);
472 BX_INFO(("AESKEYGENASSIST_VdqWdqIb: required AES support, use --enable-aes option"));
473 exception(BX_UD_EXCEPTION
, 0, 0);