2 ; ---------------------------------------------------------------------------
3 ; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
7 ; The free distribution and use of this software is allowed (with or without
8 ; changes) provided that:
10 ; 1. source code distributions include the above copyright notice, this
11 ; list of conditions and the following disclaimer;
13 ; 2. binary distributions include the above copyright notice, this list
14 ; of conditions and the following disclaimer in their documentation;
16 ; 3. the name of the copyright holder is not used to endorse products
17 ; built using this software without specific written permission.
21 ; This software is provided 'as is' with no explicit or implied warranties
22 ; in respect of its properties, including, but not limited to, correctness
23 ; and/or fitness for purpose.
24 ; ---------------------------------------------------------------------------
27 ; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h
28 ; and the same define to be set here as well. If ASM_X86_V2C is set this file
29 ; requires the C files aeskey.c and aestab.c for support.
31 ; An AES implementation for x86 processors using the YASM (or NASM) assembler.
32 ; This is a full assembler implementation covering encryption, decryption and
33 ; key scheduling. It uses 2k bytes of tables but its encryption and decryption
34 ; performance is very close to that obtained using large tables. Key schedule
35 ; expansion is slower for both encryption and decryption but this is likely to
36 ; be offset by the much smaller load that this version places on the processor
37 ; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
38 ; the design of the AES round function used here.
40 ; This code provides the standard AES block size (128 bits, 16 bytes) and the
41 ; three standard AES key sizes (128, 192 and 256 bits). It has the same call
42 ; interface as my C implementation. The ebx, esi, edi and ebp registers are
43 ; preserved across calls but eax, ecx and edx and the arithmetic status flags
44 ; are not. Although this is a full assembler implementation, it can be used
45 ; in conjunction with my C code which provides faster key scheduling using
46 ; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C
47 ; defined. It is also important that the defines below match those used in the
48 ; C code. This code uses the VC++ register saving conventions; if it is used
49 ; with another compiler, conventions for using and saving registers may need
50 ; to be checked (and calling conventions). The YASM command line for the VC++
51 ; custom build step is:
53 ; yasm -Xvc -f win32 -D <Z> -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
55 ; For the cryptlib build this is (pcg):
57 ; yasm -Xvc -f win32 -D ASM_X86_V2C -o aescrypt2.obj aes_x86_v2.asm
59 ; where <Z> is ASM_X86_V2 or ASM_X86_V2C. The calling interfaces are:
61 ; AES_RETURN aes_encrypt(const unsigned char in_blk[],
62 ; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
64 ; AES_RETURN aes_decrypt(const unsigned char in_blk[],
65 ; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
67 ; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
68 ; const aes_encrypt_ctx cx[1]);
70 ; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
71 ; const aes_decrypt_ctx cx[1]);
73 ; AES_RETURN aes_encrypt_key(const unsigned char key[],
74 ; unsigned int len, const aes_encrypt_ctx cx[1]);
76 ; AES_RETURN aes_decrypt_key(const unsigned char key[],
77 ; unsigned int len, const aes_decrypt_ctx cx[1]);
79 ; where <NNN> is 128, 192 or 256. In the last two calls the length can be in
80 ; either bits or bytes.
82 ; The DLL interface must use the _stdcall convention in which the number
83 ; of bytes of parameter space is added after an @ to the subroutine's name.
84 ; We must also remove our parameters from the stack before return (see
85 ; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
88 ; Adapted for TrueCrypt by the TrueCrypt Foundation:
89 ; - All tables generated at run-time
90 ; - Adapted for 16-bit environment
95 SEGMENT _TEXT
PUBLIC CLASS
=CODE USE16
96 SEGMENT _DATA
PUBLIC CLASS
=DATA USE16
98 GROUP DGROUP _TEXT _DATA
100 extern _aes_dec_tab
; Aestab.c
105 ; The size of the code can be reduced by using functions for the encryption
106 ; and decryption rounds in place of macro expansion
108 %define REDUCE_CODE_SIZE
110 ; Comment in/out the following lines to obtain the desired subroutines. These
111 ; selections MUST match those in the C header file aes.h
113 ; %define AES_128 ; define if AES with 128 bit keys is needed
114 ; %define AES_192 ; define if AES with 192 bit keys is needed
115 %define AES_256
; define if AES with 256 bit keys is needed
116 ; %define AES_VAR ; define if a variable key size is needed
117 %define ENCRYPTION
; define if encryption is needed
118 %define DECRYPTION
; define if decryption is needed
119 ; %define AES_REV_DKS ; define if key decryption schedule is reversed
122 %define ENCRYPTION_KEY_SCHEDULE
; define if encryption key expansion is needed
123 %define DECRYPTION_KEY_SCHEDULE
; define if decryption key expansion is needed
126 ; The encryption key schedule has the following in memory layout where N is the
127 ; number of rounds (10, 12 or 14):
129 ; lo: | input key (round 0) | ; each round is four 32-bit words
130 ; | encryption round 1 |
131 ; | encryption round 2 |
133 ; | encryption round N-1 |
134 ; hi: | encryption round N |
136 ; The decryption key schedule is normally set up so that it has the same
137 ; layout as above by actually reversing the order of the encryption key
138 ; schedule in memory (this happens when AES_REV_DKS is set):
140 ; lo: | decryption round 0 | = | encryption round N |
141 ; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
142 ; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
144 ; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
145 ; hi: | decryption round N | = | input key (round 0) |
147 ; with rounds except the first and last modified using inv_mix_column().
148 ; But if AES_REV_DKS is NOT set the order of keys is left as it is for
149 ; encryption so that it has to be accessed in reverse when used for
150 ; decryption (although the inverse mix column modifications are done)
152 ; lo: | decryption round 0 | = | input key (round 0) |
153 ; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
154 ; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
156 ; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
157 ; hi: | decryption round N | = | encryption round N |
159 ; This layout is faster when the assembler key scheduling provided here is used.
162 ; End of user defines
186 ; These macros implement stack based local variables
196 %ifdef REDUCE_CODE_SIZE
206 ; the DLL has to implement the _stdcall calling interface on return
207 ; In this case we have to take our parameters (3 4-byte pointers) off the stack
212 %macro do_name
1-2 parms
223 %macro do_call
1-2 parms
232 %macro do_exit
0-1 parms
240 ; finite field multiplies by {02}, {04} and {08}
; Each macro shifts x left by 1, 2 or 3 bits and then XORs in 0x11b scaled
; by 1, 2 or 4 for every bit that the shift pushed past bit 7, which folds
; the product back into 8 bits.
; NOTE(review): only bits 5..7 of x are inspected, so x is assumed to be a
; byte value (0..255) - confirm at the call sites.
242 %define f2
(x
) ((x
<<1)^
(((x
>>7)&1)*0x11b))
243 %define f4
(x
) ((x
<<2)^
(((x
>>6)&1)*0x11b)^
(((x
>>6)&2)*0x11b))
244 %define f8
(x
) ((x
<<3)^
(((x
>>5)&1)*0x11b)^
(((x
>>5)&2)*0x11b)^
(((x
>>5)&4)*0x11b))
246 ; finite field multiplies required in table generation
; These are the multiplies by {03}, {09}, {0b}, {0d} and {0e}; each one is
; built by XOR-ing together the f2/f4/f8 products (and x itself where the
; low bit of the multiplier is set).
248 %define f3
(x
) (f2
(x
) ^ x
)
249 %define f9
(x
) (f8
(x
) ^ x
)
250 %define fb
(x
) (f8
(x
) ^ f2
(x
) ^ x
)
251 %define fd
(x
) (f8
(x
) ^ f4
(x
) ^ x
)
252 %define fe
(x
) (f8
(x
) ^ f4
(x
) ^ f2
(x
))
; Memory operands that index _aes_enc_tab. Entries are spaced 8 bytes apart
; (the 8*x term) and the added constant selects the byte within an entry at
; which the access starts. etab_b and etab_w carry an explicit operand size
; for use with movzx; the others take their size from the instruction.
; NOTE(review): _aes_enc_tab is not declared in the visible part of this
; file - presumably an extern alongside _aes_dec_tab; confirm.
254 %define etab_0
(x
) [_aes_enc_tab
+4+8*x
]
255 %define etab_1
(x
) [_aes_enc_tab
+3+8*x
]
256 %define etab_2
(x
) [_aes_enc_tab
+2+8*x
]
257 %define etab_3
(x
) [_aes_enc_tab
+1+8*x
]
258 %define etab_b
(x
) byte [_aes_enc_tab
+1+8*x
] ; used with movzx for 0x000000xx
259 %define etab_w
(x
) word [_aes_enc_tab
+8*x
] ; used with movzx for 0x0000xx00
261 %define btab_0
(x
) [_aes_enc_tab
+6+8*x
]
262 %define btab_1
(x
) [_aes_enc_tab
+5+8*x
]
263 %define btab_2
(x
) [_aes_enc_tab
+4+8*x
]
264 %define btab_3
(x
) [_aes_enc_tab
+3+8*x
]
266 ; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
267 ; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
275 ; ESI column key[round][2]
276 ; EDI column key[round][3]
281 ; EBP column[0] unkeyed
282 ; EBX column[1] unkeyed
283 ; ESI column[2] keyed
284 ; EDI column[3] keyed
317 ; Basic MOV and XOR Operations for normal rounds
329 ; Basic MOV and XOR Operations for last round
350 %else
; less effective but worth leaving as an option
355 and %4,0x000000ff << 8 * %3
362 and %1,0x000000ff << 8 * %3
367 ; Apply S-Box to the 4 bytes in a 32-bit word and rotate byte positions
369 %ifdef REDUCE_CODE_SIZE
372 movzx ecx,al ; in eax
373 movzx ecx, etab_b
(ecx) ; out eax
374 xor edx,ecx ; scratch ecx,edx
376 movzx ecx, etab_b
(ecx)
381 movzx ecx, etab_b
(ecx)
385 movzx ecx, etab_b
(ecx)
395 movzx ecx,al ; in eax
396 movzx ecx, etab_b
(ecx) ; out eax
397 xor edx,ecx ; scratch ecx,edx
399 movzx ecx, etab_b
(ecx)
404 movzx ecx, etab_b
(ecx)
408 movzx ecx, etab_b
(ecx)
417 ; offsets to parameters
419 in_blk
equ 2 ; input byte array address parameter
420 out_blk
equ 4 ; output byte array address parameter
421 ctx
equ 6 ; AES context structure
422 stk_spc
equ 20 ; stack space
426 ; %define ENCRYPTION_TABLE
428 %ifdef REDUCE_CODE_SIZE
437 rnd_fun nr_xor
, nr_mov
457 rnd_fun nr_xor
, nr_mov
470 %macro enc_last_round
0
477 rnd_fun lr_xor
, lr_mov
488 ; AES Encryption Subroutine
490 do_name _aes_encrypt
,12
501 movzx esi,word [esp+in_blk
+stk_spc
] ; input pointer
507 movzx ebp,word [esp+ctx
+stk_spc
] ; key pointer
508 movzx edi,byte [ebp+4*KS_LENGTH
]
514 ; determine the number of rounds
527 .1: mf_call enc_round
529 .2: mf_call enc_round
531 .3: mf_call enc_round
542 movzx edx,word [esp+out_blk
+stk_spc
]
572 mov [ebp+%1*%2+4],edi
574 mov [ebp+%1*%2+8],ecx
576 mov [ebp+%1*%2+12],edx
582 xor eax,[ebp+%1*%2+16-%2]
583 mov [ebp+%1*%2+16],eax
584 xor eax,[ebp+%1*%2+20-%2]
585 mov [ebp+%1*%2+20],eax
593 mov edx,[ebp+%1*%2+16-%2]
597 mov [ebp+%1*%2+16],eax
598 xor eax,[ebp+%1*%2+20-%2]
599 mov [ebp+%1*%2+20],eax
600 xor eax,[ebp+%1*%2+24-%2]
601 mov [ebp+%1*%2+24],eax
602 xor eax,[ebp+%1*%2+28-%2]
603 mov [ebp+%1*%2+28],eax
608 %assign rc_val f2
(rc_val
)
612 %ifdef ENCRYPTION_KEY_SCHEDULE
616 %ifndef ENCRYPTION_TABLE
617 ; %define ENCRYPTION_TABLE
622 do_name _aes_encrypt_key128
,8
630 mov [ebp+4*KS_LENGTH
],dword 10*16
644 f_key
0,16 ; 11 * 4 = 44 unsigned longs
645 f_key
1,16 ; 4 + 4 * 10 generated = 44
666 %ifndef ENCRYPTION_TABLE
667 ; %define ENCRYPTION_TABLE
672 do_name _aes_encrypt_key192
,8
680 mov [ebp+4*KS_LENGTH
],dword 12 * 16
697 f_key
0,24 ; 13 * 4 = 52 unsigned longs
698 f_key
1,24 ; 6 + 6 * 8 generated = 54
717 %ifndef ENCRYPTION_TABLE
718 ; %define ENCRYPTION_TABLE
723 do_name _aes_encrypt_key256
,8
733 movzx ebp, word [esp+20] ; ks
734 mov [ebp+4*KS_LENGTH
],dword 14 * 16
735 movzx ebx, word [esp+18] ; key
755 f_key
0,32 ; 15 * 4 = 60 unsigned longs
756 f_key
1,32 ; 8 + 8 * 7 generated = 64
774 %ifndef ENCRYPTION_TABLE
775 ; %define ENCRYPTION_TABLE
778 do_name _aes_encrypt_key
,12
804 .1: do_call _aes_encrypt_key128
,8
806 .2: do_call _aes_encrypt_key192
,8
808 .3: do_call _aes_encrypt_key256
,8
815 %ifdef ENCRYPTION_TABLE
817 ; S-box data - 256 entries
821 %define u8
(x
) 0, x
, x
, f3
(x
), f2
(x
), x
, x
, f3
(x
)
824 db u8
(0x63),u8
(0x7c),u8
(0x77),u8
(0x7b),u8
(0xf2),u8
(0x6b),u8
(0x6f),u8
(0xc5)
825 db u8
(0x30),u8
(0x01),u8
(0x67),u8
(0x2b),u8
(0xfe),u8
(0xd7),u8
(0xab),u8
(0x76)
826 db u8
(0xca),u8
(0x82),u8
(0xc9),u8
(0x7d),u8
(0xfa),u8
(0x59),u8
(0x47),u8
(0xf0)
827 db u8
(0xad),u8
(0xd4),u8
(0xa2),u8
(0xaf),u8
(0x9c),u8
(0xa4),u8
(0x72),u8
(0xc0)
828 db u8
(0xb7),u8
(0xfd),u8
(0x93),u8
(0x26),u8
(0x36),u8
(0x3f),u8
(0xf7),u8
(0xcc)
829 db u8
(0x34),u8
(0xa5),u8
(0xe5),u8
(0xf1),u8
(0x71),u8
(0xd8),u8
(0x31),u8
(0x15)
830 db u8
(0x04),u8
(0xc7),u8
(0x23),u8
(0xc3),u8
(0x18),u8
(0x96),u8
(0x05),u8
(0x9a)
831 db u8
(0x07),u8
(0x12),u8
(0x80),u8
(0xe2),u8
(0xeb),u8
(0x27),u8
(0xb2),u8
(0x75)
832 db u8
(0x09),u8
(0x83),u8
(0x2c),u8
(0x1a),u8
(0x1b),u8
(0x6e),u8
(0x5a),u8
(0xa0)
833 db u8
(0x52),u8
(0x3b),u8
(0xd6),u8
(0xb3),u8
(0x29),u8
(0xe3),u8
(0x2f),u8
(0x84)
834 db u8
(0x53),u8
(0xd1),u8
(0x00),u8
(0xed),u8
(0x20),u8
(0xfc),u8
(0xb1),u8
(0x5b)
835 db u8
(0x6a),u8
(0xcb),u8
(0xbe),u8
(0x39),u8
(0x4a),u8
(0x4c),u8
(0x58),u8
(0xcf)
836 db u8
(0xd0),u8
(0xef),u8
(0xaa),u8
(0xfb),u8
(0x43),u8
(0x4d),u8
(0x33),u8
(0x85)
837 db u8
(0x45),u8
(0xf9),u8
(0x02),u8
(0x7f),u8
(0x50),u8
(0x3c),u8
(0x9f),u8
(0xa8)
838 db u8
(0x51),u8
(0xa3),u8
(0x40),u8
(0x8f),u8
(0x92),u8
(0x9d),u8
(0x38),u8
(0xf5)
839 db u8
(0xbc),u8
(0xb6),u8
(0xda),u8
(0x21),u8
(0x10),u8
(0xff),u8
(0xf3),u8
(0xd2)
840 db u8
(0xcd),u8
(0x0c),u8
(0x13),u8
(0xec),u8
(0x5f),u8
(0x97),u8
(0x44),u8
(0x17)
841 db u8
(0xc4),u8
(0xa7),u8
(0x7e),u8
(0x3d),u8
(0x64),u8
(0x5d),u8
(0x19),u8
(0x73)
842 db u8
(0x60),u8
(0x81),u8
(0x4f),u8
(0xdc),u8
(0x22),u8
(0x2a),u8
(0x90),u8
(0x88)
843 db u8
(0x46),u8
(0xee),u8
(0xb8),u8
(0x14),u8
(0xde),u8
(0x5e),u8
(0x0b),u8
(0xdb)
844 db u8
(0xe0),u8
(0x32),u8
(0x3a),u8
(0x0a),u8
(0x49),u8
(0x06),u8
(0x24),u8
(0x5c)
845 db u8
(0xc2),u8
(0xd3),u8
(0xac),u8
(0x62),u8
(0x91),u8
(0x95),u8
(0xe4),u8
(0x79)
846 db u8
(0xe7),u8
(0xc8),u8
(0x37),u8
(0x6d),u8
(0x8d),u8
(0xd5),u8
(0x4e),u8
(0xa9)
847 db u8
(0x6c),u8
(0x56),u8
(0xf4),u8
(0xea),u8
(0x65),u8
(0x7a),u8
(0xae),u8
(0x08)
848 db u8
(0xba),u8
(0x78),u8
(0x25),u8
(0x2e),u8
(0x1c),u8
(0xa6),u8
(0xb4),u8
(0xc6)
849 db u8
(0xe8),u8
(0xdd),u8
(0x74),u8
(0x1f),u8
(0x4b),u8
(0xbd),u8
(0x8b),u8
(0x8a)
850 db u8
(0x70),u8
(0x3e),u8
(0xb5),u8
(0x66),u8
(0x48),u8
(0x03),u8
(0xf6),u8
(0x0e)
851 db u8
(0x61),u8
(0x35),u8
(0x57),u8
(0xb9),u8
(0x86),u8
(0xc1),u8
(0x1d),u8
(0x9e)
852 db u8
(0xe1),u8
(0xf8),u8
(0x98),u8
(0x11),u8
(0x69),u8
(0xd9),u8
(0x8e),u8
(0x94)
853 db u8
(0x9b),u8
(0x1e),u8
(0x87),u8
(0xe9),u8
(0xce),u8
(0x55),u8
(0x28),u8
(0xdf)
854 db u8
(0x8c),u8
(0xa1),u8
(0x89),u8
(0x0d),u8
(0xbf),u8
(0xe6),u8
(0x42),u8
(0x68)
855 db u8
(0x41),u8
(0x99),u8
(0x2d),u8
(0x0f),u8
(0xb0),u8
(0x54),u8
(0xbb),u8
(0x16)
861 ; %define DECRYPTION_TABLE
; Memory operands that index the external table _aes_dec_tab. Entries are
; spaced 8 bytes apart (the 8*x term) and the added constant selects the
; byte within an entry at which the access starts. dtab_x is an explicit
; byte access to offset 7 of an entry; the others take their size from the
; instruction they are used with.
863 %define dtab_0
(x
) [_aes_dec_tab
+ 8*x
]
864 %define dtab_1
(x
) [_aes_dec_tab
+3+8*x
]
865 %define dtab_2
(x
) [_aes_dec_tab
+2+8*x
]
866 %define dtab_3
(x
) [_aes_dec_tab
+1+8*x
]
867 %define dtab_x
(x
) byte [_aes_dec_tab
+7+8*x
]
897 ; Basic MOV and XOR Operations for normal rounds
909 ; Basic MOV and XOR Operations for last round
928 %ifdef REDUCE_CODE_SIZE
941 irn_fun ni_xor
, ni_mov
965 irn_fun ni_xor
, ni_mov
978 %macro dec_last_round
0
989 irn_fun li_xor
, li_mov
1000 ; AES Decryption Subroutine
1002 do_name _aes_decrypt
,12
1013 ; input four columns and xor in first round key
1015 movzx esi,word [esp+in_blk
+stk_spc
] ; input pointer
1022 movzx ebp, word [esp+ctx
+stk_spc
] ; key pointer
1023 movzx edi,byte[ebp+4*KS_LENGTH
]
1024 %ifndef AES_REV_DKS
; if decryption key schedule is not reversed
1025 lea ebp,[ebp+edi] ; we have to access it from the top down
1027 xor eax,[ebp ] ; key schedule
1032 ; determine the number of rounds
1045 .1: mf_call dec_round
1047 .2: mf_call dec_round
1049 .3: mf_call dec_round
1060 ; move final values to the output array.
1062 movzx ebp,word [esp+out_blk
+stk_spc
]
1069 .5: mov ebp,[esp+16]
1078 %ifdef REDUCE_CODE_SIZE
1081 movzx ecx,dl ; input eax, edx
1082 movzx ecx,etab_b
(ecx) ; output eax
1083 mov eax,dtab_0
(ecx) ; used ecx
1086 movzx ecx,etab_b
(ecx)
1089 movzx ecx,etab_b
(ecx)
1092 movzx ecx,etab_b
(ecx)
1098 %macro inv_mix_col
0
1100 movzx ecx,dl ; input eax, edx
1101 movzx ecx,etab_b
(ecx) ; output eax
1102 mov eax,dtab_0
(ecx) ; used ecx
1105 movzx ecx,etab_b
(ecx)
1108 movzx ecx,etab_b
(ecx)
1111 movzx ecx,etab_b
(ecx)
1118 %ifdef DECRYPTION_KEY_SCHEDULE
1122 %ifndef DECRYPTION_TABLE
1123 ; %define DECRYPTION_TABLE
1126 do_name _aes_decrypt_key128
,8
1132 mov eax,[esp+24] ; context
1133 mov edx,[esp+20] ; key
1136 do_call _aes_encrypt_key128
,8 ; generate expanded encryption key
1138 mov esi,[esp+24] ; pointer to first round key
1139 lea edi,[esi+eax] ; pointer to last round key
1141 ; the inverse mix column transformation
1142 mov edx,[esi-16] ; needs to be applied to all round keys
1143 mf_call inv_mix_col
; except first and last. Hence start by
1144 mov [esi-16],eax ; transforming the four sub-keys in the
1145 mov edx,[esi-12] ; second round key
1147 mov [esi-12],eax ; transformations for subsequent rounds
1148 mov edx,[esi-8] ; can then be made more efficient by
1149 mf_call inv_mix_col
; noting that for three of the four sub-keys
1150 mov [esi-8],eax ; in the encryption round key ek[r]:
1152 mf_call inv_mix_col
; ek[r][n] = ek[r][n-1] ^ ek[r-1][n]
1154 ; where n is 1..3. Hence the corresponding
1155 .0: mov edx,[esi] ; subkeys in the decryption round key dk[r]
1156 mf_call inv_mix_col
; also obey since inv_mix_col is linear in
1157 mov [esi],eax ; GF(256):
1159 mov [esi+4],eax ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
1161 mov [esi+8],eax ; So we only need one inverse mix column
1162 xor eax,[esi-4] ; operation (n = 0) for each four word cycle
1163 mov [esi+12],eax ; in the expanded key.
1173 %ifndef DECRYPTION_TABLE
1174 ; %define DECRYPTION_TABLE
1177 do_name _aes_decrypt_key192
,8
1183 mov eax,[esp+24] ; context
1184 mov edx,[esp+20] ; key
1187 do_call _aes_encrypt_key192
,8 ; generate expanded encryption key
1189 mov esi,[esp+24] ; first round key
1190 lea edi,[esi+eax] ; last round key
1191 add esi,48 ; the first 6 words are the key, of
1192 ; which the top 2 words are part of
1193 mov edx,[esi-32] ; the second round key and hence
1194 mf_call inv_mix_col
; need to be modified. After this we
1195 mov [esi-32],eax ; need to do a further six values prior
1196 mov edx,[esi-28] ; to using a more efficient technique
1197 mf_call inv_mix_col
; based on:
1199 ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
1201 mf_call inv_mix_col
; for n = 1 .. 5 where the key expansion
1202 mov [esi-24],eax ; cycle is now 6 words long
1219 .0: mov edx,[esi] ; the expanded key is 13 * 4 = 52 32-bit words
1220 mf_call inv_mix_col
; of which 11 * 4 = 44 have to be modified
1221 mov [esi],eax ; using inv_mix_col. We have already done 8
1222 xor eax,[esi-20] ; of these so 36 are left - hence we need
1223 mov [esi+4],eax ; exactly 6 loops of six here
1241 %ifndef DECRYPTION_TABLE
1242 ; %define DECRYPTION_TABLE
1245 do_name _aes_decrypt_key256
,8
1254 movzx eax, word [esp+20] ; ks
1255 movzx edx, word [esp+18] ; key
1258 do_call _aes_encrypt_key256
,4 ; generate expanded encryption key
1260 movzx esi, word [esp+20] ; ks
1264 mov edx,[esi-48] ; the primary key is 8 words, of which
1265 mf_call inv_mix_col
; the top four require modification
1277 mov edx,[esi-32] ; the encryption key expansion cycle is
1278 mf_call inv_mix_col
; now eight words long so we need to
1279 mov [esi-32],eax ; start by doing one complete block
1302 .0: mov edx,[esi] ; we can now speed up the remaining
1303 mf_call inv_mix_col
; rounds by using the technique
1304 mov [esi],eax ; outlined earlier. But note that
1305 xor eax,[esi-28] ; there is one extra inverse mix
1306 mov [esi+4],eax ; column operation as the 256 bit
1307 xor eax,[esi-24] ; key has an extra non-linear step
1308 mov [esi+8],eax ; for the midway element.
1310 mov [esi+12],eax ; the expanded key is 15 * 4 = 60
1311 mov edx,[esi+16] ; 32-bit words of which 52 need to
1312 mf_call inv_mix_col
; be modified. We have already done
1313 mov [esi+16],eax ; 12 so 40 are left - which means
1314 xor eax,[esi-12] ; that we need exactly 5 loops of 8
1330 movzx esi,word [esp+20] ; this reverses the order of the
1331 .1: mov eax,[esi] ; round keys if required
1365 do_name _aes_decrypt_key
,12
1391 .1: do_call _aes_decrypt_key128
,8
1393 .2: do_call _aes_decrypt_key192
,8
1395 .3: do_call _aes_decrypt_key256
,8
1402 %ifdef DECRYPTION_TABLE
1404 ; Inverse S-box data - 256 entries
1408 %define v8
(x
) fe
(x
), f9
(x
), fd
(x
), fb
(x
), fe
(x
), f9
(x
), fd
(x
), x
1411 db v8
(0x52),v8
(0x09),v8
(0x6a),v8
(0xd5),v8
(0x30),v8
(0x36),v8
(0xa5),v8
(0x38)
1412 db v8
(0xbf),v8
(0x40),v8
(0xa3),v8
(0x9e),v8
(0x81),v8
(0xf3),v8
(0xd7),v8
(0xfb)
1413 db v8
(0x7c),v8
(0xe3),v8
(0x39),v8
(0x82),v8
(0x9b),v8
(0x2f),v8
(0xff),v8
(0x87)
1414 db v8
(0x34),v8
(0x8e),v8
(0x43),v8
(0x44),v8
(0xc4),v8
(0xde),v8
(0xe9),v8
(0xcb)
1415 db v8
(0x54),v8
(0x7b),v8
(0x94),v8
(0x32),v8
(0xa6),v8
(0xc2),v8
(0x23),v8
(0x3d)
1416 db v8
(0xee),v8
(0x4c),v8
(0x95),v8
(0x0b),v8
(0x42),v8
(0xfa),v8
(0xc3),v8
(0x4e)
1417 db v8
(0x08),v8
(0x2e),v8
(0xa1),v8
(0x66),v8
(0x28),v8
(0xd9),v8
(0x24),v8
(0xb2)
1418 db v8
(0x76),v8
(0x5b),v8
(0xa2),v8
(0x49),v8
(0x6d),v8
(0x8b),v8
(0xd1),v8
(0x25)
1419 db v8
(0x72),v8
(0xf8),v8
(0xf6),v8
(0x64),v8
(0x86),v8
(0x68),v8
(0x98),v8
(0x16)
1420 db v8
(0xd4),v8
(0xa4),v8
(0x5c),v8
(0xcc),v8
(0x5d),v8
(0x65),v8
(0xb6),v8
(0x92)
1421 db v8
(0x6c),v8
(0x70),v8
(0x48),v8
(0x50),v8
(0xfd),v8
(0xed),v8
(0xb9),v8
(0xda)
1422 db v8
(0x5e),v8
(0x15),v8
(0x46),v8
(0x57),v8
(0xa7),v8
(0x8d),v8
(0x9d),v8
(0x84)
1423 db v8
(0x90),v8
(0xd8),v8
(0xab),v8
(0x00),v8
(0x8c),v8
(0xbc),v8
(0xd3),v8
(0x0a)
1424 db v8
(0xf7),v8
(0xe4),v8
(0x58),v8
(0x05),v8
(0xb8),v8
(0xb3),v8
(0x45),v8
(0x06)
1425 db v8
(0xd0),v8
(0x2c),v8
(0x1e),v8
(0x8f),v8
(0xca),v8
(0x3f),v8
(0x0f),v8
(0x02)
1426 db v8
(0xc1),v8
(0xaf),v8
(0xbd),v8
(0x03),v8
(0x01),v8
(0x13),v8
(0x8a),v8
(0x6b)
1427 db v8
(0x3a),v8
(0x91),v8
(0x11),v8
(0x41),v8
(0x4f),v8
(0x67),v8
(0xdc),v8
(0xea)
1428 db v8
(0x97),v8
(0xf2),v8
(0xcf),v8
(0xce),v8
(0xf0),v8
(0xb4),v8
(0xe6),v8
(0x73)
1429 db v8
(0x96),v8
(0xac),v8
(0x74),v8
(0x22),v8
(0xe7),v8
(0xad),v8
(0x35),v8
(0x85)
1430 db v8
(0xe2),v8
(0xf9),v8
(0x37),v8
(0xe8),v8
(0x1c),v8
(0x75),v8
(0xdf),v8
(0x6e)
1431 db v8
(0x47),v8
(0xf1),v8
(0x1a),v8
(0x71),v8
(0x1d),v8
(0x29),v8
(0xc5),v8
(0x89)
1432 db v8
(0x6f),v8
(0xb7),v8
(0x62),v8
(0x0e),v8
(0xaa),v8
(0x18),v8
(0xbe),v8
(0x1b)
1433 db v8
(0xfc),v8
(0x56),v8
(0x3e),v8
(0x4b),v8
(0xc6),v8
(0xd2),v8
(0x79),v8
(0x20)
1434 db v8
(0x9a),v8
(0xdb),v8
(0xc0),v8
(0xfe),v8
(0x78),v8
(0xcd),v8
(0x5a),v8
(0xf4)
1435 db v8
(0x1f),v8
(0xdd),v8
(0xa8),v8
(0x33),v8
(0x88),v8
(0x07),v8
(0xc7),v8
(0x31)
1436 db v8
(0xb1),v8
(0x12),v8
(0x10),v8
(0x59),v8
(0x27),v8
(0x80),v8
(0xec),v8
(0x5f)
1437 db v8
(0x60),v8
(0x51),v8
(0x7f),v8
(0xa9),v8
(0x19),v8
(0xb5),v8
(0x4a),v8
(0x0d)
1438 db v8
(0x2d),v8
(0xe5),v8
(0x7a),v8
(0x9f),v8
(0x93),v8
(0xc9),v8
(0x9c),v8
(0xef)
1439 db v8
(0xa0),v8
(0xe0),v8
(0x3b),v8
(0x4d),v8
(0xae),v8
(0x2a),v8
(0xf5),v8
(0xb0)
1440 db v8
(0xc8),v8
(0xeb),v8
(0xbb),v8
(0x3c),v8
(0x83),v8
(0x53),v8
(0x99),v8
(0x61)
1441 db v8
(0x17),v8
(0x2b),v8
(0x04),v8
(0x7e),v8
(0xba),v8
(0x77),v8
(0xd6),v8
(0x26)
1442 db v8
(0xe1),v8
(0x69),v8
(0x14),v8
(0x63),v8
(0x55),v8
(0x21),v8
(0x0c),v8
(0x7d)