2 ; ---------------------------------------------------------------------------
3 ; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
7 ; The free distribution and use of this software is allowed (with or without
8 ; changes) provided that:
10 ; 1. source code distributions include the above copyright notice, this
11 ; list of conditions and the following disclaimer;
13 ; 2. binary distributions include the above copyright notice, this list
14 ; of conditions and the following disclaimer in their documentation;
16 ; 3. the name of the copyright holder is not used to endorse products
17 ; built using this software without specific written permission.
21 ; This software is provided 'as is' with no explicit or implied warranties
22 ; in respect of its properties, including, but not limited to, correctness
23 ; and/or fitness for purpose.
24 ; ---------------------------------------------------------------------------
27 ; This code requires ASM_X86_V1C to be set in aesopt.h. It requires the C files
28 ; aeskey.c and aestab.c for support.
30 ; An AES implementation for x86 processors using the YASM (or NASM) assembler.
31 ; This is an assembler implementation that covers encryption and decryption
32 ; only and is intended as a replacement of the C file aescrypt.c. It hence
33 ; requires the file aeskey.c for keying and aestab.c for the AES tables. It
34 ; employs full tables rather than compressed tables.
36 ; This code provides the standard AES block size (128 bits, 16 bytes) and the
37 ; three standard AES key sizes (128, 192 and 256 bits). It has the same call
38 ; interface as my C implementation. The ebx, esi, edi and ebp registers are
39 ; preserved across calls but eax, ecx and edx and the arithmetic status flags
40 ; are not. It is also important that the defines below match those used in the
41 ; C code. This code uses the VC++ register saving conventions; if it is used
42 ; with another compiler, conventions for using and saving registers may need to
43 ; be checked (and calling conventions). The YASM command line for the VC++
44 ; custom build step is:
46 ; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
48 ; The calling interfaces are:
50 ; AES_RETURN aes_encrypt(const unsigned char in_blk[],
51 ; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
53 ; AES_RETURN aes_decrypt(const unsigned char in_blk[],
54 ; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
56 ; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
57 ; const aes_encrypt_ctx cx[1]);
59 ; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
60 ; const aes_decrypt_ctx cx[1]);
62 ; AES_RETURN aes_encrypt_key(const unsigned char key[],
63 ; unsigned int len, const aes_encrypt_ctx cx[1]);
65 ; AES_RETURN aes_decrypt_key(const unsigned char key[],
66 ; unsigned int len, const aes_decrypt_ctx cx[1]);
68 ; where <NNN> is 128, 192 or 256. In the last two calls the length can be in
69 ; either bits or bytes.
71 ; Comment in/out the following lines to obtain the desired subroutines. These
72 ; selections MUST match those in the C header file aes.h
; Build-time feature selection. A feature is enabled by leaving its %define
; active and disabled by commenting the line out. These selections must match
; those in the C header file aes.h.
; %define AES_128               ; define if AES with 128 bit keys is needed
; %define AES_192               ; define if AES with 192 bit keys is needed
%define AES_256                 ; define if AES with 256 bit keys is needed
; %define AES_VAR               ; define if a variable key size is needed
%define ENCRYPTION              ; define if encryption is needed
%define DECRYPTION              ; define if decryption is needed
%define AES_REV_DKS             ; define if key decryption schedule is reversed
%define LAST_ROUND_TABLES       ; define if tables are to be used for last round
83 ; offsets to parameters
; Byte offsets of the three subroutine parameters. The subroutines address
; them as [esp + <offset> + stk_spc], so stk_spc is added on top of each
; parameter offset when the arguments are loaded.
in_blk          equ      4      ; input byte array address parameter
out_blk         equ      8      ; output byte array address parameter
ctx             equ     12      ; AES context structure
stk_spc         equ     20      ; stack space
%define parms           12      ; parameter space on stack
91 ; The encryption key schedule has the following in memory layout where N is the
92 ; number of rounds (10, 12 or 14):
94 ; lo: | input key (round 0) | ; each round is four 32-bit words
95 ; | encryption round 1 |
96 ; | encryption round 2 |
98 ; | encryption round N-1 |
99 ; hi: | encryption round N |
101 ; The decryption key schedule is normally set up so that it has the same
102 ; layout as above by actually reversing the order of the encryption key
103 ; schedule in memory (this happens when AES_REV_DKS is set):
105 ; lo: | decryption round 0 | = | encryption round N |
106 ; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
107 ; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
109 ; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
110 ; hi: | decryption round N | = | input key (round 0) |
112 ; with rounds except the first and last modified using inv_mix_column()
113 ; But if AES_REV_DKS is NOT set the order of keys is left as it is for
114 ; encryption so that it has to be accessed in reverse when used for
115 ; decryption (although the inverse mix column modifications are done)
117 ; lo: | decryption round 0 | = | input key (round 0) |
118 ; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
119 ; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
121 ; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
122 ; hi: | decryption round N | = | encryption round N |
124 ; This layout is faster when the assembler key scheduling provided here is used.
127 ; The DLL interface must use the _stdcall convention in which the number
128 ; of bytes of parameter space is added after an @ to the subroutine's name.
129 ; We must also remove our parameters from the stack before return (see
130 ; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
134 ; End of user defines
158 ; These macros implement stack based local variables
168 ; the DLL has to implement the _stdcall calling interface on return
169 ; In this case we have to take our parameters (3 4-byte pointers) off the stack
172 %macro do_name
1-2 parms
185 %macro do_call
1-2 parms
194 %macro do_exit
0-1 parms
; Encryption table accessors. etab_N(x) expands to the memory operand for
; entry x of the N-th table within _t_fn: entries are indexed as 4*x and
; consecutive tables start 1024 bytes apart (presumably 256 four-byte
; entries per table - confirm against aestab.c).
%define etab_0(x)   [_t_fn+4*x]
%define etab_1(x)   [_t_fn+1024+4*x]
%define etab_2(x)   [_t_fn+2048+4*x]
%define etab_3(x)   [_t_fn+3072+4*x]
211 %ifdef LAST_ROUND_TABLES
; Last-round encryption table accessors. eltab_N(x) expands to the memory
; operand for entry x of the N-th table within _t_fl, using the same
; 4*x indexing and 1024-byte table spacing as the etab_N accessors.
%define eltab_0(x)  [_t_fl+4*x]
%define eltab_1(x)  [_t_fl+1024+4*x]
%define eltab_2(x)  [_t_fl+2048+4*x]
%define eltab_3(x)  [_t_fl+3072+4*x]
; Byte-sized accessor: same address as etab_3(x) (_t_fn+3072+4*x), but the
; operand is explicitly sized as a single byte.
%define etab_b(x)   byte [_t_fn+3072+4*x]
226 ; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
227 ; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
235 ; ESI column key[round][2]
236 ; EDI column key[round][3]
241 ; EBP column[0] unkeyed
242 ; EBX column[1] unkeyed
243 ; ESI column[2] keyed
244 ; EDI column[3] keyed
277 ; Basic MOV and XOR Operations for normal rounds
289 ; Basic MOV and XOR Operations for last round
291 %ifdef LAST_ROUND_TABLES
331 rnd_fun nr_xor
, nr_mov
342 %macro enc_last_round
0
349 rnd_fun lr_xor
, lr_mov
; Code section, requested with 32-byte alignment.
    section .text align=32
360 ; AES Encryption Subroutine
370 mov esi,[esp+in_blk
+stk_spc
] ; input pointer
376 mov ebp,[esp+ctx
+stk_spc
] ; key pointer
377 movzx edi,byte [ebp+4*KS_LENGTH
]
383 ; determine the number of rounds
409 mov edx,[esp+out_blk
+stk_spc
]
; Decryption table accessors. dtab_N(x) expands to the memory operand for
; entry x of the N-th table within _t_in: entries are indexed as 4*x and
; consecutive tables start 1024 bytes apart (presumably 256 four-byte
; entries per table - confirm against aestab.c).
%define dtab_0(x)   [_t_in+4*x]
%define dtab_1(x)   [_t_in+1024+4*x]
%define dtab_2(x)   [_t_in+2048+4*x]
%define dtab_3(x)   [_t_in+3072+4*x]
434 %ifdef LAST_ROUND_TABLES
; Last-round decryption table accessors. dltab_N(x) expands to the memory
; operand for entry x of the N-th table within _t_il, using the same
; 4*x indexing and 1024-byte table spacing as the dtab_N accessors.
%define dltab_0(x)  [_t_il+4*x]
%define dltab_1(x)  [_t_il+1024+4*x]
%define dltab_2(x)  [_t_il+2048+4*x]
%define dltab_3(x)  [_t_il+3072+4*x]
; Byte-sized accessor into _t_ibox: the index x is used unscaled, so each
; entry is one byte (by its name presumably the inverse S-box - confirm
; against aestab.c).
%define dtab_x(x)   byte [_t_ibox+x]
479 ; Basic MOV and XOR Operations for normal rounds
491 ; Basic MOV and XOR Operations for last round
493 %ifdef LAST_ROUND_TABLES
537 irn_fun ni_xor
, ni_mov
548 %macro dec_last_round
0
559 irn_fun li_xor
, li_mov
570 ; AES Decryption Subroutine
580 ; input four columns and xor in first round key
582 mov esi,[esp+in_blk
+stk_spc
] ; input pointer
589 mov ebp,[esp+ctx
+stk_spc
] ; key pointer
590 movzx edi,byte[ebp+4*KS_LENGTH
]
591 %ifndef AES_REV_DKS
; if decryption key schedule is not reversed
592 lea ebp,[ebp+edi] ; we have to access it from the top down
594 xor eax,[ebp ] ; key schedule
599 ; determine the number of rounds
625 ; move final values to the output array.
627 mov ebp,[esp+out_blk
+stk_spc
]