Add phnxdeco with debian patch set (version 0.33-3).
[delutions.git] / tc / crypto / Aes_x86.asm
blob4a3dea35c2dba63517b915050be4043a8c0773ba
2 ; ---------------------------------------------------------------------------
3 ; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
4 ;
5 ; LICENSE TERMS
6 ;
7 ; The free distribution and use of this software is allowed (with or without
8 ; changes) provided that:
9 ;
10 ; 1. source code distributions include the above copyright notice, this
11 ; list of conditions and the following disclaimer;
13 ; 2. binary distributions include the above copyright notice, this list
14 ; of conditions and the following disclaimer in their documentation;
16 ; 3. the name of the copyright holder is not used to endorse products
17 ; built using this software without specific written permission.
19 ; DISCLAIMER
21 ; This software is provided 'as is' with no explicit or implied warranties
22 ; in respect of its properties, including, but not limited to, correctness
23 ; and/or fitness for purpose.
24 ; ---------------------------------------------------------------------------
25 ; Issue 20/12/2007
27 ; This code requires ASM_X86_V1C to be set in aesopt.h. It requires the C files
28 ; aeskey.c and aestab.c for support.
30 ; An AES implementation for x86 processors using the YASM (or NASM) assembler.
31 ; This is an assembler implementation that covers encryption and decryption
32 ; only and is intended as a replacement of the C file aescrypt.c. It hence
33 ; requires the file aeskey.c for keying and aestab.c for the AES tables. It
34 ; employs full tables rather than compressed tables.
36 ; This code provides the standard AES block size (128 bits, 16 bytes) and the
37 ; three standard AES key sizes (128, 192 and 256 bits). It has the same call
38 ; interface as my C implementation. The ebx, esi, edi and ebp registers are
39 ; preserved across calls but eax, ecx and edx and the arithmetic status flags
40 ; are not. It is also important that the defines below match those used in the
41 ; C code. This code uses the VC++ register saving conventions; if it is used
42 ; with another compiler, conventions for using and saving registers may need to
43 ; be checked (and calling conventions). The YASM command line for the VC++
44 ; custom build step is:
46 ; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
48 ; The calling interfaces are:
50 ; AES_RETURN aes_encrypt(const unsigned char in_blk[],
51 ; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
53 ; AES_RETURN aes_decrypt(const unsigned char in_blk[],
54 ; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
56 ; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
57 ; const aes_encrypt_ctx cx[1]);
59 ; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
60 ; const aes_decrypt_ctx cx[1]);
62 ; AES_RETURN aes_encrypt_key(const unsigned char key[],
63 ; unsigned int len, const aes_encrypt_ctx cx[1]);
65 ; AES_RETURN aes_decrypt_key(const unsigned char key[],
66 ; unsigned int len, const aes_decrypt_ctx cx[1]);
68 ; where <NNN> is 128, 192 or 256. In the last two calls the length can be in
69 ; either bits or bytes.
71 ; Comment in/out the following lines to obtain the desired subroutines. These
72 ; selections MUST match those in the C header file aes.h
74 ; %define AES_128 ; define if AES with 128 bit keys is needed
75 ; %define AES_192 ; define if AES with 192 bit keys is needed
76 %define AES_256 ; define if AES with 256 bit keys is needed
77 ; %define AES_VAR ; define if a variable key size is needed
78 %define ENCRYPTION ; define if encryption is needed
79 %define DECRYPTION ; define if decryption is needed
80 %define AES_REV_DKS ; define if key decryption schedule is reversed
81 %define LAST_ROUND_TABLES ; define if tables are to be used for last round
83 ; offsets to parameters
; Byte offsets of the three 4-byte stack arguments relative to esp at routine
; entry. The routines below address them as [esp+<name>+stk_spc] because they
; first lower esp by stk_spc. Offset 0 is presumably the return address.
85 in_blk equ 4 ; input byte array address parameter
86 out_blk equ 8 ; output byte array address parameter
87 ctx equ 12 ; AES context structure
88 stk_spc equ 20 ; local frame size in bytes: dword slot 0 is used by save/restore, offsets 4..16 hold saved edi, esi, ebx, ebp
89 %define parms 12 ; parameter space on stack (default for do_name/do_call/do_exit)
91 ; The encryption key schedule has the following in memory layout where N is the
92 ; number of rounds (10, 12 or 14):
94 ; lo: | input key (round 0) | ; each round is four 32-bit words
95 ; | encryption round 1 |
96 ; | encryption round 2 |
97 ; ....
98 ; | encryption round N-1 |
99 ; hi: | encryption round N |
101 ; The decryption key schedule is normally set up so that it has the same
102 ; layout as above by actually reversing the order of the encryption key
103 ; schedule in memory (this happens when AES_REV_DKS is set):
105 ; lo: | decryption round 0 | = | encryption round N |
106 ; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
107 ; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
108 ; .... ....
109 ; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
110 ; hi: | decryption round N | = | input key (round 0) |
112 ; with rounds except the first and last modified using inv_mix_column()
113 ; But if AES_REV_DKS is NOT set the order of keys is left as it is for
114 ; encryption so that it has to be accessed in reverse when used for
115 ; decryption (although the inverse mix column modifications are done)
117 ; lo: | decryption round 0 | = | input key (round 0) |
118 ; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
119 ; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
120 ; .... ....
121 ; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
122 ; hi: | decryption round N | = | encryption round N |
124 ; This layout is faster when the assembler key scheduling provided here
125 ; is used.
127 ; The DLL interface must use the _stdcall convention in which the number
128 ; of bytes of parameter space is added after an @ to the subroutine's name.
129 ; We must also remove our parameters from the stack before return (see
130 ; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
132 ;%define DLL_EXPORT
134 ; End of user defines
; Selecting AES_VAR implies all three fixed key sizes, so define any that the
; user left commented out.
136 %ifdef AES_VAR
137 %ifndef AES_128
138 %define AES_128
139 %endif
140 %ifndef AES_192
141 %define AES_192
142 %endif
143 %ifndef AES_256
144 %define AES_256
145 %endif
146 %endif
; KS_LENGTH is chosen from the largest enabled key size (60, 52 or 44). The
; routines below read one byte at [ctx + 4*KS_LENGTH], i.e. KS_LENGTH is
; counted in 4-byte units and the byte tested for the round count sits just
; past that many dwords of the context.
148 %ifdef AES_VAR
149 %define KS_LENGTH 60
150 %elifdef AES_256
151 %define KS_LENGTH 60
152 %elifdef AES_192
153 %define KS_LENGTH 52
154 %else
155 %define KS_LENGTH 44
156 %endif
158 ; These macros implement stack based local variables
; save <slot>, <src>
;   Store <src> into the dword stack local at [esp + 4*<slot>].
;   %1 = slot index (counted in dwords from esp), %2 = source operand.
160 %macro save 2
161 mov [esp+4*%1],%2
162 %endmacro
; restore <dst>, <slot>
;   Load <dst> from the dword stack local at [esp + 4*<slot>].
;   %1 = destination operand, %2 = slot index (counted in dwords from esp).
;   Note the argument order is the reverse of the save macro.
164 %macro restore 2
165 mov %1,[esp+4*%2]
166 %endmacro
168 ; the DLL has to implement the _stdcall calling interface on return
169 ; In this case we have to take our parameters (3 4-byte pointers)
170 ; off the stack
; do_name <symbol> [, <parameter bytes>]
;   Emit the aligned, exported entry label for a subroutine.
;   %1 = symbol name, %2 = bytes of stack parameters (defaults to parms).
;   Without DLL_EXPORT the plain symbol %1 is made global and defined here.
;   With DLL_EXPORT the _stdcall-decorated name %1@%2 is made global,
;   exported and defined instead.
;   The label must be defined in both branches: it is the entry point itself
;   and also the parent of the routine's local labels (.1, .2, .3, .5), which
;   would otherwise collide between the encrypt and decrypt routines.
172 %macro do_name 1-2 parms
173 %ifndef DLL_EXPORT
174 align 32
175 global %1
176 %1:
177 %else
178 align 32
179 global %1@%2
180 export %1@%2
181 %1@%2:
182 %endif
183 %endmacro
; do_call <symbol> [, <parameter bytes>]
;   Call a subroutine that was declared with do_name.
;   %1 = symbol name, %2 = bytes of stack parameters (defaults to parms).
;   Without DLL_EXPORT the caller removes the parameters itself by adding %2
;   to esp after the call. With DLL_EXPORT the decorated name %1@%2 is called
;   and esp is not adjusted here.
185 %macro do_call 1-2 parms
186 %ifndef DLL_EXPORT
187 call %1
188 add esp,%2
189 %else
190 call %1@%2
191 %endif
192 %endmacro
; do_exit [<parameter bytes>]
;   Emit the return instruction for a subroutine declared with do_name.
;   %1 = bytes of stack parameters (defaults to parms).
;   With DLL_EXPORT the callee pops its own parameters (ret %1).
;   Without it a plain ret is emitted and the caller cleans up (see do_call).
;   Both branches must emit a return, otherwise execution would run past the
;   end of the routine.
194 %macro do_exit 0-1 parms
195 %ifdef DLL_EXPORT
196 ret %1
197 %else
198 ret
199 %endif
200 %endmacro
202 %ifdef ENCRYPTION
; Accessors for the externally defined encryption lookup tables. Each
; etab_N(x) addresses dword x of the N-th 1024-byte sub-table of _t_fn,
; where x is a register holding a zero-extended byte index.
204 extern _t_fn
206 %define etab_0(x) [_t_fn+4*x]
207 %define etab_1(x) [_t_fn+1024+4*x]
208 %define etab_2(x) [_t_fn+2048+4*x]
209 %define etab_3(x) [_t_fn+3072+4*x]
; With LAST_ROUND_TABLES the final round uses a second table set, _t_fl, laid
; out the same way. Otherwise it reads a single byte from the address of the
; etab_3 entry and shifts it into position (see lr_xor / lr_mov).
211 %ifdef LAST_ROUND_TABLES
213 extern _t_fl
215 %define eltab_0(x) [_t_fl+4*x]
216 %define eltab_1(x) [_t_fl+1024+4*x]
217 %define eltab_2(x) [_t_fl+2048+4*x]
218 %define eltab_3(x) [_t_fl+3072+4*x]
220 %else
; byte at the start of the etab_3 entry; presumably the plain S-box value - confirm against aestab.c
222 %define etab_b(x) byte [_t_fn+3072+4*x]
224 %endif
226 ; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
227 ; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
229 ; Input:
231 ; EAX column[0]
232 ; EBX column[1]
233 ; ECX column[2]
234 ; EDX column[3]
235 ; ESI column key[round][2]
236 ; EDI column key[round][3]
237 ; EBP scratch
239 ; Output:
241 ; EBP column[0] unkeyed
242 ; EBX column[1] unkeyed
243 ; ESI column[2] keyed
244 ; EDI column[3] keyed
245 ; EAX scratch
246 ; ECX scratch
247 ; EDX scratch
; rnd_fun <xor_op>, <mov_op>
;   %1 = macro that XORs a table entry into a destination (nr_xor / lr_xor)
;   %2 = macro that loads a table entry into a destination (nr_mov / lr_mov)
;   Both take (dest, byte register, table number, scratch register).
;   Below, "cN.bK" means byte K (0 = least significant) of input column N.
249 %macro rnd_fun 2
251 rol ebx,16 ; swap halves of column[1]: bl/bh now hold c1.b2 / c1.b3
252 %1 esi, cl, 0, ebp ; column[2] ^= T0[c2.b0]
253 %1 esi, dh, 1, ebp ; column[2] ^= T1[c3.b1]
254 %1 esi, bh, 3, ebp ; column[2] ^= T3[c1.b3]
255 %1 edi, dl, 0, ebp ; column[3] ^= T0[c3.b0]
256 %1 edi, ah, 1, ebp ; column[3] ^= T1[c0.b1]
257 %1 edi, bl, 2, ebp ; column[3] ^= T2[c1.b2]
258 %2 ebp, al, 0, ebp ; column[0] = T0[c0.b0] (ebp is both scratch and destination)
259 shr ebx,16 ; ebx = low half of original column[1]
260 and eax,0xffff0000 ; keep only the high half of column[0] ...
261 or eax,ebx ; ... and merge in the low half of column[1], which frees ebx
262 shr edx,16 ; dl/dh now hold c3.b2 / c3.b3
263 %1 ebp, ah, 1, ebx ; column[0] ^= T1[c1.b1]
264 %1 ebp, dh, 3, ebx ; column[0] ^= T3[c3.b3]
265 %2 ebx, dl, 2, ebx ; column[1] = T2[c3.b2]
266 %1 ebx, ch, 1, edx ; column[1] ^= T1[c2.b1]
267 %1 ebx, al, 0, edx ; column[1] ^= T0[c1.b0]
268 shr eax,16 ; al/ah now hold c0.b2 / c0.b3
269 shr ecx,16 ; cl/ch now hold c2.b2 / c2.b3
270 %1 ebp, cl, 2, edx ; column[0] ^= T2[c2.b2]
271 %1 edi, ch, 3, edx ; column[3] ^= T3[c2.b3]
272 %1 esi, al, 2, edx ; column[2] ^= T2[c0.b2]
273 %1 ebx, ah, 3, edx ; column[1] ^= T3[c0.b3]
275 %endmacro
277 ; Basic MOV and XOR Operations for normal rounds
; nr_xor <dest>, <byte reg>, <table no>, <scratch>
;   dest ^= etab_<table no>[byte reg]. The scratch register receives the
;   zero-extended byte and is clobbered.
279 %macro nr_xor 4
280 movzx %4,%2
281 xor %1,etab_%3(%4)
282 %endmacro
; nr_mov <dest>, <byte reg>, <table no>, <scratch>
;   dest = etab_<table no>[byte reg]. The scratch register receives the
;   zero-extended byte and is clobbered; it may be the same register as dest.
284 %macro nr_mov 4
285 movzx %4,%2
286 mov %1,etab_%3(%4)
287 %endmacro
289 ; Basic MOV and XOR Operations for last round
; lr_xor / lr_mov take the same arguments as nr_xor / nr_mov:
;   (dest, byte register, table number, scratch register).
; With LAST_ROUND_TABLES they index the eltab_N tables directly.
291 %ifdef LAST_ROUND_TABLES
293 %macro lr_xor 4
294 movzx %4,%2
295 xor %1,eltab_%3(%4)
296 %endmacro
298 %macro lr_mov 4
299 movzx %4,%2
300 mov %1,eltab_%3(%4)
301 %endmacro
303 %else
; Without last-round tables: fetch one byte through etab_b, zero-extend it
; and shift it left by 8*<table no> bits so it lands in byte lane <table no>.
305 %macro lr_xor 4
306 movzx %4,%2
307 movzx %4,etab_b(%4)
308 %if %3 != 0
309 shl %4,8*%3
310 %endif
311 xor %1,%4
312 %endmacro
; Same as above, but the shifted byte replaces dest instead of being XORed in.
314 %macro lr_mov 4
315 movzx %4,%2
316 movzx %1,etab_b(%4)
317 %if %3 != 0
318 shl %1,8*%3
319 %endif
320 %endmacro
322 %endif
; enc_round: one normal encryption round.
;   In:  eax, ebx, ecx, edx = state columns 0..3; ebp = current round key.
;   Out: eax, ebx, ecx, edx = new state columns; ebp = next round key.
;   Clobbers esi, edi and stack local slot 0.
324 %macro enc_round 0
326 add ebp,16 ; advance to the next 16-byte round key
327 save 0,ebp ; rnd_fun uses ebp as scratch, so park the key pointer
328 mov esi,[ebp+8] ; preload key words 2 and 3; rnd_fun XORs the
329 mov edi,[ebp+12] ; table entries for columns 2 and 3 into them
331 rnd_fun nr_xor, nr_mov
333 mov eax,ebp ; column 0 was built in ebp
334 mov ecx,esi ; column 2 (already keyed)
335 mov edx,edi ; column 3 (already keyed)
336 restore ebp,0 ; recover the key pointer
337 xor eax,[ebp] ; apply key words 0 and 1 to columns 0 and 1
338 xor ebx,[ebp+4]
340 %endmacro
; enc_last_round: final encryption round, using the last-round operations.
;   In:  eax, ebx, ecx, edx = state columns 0..3; ebp = current round key.
;   Out: eax, ebx, esi, edi = output columns 0..3 (columns 2 and 3 are left
;        in esi/edi and not copied back to ecx/edx); ebp = last round key.
;   Clobbers ecx, edx and stack local slot 0.
342 %macro enc_last_round 0
344 add ebp,16 ; advance to the next 16-byte round key
345 save 0,ebp ; rnd_fun uses ebp as scratch, so park the key pointer
346 mov esi,[ebp+8] ; preload key words 2 and 3
347 mov edi,[ebp+12]
349 rnd_fun lr_xor, lr_mov
351 mov eax,ebp ; column 0 was built in ebp
352 restore ebp,0 ; recover the key pointer
353 xor eax,[ebp] ; apply key words 0 and 1 to columns 0 and 1
354 xor ebx,[ebp+4]
356 %endmacro
358 section .text align=32
360 ; AES Encryption Subroutine
; _aes_encrypt: encrypt one 16-byte block.
;   Stack arguments (see in_blk / out_blk / ctx): input block pointer, output
;   block pointer, context pointer.
;   Returns eax = 0 on success. Returns eax = -1, without writing the output
;   block, when the byte at [ctx + 4*KS_LENGTH] is not 10*16, 12*16 or 14*16.
;   ebx, esi, edi and ebp are saved on entry and reloaded before exit.
362 do_name _aes_encrypt
364 sub esp,stk_spc ; local frame: slot 0 plus four saved registers
365 mov [esp+16],ebp
366 mov [esp+12],ebx
367 mov [esp+ 8],esi
368 mov [esp+ 4],edi
; load the four input columns
370 mov esi,[esp+in_blk+stk_spc] ; input pointer
371 mov eax,[esi ]
372 mov ebx,[esi+ 4]
373 mov ecx,[esi+ 8]
374 mov edx,[esi+12]
; xor in the first round key and fetch the byte that selects the round count
376 mov ebp,[esp+ctx+stk_spc] ; key pointer
377 movzx edi,byte [ebp+4*KS_LENGTH]
378 xor eax,[ebp ]
379 xor ebx,[ebp+ 4]
380 xor ecx,[ebp+ 8]
381 xor edx,[ebp+12]
383 ; determine the number of rounds
385 cmp edi,10*16
386 je .3
387 cmp edi,12*16
388 je .2
389 cmp edi,14*16
390 je .1
391 mov eax,-1 ; unrecognised value: report failure
392 jmp .5
; Unrolled rounds: entry at .1 runs 13 normal rounds, .2 runs 11 and .3 runs
; 9, each followed by the last round.
394 .1: enc_round
395 enc_round
396 .2: enc_round
397 enc_round
398 .3: enc_round
399 enc_round
400 enc_round
401 enc_round
402 enc_round
403 enc_round
404 enc_round
405 enc_round
406 enc_round
407 enc_last_round
; store the result; enc_last_round leaves columns 2 and 3 in esi and edi
409 mov edx,[esp+out_blk+stk_spc]
410 mov [edx],eax
411 mov [edx+4],ebx
412 mov [edx+8],esi
413 mov [edx+12],edi
414 xor eax,eax ; return 0
; common exit: reload the saved registers and release the frame
416 .5: mov ebp,[esp+16]
417 mov ebx,[esp+12]
418 mov esi,[esp+ 8]
419 mov edi,[esp+ 4]
420 add esp,stk_spc
421 do_exit
423 %endif
425 %ifdef DECRYPTION
; Accessors for the externally defined decryption lookup tables. Each
; dtab_N(x) addresses dword x of the N-th 1024-byte sub-table of _t_in,
; where x is a register holding a zero-extended byte index.
427 extern _t_in
429 %define dtab_0(x) [_t_in+4*x]
430 %define dtab_1(x) [_t_in+1024+4*x]
431 %define dtab_2(x) [_t_in+2048+4*x]
432 %define dtab_3(x) [_t_in+3072+4*x]
; With LAST_ROUND_TABLES the final round uses a second table set, _t_il, laid
; out the same way. Otherwise it reads single bytes from the byte-indexed
; table _t_ibox and shifts them into position (see li_xor / li_mov).
434 %ifdef LAST_ROUND_TABLES
436 extern _t_il
438 %define dltab_0(x) [_t_il+4*x]
439 %define dltab_1(x) [_t_il+1024+4*x]
440 %define dltab_2(x) [_t_il+2048+4*x]
441 %define dltab_3(x) [_t_il+3072+4*x]
443 %else
445 extern _t_ibox
447 %define dtab_x(x) byte [_t_ibox+x]
449 %endif
; irn_fun <xor_op>, <mov_op> - inverse round function.
;   %1 = macro that XORs a table entry into a destination (ni_xor / li_xor)
;   %2 = macro that loads a table entry into a destination (ni_mov / li_mov)
;   In:  eax, ebx, ecx, edx = columns 0..3; esi, edi = key words 2 and 3.
;   Out: eax = column[0] unkeyed, ebp = column[1] unkeyed,
;        esi = column[2] keyed,   edi = column[3] keyed.
;        ebx, ecx and edx are used as scratch.
;   Below, "cN.bK" means byte K (0 = least significant) of input column N.
451 %macro irn_fun 2
453 rol eax,16 ; swap halves of column[0]: al/ah now hold c0.b2 / c0.b3
454 %1 esi, cl, 0, ebp ; column[2] ^= T0[c2.b0]
455 %1 esi, bh, 1, ebp ; column[2] ^= T1[c1.b1]
456 %1 esi, al, 2, ebp ; column[2] ^= T2[c0.b2]
457 %1 edi, dl, 0, ebp ; column[3] ^= T0[c3.b0]
458 %1 edi, ch, 1, ebp ; column[3] ^= T1[c2.b1]
459 %1 edi, ah, 3, ebp ; column[3] ^= T3[c0.b3]
460 %2 ebp, bl, 0, ebp ; column[1] = T0[c1.b0] (ebp is both scratch and destination)
461 shr eax,16 ; eax = low half of original column[0]
462 and ebx,0xffff0000 ; keep only the high half of column[1] ...
463 or ebx,eax ; ... and merge in the low half of column[0], which frees eax
464 shr ecx,16 ; cl/ch now hold c2.b2 / c2.b3
465 %1 ebp, bh, 1, eax ; column[1] ^= T1[c0.b1]
466 %1 ebp, ch, 3, eax ; column[1] ^= T3[c2.b3]
467 %2 eax, cl, 2, ecx ; column[0] = T2[c2.b2]; ecx becomes scratch from here
468 %1 eax, bl, 0, ecx ; column[0] ^= T0[c0.b0]
469 %1 eax, dh, 1, ecx ; column[0] ^= T1[c3.b1]
470 shr ebx,16 ; bl/bh now hold c1.b2 / c1.b3
471 shr edx,16 ; dl/dh now hold c3.b2 / c3.b3
472 %1 esi, dh, 3, ecx ; column[2] ^= T3[c3.b3]
473 %1 ebp, dl, 2, ecx ; column[1] ^= T2[c3.b2]
474 %1 eax, bh, 3, ecx ; column[0] ^= T3[c1.b3]
475 %1 edi, bl, 2, ecx ; column[3] ^= T2[c1.b2]
477 %endmacro
479 ; Basic MOV and XOR Operations for normal rounds
; ni_xor <dest>, <byte reg>, <table no>, <scratch>
;   dest ^= dtab_<table no>[byte reg]. The scratch register receives the
;   zero-extended byte and is clobbered.
481 %macro ni_xor 4
482 movzx %4,%2
483 xor %1,dtab_%3(%4)
484 %endmacro
; ni_mov <dest>, <byte reg>, <table no>, <scratch>
;   dest = dtab_<table no>[byte reg]. The scratch register receives the
;   zero-extended byte and is clobbered; it may be the same register as dest.
486 %macro ni_mov 4
487 movzx %4,%2
488 mov %1,dtab_%3(%4)
489 %endmacro
491 ; Basic MOV and XOR Operations for last round
; li_xor / li_mov take the same arguments as ni_xor / ni_mov:
;   (dest, byte register, table number, scratch register).
; With LAST_ROUND_TABLES they index the dltab_N tables directly.
493 %ifdef LAST_ROUND_TABLES
495 %macro li_xor 4
496 movzx %4,%2
497 xor %1,dltab_%3(%4)
498 %endmacro
500 %macro li_mov 4
501 movzx %4,%2
502 mov %1,dltab_%3(%4)
503 %endmacro
505 %else
; Without last-round tables: fetch one byte through dtab_x, zero-extend it
; and shift it left by 8*<table no> bits so it lands in byte lane <table no>.
507 %macro li_xor 4
508 movzx %4,%2
509 movzx %4,dtab_x(%4)
510 %if %3 != 0
511 shl %4,8*%3
512 %endif
513 xor %1,%4
514 %endmacro
; Same as above, but the shifted byte replaces dest instead of being XORed in.
516 %macro li_mov 4
517 movzx %4,%2
518 movzx %1,dtab_x(%4)
519 %if %3 != 0
520 shl %1,8*%3
521 %endif
522 %endmacro
524 %endif
; dec_round: one normal decryption round.
;   In:  eax, ebx, ecx, edx = state columns 0..3; ebp = current round key.
;   Out: eax, ebx, ecx, edx = new state columns; ebp = next round key.
;   Clobbers esi, edi and stack local slot 0.
526 %macro dec_round 0
; step to the next round key: upwards through a reversed schedule, downwards otherwise
528 %ifdef AES_REV_DKS
529 add ebp,16
530 %else
531 sub ebp,16
532 %endif
533 save 0,ebp ; irn_fun overwrites ebp, so park the key pointer
534 mov esi,[ebp+8] ; preload key words 2 and 3; irn_fun XORs the
535 mov edi,[ebp+12] ; table entries for columns 2 and 3 into them
537 irn_fun ni_xor, ni_mov
539 mov ebx,ebp ; column 1 was built in ebp (column 0 is already in eax)
540 mov ecx,esi ; column 2 (already keyed)
541 mov edx,edi ; column 3 (already keyed)
542 restore ebp,0 ; recover the key pointer
543 xor eax,[ebp] ; apply key words 0 and 1 to columns 0 and 1
544 xor ebx,[ebp+4]
546 %endmacro
; dec_last_round: final decryption round, using the last-round operations.
;   In:  eax, ebx, ecx, edx = state columns 0..3; ebp = current round key.
;   Out: eax, ebx, esi, edi = output columns 0..3 (columns 2 and 3 are left
;        in esi/edi and not copied back to ecx/edx); ebp = last round key.
;   Clobbers ecx, edx and stack local slot 0.
548 %macro dec_last_round 0
; step to the next round key: upwards through a reversed schedule, downwards otherwise
550 %ifdef AES_REV_DKS
551 add ebp,16
552 %else
553 sub ebp,16
554 %endif
555 save 0,ebp ; irn_fun overwrites ebp, so park the key pointer
556 mov esi,[ebp+8] ; preload key words 2 and 3
557 mov edi,[ebp+12]
559 irn_fun li_xor, li_mov
561 mov ebx,ebp ; column 1 was built in ebp (column 0 is already in eax)
562 restore ebp,0 ; recover the key pointer
563 xor eax,[ebp] ; apply key words 0 and 1 to columns 0 and 1
564 xor ebx,[ebp+4]
566 %endmacro
568 section .text
570 ; AES Decryption Subroutine
; _aes_decrypt: decrypt one 16-byte block.
;   Stack arguments (see in_blk / out_blk / ctx): input block pointer, output
;   block pointer, context pointer.
;   Returns eax = 0 on success. Returns eax = -1, without writing the output
;   block, when the byte at [ctx + 4*KS_LENGTH] is not 10*16, 12*16 or 14*16.
;   ebx, esi, edi and ebp are saved on entry and reloaded before exit.
572 do_name _aes_decrypt
574 sub esp,stk_spc ; local frame: slot 0 plus four saved registers
575 mov [esp+16],ebp
576 mov [esp+12],ebx
577 mov [esp+ 8],esi
578 mov [esp+ 4],edi
580 ; input four columns and xor in first round key
582 mov esi,[esp+in_blk+stk_spc] ; input pointer
583 mov eax,[esi ]
584 mov ebx,[esi+ 4]
585 mov ecx,[esi+ 8]
586 mov edx,[esi+12]
; NOTE(review): the advanced esi appears unused - every path below reloads
; esi before reading it (dec_round / dec_last_round, or the exit at .5)
587 lea esi,[esi+16]
589 mov ebp,[esp+ctx+stk_spc] ; key pointer
590 movzx edi,byte[ebp+4*KS_LENGTH]
591 %ifndef AES_REV_DKS ; if decryption key schedule is not reversed
592 lea ebp,[ebp+edi] ; we have to access it from the top down
593 %endif
594 xor eax,[ebp ] ; key schedule
595 xor ebx,[ebp+ 4]
596 xor ecx,[ebp+ 8]
597 xor edx,[ebp+12]
599 ; determine the number of rounds
601 cmp edi,10*16
602 je .3
603 cmp edi,12*16
604 je .2
605 cmp edi,14*16
606 je .1
607 mov eax,-1 ; unrecognised value: report failure
608 jmp .5
; Unrolled rounds: entry at .1 runs 13 normal rounds, .2 runs 11 and .3 runs
; 9, each followed by the last round.
610 .1: dec_round
611 dec_round
612 .2: dec_round
613 dec_round
614 .3: dec_round
615 dec_round
616 dec_round
617 dec_round
618 dec_round
619 dec_round
620 dec_round
621 dec_round
622 dec_round
623 dec_last_round
625 ; move final values to the output array.
; dec_last_round leaves columns 2 and 3 in esi and edi
627 mov ebp,[esp+out_blk+stk_spc]
628 mov [ebp],eax
629 mov [ebp+4],ebx
630 mov [ebp+8],esi
631 mov [ebp+12],edi
632 xor eax,eax ; return 0
; common exit: reload the saved registers and release the frame
634 .5: mov ebp,[esp+16]
635 mov ebx,[esp+12]
636 mov esi,[esp+ 8]
637 mov edi,[esp+ 4]
638 add esp,stk_spc
639 do_exit
641 %endif