1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * Fast AES implementation for SPE instruction set (PPC)
5 * This code makes use of the SPE SIMD instruction set as defined in
6 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
7 * Implementation is based on optimization guide notes from
8 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
10 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
13 #include <asm/ppc_asm.h>
14 #include "aes-spe-regs.h"
/*
 * EAD(in, bpos): rotate `in` and insert one of its bytes into bits 20-27 of
 * rT0 (IBM bit numbering, mask 0x00000ff0), i.e. the byte value shifted left
 * by 4. All other bits of rT0 are kept. For bpos = 0..3 the byte taken is
 * byte bpos of `in`, counted from the least significant byte.
 * NOTE(review): presumably this builds a 16-byte-stride index into a table
 * whose base sits in the remaining bits of rT0 - confirm against the caller.
 */
16 #define EAD(in, bpos) \
17 rlwimi rT0,in,28-((bpos+3)%4)*8,20,27;
/*
 * DAD(in, bpos): same byte selection as EAD, but the byte is inserted
 * unshifted into bits 24-31 of rT1 (mask 0x000000ff). All other bits of rT1
 * are kept.
 */
19 #define DAD(in, bpos) \
20 rlwimi rT1,in,24-((bpos+3)%4)*8,24,31;
/*
 * LWH(out, off): evlwwsplat loads the 32-bit word at off(rT0) and places it
 * in both halves of the 64-bit SPE register `out`.
 */
22 #define LWH(out, off) \
23 evlwwsplat out,off(rT0); /* load word high */
/* LWL(out, off): plain 32-bit word load from off(rT0) into `out`. */
25 #define LWL(out, off) \
26 lwz out,off(rT0); /* load word low */
/*
 * LBZ(out, tab, off): zero-extending byte load from off(tab) into `out`.
 * Unlike LWH/LWL the base register is a macro argument.
 */
28 #define LBZ(out, tab, off) \
29 lbz out,off(tab); /* load byte */
31 #define LAH(out, in, bpos, off) \
32 EAD(in, bpos) /* calc addr + load word high */ \
35 #define LAL(out, in, bpos, off) \
36 EAD(in, bpos) /* calc addr + load word low */ \
39 #define LAE(out, in, bpos) \
40 EAD(in, bpos) /* calc addr + load enc byte */ \
44 LBZ(out, rT0, 8) /* load enc byte */
46 #define LAD(out, in, bpos) \
47 DAD(in, bpos) /* calc addr + load dec byte */ \
54 * ppc_encrypt_block: The central encryption function for a single 16-byte
55 * block. It does no stack handling or register saving, to support fast calls
56 * via bl/blr. It expects that the caller has pre-xored the input data with
57 * the first 4 words of the encryption key into rD0-rD3. Pointer/counter
58 * registers must also have been set up beforehand (rT0, rKP, CTR). Output is
59 * stored in rD0-rD3 and rW0-rW3; the caller must execute a final xor on the
60 * output registers. All working registers rD0-rD3 & rW0-rW7 are overwritten.
/* Entry point; the calling contract is given in the block comment above. */
63 _GLOBAL(ppc_encrypt_block)
/* Loop head: the bdnz below branches back here while CTR is non-zero. */
67 ppc_encrypt_block_loop:
/* evmergehi: rD0 = { high word of rD0 : high word of rD1 } */
122 evmergehi rD0,rD0,rD1
/* likewise: rD2 = { high word of rD2 : high word of rD3 } */
132 evmergehi rD2,rD2,rD3
/* decrement CTR and run the loop body again until it reaches zero */
135 bdnz ppc_encrypt_block_loop
/* after the loop: same pairwise merge of the high words as inside it */
157 evmergehi rD0,rD0,rD1
167 evmergehi rD2,rD2,rD3
/*
 * Byte packing with rlwimi (rotate left, then insert under mask). Each
 * instruction copies the low byte (bits 24-31) of rW4..rW7 into one byte
 * lane of rW0..rW3 and leaves the other bits of the destination untouched.
 * NOTE(review): presumably rW4..rW7 receive a fresh byte before each of the
 * three groups (otherwise the same byte would be inserted three times) -
 * confirm against the loads placed between these groups.
 */
/* group 1: low byte of rW4..rW7 -> bits 16-23 of rW0..rW3 */
175 rlwimi rW0,rW4,8,16,23
176 rlwimi rW1,rW5,8,16,23
179 rlwimi rW2,rW6,8,16,23
180 rlwimi rW3,rW7,8,16,23
/* group 2: low byte of rW4..rW7 -> bits 8-15 of rW0..rW3 */
183 rlwimi rW0,rW4,16,8,15
184 rlwimi rW1,rW5,16,8,15
187 rlwimi rW2,rW6,16,8,15
189 rlwimi rW3,rW7,16,8,15
/* group 3: low byte of rW4..rW7 -> bits 0-7 (top byte) of rW0..rW3 */
193 rlwimi rW0,rW4,24,0,7
195 rlwimi rW1,rW5,24,0,7
197 rlwimi rW2,rW6,24,0,7
198 rlwimi rW3,rW7,24,0,7
202 * ppc_decrypt_block: The central decryption function for a single 16-byte
203 * block. It does no stack handling or register saving, to support fast calls
204 * via bl/blr. It expects that the caller has pre-xored the input data with
205 * the first 4 words of the decryption key into rD0-rD3. Pointer/counter
206 * registers must also have been set up beforehand (rT0, rKP, CTR). Output is
207 * stored in rD0-rD3 and rW0-rW3; the caller must execute a final xor on the
208 * output registers. All working registers rD0-rD3 & rW0-rW7 are overwritten.
/* Entry point; the calling contract is given in the block comment above. */
211 _GLOBAL(ppc_decrypt_block)
/* Loop head: the bdnz below branches back here while CTR is non-zero. */
215 ppc_decrypt_block_loop:
/* evmergehi: rD0 = { high word of rD0 : high word of rD1 } */
237 evmergehi rD0,rD0,rD1
/* likewise: rD2 = { high word of rD2 : high word of rD3 } */
247 evmergehi rD2,rD2,rD3
/* the same pair of merges occurs a second time within the loop body */
270 evmergehi rD0,rD0,rD1
280 evmergehi rD2,rD2,rD3
/* decrement CTR and run the loop body again until it reaches zero */
283 bdnz ppc_decrypt_block_loop
/* after the loop: same pairwise merge of the high words as inside it */
305 evmergehi rD0,rD0,rD1
315 evmergehi rD2,rD2,rD3
/*
 * Byte packing with rlwimi (rotate left, then insert under mask). Each
 * instruction copies the low byte (bits 24-31) of rW4..rW7 into one byte
 * lane of rW0..rW3 and leaves the other bits of the destination untouched.
 * NOTE(review): presumably rW4..rW7 receive a fresh byte before each of the
 * three groups (otherwise the same byte would be inserted three times) -
 * confirm against the loads placed between these groups.
 */
/* group 1: low byte of rW4..rW7 -> bits 16-23 of rW0..rW3 */
322 rlwimi rW0,rW4,8,16,23
323 rlwimi rW1,rW5,8,16,23
326 rlwimi rW2,rW6,8,16,23
327 rlwimi rW3,rW7,8,16,23
/* group 2: low byte of rW4..rW7 -> bits 8-15 of rW0..rW3 */
330 rlwimi rW0,rW4,16,8,15
331 rlwimi rW1,rW5,16,8,15
334 rlwimi rW2,rW6,16,8,15
336 rlwimi rW3,rW7,16,8,15
/* group 3: low byte of rW4..rW7 -> bits 0-7 (top byte) of rW0..rW3 */
340 rlwimi rW0,rW4,24,0,7
342 rlwimi rW1,rW5,24,0,7
344 rlwimi rW2,rW6,24,0,7
345 rlwimi rW3,rW7,24,0,7