1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * Blowfish Cipher Algorithm (x86_64)
5 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
8 #include <linux/linkage.h>
10 .file "blowfish-x86_64-asm.S"
13 /* structure of crypto context */
/*
 * Byte offsets of the four lookup tables s0..s3 inside the context that the
 * code addresses through CTX.  s0 starts after (16 + 2) 4-byte words, and each
 * following table starts a further 256 * 4 bytes on.
 * NOTE(review): the leading (16 + 2) words are presumably the area reached via
 * the "p" offset used by the round-key macros; "p" is not defined in the lines
 * visible here -- confirm.
 */
15 #define s0 ((16 + 2) * 4)
16 #define s1 ((16 + 2 + (1 * 256)) * 4)
17 #define s2 ((16 + 2 + (2 * 256)) * 4)
18 #define s3 ((16 + 2 + (3 * 256)) * 4)
56 /***********************************************************************
58 ***********************************************************************/
/*
 * Table-combine fragment (the enclosing macro header and the instructions
 * between these lines are not visible here).  Each index register is scaled
 * by 4 to pick a 32-bit entry, and the visible sequence is:
 *   RT0d  = s0[RT0]
 *   RT0d += s1[RT1]
 *   RT0d ^= s2[RT1]
 *   RT0d += s3[RT2]
 * NOTE(review): RT1 indexes both s1 and s2 in the visible lines; presumably it
 * is reloaded in between -- confirm against the full macro.
 */
64 movl s0(CTX,RT0,4), RT0d; \
65 addl s1(CTX,RT1,4), RT0d; \
69 xorl s2(CTX,RT1,4), RT0d; \
70 addl s3(CTX,RT2,4), RT0d; \
/*
 * add_roundkey_enc(n): XOR the 64-bit value stored at byte offset p + 4*n of
 * the context (CTX) into RX0.  Because the load is a quadword while the index
 * step is 4 bytes, one XOR covers the two adjacent 32-bit slots n and n + 1.
 */
73 #define add_roundkey_enc(n) \
74 xorq p+4*(n)(CTX), RX0;
/*
 * round_enc(n): starts by applying add_roundkey_enc(n).  The remainder of the
 * macro body is not visible here.
 */
76 #define round_enc(n) \
77 add_roundkey_enc(n); \
/*
 * add_roundkey_dec(n): loads the 64-bit value at byte offset p + 4*(n-1) of
 * the context (CTX) into RT0.  The instructions that follow the load are not
 * visible here.
 * NOTE(review): n is used unparenthesised in (n-1), whereas
 * preload_roundkey_dec writes ((n)-1); consider aligning the two.
 */
82 #define add_roundkey_dec(n) \
83 movq p+4*(n-1)(CTX), RT0; \
/*
 * round_dec(n): starts by applying add_roundkey_dec(n).  The remainder of the
 * macro body is not visible here.
 */
87 #define round_dec(n) \
88 add_roundkey_dec(n); \
93 #define read_block() \
98 #define write_block() \
/*
 * blowfish_enc_blk: function symbol bracketed by SYM_FUNC_START/SYM_FUNC_END.
 * Only one step of the body is visible here; argument registers, block
 * loading/storing and the round sequence are not shown.
 */
102 SYM_FUNC_START(blowfish_enc_blk)
/* XOR the 64-bit value at byte offset p + 4*16 of the context into RX0. */
124 add_roundkey_enc(16);
131 SYM_FUNC_END(blowfish_enc_blk)
/*
 * blowfish_dec_blk: function symbol bracketed by SYM_FUNC_START/SYM_FUNC_END.
 * NOTE(review): presumably the decryption counterpart of blowfish_enc_blk
 * (going by the name); none of its body is visible here -- confirm.
 */
133 SYM_FUNC_START(blowfish_dec_blk)
163 SYM_FUNC_END(blowfish_dec_blk)
165 /**********************************************************************
166 4-way Blowfish: four blocks processed in parallel
167 **********************************************************************/
169 /* F() for 4-way. Slower when used alone (1-way), but faster when used
170 * in parallel (4-way); tested on AMD Phenom II & Intel Xeon E7330. */
/*
 * Fragment of the 4-way F() body (the macro header and some intermediate
 * lines are not visible here).  "x ## bh" and "x ## bl" paste the macro
 * argument x onto the bh/bl suffixes to name byte registers; those bytes are
 * zero-extended into RT0d..RT3d and then used, scaled by 4, as indices into
 * the four tables:
 *   RT0d = ((s0[RT0] + s1[RT2]) ^ s2[RT1]) + s3[RT3]
 * NOTE(review): both bh/bl pairs read the same byte names; presumably x is
 * rotated between them in a line not shown -- confirm.
 */
173 movzbl x ## bh, RT1d; \
174 movzbl x ## bl, RT3d; \
176 movzbl x ## bh, RT0d; \
177 movzbl x ## bl, RT2d; \
179 movl s0(CTX,RT0,4), RT0d; \
180 addl s1(CTX,RT2,4), RT0d; \
181 xorl s2(CTX,RT1,4), RT0d; \
182 addl s3(CTX,RT3,4), RT0d; \
185 #define add_preloaded_roundkey4() \
/*
 * preload_roundkey_enc(n): load the 64-bit value at byte offset p + 4*n of the
 * context (CTX) into RKEY, i.e. the two adjacent 32-bit slots n and n + 1.
 */
191 #define preload_roundkey_enc(n) \
192 movq p+4*(n)(CTX), RKEY;
/*
 * add_roundkey_enc4(n): invoke add_preloaded_roundkey4() (its body is not
 * visible here), then load the key value for index n + 2 into RKEY via
 * preload_roundkey_enc.
 * NOTE(review): n is passed on as "n + 2" without parentheses around n.
 */
194 #define add_roundkey_enc4(n) \
195 add_preloaded_roundkey4(); \
196 preload_roundkey_enc(n + 2);
/*
 * round_enc4(n): starts by applying add_roundkey_enc4(n).  The remainder of
 * the macro body is not visible here.
 */
198 #define round_enc4(n) \
199 add_roundkey_enc4(n); \
/*
 * preload_roundkey_dec(n): load the 64-bit value at byte offset p + 4*(n - 1)
 * of the context (CTX) into RKEY.  The macro continues past this load; the
 * following instruction(s) are not visible here.
 */
211 #define preload_roundkey_dec(n) \
212 movq p+4*((n)-1)(CTX), RKEY; \
/*
 * add_roundkey_dec4(n): invoke add_preloaded_roundkey4() (its body is not
 * visible here), then run preload_roundkey_dec for index n - 2 to refill RKEY.
 * NOTE(review): n is passed on as "n - 2" without parentheses around n.
 */
215 #define add_roundkey_dec4(n) \
216 add_preloaded_roundkey4(); \
217 preload_roundkey_dec(n - 2);
/*
 * round_dec4(n): starts by applying add_roundkey_dec4(n).  The remainder of
 * the macro body is not visible here.
 */
219 #define round_dec4(n) \
220 add_roundkey_dec4(n); \
232 #define read_block4() \
249 #define write_block4() \
262 #define xor_block4() \
/*
 * blowfish_enc_blk_4way: function symbol bracketed by
 * SYM_FUNC_START/SYM_FUNC_END.  Only two steps of the body are visible here;
 * argument registers, block loading/storing and the round sequence are not
 * shown.
 */
275 SYM_FUNC_START(blowfish_enc_blk_4way)
/* Load the 64-bit value at byte offset p + 4*0 of the context into RKEY. */
288 preload_roundkey_enc(0);
/* Apply whatever key value is currently held in RKEY (macro body not shown). */
300 add_preloaded_roundkey4();
308 SYM_FUNC_END(blowfish_enc_blk_4way)
/*
 * __blowfish_dec_blk_4way: function symbol bracketed by
 * SYM_FUNC_START/SYM_FUNC_END.  Only two steps of the body are visible here;
 * argument registers, block loading/storing and the round sequence are not
 * shown.
 */
310 SYM_FUNC_START(__blowfish_dec_blk_4way)
/* Start from the top of the key area: the load is at offset p + 4*(17 - 1). */
326 preload_roundkey_dec(17);
/* Apply whatever key value is currently held in RKEY (macro body not shown). */
337 add_preloaded_roundkey4();
354 SYM_FUNC_END(__blowfish_dec_blk_4way)