1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * Camellia Cipher Algorithm (x86_64)
5 * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
8 #include <linux/linkage.h>
10 .file "camellia-x86_64-asm_64.S"
/*
 * Lookup tables defined outside this file.  The round macros index them
 * with a zero-extended byte scaled by 8 (see xor2ror16), i.e. they are
 * accessed as arrays of 8-byte entries.
 */
13 .extern camellia_sp10011110;
14 .extern camellia_sp22000222;
15 .extern camellia_sp03303033;
16 .extern camellia_sp00444404;
17 .extern camellia_sp02220222;
18 .extern camellia_sp30333033;
19 .extern camellia_sp44044404;
20 .extern camellia_sp11101110;
/* Short aliases for the external tables, used by roundsm/roundsm2. */
22 #define sp10011110 camellia_sp10011110
23 #define sp22000222 camellia_sp22000222
24 #define sp03303033 camellia_sp03303033
25 #define sp00444404 camellia_sp00444404
26 #define sp02220222 camellia_sp02220222
27 #define sp30333033 camellia_sp30333033
28 #define sp44044404 camellia_sp44044404
29 #define sp11101110 camellia_sp11101110
/*
 * Byte offsets into the context pointed to by CTX.  key_length is used as a
 * displacement off CTX (key_length(CTX)) and equals CAMELLIA_TABLE_BYTE_LEN.
 * NOTE(review): presumably the key table occupies the first 272 bytes of the
 * context and the key length field follows it -- confirm against the C
 * definition of struct camellia_ctx.
 */
31 #define CAMELLIA_TABLE_BYTE_LEN 272
33 /* struct camellia_ctx: */
35 #define key_length CAMELLIA_TABLE_BYTE_LEN
/*
 * xor2ror16(T0, T1, tmp1, tmp2, ab, dst)
 *
 * XORs two 8-byte table entries, selected by two bytes of `ab`, into dst:
 *     dst ^= T0[ab ## bl];  dst ^= T1[ab ## bh];
 *
 *   T0, T1      table symbols, addressed RIP-relative
 *   tmp1, tmp2  scratch registers, both overwritten; their 32-bit aliases
 *               are formed by pasting a `d` suffix.  The two swap roles
 *               (base vs. index) between the first and second lookup.
 *   ab          register name; `ab ## bl` and `ab ## bh` name the index
 *               bytes (presumably the lowest and second-lowest byte aliases,
 *               defined elsewhere -- confirm)
 *   dst         64-bit XOR destination
 *
 * The table base is loaded with leaq first because a RIP-relative operand
 * cannot also carry an index register.
 * NOTE(review): the name suggests `ab` is also rotated by 16 bits, but no
 * rotate is visible in this excerpt -- confirm against the full macro.
 */
79 #define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
80 leaq T0(%rip), tmp1; \
81 movzbl ab ## bl, tmp2 ## d; \
82 xorq (tmp1, tmp2, 8), dst; \
83 leaq T1(%rip), tmp2; \
84 movzbl ab ## bh, tmp1 ## d; \
86 xorq (tmp2, tmp1, 8), dst;
88 /**********************************************************************
  1-way camellia
90 **********************************************************************/
/*
 * roundsm(ab, subkey, cd) - one round step on a single block.
 *
 * Loads the 8-byte key table entry number `subkey` (byte offset
 * key_table + subkey * 8 from CTX) into RT2, then applies xor2ror16 four
 * times to ab ## 0, covering all eight sp* tables.  The XOR destination
 * alternates between cd ## 0 and RT2; RT0 and RT1 are scratch.
 * NOTE(review): the step that folds RT2 into cd ## 0 is not visible in this
 * excerpt -- confirm against the full macro.
 */
91 #define roundsm(ab, subkey, cd) \
92 movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
94 xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
95 xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
96 xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
97 xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
/*
 * fls(l, r, kl, kr) - mix key table entries kl and kr into l ## 0 / r ## 0.
 *
 * Steps visible here, in order:
 *   - 32-bit load of entry kl into RT0d, ANDed with l ## 0d
 *   - 64-bit load of entry kr into RT1
 *   - 64-bit load of entry kl into RT2
 *   - 32-bit load of entry kr into RT0d, ANDed with r ## 0d
 * Entries are 8 bytes wide (offset key_table + k * 8 from CTX).
 * RT0, RT1 and RT2 are overwritten.
 * NOTE(review): presumably the Camellia FL / FL^-1 layer; the rotate, shift
 * and final XOR steps are not visible in this excerpt -- confirm.
 */
101 #define fls(l, r, kl, kr) \
102 movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
103 andl l ## 0d, RT0d; \
107 movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
112 movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
116 movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
117 andl r ## 0d, RT0d; \
/*
 * enc_rounds(i) - six consecutive roundsm steps for encryption.
 *
 * Uses key table entries i + 2 .. i + 7 in ascending order.  The roles of
 * RAB and RCD swap on every step: the first step reads RAB and updates RCD,
 * the next reads RCD and updates RAB, and so on.
 */
122 #define enc_rounds(i) \
123 roundsm(RAB, i + 2, RCD); \
124 roundsm(RCD, i + 3, RAB); \
125 roundsm(RAB, i + 4, RCD); \
126 roundsm(RCD, i + 5, RAB); \
127 roundsm(RAB, i + 6, RCD); \
128 roundsm(RCD, i + 7, RAB);
131 fls(RAB, RCD, i + 0, i + 1);
/*
 * enc_inpack() - load one input block from RIO and apply the first key entry.
 *
 * Visible here: the 8 bytes at RIO + 8 are loaded into RCD0, and key table
 * entry 0 (key_table(CTX)) is XORed into RAB0.
 * NOTE(review): the load of RAB0 and any byte-order/rotation fix-ups are not
 * visible in this excerpt -- confirm against the full macro.
 */
133 #define enc_inpack() \
137 movq 4*2(RIO), RCD0; \
140 xorq key_table(CTX), RAB0;
/*
 * enc_outunpack(op, max) - apply the last key entry and write one block out.
 *
 *   op   instruction stem pasted with `q`; the callers in this file pass
 *        `mov` (overwrite the destination) or `xor` (XOR into it)
 *   max  register holding the index of the final 8-byte key table entry
 *        (address key_table + CTX + max * 8)
 *
 * XORs that key entry into RCD0, then writes RCD0 to RIO + 0 and RAB0 to
 * RIO + 8.  Note the halves are written in the opposite order from the one
 * enc_inpack reads them in (RCD0 was loaded from RIO + 8).
 */
142 #define enc_outunpack(op, max) \
143 xorq key_table(CTX, max, 8), RCD0; \
146 op ## q RCD0, (RIO); \
149 op ## q RAB0, 4*2(RIO);
/*
 * dec_rounds(i) - six consecutive roundsm steps for decryption.
 *
 * Same structure as enc_rounds, but the key table entries are consumed in
 * descending order, i + 7 down to i + 2.
 */
151 #define dec_rounds(i) \
152 roundsm(RAB, i + 7, RCD); \
153 roundsm(RCD, i + 6, RAB); \
154 roundsm(RAB, i + 5, RCD); \
155 roundsm(RCD, i + 4, RAB); \
156 roundsm(RAB, i + 3, RCD); \
157 roundsm(RCD, i + 2, RAB);
160 fls(RAB, RCD, i + 1, i + 0);
/*
 * dec_inpack(max) - load one input block from RIO for decryption.
 *
 *   max  register holding the index of the last 8-byte key table entry
 *
 * Visible here: the 8 bytes at RIO + 8 are loaded into RCD0, and key table
 * entry `max` (key_table + CTX + max * 8) is XORed into RAB0, i.e. decryption
 * starts from the entry that encryption applies last.
 * NOTE(review): the load of RAB0 is not visible in this excerpt -- confirm.
 */
162 #define dec_inpack(max) \
166 movq 4*2(RIO), RCD0; \
169 xorq key_table(CTX, max, 8), RAB0;
/*
 * dec_outunpack() - finish decryption of one block.
 *
 * Visible here: key table entry 0 (key_table(CTX)) is XORed into RCD0.
 * NOTE(review): the stores to the output buffer are not visible in this
 * excerpt -- confirm against the full macro.
 */
171 #define dec_outunpack() \
172 xorq key_table(CTX), RCD0; \
/*
 * __camellia_enc_blk - encrypt a single block.
 *
 * RT1 carries `max`, the index of the final key table entry handed to
 * enc_outunpack: 24 by default, 32 on the longer-key path.  The result is
 * either stored (mov) or XORed into the destination (xor), selected by
 * RXORbl.
 * NOTE(review): argument setup, the round sequence and the branches between
 * the lines below are not visible in this excerpt.
 */
180 SYM_FUNC_START(__camellia_enc_blk)
200 movl $24, RT1d; /* max */
/*
 * NOTE(review): only one byte at key_length(CTX) is compared here, while
 * camellia_dec_blk uses a 32-bit cmpl on the same field -- confirm the
 * field width and that the byte compare is intentional.
 */
202 cmpb $16, key_length(CTX);
207 movl $32, RT1d; /* max */
/* A non-zero RXORbl presumably selects the xor variant below -- confirm. */
210 testb RXORbl, RXORbl;
/* Overwrite the destination with the result. */
215 enc_outunpack(mov, RT1);
/* XOR the result into the destination. */
221 enc_outunpack(xor, RT1);
225 SYM_FUNC_END(__camellia_enc_blk)
/*
 * camellia_dec_blk - decrypt a single block.
 *
 * NOTE(review): only the key-length check is visible in this excerpt; the
 * values preloaded into RT2d and RXORd, and the round sequence, are not.
 */
227 SYM_FUNC_START(camellia_dec_blk)
/* 32-bit compare of the key length field against 16. */
233 cmpl $16, key_length(CTX);
/* When the key length is 16, `max` (RT2d) takes the value held in RXORd. */
236 cmovel RXORd, RT2d; /* max */
263 SYM_FUNC_END(camellia_dec_blk)
265 /**********************************************************************
  2-way camellia
267 **********************************************************************/
/*
 * roundsm2(ab, subkey, cd) - one round step on two blocks at once.
 *
 * Loads key table entry `subkey` (offset key_table + subkey * 8 from CTX)
 * into RT2.  Block 0 is processed exactly as in roundsm: four xor2ror16
 * calls on ab ## 0, alternating the destination between cd ## 0 and RT2.
 * Block 1 then runs the same four table pairs on ab ## 1, but always XORs
 * straight into cd ## 1.  RT0 and RT1 are scratch.
 * NOTE(review): the steps that fold the subkey into cd ## 1 and RT2 into
 * cd ## 0 are not visible in this excerpt -- confirm against the full macro.
 */
268 #define roundsm2(ab, subkey, cd) \
269 movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
272 xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
273 xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
274 xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
275 xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
277 xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
279 xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
280 xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
281 xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
/*
 * fls2(l, r, kl, kr) - two-block counterpart of fls.
 *
 * Mixes key table entries kl and kr (8 bytes each, at key_table + k * 8 from
 * CTX) into l ## 0 / r ## 0 and l ## 1 / r ## 1.  Steps visible here:
 *   - 32-bit load of kl ANDed with l ## 0d (RT0d) and with l ## 1d (RT2d)
 *   - 32-bit load of kr ANDed with r ## 0d (RT2d) and with r ## 1d (RT1d)
 *   - 64-bit loads of kr and kl into the remaining temporaries
 * The temporaries RT0, RT1 and RT2 are reused in rotation between the two
 * blocks and are all overwritten.
 * NOTE(review): the rotate, shift, OR and final XOR steps are not visible in
 * this excerpt -- confirm against the full macro.
 */
283 #define fls2(l, r, kl, kr) \
284 movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
285 andl l ## 0d, RT0d; \
289 movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
294 movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
295 andl l ## 1d, RT2d; \
299 movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
304 movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
308 movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
309 andl r ## 0d, RT2d; \
314 movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
318 movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
319 andl r ## 1d, RT1d; \
/*
 * enc_rounds2(i) - six consecutive roundsm2 steps for two-block encryption.
 *
 * Uses key table entries i + 2 .. i + 7 in ascending order, swapping the
 * roles of RAB and RCD on every step.
 */
324 #define enc_rounds2(i) \
325 roundsm2(RAB, i + 2, RCD); \
326 roundsm2(RCD, i + 3, RAB); \
327 roundsm2(RAB, i + 4, RCD); \
328 roundsm2(RCD, i + 5, RAB); \
329 roundsm2(RAB, i + 6, RCD); \
330 roundsm2(RCD, i + 7, RAB);
/*
 * enc_fls2(i) - fls2 step for encryption: entry i is used as kl and entry
 * i + 1 as kr.
 */
332 #define enc_fls2(i) \
333 fls2(RAB, RCD, i + 0, i + 1);
/*
 * enc_inpack2() - load two input blocks from RIO and apply key entry 0.
 *
 * Visible here: RCD0 is loaded from RIO + 8, RAB1 from RIO + 16 and RCD1
 * from RIO + 24; key table entry 0 (key_table(CTX)) is XORed into both RAB0
 * and RAB1.
 * NOTE(review): the load of RAB0 and any byte-order/rotation fix-ups are not
 * visible in this excerpt -- confirm against the full macro.
 */
335 #define enc_inpack2() \
339 movq 4*2(RIO), RCD0; \
342 xorq key_table(CTX), RAB0; \
344 movq 8*2(RIO), RAB1; \
347 movq 12*2(RIO), RCD1; \
350 xorq key_table(CTX), RAB1;
/*
 * enc_outunpack2(op, max) - apply the last key entry and write two blocks.
 *
 *   op   instruction stem pasted with `q`; the callers in this file pass
 *        `mov` (overwrite the destination) or `xor` (XOR into it)
 *   max  register holding the index of the final 8-byte key table entry
 *
 * XORs key table entry `max` into RCD0 and RCD1, then writes RCD0, RAB0,
 * RCD1, RAB1 to RIO + 0, + 8, + 16 and + 24 respectively.  Within each
 * block the halves are written in the opposite order from the one
 * enc_inpack2 reads them in.
 */
352 #define enc_outunpack2(op, max) \
353 xorq key_table(CTX, max, 8), RCD0; \
356 op ## q RCD0, (RIO); \
359 op ## q RAB0, 4*2(RIO); \
361 xorq key_table(CTX, max, 8), RCD1; \
364 op ## q RCD1, 8*2(RIO); \
367 op ## q RAB1, 12*2(RIO);
/*
 * dec_rounds2(i) - six consecutive roundsm2 steps for two-block decryption.
 *
 * Same structure as enc_rounds2, but the key table entries are consumed in
 * descending order, i + 7 down to i + 2.
 */
369 #define dec_rounds2(i) \
370 roundsm2(RAB, i + 7, RCD); \
371 roundsm2(RCD, i + 6, RAB); \
372 roundsm2(RAB, i + 5, RCD); \
373 roundsm2(RCD, i + 4, RAB); \
374 roundsm2(RAB, i + 3, RCD); \
375 roundsm2(RCD, i + 2, RAB);
/*
 * dec_fls2(i) - fls2 step for decryption: the key entries are swapped
 * relative to enc_fls2, with entry i + 1 used as kl and entry i as kr.
 */
377 #define dec_fls2(i) \
378 fls2(RAB, RCD, i + 1, i + 0);
/*
 * dec_inpack2(max) - load two input blocks from RIO for decryption.
 *
 *   max  register holding the index of the last 8-byte key table entry
 *
 * Visible here: RCD0 is loaded from RIO + 8, RAB1 from RIO + 16 and RCD1
 * from RIO + 24; key table entry `max` (key_table + CTX + max * 8) is XORed
 * into both RAB0 and RAB1.
 * NOTE(review): the load of RAB0 is not visible in this excerpt -- confirm.
 */
380 #define dec_inpack2(max) \
384 movq 4*2(RIO), RCD0; \
387 xorq key_table(CTX, max, 8), RAB0; \
389 movq 8*2(RIO), RAB1; \
392 movq 12*2(RIO), RCD1; \
395 xorq key_table(CTX, max, 8), RAB1;
/*
 * dec_outunpack2() - apply key entry 0 and store two decrypted blocks.
 *
 * XORs key table entry 0 (key_table(CTX)) into RCD0 and RCD1.  The stores
 * visible here write RAB0 to RIO + 8, RCD1 to RIO + 16 and RAB1 to RIO + 24,
 * always with a plain movq (there is no xor variant, unlike enc_outunpack2).
 * NOTE(review): the store of RCD0 is not visible in this excerpt -- confirm.
 */
397 #define dec_outunpack2() \
398 xorq key_table(CTX), RCD0; \
404 movq RAB0, 4*2(RIO); \
406 xorq key_table(CTX), RCD1; \
409 movq RCD1, 8*2(RIO); \
412 movq RAB1, 12*2(RIO);
/*
 * __camellia_enc_blk_2way - encrypt two blocks in parallel.
 *
 * RT2 carries `max`, the index of the final key table entry handed to
 * enc_outunpack2: 24 by default, 32 on the longer-key path.  The result is
 * either stored (mov) or XORed into the destination (xor).
 * NOTE(review): argument setup, the round sequence and the branches between
 * the lines below are not visible in this excerpt.
 */
414 SYM_FUNC_START(__camellia_enc_blk_2way)
435 movl $24, RT2d; /* max */
/*
 * NOTE(review): only one byte at key_length(CTX) is compared here, while
 * camellia_dec_blk_2way uses a 32-bit cmpl on the same field -- confirm
 * the field width and that the byte compare is intentional.
 */
437 cmpb $16, key_length(CTX);
442 movl $32, RT2d; /* max */
/* Overwrite the destination with both result blocks. */
449 enc_outunpack2(mov, RT2);
/* XOR both result blocks into the destination. */
456 enc_outunpack2(xor, RT2);
461 SYM_FUNC_END(__camellia_enc_blk_2way)
/*
 * camellia_dec_blk_2way - decrypt two blocks in parallel.
 *
 * NOTE(review): only the key-length handling is visible in this excerpt; the
 * values preloaded into RT2d and RXORd, the round sequence and the
 * .L__dec2_rounds16 label itself are not.
 */
463 SYM_FUNC_START(camellia_dec_blk_2way)
/* 32-bit compare of the key length field against 16. */
469 cmpl $16, key_length(CTX);
/* When the key length is 16, `max` (RT2d) takes the value held in RXORd. */
472 cmovel RXORd, RT2d; /* max */
/*
 * Jump to the 16-byte-key entry point.  Presumably this still consumes the
 * flags from the cmpl above, since cmov does not modify flags -- confirm
 * that no flag-writing instruction sits in between in the full source.
 */
482 je .L__dec2_rounds16;
501 SYM_FUNC_END(camellia_dec_blk_2way)