2 * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
5 * Copyright 2015 Netflix, Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <crypto/aesni/aesni_os.h>
35 #include <wmmintrin.h>
/*
 * Encrypt eight 128-bit blocks (a..h) in parallel under one key schedule.
 *
 * Every block is first XORed with keysched[0] (the initial AddRoundKey),
 * then passed through `rounds` AESENC steps using keysched[1..rounds], and
 * finished with AESENCLAST using the following round key.  The results are
 * stored in out[0..7] in argument order (a -> out[0], ..., h -> out[7]).
 *
 * keysched must hold at least rounds + 2 round keys, since index
 * rounds + 1 is read for the final step.
 *
 * NOTE(review): rounds is presumably the AES round count minus one
 * (9/11/13), matching the single-block helper -- confirm against callers.
 */
static inline void
aesni_enc8(int rounds, const __m128i *keysched, __m128i a,
    __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
    __m128i h, __m128i out[8])
{
	__m128i rk;
	int i;

	/* Initial AddRoundKey: whiten every block with round key 0. */
	rk = keysched[0];
	a = _mm_xor_si128(a, rk);
	b = _mm_xor_si128(b, rk);
	c = _mm_xor_si128(c, rk);
	d = _mm_xor_si128(d, rk);
	e = _mm_xor_si128(e, rk);
	f = _mm_xor_si128(f, rk);
	g = _mm_xor_si128(g, rk);
	h = _mm_xor_si128(h, rk);

	/* Middle rounds: the same round key is applied to all eight lanes. */
	for (i = 0; i < rounds; i++) {
		rk = keysched[i + 1];
		a = _mm_aesenc_si128(a, rk);
		b = _mm_aesenc_si128(b, rk);
		c = _mm_aesenc_si128(c, rk);
		d = _mm_aesenc_si128(d, rk);
		e = _mm_aesenc_si128(e, rk);
		f = _mm_aesenc_si128(f, rk);
		g = _mm_aesenc_si128(g, rk);
		h = _mm_aesenc_si128(h, rk);
	}

	/*
	 * The loop leaves i == rounds (for rounds >= 0), so i + 1 selects
	 * the last round key.
	 */
	rk = keysched[i + 1];
	out[0] = _mm_aesenclast_si128(a, rk);
	out[1] = _mm_aesenclast_si128(b, rk);
	out[2] = _mm_aesenclast_si128(c, rk);
	out[3] = _mm_aesenclast_si128(d, rk);
	out[4] = _mm_aesenclast_si128(e, rk);
	out[5] = _mm_aesenclast_si128(f, rk);
	out[6] = _mm_aesenclast_si128(g, rk);
	out[7] = _mm_aesenclast_si128(h, rk);
}
/*
 * Decrypt eight 128-bit blocks (a..h) in parallel under one key schedule.
 *
 * Every block is first XORed with keysched[0], then passed through
 * `rounds` AESDEC steps using keysched[1..rounds], and finished with
 * AESDECLAST using the following round key.  The results are stored in
 * out[0..7] in argument order (a -> out[0], ..., h -> out[7]).
 *
 * keysched must hold at least rounds + 2 round keys, since index
 * rounds + 1 is read for the final step.
 *
 * NOTE(review): keysched is presumably a decryption schedule already
 * prepared for AESDEC, and rounds the AES round count minus one --
 * confirm against the key-setup code and callers.
 */
static inline void
aesni_dec8(int rounds, const __m128i *keysched, __m128i a,
    __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
    __m128i h, __m128i out[8])
{
	__m128i rk;
	int i;

	/* Initial AddRoundKey: whiten every block with round key 0. */
	rk = keysched[0];
	a = _mm_xor_si128(a, rk);
	b = _mm_xor_si128(b, rk);
	c = _mm_xor_si128(c, rk);
	d = _mm_xor_si128(d, rk);
	e = _mm_xor_si128(e, rk);
	f = _mm_xor_si128(f, rk);
	g = _mm_xor_si128(g, rk);
	h = _mm_xor_si128(h, rk);

	/* Middle rounds: the same round key is applied to all eight lanes. */
	for (i = 0; i < rounds; i++) {
		rk = keysched[i + 1];
		a = _mm_aesdec_si128(a, rk);
		b = _mm_aesdec_si128(b, rk);
		c = _mm_aesdec_si128(c, rk);
		d = _mm_aesdec_si128(d, rk);
		e = _mm_aesdec_si128(e, rk);
		f = _mm_aesdec_si128(f, rk);
		g = _mm_aesdec_si128(g, rk);
		h = _mm_aesdec_si128(h, rk);
	}

	/*
	 * The loop leaves i == rounds (for rounds >= 0), so i + 1 selects
	 * the last round key.
	 */
	rk = keysched[i + 1];
	out[0] = _mm_aesdeclast_si128(a, rk);
	out[1] = _mm_aesdeclast_si128(b, rk);
	out[2] = _mm_aesdeclast_si128(c, rk);
	out[3] = _mm_aesdeclast_si128(d, rk);
	out[4] = _mm_aesdeclast_si128(e, rk);
	out[5] = _mm_aesdeclast_si128(f, rk);
	out[6] = _mm_aesdeclast_si128(g, rk);
	out[7] = _mm_aesdeclast_si128(h, rk);
}
/*
 * Encrypt a single 128-bit block.
 *
 * The caller passes rounds as (number of AES rounds - 1).  The block is
 * XORed with keysched[0], run through AESENC two round keys at a time
 * starting at keysched[1] while the index is below rounds, given one
 * more AESENC with keysched[rounds], and finished with AESENCLAST using
 * keysched[rounds + 1].  The encrypted block is returned.
 */
static inline __m128i
aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
{
	__m128i state;
	int r;

	state = _mm_xor_si128(from, keysched[0]);

	/* Two rounds per iteration. */
	r = 1;
	while (r < rounds) {
		state = _mm_aesenc_si128(state, keysched[r]);
		state = _mm_aesenc_si128(state, keysched[r + 1]);
		r += 2;
	}

	state = _mm_aesenc_si128(state, keysched[rounds]);
	state = _mm_aesenclast_si128(state, keysched[rounds + 1]);
	return state;
}
/*
 * Decrypt a single 128-bit block.
 *
 * The block is XORed with keysched[0], run through AESDEC two round keys
 * at a time starting at keysched[1] while the index is below rounds,
 * given one more AESDEC with keysched[rounds], and finished with
 * AESDECLAST using keysched[rounds + 1].  The decrypted block is
 * returned.
 *
 * NOTE(review): rounds presumably follows the same "rounds - 1"
 * convention as the encrypt helper -- confirm against callers.
 */
static inline __m128i
aesni_dec(int rounds, const __m128i *keysched, const __m128i from)
{
	__m128i state;
	int r;

	state = _mm_xor_si128(from, keysched[0]);

	/* Two rounds per iteration. */
	r = 1;
	while (r < rounds) {
		state = _mm_aesdec_si128(state, keysched[r]);
		state = _mm_aesdec_si128(state, keysched[r + 1]);
		r += 2;
	}

	state = _mm_aesdec_si128(state, keysched[rounds]);
	state = _mm_aesdeclast_si128(state, keysched[rounds + 1]);
	return state;
}
145 #endif /* _AESENCDEC_H_ */