2 * Copyright (c) 2014-2021 The FreeBSD Foundation
3 * Copyright (c) 2018 iXsystems, Inc
6 * Portions of this software were developed by John-Mark Gurney
7 * under the sponsorship of the FreeBSD Foundation and
8 * Rubicon Communications, LLC (Netgate).
10 * Portions of this software were developed by Ararat River
11 * Consulting, LLC under sponsorship of the FreeBSD Foundation.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * This file implements AES-CCM+CBC-MAC, as described
36 * at https://tools.ietf.org/html/rfc3610, using Intel's
37 * AES-NI instructions.
41 #include <sys/types.h>
42 #include <sys/endian.h>
43 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <crypto/aesni/aesni.h>
47 #include <crypto/aesni/aesni_os.h>
48 #include <crypto/aesni/aesencdec.h>
49 #define AESNI_ENC(d, k, nr) aesni_enc(nr-1, (const __m128i*)k, d)
51 #include <wmmintrin.h>
52 #include <emmintrin.h>
53 #include <smmintrin.h>
56 * Encrypt a single 128-bit block after
57 * doing an xor. This is also used to
58 * decrypt (yay symmetric encryption).
61 xor_and_encrypt(__m128i a
, __m128i b
, const unsigned char *k
, int nr
)
63 __m128i retval
= _mm_xor_si128(a
, b
);
65 retval
= AESNI_ENC(retval
, k
, nr
);
/*
 * Put value at the end of block, starting at offset.
 * (This goes backwards, putting bytes in *until* it
 * reaches offset.)
 *
 * The low-order byte of value lands in the last byte of the block and
 * successively higher-order bytes in the bytes before it, i.e. the
 * value is stored big-endian.  Bytes at indices
 * (sizeof(*block) - offset + 1) .. (sizeof(*block) - 1) are written,
 * which is offset - 1 bytes in total; everything before that is left
 * untouched.  Bits of value that do not fit are silently discarded.
 *
 * An offset larger than sizeof(*block) makes the lower bound wrap
 * around, so nothing is written in that case.
 */
static void
append_int(size_t value, __m128i *block, size_t offset)
{
	uint8_t *bp = (uint8_t *)block;
	/* Index and bound are both size_t: no signed/unsigned comparison. */
	size_t indx = sizeof(*block) - 1;
	const size_t stop = sizeof(*block) - offset;

	/* indx > stop >= 0 inside the loop, so indx-- cannot underflow. */
	while (indx > stop) {
		bp[indx] = value & 0xff;
		indx--;
		value >>= 8;
	}
}
88 * Start the CBC-MAC process. This handles the auth data.
91 cbc_mac_start(const unsigned char *auth_data
, size_t auth_len
,
92 const unsigned char *nonce
, size_t nonce_len
,
93 const unsigned char *key
, int nr
,
94 size_t data_len
, size_t tag_len
)
96 __m128i cbc_block
, staging_block
;
98 /* This defines where the message length goes */
99 int L
= sizeof(__m128i
) - 1 - nonce_len
;
102 * Set up B0 here. This has the flags byte,
103 * followed by the nonce, followed by the
104 * length of the message.
106 cbc_block
= _mm_setzero_si128();
107 byte_ptr
= (uint8_t*)&cbc_block
;
108 byte_ptr
[0] = ((auth_len
> 0) ? 1 : 0) * 64 |
109 (((tag_len
- 2) / 2) * 8) |
111 bcopy(nonce
, byte_ptr
+ 1, nonce_len
);
112 append_int(data_len
, &cbc_block
, L
+1);
113 cbc_block
= AESNI_ENC(cbc_block
, key
, nr
);
117 * We need to start by appending the length descriptor.
121 const uint8_t *auth_ptr
= auth_data
;
123 staging_block
= _mm_setzero_si128();
126 * The current OCF calling convention means that
127 * there can never be more than 4g of authentication
128 * data, so we don't handle the 0xffff case.
130 KASSERT(auth_len
< (1ULL << 32),
131 ("%s: auth_len (%zu) larger than 4GB",
132 __FUNCTION__
, auth_len
));
134 if (auth_len
< ((1 << 16) - (1 << 8))) {
136 * If the auth data length is less than
137 * 0xff00, we don't need to encode a length
138 * specifier, just the length of the auth
141 be16enc(&staging_block
, auth_len
);
143 } else if (auth_len
< (1ULL << 32)) {
145 * Two bytes for the length prefix, and then
146 * four bytes for the length. This makes a total
147 * of 6 bytes to describe the auth data length.
149 be16enc(&staging_block
, 0xfffe);
150 be32enc((char*)&staging_block
+ 2, auth_len
);
153 panic("%s: auth len too large", __FUNCTION__
);
156 * Need to copy abytes into blocks. The first block is
157 * already partially filled, by auth_amt, so we need
158 * to handle that. The last block needs to be zero padded.
160 copy_amt
= MIN(auth_len
,
161 sizeof(staging_block
) - auth_amt
);
162 byte_ptr
= (uint8_t*)&staging_block
;
163 bcopy(auth_ptr
, &byte_ptr
[auth_amt
], copy_amt
);
164 auth_ptr
+= copy_amt
;
166 cbc_block
= xor_and_encrypt(cbc_block
, staging_block
, key
, nr
);
168 while (auth_ptr
< auth_data
+ auth_len
) {
169 copy_amt
= MIN((auth_data
+ auth_len
) - auth_ptr
,
170 sizeof(staging_block
));
171 if (copy_amt
< sizeof(staging_block
))
172 bzero(&staging_block
, sizeof(staging_block
));
173 bcopy(auth_ptr
, &staging_block
, copy_amt
);
174 cbc_block
= xor_and_encrypt(cbc_block
, staging_block
,
176 auth_ptr
+= copy_amt
;
183 * Implement AES CCM+CBC-MAC encryption and authentication.
186 * Since abytes is limited to a 32 bit value here, the AAD is
187 * limited to 4 gigabytes or less.
190 AES_CCM_encrypt(const unsigned char *in
, unsigned char *out
,
191 const unsigned char *addt
, const unsigned char *nonce
,
192 unsigned char *tag
, uint32_t nbytes
, uint32_t abytes
, int nlen
,
193 int tag_length
, const unsigned char *key
, int nr
)
196 int counter
= 1; /* S0 has 0, S1 has 1 */
197 size_t copy_amt
, total
= 0;
199 __m128i s0
, rolling_mac
, s_x
, staging_block
;
201 /* NIST 800-38c section A.1 says n is [7, 13]. */
202 if (nlen
< 7 || nlen
> 13)
203 panic("%s: bad nonce length %d", __FUNCTION__
, nlen
);
206 * We need to know how many bytes to use to describe
207 * the length of the data. Normally, nlen should be
208 * 12, which leaves us 3 bytes to do that -- 16mbytes of
209 * data to encrypt. But it can be longer or shorter;
210 * this impacts the length of the message.
212 L
= sizeof(__m128i
) - 1 - nlen
;
215 * Clear out the blocks
217 s0
= _mm_setzero_si128();
219 rolling_mac
= cbc_mac_start(addt
, abytes
, nonce
, nlen
,
220 key
, nr
, nbytes
, tag_length
);
222 /* s0 has flags, nonce, and then 0 */
223 byte_ptr
= (uint8_t*)&s0
;
224 byte_ptr
[0] = L
- 1; /* but the flags byte only has L' */
225 bcopy(nonce
, &byte_ptr
[1], nlen
);
228 * Now to cycle through the rest of the data.
230 bcopy(&s0
, &s_x
, sizeof(s0
));
232 while (total
< nbytes
) {
234 * Copy the plain-text data into staging_block.
235 * This may need to be zero-padded.
237 copy_amt
= MIN(nbytes
- total
, sizeof(staging_block
));
238 bcopy(in
+total
, &staging_block
, copy_amt
);
239 if (copy_amt
< sizeof(staging_block
)) {
240 byte_ptr
= (uint8_t*)&staging_block
;
241 bzero(&byte_ptr
[copy_amt
],
242 sizeof(staging_block
) - copy_amt
);
244 rolling_mac
= xor_and_encrypt(rolling_mac
, staging_block
,
246 /* Put the counter into the s_x block */
247 append_int(counter
++, &s_x
, L
+1);
249 __m128i X
= AESNI_ENC(s_x
, key
, nr
);
250 /* XOR the plain-text with the encrypted counter block */
251 staging_block
= _mm_xor_si128(staging_block
, X
);
252 /* And copy it out */
253 bcopy(&staging_block
, out
+total
, copy_amt
);
257 * Allegedly done with it! Except for the tag.
259 s0
= AESNI_ENC(s0
, key
, nr
);
260 staging_block
= _mm_xor_si128(s0
, rolling_mac
);
261 bcopy(&staging_block
, tag
, tag_length
);
262 explicit_bzero(&s0
, sizeof(s0
));
263 explicit_bzero(&staging_block
, sizeof(staging_block
));
264 explicit_bzero(&s_x
, sizeof(s_x
));
265 explicit_bzero(&rolling_mac
, sizeof(rolling_mac
));
269 * Implement AES CCM+CBC-MAC decryption and authentication.
270 * Returns 0 on failure, 1 on success.
272 * The primary difference here is that each encrypted block
273 * needs to be hashed&encrypted after it is decrypted (since
274 * the CBC-MAC is based on the plain text). This means that
275 * we do the decryption twice -- first to verify the tag,
276 * and second to decrypt and copy it out.
278 * To avoid annoying code copying, we implement the main
279 * loop as a separate function.
281 * Call with out as NULL to not store the decrypted results;
282 * call with hashp as NULL to not run the authentication.
283 * Calling with neither as NULL does the decryption and
284 * authentication as a single pass (which is not allowed
285 * per the specification, really).
287 * If hashp is non-NULL, it points to the post-AAD computed
291 decrypt_loop(const unsigned char *in
, unsigned char *out
, size_t nbytes
,
292 __m128i s0
, size_t nonce_length
, __m128i
*macp
,
293 const unsigned char *key
, int nr
)
296 __m128i s_x
= s0
, mac_block
;
298 const size_t L
= sizeof(__m128i
) - 1 - nonce_length
;
299 __m128i pad_block
, staging_block
;
302 * The starting mac (post AAD, if any).
307 while (total
< nbytes
) {
308 size_t copy_amt
= MIN(nbytes
- total
, sizeof(staging_block
));
310 if (copy_amt
< sizeof(staging_block
)) {
311 staging_block
= _mm_setzero_si128();
313 bcopy(in
+total
, &staging_block
, copy_amt
);
316 * staging_block has the current block of input data,
317 * zero-padded if necessary. This is used in computing
318 * both the decrypted data, and the authentication tag.
320 append_int(counter
++, &s_x
, L
+1);
322 * The tag is computed based on the decrypted data.
324 pad_block
= AESNI_ENC(s_x
, key
, nr
);
325 if (copy_amt
< sizeof(staging_block
)) {
327 * Need to pad out pad_block with 0.
328 * (staging_block was set to 0's above.)
330 uint8_t *end_of_buffer
= (uint8_t*)&pad_block
;
331 bzero(end_of_buffer
+ copy_amt
,
332 sizeof(pad_block
) - copy_amt
);
334 staging_block
= _mm_xor_si128(staging_block
, pad_block
);
337 bcopy(&staging_block
, out
+total
, copy_amt
);
340 mac_block
= xor_and_encrypt(mac_block
, staging_block
,
348 explicit_bzero(&pad_block
, sizeof(pad_block
));
349 explicit_bzero(&staging_block
, sizeof(staging_block
));
350 explicit_bzero(&mac_block
, sizeof(mac_block
));
354 * The exposed decryption routine. This is practically a
355 * copy of the encryption routine, except that the order
356 * in which the tag is created is changed.
357 * XXX combine the two functions at some point!
360 AES_CCM_decrypt(const unsigned char *in
, unsigned char *out
,
361 const unsigned char *addt
, const unsigned char *nonce
,
362 const unsigned char *tag
, uint32_t nbytes
, uint32_t abytes
, int nlen
,
363 int tag_length
, const unsigned char *key
, int nr
)
366 __m128i s0
, rolling_mac
, staging_block
;
369 if (nlen
< 0 || nlen
> 15)
370 panic("%s: bad nonce length %d", __FUNCTION__
, nlen
);
373 * We need to know how many bytes to use to describe
374 * the length of the data. Normally, nlen should be
375 * 12, which leaves us 3 bytes to do that -- 16mbytes of
376 * data to encrypt. But it can be longer or shorter.
378 L
= sizeof(__m128i
) - 1 - nlen
;
381 * Clear out the blocks
383 s0
= _mm_setzero_si128();
385 rolling_mac
= cbc_mac_start(addt
, abytes
, nonce
, nlen
,
386 key
, nr
, nbytes
, tag_length
);
387 /* s0 has flags, nonce, and then 0 */
388 byte_ptr
= (uint8_t*)&s0
;
389 byte_ptr
[0] = L
-1; /* but the flags byte only has L' */
390 bcopy(nonce
, &byte_ptr
[1], nlen
);
393 * Now to cycle through the rest of the data.
395 decrypt_loop(in
, NULL
, nbytes
, s0
, nlen
, &rolling_mac
, key
, nr
);
400 staging_block
= _mm_xor_si128(AESNI_ENC(s0
, key
, nr
), rolling_mac
);
401 if (timingsafe_bcmp(&staging_block
, tag
, tag_length
) != 0) {
406 * Push out the decryption results this time.
408 decrypt_loop(in
, out
, nbytes
, s0
, nlen
, NULL
, key
, nr
);