/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/cmn_err.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/icp.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#include <sys/simd.h>
#include <modes/gcm_impl.h>
#ifdef CAN_USE_GCM_ASM
#include <aes/aes_impl.h>
#endif

#define	GHASH(c, d, t, o) \
	xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
	(o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
	(uint64_t *)(void *)(t));
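
/*
 * Note: GHASH folds one 16-byte block (d) into the running hash in
 * (c)->gcm_ghash by XORing it in and multiplying by the hash subkey H in
 * GF(2^128), i.e. X_i = (X_{i-1} ^ D_i) * H, with the product written to (t).
 * A typical invocation, as used throughout this file, is:
 *
 *	gops = gcm_impl_get_ops();
 *	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
 */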

/* Select GCM implementation */
#define	IMPL_FASTEST	(UINT32_MAX)
#define	IMPL_CYCLE	(UINT32_MAX-1)
#ifdef CAN_USE_GCM_ASM
#define	IMPL_AVX	(UINT32_MAX-2)
#endif
#define	GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_gcm_impl = IMPL_FASTEST;
static uint32_t user_sel_impl = IMPL_FASTEST;
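
/*
 * Note: icp_gcm_impl and user_sel_impl hold either an index into the
 * gcm_supp_impl[] table or one of the sentinels above; IMPL_FASTEST and
 * IMPL_CYCLE sit at the top of the uint32_t range so they can never collide
 * with a valid implementation index.
 */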

#ifdef CAN_USE_GCM_ASM
/* Does the architecture we run on support the MOVBE instruction? */
boolean_t gcm_avx_can_use_movbe = B_FALSE;
/*
 * Whether to use the optimized openssl gcm and ghash implementations.
 * Set to true if module parameter icp_gcm_impl == "avx".
 */
static boolean_t gcm_use_avx = B_FALSE;
#define	GCM_IMPL_USE_AVX	(*(volatile boolean_t *)&gcm_use_avx)

extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *);

static inline boolean_t gcm_avx_will_work(void);
static inline void gcm_set_avx(boolean_t);
static inline boolean_t gcm_toggle_avx(void);
static inline size_t gcm_simd_get_htab_size(boolean_t);

static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
    crypto_data_t *, size_t);

static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_init_avx(gcm_ctx_t *, const uint8_t *, size_t, const uint8_t *,
    size_t, size_t);
#endif /* ifdef CAN_USE_GCM_ASM */

/*
 * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
 * is done in another function.
 */
int
gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_mode_encrypt_contiguous_blocks_avx(
		    ctx, data, length, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t remainder = length;
	size_t need = 0;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

	if (length + ctx->gcm_remainder_len < block_size) {
		/* accumulate bytes here and return */
		memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
		    datap, length);
		ctx->gcm_remainder_len += length;
		if (ctx->gcm_copy_to == NULL) {
			ctx->gcm_copy_to = datap;
		}
		return (CRYPTO_SUCCESS);
	}

	crypto_init_ptrs(out, &iov_or_mp, &offset);

	gops = gcm_impl_get_ops();
	do {
		/* Unprocessed data from last call. */
		if (ctx->gcm_remainder_len > 0) {
			need = block_size - ctx->gcm_remainder_len;

			if (need > remainder)
				return (CRYPTO_DATA_LEN_RANGE);

			memcpy(&((uint8_t *)ctx->gcm_remainder)
			    [ctx->gcm_remainder_len], datap, need);

			blockp = (uint8_t *)ctx->gcm_remainder;
		} else {
			blockp = datap;
		}

		/*
		 * Increment counter. Counter bits are confined
		 * to the bottom 32 bits of the counter block.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
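
		/*
		 * Note: gcm_cb[] is stored in big endian (wire) order; the
		 * increment above byte swaps the low 32 bits to host order,
		 * adds one, masks back to 32 bits and merges them in again,
		 * leaving the IV derived upper 96 bits of the counter block
		 * untouched.
		 */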

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);
		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

		lastp = (uint8_t *)ctx->gcm_tmp;

		ctx->gcm_processed_data_len += block_size;

		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
		    &out_data_1_len, &out_data_2, block_size);

		/* copy block to where it belongs */
		if (out_data_1_len == block_size) {
			copy_block(lastp, out_data_1);
		} else {
			memcpy(out_data_1, lastp, out_data_1_len);
			if (out_data_2 != NULL) {
				memcpy(out_data_2,
				    lastp + out_data_1_len,
				    block_size - out_data_1_len);
			}
		}
		out->cd_offset += block_size;

		/* add ciphertext to the hash */
		GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);

		/* Update pointer to next block of data to be processed. */
		if (ctx->gcm_remainder_len != 0) {
			datap += need;
			ctx->gcm_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			memcpy(ctx->gcm_remainder, datap, remainder);
			ctx->gcm_remainder_len = remainder;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		ctx->gcm_copy_to = NULL;

	} while (remainder > 0);
out:
	return (CRYPTO_SUCCESS);
}

int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_encrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint8_t *ghash, *macp = NULL;
	int i, rv;

	if (out->cd_length <
	    (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;

	if (ctx->gcm_remainder_len > 0) {
		uint64_t counter;
		uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;

		/*
		 * Here is where we deal with data that is not a
		 * multiple of the block size.
		 */

		/*
		 * Increment counter.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);

		macp = (uint8_t *)ctx->gcm_remainder;
		memset(macp + ctx->gcm_remainder_len, 0,
		    block_size - ctx->gcm_remainder_len);

		/* XOR with counter block */
		for (i = 0; i < ctx->gcm_remainder_len; i++) {
			macp[i] ^= tmpp[i];
		}

		/* add ciphertext to the hash */
		GHASH(ctx, macp, ghash, gops);

		ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
	}

	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);
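
	/*
	 * Note: this is the final step of the GCM spec (NIST SP 800-38D):
	 * the AAD and ciphertext bit lengths in gcm_len_a_len_c are hashed
	 * in as the last GHASH block, then the tag is ghash ^ E(K, J0),
	 * computed above by encrypting J0 in place and XORing it into ghash,
	 * which is written out below as the authentication tag.
	 */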

	if (ctx->gcm_remainder_len > 0) {
		rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += ctx->gcm_remainder_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);
	out->cd_offset += ctx->gcm_tag_len;

	return (CRYPTO_SUCCESS);
}

/*
 * This will only deal with decrypting the last block of the input that
 * might not be a multiple of block length.
 */
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	uint8_t *datap, *outp, *counterp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int i;

	/*
	 * Increment counter.
	 * Counter bits are confined to the bottom 32 bits
	 */
	counter = ntohll(ctx->gcm_cb[1] & counter_mask);
	counter = htonll(counter + 1);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

	datap = (uint8_t *)ctx->gcm_remainder;
	outp = &((ctx->gcm_pt_buf)[index]);
	counterp = (uint8_t *)ctx->gcm_tmp;

	/* authentication tag */
	memset((uint8_t *)ctx->gcm_tmp, 0, block_size);
	memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len);

	/* add ciphertext to the hash */
	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());

	/* decrypt remaining ciphertext */
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);

	/* XOR with counter block */
	for (i = 0; i < ctx->gcm_remainder_len; i++) {
		outp[i] = datap[i] ^ counterp[i];
	}
}

int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	(void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
	    (void) xor_block;
	size_t new_len;
	uint8_t *new;

	/*
	 * Copy contiguous ciphertext input blocks to plaintext buffer.
	 * Ciphertext will be decrypted in the final.
	 */
	new_len = ctx->gcm_pt_buf_len + length;
	new = vmem_alloc(new_len, KM_SLEEP);
	if (new == NULL) {
		vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
		ctx->gcm_pt_buf = NULL;
		return (CRYPTO_HOST_MEMORY);
	}

	if (ctx->gcm_pt_buf != NULL) {
		memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
		vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
	} else {
		ASSERT0(ctx->gcm_pt_buf_len);
	}

	ctx->gcm_pt_buf = new;
	ctx->gcm_pt_buf_len = new_len;
	memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
	    length);
	ctx->gcm_processed_data_len += length;

	ctx->gcm_remainder_len = 0;
	return (CRYPTO_SUCCESS);
}

int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_decrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t pt_len;
	size_t remainder;
	uint8_t *ghash;
	uint8_t *blockp;
	uint8_t *cbp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int processed = 0, rv;

	ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);

	gops = gcm_impl_get_ops();
	pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	ghash = (uint8_t *)ctx->gcm_ghash;
	blockp = ctx->gcm_pt_buf;
	remainder = pt_len;
	while (remainder > 0) {
		/* Incomplete last block */
		if (remainder < block_size) {
			memcpy(ctx->gcm_remainder, blockp, remainder);
			ctx->gcm_remainder_len = remainder;
			/*
			 * not expecting anymore ciphertext, just
			 * compute plaintext for the remaining input
			 */
			gcm_decrypt_incomplete_block(ctx, block_size,
			    processed, encrypt_block, xor_block);
			ctx->gcm_remainder_len = 0;
			goto out;
		}
		/* add ciphertext to the hash */
		GHASH(ctx, blockp, ghash, gops);

		/*
		 * Increment counter.
		 * Counter bits are confined to the bottom 32 bits
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		cbp = (uint8_t *)ctx->gcm_tmp;
		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);

		/* XOR with ciphertext */
		xor_block(cbp, blockp);

		processed += block_size;
		blockp += block_size;
		remainder -= block_size;
	}
out:
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	/* compare the input authentication tag with what we calculated */
	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match */
		return (CRYPTO_INVALID_MAC);
	}
	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);
	out->cd_offset += pt_len;

	return (CRYPTO_SUCCESS);
}

static int
gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
{
	size_t tag_len;

	/*
	 * Check the length of the authentication tag (in bits).
	 */
	tag_len = gcm_param->ulTagBits;
	switch (tag_len) {
	case 32:
	case 64:
	case 96:
	case 104:
	case 112:
	case 120:
	case 128:
		break;
	default:
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

	if (gcm_param->ulIvLen == 0)
		return (CRYPTO_MECHANISM_PARAM_INVALID);

	return (CRYPTO_SUCCESS);
}

static void
gcm_format_initial_blocks(const uint8_t *iv, ulong_t iv_len,
    gcm_ctx_t *ctx, size_t block_size,
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *cb;
	ulong_t remainder = iv_len;
	ulong_t processed = 0;
	uint8_t *datap, *ghash;
	uint64_t len_a_len_c[2];

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;
	cb = (uint8_t *)ctx->gcm_cb;
	if (iv_len == 12) {
		memcpy(cb, iv, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* J0 will be used again in the final */
		copy_block(cb, (uint8_t *)ctx->gcm_J0);
	} else {
		/* GHASH the IV */
		do {
			if (remainder < block_size) {
				memset(cb, 0, block_size);
				memcpy(cb, &(iv[processed]), remainder);
				datap = (uint8_t *)cb;
				remainder = 0;
			} else {
				datap = (uint8_t *)(&(iv[processed]));
				processed += block_size;
				remainder -= block_size;
			}
			GHASH(ctx, datap, ghash, gops);
		} while (remainder > 0);

		len_a_len_c[0] = 0;
		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
		GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);

		/* J0 will be used again in the final */
		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
	}
}
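
/*
 * Note: this mirrors the J0 derivation of NIST SP 800-38D: a 96-bit IV is
 * used directly as IV || 0^31 || 1, while any other IV length is zero padded,
 * run through GHASH and finalized with the IV bit length, which is why the
 * ghash state and len_a_len_c are involved above.
 */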

static int
gcm_init(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
    const uint8_t *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *ghash, *datap, *authp;
	size_t remainder, processed;

	/* encrypt zero block to get subkey H */
	memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
	    (uint8_t *)ctx->gcm_H);

	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
	    copy_block, xor_block);

	gops = gcm_impl_get_ops();
	authp = (uint8_t *)ctx->gcm_tmp;
	ghash = (uint8_t *)ctx->gcm_ghash;
	memset(authp, 0, block_size);
	memset(ghash, 0, block_size);

	processed = 0;
	remainder = auth_data_len;
	do {
		if (remainder < block_size) {
			/*
			 * There's not a block full of data, pad rest of
			 * buffer with zero
			 */
			if (auth_data != NULL) {
				memset(authp, 0, block_size);
				memcpy(authp, &(auth_data[processed]),
				    remainder);
			} else {
				ASSERT0(remainder);
			}

			datap = (uint8_t *)authp;
			remainder = 0;
		} else {
			datap = (uint8_t *)(&(auth_data[processed]));
			processed += block_size;
			remainder -= block_size;
		}

		/* add auth data to the hash */
		GHASH(ctx, datap, ghash, gops);

	} while (remainder > 0);

	return (CRYPTO_SUCCESS);
}

/*
 * Init the GCM context struct. Handle the cycle and avx implementations here.
 */
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param,
    size_t block_size, int (*encrypt_block)(const void *, const uint8_t *,
    uint8_t *), void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	CK_AES_GCM_PARAMS *gcm_param;
	int rv = CRYPTO_SUCCESS;
	size_t tag_len, iv_len;

	if (param != NULL) {
		gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;

		if ((rv = gcm_validate_args(gcm_param)) != 0) {
			return (rv);
		}
		gcm_ctx->gcm_flags |= GCM_MODE;

		size_t tbits = gcm_param->ulTagBits;
		tag_len = CRYPTO_BITS2BYTES(tbits);
		iv_len = gcm_param->ulIvLen;

		gcm_ctx->gcm_tag_len = tag_len;
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

	const uint8_t *iv = (const uint8_t *)gcm_param->pIv;
	const uint8_t *aad = (const uint8_t *)gcm_param->pAAD;
	size_t aad_len = gcm_param->ulAADLen;

#ifdef CAN_USE_GCM_ASM
	boolean_t needs_bswap =
	    ((aes_key_t *)gcm_ctx->gcm_keysched)->ops->needs_byteswap;

	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
	} else {
		/*
		 * Handle the "cycle" implementation by creating avx and
		 * non-avx contexts alternately.
		 */
		gcm_ctx->gcm_use_avx = gcm_toggle_avx();

		/* The avx impl. doesn't handle byte swapped key schedules. */
		if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) {
			gcm_ctx->gcm_use_avx = B_FALSE;
		}
		/*
		 * If this is a GCM context, use the MOVBE and the BSWAP
		 * variants alternately.
		 */
		if (gcm_ctx->gcm_use_avx == B_TRUE &&
		    zfs_movbe_available() == B_TRUE) {
			(void) atomic_toggle_boolean_nv(
			    (volatile boolean_t *)&gcm_avx_can_use_movbe);
		}
	}
	/*
	 * We don't handle byte swapped key schedules in the avx code path,
	 * still they could be created by the aes generic implementation.
	 * Make sure not to use them since we'll corrupt data if we do.
	 */
	if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) {
		gcm_ctx->gcm_use_avx = B_FALSE;

		cmn_err_once(CE_WARN,
		    "ICP: Can't use the aes generic or cycle implementations "
		    "in combination with the gcm avx implementation!");
		cmn_err_once(CE_WARN,
		    "ICP: Falling back to a compatible implementation, "
		    "aes-gcm performance will likely be degraded.");
		cmn_err_once(CE_WARN,
		    "ICP: Choose at least the x86_64 aes implementation to "
		    "restore performance.");
	}

	/* Allocate Htab memory as needed. */
	if (gcm_ctx->gcm_use_avx == B_TRUE) {
		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);

		if (htab_len == 0) {
			return (CRYPTO_MECHANISM_PARAM_INVALID);
		}
		gcm_ctx->gcm_htab_len = htab_len;
		gcm_ctx->gcm_Htable =
		    kmem_alloc(htab_len, KM_SLEEP);

		if (gcm_ctx->gcm_Htable == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}
	/* Avx and non avx context initialization differs from here on. */
	if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
		if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size,
		    encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
#ifdef CAN_USE_GCM_ASM
	} else {
		if (gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len,
		    block_size) != CRYPTO_SUCCESS) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
	}
#endif /* ifdef CAN_USE_GCM_ASM */

	return (rv);
}

void *
gcm_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GCM_MODE;
	return (gcm_ctx);
}

/* GCM implementation that contains the fastest methods */
static gcm_impl_ops_t gcm_fastest_impl = {
	.name = "fastest"
};

/* All compiled in implementations */
static const gcm_impl_ops_t *gcm_all_impl[] = {
	&gcm_generic_impl,
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	&gcm_pclmulqdq_impl,
#endif
};

/* Indicate that benchmark has been completed */
static boolean_t gcm_impl_initialized = B_FALSE;

/* Hold all supported implementations */
static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];

/*
 * Returns the GCM operations for encrypt/decrypt/key setup. When a
 * SIMD implementation is not allowed in the current context, then
 * fall back to the fastest generic implementation.
 */
const gcm_impl_ops_t *
gcm_impl_get_ops(void)
{
	if (!kfpu_allowed())
		return (&gcm_generic_impl);

	const gcm_impl_ops_t *ops = NULL;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	switch (impl) {
	case IMPL_FASTEST:
		ASSERT(gcm_impl_initialized);
		ops = &gcm_fastest_impl;
		break;
	case IMPL_CYCLE: {
		/* Cycle through supported implementations */
		ASSERT(gcm_impl_initialized);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		static size_t cycle_impl_idx = 0;
		size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
		ops = gcm_supp_impl[idx];
		break;
	}
#ifdef CAN_USE_GCM_ASM
	case IMPL_AVX:
		/*
		 * Make sure that we return a valid implementation while
		 * switching to the avx implementation since there still
		 * may be unfinished non-avx contexts around.
		 */
		ops = &gcm_generic_impl;
		break;
#endif
	default:
		ASSERT3U(impl, <, gcm_supp_impl_cnt);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		if (impl < ARRAY_SIZE(gcm_all_impl))
			ops = gcm_supp_impl[impl];
		break;
	}

	ASSERT3P(ops, !=, NULL);

	return (ops);
}

/*
 * Initialize all supported implementations.
 */
void
gcm_impl_init(void)
{
	gcm_impl_ops_t *curr_impl;
	int i, c;

	/* Move supported implementations into gcm_supp_impls */
	for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
		curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];

		if (curr_impl->is_supported())
			gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
	}
	gcm_supp_impl_cnt = c;

	/*
	 * Set the fastest implementation given the assumption that the
	 * hardware accelerated version is the fastest.
	 */
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	if (gcm_pclmulqdq_impl.is_supported()) {
		memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
		    sizeof (gcm_fastest_impl));
	} else
#endif
	{
		memcpy(&gcm_fastest_impl, &gcm_generic_impl,
		    sizeof (gcm_fastest_impl));
	}

	strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);

#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if it's available and the implementation
	 * hasn't changed from its default value of fastest on module load.
	 */
	if (gcm_avx_will_work()) {
		if (zfs_movbe_available() == B_TRUE) {
			atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
		}
		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
			gcm_set_avx(B_TRUE);
		}
	}
#endif
	/* Finish initialization */
	atomic_swap_32(&icp_gcm_impl, user_sel_impl);
	gcm_impl_initialized = B_TRUE;
}

static const struct {
	const char *name;
	uint32_t sel;
} gcm_impl_opts[] = {
		{ "cycle",	IMPL_CYCLE },
		{ "fastest",	IMPL_FASTEST },
#ifdef CAN_USE_GCM_ASM
		{ "avx",	IMPL_AVX },
#endif
};

/*
 * Function sets desired gcm implementation.
 *
 * If we are called before init(), user preference will be saved in
 * user_sel_impl, and applied in later init() call. This occurs when module
 * parameter is specified on module load. Otherwise, directly update
 * icp_gcm_impl.
 *
 * @val		Name of gcm implementation to use
 */
int
gcm_impl_set(const char *val)
{
	int err = -EINVAL;
	char req_name[GCM_IMPL_NAME_MAX];
	uint32_t impl = GCM_IMPL_READ(user_sel_impl);
	size_t i;

	/* sanitize input */
	i = strnlen(val, GCM_IMPL_NAME_MAX);
	if (i == 0 || i >= GCM_IMPL_NAME_MAX)
		return (err);

	strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
	while (i > 0 && isspace(req_name[i-1]))
		i--;
	req_name[i] = '\0';

	/* Check mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
			impl = gcm_impl_opts[i].sel;
			err = 0;
			break;
		}
	}

	/* check all supported impl if init() was already called */
	if (err != 0 && gcm_impl_initialized) {
		/* check all supported implementations */
		for (i = 0; i < gcm_supp_impl_cnt; i++) {
			if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
				impl = i;
				err = 0;
				break;
			}
		}
	}
#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if available and the requested one is
	 * avx or fastest.
	 */
	if (gcm_avx_will_work() == B_TRUE &&
	    (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
		gcm_set_avx(B_TRUE);
	} else {
		gcm_set_avx(B_FALSE);
	}
#endif

	if (err == 0) {
		if (gcm_impl_initialized)
			atomic_swap_32(&icp_gcm_impl, impl);
		else
			atomic_swap_32(&user_sel_impl, impl);
	}

	return (err);
}
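
/*
 * Note (usage sketch): on Linux the selector is exposed as the module
 * parameter icp_gcm_impl, so a value such as "avx", "fastest", "cycle" or a
 * specific implementation name can be set at module load time, e.g.
 * "modprobe icp icp_gcm_impl=avx", or at runtime through the corresponding
 * parameters file under /sys/module/ (the exact path depends on how the icp
 * is built into the zfs module stack).
 */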

#if defined(_KERNEL) && defined(__linux__)

static int
icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
{
	return (gcm_impl_set(val));
}

static int
icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
{
	int i, cnt = 0;
	char *fmt;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	ASSERT(gcm_impl_initialized);

	/* list mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
		    gcm_impl_opts[i].name);
	}

	/* list all supported implementations */
	for (i = 0; i < gcm_supp_impl_cnt; i++) {
		fmt = (i == impl) ? "[%s] " : "%s ";
		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
		    gcm_supp_impl[i]->name);
	}

	return (cnt);
}

module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
    NULL, 0644);
MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#endif /* defined(__KERNEL) */

#ifdef CAN_USE_GCM_ASM
#define	GCM_BLOCK_LEN 16
/*
 * The openssl asm routines are 6x aggregated and need that many bytes
 * as minimum.
 */
#define	GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
#define	GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
/*
 * Ensure the chunk size is reasonable since we are allocating a
 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
 */
#define	GCM_AVX_MAX_CHUNK_SIZE \
	(((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
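
/*
 * Note: with GCM_AVX_MIN_DECRYPT_BYTES being 16 * 6 = 96, this evaluates to
 * (131072 / 96) * 96 = 131040 bytes, i.e. 128 KiB rounded down to a multiple
 * of the 6-block aggregation width.
 */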

/* Clear the FPU registers since they hold sensitive internal state. */
#define	clear_fpu_regs() clear_fpu_regs_avx()
#define	GHASH_AVX(ctx, in, len) \
	gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
	in, len)

#define	gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)

/* Get the chunk size module parameter. */
#define	GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size

/*
 * Module parameter: number of bytes to process at once while owning the FPU.
 * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is
 * ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
 */
static uint32_t gcm_avx_chunk_size =
	((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
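
/*
 * Note: the default above works out to (32768 / 96) * 96 = 32736 bytes,
 * inside the GCM_AVX_MIN_ENCRYPT_BYTES .. GCM_AVX_MAX_CHUNK_SIZE range that
 * icp_gcm_avx_set_chunk_size() below accepts for updates.
 */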

extern void ASMABI clear_fpu_regs_avx(void);
extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr,
    const uint32_t pt[4], uint32_t ct[4]);

extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
    const uint8_t *in, size_t len);

extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

static inline boolean_t
gcm_avx_will_work(void)
{
	/* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
	return (kfpu_allowed() &&
	    zfs_avx_available() && zfs_aes_available() &&
	    zfs_pclmulqdq_available());
}

static inline void
gcm_set_avx(boolean_t val)
{
	if (gcm_avx_will_work() == B_TRUE) {
		atomic_swap_32(&gcm_use_avx, val);
	}
}

static inline boolean_t
gcm_toggle_avx(void)
{
	if (gcm_avx_will_work() == B_TRUE) {
		return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
	} else {
		return (B_FALSE);
	}
}

static inline size_t
gcm_simd_get_htab_size(boolean_t simd_mode)
{
	switch (simd_mode) {
	case B_TRUE:
		return (2 * 6 * 2 * sizeof (uint64_t));

	default:
		return (0);
	}
}
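
/*
 * Note: 2 * 6 * 2 * sizeof (uint64_t) = 192 bytes, i.e. twelve 16-byte
 * entries; the exact layout of this table is dictated by the openssl derived
 * gcm_init_htab_avx()/gcm_ghash_avx() assembly and its 6-fold aggregation
 * (see GCM_AVX_MIN_DECRYPT_BYTES above).
 */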

/* Increment the GCM counter block by n. */
static inline void
gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
{
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);

	counter = htonll(counter + n);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
}
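
/*
 * Note (worked example): with 0x000000ff in the low 32 bits of the big
 * endian counter block and n == 1, the value is byte swapped to host order,
 * incremented to 0x00000100, masked to 32 bits and swapped back, so only the
 * last four bytes of gcm_cb change. gcm_incr_counter_block() above is the
 * common by-one case.
 */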

/*
 * Encrypt multiple blocks of data in GCM mode.
 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
 * if possible. While processing a chunk the FPU is "locked".
 */
static int
gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
    size_t length, crypto_data_t *out, size_t block_size)
{
	size_t bleft = length;
	size_t need = 0;
	size_t done = 0;
	uint8_t *datap = (uint8_t *)data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint64_t *ghash = ctx->gcm_ghash;
	uint64_t *cb = ctx->gcm_cb;
	uint8_t *ct_buf = NULL;
	uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;

	ASSERT(block_size == GCM_BLOCK_LEN);
	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
	    B_FALSE);

	/*
	 * If the last call left an incomplete block, try to fill
	 * it first.
	 */
	if (ctx->gcm_remainder_len > 0) {
		need = block_size - ctx->gcm_remainder_len;
		if (length < need) {
			/* Accumulate bytes here and return. */
			memcpy((uint8_t *)ctx->gcm_remainder +
			    ctx->gcm_remainder_len, datap, length);

			ctx->gcm_remainder_len += length;
			if (ctx->gcm_copy_to == NULL) {
				ctx->gcm_copy_to = datap;
			}
			return (CRYPTO_SUCCESS);
		}
		/* Complete incomplete block. */
		memcpy((uint8_t *)ctx->gcm_remainder +
		    ctx->gcm_remainder_len, datap, need);

		ctx->gcm_copy_to = NULL;
	}

	/* Allocate a buffer to encrypt to if there is enough input. */
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
		if (ct_buf == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}

	/* If we completed an incomplete block, encrypt and write it out. */
	if (ctx->gcm_remainder_len > 0) {
		kfpu_begin();
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		clear_fpu_regs();
		kfpu_end();
		rv = crypto_put_output_data(tmp, out, block_size);
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		bleft -= need;
		datap += need;
		ctx->gcm_remainder_len = 0;
	}

	/* Do the bulk encryption in chunk_size blocks. */
	for (; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_encrypt(
		    datap, ct_buf, chunk_size, key, cb, ghash);

		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			rv = CRYPTO_FAILED;
			goto out_nofpu;
		}
		rv = crypto_put_output_data(ct_buf, out, chunk_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out_nofpu;
		}
		out->cd_offset += chunk_size;
		datap += chunk_size;
		ctx->gcm_processed_data_len += chunk_size;
	}
	/* Check if we are already done. */
	if (bleft == 0) {
		goto out_nofpu;
	}
	/* Bulk encrypt the remaining data. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
		if (done == 0) {
			rv = CRYPTO_FAILED;
			goto out;
		}
		rv = crypto_put_output_data(ct_buf, out, done);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += done;
		ctx->gcm_processed_data_len += done;
		datap += done;
		bleft -= done;
	}
	/* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
	while (bleft > 0) {
		if (bleft < block_size) {
			memcpy(ctx->gcm_remainder, datap, bleft);
			ctx->gcm_remainder_len = bleft;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		/* Encrypt, hash and write out. */
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx(datap, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		rv = crypto_put_output_data(tmp, out, block_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		datap += block_size;
		bleft -= block_size;
	}
out:
	clear_fpu_regs();
	kfpu_end();
out_nofpu:
	if (ct_buf != NULL) {
		vmem_free(ct_buf, chunk_size);
	}
	return (rv);
}

/*
 * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual
 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
 */
static int
gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
	uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
	uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
	size_t rem_len = ctx->gcm_remainder_len;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)keysched)->nr;
	int rv;

	ASSERT(block_size == GCM_BLOCK_LEN);
	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
	    B_FALSE);

	if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	kfpu_begin();
	/* Pad last incomplete block with zeros, encrypt and hash. */
	if (rem_len > 0) {
		uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
		const uint32_t *cb = (uint32_t *)ctx->gcm_cb;

		aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
		memset(remainder + rem_len, 0, block_size - rem_len);
		for (int i = 0; i < rem_len; i++) {
			remainder[i] ^= tmp[i];
		}
		GHASH_AVX(ctx, remainder, block_size);
		ctx->gcm_processed_data_len += rem_len;
		/* No need to increment counter_block, it's the last block. */
	}

	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(keysched, aes_rounds, J0, J0);

	gcm_xor_avx((uint8_t *)J0, ghash);
	clear_fpu_regs();
	kfpu_end();

	/* Output remainder. */
	if (rem_len > 0) {
		rv = crypto_put_output_data(remainder, out, rem_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += rem_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);

	out->cd_offset += ctx->gcm_tag_len;
	return (CRYPTO_SUCCESS);
}

/*
 * Finalize decryption: We just have accumulated crypto text, so now we
 * decrypt it here in place.
 */
static int
gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
	ASSERT3U(block_size, ==, 16);
	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
	    B_FALSE);

	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	uint8_t *datap = ctx->gcm_pt_buf;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint32_t *cb = (uint32_t *)ctx->gcm_cb;
	uint64_t *ghash = ctx->gcm_ghash;
	uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;
	size_t bleft, done;

	/*
	 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
	 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple
	 * of GCM_AVX_MIN_DECRYPT_BYTES.
	 */
	for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_decrypt(datap, datap, chunk_size,
		    (const void *)key, ctx->gcm_cb, ghash);
		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			return (CRYPTO_FAILED);
		}
		datap += done;
	}
	/* Decrypt remainder, which is less than chunk size, in one go. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
		done = aesni_gcm_decrypt(datap, datap, bleft,
		    (const void *)key, ctx->gcm_cb, ghash);
		if (done == 0) {
			clear_fpu_regs();
			kfpu_end();
			return (CRYPTO_FAILED);
		}
		datap += done;
		bleft -= done;
	}
	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);

	/*
	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
	 * decrypt them block by block.
	 */
	while (bleft > 0) {
		/* Incomplete last block. */
		if (bleft < block_size) {
			uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;

			memset(lastb, 0, block_size);
			memcpy(lastb, datap, bleft);
			/* The GCM processing. */
			GHASH_AVX(ctx, lastb, block_size);
			aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
			for (size_t i = 0; i < bleft; i++) {
				datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
			}
			break;
		}
		/* The GCM processing. */
		GHASH_AVX(ctx, datap, block_size);
		aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
		gcm_xor_avx((uint8_t *)tmp, datap);
		gcm_incr_counter_block(ctx);

		datap += block_size;
		bleft -= block_size;
	}
	if (rv != CRYPTO_SUCCESS) {
		clear_fpu_regs();
		kfpu_end();
		return (rv);
	}
	/* Decryption done, finish the tag. */
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
	    (uint32_t *)ctx->gcm_J0);

	gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);

	/* We are done with the FPU, restore its state. */
	clear_fpu_regs();
	kfpu_end();

	/* Compare the input authentication tag with what we calculated. */
	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match. */
		return (CRYPTO_INVALID_MAC);
	}
	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
	if (rv != CRYPTO_SUCCESS) {
		return (rv);
	}
	out->cd_offset += pt_len;
	return (CRYPTO_SUCCESS);
}

/*
 * Initialize the GCM params H, Htable and the counter block. Save the
 * initial counter block.
 */
static int
gcm_init_avx(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
    const uint8_t *auth_data, size_t auth_data_len, size_t block_size)
{
	uint8_t *cb = (uint8_t *)ctx->gcm_cb;
	uint64_t *H = ctx->gcm_H;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
	const uint8_t *datap = auth_data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t bleft;

	ASSERT(block_size == GCM_BLOCK_LEN);
	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
	    B_FALSE);

	/* Init H (encrypt zero block) and create the initial counter block. */
	memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));
	memset(H, 0, sizeof (ctx->gcm_H));
	kfpu_begin();
	aes_encrypt_intel(keysched, aes_rounds,
	    (const uint32_t *)H, (uint32_t *)H);

	gcm_init_htab_avx(ctx->gcm_Htable, H);

	if (iv_len == 12) {
		memcpy(cb, iv, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* We need the ICB later. */
		memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
	} else {
		/*
		 * Most consumers use 12 byte IVs, so it's OK to use the
		 * original routines for other IV sizes, just avoid nesting
		 * kfpu_begin calls.
		 */
		clear_fpu_regs();
		kfpu_end();
		gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
		    aes_copy_block, aes_xor_block);
		kfpu_begin();
	}

	/* Openssl post increments the counter, adjust for that. */
	gcm_incr_counter_block(ctx);

	/* Ghash AAD in chunk_size blocks. */
	for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
		GHASH_AVX(ctx, datap, chunk_size);
		datap += chunk_size;
		clear_fpu_regs();
		kfpu_end();
		kfpu_begin();
	}
	/* Ghash the remainder and handle possible incomplete GCM block. */
	if (bleft > 0) {
		size_t incomp = bleft % block_size;

		bleft -= incomp;
		if (bleft > 0) {
			GHASH_AVX(ctx, datap, bleft);
			datap += bleft;
		}
		if (incomp > 0) {
			/* Zero pad and hash incomplete last block. */
			uint8_t *authp = (uint8_t *)ctx->gcm_tmp;

			memset(authp, 0, block_size);
			memcpy(authp, datap, incomp);
			GHASH_AVX(ctx, authp, block_size);
		}
	}
	clear_fpu_regs();
	kfpu_end();
	return (CRYPTO_SUCCESS);
}

#if defined(_KERNEL)
static int
icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	char val_rounded[16];
	int error = 0;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

	if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
		return (-EINVAL);

	snprintf(val_rounded, 16, "%u", (uint32_t)val);
	error = param_set_uint(val_rounded, kp);
	return (error);
}
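
/*
 * Note (illustration): a write of 65536 to this parameter is rounded down to
 * (65536 / 96) * 96 = 65472 bytes before being stored via param_set_uint(),
 * while values below GCM_AVX_MIN_ENCRYPT_BYTES (288) or above
 * GCM_AVX_MAX_CHUNK_SIZE (131040) are rejected.
 */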

module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
    param_get_uint, &gcm_avx_chunk_size, 0644);

MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
    "How many bytes to process while owning the FPU");

#endif /* defined(__KERNEL) */
#endif /* ifdef CAN_USE_GCM_ASM */