2 * AMD Cryptographic Coprocessor (CCP) driver
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/pci_ids.h>
17 #include <linux/kthread.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/ccp.h>
24 #include <linux/scatterlist.h>
25 #include <crypto/scatterwalk.h>
26 #include <crypto/sha.h>
31 CCP_MEMTYPE_SYSTEM
= 0,
41 enum dma_data_direction dir
;
44 struct ccp_dm_workarea
{
46 struct dma_pool
*dma_pool
;
50 struct ccp_dma_info dma
;
53 struct ccp_sg_workarea
{
54 struct scatterlist
*sg
;
58 struct scatterlist
*dma_sg
;
59 struct device
*dma_dev
;
60 unsigned int dma_count
;
61 enum dma_data_direction dma_dir
;
69 struct ccp_sg_workarea sg_wa
;
70 struct ccp_dm_workarea dm_wa
;
74 enum ccp_memtype type
;
76 struct ccp_dma_info dma
;
82 enum ccp_aes_type type
;
83 enum ccp_aes_mode mode
;
84 enum ccp_aes_action action
;
87 struct ccp_xts_aes_op
{
88 enum ccp_aes_action action
;
89 enum ccp_xts_aes_unit_size unit_size
;
93 enum ccp_sha_type type
;
102 struct ccp_passthru_op
{
103 enum ccp_passthru_bitwise bit_mod
;
104 enum ccp_passthru_byteswap byte_swap
;
108 enum ccp_ecc_function function
;
112 struct ccp_cmd_queue
*cmd_q
;
126 struct ccp_aes_op aes
;
127 struct ccp_xts_aes_op xts
;
128 struct ccp_sha_op sha
;
129 struct ccp_rsa_op rsa
;
130 struct ccp_passthru_op passthru
;
131 struct ccp_ecc_op ecc
;
135 /* SHA initial context values */
136 static const __be32 ccp_sha1_init
[CCP_SHA_CTXSIZE
/ sizeof(__be32
)] = {
137 cpu_to_be32(SHA1_H0
), cpu_to_be32(SHA1_H1
),
138 cpu_to_be32(SHA1_H2
), cpu_to_be32(SHA1_H3
),
139 cpu_to_be32(SHA1_H4
), 0, 0, 0,
142 static const __be32 ccp_sha224_init
[CCP_SHA_CTXSIZE
/ sizeof(__be32
)] = {
143 cpu_to_be32(SHA224_H0
), cpu_to_be32(SHA224_H1
),
144 cpu_to_be32(SHA224_H2
), cpu_to_be32(SHA224_H3
),
145 cpu_to_be32(SHA224_H4
), cpu_to_be32(SHA224_H5
),
146 cpu_to_be32(SHA224_H6
), cpu_to_be32(SHA224_H7
),
149 static const __be32 ccp_sha256_init
[CCP_SHA_CTXSIZE
/ sizeof(__be32
)] = {
150 cpu_to_be32(SHA256_H0
), cpu_to_be32(SHA256_H1
),
151 cpu_to_be32(SHA256_H2
), cpu_to_be32(SHA256_H3
),
152 cpu_to_be32(SHA256_H4
), cpu_to_be32(SHA256_H5
),
153 cpu_to_be32(SHA256_H6
), cpu_to_be32(SHA256_H7
),
156 /* The CCP cannot perform zero-length sha operations so the caller
157 * is required to buffer data for the final operation. However, a
158 * sha operation for a message with a total length of zero is valid
159 * so known values are required to supply the result.
161 static const u8 ccp_sha1_zero
[CCP_SHA_CTXSIZE
] = {
162 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
163 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
164 0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
165 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
168 static const u8 ccp_sha224_zero
[CCP_SHA_CTXSIZE
] = {
169 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
170 0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
171 0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
172 0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
175 static const u8 ccp_sha256_zero
[CCP_SHA_CTXSIZE
] = {
176 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
177 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
178 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
179 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
182 static u32
ccp_addr_lo(struct ccp_dma_info
*info
)
184 return lower_32_bits(info
->address
+ info
->offset
);
187 static u32
ccp_addr_hi(struct ccp_dma_info
*info
)
189 return upper_32_bits(info
->address
+ info
->offset
) & 0x0000ffff;
192 static int ccp_do_cmd(struct ccp_op
*op
, u32
*cr
, unsigned int cr_count
)
194 struct ccp_cmd_queue
*cmd_q
= op
->cmd_q
;
195 struct ccp_device
*ccp
= cmd_q
->ccp
;
196 void __iomem
*cr_addr
;
201 /* We could read a status register to see how many free slots
202 * are actually available, but reading that register resets it
203 * and you could lose some error information.
207 cr0
= (cmd_q
->id
<< REQ0_CMD_Q_SHIFT
)
208 | (op
->jobid
<< REQ0_JOBID_SHIFT
)
209 | REQ0_WAIT_FOR_WRITE
;
212 cr0
|= REQ0_STOP_ON_COMPLETE
213 | REQ0_INT_ON_COMPLETE
;
215 if (op
->ioc
|| !cmd_q
->free_slots
)
216 cr0
|= REQ0_INT_ON_COMPLETE
;
218 /* Start at CMD_REQ1 */
219 cr_addr
= ccp
->io_regs
+ CMD_REQ0
+ CMD_REQ_INCR
;
221 mutex_lock(&ccp
->req_mutex
);
223 /* Write CMD_REQ1 through CMD_REQx first */
224 for (i
= 0; i
< cr_count
; i
++, cr_addr
+= CMD_REQ_INCR
)
225 iowrite32(*(cr
+ i
), cr_addr
);
227 /* Tell the CCP to start */
229 iowrite32(cr0
, ccp
->io_regs
+ CMD_REQ0
);
231 mutex_unlock(&ccp
->req_mutex
);
233 if (cr0
& REQ0_INT_ON_COMPLETE
) {
234 /* Wait for the job to complete */
235 ret
= wait_event_interruptible(cmd_q
->int_queue
,
237 if (ret
|| cmd_q
->cmd_error
) {
238 /* On error delete all related jobs from the queue */
239 cmd
= (cmd_q
->id
<< DEL_Q_ID_SHIFT
)
242 iowrite32(cmd
, ccp
->io_regs
+ DEL_CMD_Q_JOB
);
246 } else if (op
->soc
) {
247 /* Delete just head job from the queue on SoC */
249 | (cmd_q
->id
<< DEL_Q_ID_SHIFT
)
252 iowrite32(cmd
, ccp
->io_regs
+ DEL_CMD_Q_JOB
);
255 cmd_q
->free_slots
= CMD_Q_DEPTH(cmd_q
->q_status
);
263 static int ccp_perform_aes(struct ccp_op
*op
)
267 /* Fill out the register contents for REQ1 through REQ6 */
268 cr
[0] = (CCP_ENGINE_AES
<< REQ1_ENGINE_SHIFT
)
269 | (op
->u
.aes
.type
<< REQ1_AES_TYPE_SHIFT
)
270 | (op
->u
.aes
.mode
<< REQ1_AES_MODE_SHIFT
)
271 | (op
->u
.aes
.action
<< REQ1_AES_ACTION_SHIFT
)
272 | (op
->ksb_key
<< REQ1_KEY_KSB_SHIFT
);
273 cr
[1] = op
->src
.u
.dma
.length
- 1;
274 cr
[2] = ccp_addr_lo(&op
->src
.u
.dma
);
275 cr
[3] = (op
->ksb_ctx
<< REQ4_KSB_SHIFT
)
276 | (CCP_MEMTYPE_SYSTEM
<< REQ4_MEMTYPE_SHIFT
)
277 | ccp_addr_hi(&op
->src
.u
.dma
);
278 cr
[4] = ccp_addr_lo(&op
->dst
.u
.dma
);
279 cr
[5] = (CCP_MEMTYPE_SYSTEM
<< REQ6_MEMTYPE_SHIFT
)
280 | ccp_addr_hi(&op
->dst
.u
.dma
);
282 if (op
->u
.aes
.mode
== CCP_AES_MODE_CFB
)
283 cr
[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT
);
291 return ccp_do_cmd(op
, cr
, ARRAY_SIZE(cr
));
294 static int ccp_perform_xts_aes(struct ccp_op
*op
)
298 /* Fill out the register contents for REQ1 through REQ6 */
299 cr
[0] = (CCP_ENGINE_XTS_AES_128
<< REQ1_ENGINE_SHIFT
)
300 | (op
->u
.xts
.action
<< REQ1_AES_ACTION_SHIFT
)
301 | (op
->u
.xts
.unit_size
<< REQ1_XTS_AES_SIZE_SHIFT
)
302 | (op
->ksb_key
<< REQ1_KEY_KSB_SHIFT
);
303 cr
[1] = op
->src
.u
.dma
.length
- 1;
304 cr
[2] = ccp_addr_lo(&op
->src
.u
.dma
);
305 cr
[3] = (op
->ksb_ctx
<< REQ4_KSB_SHIFT
)
306 | (CCP_MEMTYPE_SYSTEM
<< REQ4_MEMTYPE_SHIFT
)
307 | ccp_addr_hi(&op
->src
.u
.dma
);
308 cr
[4] = ccp_addr_lo(&op
->dst
.u
.dma
);
309 cr
[5] = (CCP_MEMTYPE_SYSTEM
<< REQ6_MEMTYPE_SHIFT
)
310 | ccp_addr_hi(&op
->dst
.u
.dma
);
318 return ccp_do_cmd(op
, cr
, ARRAY_SIZE(cr
));
321 static int ccp_perform_sha(struct ccp_op
*op
)
325 /* Fill out the register contents for REQ1 through REQ6 */
326 cr
[0] = (CCP_ENGINE_SHA
<< REQ1_ENGINE_SHIFT
)
327 | (op
->u
.sha
.type
<< REQ1_SHA_TYPE_SHIFT
)
329 cr
[1] = op
->src
.u
.dma
.length
- 1;
330 cr
[2] = ccp_addr_lo(&op
->src
.u
.dma
);
331 cr
[3] = (op
->ksb_ctx
<< REQ4_KSB_SHIFT
)
332 | (CCP_MEMTYPE_SYSTEM
<< REQ4_MEMTYPE_SHIFT
)
333 | ccp_addr_hi(&op
->src
.u
.dma
);
337 cr
[4] = lower_32_bits(op
->u
.sha
.msg_bits
);
338 cr
[5] = upper_32_bits(op
->u
.sha
.msg_bits
);
344 return ccp_do_cmd(op
, cr
, ARRAY_SIZE(cr
));
347 static int ccp_perform_rsa(struct ccp_op
*op
)
351 /* Fill out the register contents for REQ1 through REQ6 */
352 cr
[0] = (CCP_ENGINE_RSA
<< REQ1_ENGINE_SHIFT
)
353 | (op
->u
.rsa
.mod_size
<< REQ1_RSA_MOD_SIZE_SHIFT
)
354 | (op
->ksb_key
<< REQ1_KEY_KSB_SHIFT
)
356 cr
[1] = op
->u
.rsa
.input_len
- 1;
357 cr
[2] = ccp_addr_lo(&op
->src
.u
.dma
);
358 cr
[3] = (op
->ksb_ctx
<< REQ4_KSB_SHIFT
)
359 | (CCP_MEMTYPE_SYSTEM
<< REQ4_MEMTYPE_SHIFT
)
360 | ccp_addr_hi(&op
->src
.u
.dma
);
361 cr
[4] = ccp_addr_lo(&op
->dst
.u
.dma
);
362 cr
[5] = (CCP_MEMTYPE_SYSTEM
<< REQ6_MEMTYPE_SHIFT
)
363 | ccp_addr_hi(&op
->dst
.u
.dma
);
365 return ccp_do_cmd(op
, cr
, ARRAY_SIZE(cr
));
368 static int ccp_perform_passthru(struct ccp_op
*op
)
372 /* Fill out the register contents for REQ1 through REQ6 */
373 cr
[0] = (CCP_ENGINE_PASSTHRU
<< REQ1_ENGINE_SHIFT
)
374 | (op
->u
.passthru
.bit_mod
<< REQ1_PT_BW_SHIFT
)
375 | (op
->u
.passthru
.byte_swap
<< REQ1_PT_BS_SHIFT
);
377 if (op
->src
.type
== CCP_MEMTYPE_SYSTEM
)
378 cr
[1] = op
->src
.u
.dma
.length
- 1;
380 cr
[1] = op
->dst
.u
.dma
.length
- 1;
382 if (op
->src
.type
== CCP_MEMTYPE_SYSTEM
) {
383 cr
[2] = ccp_addr_lo(&op
->src
.u
.dma
);
384 cr
[3] = (CCP_MEMTYPE_SYSTEM
<< REQ4_MEMTYPE_SHIFT
)
385 | ccp_addr_hi(&op
->src
.u
.dma
);
387 if (op
->u
.passthru
.bit_mod
!= CCP_PASSTHRU_BITWISE_NOOP
)
388 cr
[3] |= (op
->ksb_key
<< REQ4_KSB_SHIFT
);
390 cr
[2] = op
->src
.u
.ksb
* CCP_KSB_BYTES
;
391 cr
[3] = (CCP_MEMTYPE_KSB
<< REQ4_MEMTYPE_SHIFT
);
394 if (op
->dst
.type
== CCP_MEMTYPE_SYSTEM
) {
395 cr
[4] = ccp_addr_lo(&op
->dst
.u
.dma
);
396 cr
[5] = (CCP_MEMTYPE_SYSTEM
<< REQ6_MEMTYPE_SHIFT
)
397 | ccp_addr_hi(&op
->dst
.u
.dma
);
399 cr
[4] = op
->dst
.u
.ksb
* CCP_KSB_BYTES
;
400 cr
[5] = (CCP_MEMTYPE_KSB
<< REQ6_MEMTYPE_SHIFT
);
406 return ccp_do_cmd(op
, cr
, ARRAY_SIZE(cr
));
409 static int ccp_perform_ecc(struct ccp_op
*op
)
413 /* Fill out the register contents for REQ1 through REQ6 */
414 cr
[0] = REQ1_ECC_AFFINE_CONVERT
415 | (CCP_ENGINE_ECC
<< REQ1_ENGINE_SHIFT
)
416 | (op
->u
.ecc
.function
<< REQ1_ECC_FUNCTION_SHIFT
)
418 cr
[1] = op
->src
.u
.dma
.length
- 1;
419 cr
[2] = ccp_addr_lo(&op
->src
.u
.dma
);
420 cr
[3] = (CCP_MEMTYPE_SYSTEM
<< REQ4_MEMTYPE_SHIFT
)
421 | ccp_addr_hi(&op
->src
.u
.dma
);
422 cr
[4] = ccp_addr_lo(&op
->dst
.u
.dma
);
423 cr
[5] = (CCP_MEMTYPE_SYSTEM
<< REQ6_MEMTYPE_SHIFT
)
424 | ccp_addr_hi(&op
->dst
.u
.dma
);
426 return ccp_do_cmd(op
, cr
, ARRAY_SIZE(cr
));
429 static u32
ccp_alloc_ksb(struct ccp_device
*ccp
, unsigned int count
)
434 mutex_lock(&ccp
->ksb_mutex
);
436 start
= (u32
)bitmap_find_next_zero_area(ccp
->ksb
,
440 if (start
<= ccp
->ksb_count
) {
441 bitmap_set(ccp
->ksb
, start
, count
);
443 mutex_unlock(&ccp
->ksb_mutex
);
449 mutex_unlock(&ccp
->ksb_mutex
);
451 /* Wait for KSB entries to become available */
452 if (wait_event_interruptible(ccp
->ksb_queue
, ccp
->ksb_avail
))
456 return KSB_START
+ start
;
459 static void ccp_free_ksb(struct ccp_device
*ccp
, unsigned int start
,
465 mutex_lock(&ccp
->ksb_mutex
);
467 bitmap_clear(ccp
->ksb
, start
- KSB_START
, count
);
471 mutex_unlock(&ccp
->ksb_mutex
);
473 wake_up_interruptible_all(&ccp
->ksb_queue
);
476 static u32
ccp_gen_jobid(struct ccp_device
*ccp
)
478 return atomic_inc_return(&ccp
->current_id
) & CCP_JOBID_MASK
;
481 static void ccp_sg_free(struct ccp_sg_workarea
*wa
)
484 dma_unmap_sg(wa
->dma_dev
, wa
->dma_sg
, wa
->nents
, wa
->dma_dir
);
489 static int ccp_init_sg_workarea(struct ccp_sg_workarea
*wa
, struct device
*dev
,
490 struct scatterlist
*sg
, u64 len
,
491 enum dma_data_direction dma_dir
)
493 memset(wa
, 0, sizeof(*wa
));
499 wa
->nents
= sg_nents(sg
);
500 wa
->length
= sg
->length
;
501 wa
->bytes_left
= len
;
507 if (dma_dir
== DMA_NONE
)
512 wa
->dma_dir
= dma_dir
;
513 wa
->dma_count
= dma_map_sg(dev
, sg
, wa
->nents
, dma_dir
);
520 static void ccp_update_sg_workarea(struct ccp_sg_workarea
*wa
, unsigned int len
)
522 unsigned int nbytes
= min_t(u64
, len
, wa
->bytes_left
);
527 wa
->sg_used
+= nbytes
;
528 wa
->bytes_left
-= nbytes
;
529 if (wa
->sg_used
== wa
->sg
->length
) {
530 wa
->sg
= sg_next(wa
->sg
);
535 static void ccp_dm_free(struct ccp_dm_workarea
*wa
)
537 if (wa
->length
<= CCP_DMAPOOL_MAX_SIZE
) {
539 dma_pool_free(wa
->dma_pool
, wa
->address
,
543 dma_unmap_single(wa
->dev
, wa
->dma
.address
, wa
->length
,
552 static int ccp_init_dm_workarea(struct ccp_dm_workarea
*wa
,
553 struct ccp_cmd_queue
*cmd_q
,
555 enum dma_data_direction dir
)
557 memset(wa
, 0, sizeof(*wa
));
562 wa
->dev
= cmd_q
->ccp
->dev
;
565 if (len
<= CCP_DMAPOOL_MAX_SIZE
) {
566 wa
->dma_pool
= cmd_q
->dma_pool
;
568 wa
->address
= dma_pool_alloc(wa
->dma_pool
, GFP_KERNEL
,
573 wa
->dma
.length
= CCP_DMAPOOL_MAX_SIZE
;
575 memset(wa
->address
, 0, CCP_DMAPOOL_MAX_SIZE
);
577 wa
->address
= kzalloc(len
, GFP_KERNEL
);
581 wa
->dma
.address
= dma_map_single(wa
->dev
, wa
->address
, len
,
583 if (!wa
->dma
.address
)
586 wa
->dma
.length
= len
;
593 static void ccp_set_dm_area(struct ccp_dm_workarea
*wa
, unsigned int wa_offset
,
594 struct scatterlist
*sg
, unsigned int sg_offset
,
597 WARN_ON(!wa
->address
);
599 scatterwalk_map_and_copy(wa
->address
+ wa_offset
, sg
, sg_offset
, len
,
603 static void ccp_get_dm_area(struct ccp_dm_workarea
*wa
, unsigned int wa_offset
,
604 struct scatterlist
*sg
, unsigned int sg_offset
,
607 WARN_ON(!wa
->address
);
609 scatterwalk_map_and_copy(wa
->address
+ wa_offset
, sg
, sg_offset
, len
,
613 static void ccp_reverse_set_dm_area(struct ccp_dm_workarea
*wa
,
614 struct scatterlist
*sg
,
615 unsigned int len
, unsigned int se_len
,
618 unsigned int nbytes
, sg_offset
, dm_offset
, ksb_len
, i
;
619 u8 buffer
[CCP_REVERSE_BUF_SIZE
];
621 BUG_ON(se_len
> sizeof(buffer
));
627 ksb_len
= min_t(unsigned int, nbytes
, se_len
);
628 sg_offset
-= ksb_len
;
630 scatterwalk_map_and_copy(buffer
, sg
, sg_offset
, ksb_len
, 0);
631 for (i
= 0; i
< ksb_len
; i
++)
632 wa
->address
[dm_offset
+ i
] = buffer
[ksb_len
- i
- 1];
634 dm_offset
+= ksb_len
;
637 if ((ksb_len
!= se_len
) && sign_extend
) {
638 /* Must sign-extend to nearest sign-extend length */
639 if (wa
->address
[dm_offset
- 1] & 0x80)
640 memset(wa
->address
+ dm_offset
, 0xff,
646 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea
*wa
,
647 struct scatterlist
*sg
,
650 unsigned int nbytes
, sg_offset
, dm_offset
, ksb_len
, i
;
651 u8 buffer
[CCP_REVERSE_BUF_SIZE
];
657 ksb_len
= min_t(unsigned int, nbytes
, sizeof(buffer
));
658 dm_offset
-= ksb_len
;
660 for (i
= 0; i
< ksb_len
; i
++)
661 buffer
[ksb_len
- i
- 1] = wa
->address
[dm_offset
+ i
];
662 scatterwalk_map_and_copy(buffer
, sg
, sg_offset
, ksb_len
, 1);
664 sg_offset
+= ksb_len
;
669 static void ccp_free_data(struct ccp_data
*data
, struct ccp_cmd_queue
*cmd_q
)
671 ccp_dm_free(&data
->dm_wa
);
672 ccp_sg_free(&data
->sg_wa
);
675 static int ccp_init_data(struct ccp_data
*data
, struct ccp_cmd_queue
*cmd_q
,
676 struct scatterlist
*sg
, u64 sg_len
,
678 enum dma_data_direction dir
)
682 memset(data
, 0, sizeof(*data
));
684 ret
= ccp_init_sg_workarea(&data
->sg_wa
, cmd_q
->ccp
->dev
, sg
, sg_len
,
689 ret
= ccp_init_dm_workarea(&data
->dm_wa
, cmd_q
, dm_len
, dir
);
696 ccp_free_data(data
, cmd_q
);
701 static unsigned int ccp_queue_buf(struct ccp_data
*data
, unsigned int from
)
703 struct ccp_sg_workarea
*sg_wa
= &data
->sg_wa
;
704 struct ccp_dm_workarea
*dm_wa
= &data
->dm_wa
;
705 unsigned int buf_count
, nbytes
;
707 /* Clear the buffer if setting it */
709 memset(dm_wa
->address
, 0, dm_wa
->length
);
714 /* Perform the copy operation
715 * nbytes will always be <= UINT_MAX because dm_wa->length is
718 nbytes
= min_t(u64
, sg_wa
->bytes_left
, dm_wa
->length
);
719 scatterwalk_map_and_copy(dm_wa
->address
, sg_wa
->sg
, sg_wa
->sg_used
,
722 /* Update the structures and generate the count */
724 while (sg_wa
->bytes_left
&& (buf_count
< dm_wa
->length
)) {
725 nbytes
= min(sg_wa
->sg
->length
- sg_wa
->sg_used
,
726 dm_wa
->length
- buf_count
);
727 nbytes
= min_t(u64
, sg_wa
->bytes_left
, nbytes
);
730 ccp_update_sg_workarea(sg_wa
, nbytes
);
/* Fill the bounce buffer from the scatterlist; returns bytes copied. */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 0);
}
/* Empty the bounce buffer back into the scatterlist; returns bytes copied. */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 1);
}
746 static void ccp_prepare_data(struct ccp_data
*src
, struct ccp_data
*dst
,
747 struct ccp_op
*op
, unsigned int block_size
,
750 unsigned int sg_src_len
, sg_dst_len
, op_len
;
752 /* The CCP can only DMA from/to one address each per operation. This
753 * requires that we find the smallest DMA area between the source
754 * and destination. The resulting len values will always be <= UINT_MAX
755 * because the dma length is an unsigned int.
757 sg_src_len
= sg_dma_len(src
->sg_wa
.sg
) - src
->sg_wa
.sg_used
;
758 sg_src_len
= min_t(u64
, src
->sg_wa
.bytes_left
, sg_src_len
);
761 sg_dst_len
= sg_dma_len(dst
->sg_wa
.sg
) - dst
->sg_wa
.sg_used
;
762 sg_dst_len
= min_t(u64
, src
->sg_wa
.bytes_left
, sg_dst_len
);
763 op_len
= min(sg_src_len
, sg_dst_len
);
768 /* The data operation length will be at least block_size in length
769 * or the smaller of available sg room remaining for the source or
772 op_len
= max(op_len
, block_size
);
774 /* Unless we have to buffer data, there's no reason to wait */
777 if (sg_src_len
< block_size
) {
778 /* Not enough data in the sg element, so it
779 * needs to be buffered into a blocksize chunk
781 int cp_len
= ccp_fill_queue_buf(src
);
784 op
->src
.u
.dma
.address
= src
->dm_wa
.dma
.address
;
785 op
->src
.u
.dma
.offset
= 0;
786 op
->src
.u
.dma
.length
= (blocksize_op
) ? block_size
: cp_len
;
788 /* Enough data in the sg element, but we need to
789 * adjust for any previously copied data
791 op
->src
.u
.dma
.address
= sg_dma_address(src
->sg_wa
.sg
);
792 op
->src
.u
.dma
.offset
= src
->sg_wa
.sg_used
;
793 op
->src
.u
.dma
.length
= op_len
& ~(block_size
- 1);
795 ccp_update_sg_workarea(&src
->sg_wa
, op
->src
.u
.dma
.length
);
799 if (sg_dst_len
< block_size
) {
800 /* Not enough room in the sg element or we're on the
801 * last piece of data (when using padding), so the
802 * output needs to be buffered into a blocksize chunk
805 op
->dst
.u
.dma
.address
= dst
->dm_wa
.dma
.address
;
806 op
->dst
.u
.dma
.offset
= 0;
807 op
->dst
.u
.dma
.length
= op
->src
.u
.dma
.length
;
809 /* Enough room in the sg element, but we need to
810 * adjust for any previously used area
812 op
->dst
.u
.dma
.address
= sg_dma_address(dst
->sg_wa
.sg
);
813 op
->dst
.u
.dma
.offset
= dst
->sg_wa
.sg_used
;
814 op
->dst
.u
.dma
.length
= op
->src
.u
.dma
.length
;
819 static void ccp_process_data(struct ccp_data
*src
, struct ccp_data
*dst
,
825 if (op
->dst
.u
.dma
.address
== dst
->dm_wa
.dma
.address
)
826 ccp_empty_queue_buf(dst
);
828 ccp_update_sg_workarea(&dst
->sg_wa
,
829 op
->dst
.u
.dma
.length
);
833 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue
*cmd_q
,
834 struct ccp_dm_workarea
*wa
, u32 jobid
, u32 ksb
,
835 u32 byte_swap
, bool from
)
839 memset(&op
, 0, sizeof(op
));
847 op
.src
.type
= CCP_MEMTYPE_KSB
;
849 op
.dst
.type
= CCP_MEMTYPE_SYSTEM
;
850 op
.dst
.u
.dma
.address
= wa
->dma
.address
;
851 op
.dst
.u
.dma
.length
= wa
->length
;
853 op
.src
.type
= CCP_MEMTYPE_SYSTEM
;
854 op
.src
.u
.dma
.address
= wa
->dma
.address
;
855 op
.src
.u
.dma
.length
= wa
->length
;
856 op
.dst
.type
= CCP_MEMTYPE_KSB
;
860 op
.u
.passthru
.byte_swap
= byte_swap
;
862 return ccp_perform_passthru(&op
);
865 static int ccp_copy_to_ksb(struct ccp_cmd_queue
*cmd_q
,
866 struct ccp_dm_workarea
*wa
, u32 jobid
, u32 ksb
,
869 return ccp_copy_to_from_ksb(cmd_q
, wa
, jobid
, ksb
, byte_swap
, false);
872 static int ccp_copy_from_ksb(struct ccp_cmd_queue
*cmd_q
,
873 struct ccp_dm_workarea
*wa
, u32 jobid
, u32 ksb
,
876 return ccp_copy_to_from_ksb(cmd_q
, wa
, jobid
, ksb
, byte_swap
, true);
879 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue
*cmd_q
,
882 struct ccp_aes_engine
*aes
= &cmd
->u
.aes
;
883 struct ccp_dm_workarea key
, ctx
;
886 unsigned int dm_offset
;
889 if (!((aes
->key_len
== AES_KEYSIZE_128
) ||
890 (aes
->key_len
== AES_KEYSIZE_192
) ||
891 (aes
->key_len
== AES_KEYSIZE_256
)))
894 if (aes
->src_len
& (AES_BLOCK_SIZE
- 1))
897 if (aes
->iv_len
!= AES_BLOCK_SIZE
)
900 if (!aes
->key
|| !aes
->iv
|| !aes
->src
)
903 if (aes
->cmac_final
) {
904 if (aes
->cmac_key_len
!= AES_BLOCK_SIZE
)
911 BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT
!= 1);
912 BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT
!= 1);
915 memset(&op
, 0, sizeof(op
));
917 op
.jobid
= ccp_gen_jobid(cmd_q
->ccp
);
918 op
.ksb_key
= cmd_q
->ksb_key
;
919 op
.ksb_ctx
= cmd_q
->ksb_ctx
;
921 op
.u
.aes
.type
= aes
->type
;
922 op
.u
.aes
.mode
= aes
->mode
;
923 op
.u
.aes
.action
= aes
->action
;
925 /* All supported key sizes fit in a single (32-byte) KSB entry
926 * and must be in little endian format. Use the 256-bit byte
927 * swap passthru option to convert from big endian to little
930 ret
= ccp_init_dm_workarea(&key
, cmd_q
,
931 CCP_AES_KEY_KSB_COUNT
* CCP_KSB_BYTES
,
936 dm_offset
= CCP_KSB_BYTES
- aes
->key_len
;
937 ccp_set_dm_area(&key
, dm_offset
, aes
->key
, 0, aes
->key_len
);
938 ret
= ccp_copy_to_ksb(cmd_q
, &key
, op
.jobid
, op
.ksb_key
,
939 CCP_PASSTHRU_BYTESWAP_256BIT
);
941 cmd
->engine_error
= cmd_q
->cmd_error
;
945 /* The AES context fits in a single (32-byte) KSB entry and
946 * must be in little endian format. Use the 256-bit byte swap
947 * passthru option to convert from big endian to little endian.
949 ret
= ccp_init_dm_workarea(&ctx
, cmd_q
,
950 CCP_AES_CTX_KSB_COUNT
* CCP_KSB_BYTES
,
955 dm_offset
= CCP_KSB_BYTES
- AES_BLOCK_SIZE
;
956 ccp_set_dm_area(&ctx
, dm_offset
, aes
->iv
, 0, aes
->iv_len
);
957 ret
= ccp_copy_to_ksb(cmd_q
, &ctx
, op
.jobid
, op
.ksb_ctx
,
958 CCP_PASSTHRU_BYTESWAP_256BIT
);
960 cmd
->engine_error
= cmd_q
->cmd_error
;
964 /* Send data to the CCP AES engine */
965 ret
= ccp_init_data(&src
, cmd_q
, aes
->src
, aes
->src_len
,
966 AES_BLOCK_SIZE
, DMA_TO_DEVICE
);
970 while (src
.sg_wa
.bytes_left
) {
971 ccp_prepare_data(&src
, NULL
, &op
, AES_BLOCK_SIZE
, true);
972 if (aes
->cmac_final
&& !src
.sg_wa
.bytes_left
) {
975 /* Push the K1/K2 key to the CCP now */
976 ret
= ccp_copy_from_ksb(cmd_q
, &ctx
, op
.jobid
,
978 CCP_PASSTHRU_BYTESWAP_256BIT
);
980 cmd
->engine_error
= cmd_q
->cmd_error
;
984 ccp_set_dm_area(&ctx
, 0, aes
->cmac_key
, 0,
986 ret
= ccp_copy_to_ksb(cmd_q
, &ctx
, op
.jobid
, op
.ksb_ctx
,
987 CCP_PASSTHRU_BYTESWAP_256BIT
);
989 cmd
->engine_error
= cmd_q
->cmd_error
;
994 ret
= ccp_perform_aes(&op
);
996 cmd
->engine_error
= cmd_q
->cmd_error
;
1000 ccp_process_data(&src
, NULL
, &op
);
1003 /* Retrieve the AES context - convert from LE to BE using
1004 * 32-byte (256-bit) byteswapping
1006 ret
= ccp_copy_from_ksb(cmd_q
, &ctx
, op
.jobid
, op
.ksb_ctx
,
1007 CCP_PASSTHRU_BYTESWAP_256BIT
);
1009 cmd
->engine_error
= cmd_q
->cmd_error
;
1013 /* ...but we only need AES_BLOCK_SIZE bytes */
1014 dm_offset
= CCP_KSB_BYTES
- AES_BLOCK_SIZE
;
1015 ccp_get_dm_area(&ctx
, dm_offset
, aes
->iv
, 0, aes
->iv_len
);
1018 ccp_free_data(&src
, cmd_q
);
1029 static int ccp_run_aes_cmd(struct ccp_cmd_queue
*cmd_q
, struct ccp_cmd
*cmd
)
1031 struct ccp_aes_engine
*aes
= &cmd
->u
.aes
;
1032 struct ccp_dm_workarea key
, ctx
;
1033 struct ccp_data src
, dst
;
1035 unsigned int dm_offset
;
1036 bool in_place
= false;
1039 if (aes
->mode
== CCP_AES_MODE_CMAC
)
1040 return ccp_run_aes_cmac_cmd(cmd_q
, cmd
);
1042 if (!((aes
->key_len
== AES_KEYSIZE_128
) ||
1043 (aes
->key_len
== AES_KEYSIZE_192
) ||
1044 (aes
->key_len
== AES_KEYSIZE_256
)))
1047 if (((aes
->mode
== CCP_AES_MODE_ECB
) ||
1048 (aes
->mode
== CCP_AES_MODE_CBC
) ||
1049 (aes
->mode
== CCP_AES_MODE_CFB
)) &&
1050 (aes
->src_len
& (AES_BLOCK_SIZE
- 1)))
1053 if (!aes
->key
|| !aes
->src
|| !aes
->dst
)
1056 if (aes
->mode
!= CCP_AES_MODE_ECB
) {
1057 if (aes
->iv_len
!= AES_BLOCK_SIZE
)
1064 BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT
!= 1);
1065 BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT
!= 1);
1068 memset(&op
, 0, sizeof(op
));
1070 op
.jobid
= ccp_gen_jobid(cmd_q
->ccp
);
1071 op
.ksb_key
= cmd_q
->ksb_key
;
1072 op
.ksb_ctx
= cmd_q
->ksb_ctx
;
1073 op
.init
= (aes
->mode
== CCP_AES_MODE_ECB
) ? 0 : 1;
1074 op
.u
.aes
.type
= aes
->type
;
1075 op
.u
.aes
.mode
= aes
->mode
;
1076 op
.u
.aes
.action
= aes
->action
;
1078 /* All supported key sizes fit in a single (32-byte) KSB entry
1079 * and must be in little endian format. Use the 256-bit byte
1080 * swap passthru option to convert from big endian to little
1083 ret
= ccp_init_dm_workarea(&key
, cmd_q
,
1084 CCP_AES_KEY_KSB_COUNT
* CCP_KSB_BYTES
,
1089 dm_offset
= CCP_KSB_BYTES
- aes
->key_len
;
1090 ccp_set_dm_area(&key
, dm_offset
, aes
->key
, 0, aes
->key_len
);
1091 ret
= ccp_copy_to_ksb(cmd_q
, &key
, op
.jobid
, op
.ksb_key
,
1092 CCP_PASSTHRU_BYTESWAP_256BIT
);
1094 cmd
->engine_error
= cmd_q
->cmd_error
;
1098 /* The AES context fits in a single (32-byte) KSB entry and
1099 * must be in little endian format. Use the 256-bit byte swap
1100 * passthru option to convert from big endian to little endian.
1102 ret
= ccp_init_dm_workarea(&ctx
, cmd_q
,
1103 CCP_AES_CTX_KSB_COUNT
* CCP_KSB_BYTES
,
1108 if (aes
->mode
!= CCP_AES_MODE_ECB
) {
1109 /* Load the AES context - conver to LE */
1110 dm_offset
= CCP_KSB_BYTES
- AES_BLOCK_SIZE
;
1111 ccp_set_dm_area(&ctx
, dm_offset
, aes
->iv
, 0, aes
->iv_len
);
1112 ret
= ccp_copy_to_ksb(cmd_q
, &ctx
, op
.jobid
, op
.ksb_ctx
,
1113 CCP_PASSTHRU_BYTESWAP_256BIT
);
1115 cmd
->engine_error
= cmd_q
->cmd_error
;
1120 /* Prepare the input and output data workareas. For in-place
1121 * operations we need to set the dma direction to BIDIRECTIONAL
1122 * and copy the src workarea to the dst workarea.
1124 if (sg_virt(aes
->src
) == sg_virt(aes
->dst
))
1127 ret
= ccp_init_data(&src
, cmd_q
, aes
->src
, aes
->src_len
,
1129 in_place
? DMA_BIDIRECTIONAL
: DMA_TO_DEVICE
);
1136 ret
= ccp_init_data(&dst
, cmd_q
, aes
->dst
, aes
->src_len
,
1137 AES_BLOCK_SIZE
, DMA_FROM_DEVICE
);
1142 /* Send data to the CCP AES engine */
1143 while (src
.sg_wa
.bytes_left
) {
1144 ccp_prepare_data(&src
, &dst
, &op
, AES_BLOCK_SIZE
, true);
1145 if (!src
.sg_wa
.bytes_left
) {
1148 /* Since we don't retrieve the AES context in ECB
1149 * mode we have to wait for the operation to complete
1150 * on the last piece of data
1152 if (aes
->mode
== CCP_AES_MODE_ECB
)
1156 ret
= ccp_perform_aes(&op
);
1158 cmd
->engine_error
= cmd_q
->cmd_error
;
1162 ccp_process_data(&src
, &dst
, &op
);
1165 if (aes
->mode
!= CCP_AES_MODE_ECB
) {
1166 /* Retrieve the AES context - convert from LE to BE using
1167 * 32-byte (256-bit) byteswapping
1169 ret
= ccp_copy_from_ksb(cmd_q
, &ctx
, op
.jobid
, op
.ksb_ctx
,
1170 CCP_PASSTHRU_BYTESWAP_256BIT
);
1172 cmd
->engine_error
= cmd_q
->cmd_error
;
1176 /* ...but we only need AES_BLOCK_SIZE bytes */
1177 dm_offset
= CCP_KSB_BYTES
- AES_BLOCK_SIZE
;
1178 ccp_get_dm_area(&ctx
, dm_offset
, aes
->iv
, 0, aes
->iv_len
);
1183 ccp_free_data(&dst
, cmd_q
);
1186 ccp_free_data(&src
, cmd_q
);
1197 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue
*cmd_q
,
1198 struct ccp_cmd
*cmd
)
1200 struct ccp_xts_aes_engine
*xts
= &cmd
->u
.xts
;
1201 struct ccp_dm_workarea key
, ctx
;
1202 struct ccp_data src
, dst
;
1204 unsigned int unit_size
, dm_offset
;
1205 bool in_place
= false;
1208 switch (xts
->unit_size
) {
1209 case CCP_XTS_AES_UNIT_SIZE_16
:
1212 case CCP_XTS_AES_UNIT_SIZE_512
:
1215 case CCP_XTS_AES_UNIT_SIZE_1024
:
1218 case CCP_XTS_AES_UNIT_SIZE_2048
:
1221 case CCP_XTS_AES_UNIT_SIZE_4096
:
1229 if (xts
->key_len
!= AES_KEYSIZE_128
)
1232 if (!xts
->final
&& (xts
->src_len
& (AES_BLOCK_SIZE
- 1)))
1235 if (xts
->iv_len
!= AES_BLOCK_SIZE
)
1238 if (!xts
->key
|| !xts
->iv
|| !xts
->src
|| !xts
->dst
)
1241 BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT
!= 1);
1242 BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT
!= 1);
1245 memset(&op
, 0, sizeof(op
));
1247 op
.jobid
= ccp_gen_jobid(cmd_q
->ccp
);
1248 op
.ksb_key
= cmd_q
->ksb_key
;
1249 op
.ksb_ctx
= cmd_q
->ksb_ctx
;
1251 op
.u
.xts
.action
= xts
->action
;
1252 op
.u
.xts
.unit_size
= xts
->unit_size
;
1254 /* All supported key sizes fit in a single (32-byte) KSB entry
1255 * and must be in little endian format. Use the 256-bit byte
1256 * swap passthru option to convert from big endian to little
1259 ret
= ccp_init_dm_workarea(&key
, cmd_q
,
1260 CCP_XTS_AES_KEY_KSB_COUNT
* CCP_KSB_BYTES
,
1265 dm_offset
= CCP_KSB_BYTES
- AES_KEYSIZE_128
;
1266 ccp_set_dm_area(&key
, dm_offset
, xts
->key
, 0, xts
->key_len
);
1267 ccp_set_dm_area(&key
, 0, xts
->key
, dm_offset
, xts
->key_len
);
1268 ret
= ccp_copy_to_ksb(cmd_q
, &key
, op
.jobid
, op
.ksb_key
,
1269 CCP_PASSTHRU_BYTESWAP_256BIT
);
1271 cmd
->engine_error
= cmd_q
->cmd_error
;
1275 /* The AES context fits in a single (32-byte) KSB entry and
1276 * for XTS is already in little endian format so no byte swapping
1279 ret
= ccp_init_dm_workarea(&ctx
, cmd_q
,
1280 CCP_XTS_AES_CTX_KSB_COUNT
* CCP_KSB_BYTES
,
1285 ccp_set_dm_area(&ctx
, 0, xts
->iv
, 0, xts
->iv_len
);
1286 ret
= ccp_copy_to_ksb(cmd_q
, &ctx
, op
.jobid
, op
.ksb_ctx
,
1287 CCP_PASSTHRU_BYTESWAP_NOOP
);
1289 cmd
->engine_error
= cmd_q
->cmd_error
;
1293 /* Prepare the input and output data workareas. For in-place
1294 * operations we need to set the dma direction to BIDIRECTIONAL
1295 * and copy the src workarea to the dst workarea.
1297 if (sg_virt(xts
->src
) == sg_virt(xts
->dst
))
1300 ret
= ccp_init_data(&src
, cmd_q
, xts
->src
, xts
->src_len
,
1302 in_place
? DMA_BIDIRECTIONAL
: DMA_TO_DEVICE
);
1309 ret
= ccp_init_data(&dst
, cmd_q
, xts
->dst
, xts
->src_len
,
1310 unit_size
, DMA_FROM_DEVICE
);
1315 /* Send data to the CCP AES engine */
1316 while (src
.sg_wa
.bytes_left
) {
1317 ccp_prepare_data(&src
, &dst
, &op
, unit_size
, true);
1318 if (!src
.sg_wa
.bytes_left
)
1321 ret
= ccp_perform_xts_aes(&op
);
1323 cmd
->engine_error
= cmd_q
->cmd_error
;
1327 ccp_process_data(&src
, &dst
, &op
);
1330 /* Retrieve the AES context - convert from LE to BE using
1331 * 32-byte (256-bit) byteswapping
1333 ret
= ccp_copy_from_ksb(cmd_q
, &ctx
, op
.jobid
, op
.ksb_ctx
,
1334 CCP_PASSTHRU_BYTESWAP_256BIT
);
1336 cmd
->engine_error
= cmd_q
->cmd_error
;
1340 /* ...but we only need AES_BLOCK_SIZE bytes */
1341 dm_offset
= CCP_KSB_BYTES
- AES_BLOCK_SIZE
;
1342 ccp_get_dm_area(&ctx
, dm_offset
, xts
->iv
, 0, xts
->iv_len
);
1346 ccp_free_data(&dst
, cmd_q
);
1349 ccp_free_data(&src
, cmd_q
);
/* Run a SHA-1/224/256 operation on the CCP SHA engine.
 *
 * Loads the (initial or caller-supplied) SHA context into the key storage
 * block (KSB), streams the source data through the engine in
 * CCP_SHA_BLOCKSIZE units, then reads the updated context back out.  When
 * this is a final operation and an opad is supplied, the outer HMAC hash is
 * computed by recursively issuing a second SHA command.
 *
 * Returns 0 on success or a negative errno; on device errors the queue's
 * cmd_error is propagated into cmd->engine_error.
 */
static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_sha_engine *sha = &cmd->u.sha;
	struct ccp_dm_workarea ctx;
	struct ccp_data src;
	struct ccp_op op;
	int ret;

	if (sha->ctx_len != CCP_SHA_CTXSIZE)
		return -EINVAL;

	if (!sha->ctx)
		return -EINVAL;

	/* Non-final updates must supply whole SHA blocks. */
	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
		return -EINVAL;

	if (!sha->src_len) {
		const u8 *sha_zero;

		/* Not final, just return */
		if (!sha->final)
			return 0;

		/* CCP can't do a zero length sha operation so the caller
		 * must buffer the data.
		 */
		if (sha->msg_bits)
			return -EINVAL;

		/* A sha operation for a message with a total length of zero,
		 * return known result.
		 */
		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			sha_zero = ccp_sha1_zero;
			break;
		case CCP_SHA_TYPE_224:
			sha_zero = ccp_sha224_zero;
			break;
		case CCP_SHA_TYPE_256:
			sha_zero = ccp_sha256_zero;
			break;
		default:
			return -EINVAL;
		}

		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
					 sha->ctx_len, 1);

		return 0;
	}

	if (!sha->src)
		return -EINVAL;

	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.u.sha.type = sha->type;
	op.u.sha.msg_bits = sha->msg_bits;

	/* The SHA context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		return ret;

	if (sha->first) {
		const __be32 *init;

		/* First pass: seed the context with the algorithm's
		 * standard initial hash values.
		 */
		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			init = ccp_sha1_init;
			break;
		case CCP_SHA_TYPE_224:
			init = ccp_sha224_init;
			break;
		case CCP_SHA_TYPE_256:
			init = ccp_sha256_init;
			break;
		default:
			ret = -EINVAL;
			goto e_ctx;
		}
		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
	} else
		/* Continuation: reuse the caller's saved context. */
		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);

	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP SHA engine */
	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
		/* Flag end-of-message on the last chunk of a final op. */
		if (sha->final && !src.sg_wa.bytes_left)
			op.eom = 1;

		ret = ccp_perform_sha(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_data;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the SHA context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping to BE
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_data;
	}

	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);

	if (sha->final && sha->opad) {
		/* HMAC operation, recursively perform final SHA */
		struct ccp_cmd hmac_cmd;
		struct scatterlist sg;
		u64 block_size, digest_size;
		u8 *hmac_buf;

		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			block_size = SHA1_BLOCK_SIZE;
			digest_size = SHA1_DIGEST_SIZE;
			break;
		case CCP_SHA_TYPE_224:
			block_size = SHA224_BLOCK_SIZE;
			digest_size = SHA224_DIGEST_SIZE;
			break;
		case CCP_SHA_TYPE_256:
			block_size = SHA256_BLOCK_SIZE;
			digest_size = SHA256_DIGEST_SIZE;
			break;
		default:
			ret = -EINVAL;
			goto e_data;
		}

		if (sha->opad_len != block_size) {
			ret = -EINVAL;
			goto e_data;
		}

		/* Outer HMAC input = opad block followed by inner digest. */
		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
		if (!hmac_buf) {
			ret = -ENOMEM;
			goto e_data;
		}
		sg_init_one(&sg, hmac_buf, block_size + digest_size);

		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
		memcpy(hmac_buf + block_size, ctx.address, digest_size);

		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
		hmac_cmd.engine = CCP_ENGINE_SHA;
		hmac_cmd.u.sha.type = sha->type;
		hmac_cmd.u.sha.ctx = sha->ctx;
		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
		hmac_cmd.u.sha.src = &sg;
		hmac_cmd.u.sha.src_len = block_size + digest_size;
		hmac_cmd.u.sha.opad = NULL;	/* prevent further recursion */
		hmac_cmd.u.sha.opad_len = 0;
		hmac_cmd.u.sha.first = 1;
		hmac_cmd.u.sha.final = 1;
		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;

		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
		if (ret)
			cmd->engine_error = hmac_cmd.engine_error;

		kfree(hmac_buf);
	}

e_data:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

	return ret;
}
/* Run an RSA operation on the CCP RSA engine.
 *
 * The exponent is loaded into dynamically allocated KSB entries; the
 * modulus and message are concatenated (modulus first) into a single
 * little-endian DMA buffer that is fed to the engine.  All big-endian
 * caller buffers are byte-reversed on the way in and out.
 *
 * Returns 0 on success or a negative errno; device errors are propagated
 * through cmd->engine_error.
 */
static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
	struct ccp_dm_workarea exp, src;
	struct ccp_data dst;
	struct ccp_op op;
	unsigned int ksb_count, i_len, o_len;
	int ret;

	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
		return -EINVAL;

	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
		return -EINVAL;

	/* The RSA modulus must precede the message being acted upon, so
	 * it must be copied to a DMA area where the message and the
	 * modulus can be concatenated.  Therefore the input buffer
	 * length required is twice the output buffer length (which
	 * must be a multiple of 256-bits).
	 */
	o_len = ((rsa->key_size + 255) / 256) * 32;
	i_len = o_len * 2;

	ksb_count = o_len / CCP_KSB_BYTES;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
	if (!op.ksb_key)
		return -EIO;

	/* The RSA exponent may span multiple (32-byte) KSB entries and must
	 * be in little endian format. Reverse copy each 32-byte chunk
	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
	 * and each byte within that chunk and do not perform any byte swap
	 * operations on the passthru operation.
	 */
	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
	if (ret)
		goto e_ksb;

	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
				false);
	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_NOOP);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_exp;
	}

	/* Concatenate the modulus and the message. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
	if (ret)
		goto e_exp;

	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
				false);
	src.address += o_len;	/* Adjust the address for the copy operation */
	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
				false);
	src.address -= o_len;	/* Reset the address to original value */

	/* Prepare the output area for the operation */
	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
			    o_len, DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = i_len;
	op.dst.u.dma.address = dst.dm_wa.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = o_len;

	op.u.rsa.mod_size = rsa->key_size;
	op.u.rsa.input_len = i_len;

	ret = ccp_perform_rsa(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* Copy the little-endian result back out in big-endian order. */
	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);

e_dst:
	ccp_free_data(&dst, cmd_q);

e_src:
	ccp_dm_free(&src);

e_exp:
	ccp_dm_free(&exp);

e_ksb:
	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);

	return ret;
}
/* Run a passthru (copy / byte-swap / bit-mask) operation on the CCP.
 *
 * Optionally loads a bitwise mask into the queue's reserved KSB entry,
 * then walks the mapped source scatterlist one DMA segment at a time,
 * issuing one engine operation per segment.  In-place operation (src and
 * dst aliasing the same buffer) is detected and mapped bidirectionally.
 *
 * Returns 0 on success or a negative errno; device errors are propagated
 * through cmd->engine_error.
 */
static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
				struct ccp_cmd *cmd)
{
	struct ccp_passthru_engine *pt = &cmd->u.passthru;
	struct ccp_dm_workarea mask;
	struct ccp_data src, dst;
	struct ccp_op op;
	bool in_place = false;
	unsigned int i;
	int ret;

	/* Non-final operations must supply whole passthru blocks. */
	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
		return -EINVAL;

	if (!pt->src || !pt->dst)
		return -EINVAL;

	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
			return -EINVAL;
		if (!pt->mask)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
		/* Load the mask */
		op.ksb_key = cmd_q->ksb_key;

		ret = ccp_init_dm_workarea(&mask, cmd_q,
					   CCP_PASSTHRU_KSB_COUNT *
					   CCP_KSB_BYTES,
					   DMA_TO_DEVICE);
		if (ret)
			return ret;

		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
				      CCP_PASSTHRU_BYTESWAP_NOOP);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_mask;
		}
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(pt->src) == sg_virt(pt->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
			    CCP_PASSTHRU_MASKSIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_mask;

	if (in_place)
		dst = src;
	else {
		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP Passthru engine
	 *   Because the CCP engine works on a single source and destination
	 *   dma address at a time, each entry in the source scatterlist
	 *   (after the dma_map_sg call) must be less than or equal to the
	 *   (remaining) length in the destination scatterlist entry and the
	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
	 */
	dst.sg_wa.sg_used = 0;
	for (i = 1; i <= src.sg_wa.dma_count; i++) {
		if (!dst.sg_wa.sg ||
		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
			ret = -EINVAL;
			goto e_dst;
		}

		if (i == src.sg_wa.dma_count) {
			/* Last segment: mark end-of-message and stop-on-
			 * completion.
			 */
			op.eom = 1;
			op.soc = 1;
		}

		op.src.type = CCP_MEMTYPE_SYSTEM;
		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
		op.src.u.dma.offset = 0;
		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);

		op.dst.type = CCP_MEMTYPE_SYSTEM;
		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
		op.dst.u.dma.offset = dst.sg_wa.sg_used;
		op.dst.u.dma.length = op.src.u.dma.length;

		ret = ccp_perform_passthru(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		/* Advance the destination position; move to the next dst
		 * entry once the current one is fully consumed.
		 */
		dst.sg_wa.sg_used += src.sg_wa.sg->length;
		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
			dst.sg_wa.sg_used = 0;
		}
		src.sg_wa.sg = sg_next(src.sg_wa.sg);
	}

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_mask:
	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
		ccp_dm_free(&mask);

	return ret;
}
/* Run an ECC modular-math operation (MMUL/MADD/MINV) on the CCP.
 *
 * Builds a fixed-layout little-endian source buffer containing the
 * modulus followed by one or two operands (MINV takes only one), runs
 * the engine, checks the hardware status word in the output buffer and
 * copies the result back in big-endian order.
 *
 * Returns 0 on success or a negative errno; -EIO if the engine reports
 * failure in its result word.
 */
static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.mm.operand_1 ||
	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
		if (!ecc->u.mm.operand_2 ||
		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

	if (!ecc->u.mm.result ||
	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first operand */
	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
				ecc->u.mm.operand_1_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
		/* Copy the second operand */
		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
					ecc->u.mm.operand_2_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* The engine writes a little-endian status word at a fixed offset
	 * in the output buffer.
	 */
	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the ECC result */
	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}
/* Run an ECC point-math operation (PADD/PMUL/PDBL) on the CCP.
 *
 * Builds a fixed-layout little-endian source buffer: modulus, first point
 * (X, Y, Z=1), an optional second point for point-add, the curve domain
 * "a" parameter and an optional scalar for point-multiply.  After the
 * engine runs, the hardware status word is checked and the result point's
 * X and Y coordinates are copied back in big-endian order.
 *
 * Returns 0 on success or a negative errno; -EIO if the engine reports
 * failure in its result word.
 */
static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.pm.point_1.x ||
	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.point_1.y ||
	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		if (!ecc->u.pm.point_2.x ||
		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
		    !ecc->u.pm.point_2.y ||
		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;
	}

	if (!ecc->u.pm.domain_a ||
	    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
		if (!ecc->u.pm.scalar ||
		    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

	if (!ecc->u.pm.result.x ||
	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.result.y ||
	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first point X and Y coordinate */
	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
				ecc->u.pm.point_1.x_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;
	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
				ecc->u.pm.point_1.y_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Set the first point Z coordinate to 1 */
	*src.address = 0x01;
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		/* Copy the second point X and Y coordinate */
		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
					ecc->u.pm.point_2.x_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;
		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
					ecc->u.pm.point_2.y_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;

		/* Set the second point Z coordinate to 1 */
		*src.address = 0x01;
		src.address += CCP_ECC_OPERAND_SIZE;
	} else {
		/* Copy the Domain "a" parameter */
		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
					ecc->u.pm.domain_a_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
			/* Copy the scalar value */
			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
						ecc->u.pm.scalar_len,
						CCP_ECC_OPERAND_SIZE, false);
			src.address += CCP_ECC_OPERAND_SIZE;
		}
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* The engine writes a little-endian status word at a fixed offset
	 * in the output buffer.
	 */
	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the workarea address since it is updated as we walk through
	 * to copy the point math result
	 */
	save = dst.address;

	/* Save the ECC result X and Y coordinates */
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;

	/* Restore the workarea address */
	dst.address = save;

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}
2067 static int ccp_run_ecc_cmd(struct ccp_cmd_queue
*cmd_q
, struct ccp_cmd
*cmd
)
2069 struct ccp_ecc_engine
*ecc
= &cmd
->u
.ecc
;
2071 ecc
->ecc_result
= 0;
2074 (ecc
->mod_len
> CCP_ECC_MODULUS_BYTES
))
2077 switch (ecc
->function
) {
2078 case CCP_ECC_FUNCTION_MMUL_384BIT
:
2079 case CCP_ECC_FUNCTION_MADD_384BIT
:
2080 case CCP_ECC_FUNCTION_MINV_384BIT
:
2081 return ccp_run_ecc_mm_cmd(cmd_q
, cmd
);
2083 case CCP_ECC_FUNCTION_PADD_384BIT
:
2084 case CCP_ECC_FUNCTION_PMUL_384BIT
:
2085 case CCP_ECC_FUNCTION_PDBL_384BIT
:
2086 return ccp_run_ecc_pm_cmd(cmd_q
, cmd
);
2093 int ccp_run_cmd(struct ccp_cmd_queue
*cmd_q
, struct ccp_cmd
*cmd
)
2097 cmd
->engine_error
= 0;
2098 cmd_q
->cmd_error
= 0;
2099 cmd_q
->int_rcvd
= 0;
2100 cmd_q
->free_slots
= CMD_Q_DEPTH(ioread32(cmd_q
->reg_status
));
2102 switch (cmd
->engine
) {
2103 case CCP_ENGINE_AES
:
2104 ret
= ccp_run_aes_cmd(cmd_q
, cmd
);
2106 case CCP_ENGINE_XTS_AES_128
:
2107 ret
= ccp_run_xts_aes_cmd(cmd_q
, cmd
);
2109 case CCP_ENGINE_SHA
:
2110 ret
= ccp_run_sha_cmd(cmd_q
, cmd
);
2112 case CCP_ENGINE_RSA
:
2113 ret
= ccp_run_rsa_cmd(cmd_q
, cmd
);
2115 case CCP_ENGINE_PASSTHRU
:
2116 ret
= ccp_run_passthru_cmd(cmd_q
, cmd
);
2118 case CCP_ENGINE_ECC
:
2119 ret
= ccp_run_ecc_cmd(cmd_q
, cmd
);