// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * sun4i-ss-hash.c - hardware cryptographic accelerator for Allwinner A20 SoC
 *
 * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com>
 *
 * This file adds support for MD5 and SHA1.
 *
 * You can find the datasheet in Documentation/arm/sunxi.rst
 */
#include "sun4i-ss.h"
#include <asm/unaligned.h>
#include <linux/scatterlist.h>

/* This is a totally arbitrary value */
#define SS_TIMEOUT 100
int sun4i_hash_crainit(struct crypto_tfm *tfm)
{
	struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
	struct sun4i_ss_alg_template *algt;
	int err;

	memset(op, 0, sizeof(struct sun4i_tfm_ctx));

	algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
	op->ss = algt->ss;

	err = pm_runtime_get_sync(op->ss->dev);
	if (err < 0)
		return err;

	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
				 sizeof(struct sun4i_req_ctx));
	return 0;
}
void sun4i_hash_craexit(struct crypto_tfm *tfm)
{
	struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);

	pm_runtime_put(op->ss->dev);
}
/* sun4i_hash_init: initialize request context */
int sun4i_hash_init(struct ahash_request *areq)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
	struct sun4i_ss_alg_template *algt;

	memset(op, 0, sizeof(struct sun4i_req_ctx));

	algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
	op->mode = algt->mode;

	return 0;
}
int sun4i_hash_export_md5(struct ahash_request *areq, void *out)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct md5_state *octx = out;
	int i;

	octx->byte_count = op->byte_count + op->len;

	memcpy(octx->block, op->buf, op->len);

	if (op->byte_count) {
		for (i = 0; i < 4; i++)
			octx->hash[i] = op->hash[i];
	} else {
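		/*
		 * Note: MD5's initial values are identical to SHA1's first
		 * four words (0x67452301, 0xefcdab89, 0x98badcfe,
		 * 0x10325476), which is why the SHA1 constants can be
		 * reused here.
		 */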
		octx->hash[0] = SHA1_H0;
		octx->hash[1] = SHA1_H1;
		octx->hash[2] = SHA1_H2;
		octx->hash[3] = SHA1_H3;
	}

	return 0;
}
int sun4i_hash_import_md5(struct ahash_request *areq, const void *in)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	const struct md5_state *ictx = in;
	int i;

	sun4i_hash_init(areq);

	op->byte_count = ictx->byte_count & ~0x3F;
	op->len = ictx->byte_count & 0x3F;
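	/*
	 * Worked example: an imported byte_count of 100 gives
	 * op->byte_count = 64 (full 64-byte blocks already hashed) and
	 * op->len = 36 (bytes still waiting in the buffer).
	 */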
	memcpy(op->buf, ictx->block, op->len);

	for (i = 0; i < 4; i++)
		op->hash[i] = ictx->hash[i];

	return 0;
}
int sun4i_hash_export_sha1(struct ahash_request *areq, void *out)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct sha1_state *octx = out;
	int i;

	octx->count = op->byte_count + op->len;

	memcpy(octx->buffer, op->buf, op->len);

	if (op->byte_count) {
		for (i = 0; i < 5; i++)
			octx->state[i] = op->hash[i];
	} else {
		octx->state[0] = SHA1_H0;
		octx->state[1] = SHA1_H1;
		octx->state[2] = SHA1_H2;
		octx->state[3] = SHA1_H3;
		octx->state[4] = SHA1_H4;
	}

	return 0;
}
int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	const struct sha1_state *ictx = in;
	int i;

	sun4i_hash_init(areq);

	op->byte_count = ictx->count & ~0x3F;
	op->len = ictx->count & 0x3F;

	memcpy(op->buf, ictx->buffer, op->len);

	for (i = 0; i < 5; i++)
		op->hash[i] = ictx->state[i];

	return 0;
}
#define SS_HASH_UPDATE 1
#define SS_HASH_FINAL 2
/*
 * sun4i_hash_update: update hash engine
 *
 * Could be used for both SHA1 and MD5.
 * Data is written to the SS in 32-bit steps.
 *
 * Since we cannot leave partial data and hash state in the engine,
 * we need to get the hash state at the end of this function.
 * We can get the hash state every 64 bytes.
 *
 * So the first job is to get the number of bytes to write to the SS
 * modulo 64. The extra bytes go to a temporary buffer op->buf, which
 * holds op->len bytes.
 *
 * So at the beginning of update():
 * if op->len + areq->nbytes < 64
 * => all data is written to the wait buffer (op->buf) and end=0
 * if not, write all data from op->buf to the device and set end so
 * that the total written is a multiple of 64 bytes.
 *
 * Example 1:
 * update1 with 60 bytes => op->len=60
 * update2 with 60 bytes => we need one more word to reach 64 bytes
 * end=4
 * so write all data from op->buf and one word of the SGs,
 * keep the remaining data in op->buf
 * final state: op->len=56
 */
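/*
 * Applying the formula from the code below to example 1:
 * end = ((areq->nbytes + op->len) / 64) * 64 - op->len
 *     = ((60 + 60) / 64) * 64 - 60 = 64 - 60 = 4
 * so exactly one 32-bit word is taken from the SGs and the remaining
 * 56 bytes stay buffered in op->buf.
 */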
static int sun4i_hash(struct ahash_request *areq)
{
	/*
	 * i is the total bytes read from SGs, to be compared to areq->nbytes
	 * i is important because we cannot rely on SG length since the sum of
	 * SG->length could be greater than areq->nbytes
	 *
	 * end is the position where we need to stop writing to the device,
	 * to be compared to i
	 *
	 * in_i: advancement in the current SG
	 */
	unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo;
	unsigned int in_i = 0;
	u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, v, ivmode = 0;
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
	struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
	struct sun4i_ss_ctx *ss = tfmctx->ss;
	struct scatterlist *in_sg = areq->src;
	struct sg_mapping_iter mi;
	int in_r, err = 0;
	size_t copied = 0;
	u32 wb = 0;
	dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
		__func__, crypto_tfm_alg_name(areq->base.tfm),
		op->byte_count, areq->nbytes, op->mode,
		op->len, op->hash[0]);

	if (unlikely(!areq->nbytes) && !(op->flags & SS_HASH_FINAL))
		return 0;

	/* protect against overflow */
	if (unlikely(areq->nbytes > UINT_MAX - op->len)) {
		dev_err(ss->dev, "Cannot process too large request\n");
		return -EINVAL;
	}
	if (op->len + areq->nbytes < 64 && !(op->flags & SS_HASH_FINAL)) {
		/* linearize data to op->buf */
		copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
					    op->buf + op->len, areq->nbytes, 0);
		op->len += copied;
		return 0;
	}

	spin_lock_bh(&ss->slock);
	/*
	 * if some data have been processed before,
	 * we need to restore the partial hash state
	 */
	if (op->byte_count) {
		ivmode = SS_IV_ARBITRARY;
		for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
			writel(op->hash[i], ss->base + SS_IV0 + i * 4);
	}
	/* Enable the device */
	writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);

	if (!(op->flags & SS_HASH_UPDATE))
		goto hash_final;
	/* start of handling data */
	if (!(op->flags & SS_HASH_FINAL)) {
		end = ((areq->nbytes + op->len) / 64) * 64 - op->len;

		if (end > areq->nbytes || areq->nbytes - end > 63) {
			dev_err(ss->dev, "ERROR: Bound error %u %u\n",
				end, areq->nbytes);
			err = -EINVAL;
			goto release_ss;
		}
	} else {
		/* Since we have the final flag, we can go up to modulo 4 */
		if (areq->nbytes < 4)
			goto hash_final;
		end = ((areq->nbytes + op->len) / 4) * 4 - op->len;
	}
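	/*
	 * Worked example for the final-flag case: with op->len = 2 and
	 * areq->nbytes = 11, end = ((11 + 2) / 4) * 4 - 2 = 10, so ten
	 * bytes go to the device now and the last byte is buffered and
	 * padded by hand in the hash_final path below.
	 */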
	/* TODO if SGlen % 4 and !op->len then DMA */
	i = 1;
	while (in_sg && i == 1) {
		if (in_sg->length % 4)
			i = 0;
		in_sg = sg_next(in_sg);
	}
	if (i == 1 && !op->len && areq->nbytes)
		dev_dbg(ss->dev, "We can DMA\n");
	i = 0;
	sg_miter_start(&mi, areq->src, sg_nents(areq->src),
		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
	sg_miter_next(&mi);
	in_i = 0;

	do {
		/*
		 * we need to linearize in two cases:
		 * - the buffer is already used
		 * - the SG does not have enough bytes remaining (< 4)
		 */
		if (op->len || (mi.length - in_i) < 4) {
			/*
			 * if we have entered here we have two reasons to stop
			 * - the buffer is full
			 * - we reached the end
			 */
			while (op->len < 64 && i < end) {
				/* how many bytes we can read from current SG */
				in_r = min(end - i, 64 - op->len);
				in_r = min_t(size_t, mi.length - in_i, in_r);
				memcpy(op->buf + op->len, mi.addr + in_i, in_r);
				op->len += in_r;
				i += in_r;
				in_i += in_r;
				if (in_i == mi.length) {
					sg_miter_next(&mi);
					in_i = 0;
				}
			}
			if (op->len > 3 && !(op->len % 4)) {
				/* write buf to the device */
				writesl(ss->base + SS_RXFIFO, op->buf,
					op->len / 4);
				op->byte_count += op->len;
				op->len = 0;
			}
		}
		if (mi.length - in_i > 3 && i < end) {
			/* how many bytes we can read from current SG */
			in_r = min_t(size_t, mi.length - in_i, areq->nbytes - i);
			in_r = min_t(size_t, ((mi.length - in_i) / 4) * 4, in_r);
			/* how many bytes we can write in the device */
			todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4);
			writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo);
			op->byte_count += todo * 4;
			i += todo * 4;
			in_i += todo * 4;
			rx_cnt -= todo;
			if (!rx_cnt) {
				spaces = readl(ss->base + SS_FCSR);
				rx_cnt = SS_RXFIFO_SPACES(spaces);
			}
			if (in_i == mi.length) {
				sg_miter_next(&mi);
				in_i = 0;
			}
		}
	} while (i < end);
	/*
	 * Now we have written to the device all that we can,
	 * store the remaining bytes in op->buf
	 */
	if ((areq->nbytes - i) < 64) {
		while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
			/* how many bytes we can read from current SG */
			in_r = min(areq->nbytes - i, 64 - op->len);
			in_r = min_t(size_t, mi.length - in_i, in_r);
			memcpy(op->buf + op->len, mi.addr + in_i, in_r);
			op->len += in_r;
			i += in_r;
			in_i += in_r;
			if (in_i == mi.length) {
				sg_miter_next(&mi);
				in_i = 0;
			}
		}
	}

	sg_miter_stop(&mi);
	/*
	 * End of data processing
	 * Now if we have the final flag, go to the finalize part.
	 * If not, store the partial hash
	 */
	if (op->flags & SS_HASH_FINAL)
		goto hash_final;

	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
	i = 0;
	do {
		v = readl(ss->base + SS_CTL);
		i++;
	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
	if (unlikely(i >= SS_TIMEOUT)) {
		dev_err_ratelimited(ss->dev,
				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
				    i, SS_TIMEOUT, v, areq->nbytes);
		err = -EIO;
		goto release_ss;
	}
	/*
	 * The datasheet isn't very clear about when to retrieve the digest. The
	 * bit SS_DATA_END is cleared when the engine has processed the data and
	 * when the digest is computed *but* it doesn't mean the digest is
	 * available in the digest registers. Hence the delay to be sure we can
	 * read it.
	 */
	ndelay(1);

	for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
		op->hash[i] = readl(ss->base + SS_MD0 + i * 4);

	goto release_ss;
/*
 * hash_final: finalize the hashing operation
 *
 * If we have some remaining bytes, we write them.
 * Then ask the SS to finalize the hashing operation
 *
 * I do not check the RX FIFO size in this function since the size is 32
 * after each enabling and this function never writes more than 32 words.
 * If we come from the update part, we cannot have more than
 * 3 remaining bytes to write and the SS is fast enough to not care about it.
 */
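/*
 * Worked bound-check for the claim above: with a full wait buffer
 * (op->len = 63) the code below writes nwait = 15 words, then
 * j = 1 + 14 + 2 = 17 words of padding and length, i.e. exactly
 * 32 words, the RX FIFO capacity.
 */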
hash_final:
	/* write the remaining words of the wait buffer */
	if (op->len) {
		nwait = op->len / 4;
		if (nwait) {
			writesl(ss->base + SS_RXFIFO, op->buf, nwait);
			op->byte_count += 4 * nwait;
		}
		nbw = op->len - 4 * nwait;
		if (nbw) {
			wb = le32_to_cpup((__le32 *)(op->buf + nwait * 4));
			wb &= GENMASK((nbw * 8) - 1, 0);

			op->byte_count += nbw;
		}
	}
	/* append the padding bit (0x80) after the last byte of the nbw buffer */
	wb |= ((1 << 7) << (nbw * 8));
	((__le32 *)bf)[j++] = cpu_to_le32(wb);
	/*
	 * number of bytes to pad to obtain 64 bytes, minus 8 (the size)
	 * minus 4 (the final '1')
	 * I took the operations from other MD5/SHA1 implementations
	 */

	/* last block size */
	fill = 64 - (op->byte_count % 64);
	min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32));

	/* if we can't fill all data, jump to the next 64 block */
	if (fill < min_fill)
		fill += 64;

	j += (fill - min_fill) / sizeof(u32);
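	/*
	 * Worked example: op->byte_count = 64 and nbw = 0 give
	 * fill = 64 - 0 = 64 and min_fill = 12, so j advances by
	 * (64 - 12) / 4 = 13 zero words; with the padding word already
	 * stored and the two length words below, exactly one full
	 * 64-byte block (16 words) is sent.
	 */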
	/* write the length of data */
	if (op->mode == SS_OP_SHA1) {
		__be64 *bits = (__be64 *)&bf[j];
		*bits = cpu_to_be64(op->byte_count << 3);
		j += 2;
	} else {
		__le64 *bits = (__le64 *)&bf[j];
		*bits = cpu_to_le64(op->byte_count << 3);
		j += 2;
	}
	writesl(ss->base + SS_RXFIFO, bf, j);
	/* Tell the SS to stop the hashing */
	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
	/*
	 * Wait for the SS to finish the hash.
	 * The timeout could happen only in case of bad overclocking
	 * or driver bug.
	 */
	i = 0;
	do {
		v = readl(ss->base + SS_CTL);
		i++;
	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
	if (unlikely(i >= SS_TIMEOUT)) {
		dev_err_ratelimited(ss->dev,
				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
				    i, SS_TIMEOUT, v, areq->nbytes);
		err = -EIO;
		goto release_ss;
	}
	/*
	 * The datasheet isn't very clear about when to retrieve the digest. The
	 * bit SS_DATA_END is cleared when the engine has processed the data and
	 * when the digest is computed *but* it doesn't mean the digest is
	 * available in the digest registers. Hence the delay to be sure we can
	 * read it.
	 */
	ndelay(1);
	/* Get the hash from the device */
	if (op->mode == SS_OP_SHA1) {
		for (i = 0; i < 5; i++) {
			v = readl(ss->base + SS_MD0 + i * 4);
			if (ss->variant->sha1_in_be)
				put_unaligned_le32(v, areq->result + i * 4);
			else
				put_unaligned_be32(v, areq->result + i * 4);
		}
	} else {
		for (i = 0; i < 4; i++) {
			v = readl(ss->base + SS_MD0 + i * 4);
			put_unaligned_le32(v, areq->result + i * 4);
		}
	}
release_ss:
	writel(0, ss->base + SS_CTL);
	spin_unlock_bh(&ss->slock);
	return err;
}
int sun4i_hash_final(struct ahash_request *areq)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);

	op->flags = SS_HASH_FINAL;
	return sun4i_hash(areq);
}
int sun4i_hash_update(struct ahash_request *areq)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);

	op->flags = SS_HASH_UPDATE;
	return sun4i_hash(areq);
}
/* sun4i_hash_finup: finalize hashing operation after an update */
int sun4i_hash_finup(struct ahash_request *areq)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);

	op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
	return sun4i_hash(areq);
}
/* combo of init/update/final functions */
int sun4i_hash_digest(struct ahash_request *areq)
{
	int err;
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);

	err = sun4i_hash_init(areq);
	if (err)
		return err;

	op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
	return sun4i_hash(areq);
}