1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Cryptographic API for the NX-842 hardware compression.
5 * Copyright (C) IBM Corporation, 2011-2015
7 * Designer of the Power data compression engine:
8 * Bulent Abali <abali@us.ibm.com>
10 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
11 * Seth Jennings <sjenning@linux.vnet.ibm.com>
13 * Rewrite: Dan Streetman <ddstreet@ieee.org>
15 * This is an interface to the NX-842 compression hardware in PowerPC
16 * processors. Most of the complexity of this driver is due to the fact that
17 * the NX-842 compression hardware requires the input and output data buffers
18 * to be specifically aligned, to be a specific multiple in length, and within
19 * specific minimum and maximum lengths. Those restrictions, provided by the
20 * nx-842 driver via nx842_constraints, mean this driver must use bounce
21 * buffers and headers to correct misaligned in or out buffers, and to split
22 * input buffers that are too large.
24 * This driver will fall back to software decompression if the hardware
25 * decompression fails, so this driver's decompression should never fail as
26 * long as the provided compressed buffer is valid. Any compressed buffer
27 * created by this driver will have a header (except ones where the input
28 * perfectly matches the constraints); so users of this driver cannot simply
29 * pass a compressed buffer created by this driver over to the 842 software
30 * decompression library. Instead, users must use this driver to decompress;
31 * if the hardware fails or is unavailable, the compressed buffer will be
32 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
33 * software decompression library.
35 * This does not fall back to software compression, however, since the caller
36 * of this function is specifically requesting hardware compression; if the
37 * hardware compression fails, the caller can fall back to software
38 * compression, and the raw 842 compressed buffer that the software compressor
39 * creates can be passed to this driver for hardware decompression; any
40 * buffer without our specific header magic is assumed to be a raw 842 buffer
41 * and passed directly to the hardware. Note that the software compression
42 * library will produce a compressed buffer that is incompatible with the
43 * hardware decompressor if the original input buffer length is not a multiple
44 * of 8; if such a compressed buffer is passed to this driver for
45 * decompression, the hardware will reject it and this driver will then pass
46 * it over to the software library for decompression.
49 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51 #include <linux/vmalloc.h>
52 #include <linux/sw842.h>
53 #include <linux/spinlock.h>
57 /* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
58 * template (see lib/842/842.h), so this magic number will never appear at
59 * the start of a raw 842 compressed buffer. That is important, as any buffer
60 * passed to us without this magic is assumed to be a raw 842 compressed
61 * buffer, and passed directly to the hardware to decompress.
63 #define NX842_CRYPTO_MAGIC (0xf842)
64 #define NX842_CRYPTO_HEADER_SIZE(g) \
65 (sizeof(struct nx842_crypto_header) + \
66 sizeof(struct nx842_crypto_header_group) * (g))
67 #define NX842_CRYPTO_HEADER_MAX_SIZE \
68 NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
70 /* bounce buffer size */
71 #define BOUNCE_BUFFER_ORDER (2)
72 #define BOUNCE_BUFFER_SIZE \
73 ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
75 /* try longer on comp because we can fallback to sw decomp if hw is busy */
76 #define COMP_BUSY_TIMEOUT (250) /* ms */
77 #define DECOMP_BUSY_TIMEOUT (50) /* ms */
// Cursor state passed between the compress/decompress entry points and their
// per-group helpers.
// NOTE(review): the member list is not visible in this excerpt; the code in
// this file uses p->in, p->iremain, p->out and p->oremain.
79 struct nx842_crypto_param
{
// update_param() - final step of compress() and decompress().
// compress() calls it with (slen, dskip + dlen) and decompress() with
// (slen + padding, dlen); both return its result directly.
// The two visible checks compare the requested amounts with what is left in
// p->iremain and p->oremain.
// NOTE(review): the statements guarded by these checks and the rest of the
// body are not visible in this excerpt - confirm against the full file.
87 static int update_param(struct nx842_crypto_param
*p
,
88 unsigned int slen
, unsigned int dlen
)
// slen is larger than the remaining input
90 if (p
->iremain
< slen
)
// dlen is larger than the remaining output space
92 if (p
->oremain
< dlen
)
104 int nx842_crypto_init(struct crypto_tfm
*tfm
, struct nx842_driver
*driver
)
106 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
108 spin_lock_init(&ctx
->lock
);
109 ctx
->driver
= driver
;
110 ctx
->wmem
= kmalloc(driver
->workmem_size
, GFP_KERNEL
);
111 ctx
->sbounce
= (u8
*)__get_free_pages(GFP_KERNEL
, BOUNCE_BUFFER_ORDER
);
112 ctx
->dbounce
= (u8
*)__get_free_pages(GFP_KERNEL
, BOUNCE_BUFFER_ORDER
);
113 if (!ctx
->wmem
|| !ctx
->sbounce
|| !ctx
->dbounce
) {
115 free_page((unsigned long)ctx
->sbounce
);
116 free_page((unsigned long)ctx
->dbounce
);
122 EXPORT_SYMBOL_GPL(nx842_crypto_init
);
124 void nx842_crypto_exit(struct crypto_tfm
*tfm
)
126 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
129 free_page((unsigned long)ctx
->sbounce
);
130 free_page((unsigned long)ctx
->dbounce
);
132 EXPORT_SYMBOL_GPL(nx842_crypto_exit
);
134 static void check_constraints(struct nx842_constraints
*c
)
136 /* limit maximum, to always have enough bounce buffer to decompress */
137 if (c
->maximum
> BOUNCE_BUFFER_SIZE
)
138 c
->maximum
= BOUNCE_BUFFER_SIZE
;
// nx842_crypto_add_header() - place the header described by @hdr at @buf.
// The header size s is derived from hdr->groups. The first group's padding
// field must be at least s, because the compress step reserves that room;
// otherwise an internal error is logged. s bytes of @buf are hex-dumped at
// debug level.
// NOTE(review): the copy into @buf and the return statements are not visible
// in this excerpt - confirm against the full file.
141 static int nx842_crypto_add_header(struct nx842_crypto_header
*hdr
, u8
*buf
)
143 int s
= NX842_CRYPTO_HEADER_SIZE(hdr
->groups
);
145 /* compress should have added space for header */
146 if (s
> be16_to_cpu(hdr
->group
[0].padding
)) {
147 pr_err("Internal error: no space for header\n");
153 print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET
, 16, 1, buf
, s
, 0);
// compress() - compress one group of input through the hardware driver.
// Starts from the remaining input (p->iremain) and output (p->oremain),
// adjusts lengths and buffers to the constraints in @c, calls
// ctx->driver->compress(), then records the result in the group entry @g and
// returns update_param(p, slen, dskip + dlen).
// @hdrsize is the room needed in front of this group for the header.
// NOTE(review): several statements (error returns, bounce-buffer pointer
// assignments, the start of the retry loop) are not visible in this excerpt.
158 static int compress(struct nx842_crypto_ctx
*ctx
,
159 struct nx842_crypto_param
*p
,
160 struct nx842_crypto_header_group
*g
,
161 struct nx842_constraints
*c
,
163 unsigned int hdrsize
)
165 unsigned int slen
= p
->iremain
, dlen
= p
->oremain
, tmplen
;
166 unsigned int adj_slen
= slen
;
167 u8
*src
= p
->in
, *dst
= p
->out
;
// No output space left, or not enough for the header plus one minimum-size
// chunk.
174 if (p
->oremain
== 0 || hdrsize
+ c
->minimum
> dlen
)
// adj_slen is the input length padded up to the constraints; input longer
// than c->maximum is cut down to c->maximum for this group.
177 if (slen
% c
->multiple
)
178 adj_slen
= round_up(slen
, c
->multiple
);
179 if (slen
< c
->minimum
)
180 adj_slen
= c
->minimum
;
181 if (slen
> c
->maximum
)
182 adj_slen
= slen
= c
->maximum
;
// Padding is needed or the source is misaligned: stage the input in
// ctx->sbounce, zero-filling the bytes between slen and adj_slen.
183 if (adj_slen
> slen
|| (u64
)src
% c
->alignment
) {
184 adj_slen
= min(adj_slen
, BOUNCE_BUFFER_SIZE
);
185 slen
= min(slen
, BOUNCE_BUFFER_SIZE
);
187 memset(ctx
->sbounce
+ slen
, 0, adj_slen
- slen
);
188 memcpy(ctx
->sbounce
, src
, slen
);
191 pr_debug("using comp sbounce buffer, len %x\n", slen
);
// Misaligned destination: dskip is the distance to the next aligned address.
197 if ((u64
)dst
% c
->alignment
) {
198 dskip
= (int)(PTR_ALIGN(dst
, c
->alignment
) - dst
);
// Output length is rounded down to a multiple of c->multiple; when the result
// is below c->minimum the dbounce path is taken (per the debug message).
202 if (dlen
% c
->multiple
)
203 dlen
= round_down(dlen
, c
->multiple
);
204 if (dlen
< c
->minimum
) {
207 dlen
= min(p
->oremain
, BOUNCE_BUFFER_SIZE
);
208 dlen
= round_down(dlen
, c
->multiple
);
210 pr_debug("using comp dbounce buffer, len %x\n", dlen
);
212 if (dlen
> c
->maximum
)
// Call the driver, retrying while it reports -EBUSY until COMP_BUSY_TIMEOUT
// milliseconds have passed.
216 timeout
= ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT
);
218 dlen
= tmplen
; /* reset dlen, if we're retrying */
219 ret
= ctx
->driver
->compress(src
, slen
, dst
, &dlen
, ctx
->wmem
);
220 /* possibly we should reduce the slen here, instead of
221 * retrying with the dbounce buffer?
223 if (ret
== -ENOSPC
&& dst
!= ctx
->dbounce
)
225 } while (ret
== -EBUSY
&& ktime_before(ktime_get(), timeout
));
// Output was produced in the bounce buffer: copy it into the caller buffer,
// past the alignment gap.
231 if (dst
== ctx
->dbounce
)
232 memcpy(p
->out
+ dskip
, dst
, dlen
);
// Record this group in its header entry (fields are stored big-endian).
234 g
->padding
= cpu_to_be16(dskip
);
235 g
->compressed_length
= cpu_to_be32(dlen
);
236 g
->uncompressed_length
= cpu_to_be32(slen
);
// More bytes were compressed than input remained: the excess is reported
// through *ignore.
238 if (p
->iremain
< slen
) {
239 *ignore
= slen
- p
->iremain
;
243 pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
244 slen
, *ignore
, dlen
, dskip
);
246 return update_param(p
, slen
, dskip
+ dlen
);
// nx842_crypto_compress() - exported compression entry point.
// Works on a local copy of the driver constraints (adjusted by
// check_constraints()), holds ctx->lock via spin_lock_bh() around the work,
// and calls compress() once per group while input remains. The header kept in
// ctx->header gets magic NX842_CRYPTO_MAGIC and is written to @dst through
// nx842_crypto_add_header().
// NOTE(review): parameter setup, error handling and the final *dlen update
// are not visible in this excerpt.
249 int nx842_crypto_compress(struct crypto_tfm
*tfm
,
250 const u8
*src
, unsigned int slen
,
251 u8
*dst
, unsigned int *dlen
)
253 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
254 struct nx842_crypto_header
*hdr
=
255 container_of(&ctx
->header
,
256 struct nx842_crypto_header
, hdr
);
257 struct nx842_crypto_param p
;
258 struct nx842_constraints c
= *ctx
->driver
->constraints
;
259 unsigned int groups
, hdrsize
, h
;
264 check_constraints(&c
);
// One group per c.maximum-sized piece of input, capped at
// NX842_CRYPTO_GROUP_MAX; hdrsize is the header size for that many groups.
274 groups
= min_t(unsigned int, NX842_CRYPTO_GROUP_MAX
,
275 DIV_ROUND_UP(p
.iremain
, c
.maximum
));
276 hdrsize
= NX842_CRYPTO_HEADER_SIZE(groups
);
278 spin_lock_bh(&ctx
->lock
);
280 /* skip adding header if the buffers meet all constraints */
281 add_header
= (p
.iremain
% c
.multiple
||
282 p
.iremain
< c
.minimum
||
283 p
.iremain
> c
.maximum
||
284 (u64
)p
.in
% c
.alignment
||
285 p
.oremain
% c
.multiple
||
286 p
.oremain
< c
.minimum
||
287 p
.oremain
> c
.maximum
||
288 (u64
)p
.out
% c
.alignment
);
290 hdr
->magic
= cpu_to_be16(NX842_CRYPTO_MAGIC
);
// Compress group by group until all input has been consumed.
294 while (p
.iremain
> 0) {
297 if (hdr
->groups
> NX842_CRYPTO_GROUP_MAX
)
300 /* header goes before first group */
301 h
= !n
&& add_header
? hdrsize
: 0;
304 pr_warn("internal error, ignore is set %x\n", ignore
);
306 ret
= compress(ctx
, &p
, &hdr
->group
[n
], &c
, &ignore
, h
);
// Without a header only a single group can be represented.
311 if (!add_header
&& hdr
->groups
> 1) {
312 pr_err("Internal error: No header but multiple groups\n");
317 /* ignore indicates the input stream needed to be padded */
318 hdr
->ignore
= cpu_to_be16(ignore
);
320 pr_debug("marked %d bytes as ignore\n", ignore
);
323 ret
= nx842_crypto_add_header(hdr
, dst
);
329 pr_debug("compress total slen %x dlen %x\n", slen
, *dlen
);
332 spin_unlock_bh(&ctx
->lock
);
335 EXPORT_SYMBOL_GPL(nx842_crypto_compress
);
// decompress() - decompress the single group described by @g.
// Compressed length, expected uncompressed length and leading padding are
// read from the big-endian fields of @g. The hardware driver is called with a
// retry on -EBUSY for up to DECOMP_BUSY_TIMEOUT milliseconds; the function
// also contains a software path through sw842_decompress(). It returns
// update_param(p, slen + padding, dlen).
// NOTE(review): error returns, bounce-buffer pointer assignments and the
// conditions selecting the software path are not visible in this excerpt.
337 static int decompress(struct nx842_crypto_ctx
*ctx
,
338 struct nx842_crypto_param
*p
,
339 struct nx842_crypto_header_group
*g
,
340 struct nx842_constraints
*c
,
343 unsigned int slen
= be32_to_cpu(g
->compressed_length
);
344 unsigned int required_len
= be32_to_cpu(g
->uncompressed_length
);
345 unsigned int dlen
= p
->oremain
, tmplen
;
346 unsigned int adj_slen
= slen
;
347 u8
*src
= p
->in
, *dst
= p
->out
;
348 u16 padding
= be16_to_cpu(g
->padding
);
349 int ret
, spadding
= 0;
// A group with a zero compressed or uncompressed length.
352 if (!slen
|| !required_len
)
// The group (padding plus compressed data) must fit in the remaining input.
355 if (p
->iremain
<= 0 || padding
+ slen
> p
->iremain
)
// The useful output (expected length minus ignored tail) must fit in the
// remaining output space.
// NOTE(review): required_len is unsigned; if ignore can exceed it this
// subtraction wraps - confirm ignore is validated against the group length.
358 if (p
->oremain
<= 0 || required_len
- ignore
> p
->oremain
)
// adj_slen is the compressed length padded up to the constraints.
363 if (slen
% c
->multiple
)
364 adj_slen
= round_up(slen
, c
->multiple
);
365 if (slen
< c
->minimum
)
366 adj_slen
= c
->minimum
;
367 if (slen
> c
->maximum
)
369 if (slen
< adj_slen
|| (u64
)src
% c
->alignment
) {
370 /* we can append padding bytes because the 842 format defines
371 * an "end" template (see lib/842/842_decompress.c) and will
372 * ignore any bytes following it.
375 memset(ctx
->sbounce
+ slen
, 0, adj_slen
- slen
);
376 memcpy(ctx
->sbounce
, src
, slen
);
378 spadding
= adj_slen
- slen
;
380 pr_debug("using decomp sbounce buffer, len %x\n", slen
);
// Output length is rounded down to a multiple of c->multiple; when that is
// too short for the expected data, or the destination is misaligned, the
// dbounce path is taken (per the debug message).
383 if (dlen
% c
->multiple
)
384 dlen
= round_down(dlen
, c
->multiple
);
385 if (dlen
< required_len
|| (u64
)dst
% c
->alignment
) {
387 dlen
= min(required_len
, BOUNCE_BUFFER_SIZE
);
388 pr_debug("using decomp dbounce buffer, len %x\n", dlen
);
390 if (dlen
< c
->minimum
)
392 if (dlen
> c
->maximum
)
// Call the driver, retrying while it reports -EBUSY until
// DECOMP_BUSY_TIMEOUT milliseconds have passed.
396 timeout
= ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT
);
398 dlen
= tmplen
; /* reset dlen, if we're retrying */
399 ret
= ctx
->driver
->decompress(src
, slen
, dst
, &dlen
, ctx
->wmem
);
400 } while (ret
== -EBUSY
&& ktime_before(ktime_get(), timeout
));
403 /* reset everything, sw doesn't have constraints */
404 src
= p
->in
+ padding
;
405 slen
= be32_to_cpu(g
->compressed_length
);
409 if (dlen
< required_len
) { /* have ignore bytes */
411 dlen
= BOUNCE_BUFFER_SIZE
;
413 pr_info_ratelimited("using software 842 decompression\n");
414 ret
= sw842_decompress(src
, slen
, dst
, &dlen
);
423 pr_debug("ignoring last %x bytes\n", ignore
);
// Output was produced in the bounce buffer: copy it to the caller buffer.
425 if (dst
== ctx
->dbounce
)
426 memcpy(p
->out
, dst
, dlen
);
428 pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
429 slen
, padding
, dlen
, ignore
);
431 return update_param(p
, slen
+ padding
, dlen
);
// nx842_crypto_decompress() - exported decompression entry point.
// Works on a local copy of the driver constraints (adjusted by
// check_constraints()) and holds ctx->lock via spin_lock_bh() around the
// work. A buffer that does not begin with NX842_CRYPTO_MAGIC is handled as a
// single group built on the stack; otherwise the header is validated, copied
// into ctx->header and each of its groups is passed to decompress().
// NOTE(review): parameter setup, error handling and the final *dlen update
// are not visible in this excerpt.
434 int nx842_crypto_decompress(struct crypto_tfm
*tfm
,
435 const u8
*src
, unsigned int slen
,
436 u8
*dst
, unsigned int *dlen
)
438 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
439 struct nx842_crypto_header
*hdr
;
440 struct nx842_crypto_param p
;
441 struct nx842_constraints c
= *ctx
->driver
->constraints
;
445 check_constraints(&c
);
455 hdr
= (struct nx842_crypto_header
*)src
;
457 spin_lock_bh(&ctx
->lock
);
459 /* If it doesn't start with our header magic number, assume it's a raw
460 * 842 compressed buffer and pass it directly to the hardware driver
462 if (be16_to_cpu(hdr
->magic
) != NX842_CRYPTO_MAGIC
) {
// Synthetic group covering all remaining input and output, with no ignored
// bytes.
463 struct nx842_crypto_header_group g
= {
465 .compressed_length
= cpu_to_be32(p
.iremain
),
466 .uncompressed_length
= cpu_to_be32(p
.oremain
),
469 ret
= decompress(ctx
, &p
, &g
, &c
, 0);
477 pr_err("header has no groups\n");
481 if (hdr
->groups
> NX842_CRYPTO_GROUP_MAX
) {
482 pr_err("header has too many groups %x, max %x\n",
483 hdr
->groups
, NX842_CRYPTO_GROUP_MAX
);
// The whole header must fit inside the supplied source buffer.
488 hdr_len
= NX842_CRYPTO_HEADER_SIZE(hdr
->groups
);
489 if (hdr_len
> slen
) {
// Work from a copy of the header stored in the context.
494 memcpy(&ctx
->header
, src
, hdr_len
);
495 hdr
= container_of(&ctx
->header
, struct nx842_crypto_header
, hdr
);
497 for (n
= 0; n
< hdr
->groups
; n
++) {
498 /* ignore applies to last group */
499 if (n
+ 1 == hdr
->groups
)
500 ignore
= be16_to_cpu(hdr
->ignore
);
502 ret
= decompress(ctx
, &p
, &hdr
->group
[n
], &c
, ignore
);
510 pr_debug("decompress total slen %x dlen %x\n", slen
, *dlen
);
515 spin_unlock_bh(&ctx
->lock
);
519 EXPORT_SYMBOL_GPL(nx842_crypto_decompress
);
// Module metadata: license, description and author.
521 MODULE_LICENSE("GPL");
522 MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
523 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");