1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Cryptographic API for the NX-842 hardware compression.
5 * Copyright (C) IBM Corporation, 2011-2015
7 * Designer of the Power data compression engine:
8 * Bulent Abali <abali@us.ibm.com>
10 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
11 * Seth Jennings <sjenning@linux.vnet.ibm.com>
13 * Rewrite: Dan Streetman <ddstreet@ieee.org>
15 * This is an interface to the NX-842 compression hardware in PowerPC
16 * processors. Most of the complexity of this driver is due to the fact that
17 * the NX-842 compression hardware requires the input and output data buffers
18 * to be specifically aligned, to be a specific multiple in length, and within
19 * specific minimum and maximum lengths. Those restrictions, provided by the
20 * nx-842 driver via nx842_constraints, mean this driver must use bounce
21 * buffers and headers to correct misaligned in or out buffers, and to split
22 * input buffers that are too large.
24 * This driver will fall back to software decompression if the hardware
25 * decompression fails, so this driver's decompression should never fail as
26 * long as the provided compressed buffer is valid. Any compressed buffer
27 * created by this driver will have a header (except ones where the input
28 * perfectly matches the constraints); so users of this driver cannot simply
29 * pass a compressed buffer created by this driver over to the 842 software
30 * decompression library. Instead, users must use this driver to decompress;
31 * if the hardware fails or is unavailable, the compressed buffer will be
32 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
33 * software decompression library.
35 * This does not fall back to software compression, however, since the caller
36 * of this function is specifically requesting hardware compression; if the
37 * hardware compression fails, the caller can fall back to software
38 * compression, and the raw 842 compressed buffer that the software compressor
39 * creates can be passed to this driver for hardware decompression; any
40 * buffer without our specific header magic is assumed to be a raw 842 buffer
41 * and passed directly to the hardware. Note that the software compression
42 * library will produce a compressed buffer that is incompatible with the
43 * hardware decompressor if the original input buffer length is not a multiple
44 * of 8; if such a compressed buffer is passed to this driver for
45 * decompression, the hardware will reject it and this driver will then pass
46 * it over to the software library for decompression.
49 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51 #include <linux/vmalloc.h>
52 #include <linux/sw842.h>
53 #include <linux/spinlock.h>
57 /* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
58 * template (see lib/842/842.h), so this magic number will never appear at
59 * the start of a raw 842 compressed buffer. That is important, as any buffer
60 * passed to us without this magic is assumed to be a raw 842 compressed
61 * buffer, and passed directly to the hardware to decompress.
63 #define NX842_CRYPTO_MAGIC (0xf842)
64 #define NX842_CRYPTO_HEADER_SIZE(g) \
65 (sizeof(struct nx842_crypto_header) + \
66 sizeof(struct nx842_crypto_header_group) * (g))
67 #define NX842_CRYPTO_HEADER_MAX_SIZE \
68 NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
70 /* bounce buffer size */
71 #define BOUNCE_BUFFER_ORDER (2)
72 #define BOUNCE_BUFFER_SIZE \
73 ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
75 /* try longer on comp because we can fall back to sw decomp if hw is busy */
76 #define COMP_BUSY_TIMEOUT (250) /* ms */
77 #define DECOMP_BUSY_TIMEOUT (50) /* ms */
79 struct nx842_crypto_param
{
87 static int update_param(struct nx842_crypto_param
*p
,
88 unsigned int slen
, unsigned int dlen
)
90 if (p
->iremain
< slen
)
92 if (p
->oremain
< dlen
)
104 int nx842_crypto_init(struct crypto_tfm
*tfm
, struct nx842_driver
*driver
)
106 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
108 spin_lock_init(&ctx
->lock
);
109 ctx
->driver
= driver
;
110 ctx
->wmem
= kmalloc(driver
->workmem_size
, GFP_KERNEL
);
111 ctx
->sbounce
= (u8
*)__get_free_pages(GFP_KERNEL
, BOUNCE_BUFFER_ORDER
);
112 ctx
->dbounce
= (u8
*)__get_free_pages(GFP_KERNEL
, BOUNCE_BUFFER_ORDER
);
113 if (!ctx
->wmem
|| !ctx
->sbounce
|| !ctx
->dbounce
) {
115 free_page((unsigned long)ctx
->sbounce
);
116 free_page((unsigned long)ctx
->dbounce
);
122 EXPORT_SYMBOL_GPL(nx842_crypto_init
);
124 void nx842_crypto_exit(struct crypto_tfm
*tfm
)
126 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
129 free_page((unsigned long)ctx
->sbounce
);
130 free_page((unsigned long)ctx
->dbounce
);
132 EXPORT_SYMBOL_GPL(nx842_crypto_exit
);
134 static void check_constraints(struct nx842_constraints
*c
)
136 /* limit maximum, to always have enough bounce buffer to decompress */
137 if (c
->maximum
> BOUNCE_BUFFER_SIZE
)
138 c
->maximum
= BOUNCE_BUFFER_SIZE
;
141 static int nx842_crypto_add_header(struct nx842_crypto_header
*hdr
, u8
*buf
)
143 int s
= NX842_CRYPTO_HEADER_SIZE(hdr
->groups
);
145 /* compress should have added space for header */
146 if (s
> be16_to_cpu(hdr
->group
[0].padding
)) {
147 pr_err("Internal error: no space for header\n");
153 print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET
, 16, 1, buf
, s
, 0);
158 static int compress(struct nx842_crypto_ctx
*ctx
,
159 struct nx842_crypto_param
*p
,
160 struct nx842_crypto_header_group
*g
,
161 struct nx842_constraints
*c
,
163 unsigned int hdrsize
)
165 unsigned int slen
= p
->iremain
, dlen
= p
->oremain
, tmplen
;
166 unsigned int adj_slen
= slen
;
167 u8
*src
= p
->in
, *dst
= p
->out
;
174 if (p
->oremain
== 0 || hdrsize
+ c
->minimum
> dlen
)
177 if (slen
% c
->multiple
)
178 adj_slen
= round_up(slen
, c
->multiple
);
179 if (slen
< c
->minimum
)
180 adj_slen
= c
->minimum
;
181 if (slen
> c
->maximum
)
182 adj_slen
= slen
= c
->maximum
;
183 if (adj_slen
> slen
|| (u64
)src
% c
->alignment
) {
184 adj_slen
= min(adj_slen
, BOUNCE_BUFFER_SIZE
);
185 slen
= min(slen
, BOUNCE_BUFFER_SIZE
);
187 memset(ctx
->sbounce
+ slen
, 0, adj_slen
- slen
);
188 memcpy(ctx
->sbounce
, src
, slen
);
191 pr_debug("using comp sbounce buffer, len %x\n", slen
);
197 if ((u64
)dst
% c
->alignment
) {
198 dskip
= (int)(PTR_ALIGN(dst
, c
->alignment
) - dst
);
202 if (dlen
% c
->multiple
)
203 dlen
= round_down(dlen
, c
->multiple
);
204 if (dlen
< c
->minimum
) {
207 dlen
= min(p
->oremain
, BOUNCE_BUFFER_SIZE
);
208 dlen
= round_down(dlen
, c
->multiple
);
210 pr_debug("using comp dbounce buffer, len %x\n", dlen
);
212 if (dlen
> c
->maximum
)
216 timeout
= ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT
);
218 dlen
= tmplen
; /* reset dlen, if we're retrying */
219 ret
= ctx
->driver
->compress(src
, slen
, dst
, &dlen
, ctx
->wmem
);
220 /* possibly we should reduce the slen here, instead of
221 * retrying with the dbounce buffer?
223 if (ret
== -ENOSPC
&& dst
!= ctx
->dbounce
)
225 } while (ret
== -EBUSY
&& ktime_before(ktime_get(), timeout
));
231 if (dst
== ctx
->dbounce
)
232 memcpy(p
->out
+ dskip
, dst
, dlen
);
234 g
->padding
= cpu_to_be16(dskip
);
235 g
->compressed_length
= cpu_to_be32(dlen
);
236 g
->uncompressed_length
= cpu_to_be32(slen
);
238 if (p
->iremain
< slen
) {
239 *ignore
= slen
- p
->iremain
;
243 pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
244 slen
, *ignore
, dlen
, dskip
);
246 return update_param(p
, slen
, dskip
+ dlen
);
249 int nx842_crypto_compress(struct crypto_tfm
*tfm
,
250 const u8
*src
, unsigned int slen
,
251 u8
*dst
, unsigned int *dlen
)
253 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
254 struct nx842_crypto_header
*hdr
= &ctx
->header
;
255 struct nx842_crypto_param p
;
256 struct nx842_constraints c
= *ctx
->driver
->constraints
;
257 unsigned int groups
, hdrsize
, h
;
262 check_constraints(&c
);
272 groups
= min_t(unsigned int, NX842_CRYPTO_GROUP_MAX
,
273 DIV_ROUND_UP(p
.iremain
, c
.maximum
));
274 hdrsize
= NX842_CRYPTO_HEADER_SIZE(groups
);
276 spin_lock_bh(&ctx
->lock
);
278 /* skip adding header if the buffers meet all constraints */
279 add_header
= (p
.iremain
% c
.multiple
||
280 p
.iremain
< c
.minimum
||
281 p
.iremain
> c
.maximum
||
282 (u64
)p
.in
% c
.alignment
||
283 p
.oremain
% c
.multiple
||
284 p
.oremain
< c
.minimum
||
285 p
.oremain
> c
.maximum
||
286 (u64
)p
.out
% c
.alignment
);
288 hdr
->magic
= cpu_to_be16(NX842_CRYPTO_MAGIC
);
292 while (p
.iremain
> 0) {
295 if (hdr
->groups
> NX842_CRYPTO_GROUP_MAX
)
298 /* header goes before first group */
299 h
= !n
&& add_header
? hdrsize
: 0;
302 pr_warn("internal error, ignore is set %x\n", ignore
);
304 ret
= compress(ctx
, &p
, &hdr
->group
[n
], &c
, &ignore
, h
);
309 if (!add_header
&& hdr
->groups
> 1) {
310 pr_err("Internal error: No header but multiple groups\n");
315 /* ignore indicates the input stream needed to be padded */
316 hdr
->ignore
= cpu_to_be16(ignore
);
318 pr_debug("marked %d bytes as ignore\n", ignore
);
321 ret
= nx842_crypto_add_header(hdr
, dst
);
327 pr_debug("compress total slen %x dlen %x\n", slen
, *dlen
);
330 spin_unlock_bh(&ctx
->lock
);
333 EXPORT_SYMBOL_GPL(nx842_crypto_compress
);
335 static int decompress(struct nx842_crypto_ctx
*ctx
,
336 struct nx842_crypto_param
*p
,
337 struct nx842_crypto_header_group
*g
,
338 struct nx842_constraints
*c
,
341 unsigned int slen
= be32_to_cpu(g
->compressed_length
);
342 unsigned int required_len
= be32_to_cpu(g
->uncompressed_length
);
343 unsigned int dlen
= p
->oremain
, tmplen
;
344 unsigned int adj_slen
= slen
;
345 u8
*src
= p
->in
, *dst
= p
->out
;
346 u16 padding
= be16_to_cpu(g
->padding
);
347 int ret
, spadding
= 0;
350 if (!slen
|| !required_len
)
353 if (p
->iremain
<= 0 || padding
+ slen
> p
->iremain
)
356 if (p
->oremain
<= 0 || required_len
- ignore
> p
->oremain
)
361 if (slen
% c
->multiple
)
362 adj_slen
= round_up(slen
, c
->multiple
);
363 if (slen
< c
->minimum
)
364 adj_slen
= c
->minimum
;
365 if (slen
> c
->maximum
)
367 if (slen
< adj_slen
|| (u64
)src
% c
->alignment
) {
368 /* we can append padding bytes because the 842 format defines
369 * an "end" template (see lib/842/842_decompress.c) and will
370 * ignore any bytes following it.
373 memset(ctx
->sbounce
+ slen
, 0, adj_slen
- slen
);
374 memcpy(ctx
->sbounce
, src
, slen
);
376 spadding
= adj_slen
- slen
;
378 pr_debug("using decomp sbounce buffer, len %x\n", slen
);
381 if (dlen
% c
->multiple
)
382 dlen
= round_down(dlen
, c
->multiple
);
383 if (dlen
< required_len
|| (u64
)dst
% c
->alignment
) {
385 dlen
= min(required_len
, BOUNCE_BUFFER_SIZE
);
386 pr_debug("using decomp dbounce buffer, len %x\n", dlen
);
388 if (dlen
< c
->minimum
)
390 if (dlen
> c
->maximum
)
394 timeout
= ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT
);
396 dlen
= tmplen
; /* reset dlen, if we're retrying */
397 ret
= ctx
->driver
->decompress(src
, slen
, dst
, &dlen
, ctx
->wmem
);
398 } while (ret
== -EBUSY
&& ktime_before(ktime_get(), timeout
));
401 /* reset everything, sw doesn't have constraints */
402 src
= p
->in
+ padding
;
403 slen
= be32_to_cpu(g
->compressed_length
);
407 if (dlen
< required_len
) { /* have ignore bytes */
409 dlen
= BOUNCE_BUFFER_SIZE
;
411 pr_info_ratelimited("using software 842 decompression\n");
412 ret
= sw842_decompress(src
, slen
, dst
, &dlen
);
421 pr_debug("ignoring last %x bytes\n", ignore
);
423 if (dst
== ctx
->dbounce
)
424 memcpy(p
->out
, dst
, dlen
);
426 pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
427 slen
, padding
, dlen
, ignore
);
429 return update_param(p
, slen
+ padding
, dlen
);
432 int nx842_crypto_decompress(struct crypto_tfm
*tfm
,
433 const u8
*src
, unsigned int slen
,
434 u8
*dst
, unsigned int *dlen
)
436 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
437 struct nx842_crypto_header
*hdr
;
438 struct nx842_crypto_param p
;
439 struct nx842_constraints c
= *ctx
->driver
->constraints
;
443 check_constraints(&c
);
453 hdr
= (struct nx842_crypto_header
*)src
;
455 spin_lock_bh(&ctx
->lock
);
457 /* If it doesn't start with our header magic number, assume it's a raw
458 * 842 compressed buffer and pass it directly to the hardware driver
460 if (be16_to_cpu(hdr
->magic
) != NX842_CRYPTO_MAGIC
) {
461 struct nx842_crypto_header_group g
= {
463 .compressed_length
= cpu_to_be32(p
.iremain
),
464 .uncompressed_length
= cpu_to_be32(p
.oremain
),
467 ret
= decompress(ctx
, &p
, &g
, &c
, 0);
475 pr_err("header has no groups\n");
479 if (hdr
->groups
> NX842_CRYPTO_GROUP_MAX
) {
480 pr_err("header has too many groups %x, max %x\n",
481 hdr
->groups
, NX842_CRYPTO_GROUP_MAX
);
486 hdr_len
= NX842_CRYPTO_HEADER_SIZE(hdr
->groups
);
487 if (hdr_len
> slen
) {
492 memcpy(&ctx
->header
, src
, hdr_len
);
495 for (n
= 0; n
< hdr
->groups
; n
++) {
496 /* ignore applies to last group */
497 if (n
+ 1 == hdr
->groups
)
498 ignore
= be16_to_cpu(hdr
->ignore
);
500 ret
= decompress(ctx
, &p
, &hdr
->group
[n
], &c
, ignore
);
508 pr_debug("decompress total slen %x dlen %x\n", slen
, *dlen
);
513 spin_unlock_bh(&ctx
->lock
);
517 EXPORT_SYMBOL_GPL(nx842_crypto_decompress
);
519 MODULE_LICENSE("GPL");
520 MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
521 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");