2 * Cryptographic API for the NX-842 hardware compression.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * Copyright (C) IBM Corporation, 2011-2015
16 * Designer of the Power data compression engine:
17 * Bulent Abali <abali@us.ibm.com>
19 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
20 * Seth Jennings <sjenning@linux.vnet.ibm.com>
22 * Rewrite: Dan Streetman <ddstreet@ieee.org>
24 * This is an interface to the NX-842 compression hardware in PowerPC
25 * processors. Most of the complexity of this driver is due to the fact that
26 * the NX-842 compression hardware requires the input and output data buffers
27 * to be specifically aligned, to be a specific multiple in length, and within
28 * specific minimum and maximum lengths. Those restrictions, provided by the
29 * nx-842 driver via nx842_constraints, mean this driver must use bounce
30 * buffers and headers to correct misaligned in or out buffers, and to split
31 * input buffers that are too large.
33 * This driver will fall back to software decompression if the hardware
34 * decompression fails, so this driver's decompression should never fail as
35 * long as the provided compressed buffer is valid. Any compressed buffer
36 * created by this driver will have a header (except ones where the input
37 * perfectly matches the constraints); so users of this driver cannot simply
38 * pass a compressed buffer created by this driver over to the 842 software
39 * decompression library. Instead, users must use this driver to decompress;
40 * if the hardware fails or is unavailable, the compressed buffer will be
41 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
42 * software decompression library.
44 * This does not fall back to software compression, however, since the caller
45 * of this function is specifically requesting hardware compression; if the
46 * hardware compression fails, the caller can fall back to software
47 * compression, and the raw 842 compressed buffer that the software compressor
48 * creates can be passed to this driver for hardware decompression; any
49 * buffer without our specific header magic is assumed to be a raw 842 buffer
50 * and passed directly to the hardware. Note that the software compression
51 * library will produce a compressed buffer that is incompatible with the
52 * hardware decompressor if the original input buffer length is not a multiple
53 * of 8; if such a compressed buffer is passed to this driver for
54 * decompression, the hardware will reject it and this driver will then pass
55 * it over to the software library for decompression.
58 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
60 #include <linux/vmalloc.h>
61 #include <linux/sw842.h>
62 #include <linux/spinlock.h>
66 /* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
67 * template (see lib/842/842.h), so this magic number will never appear at
68 * the start of a raw 842 compressed buffer. That is important, as any buffer
69 * passed to us without this magic is assumed to be a raw 842 compressed
70 * buffer, and passed directly to the hardware to decompress.
72 #define NX842_CRYPTO_MAGIC (0xf842)
73 #define NX842_CRYPTO_HEADER_SIZE(g) \
74 (sizeof(struct nx842_crypto_header) + \
75 sizeof(struct nx842_crypto_header_group) * (g))
76 #define NX842_CRYPTO_HEADER_MAX_SIZE \
77 NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
79 /* bounce buffer size */
80 #define BOUNCE_BUFFER_ORDER (2)
81 #define BOUNCE_BUFFER_SIZE \
82 ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
84 /* try longer on comp because we can fallback to sw decomp if hw is busy */
85 #define COMP_BUSY_TIMEOUT (250) /* ms */
86 #define DECOMP_BUSY_TIMEOUT (50) /* ms */
88 struct nx842_crypto_param
{
96 static int update_param(struct nx842_crypto_param
*p
,
97 unsigned int slen
, unsigned int dlen
)
99 if (p
->iremain
< slen
)
101 if (p
->oremain
< dlen
)
113 int nx842_crypto_init(struct crypto_tfm
*tfm
, struct nx842_driver
*driver
)
115 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
117 spin_lock_init(&ctx
->lock
);
118 ctx
->driver
= driver
;
119 ctx
->wmem
= kmalloc(driver
->workmem_size
, GFP_KERNEL
);
120 ctx
->sbounce
= (u8
*)__get_free_pages(GFP_KERNEL
, BOUNCE_BUFFER_ORDER
);
121 ctx
->dbounce
= (u8
*)__get_free_pages(GFP_KERNEL
, BOUNCE_BUFFER_ORDER
);
122 if (!ctx
->wmem
|| !ctx
->sbounce
|| !ctx
->dbounce
) {
124 free_page((unsigned long)ctx
->sbounce
);
125 free_page((unsigned long)ctx
->dbounce
);
131 EXPORT_SYMBOL_GPL(nx842_crypto_init
);
133 void nx842_crypto_exit(struct crypto_tfm
*tfm
)
135 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
138 free_page((unsigned long)ctx
->sbounce
);
139 free_page((unsigned long)ctx
->dbounce
);
141 EXPORT_SYMBOL_GPL(nx842_crypto_exit
);
143 static void check_constraints(struct nx842_constraints
*c
)
145 /* limit maximum, to always have enough bounce buffer to decompress */
146 if (c
->maximum
> BOUNCE_BUFFER_SIZE
)
147 c
->maximum
= BOUNCE_BUFFER_SIZE
;
150 static int nx842_crypto_add_header(struct nx842_crypto_header
*hdr
, u8
*buf
)
152 int s
= NX842_CRYPTO_HEADER_SIZE(hdr
->groups
);
154 /* compress should have added space for header */
155 if (s
> be16_to_cpu(hdr
->group
[0].padding
)) {
156 pr_err("Internal error: no space for header\n");
162 print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET
, 16, 1, buf
, s
, 0);
167 static int compress(struct nx842_crypto_ctx
*ctx
,
168 struct nx842_crypto_param
*p
,
169 struct nx842_crypto_header_group
*g
,
170 struct nx842_constraints
*c
,
172 unsigned int hdrsize
)
174 unsigned int slen
= p
->iremain
, dlen
= p
->oremain
, tmplen
;
175 unsigned int adj_slen
= slen
;
176 u8
*src
= p
->in
, *dst
= p
->out
;
183 if (p
->oremain
== 0 || hdrsize
+ c
->minimum
> dlen
)
186 if (slen
% c
->multiple
)
187 adj_slen
= round_up(slen
, c
->multiple
);
188 if (slen
< c
->minimum
)
189 adj_slen
= c
->minimum
;
190 if (slen
> c
->maximum
)
191 adj_slen
= slen
= c
->maximum
;
192 if (adj_slen
> slen
|| (u64
)src
% c
->alignment
) {
193 adj_slen
= min(adj_slen
, BOUNCE_BUFFER_SIZE
);
194 slen
= min(slen
, BOUNCE_BUFFER_SIZE
);
196 memset(ctx
->sbounce
+ slen
, 0, adj_slen
- slen
);
197 memcpy(ctx
->sbounce
, src
, slen
);
200 pr_debug("using comp sbounce buffer, len %x\n", slen
);
206 if ((u64
)dst
% c
->alignment
) {
207 dskip
= (int)(PTR_ALIGN(dst
, c
->alignment
) - dst
);
211 if (dlen
% c
->multiple
)
212 dlen
= round_down(dlen
, c
->multiple
);
213 if (dlen
< c
->minimum
) {
216 dlen
= min(p
->oremain
, BOUNCE_BUFFER_SIZE
);
217 dlen
= round_down(dlen
, c
->multiple
);
219 pr_debug("using comp dbounce buffer, len %x\n", dlen
);
221 if (dlen
> c
->maximum
)
225 timeout
= ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT
);
227 dlen
= tmplen
; /* reset dlen, if we're retrying */
228 ret
= ctx
->driver
->compress(src
, slen
, dst
, &dlen
, ctx
->wmem
);
229 /* possibly we should reduce the slen here, instead of
230 * retrying with the dbounce buffer?
232 if (ret
== -ENOSPC
&& dst
!= ctx
->dbounce
)
234 } while (ret
== -EBUSY
&& ktime_before(ktime_get(), timeout
));
240 if (dst
== ctx
->dbounce
)
241 memcpy(p
->out
+ dskip
, dst
, dlen
);
243 g
->padding
= cpu_to_be16(dskip
);
244 g
->compressed_length
= cpu_to_be32(dlen
);
245 g
->uncompressed_length
= cpu_to_be32(slen
);
247 if (p
->iremain
< slen
) {
248 *ignore
= slen
- p
->iremain
;
252 pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
253 slen
, *ignore
, dlen
, dskip
);
255 return update_param(p
, slen
, dskip
+ dlen
);
258 int nx842_crypto_compress(struct crypto_tfm
*tfm
,
259 const u8
*src
, unsigned int slen
,
260 u8
*dst
, unsigned int *dlen
)
262 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
263 struct nx842_crypto_header
*hdr
= &ctx
->header
;
264 struct nx842_crypto_param p
;
265 struct nx842_constraints c
= *ctx
->driver
->constraints
;
266 unsigned int groups
, hdrsize
, h
;
271 check_constraints(&c
);
281 groups
= min_t(unsigned int, NX842_CRYPTO_GROUP_MAX
,
282 DIV_ROUND_UP(p
.iremain
, c
.maximum
));
283 hdrsize
= NX842_CRYPTO_HEADER_SIZE(groups
);
285 spin_lock_bh(&ctx
->lock
);
287 /* skip adding header if the buffers meet all constraints */
288 add_header
= (p
.iremain
% c
.multiple
||
289 p
.iremain
< c
.minimum
||
290 p
.iremain
> c
.maximum
||
291 (u64
)p
.in
% c
.alignment
||
292 p
.oremain
% c
.multiple
||
293 p
.oremain
< c
.minimum
||
294 p
.oremain
> c
.maximum
||
295 (u64
)p
.out
% c
.alignment
);
297 hdr
->magic
= cpu_to_be16(NX842_CRYPTO_MAGIC
);
301 while (p
.iremain
> 0) {
304 if (hdr
->groups
> NX842_CRYPTO_GROUP_MAX
)
307 /* header goes before first group */
308 h
= !n
&& add_header
? hdrsize
: 0;
311 pr_warn("internal error, ignore is set %x\n", ignore
);
313 ret
= compress(ctx
, &p
, &hdr
->group
[n
], &c
, &ignore
, h
);
318 if (!add_header
&& hdr
->groups
> 1) {
319 pr_err("Internal error: No header but multiple groups\n");
324 /* ignore indicates the input stream needed to be padded */
325 hdr
->ignore
= cpu_to_be16(ignore
);
327 pr_debug("marked %d bytes as ignore\n", ignore
);
330 ret
= nx842_crypto_add_header(hdr
, dst
);
336 pr_debug("compress total slen %x dlen %x\n", slen
, *dlen
);
339 spin_unlock_bh(&ctx
->lock
);
342 EXPORT_SYMBOL_GPL(nx842_crypto_compress
);
344 static int decompress(struct nx842_crypto_ctx
*ctx
,
345 struct nx842_crypto_param
*p
,
346 struct nx842_crypto_header_group
*g
,
347 struct nx842_constraints
*c
,
350 unsigned int slen
= be32_to_cpu(g
->compressed_length
);
351 unsigned int required_len
= be32_to_cpu(g
->uncompressed_length
);
352 unsigned int dlen
= p
->oremain
, tmplen
;
353 unsigned int adj_slen
= slen
;
354 u8
*src
= p
->in
, *dst
= p
->out
;
355 u16 padding
= be16_to_cpu(g
->padding
);
356 int ret
, spadding
= 0, dpadding
= 0;
359 if (!slen
|| !required_len
)
362 if (p
->iremain
<= 0 || padding
+ slen
> p
->iremain
)
365 if (p
->oremain
<= 0 || required_len
- ignore
> p
->oremain
)
370 if (slen
% c
->multiple
)
371 adj_slen
= round_up(slen
, c
->multiple
);
372 if (slen
< c
->minimum
)
373 adj_slen
= c
->minimum
;
374 if (slen
> c
->maximum
)
376 if (slen
< adj_slen
|| (u64
)src
% c
->alignment
) {
377 /* we can append padding bytes because the 842 format defines
378 * an "end" template (see lib/842/842_decompress.c) and will
379 * ignore any bytes following it.
382 memset(ctx
->sbounce
+ slen
, 0, adj_slen
- slen
);
383 memcpy(ctx
->sbounce
, src
, slen
);
385 spadding
= adj_slen
- slen
;
387 pr_debug("using decomp sbounce buffer, len %x\n", slen
);
390 if (dlen
% c
->multiple
)
391 dlen
= round_down(dlen
, c
->multiple
);
392 if (dlen
< required_len
|| (u64
)dst
% c
->alignment
) {
394 dlen
= min(required_len
, BOUNCE_BUFFER_SIZE
);
395 pr_debug("using decomp dbounce buffer, len %x\n", dlen
);
397 if (dlen
< c
->minimum
)
399 if (dlen
> c
->maximum
)
403 timeout
= ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT
);
405 dlen
= tmplen
; /* reset dlen, if we're retrying */
406 ret
= ctx
->driver
->decompress(src
, slen
, dst
, &dlen
, ctx
->wmem
);
407 } while (ret
== -EBUSY
&& ktime_before(ktime_get(), timeout
));
410 /* reset everything, sw doesn't have constraints */
411 src
= p
->in
+ padding
;
412 slen
= be32_to_cpu(g
->compressed_length
);
417 if (dlen
< required_len
) { /* have ignore bytes */
419 dlen
= BOUNCE_BUFFER_SIZE
;
421 pr_info_ratelimited("using software 842 decompression\n");
422 ret
= sw842_decompress(src
, slen
, dst
, &dlen
);
431 pr_debug("ignoring last %x bytes\n", ignore
);
433 if (dst
== ctx
->dbounce
)
434 memcpy(p
->out
, dst
, dlen
);
436 pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
437 slen
, padding
, dlen
, ignore
);
439 return update_param(p
, slen
+ padding
, dlen
);
442 int nx842_crypto_decompress(struct crypto_tfm
*tfm
,
443 const u8
*src
, unsigned int slen
,
444 u8
*dst
, unsigned int *dlen
)
446 struct nx842_crypto_ctx
*ctx
= crypto_tfm_ctx(tfm
);
447 struct nx842_crypto_header
*hdr
;
448 struct nx842_crypto_param p
;
449 struct nx842_constraints c
= *ctx
->driver
->constraints
;
453 check_constraints(&c
);
463 hdr
= (struct nx842_crypto_header
*)src
;
465 spin_lock_bh(&ctx
->lock
);
467 /* If it doesn't start with our header magic number, assume it's a raw
468 * 842 compressed buffer and pass it directly to the hardware driver
470 if (be16_to_cpu(hdr
->magic
) != NX842_CRYPTO_MAGIC
) {
471 struct nx842_crypto_header_group g
= {
473 .compressed_length
= cpu_to_be32(p
.iremain
),
474 .uncompressed_length
= cpu_to_be32(p
.oremain
),
477 ret
= decompress(ctx
, &p
, &g
, &c
, 0);
485 pr_err("header has no groups\n");
489 if (hdr
->groups
> NX842_CRYPTO_GROUP_MAX
) {
490 pr_err("header has too many groups %x, max %x\n",
491 hdr
->groups
, NX842_CRYPTO_GROUP_MAX
);
496 hdr_len
= NX842_CRYPTO_HEADER_SIZE(hdr
->groups
);
497 if (hdr_len
> slen
) {
502 memcpy(&ctx
->header
, src
, hdr_len
);
505 for (n
= 0; n
< hdr
->groups
; n
++) {
506 /* ignore applies to last group */
507 if (n
+ 1 == hdr
->groups
)
508 ignore
= be16_to_cpu(hdr
->ignore
);
510 ret
= decompress(ctx
, &p
, &hdr
->group
[n
], &c
, ignore
);
518 pr_debug("decompress total slen %x dlen %x\n", slen
, *dlen
);
523 spin_unlock_bh(&ctx
->lock
);
527 EXPORT_SYMBOL_GPL(nx842_crypto_decompress
);
529 MODULE_LICENSE("GPL");
530 MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
531 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");