2 * 842 Software Compression
4 * Copyright (C) 2015 Dan Streetman, IBM Corp
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * See 842.h for details of the 842 compressed format.
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 #define MODULE_NAME "842_compress"
22 #include <linux/hashtable.h>
25 #include "842_debugfs.h"
/*
 * Sizes of the three match hash tables.  Each value is passed as the
 * "bits" argument of DECLARE_HASHTABLE() for the 8-, 4- and 2-byte
 * tables (htable8, htable4, htable2) respectively.
 */
27 #define SW842_HASHTABLE8_BITS (10)
28 #define SW842_HASHTABLE4_BITS (11)
29 #define SW842_HASHTABLE2_BITS (10)
31 /* By default, we allow compressing input buffers of any length, but we must
32 * use the non-standard "short data" template so the decompressor can correctly
33 * reproduce the uncompressed data buffer at the right length. However the
34 * hardware 842 decompressor will not recognize the "short data" template, and
35 * will fail to decompress any compressed buffer containing it (I have no idea
36 * why anyone would want to use software to compress and hardware to decompress
37 * but that's beside the point). This parameter forces the compression
38 * function to simply reject any input buffer that isn't a multiple of 8 bytes
39 * long, instead of using the "short data" template, so that all compressed
40 * buffers produced by this function will be decompressible by the 842 hardware
41 * decompressor. Unless you have a specific need for that, leave this disabled
42 * so that any length buffer can be compressed.
/*
 * Strict-mode switch, exposed as the bool module parameter "strict" with
 * permissions 0644.  sw842_compress() tests it together with (ilen % 8)
 * and logs "can't compress" for lengths that are not a multiple of 8.
 */
44 static bool sw842_strict
;
45 module_param_named(strict
, sw842_strict
, bool, 0644);
47 static u8 comp_ops
[OPS_MAX
][5] = { /* params size in bits */
/*
 * One row per template.  Columns 0-3 are the ops for the current 8-byte
 * block (Ix = index op, Dx = data op, N0 = no-op, x = bytes covered);
 * column 4 is the template code.  The trailing comment on each row is the
 * total size in bits of the parameters the template emits, and the rows
 * are ordered by that size, smallest first.
 * process_next() tries rows 0 .. OPS_MAX - 2 in order; the last row is
 * the fallback.
 * NOTE(review): assumes `t` in add_template()/check_template() points at
 * comp_ops[c] - the assignment is not visible here; confirm.
 */
48 { I8
, N0
, N0
, N0
, 0x19 }, /* 8 */
49 { I4
, I4
, N0
, N0
, 0x18 }, /* 18 */
50 { I4
, I2
, I2
, N0
, 0x17 }, /* 25 */
51 { I2
, I2
, I4
, N0
, 0x13 }, /* 25 */
52 { I2
, I2
, I2
, I2
, 0x12 }, /* 32 */
53 { I4
, I2
, D2
, N0
, 0x16 }, /* 33 */
54 { I4
, D2
, I2
, N0
, 0x15 }, /* 33 */
55 { I2
, D2
, I4
, N0
, 0x0e }, /* 33 */
56 { D2
, I2
, I4
, N0
, 0x09 }, /* 33 */
57 { I2
, I2
, I2
, D2
, 0x11 }, /* 40 */
58 { I2
, I2
, D2
, I2
, 0x10 }, /* 40 */
59 { I2
, D2
, I2
, I2
, 0x0d }, /* 40 */
60 { D2
, I2
, I2
, I2
, 0x08 }, /* 40 */
61 { I4
, D4
, N0
, N0
, 0x14 }, /* 41 */
62 { D4
, I4
, N0
, N0
, 0x04 }, /* 41 */
63 { I2
, I2
, D4
, N0
, 0x0f }, /* 48 */
64 { I2
, D2
, I2
, D2
, 0x0c }, /* 48 */
65 { I2
, D4
, I2
, N0
, 0x0b }, /* 48 */
66 { D2
, I2
, I2
, D2
, 0x07 }, /* 48 */
67 { D2
, I2
, D2
, I2
, 0x06 }, /* 48 */
68 { D4
, I2
, I2
, N0
, 0x03 }, /* 48 */
69 { I2
, D2
, D4
, N0
, 0x0a }, /* 56 */
70 { D2
, I2
, D4
, N0
, 0x05 }, /* 56 */
71 { D4
, I2
, D2
, N0
, 0x02 }, /* 56 */
72 { D4
, D2
, I2
, N0
, 0x01 }, /* 56 */
73 { D8
, N0
, N0
, N0
, 0x00 }, /* 64 */
/*
 * Entry of the 8-byte match table (htable8); the node8[] array in
 * struct sw842_param is made of these.  Besides the hlist linkage shown
 * here, the helper macros below also read and write ->data and ->index.
 */
76 struct sw842_hlist_node8
{
77 struct hlist_node node
;
/*
 * Entry of the 4-byte match table (htable4); the node4[] array in
 * struct sw842_param is made of these.  Besides the hlist linkage shown
 * here, the helper macros below also read and write ->data and ->index.
 */
82 struct sw842_hlist_node4
{
83 struct hlist_node node
;
/*
 * Entry of the 2-byte match table (htable2); the node2[] array in
 * struct sw842_param is made of these.  Besides the hlist linkage shown
 * here, the helper macros below also read and write ->data and ->index.
 */
88 struct sw842_hlist_node2
{
89 struct hlist_node node
;
/*
 * Sentinels stored in the p->indexN[] slots; real indices are >= 0.
 *  INDEX_NOT_FOUND   - find_index() searched and found no matching entry.
 *  INDEX_NOT_CHECKED - no lookup has been done yet for the current block
 *                      (process_next() resets every slot to this value).
 */
94 #define INDEX_NOT_FOUND (-1)
95 #define INDEX_NOT_CHECKED (-2)
/* one hash table per match width (8, 4 and 2 bytes) */
110 DECLARE_HASHTABLE(htable8
, SW842_HASHTABLE8_BITS
);
111 DECLARE_HASHTABLE(htable4
, SW842_HASHTABLE4_BITS
);
112 DECLARE_HASHTABLE(htable2
, SW842_HASHTABLE2_BITS
);
/*
 * Backing storage for the hash entries: one node per representable
 * index value at each width (1 << I8_BITS / I4_BITS / I2_BITS).
 */
113 struct sw842_hlist_node8 node8
[1 << I8_BITS
];
114 struct sw842_hlist_node4 node4
[1 << I4_BITS
];
115 struct sw842_hlist_node2 node2
[1 << I2_BITS
];
/*
 * Read the big-endian b-bit value (b is 16, 32 or 64 at the call sites in
 * this file) located o bytes past p->in, without requiring alignment, and
 * return it in CPU byte order.
 */
118 #define get_input_data(p, o, b) \
119 be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
/*
 * Reset the b-byte match table of p: re-initialise htable<b>, then give
 * every element of node<b>[] an index equal to its array position, data 0
 * and a freshly initialised hlist node.
 */
121 #define init_hashtable_nodes(p, b) do { \
123 hash_init((p)->htable##b); \
124 for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \
125 (p)->node##b[_i].index = _i; \
126 (p)->node##b[_i].data = 0; \
127 INIT_HLIST_NODE(&(p)->node##b[_i].node); \
/*
 * Look up p->data<b>[n] in htable<b>.  p->index<b>[n] is first set to
 * INDEX_NOT_FOUND and then overwritten with the ->index of an entry whose
 * ->data equals the searched value, if one exists.  The statement
 * expression evaluates to true when an index (>= 0) was stored.
 * Note that p and n are expanded several times and p is not
 * parenthesised, so only simple expressions should be passed.
 */
131 #define find_index(p, b, n) ({ \
132 struct sw842_hlist_node##b *_n; \
133 p->index##b[n] = INDEX_NOT_FOUND; \
134 hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \
135 if (p->data##b[n] == _n->data) { \
136 p->index##b[n] = _n->index; \
140 p->index##b[n] >= 0; \
/*
 * Memoised form of find_index(): when slot n of index<b>[] already holds
 * a lookup result (anything other than INDEX_NOT_CHECKED) report whether
 * that result is a valid index; otherwise run find_index() now, which
 * also fills in the slot.
 */
#define check_index(p, b, n)				\
	((p)->index##b[n] != INDEX_NOT_CHECKED		\
	 ? (p)->index##b[n] >= 0			\
	 : find_index(p, b, n))
/*
 * Re-key hash entry node<b>[i + d] with the current block's data:
 * unhash the node, store p->data<b>[d] in it, log the change and hash it
 * again under the new data value.  i is the slot computed by
 * update_hashtables() for the current input position and d selects the
 * sub-block within the 8-byte block.
 */
148 #define replace_hash(p, b, i, d) do { \
149 struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)]; \
150 hash_del(&_n->node); \
151 _n->data = (p)->data##b[d]; \
152 pr_debug("add hash index%x %x pos %x data %lx\n", b, \
153 (unsigned int)_n->index, \
154 (unsigned int)((p)->in - (p)->instart), \
155 (unsigned long)_n->data); \
156 hash_add((p)->htable##b, &_n->node, _n->data); \
/*
 * bmask[n] has the n most significant bits of a byte set (n = 0..7).
 * NOTE(review): presumably used by add_bits() to keep the bits already
 * written to a partially filled output byte - the use is not visible
 * here; confirm.
 */
159 static u8 bmask
[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
/*
 * Forward declaration: __split_add_bits() and add_bits() call each other.
 */
161 static int add_bits(struct sw842_param
*p
, u64 d
, u8 n
);
/*
 * Write the n-bit value d as two add_bits() calls: first the upper
 * n - s bits (d >> s), then the lower s bits (d masked with
 * GENMASK_ULL(s - 1, 0)).  The result of the second call is returned.
 */
163 static int __split_add_bits(struct sw842_param
*p
, u64 d
, u8 n
, u8 s
)
170 ret
= add_bits(p
, d
>> s
, n
- s
);
173 return add_bits(p
, d
& GENMASK_ULL(s
- 1, 0), s
);
/*
 * Append n bits of d to the output bit stream of p.
 *
 * Locals: b is the number of bits already used in the current output
 * byte (p->bit), bits = b + n is the bit count once d is added, and s is
 * the number of bits left over in the last byte touched
 * (round_up(bits, 8) - bits).
 *
 * A write is handed to __split_add_bits() in two pieces when it would
 * not fit a single store, or when fewer than 8 (or 4) output bytes remain
 * and the store width needed for `bits` would run past the buffer end.
 * The number of bytes needed, DIV_ROUND_UP(bits, 8), is compared with
 * p->olen before storing.  Multi-byte stores go through put_unaligned()
 * in big-endian order, after which p->out and p->olen move on by
 * p->bit / 8 whole bytes.
 * NOTE(review): `o` looks like the already-written high bits of the
 * current byte - its assignment is not visible here; confirm.
 */
176 static int add_bits(struct sw842_param
*p
, u64 d
, u8 n
)
178 int b
= p
->bit
, bits
= b
+ n
, s
= round_up(bits
, 8) - bits
;
182 pr_debug("add %u bits %lx\n", (unsigned char)n
, (unsigned long)d
);
187 /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
188 * or if we're at the end of the output buffer and would write past end
191 return __split_add_bits(p
, d
, n
, 32);
192 else if (p
->olen
< 8 && bits
> 32 && bits
<= 56)
193 return __split_add_bits(p
, d
, n
, 16);
194 else if (p
->olen
< 4 && bits
> 16 && bits
<= 24)
195 return __split_add_bits(p
, d
, n
, 8);
197 if (DIV_ROUND_UP(bits
, 8) > p
->olen
)
206 put_unaligned(cpu_to_be16(o
<< 8 | d
), (__be16
*)out
);
208 put_unaligned(cpu_to_be32(o
<< 24 | d
<< 8), (__be32
*)out
);
210 put_unaligned(cpu_to_be32(o
<< 24 | d
), (__be32
*)out
);
212 put_unaligned(cpu_to_be64(o
<< 56 | d
<< 24), (__be64
*)out
);
214 put_unaligned(cpu_to_be64(o
<< 56 | d
<< 16), (__be64
*)out
);
216 put_unaligned(cpu_to_be64(o
<< 56 | d
<< 8), (__be64
*)out
);
218 put_unaligned(cpu_to_be64(o
<< 56 | d
), (__be64
*)out
);
223 p
->out
+= p
->bit
/ 8;
224 p
->olen
-= p
->bit
/ 8;
/*
 * Emit template number c for the current 8-byte block.
 *
 * The template code t[4] is written first in OP_BITS bits.  Then, for
 * each of the four ops t[0..3], the op's size (t[i] & OP_AMOUNT) selects
 * the width and its action bit selects what is written:
 *   8 bytes: index8[0] in I8_BITS bits, or data8[0] in 64 bits;
 *   4 bytes: index4[b >> 2] in I4_BITS bits, or data4[b >> 2] in 32 bits
 *            (a 4-byte data op at byte offset 2 is read directly from the
 *            input because it straddles the two cached 4-byte halves);
 *   2 bytes: index2[b >> 1] in I2_BITS bits, or data2[b >> 1] in 16 bits.
 * b is the byte offset inside the block and advances by the op size after
 * every op.  Inconsistent ops or an inconsistent final length are
 * reported with pr_err().  When sw842_template_counts is set, the
 * per-template counter template_count[t[4]] is incremented.
 * NOTE(review): assumes t refers to comp_ops[c]; the declaration is not
 * visible here.
 */
231 static int add_template(struct sw842_param
*p
, u8 c
)
240 pr_debug("template %x\n", t
[4]);
242 ret
= add_bits(p
, t
[4], OP_BITS
);
246 for (i
= 0; i
< 4; i
++) {
247 pr_debug("op %x\n", t
[i
]);
249 switch (t
[i
] & OP_AMOUNT
) {
253 else if (t
[i
] & OP_ACTION_INDEX
)
254 ret
= add_bits(p
, p
->index8
[0], I8_BITS
);
255 else if (t
[i
] & OP_ACTION_DATA
)
256 ret
= add_bits(p
, p
->data8
[0], 64);
261 if (b
== 2 && t
[i
] & OP_ACTION_DATA
)
262 ret
= add_bits(p
, get_input_data(p
, 2, 32), 32);
263 else if (b
!= 0 && b
!= 4)
265 else if (t
[i
] & OP_ACTION_INDEX
)
266 ret
= add_bits(p
, p
->index4
[b
>> 2], I4_BITS
);
267 else if (t
[i
] & OP_ACTION_DATA
)
268 ret
= add_bits(p
, p
->data4
[b
>> 2], 32);
273 if (b
!= 0 && b
!= 2 && b
!= 4 && b
!= 6)
275 if (t
[i
] & OP_ACTION_INDEX
)
276 ret
= add_bits(p
, p
->index2
[b
>> 1], I2_BITS
);
277 else if (t
[i
] & OP_ACTION_DATA
)
278 ret
= add_bits(p
, p
->data2
[b
>> 1], 16);
283 inv
= (b
!= 8) || !(t
[i
] & OP_ACTION_NOOP
);
294 pr_err("Invalid templ %x op %d : %x %x %x %x\n",
295 c
, i
, t
[0], t
[1], t
[2], t
[3]);
299 b
+= t
[i
] & OP_AMOUNT
;
303 pr_err("Invalid template %x len %x : %x %x %x %x\n",
304 c
, b
, t
[0], t
[1], t
[2], t
[3]);
308 if (sw842_template_counts
)
309 atomic_inc(&template_count
[t
[4]]);
/*
 * Emit a repeat template: OP_REPEAT in OP_BITS bits followed by the
 * repeat count in REPEAT_BITS bits.  r is the number of repeats as
 * counted by the caller (1-based); it is decremented before being
 * written, so valid inputs are 1 .. REPEAT_BITS_MAX + 1.  When
 * sw842_template_counts is set, template_repeat_count is incremented.
 */
314 static int add_repeat_template(struct sw842_param
*p
, u8 r
)
318 /* repeat param is 0-based */
319 if (!r
|| --r
> REPEAT_BITS_MAX
)
322 ret
= add_bits(p
, OP_REPEAT
, OP_BITS
);
326 ret
= add_bits(p
, r
, REPEAT_BITS
);
330 if (sw842_template_counts
)
331 atomic_inc(&template_repeat_count
);
/*
 * Emit a "short data" template for the b trailing input bytes:
 * OP_SHORT_DATA in OP_BITS bits, then b itself in SHORT_DATA_BITS bits,
 * then the bytes p->in[0 .. b-1], 8 bits each.  b is range-checked
 * against 1 .. SHORT_DATA_BITS_MAX first.  When sw842_template_counts is
 * set, template_short_data_count is incremented.
 */
336 static int add_short_data_template(struct sw842_param
*p
, u8 b
)
340 if (!b
|| b
> SHORT_DATA_BITS_MAX
)
343 ret
= add_bits(p
, OP_SHORT_DATA
, OP_BITS
);
347 ret
= add_bits(p
, b
, SHORT_DATA_BITS
);
351 for (i
= 0; i
< b
; i
++) {
352 ret
= add_bits(p
, p
->in
[i
], 8);
357 if (sw842_template_counts
)
358 atomic_inc(&template_short_data_count
);
/*
 * Emit the zeros template: just OP_ZEROS in OP_BITS bits, no parameters.
 * When sw842_template_counts is set, template_zeros_count is incremented.
 */
363 static int add_zeros_template(struct sw842_param
*p
)
365 int ret
= add_bits(p
, OP_ZEROS
, OP_BITS
);
370 if (sw842_template_counts
)
371 atomic_inc(&template_zeros_count
);
/*
 * Emit the end-of-stream template: just OP_END in OP_BITS bits.
 * When sw842_template_counts is set, template_end_count is incremented.
 */
376 static int add_end_template(struct sw842_param
*p
)
378 int ret
= add_bits(p
, OP_END
, OP_BITS
);
383 if (sw842_template_counts
)
384 atomic_inc(&template_end_count
);
/*
 * Decide whether template c can encode the current 8-byte block.
 *
 * Only index ops need checking: for each op with OP_ACTION_INDEX set,
 * check_index() is run for the op's width (2, 4 or 8 bytes) at the
 * sub-block selected by the running byte offset b (b >> 1 for 2-byte,
 * b >> 2 for 4-byte, always 0 for 8-byte) and its result is stored in
 * `match`.  b advances by the op size after each op.
 * NOTE(review): assumes t refers to comp_ops[c]; the declaration is not
 * visible here.
 */
389 static bool check_template(struct sw842_param
*p
, u8 c
)
397 for (i
= 0; i
< 4; i
++) {
398 if (t
[i
] & OP_ACTION_INDEX
) {
399 if (t
[i
] & OP_AMOUNT_2
)
400 match
= check_index(p
, 2, b
>> 1);
401 else if (t
[i
] & OP_AMOUNT_4
)
402 match
= check_index(p
, 4, b
>> 2);
403 else if (t
[i
] & OP_AMOUNT_8
)
404 match
= check_index(p
, 8, 0);
411 b
+= t
[i
] & OP_AMOUNT
;
/*
 * Cache the 8 input bytes at p->in at every match width: one 64-bit
 * value, two 32-bit values (offsets 0 and 4) and four 16-bit values
 * (offsets 0, 2, 4 and 6), each converted from big-endian to CPU order
 * by get_input_data().
 */
417 static void get_next_data(struct sw842_param
*p
)
419 p
->data8
[0] = get_input_data(p
, 0, 64);
420 p
->data4
[0] = get_input_data(p
, 0, 32);
421 p
->data4
[1] = get_input_data(p
, 4, 32);
422 p
->data2
[0] = get_input_data(p
, 0, 16);
423 p
->data2
[1] = get_input_data(p
, 2, 16);
424 p
->data2
[2] = get_input_data(p
, 4, 16);
425 p
->data2
[3] = get_input_data(p
, 6, 16);
/*
 * pos is the byte offset of the current block from the start of the
 * input.  n8, n4 and n2 are that offset expressed in 8-, 4- and 2-byte
 * units, wrapped to the size of the corresponding node array
 * (1 << I8_BITS, 1 << I4_BITS, 1 << I2_BITS).  One 8-byte entry, two
 * 4-byte entries and four 2-byte entries are re-keyed per block.
 */
428 /* update the hashtable entries.
429 * only call this after finding/adding the current template
430 * the dataN fields for the current 8 byte block must be already updated
432 static void update_hashtables(struct sw842_param
*p
)
434 u64 pos
= p
->in
- p
->instart
;
435 u64 n8
= (pos
>> 3) % (1 << I8_BITS
);
436 u64 n4
= (pos
>> 2) % (1 << I4_BITS
);
437 u64 n2
= (pos
>> 1) % (1 << I2_BITS
);
439 replace_hash(p
, 8, n8
, 0);
440 replace_hash(p
, 4, n4
, 0);
441 replace_hash(p
, 4, n4
, 1);
442 replace_hash(p
, 2, n2
, 0);
443 replace_hash(p
, 2, n2
, 1);
444 replace_hash(p
, 2, n2
, 2);
445 replace_hash(p
, 2, n2
, 3);
448 /* find the next template to use, and add it
449 * the p->dataN fields must already be set for the current 8 byte block
 *
 * All seven index slots (one 8-byte, two 4-byte, four 2-byte) are first
 * reset to INDEX_NOT_CHECKED so that check_index() performs fresh lookups
 * for this block.  Templates 0 .. OPS_MAX - 2 are then offered to
 * check_template() in table order, and add_template() is called with the
 * resulting template number i.
451 static int process_next(struct sw842_param
*p
)
455 p
->index8
[0] = INDEX_NOT_CHECKED
;
456 p
->index4
[0] = INDEX_NOT_CHECKED
;
457 p
->index4
[1] = INDEX_NOT_CHECKED
;
458 p
->index2
[0] = INDEX_NOT_CHECKED
;
459 p
->index2
[1] = INDEX_NOT_CHECKED
;
460 p
->index2
[2] = INDEX_NOT_CHECKED
;
461 p
->index2
[3] = INDEX_NOT_CHECKED
;
463 /* check up to OPS_MAX - 1; last op is our fallback */
464 for (i
= 0; i
< OPS_MAX
- 1; i
++) {
465 if (check_template(p
, i
))
469 ret
= add_template(p
, i
);
479 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
480 * @out, using no more than @olen bytes, using the 842 compression format.
482 * Returns: 0 on success, error on failure. The @olen parameter
483 * will contain the number of output bytes written on success, or
486 int sw842_compress(const u8
*in
, unsigned int ilen
,
487 u8
*out
, unsigned int *olen
, void *wmem
)
/*
 * Flow, as far as it is visible here:
 *  - @wmem is used as the struct sw842_param work area; BUILD_BUG_ON()
 *    verifies at compile time that the struct fits in SW842_MEM_COMPRESS.
 *  - The three match hash tables are reset.
 *  - Input is consumed in 8-byte blocks while p->ilen > 7.  A block equal
 *    to the previous one bumps repeat_count; pending repeats are flushed
 *    with add_repeat_template(); other blocks go out through
 *    add_zeros_template() or process_next().  update_hashtables() is
 *    called for the blocks processed.
 *  - Remaining input bytes are written with add_short_data_template(),
 *    followed by the end template and a CRC_BITS-wide crc32_be() of the
 *    whole input.
 *  - The output is zero-padded to a multiple of 8 bytes and *olen is set
 *    to the number of bytes produced (total - p->olen).
 */
489 struct sw842_param
*p
= (struct sw842_param
*)wmem
;
491 u64 last
, next
, pad
, total
;
495 BUILD_BUG_ON(sizeof(*p
) > SW842_MEM_COMPRESS
);
497 init_hashtable_nodes(p
, 8);
498 init_hashtable_nodes(p
, 4);
499 init_hashtable_nodes(p
, 2);
512 /* if using strict mode, we can only compress a multiple of 8 */
513 if (sw842_strict
&& (ilen
% 8)) {
/* NOTE(review): ilen is unsigned int but is printed with %d; %u would match. */
514 pr_err("Using strict mode, can't compress len %d\n", ilen
);
518 /* let's compress at least 8 bytes, mkay? */
519 if (unlikely(ilen
< 8))
522 /* make initial 'last' different so we don't match the first time */
523 last
= ~get_unaligned((u64
*)p
->in
);
525 while (p
->ilen
> 7) {
526 next
= get_unaligned((u64
*)p
->in
);
528 /* must get the next data, as we need to update the hashtable
529 * entries with the new data every time
533 /* we don't care about endianness in last or next;
534 * we're just comparing 8 bytes to another 8 bytes,
535 * they're both the same endianness
538 /* repeat count bits are 0-based, so we stop at +1 */
539 if (++repeat_count
<= REPEAT_BITS_MAX
)
543 ret
= add_repeat_template(p
, repeat_count
);
545 if (next
== last
) /* reached max repeat bits */
550 ret
= add_zeros_template(p
);
552 ret
= process_next(p
);
559 update_hashtables(p
);
565 ret
= add_repeat_template(p
, repeat_count
);
572 ret
= add_short_data_template(p
, p
->ilen
);
580 ret
= add_end_template(p
);
585 * crc(0:31) is appended to target data starting with the next
586 * bit after End of stream template.
587 * nx842 calculates CRC for data in big-endian format. So doing
588 * same here so that sw842 decompression can be used for both
591 crc
= crc32_be(0, in
, ilen
);
592 ret
= add_bits(p
, crc
, CRC_BITS
);
602 /* pad compressed length to multiple of 8 */
603 pad
= (8 - ((total
- p
->olen
) % 8)) % 8;
605 if (pad
> p
->olen
) /* we were so close! */
607 memset(p
->out
, 0, pad
);
612 if (unlikely((total
- p
->olen
) > UINT_MAX
))
615 *olen
= total
- p
->olen
;
619 EXPORT_SYMBOL_GPL(sw842_compress
);
/*
 * Module init hook: creates the debugfs entries, but only when
 * sw842_template_counts is set.
 */
621 static int __init
sw842_init(void)
623 if (sw842_template_counts
)
624 sw842_debugfs_create();
628 module_init(sw842_init
);
/*
 * Module exit hook: removes the debugfs entries, but only when
 * sw842_template_counts is set.
 */
630 static void __exit
sw842_exit(void)
632 if (sw842_template_counts
)
633 sw842_debugfs_remove();
635 module_exit(sw842_exit
);
637 MODULE_LICENSE("GPL");
638 MODULE_DESCRIPTION("Software 842 Compressor");
639 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");