// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated CRC32 implementation with Zbc extension.
 *
 * Copyright (C) 2024 Intel Corporation
 */
#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <asm/byteorder.h>

#include <linux/types.h>
#include <linux/minmax.h>
#include <linux/crc32poly.h>
#include <linux/crc32.h>
#include <linux/byteorder/generic.h>
/*
 * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
 * better understanding of how this math works.
 *
 * let "+" denotes polynomial add (XOR)
 * let "-" denotes polynomial sub (XOR)
 * let "*" denotes polynomial multiplication
 * let "/" denotes polynomial floor division
 * let "S" denotes source data, XLEN bit wide
 * let "P" denotes CRC32 polynomial
 * let "T" denotes 2^(XLEN+32)
 * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit
 *
 * crc32(S, P)
 * => S * (2^32) - S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
 * => lowest 32 bits of: S * (2^32) * quotient / T * P
 * => lowest 32 bits of: S * quotient / 2^XLEN * P
 * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
 * => clmul_low_part(clmul_high_part(S, QT) + S, P)
 *
 * In terms of below implementations, the BE case is more intuitive, since the
 * higher order bit sits at more significant position.
 */
44 #if __riscv_xlen == 64
45 /* Slide by XLEN bits per iteration */
48 /* Each below polynomial quotient has an implicit bit for 2^XLEN */
50 /* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
51 # define CRC32_POLY_QT_LE 0x5a72d812fb808b20
53 /* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
54 # define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8
56 /* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be
57 * the same as the bit-reversed version of CRC32_POLY_QT_LE
59 # define CRC32_POLY_QT_BE 0x04d101df481b4e5a
61 static inline u64
crc32_le_prep(u32 crc
, unsigned long const *ptr
)
63 return (u64
)crc
^ (__force u64
)__cpu_to_le64(*ptr
);
66 static inline u32
crc32_le_zbc(unsigned long s
, u32 poly
, unsigned long poly_qt
)
70 /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
71 asm volatile (".option push\n"
87 static inline u64
crc32_be_prep(u32 crc
, unsigned long const *ptr
)
89 return ((u64
)crc
<< 32) ^ (__force u64
)__cpu_to_be64(*ptr
);
92 #elif __riscv_xlen == 32
94 /* Each quotient should match the upper half of its analog in RV64 */
95 # define CRC32_POLY_QT_LE 0xfb808b20
96 # define CRC32C_POLY_QT_LE 0x6f5389f8
97 # define CRC32_POLY_QT_BE 0x04d101df
99 static inline u32
crc32_le_prep(u32 crc
, unsigned long const *ptr
)
101 return crc
^ (__force u32
)__cpu_to_le32(*ptr
);
104 static inline u32
crc32_le_zbc(unsigned long s
, u32 poly
, unsigned long poly_qt
)
108 /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
109 asm volatile (".option push\n"
110 ".option arch,+zbc\n"
114 "clmulr %0, %0, %3\n"
124 static inline u32
crc32_be_prep(u32 crc
, unsigned long const *ptr
)
126 return crc
^ (__force u32
)__cpu_to_be32(*ptr
);
130 # error "Unexpected __riscv_xlen"
133 static inline u32
crc32_be_zbc(unsigned long s
)
137 asm volatile (".option push\n"
138 ".option arch,+zbc\n"
139 "clmulh %0, %1, %2\n"
145 "r" (CRC32_POLY_QT_BE
),
151 #define STEP (1 << STEP_ORDER)
152 #define OFFSET_MASK (STEP - 1)
154 typedef u32 (*fallback
)(u32 crc
, unsigned char const *p
, size_t len
);
156 static inline u32
crc32_le_unaligned(u32 crc
, unsigned char const *p
,
157 size_t len
, u32 poly
,
158 unsigned long poly_qt
)
160 size_t bits
= len
* 8;
164 for (int i
= 0; i
< len
; i
++)
165 s
= ((unsigned long)*p
++ << (__riscv_xlen
- 8)) | (s
>> 8);
167 s
^= (unsigned long)crc
<< (__riscv_xlen
- bits
);
168 if (__riscv_xlen
== 32 || len
< sizeof(u32
))
169 crc_low
= crc
>> bits
;
171 crc
= crc32_le_zbc(s
, poly
, poly_qt
);
177 static inline u32 __pure
crc32_le_generic(u32 crc
, unsigned char const *p
,
178 size_t len
, u32 poly
,
179 unsigned long poly_qt
,
182 size_t offset
, head_len
, tail_len
;
183 unsigned long const *p_ul
;
186 asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
187 RISCV_ISA_EXT_ZBC
, 1)
190 /* Handle the unaligned head. */
191 offset
= (unsigned long)p
& OFFSET_MASK
;
193 head_len
= min(STEP
- offset
, len
);
194 crc
= crc32_le_unaligned(crc
, p
, head_len
, poly
, poly_qt
);
199 tail_len
= len
& OFFSET_MASK
;
200 len
= len
>> STEP_ORDER
;
201 p_ul
= (unsigned long const *)p
;
203 for (int i
= 0; i
< len
; i
++) {
204 s
= crc32_le_prep(crc
, p_ul
);
205 crc
= crc32_le_zbc(s
, poly
, poly_qt
);
209 /* Handle the tail bytes. */
210 p
= (unsigned char const *)p_ul
;
212 crc
= crc32_le_unaligned(crc
, p
, tail_len
, poly
, poly_qt
);
217 return crc_fb(crc
, p
, len
);
220 u32 __pure
crc32_le(u32 crc
, unsigned char const *p
, size_t len
)
222 return crc32_le_generic(crc
, p
, len
, CRC32_POLY_LE
, CRC32_POLY_QT_LE
,
226 u32 __pure
__crc32c_le(u32 crc
, unsigned char const *p
, size_t len
)
228 return crc32_le_generic(crc
, p
, len
, CRC32C_POLY_LE
,
229 CRC32C_POLY_QT_LE
, __crc32c_le_base
);
232 static inline u32
crc32_be_unaligned(u32 crc
, unsigned char const *p
,
235 size_t bits
= len
* 8;
240 for (int i
= 0; i
< len
; i
++)
243 if (__riscv_xlen
== 32 || len
< sizeof(u32
)) {
244 s
^= crc
>> (32 - bits
);
245 crc_low
= crc
<< bits
;
247 s
^= (unsigned long)crc
<< (bits
- 32);
250 crc
= crc32_be_zbc(s
);
256 u32 __pure
crc32_be(u32 crc
, unsigned char const *p
, size_t len
)
258 size_t offset
, head_len
, tail_len
;
259 unsigned long const *p_ul
;
262 asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
263 RISCV_ISA_EXT_ZBC
, 1)
266 /* Handle the unaligned head. */
267 offset
= (unsigned long)p
& OFFSET_MASK
;
269 head_len
= min(STEP
- offset
, len
);
270 crc
= crc32_be_unaligned(crc
, p
, head_len
);
275 tail_len
= len
& OFFSET_MASK
;
276 len
= len
>> STEP_ORDER
;
277 p_ul
= (unsigned long const *)p
;
279 for (int i
= 0; i
< len
; i
++) {
280 s
= crc32_be_prep(crc
, p_ul
);
281 crc
= crc32_be_zbc(s
);
285 /* Handle the tail bytes. */
286 p
= (unsigned char const *)p_ul
;
288 crc
= crc32_be_unaligned(crc
, p
, tail_len
);
293 return crc32_be_base(crc
, p
, len
);