// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Intel Corporation
 * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <arm_neon.h>
#ifdef CONFIG_ARM
/*
 * AArch32 does not provide this intrinsic natively because it does not
 * implement the underlying instruction. AArch32 only provides a 64-bit
 * wide vtbl.8 instruction, so use that instead.
 */
static uint8x16_t vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
{
	/* Reinterpret the 128-bit table as a pair of 64-bit vectors for vtbl2. */
	union {
		uint8x16_t	val;
		uint8x8x2_t	pair;
	} __a = { a };

	return vcombine_u8(vtbl2_u8(__a.pair, vget_low_u8(b)),
			   vtbl2_u8(__a.pair, vget_high_u8(b)));
}
#endif
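
/*
 * Note: both the AArch64 tbl instruction and the vtbl2-based fallback
 * above return 0 for out-of-range indices. The lookups below only ever
 * use indices in 0..15 (a masked low nibble or a right-shifted high
 * nibble), so the two behave identically here.
 */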

void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
			      uint8_t *dq, const uint8_t *pbmul,
			      const uint8_t *qmul)
{
	uint8x16_t pm0 = vld1q_u8(pbmul);
	uint8x16_t pm1 = vld1q_u8(pbmul + 16);
	uint8x16_t qm0 = vld1q_u8(qmul);
	uint8x16_t qm1 = vld1q_u8(qmul + 16);
	uint8x16_t x0f = vdupq_n_u8(0x0f);
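
	/*
	 * Each multiplication table is assumed to be 32 bytes in a
	 * split-nibble layout: the first 16 bytes hold the products of
	 * the low nibbles, the next 16 those of the high nibbles, so
	 * that mul[x] == mlo[x & 0xf] ^ mhi[x >> 4].
	 */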
	/*
	 * while ( bytes-- ) {
	 *	uint8_t px, qx, db;
	 *
	 *	px    = *p ^ *dp;
	 *	qx    = qmul[*q ^ *dq];
	 *	*dq++ = db = pbmul[px] ^ qx;
	 *	*dp++ = db ^ px;
	 *	p++; q++;
	 * }
	 */
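	/* The vector loop below does the same work 16 bytes per iteration. */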
	while (bytes) {
		uint8x16_t vx, vy, px, qx, db;

		px = veorq_u8(vld1q_u8(p), vld1q_u8(dp));
		vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));

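		/* qx = qmul[*q ^ *dq]: look up each nibble, then combine */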
		vy = vshrq_n_u8(vx, 4);
		vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
		vy = vqtbl1q_u8(qm1, vy);
		qx = veorq_u8(vx, vy);

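		/* db = pbmul[px] ^ qx, via the same split-nibble lookup */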
		vy = vshrq_n_u8(px, 4);
		vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f));
		vy = vqtbl1q_u8(pm1, vy);
		vx = veorq_u8(vx, vy);
		db = veorq_u8(vx, qx);

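		/* *dq = db; *dp = db ^ px */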
		vst1q_u8(dq, db);
		vst1q_u8(dp, veorq_u8(db, px));

		bytes -= 16;
		p += 16;
		q += 16;
		dp += 16;
		dq += 16;
	}
}

void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
			      const uint8_t *qmul)
{
	uint8x16_t qm0 = vld1q_u8(qmul);
	uint8x16_t qm1 = vld1q_u8(qmul + 16);
	uint8x16_t x0f = vdupq_n_u8(0x0f);

	/*
	 * while (bytes--) {
	 *	*p++ ^= *dq = qmul[*q ^ *dq];
	 *	q++; dq++;
	 * }
	 */
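	/* Same split-nibble qmul tables as above, 16 bytes per iteration. */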
	while (bytes) {
		uint8x16_t vx, vy;

		vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));

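		/* vx = qmul[*q ^ *dq] */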
		vy = vshrq_n_u8(vx, 4);
		vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
		vy = vqtbl1q_u8(qm1, vy);
		vx = veorq_u8(vx, vy);
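		/* vy = *p ^ *dq: fold the recovered block back into P */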
		vy = veorq_u8(vx, vld1q_u8(p));

		vst1q_u8(dq, vx);
		vst1q_u8(p, vy);

		bytes -= 16;
		p += 16;
		q += 16;
		dq += 16;
	}
}
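
/*
 * Illustrative caller (an assumption about the surrounding kernel code,
 * not part of this file): buffer lengths must be a multiple of 16, and
 * the 32-byte split tables come from raid6_vgfmul[], e.g.:
 *
 *	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
 *	kernel_neon_begin();
 *	__raid6_datap_recov_neon(bytes, p, q, dq, qmul);
 *	kernel_neon_end();
 */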