// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Intel Corporation
 * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <arm_neon.h>

#ifdef CONFIG_ARM
/*
 * AArch32 does not provide this intrinsic natively because it does not
 * implement the underlying instruction. AArch32 only provides a 64-bit
 * wide vtbl.8 instruction, so use that instead.
 */
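/*
 * vtbl2_u8() indexes a 16-byte table held as a pair of 64-bit halves
 * and, like the TBL instruction behind vqtbl1q_u8() on AArch64, returns
 * 0 for out-of-range indices, so the shim below matches the intrinsic's
 * semantics.
 */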
static uint8x16_t vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
{
	/* Reinterpret the 128-bit table as the register pair vtbl2_u8() expects. */
	union {
		uint8x16_t	val;
		uint8x8x2_t	pair;
	} __a = { a };

	return vcombine_u8(vtbl2_u8(__a.pair, vget_low_u8(b)),
			   vtbl2_u8(__a.pair, vget_high_u8(b)));
}
#endif
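
/*
 * Recover two failed data blocks. On entry, p/q hold the original P/Q
 * stripes, while dp/dq hold P/Q as recomputed by the caller with the two
 * failed blocks replaced by zeroes. pbmul and qmul are 32-byte GF(256)
 * lookup tables for the two constant multiplies of the recovery formula,
 * chosen by the caller from the indices of the failed disks.
 */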
void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
			      uint8_t *dq, const uint8_t *pbmul,
			      const uint8_t *qmul)
{
	uint8x16_t pm0 = vld1q_u8(pbmul);
	uint8x16_t pm1 = vld1q_u8(pbmul + 16);
	uint8x16_t qm0 = vld1q_u8(qmul);
	uint8x16_t qm1 = vld1q_u8(qmul + 16);
	uint8x16_t x0f = vdupq_n_u8(0x0f);
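
	/*
	 * Each 32-byte table encodes multiplication by one GF(256)
	 * constant: the first 16 bytes hold the products for all low-nibble
	 * values, the last 16 the products for all high-nibble values, so a
	 * full 8-bit multiply is two vqtbl1q_u8() lookups XORed together.
	 */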

	/*
	 * Scalar reference, one byte per iteration:
	 *
	 *	px    = *p ^ *dp;
	 *	qx    = qmul[*q ^ *dq];
	 *	*dq++ = db = pbmul[px] ^ qx;
	 *	*dp++ = db ^ px;
	 */

	while (bytes) {
		uint8x16_t vx, vy, px, qx, db;

		px = veorq_u8(vld1q_u8(p), vld1q_u8(dp));
		vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
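		/* px = P ^ P', vx = Q ^ Q': the lost blocks' combined contributions */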

		vy = vshrq_n_u8(vx, 4);
		vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
		vy = vqtbl1q_u8(qm1, vy);
		qx = veorq_u8(vx, vy);
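		/* qx now holds qmul[Q ^ Q'], the Q-side term of the recovery */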

		vy = vshrq_n_u8(px, 4);
		vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f));
		vy = vqtbl1q_u8(pm1, vy);
		vx = veorq_u8(vx, vy);
		db = veorq_u8(vx, qx);
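		/* db is the recovered block for dq's disk; db ^ px is dp's */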

		vst1q_u8(dq, db);
		vst1q_u8(dp, veorq_u8(db, px));

		/* Advance to the next 16-byte chunk. */
		bytes -= 16;
		p += 16;
		q += 16;
		dp += 16;
		dq += 16;
	}
}
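
/*
 * Recover one data block that failed together with P: the data is
 * rebuilt from Q alone via one GF(256) constant multiply (qmul), and P,
 * which the caller regenerated without that block, is then completed by
 * XORing the rebuilt data back in.
 */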
void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
			      const uint8_t *qmul)
{
	uint8x16_t qm0 = vld1q_u8(qmul);
	uint8x16_t qm1 = vld1q_u8(qmul + 16);
	uint8x16_t x0f = vdupq_n_u8(0x0f);

	/*
	 * Scalar reference, one byte per iteration:
	 *
	 *	*p++ ^= *dq = qmul[*q ^ *dq];
	 */

	while (bytes) {
		uint8x16_t vx, vy;

		vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
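		/* vx = Q ^ Q': the lost block's contribution to Q */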

		vy = vshrq_n_u8(vx, 4);
		vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
		vy = vqtbl1q_u8(qm1, vy);
		vx = veorq_u8(vx, vy);
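		/* vx now holds the rebuilt data block */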
		vy = veorq_u8(vx, vld1q_u8(p));

		vst1q_u8(dq, vx);
		vst1q_u8(p, vy);

		/* Advance to the next 16-byte chunk. */
		bytes -= 16;
		p += 16;
		q += 16;
		dq += 16;
	}
}