// Copy construction and copy assignment are both explicitly defaulted,
// i.e. the compiler-generated member-wise versions are used.
8 ve(const ve&) = default;
9 ve& operator=(const ve&) = default;
11 // note that the code usually uses the first half of this array
// Produces a mask vector: the statement shown sets bytes 0..7 of m.raw to 0xff.
// NOTE(review): the declaration/initialisation of m and the return statement
// are not among the lines shown; presumably the remaining bytes of m.raw are
// zero and m is returned -- confirm.
15 static ve First8_(void) {
17 __builtin_memset(m.raw, 0xff, 8);
// Bitwise AND of a and b over the first 8 bytes.
// Takes both operands by value and returns a ve.
21 static ve And_(ve a, ve b) {
// Start from a full 16-byte copy of a.raw ...
23 __builtin_memcpy(au.raw, a.raw, 16);
// ... then AND only bytes 0..7 with the corresponding bytes of b.raw.
// In the statements shown, bytes 8..15 of au.raw keep the values copied from a.
24 for (size_t i = 0; i < 8; ++i) {
25 au.raw[i] &= b.raw[i];
// Check helper for the test.
// noipa: GCC performs no interprocedural optimisation between this function
// and its callers (implies noinline/noclone); optimize(0): this function is
// compiled without optimisation.
30 __attribute__((noipa, optimize(0)))
31 static void vec_assert(ve a) {
// The condition is true when byte 6 of a.raw is neither 0x06 nor 0x07.
// NOTE(review): the guarded statement is not shown -- presumably it aborts
// or traps; confirm.
32 if (a.raw[6] != 0x06 && a.raw[6] != 0x07)
// Reverses the byte order inside each group of four bytes.
// The loop covers the two groups that make up bytes 0..7 of v.raw
// (i = 0 and i = 4); for k in 0..3, ret.raw[i + k] = v.raw[i + 3 - k].
// NOTE(review): how ret is declared/initialised (and thus what bytes 8..15
// hold) is not shown -- confirm.
36 static ve Reverse4_(ve v) {
38 for (size_t i = 0; i < 8; i += 4) {
39 ret.raw[i + 0] = v.raw[i + 3];
40 ret.raw[i + 1] = v.raw[i + 2];
41 ret.raw[i + 2] = v.raw[i + 1];
42 ret.raw[i + 3] = v.raw[i + 0];
// Duplicates each even-indexed byte into the odd-indexed slot after it,
// for the byte pairs in bytes 0..7 of v.raw.
// Works in place on v, which is a by-value parameter, so the caller's
// vector is not modified.
47 static ve DupEven_(ve v) {
48 for (size_t i = 0; i < 8; i += 2) {
49 v.raw[i + 1] = v.raw[i];
// Shuffle under test: takes a ve by value and returns a ve.
// It is called as Per4LaneBlockShuffle_<b>(v) by the test helper, so a
// template parameter is expected.
// NOTE(review): neither the template header nor the body is shown here.
55 ve Per4LaneBlockShuffle_(ve v) {
// Runs one shuffle variant and checks the result.
// always_inline asks GCC to inline this helper into its caller regardless of
// the usual inlining heuristics.
// NOTE(review): b is used as a template argument below; its template
// declaration is not among the lines shown.
64 static inline __attribute__((always_inline)) void DoTestPer4LaneBlkShuffle(const ve v) {
65 ve actual = Per4LaneBlockShuffle_<b>(v);
// Mask the shuffled vector with First8_() before checking it.
66 const auto valid_lanes_mask = First8_();
67 ve actual_masked = And_(valid_lanes_mask, actual);
68 vec_assert(actual_masked);
// Runs the shuffle test on v for both the true and the false instantiation.
71 static void DoTestPer4LaneBlkShuffles(const ve v) {
// Copies bytes 0..7 of v.raw into a 128-byte-aligned local buffer.
// NOTE(review): src_lanes is not read by any statement shown; it may exist
// only to influence stack layout/codegen -- confirm before removing.
72 alignas(128) uint8_t src_lanes[8];
73 __builtin_memcpy(src_lanes, v.raw, 8);
75 DoTestPer4LaneBlkShuffle<true >(v);
76 DoTestPer4LaneBlkShuffle<false>(v);
// Test driver: loads the bytes 1..8 into the first 8 bytes of v.raw and runs
// both shuffle tests on that vector.
// noipa + optimize(0): excluded from interprocedural optimisation and
// compiled without optimisation.
// NOTE(review): the declaration of v (and the contents of its bytes 8..15)
// is not shown.
79 __attribute__((noipa, optimize(0)))
80 static void bug(void) {
81 uint8_t iv[8] = {1,2,3,4,5,6,7,8};
83 __builtin_memcpy(v.raw, iv, 8);
84 DoTestPer4LaneBlkShuffles(v);