Revert "[llvm] Improve llvm.objectsize computation by computing GEP, alloca and mallo...
[llvm-project.git] / clang / test / CodeGen / AArch64 / neon-tbl.c
blobd51f0ae2e11c5f5723baafb9d0e915a0aaa6ecc1
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:     -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_neon.h>
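
// This file checks the IR Clang emits for the AArch64 NEON table-lookup
// intrinsics (vtbl/vtbx and their vqtbl/vqtbx counterparts). The AArch64
// TBL/TBX instructions take 128-bit table registers, so the 64-bit-table
// forms below are widened first: vtbl1 zero-pads its single d-register
// table with a shufflevector before calling @llvm.aarch64.neon.tbl1.
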
// CHECK-LABEL: define {{[^@]+}}@test_vtbl1_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
//
int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
  return vtbl1_s8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
//
int8x8_t test_vqtbl1_s8(int8x16_t a, uint8x8_t b) {
  return vqtbl1_s8(a, b);
}
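
// vtbl2 takes two d-register tables: Clang spills the int8x8x2_t argument,
// reloads the two halves, concatenates them into a single <16 x i8> table
// with a shufflevector, and issues one tbl1 call. vqtbl2 already has two
// full q-register tables, so it maps directly onto tbl2.
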
// CHECK-LABEL: define {{[^@]+}}@test_vtbl2_s8
// CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
//
int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
  return vtbl2_s8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_s8
// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
//
int8x8_t test_vqtbl2_s8(int8x16x2_t a, uint8x8_t b) {
  return vqtbl2_s8(a, b);
}
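
// vtbl3 and vtbl4 likewise have no single-instruction 64-bit form: the
// three or four d-registers are paired up into <16 x i8> tables (with the
// odd register zero-padded for vtbl3) and lowered to a single tbl2 call.
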
// CHECK-LABEL: define {{[^@]+}}@test_vtbl3_s8
// CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
//
int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
  return vtbl3_s8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_s8
// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
//
int8x8_t test_vqtbl3_s8(int8x16x3_t a, uint8x8_t b) {
  return vqtbl3_s8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbl4_s8
// CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
//
int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
  return vtbl4_s8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_s8
// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
//
int8x8_t test_vqtbl4_s8(int8x16x4_t a, uint8x8_t b) {
  return vqtbl4_s8(a, b);
}
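
// The q-suffixed forms consume <16 x i8> indices and produce <16 x i8>
// results, so they map directly onto the v16i8 variants of the tbl
// intrinsics with no shuffling.
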
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
//
int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
  return vqtbl1q_s8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_s8
// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
//
int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
  return vqtbl2q_s8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_s8
// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
//
int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
  return vqtbl3q_s8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_s8
// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
//
int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
  return vqtbl4q_s8(a, b);
}
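
// TBX requires a whole number of 128-bit table registers, so vtbx1 and
// vtbx3 are emulated: run the lookup through tbl, then compare the indices
// against the real table size (8 or 24 bytes) and select the fallback
// vector a for out-of-range lanes via the sext/and/xor/or sequence below.
// vtbx2 and vtbx4 concatenate their d-registers and map onto tbx1/tbx2.
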
// CHECK-LABEL: define {{[^@]+}}@test_vtbx1_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8)
// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1)
// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vtbx1_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbx2_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
//
int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
  return vtbx2_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbx3_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24)
// CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1)
// CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
  return vtbx3_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbx4_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X8X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
//
int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
  return vtbx4_s8(a, b, c);
}
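
// The vqtbx intrinsics already take 128-bit tables, so each lowers to a
// single tbx1..tbx4 call with no index fixup.
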
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
//
int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, uint8x8_t c) {
  return vqtbx1_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
//
int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, uint8x8_t c) {
  return vqtbx2_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
//
int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, uint8x8_t c) {
  return vqtbx3_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
//
int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, uint8x8_t c) {
  return vqtbx4_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
//
int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, uint8x16_t c) {
  return vqtbx1q_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
//
int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
  return vqtbx2q_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
//
int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
  return vqtbx3q_s8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
//
int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
  return vqtbx4q_s8(a, b, c);
}
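
// The unsigned variants below exercise the same lowering and generate
// identical IR; only the C-level element types differ.
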
489 // CHECK-LABEL: define {{[^@]+}}@test_vtbl1_u8
490 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
491 // CHECK-NEXT: entry:
492 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
493 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
494 // CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
496 uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
497 return vtbl1_u8(a, b);
500 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_u8
501 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
502 // CHECK-NEXT: entry:
503 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
504 // CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
506 uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
507 return vqtbl1_u8(a, b);
510 // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_u8
511 // CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
512 // CHECK-NEXT: entry:
513 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
514 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
515 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X2_T]], ptr [[A]], i32 0, i32 0
516 // CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
517 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X2_T]], ptr [[A]], i32 0, i32 0
518 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
519 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
520 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
521 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
522 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
523 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
524 // CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
525 // CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
527 uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
528 return vtbl2_u8(a, b);
531 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_u8
532 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
533 // CHECK-NEXT: entry:
534 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
535 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
536 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
537 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
538 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
539 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
540 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
541 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
542 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
543 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
544 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
545 // CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
547 uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
548 return vqtbl2_u8(a, b);
551 // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_u8
552 // CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
553 // CHECK-NEXT: entry:
554 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
555 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
556 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X3_T]], ptr [[A]], i32 0, i32 0
557 // CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
558 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X3_T]], ptr [[A]], i32 0, i32 0
559 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
560 // CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
561 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
562 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
563 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
564 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
565 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
566 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
567 // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
568 // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
569 // CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
571 uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
572 return vtbl3_u8(a, b);
575 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_u8
576 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
577 // CHECK-NEXT: entry:
578 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
579 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
580 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
581 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
582 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
583 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
584 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
585 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
586 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
587 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
588 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
589 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
590 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
591 // CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
593 uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
594 return vqtbl3_u8(a, b);
597 // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_u8
598 // CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
599 // CHECK-NEXT: entry:
600 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
601 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
602 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X4_T]], ptr [[A]], i32 0, i32 0
603 // CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
604 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X4_T]], ptr [[A]], i32 0, i32 0
605 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
606 // CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
607 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
608 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
609 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
610 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
611 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
612 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3
613 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
614 // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
615 // CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
616 // CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
617 // CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
619 uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
620 return vtbl4_u8(a, b);
623 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_u8
624 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
625 // CHECK-NEXT: entry:
626 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
627 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
628 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
629 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
630 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
631 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
632 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
633 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
634 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
635 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
636 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
637 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
638 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
639 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
640 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
641 // CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
643 uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
644 return vqtbl4_u8(a, b);
647 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_u8
648 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
649 // CHECK-NEXT: entry:
650 // CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
651 // CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
653 uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
654 return vqtbl1q_u8(a, b);
657 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_u8
658 // CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
659 // CHECK-NEXT: entry:
660 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
661 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
662 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
663 // CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
664 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
665 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
666 // CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
667 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
668 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
669 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
670 // CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
671 // CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
673 uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
674 return vqtbl2q_u8(a, b);
677 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_u8
678 // CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
679 // CHECK-NEXT: entry:
680 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
681 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
682 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
683 // CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
684 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
685 // CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
686 // CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
687 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
688 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
689 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
690 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
691 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
692 // CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
693 // CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
695 uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
696 return vqtbl3q_u8(a, b);
699 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_u8
700 // CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
701 // CHECK-NEXT: entry:
702 // CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
703 // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
704 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
705 // CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
706 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
707 // CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
708 // CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
709 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
710 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
711 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
712 // CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
713 // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
714 // CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
715 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
716 // CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
717 // CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
719 uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
720 return vqtbl4q_u8(a, b);
723 // CHECK-LABEL: define {{[^@]+}}@test_vtbx1_u8
724 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
725 // CHECK-NEXT: entry:
726 // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
727 // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
728 // CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8)
729 // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
730 // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
731 // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1)
732 // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
733 // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
734 // CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
736 uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
737 return vtbx1_u8(a, b, c);
740 // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_u8
741 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
742 // CHECK-NEXT: entry:
743 // CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
744 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
745 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X2_T]], ptr [[B]], i32 0, i32 0
746 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
747 // CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X2_T]], ptr [[B]], i32 0, i32 0
748 // CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
749 // CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
750 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
751 // CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
752 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
753 // CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
754 // CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
755 // CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
757 uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
758 return vtbx2_u8(a, b, c);
// CHECK-LABEL: define {{[^@]+}}@test_vtbx3_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24)
// CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1)
// CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  return vtbx3_u8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbx4_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X8X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
//
uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
  return vtbx4_u8(a, b, c);
}

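// Note: the vqtbx* variants take whole 128-bit table registers, so per the
// checks below they map directly onto the @llvm.aarch64.neon.tbx* intrinsics
// with no shuffle padding or out-of-range select fixup.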
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
//
uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
  return vqtbx1_u8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
//
uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
  return vqtbx2_u8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
//
uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
  return vqtbx3_u8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
//
uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
  return vqtbx4_u8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
//
uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vqtbx1q_u8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
//
uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
  return vqtbx2q_u8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
//
uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
  return vqtbx3q_u8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
//
uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
  return vqtbx4q_u8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbl1_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
//
poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
  return vtbl1_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
//
poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
  return vqtbl1_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbl2_p8
// CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
//
poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
  return vtbl2_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_p8
// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
//
poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
  return vqtbl2_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbl3_p8
// CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
//
poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
  return vtbl3_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_p8
// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
//
poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
  return vqtbl3_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbl4_p8
// CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
//
poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
  return vtbl4_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_p8
// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
//
poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
  return vqtbl4_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
//
poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
  return vqtbl1q_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_p8
// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
//
poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
  return vqtbl2q_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_p8
// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
//
poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
  return vqtbl3q_p8(a, b);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_p8
// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
//
poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
  return vqtbl4q_p8(a, b);
}

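// Note: vtbx1 is likewise emulated, as the checks below show: the single
// 8-byte table is zero-extended to 16 bytes for tbl1, and lanes with indices
// >= 8 are taken from the passthrough vector via the same compare/select
// pattern used for vtbx3 above.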
// CHECK-LABEL: define {{[^@]+}}@test_vtbx1_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8)
// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1)
// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
  return vtbx1_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbx2_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
//
poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
  return vtbx2_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbx3_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24)
// CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1)
// CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
  return vtbx3_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vtbx4_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X8X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
//
poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
  return vtbx4_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
//
poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
  return vqtbx1_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
//
poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
  return vqtbx2_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
//
poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
  return vqtbx3_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
//
poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
  return vqtbx4_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
//
poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vqtbx1q_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
//
poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
  return vqtbx2q_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
//
poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
  return vqtbx3q_p8(a, b, c);
}

// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
//
poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
  return vqtbx4q_p8(a, b, c);
}