// clang/test/CodeGen/AArch64/neon-luti.c
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2 // REQUIRES: aarch64-registered-target
3 #include <arm_neon.h>
4 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s
5 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
7 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8(
8 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
9 // CHECK-NEXT: entry:
10 // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
11 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
13 uint8x16_t test_vluti2_lane_u8(uint8x8_t vn, uint8x8_t vm) {
14 return vluti2_lane_u8(vn, vm, 0);
17 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_u8(
18 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
19 // CHECK-NEXT: entry:
20 // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
21 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
23 uint8x16_t test_vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm) {
24 return vluti2_laneq_u8(vn, vm, 0);
27 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_u8(
28 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
29 // CHECK-NEXT: entry:
30 // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
31 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
33 uint8x16_t test_vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
34 return vluti2q_lane_u8(vn, vm, 1);
37 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_u8(
38 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
39 // CHECK-NEXT: entry:
40 // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
41 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
43 uint8x16_t test_vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
44 return vluti2q_laneq_u8(vn, vm, 3);
47 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_s8(
48 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
49 // CHECK-NEXT: entry:
50 // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
51 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
53 int8x16_t test_vluti2_lane_s8(int8x8_t vn, uint8x8_t vm) {
54 return vluti2_lane_s8(vn, vm, 0);
57 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_s8(
58 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
59 // CHECK-NEXT: entry:
60 // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
61 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
63 int8x16_t test_vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm) {
64 return vluti2_laneq_s8(vn, vm, 0);
67 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_s8(
68 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
69 // CHECK-NEXT: entry:
70 // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
71 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
73 int8x16_t test_vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm) {
74 return vluti2q_lane_s8(vn, vm, 1);
77 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_s8(
78 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
79 // CHECK-NEXT: entry:
80 // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
81 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
83 int8x16_t test_vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
84 return vluti2q_laneq_s8(vn, vm, 3);
87 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_p8(
88 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
89 // CHECK-NEXT: entry:
90 // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
91 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
93 poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) {
94 return vluti2_lane_p8(vn, vm, 0);
97 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_p8(
98 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
99 // CHECK-NEXT: entry:
100 // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
101 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
103 poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) {
104 return vluti2_laneq_p8(vn, vm, 0);
107 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_p8(
108 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
109 // CHECK-NEXT: entry:
110 // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
111 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
113 poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
114 return vluti2q_lane_p8(vn, vm, 1);
117 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_p8(
118 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
119 // CHECK-NEXT: entry:
120 // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
121 // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
123 poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
124 return vluti2q_laneq_p8(vn, vm, 3);
127 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_u16(
128 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
129 // CHECK-NEXT: entry:
130 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
131 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
133 uint16x8_t test_vluti2_lane_u16(uint16x4_t vn, uint8x8_t vm) {
134 return vluti2_lane_u16(vn, vm, 0);
137 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_u16(
138 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
139 // CHECK-NEXT: entry:
140 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
141 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
143 uint16x8_t test_vluti2_laneq_u16(uint16x4_t vn, uint8x16_t vm) {
144 return vluti2_laneq_u16(vn, vm, 0);
147 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_u16(
148 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
149 // CHECK-NEXT: entry:
150 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
151 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
153 uint16x8_t test_vluti2q_lane_u16(uint16x8_t vn, uint8x8_t vm) {
154 return vluti2q_lane_u16(vn, vm, 3);
157 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_u16(
158 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
159 // CHECK-NEXT: entry:
160 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
161 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
163 uint16x8_t test_vluti2q_laneq_u16(uint16x8_t vn, uint8x16_t vm) {
164 return vluti2q_laneq_u16(vn, vm, 7);
167 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_s16(
168 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
169 // CHECK-NEXT: entry:
170 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
171 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
173 int16x8_t test_vluti2_lane_s16(int16x4_t vn, uint8x8_t vm) {
174 return vluti2_lane_s16(vn, vm, 0);
177 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_s16(
178 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
179 // CHECK-NEXT: entry:
180 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
181 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
183 int16x8_t test_vluti2_laneq_s16(int16x4_t vn, uint8x16_t vm) {
184 return vluti2_laneq_s16(vn, vm, 0);
187 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_s16(
188 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
189 // CHECK-NEXT: entry:
190 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
191 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
193 int16x8_t test_vluti2q_lane_s16(int16x8_t vn, uint8x8_t vm) {
194 return vluti2q_lane_s16(vn, vm, 3);
197 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_s16(
198 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
199 // CHECK-NEXT: entry:
200 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
201 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
203 int16x8_t test_vluti2q_laneq_s16(int16x8_t vn, uint8x16_t vm) {
204 return vluti2q_laneq_s16(vn, vm, 7);
207 // CHECK-LABEL: define dso_local <8 x half> @test_vluti2_lane_f16(
208 // CHECK-SAME: <4 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
209 // CHECK-NEXT: entry:
210 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> [[VN]], <8 x i8> [[VM]], i32 0)
211 // CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE1]]
213 float16x8_t test_vluti2_lane_f16(float16x4_t vn, uint8x8_t vm) {
214 return vluti2_lane_f16(vn, vm, 0);
217 // CHECK-LABEL: define dso_local <8 x half> @test_vluti2_laneq_f16(
218 // CHECK-SAME: <4 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
219 // CHECK-NEXT: entry:
220 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4f16(<4 x half> [[VN]], <16 x i8> [[VM]], i32 0)
221 // CHECK-NEXT: ret <8 x half> [[VLUTI2_LANEQ1]]
223 float16x8_t test_vluti2_laneq_f16(float16x4_t vn, uint8x16_t vm) {
224 return vluti2_laneq_f16(vn, vm, 0);
227 // CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_lane_f16(
228 // CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
229 // CHECK-NEXT: entry:
230 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> [[VN]], <8 x i8> [[VM]], i32 3)
231 // CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE1]]
233 float16x8_t test_vluti2q_lane_f16(float16x8_t vn, uint8x8_t vm) {
234 return vluti2q_lane_f16(vn, vm, 3);
237 // CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_laneq_f16(
238 // CHECK-SAME: <8 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
239 // CHECK-NEXT: entry:
240 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v8f16(<8 x half> [[VN]], <16 x i8> [[VM]], i32 7)
241 // CHECK-NEXT: ret <8 x half> [[VLUTI2_LANEQ1]]
243 float16x8_t test_vluti2q_laneq_f16(float16x8_t vn, uint8x16_t vm) {
244 return vluti2q_laneq_f16(vn, vm, 7);
247 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_lane_bf16(
248 // CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
249 // CHECK-NEXT: entry:
250 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> [[VN]], <8 x i8> [[VM]], i32 0)
251 // CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE1]]
253 bfloat16x8_t test_vluti2_lane_bf16(bfloat16x4_t vn, uint8x8_t vm) {
254 return vluti2_lane_bf16(vn, vm, 0);
257 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_laneq_bf16(
258 // CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
259 // CHECK-NEXT: entry:
260 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> [[VN]], <16 x i8> [[VM]], i32 0)
261 // CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANEQ1]]
263 bfloat16x8_t test_vluti2_laneq_bf16(bfloat16x4_t vn, uint8x16_t vm) {
264 return vluti2_laneq_bf16(vn, vm, 0);
267 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_lane_bf16(
268 // CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
269 // CHECK-NEXT: entry:
270 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<8 x bfloat> [[VN]], <8 x i8> [[VM]], i32 3)
271 // CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE1]]
273 bfloat16x8_t test_vluti2q_lane_bf16(bfloat16x8_t vn, uint8x8_t vm) {
274 return vluti2q_lane_bf16(vn, vm, 3);
277 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_laneq_bf16(
278 // CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
279 // CHECK-NEXT: entry:
280 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v8bf16(<8 x bfloat> [[VN]], <16 x i8> [[VM]], i32 7)
281 // CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANEQ1]]
283 bfloat16x8_t test_vluti2q_laneq_bf16(bfloat16x8_t vn, uint8x16_t vm) {
284 return vluti2q_laneq_bf16(vn, vm, 7);
287 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_p16(
288 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
289 // CHECK-NEXT: entry:
290 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
291 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
293 poly16x8_t test_vluti2_lane_p16(poly16x4_t vn, uint8x8_t vm) {
294 return vluti2_lane_p16(vn, vm, 0);
297 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_p16(
298 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
299 // CHECK-NEXT: entry:
300 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
301 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
303 poly16x8_t test_vluti2_laneq_p16(poly16x4_t vn, uint8x16_t vm) {
304 return vluti2_laneq_p16(vn, vm, 0);
307 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_p16(
308 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
309 // CHECK-NEXT: entry:
310 // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
311 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
313 poly16x8_t test_vluti2q_lane_p16(poly16x8_t vn, uint8x8_t vm) {
314 return vluti2q_lane_p16(vn, vm, 3);
317 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_p16(
318 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
319 // CHECK-NEXT: entry:
320 // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
321 // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
323 poly16x8_t test_vluti2q_laneq_p16(poly16x8_t vn, uint8x16_t vm) {
324 return vluti2q_laneq_p16(vn, vm, 7);
329 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_u8(
330 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
331 // CHECK-NEXT: entry:
332 // CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
333 // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]]
335 uint8x16_t test_vluti4q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
336 return vluti4q_lane_u8(vn, vm, 0);
339 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_u8(
340 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
341 // CHECK-NEXT: entry:
342 // CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
343 // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]]
345 uint8x16_t test_vluti4q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
346 return vluti4q_laneq_u8(vn, vm, 0);
349 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_s8(
350 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
351 // CHECK-NEXT: entry:
352 // CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
353 // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]]
355 int8x16_t test_vluti4q_lane_s8(int8x16_t vn, uint8x8_t vm) {
356 return vluti4q_lane_s8(vn, vm, 0);
359 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_s8(
360 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
361 // CHECK-NEXT: entry:
362 // CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1)
363 // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]]
365 int8x16_t test_vluti4q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
366 return vluti4q_laneq_s8(vn, vm, 1);
369 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_p8(
370 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
371 // CHECK-NEXT: entry:
372 // CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
373 // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]]
375 poly8x16_t test_vluti4q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
376 return vluti4q_lane_p8(vn, vm, 0);
379 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_p8(
380 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
381 // CHECK-NEXT: entry:
382 // CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1)
383 // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]]
385 poly8x16_t test_vluti4q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
386 return vluti4q_laneq_p8(vn, vm, 1);
389 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_u16_x2(
390 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
391 // CHECK-NEXT: entry:
392 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
393 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
394 // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0)
395 // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANE_X24]]
397 uint16x8_t test_vluti4q_lane_u16_x2(uint16x8x2_t vn, uint8x8_t vm) {
398 return vluti4q_lane_u16_x2(vn, vm, 0);
401 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_u16_x2(
402 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
403 // CHECK-NEXT: entry:
404 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
405 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
406 // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0)
407 // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
409 uint16x8_t test_vluti4q_laneq_u16_x2(uint16x8x2_t vn, uint8x16_t vm) {
410 return vluti4q_laneq_u16_x2(vn, vm, 0);
413 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_s16_x2(
414 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
415 // CHECK-NEXT: entry:
416 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
417 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
418 // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
419 // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANE_X24]]
421 int16x8_t test_vluti4q_lane_s16_x2(int16x8x2_t vn, uint8x8_t vm) {
422 return vluti4q_lane_s16_x2(vn, vm, 1);
425 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_s16_x2(
426 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
427 // CHECK-NEXT: entry:
428 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
429 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
430 // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 3)
431 // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
433 int16x8_t test_vluti4q_laneq_s16_x2(int16x8x2_t vn, uint8x16_t vm) {
434 return vluti4q_laneq_s16_x2(vn, vm, 3);
437 // CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_lane_f16_x2(
438 // CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
439 // CHECK-NEXT: entry:
440 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0
441 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1
442 // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
443 // CHECK-NEXT: ret <8 x half> [[VLUTI4Q_LANE_X24]]
445 float16x8_t test_vluti4q_lane_f16_x2(float16x8x2_t vn, uint8x8_t vm) {
446 return vluti4q_lane_f16_x2(vn, vm, 1);
449 // CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_laneq_f16_x2(
450 // CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
451 // CHECK-NEXT: entry:
452 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0
453 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1
454 // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 1)
455 // CHECK-NEXT: ret <8 x half> [[VLUTI4Q_LANEQ_X24]]
457 float16x8_t test_vluti4q_laneq_f16_x2(float16x8x2_t vn, uint8x16_t vm) {
458 return vluti4q_laneq_f16_x2(vn, vm, 1);
461 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_lane_bf16_x2(
462 // CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
463 // CHECK-NEXT: entry:
464 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0
465 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1
466 // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
467 // CHECK-NEXT: ret <8 x bfloat> [[VLUTI4Q_LANE_X24]]
469 bfloat16x8_t test_vluti4q_lane_bf16_x2(bfloat16x8x2_t vn, uint8x8_t vm) {
470 return vluti4q_lane_bf16_x2(vn, vm, 1);
473 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_laneq_bf16_x2(
474 // CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
475 // CHECK-NEXT: entry:
476 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0
477 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1
478 // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 2)
479 // CHECK-NEXT: ret <8 x bfloat> [[VLUTI4Q_LANEQ_X24]]
481 bfloat16x8_t test_vluti4q_laneq_bf16_x2(bfloat16x8x2_t vn, uint8x16_t vm) {
482 return vluti4q_laneq_bf16_x2(vn, vm, 2);
485 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_p16_x2(
486 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
487 // CHECK-NEXT: entry:
488 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
489 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
490 // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0)
491 // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANE_X24]]
493 poly16x8_t test_vluti4q_lane_p16_x2(poly16x8x2_t vn, uint8x8_t vm) {
494 return vluti4q_lane_p16_x2(vn, vm, 0);
497 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_p16_x2(
498 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
499 // CHECK-NEXT: entry:
500 // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
501 // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
502 // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0)
503 // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
505 poly16x8_t test_vluti4q_laneq_p16_x2(poly16x8x2_t vn, uint8x16_t vm) {
506 return vluti4q_laneq_p16_x2(vn, vm, 0);