[AMDGPU] Update base addr of dyn alloca considering GrowingUp stack (#119822)
[llvm-project.git] / clang / test / CodeGen / AArch64 / v8.2a-neon-intrinsics-generic.c
blob 4d2ef318005bd311f906620f4457764021c20766
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
2 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature -fullfp16 -target-feature +v8a\
3 // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
4 // RUN: | opt -S -passes=mem2reg \
5 // RUN: | FileCheck %s
6 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
7 // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
8 // RUN: | opt -S -passes=mem2reg \
9 // RUN: | FileCheck %s
11 // REQUIRES: aarch64-registered-target
13 #include <arm_neon.h>
15 // CHECK-LABEL: define {{[^@]+}}@test_vbsl_f16
16 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
17 // CHECK-NEXT: entry:
18 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
19 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
20 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
21 // CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
22 // CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
23 // CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]]
24 // CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], splat (i16 -1)
25 // CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
26 // CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
27 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half>
28 // CHECK-NEXT: ret <4 x half> [[TMP4]]
// Exercises vbsl_f16 with a uint16x4_t first operand and two float16x4_t
// operands. The expected IR above performs the select as and/xor/or on
// <4 x i16> bitcasts of b and c, then bitcasts the result back to <4 x half>.
30 float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
31 return vbsl_f16(a, b, c);
34 // CHECK-LABEL: define {{[^@]+}}@test_vbslq_f16
35 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
36 // CHECK-NEXT: entry:
37 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
38 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
39 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
40 // CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
41 // CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
42 // CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]]
43 // CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], splat (i16 -1)
44 // CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
45 // CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
46 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half>
47 // CHECK-NEXT: ret <8 x half> [[TMP4]]
// 128-bit variant of the bitwise-select test: the expected IR above uses
// and/xor/or on <8 x i16> bitcasts and returns an <8 x half>.
49 float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
50 return vbslq_f16(a, b, c);
53 // CHECK-LABEL: define {{[^@]+}}@test_vzip_f16
54 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
55 // CHECK-NEXT: entry:
56 // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
57 // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
58 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
59 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
60 // CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
61 // CHECK-NEXT: store <4 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 8
62 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
63 // CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
64 // CHECK-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 8
65 // CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
66 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
67 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
68 // CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
69 // CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
70 // CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
// Exercises vzip_f16. The expected IR above produces the two halves of the
// result with shufflevector masks <0, 4, 1, 5> and <2, 6, 3, 7>, stores them
// into a float16x4x2_t temporary and returns that struct by value.
72 float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
73 return vzip_f16(a, b);
76 // CHECK-LABEL: define {{[^@]+}}@test_vzipq_f16
77 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
78 // CHECK-NEXT: entry:
79 // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
80 // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
81 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
82 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
83 // CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
84 // CHECK-NEXT: store <8 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 16
85 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
86 // CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
87 // CHECK-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 16
88 // CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
89 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
90 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
91 // CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
92 // CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
93 // CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
// Exercises vzipq_f16. The expected IR above uses the shufflevector masks
// <0, 8, 1, 9, 2, 10, 3, 11> and <4, 12, 5, 13, 6, 14, 7, 15> and returns the
// pair as a float16x8x2_t struct.
95 float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
96 return vzipq_f16(a, b);
99 // CHECK-LABEL: define {{[^@]+}}@test_vuzp_f16
100 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
101 // CHECK-NEXT: entry:
102 // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
103 // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
104 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
105 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
106 // CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
107 // CHECK-NEXT: store <4 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 8
108 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
109 // CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
110 // CHECK-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 8
111 // CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
112 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
113 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
114 // CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
115 // CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
116 // CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
// Exercises vuzp_f16. The expected IR above uses the shufflevector masks
// <0, 2, 4, 6> (even lanes) and <1, 3, 5, 7> (odd lanes) and returns the pair
// as a float16x4x2_t struct.
118 float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
119 return vuzp_f16(a, b);
122 // CHECK-LABEL: define {{[^@]+}}@test_vuzpq_f16
123 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
124 // CHECK-NEXT: entry:
125 // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
126 // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
127 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
128 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
129 // CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
130 // CHECK-NEXT: store <8 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 16
131 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
132 // CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
133 // CHECK-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 16
134 // CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
135 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
136 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
137 // CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
138 // CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
139 // CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
// Exercises vuzpq_f16. The expected IR above selects the even lanes
// (0, 2, ..., 14) and then the odd lanes (1, 3, ..., 15) of the concatenated
// inputs and returns the pair as a float16x8x2_t struct.
141 float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
142 return vuzpq_f16(a, b);
145 // CHECK-LABEL: define {{[^@]+}}@test_vtrn_f16
146 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
147 // CHECK-NEXT: entry:
148 // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
149 // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
150 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
151 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
152 // CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
153 // CHECK-NEXT: store <4 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 8
154 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
155 // CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
156 // CHECK-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 8
157 // CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
158 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
159 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
160 // CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
161 // CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
162 // CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
// Exercises vtrn_f16. The expected IR above uses the shufflevector masks
// <0, 4, 2, 6> and <1, 5, 3, 7> and returns the pair as a float16x4x2_t struct.
164 float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
165 return vtrn_f16(a, b);
168 // CHECK-LABEL: define {{[^@]+}}@test_vtrnq_f16
169 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
170 // CHECK-NEXT: entry:
171 // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
172 // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
173 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
174 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
175 // CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
176 // CHECK-NEXT: store <8 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 16
177 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
178 // CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
179 // CHECK-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 16
180 // CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
181 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
182 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
183 // CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
184 // CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
185 // CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
// Exercises vtrnq_f16. The expected IR above uses the shufflevector masks
// <0, 8, 2, 10, 4, 12, 6, 14> and <1, 9, 3, 11, 5, 13, 7, 15> and returns the
// pair as a float16x8x2_t struct.
187 float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
188 return vtrnq_f16(a, b);
191 // CHECK-LABEL: define {{[^@]+}}@test_vmov_n_f16
192 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
193 // CHECK-NEXT: entry:
194 // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
195 // CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
196 // CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
197 // CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
198 // CHECK-NEXT: ret <4 x half> [[VECINIT3]]
// Exercises vmov_n_f16. The expected IR above builds the <4 x half> result by
// inserting the scalar argument into lanes 0 through 3 in turn.
200 float16x4_t test_vmov_n_f16(float16_t a) {
201 return vmov_n_f16(a);
204 // CHECK-LABEL: define {{[^@]+}}@test_vmovq_n_f16
205 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
206 // CHECK-NEXT: entry:
207 // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
208 // CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
209 // CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
210 // CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
211 // CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
212 // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
213 // CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
214 // CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
215 // CHECK-NEXT: ret <8 x half> [[VECINIT7]]
// Exercises vmovq_n_f16. The expected IR above builds the <8 x half> result by
// inserting the scalar argument into lanes 0 through 7 in turn.
217 float16x8_t test_vmovq_n_f16(float16_t a) {
218 return vmovq_n_f16(a);
221 // CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16
222 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
223 // CHECK-NEXT: entry:
224 // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
225 // CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
226 // CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
227 // CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
228 // CHECK-NEXT: ret <4 x half> [[VECINIT3]]
// Exercises vdup_n_f16. The expected IR above is the same four-insertelement
// sequence that is expected for test_vmov_n_f16.
230 float16x4_t test_vdup_n_f16(float16_t a) {
231 return vdup_n_f16(a);
234 // CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16
235 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
236 // CHECK-NEXT: entry:
237 // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
238 // CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
239 // CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
240 // CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
241 // CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
242 // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
243 // CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
244 // CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
245 // CHECK-NEXT: ret <8 x half> [[VECINIT7]]
// Exercises vdupq_n_f16. The expected IR above is the same eight-insertelement
// sequence that is expected for test_vmovq_n_f16.
247 float16x8_t test_vdupq_n_f16(float16_t a) {
248 return vdupq_n_f16(a);
251 // CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16
252 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
253 // CHECK-NEXT: entry:
254 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
255 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
256 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
257 // CHECK-NEXT: ret <4 x half> [[LANE]]
// Exercises vdup_lane_f16 with lane index 3. The expected IR above is a
// shufflevector of the <4 x half> input whose four mask entries are all 3.
259 float16x4_t test_vdup_lane_f16(float16x4_t a) {
260 return vdup_lane_f16(a, 3);
263 // CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16
264 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
265 // CHECK-NEXT: entry:
266 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
267 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
268 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
269 // CHECK-NEXT: ret <8 x half> [[LANE]]
// Exercises vdupq_lane_f16 with lane index 3. The expected IR above widens the
// <4 x half> input to <8 x half> with a shufflevector whose eight mask entries
// are all 3.
271 float16x8_t test_vdupq_lane_f16(float16x4_t a) {
272 return vdupq_lane_f16(a, 3);
275 // CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16
276 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
277 // CHECK-NEXT: entry:
278 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
279 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
280 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
281 // CHECK-NEXT: ret <4 x half> [[LANE]]
// Exercises vdup_laneq_f16 with lane index 1. The expected IR above narrows the
// <8 x half> input to <4 x half> with a shufflevector whose four mask entries
// are all 1.
283 float16x4_t test_vdup_laneq_f16(float16x8_t a) {
284 return vdup_laneq_f16(a, 1);
287 // CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16
288 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
289 // CHECK-NEXT: entry:
290 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
291 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
292 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
293 // CHECK-NEXT: ret <8 x half> [[LANE]]
// Exercises vdupq_laneq_f16 with lane index 7. The expected IR above is a
// shufflevector of the <8 x half> input whose eight mask entries are all 7.
295 float16x8_t test_vdupq_laneq_f16(float16x8_t a) {
296 return vdupq_laneq_f16(a, 7);
299 // CHECK-LABEL: define {{[^@]+}}@test_vext_f16
300 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
301 // CHECK-NEXT: entry:
302 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
303 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
304 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
305 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
306 // CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
307 // CHECK-NEXT: ret <4 x half> [[VEXT]]
// Exercises vext_f16 with an offset of 2. The expected IR above is a
// shufflevector over a and b with mask <2, 3, 4, 5>.
309 float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
310 return vext_f16(a, b, 2);
313 // CHECK-LABEL: define {{[^@]+}}@test_vextq_f16
314 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
315 // CHECK-NEXT: entry:
316 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
317 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
318 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
319 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
320 // CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
321 // CHECK-NEXT: ret <8 x half> [[VEXT]]
// Exercises vextq_f16 with an offset of 5. The expected IR above is a
// shufflevector over a and b with mask <5, 6, 7, 8, 9, 10, 11, 12>.
323 float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
324 return vextq_f16(a, b, 5);
327 // CHECK-LABEL: define {{[^@]+}}@test_vrev64_f16
328 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
329 // CHECK-NEXT: entry:
330 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
331 // CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]]
// Exercises vrev64_f16. The expected IR above reverses the four lanes with the
// shufflevector mask <3, 2, 1, 0>.
333 float16x4_t test_vrev64_f16(float16x4_t a) {
334 return vrev64_f16(a);
337 // CHECK-LABEL: define {{[^@]+}}@test_vrev64q_f16
338 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
339 // CHECK-NEXT: entry:
340 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
341 // CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]]
// Exercises vrev64q_f16. The expected IR above reverses each group of four
// lanes independently with the mask <3, 2, 1, 0, 7, 6, 5, 4>.
343 float16x8_t test_vrev64q_f16(float16x8_t a) {
344 return vrev64q_f16(a);
347 // CHECK-LABEL: define {{[^@]+}}@test_vzip1_f16
348 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
349 // CHECK-NEXT: entry:
350 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
351 // CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]]
// Exercises vzip1_f16. The expected IR above is a single shufflevector with
// mask <0, 4, 1, 5>.
353 float16x4_t test_vzip1_f16(float16x4_t a, float16x4_t b) {
354 return vzip1_f16(a, b);
357 // CHECK-LABEL: define {{[^@]+}}@test_vzip1q_f16
358 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
359 // CHECK-NEXT: entry:
360 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
361 // CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]]
// Exercises vzip1q_f16. The expected IR above is a single shufflevector with
// mask <0, 8, 1, 9, 2, 10, 3, 11>.
363 float16x8_t test_vzip1q_f16(float16x8_t a, float16x8_t b) {
364 return vzip1q_f16(a, b);
367 // CHECK-LABEL: define {{[^@]+}}@test_vzip2_f16
368 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
369 // CHECK-NEXT: entry:
370 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
371 // CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]]
// Exercises vzip2_f16. The expected IR above is a single shufflevector with
// mask <2, 6, 3, 7>.
373 float16x4_t test_vzip2_f16(float16x4_t a, float16x4_t b) {
374 return vzip2_f16(a, b);
377 // CHECK-LABEL: define {{[^@]+}}@test_vzip2q_f16
378 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
379 // CHECK-NEXT: entry:
380 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
381 // CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]]
// Exercises vzip2q_f16. The expected IR above is a single shufflevector with
// mask <4, 12, 5, 13, 6, 14, 7, 15>.
383 float16x8_t test_vzip2q_f16(float16x8_t a, float16x8_t b) {
384 return vzip2q_f16(a, b);
387 // CHECK-LABEL: define {{[^@]+}}@test_vuzp1_f16
388 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
389 // CHECK-NEXT: entry:
390 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
391 // CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]]
// Exercises vuzp1_f16. The expected IR above is a single shufflevector that
// picks the even lanes, mask <0, 2, 4, 6>.
393 float16x4_t test_vuzp1_f16(float16x4_t a, float16x4_t b) {
394 return vuzp1_f16(a, b);
397 // CHECK-LABEL: define {{[^@]+}}@test_vuzp1q_f16
398 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
399 // CHECK-NEXT: entry:
400 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
401 // CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]]
// Exercises vuzp1q_f16. The expected IR above is a single shufflevector that
// picks the even lanes, mask <0, 2, 4, 6, 8, 10, 12, 14>.
403 float16x8_t test_vuzp1q_f16(float16x8_t a, float16x8_t b) {
404 return vuzp1q_f16(a, b);
407 // CHECK-LABEL: define {{[^@]+}}@test_vuzp2_f16
408 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
409 // CHECK-NEXT: entry:
410 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
411 // CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]]
// Exercises vuzp2_f16. The expected IR above is a single shufflevector that
// picks the odd lanes, mask <1, 3, 5, 7>.
413 float16x4_t test_vuzp2_f16(float16x4_t a, float16x4_t b) {
414 return vuzp2_f16(a, b);
417 // CHECK-LABEL: define {{[^@]+}}@test_vuzp2q_f16
418 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
419 // CHECK-NEXT: entry:
420 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
421 // CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]]
// Exercises vuzp2q_f16. The expected IR above is a single shufflevector that
// picks the odd lanes, mask <1, 3, 5, 7, 9, 11, 13, 15>.
423 float16x8_t test_vuzp2q_f16(float16x8_t a, float16x8_t b) {
424 return vuzp2q_f16(a, b);
427 // CHECK-LABEL: define {{[^@]+}}@test_vtrn1_f16
428 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
429 // CHECK-NEXT: entry:
430 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
431 // CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]]
// Exercises vtrn1_f16. The expected IR above is a single shufflevector with
// mask <0, 4, 2, 6>.
433 float16x4_t test_vtrn1_f16(float16x4_t a, float16x4_t b) {
434 return vtrn1_f16(a, b);
437 // CHECK-LABEL: define {{[^@]+}}@test_vtrn1q_f16
438 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
439 // CHECK-NEXT: entry:
440 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
441 // CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]]
// Exercises vtrn1q_f16. The expected IR above is a single shufflevector with
// mask <0, 8, 2, 10, 4, 12, 6, 14>.
443 float16x8_t test_vtrn1q_f16(float16x8_t a, float16x8_t b) {
444 return vtrn1q_f16(a, b);
447 // CHECK-LABEL: define {{[^@]+}}@test_vtrn2_f16
448 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
449 // CHECK-NEXT: entry:
450 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
451 // CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]]
// Exercises vtrn2_f16. The expected IR above is a single shufflevector with
// mask <1, 5, 3, 7>.
453 float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) {
454 return vtrn2_f16(a, b);
457 // CHECK-LABEL: define {{[^@]+}}@test_vtrn2q_f16
458 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
459 // CHECK-NEXT: entry:
460 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
461 // CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]]
// Exercises vtrn2q_f16. The expected IR above is a single shufflevector with
// mask <1, 9, 3, 11, 5, 13, 7, 15>.
463 float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) {
464 return vtrn2q_f16(a, b);
467 // CHECK-LABEL: define {{[^@]+}}@test_vduph_laneq_f16
468 // CHECK-SAME: (<8 x half> noundef [[VEC:%.*]]) #[[ATTR0]] {
469 // CHECK-NEXT: entry:
470 // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7
471 // CHECK-NEXT: ret half [[VGETQ_LANE]]
// Exercises vduph_laneq_f16 with lane index 7. The expected IR above is a
// single extractelement of lane 7 from the <8 x half> argument.
473 float16_t test_vduph_laneq_f16(float16x8_t vec) {
474 return vduph_laneq_f16(vec, 7);
477 // CHECK-LABEL: define {{[^@]+}}@test_vduph_lane_f16
478 // CHECK-SAME: (<4 x half> noundef [[VEC:%.*]]) #[[ATTR0]] {
479 // CHECK-NEXT: entry:
480 // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3
481 // CHECK-NEXT: ret half [[VGET_LANE]]
// Exercises vduph_lane_f16 with lane index 3. The expected IR above is a
// single extractelement of lane 3 from the <4 x half> argument.
483 float16_t test_vduph_lane_f16(float16x4_t vec) {
484 return vduph_lane_f16(vec, 3);