Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / arm-bf16-reinterpret-intrinsics.c
blob9c93f3773559add76ec0103f7f3819e2118345c4
1 // RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi -target-feature +neon -target-feature +bf16 -mfloat-abi hard \
2 // RUN: -disable-O0-optnone -S -emit-llvm -o - %s \
3 // RUN: | opt -S -passes=instcombine \
4 // RUN: | FileCheck %s
6 // REQUIRES: arm-registered-target
8 #include <arm_neon.h>
10 // CHECK-LABEL: @test_vreinterpret_bf16_s8(
11 // CHECK-NEXT: entry:
12 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A:%.*]] to <4 x bfloat>
13 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
15 bfloat16x4_t test_vreinterpret_bf16_s8(int8x8_t a) { return vreinterpret_bf16_s8(a); }
16 // CHECK-LABEL: @test_vreinterpret_bf16_s16(
17 // CHECK-NEXT: entry:
18 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <4 x bfloat>
19 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
21 bfloat16x4_t test_vreinterpret_bf16_s16(int16x4_t a) { return vreinterpret_bf16_s16(a); }
22 // CHECK-LABEL: @test_vreinterpret_bf16_s32(
23 // CHECK-NEXT: entry:
24 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <4 x bfloat>
25 // CHECK-NEXT: ret <4 x bfloat> [[A:%.*]]
27 bfloat16x4_t test_vreinterpret_bf16_s32(int32x2_t a) { return vreinterpret_bf16_s32(a); }
28 // CHECK-LABEL: @test_vreinterpret_bf16_f32(
29 // CHECK-NEXT: entry:
30 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <4 x bfloat>
31 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
33 bfloat16x4_t test_vreinterpret_bf16_f32(float32x2_t a) { return vreinterpret_bf16_f32(a); }
34 // CHECK-LABEL: @test_vreinterpret_bf16_u8(
35 // CHECK-NEXT: entry:
36 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A:%.*]] to <4 x bfloat>
37 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
39 bfloat16x4_t test_vreinterpret_bf16_u8(uint8x8_t a) { return vreinterpret_bf16_u8(a); }
40 // CHECK-LABEL: @test_vreinterpret_bf16_u16(
41 // CHECK-NEXT: entry:
42 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <4 x bfloat>
43 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
45 bfloat16x4_t test_vreinterpret_bf16_u16(uint16x4_t a) { return vreinterpret_bf16_u16(a); }
46 // CHECK-LABEL: @test_vreinterpret_bf16_u32(
47 // CHECK-NEXT: entry:
48 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <4 x bfloat>
49 // CHECK-NEXT: ret <4 x bfloat> [[A:%.*]]
51 bfloat16x4_t test_vreinterpret_bf16_u32(uint32x2_t a) { return vreinterpret_bf16_u32(a); }
52 // CHECK-LABEL: @test_vreinterpret_bf16_p8(
53 // CHECK-NEXT: entry:
54 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A:%.*]] to <4 x bfloat>
55 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
57 bfloat16x4_t test_vreinterpret_bf16_p8(poly8x8_t a) { return vreinterpret_bf16_p8(a); }
58 // CHECK-LABEL: @test_vreinterpret_bf16_p16(
59 // CHECK-NEXT: entry:
60 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <4 x bfloat>
61 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
63 bfloat16x4_t test_vreinterpret_bf16_p16(poly16x4_t a) { return vreinterpret_bf16_p16(a); }
64 // CHECK-LABEL: @test_vreinterpret_bf16_u64(
65 // CHECK-NEXT: entry:
66 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <4 x bfloat>
67 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
69 bfloat16x4_t test_vreinterpret_bf16_u64(uint64x1_t a) { return vreinterpret_bf16_u64(a); }
70 // CHECK-LABEL: @test_vreinterpret_bf16_s64(
71 // CHECK-NEXT: entry:
72 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <4 x bfloat>
73 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
75 bfloat16x4_t test_vreinterpret_bf16_s64(int64x1_t a) { return vreinterpret_bf16_s64(a); }
76 // CHECK-LABEL: @test_vreinterpretq_bf16_s8(
77 // CHECK-NEXT: entry:
78 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x bfloat>
79 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
81 bfloat16x8_t test_vreinterpretq_bf16_s8(int8x16_t a) { return vreinterpretq_bf16_s8(a); }
82 // CHECK-LABEL: @test_vreinterpretq_bf16_s16(
83 // CHECK-NEXT: entry:
84 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x bfloat>
85 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
87 bfloat16x8_t test_vreinterpretq_bf16_s16(int16x8_t a) { return vreinterpretq_bf16_s16(a); }
88 // CHECK-LABEL: @test_vreinterpretq_bf16_s32(
89 // CHECK-NEXT: entry:
90 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x bfloat>
91 // CHECK-NEXT: ret <8 x bfloat> [[A:%.*]]
93 bfloat16x8_t test_vreinterpretq_bf16_s32(int32x4_t a) { return vreinterpretq_bf16_s32(a); }
94 // CHECK-LABEL: @test_vreinterpretq_bf16_f32(
95 // CHECK-NEXT: entry:
96 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <8 x bfloat>
97 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
99 bfloat16x8_t test_vreinterpretq_bf16_f32(float32x4_t a) { return vreinterpretq_bf16_f32(a); }
100 // CHECK-LABEL: @test_vreinterpretq_bf16_u8(
101 // CHECK-NEXT: entry:
102 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x bfloat>
103 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
105 bfloat16x8_t test_vreinterpretq_bf16_u8(uint8x16_t a) { return vreinterpretq_bf16_u8(a); }
106 // CHECK-LABEL: @test_vreinterpretq_bf16_u16(
107 // CHECK-NEXT: entry:
108 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x bfloat>
109 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
111 bfloat16x8_t test_vreinterpretq_bf16_u16(uint16x8_t a) { return vreinterpretq_bf16_u16(a); }
112 // CHECK-LABEL: @test_vreinterpretq_bf16_u32(
113 // CHECK-NEXT: entry:
114 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x bfloat>
115 // CHECK-NEXT: ret <8 x bfloat> [[A:%.*]]
117 bfloat16x8_t test_vreinterpretq_bf16_u32(uint32x4_t a) { return vreinterpretq_bf16_u32(a); }
118 // CHECK-LABEL: @test_vreinterpretq_bf16_p8(
119 // CHECK-NEXT: entry:
120 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x bfloat>
121 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
123 bfloat16x8_t test_vreinterpretq_bf16_p8(poly8x16_t a) { return vreinterpretq_bf16_p8(a); }
124 // CHECK-LABEL: @test_vreinterpretq_bf16_p16(
125 // CHECK-NEXT: entry:
126 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x bfloat>
127 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
129 bfloat16x8_t test_vreinterpretq_bf16_p16(poly16x8_t a) { return vreinterpretq_bf16_p16(a); }
130 // CHECK-LABEL: @test_vreinterpretq_bf16_u64(
131 // CHECK-NEXT: entry:
132 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x bfloat>
133 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
135 bfloat16x8_t test_vreinterpretq_bf16_u64(uint64x2_t a) { return vreinterpretq_bf16_u64(a); }
136 // CHECK-LABEL: @test_vreinterpretq_bf16_s64(
137 // CHECK-NEXT: entry:
138 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x bfloat>
139 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
141 bfloat16x8_t test_vreinterpretq_bf16_s64(int64x2_t a) { return vreinterpretq_bf16_s64(a); }
142 // CHECK-LABEL: @test_vreinterpret_bf16_p64(
143 // CHECK-NEXT: entry:
144 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <4 x bfloat>
145 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
147 bfloat16x4_t test_vreinterpret_bf16_p64(poly64x1_t a) { return vreinterpret_bf16_p64(a); }
148 // CHECK-LABEL: @test_vreinterpretq_bf16_p64(
149 // CHECK-NEXT: entry:
150 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x bfloat>
151 // CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
153 bfloat16x8_t test_vreinterpretq_bf16_p64(poly64x2_t a) { return vreinterpretq_bf16_p64(a); }
155 // TODO: poly128_t not implemented on aarch32
156 // CHCK-LABEL: @test_vreinterpretq_bf16_p128(
157 // CHCK-NEXT: entry:
158 // CHCK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <4 x i32>
159 // CHCK-NEXT: ret <4 x i32> [[TMP0]]
161 //bfloat16x8_t test_vreinterpretq_bf16_p128(poly128_t a) { return vreinterpretq_bf16_p128(a); }
163 // CHECK-LABEL: @test_vreinterpret_s8_bf16(
164 // CHECK-NEXT: entry:
165 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
166 // CHECK-NEXT: ret <8 x i8> [[TMP0]]
168 int8x8_t test_vreinterpret_s8_bf16(bfloat16x4_t a) { return vreinterpret_s8_bf16(a); }
169 // CHECK-LABEL: @test_vreinterpret_s16_bf16(
170 // CHECK-NEXT: entry:
171 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
172 // CHECK-NEXT: ret <4 x i16> [[TMP0]]
174 int16x4_t test_vreinterpret_s16_bf16(bfloat16x4_t a) { return vreinterpret_s16_bf16(a); }
175 // CHECK-LABEL: @test_vreinterpret_s32_bf16(
176 // CHECK-NEXT: entry:
177 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <2 x i32>
178 // CHECK-NEXT: ret <2 x i32> [[A:%.*]]
180 int32x2_t test_vreinterpret_s32_bf16(bfloat16x4_t a) { return vreinterpret_s32_bf16(a); }
181 // CHECK-LABEL: @test_vreinterpret_f32_bf16(
182 // CHECK-NEXT: entry:
183 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <2 x float>
184 // CHECK-NEXT: ret <2 x float> [[TMP0]]
186 float32x2_t test_vreinterpret_f32_bf16(bfloat16x4_t a) { return vreinterpret_f32_bf16(a); }
187 // CHECK-LABEL: @test_vreinterpret_u8_bf16(
188 // CHECK-NEXT: entry:
189 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
190 // CHECK-NEXT: ret <8 x i8> [[TMP0]]
192 uint8x8_t test_vreinterpret_u8_bf16(bfloat16x4_t a) { return vreinterpret_u8_bf16(a); }
193 // CHECK-LABEL: @test_vreinterpret_u16_bf16(
194 // CHECK-NEXT: entry:
195 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
196 // CHECK-NEXT: ret <4 x i16> [[TMP0]]
198 uint16x4_t test_vreinterpret_u16_bf16(bfloat16x4_t a) { return vreinterpret_u16_bf16(a); }
199 // CHECK-LABEL: @test_vreinterpret_u32_bf16(
200 // CHECK-NEXT: entry:
201 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <2 x i32>
202 // CHECK-NEXT: ret <2 x i32> [[A:%.*]]
204 uint32x2_t test_vreinterpret_u32_bf16(bfloat16x4_t a) { return vreinterpret_u32_bf16(a); }
205 // CHECK-LABEL: @test_vreinterpret_p8_bf16(
206 // CHECK-NEXT: entry:
207 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
208 // CHECK-NEXT: ret <8 x i8> [[TMP0]]
210 poly8x8_t test_vreinterpret_p8_bf16(bfloat16x4_t a) { return vreinterpret_p8_bf16(a); }
211 // CHECK-LABEL: @test_vreinterpret_p16_bf16(
212 // CHECK-NEXT: entry:
213 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
214 // CHECK-NEXT: ret <4 x i16> [[TMP0]]
216 poly16x4_t test_vreinterpret_p16_bf16(bfloat16x4_t a) { return vreinterpret_p16_bf16(a); }
217 // CHECK-LABEL: @test_vreinterpret_u64_bf16(
218 // CHECK-NEXT: entry:
219 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <1 x i64>
220 // CHECK-NEXT: ret <1 x i64> [[TMP0]]
222 uint64x1_t test_vreinterpret_u64_bf16(bfloat16x4_t a) { return vreinterpret_u64_bf16(a); }
223 // CHECK-LABEL: @test_vreinterpret_s64_bf16(
224 // CHECK-NEXT: entry:
225 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <1 x i64>
226 // CHECK-NEXT: ret <1 x i64> [[TMP0]]
228 int64x1_t test_vreinterpret_s64_bf16(bfloat16x4_t a) { return vreinterpret_s64_bf16(a); }
229 // CHECK-LABEL: @test_vreinterpret_p64_bf16(
230 // CHECK-NEXT: entry:
231 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <1 x i64>
232 // CHECK-NEXT: ret <1 x i64> [[TMP0]]
234 poly64x1_t test_vreinterpret_p64_bf16(bfloat16x4_t a) { return vreinterpret_p64_bf16(a); }
235 // CHECK-LABEL: @test_vreinterpretq_s8_bf16(
236 // CHECK-NEXT: entry:
237 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
238 // CHECK-NEXT: ret <16 x i8> [[TMP0]]
240 int8x16_t test_vreinterpretq_s8_bf16(bfloat16x8_t a) { return vreinterpretq_s8_bf16(a); }
241 // CHECK-LABEL: @test_vreinterpretq_s16_bf16(
242 // CHECK-NEXT: entry:
243 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
244 // CHECK-NEXT: ret <8 x i16> [[TMP0]]
246 int16x8_t test_vreinterpretq_s16_bf16(bfloat16x8_t a) { return vreinterpretq_s16_bf16(a); }
247 // CHECK-LABEL: @test_vreinterpretq_s32_bf16(
248 // CHECK-NEXT: entry:
249 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <4 x i32>
250 // CHECK-NEXT: ret <4 x i32> [[A:%.*]]
252 int32x4_t test_vreinterpretq_s32_bf16(bfloat16x8_t a) { return vreinterpretq_s32_bf16(a); }
253 // CHECK-LABEL: @test_vreinterpretq_f32_bf16(
254 // CHECK-NEXT: entry:
255 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <4 x float>
256 // CHECK-NEXT: ret <4 x float> [[TMP0]]
258 float32x4_t test_vreinterpretq_f32_bf16(bfloat16x8_t a) { return vreinterpretq_f32_bf16(a); }
259 // CHECK-LABEL: @test_vreinterpretq_u8_bf16(
260 // CHECK-NEXT: entry:
261 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
262 // CHECK-NEXT: ret <16 x i8> [[TMP0]]
264 uint8x16_t test_vreinterpretq_u8_bf16(bfloat16x8_t a) { return vreinterpretq_u8_bf16(a); }
265 // CHECK-LABEL: @test_vreinterpretq_u16_bf16(
266 // CHECK-NEXT: entry:
267 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
268 // CHECK-NEXT: ret <8 x i16> [[TMP0]]
270 uint16x8_t test_vreinterpretq_u16_bf16(bfloat16x8_t a) { return vreinterpretq_u16_bf16(a); }
271 // CHECK-LABEL: @test_vreinterpretq_u32_bf16(
272 // CHECK-NEXT: entry:
273 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <4 x i32>
274 // CHECK-NEXT: ret <4 x i32> [[A:%.*]]
276 uint32x4_t test_vreinterpretq_u32_bf16(bfloat16x8_t a) { return vreinterpretq_u32_bf16(a); }
277 // CHECK-LABEL: @test_vreinterpretq_p8_bf16(
278 // CHECK-NEXT: entry:
279 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
280 // CHECK-NEXT: ret <16 x i8> [[TMP0]]
282 poly8x16_t test_vreinterpretq_p8_bf16(bfloat16x8_t a) { return vreinterpretq_p8_bf16(a); }
283 // CHECK-LABEL: @test_vreinterpretq_p16_bf16(
284 // CHECK-NEXT: entry:
285 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
286 // CHECK-NEXT: ret <8 x i16> [[TMP0]]
288 poly16x8_t test_vreinterpretq_p16_bf16(bfloat16x8_t a) { return vreinterpretq_p16_bf16(a); }
289 // CHECK-LABEL: @test_vreinterpretq_u64_bf16(
290 // CHECK-NEXT: entry:
291 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <2 x i64>
292 // CHECK-NEXT: ret <2 x i64> [[TMP0]]
294 uint64x2_t test_vreinterpretq_u64_bf16(bfloat16x8_t a) { return vreinterpretq_u64_bf16(a); }
295 // CHECK-LABEL: @test_vreinterpretq_s64_bf16(
296 // CHECK-NEXT: entry:
297 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <2 x i64>
298 // CHECK-NEXT: ret <2 x i64> [[TMP0]]
300 int64x2_t test_vreinterpretq_s64_bf16(bfloat16x8_t a) { return vreinterpretq_s64_bf16(a); }
301 // CHECK-LABEL: @test_vreinterpretq_p64_bf16(
302 // CHECK-NEXT: entry:
303 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <2 x i64>
304 // CHECK-NEXT: ret <2 x i64> [[TMP0]]
306 poly64x2_t test_vreinterpretq_p64_bf16(bfloat16x8_t a) { return vreinterpretq_p64_bf16(a); }
308 // TODO: poly128_t not implemented on aarch32
309 // CHCK-LABEL: @test_vreinterpretq_p128_bf16(
310 // CHCK-NEXT: entry:
311 // CHCK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to i128
312 // CHCK-NEXT: ret i128 [[TMP0]]
314 //poly128_t test_vreinterpretq_p128_bf16(bfloat16x8_t a) { return vreinterpretq_p128_bf16(a); }