1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
6 ; Most functions in this test only check that the compiler doesn't crash; some also check specific output (e.g. ScratchSize: 0).
; Loads <32 x i8> through the inreg addrspace(4) pointer argument, reinterprets
; it as <8 x i32>, and selects 0.0 when element 1 is non-zero, 1.0 otherwise.
8 ; CHECK-LABEL: {{^}}v32i8_to_v8i32:
9 define amdgpu_ps float @v32i8_to_v8i32(ptr addrspace(4) inreg) #0 {
11 %1 = load <32 x i8>, ptr addrspace(4) %0
12 %2 = bitcast <32 x i8> %1 to <8 x i32>
13 %3 = extractelement <8 x i32> %2, i32 1
14 %4 = icmp ne i32 %3, 0
15 %5 = select i1 %4, float 0.0, float 1.0
; Copies a <16 x i8> value from %in to %out.
; NOTE(review): no bitcast instruction appears in the visible lines of this
; function; with untyped `ptr` arguments only the load/store pair remains.
19 ; CHECK-LABEL: {{^}}i8ptr_v16i8ptr:
21 define amdgpu_kernel void @i8ptr_v16i8ptr(ptr addrspace(1) %out, ptr addrspace(1) %in) {
23 %0 = load <16 x i8>, ptr addrspace(1) %in
24 store <16 x i8> %0, ptr addrspace(1) %out
; float -> <2 x i16>: adds 1.0 to the loaded float, reinterprets the sum as
; <2 x i16>, adds 2 to each lane and stores the result to %out.
; No check directive is visible for this function.
28 define amdgpu_kernel void @f32_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
29 %load = load float, ptr addrspace(1) %in, align 4
30 %fadd32 = fadd float %load, 1.0
31 %bc = bitcast float %fadd32 to <2 x i16>
32 %add.bitcast = add <2 x i16> %bc, <i16 2, i16 2>
33 store <2 x i16> %add.bitcast, ptr addrspace(1) %out
; <2 x i16> -> float: adds 2 to each loaded lane, reinterprets the vector as a
; float, adds 1.0 and stores the result to %out.
; No check directive is visible for this function.
37 define amdgpu_kernel void @v2i16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
38 %load = load <2 x i16>, ptr addrspace(1) %in, align 4
39 %add.v2i16 = add <2 x i16> %load, <i16 2, i16 2>
40 %bc = bitcast <2 x i16> %add.v2i16 to float
41 %fadd.bitcast = fadd float %bc, 1.0
42 store float %fadd.bitcast, ptr addrspace(1) %out
; float -> <2 x half>: adds 1.0 to the loaded float, reinterprets the sum as
; <2 x half>, adds 2.0 to each lane and stores the result to %out.
; No check directive is visible for this function.
46 define amdgpu_kernel void @f32_to_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
47 %load = load float, ptr addrspace(1) %in, align 4
48 %fadd32 = fadd float %load, 1.0
49 %bc = bitcast float %fadd32 to <2 x half>
50 %add.bitcast = fadd <2 x half> %bc, <half 2.0, half 2.0>
51 store <2 x half> %add.bitcast, ptr addrspace(1) %out
; <2 x half> -> float: adds 2.0 to each loaded lane, reinterprets the vector as
; a float, adds 1.0 and stores the result to %out.
; No check directive is visible for this function.
55 define amdgpu_kernel void @v2f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
56 %load = load <2 x half>, ptr addrspace(1) %in, align 4
57 %add.v2f16 = fadd <2 x half> %load, <half 2.0, half 2.0>
58 %bc = bitcast <2 x half> %add.v2f16 to float
59 %fadd.bitcast = fadd float %bc, 1.0
60 store float %fadd.bitcast, ptr addrspace(1) %out
; <4 x i8> -> i32: loads the vector, reinterprets it as an i32 and stores it to
; %out with no arithmetic on either side of the cast.
; No check directive is visible for this function.
64 define amdgpu_kernel void @v4i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
65 %load = load <4 x i8>, ptr addrspace(1) %in, align 4
66 %bc = bitcast <4 x i8> %load to i32
67 store i32 %bc, ptr addrspace(1) %out, align 4
; i32 -> <4 x i8>: loads the i32, reinterprets it as <4 x i8> and stores it to
; %out with no arithmetic on either side of the cast.
; No check directive is visible for this function.
71 define amdgpu_kernel void @i32_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
72 %load = load i32, ptr addrspace(1) %in, align 4
73 %bc = bitcast i32 %load to <4 x i8>
74 store <4 x i8> %bc, ptr addrspace(1) %out, align 4
; <2 x i32> -> double: adds <4, 9> to the loaded vector, reinterprets it as a
; double, adds 1.0 and stores the result to %out.
78 ; CHECK-LABEL: {{^}}bitcast_v2i32_to_f64:
80 define amdgpu_kernel void @bitcast_v2i32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
81 %val = load <2 x i32>, ptr addrspace(1) %in, align 8
82 %add = add <2 x i32> %val, <i32 4, i32 9>
83 %bc = bitcast <2 x i32> %add to double
84 %fadd.bc = fadd double %bc, 1.0
85 store double %fadd.bc, ptr addrspace(1) %out, align 8
; double -> <2 x i32>: adds 4.0 to the loaded double, reinterprets the sum as
; <2 x i32> and stores it to %out.
89 ; CHECK-LABEL: {{^}}bitcast_f64_to_v2i32:
91 define amdgpu_kernel void @bitcast_f64_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
92 %val = load double, ptr addrspace(1) %in, align 8
93 %add = fadd double %val, 4.0
94 %bc = bitcast double %add to <2 x i32>
95 store <2 x i32> %bc, ptr addrspace(1) %out, align 8
; <2 x i64> -> <2 x double> feeding a phi: branches on %cond == 0, casts the
; %value argument, and the phi merges zeroinitializer (from %entry) with the
; cast (from %if) before the store to %out.
99 ; CHECK-LABEL: {{^}}bitcast_v2i64_to_v2f64:
100 define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, ptr addrspace(1) %out, <2 x i64> %value) {
102 %cmp0 = icmp eq i32 %cond, 0
103 br i1 %cmp0, label %if, label %end
106 %cast = bitcast <2 x i64> %value to <2 x double>
110 %phi = phi <2 x double> [zeroinitializer, %entry], [%cast, %if]
111 store <2 x double> %phi, ptr addrspace(1) %out
; <2 x double> -> <2 x i64> feeding a phi: branches on %cond == 0, casts the
; %value argument, and the phi merges zeroinitializer (from %entry) with the
; cast (from %if) before the store to %out.
115 ; CHECK-LABEL: {{^}}bitcast_v2f64_to_v2i64:
116 define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, ptr addrspace(1) %out, <2 x double> %value) {
118 %cmp0 = icmp eq i32 %cond, 0
119 br i1 %cmp0, label %if, label %end
122 %cast = bitcast <2 x double> %value to <2 x i64>
126 %phi = phi <2 x i64> [zeroinitializer, %entry], [%cast, %if]
127 store <2 x i64> %phi, ptr addrspace(1) %out
; <4 x i16> -> double: adds 4 to each loaded lane, reinterprets the vector as a
; double, adds 1.0 and stores the result to %out.
131 ; CHECK-LABEL: {{^}}v4i16_to_f64:
132 define amdgpu_kernel void @v4i16_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
133 %load = load <4 x i16>, ptr addrspace(1) %in, align 4
134 %add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
135 %bc = bitcast <4 x i16> %add.v4i16 to double
136 %fadd.bitcast = fadd double %bc, 1.0
137 store double %fadd.bitcast, ptr addrspace(1) %out
; <4 x half> -> double: adds 4.0 to each loaded lane, reinterprets the vector
; as a double, adds 1.0 and stores the result to %out.
141 ; CHECK-LABEL: {{^}}v4f16_to_f64:
142 define amdgpu_kernel void @v4f16_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
143 %load = load <4 x half>, ptr addrspace(1) %in, align 4
144 %add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
145 %bc = bitcast <4 x half> %add.v4half to double
146 %fadd.bitcast = fadd double %bc, 1.0
147 store double %fadd.bitcast, ptr addrspace(1) %out
; double -> <4 x half>: adds 1.0 to the loaded double, reinterprets the sum as
; <4 x half>, adds 2.0 to each lane and stores the result to %out.
; NOTE(review): %fadd32 holds a double sum despite the "32" in its name.
151 ; CHECK-LABEL: {{^}}f64_to_v4f16:
152 define amdgpu_kernel void @f64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
153 %load = load double, ptr addrspace(1) %in, align 4
154 %fadd32 = fadd double %load, 1.0
155 %bc = bitcast double %fadd32 to <4 x half>
156 %add.bitcast = fadd <4 x half> %bc, <half 2.0, half 2.0, half 2.0, half 2.0>
157 store <4 x half> %add.bitcast, ptr addrspace(1) %out
; double -> <4 x i16>: adds 1.0 to the loaded double, reinterprets the sum as
; <4 x i16>, adds 2 to each lane and stores the result to %out.
; NOTE(review): %fadd32 holds a double sum despite the "32" in its name.
161 ; CHECK-LABEL: {{^}}f64_to_v4i16:
162 define amdgpu_kernel void @f64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
163 %load = load double, ptr addrspace(1) %in, align 4
164 %fadd32 = fadd double %load, 1.0
165 %bc = bitcast double %fadd32 to <4 x i16>
166 %add.bitcast = add <4 x i16> %bc, <i16 2, i16 2, i16 2, i16 2>
167 store <4 x i16> %add.bitcast, ptr addrspace(1) %out
; <4 x i16> -> i64: adds 4 to each loaded lane, reinterprets the vector as an
; i64, adds 1 and stores the result to %out.
171 ; CHECK-LABEL: {{^}}v4i16_to_i64:
172 define amdgpu_kernel void @v4i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
173 %load = load <4 x i16>, ptr addrspace(1) %in, align 4
174 %add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
175 %bc = bitcast <4 x i16> %add.v4i16 to i64
176 %add.bitcast = add i64 %bc, 1
177 store i64 %add.bitcast, ptr addrspace(1) %out
; <4 x half> -> i64: adds 4.0 to each loaded lane, reinterprets the vector as
; an i64, adds 1 and stores the result to %out.
181 ; CHECK-LABEL: {{^}}v4f16_to_i64:
182 define amdgpu_kernel void @v4f16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
183 %load = load <4 x half>, ptr addrspace(1) %in, align 4
184 %add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
185 %bc = bitcast <4 x half> %add.v4half to i64
186 %add.bitcast = add i64 %bc, 1
187 store i64 %add.bitcast, ptr addrspace(1) %out
; i64 -> <4 x i16>: adds 4 to the loaded i64, reinterprets the sum as
; <4 x i16>, adds <1, 2, 3, 4> lane-wise and stores the result to %out.
191 ; CHECK-LABEL: {{^}}bitcast_i64_to_v4i16:
192 define amdgpu_kernel void @bitcast_i64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
193 %val = load i64, ptr addrspace(1) %in, align 8
194 %add = add i64 %val, 4
195 %bc = bitcast i64 %add to <4 x i16>
196 %add.v4i16 = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
197 store <4 x i16> %add.v4i16, ptr addrspace(1) %out, align 8
; i64 -> <4 x half>: adds 4 to the loaded i64, reinterprets the sum as
; <4 x half>, adds <1.0, 2.0, 4.0, 8.0> lane-wise and stores the result to %out.
; NOTE(review): %add.v4i16 is a <4 x half> fadd despite its name.
201 ; CHECK-LABEL: {{^}}bitcast_i64_to_v4f16:
202 define amdgpu_kernel void @bitcast_i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
203 %val = load i64, ptr addrspace(1) %in, align 8
204 %add = add i64 %val, 4
205 %bc = bitcast i64 %add to <4 x half>
206 %add.v4i16 = fadd <4 x half> %bc, <half 1.0, half 2.0, half 4.0, half 8.0>
207 store <4 x half> %add.v4i16, ptr addrspace(1) %out, align 8
; <4 x i16> -> <2 x float>: adds 4 to each loaded lane, reinterprets the vector
; as <2 x float>, adds 1.0 to each lane and stores the result to %out.
211 ; CHECK-LABEL: {{^}}v4i16_to_v2f32:
212 define amdgpu_kernel void @v4i16_to_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
213 %load = load <4 x i16>, ptr addrspace(1) %in, align 4
214 %add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
215 %bc = bitcast <4 x i16> %add.v4i16 to <2 x float>
216 %fadd.bitcast = fadd <2 x float> %bc, <float 1.0, float 1.0>
217 store <2 x float> %fadd.bitcast, ptr addrspace(1) %out
; <4 x half> -> <2 x float>: adds 4.0 to each loaded lane, reinterprets the
; vector as <2 x float>, adds 1.0 to each lane and stores the result to %out.
221 ; CHECK-LABEL: {{^}}v4f16_to_v2f32:
222 define amdgpu_kernel void @v4f16_to_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
223 %load = load <4 x half>, ptr addrspace(1) %in, align 4
224 %add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
225 %bc = bitcast <4 x half> %add.v4half to <2 x float>
226 %fadd.bitcast = fadd <2 x float> %bc, <float 1.0, float 1.0>
227 store <2 x float> %fadd.bitcast, ptr addrspace(1) %out
; <2 x float> -> <4 x i16>: adds <2.0, 4.0> to the loaded vector, reinterprets
; it as <4 x i16>, adds <1, 2, 3, 4> lane-wise and stores the result to %out.
231 ; CHECK-LABEL: {{^}}v2f32_to_v4i16:
232 define amdgpu_kernel void @v2f32_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
233 %load = load <2 x float>, ptr addrspace(1) %in, align 4
234 %add.v2f32 = fadd <2 x float> %load, <float 2.0, float 4.0>
235 %bc = bitcast <2 x float> %add.v2f32 to <4 x i16>
236 %add.bitcast = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
237 store <4 x i16> %add.bitcast, ptr addrspace(1) %out
; <2 x float> -> <4 x half>: adds <2.0, 4.0> to the loaded vector, reinterprets
; it as <4 x half>, adds <1.0, 2.0, 4.0, 8.0> lane-wise and stores to %out.
241 ; CHECK-LABEL: {{^}}v2f32_to_v4f16:
242 define amdgpu_kernel void @v2f32_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
243 %load = load <2 x float>, ptr addrspace(1) %in, align 4
244 %add.v2f32 = fadd <2 x float> %load, <float 2.0, float 4.0>
245 %bc = bitcast <2 x float> %add.v2f32 to <4 x half>
246 %add.bitcast = fadd <4 x half> %bc, <half 1.0, half 2.0, half 4.0, half 8.0>
247 store <4 x half> %add.bitcast, ptr addrspace(1) %out
; <4 x i16> -> <2 x i32>: adds 4 to each loaded lane, reinterprets the vector
; as <2 x i32>, adds 1 to each lane and stores the result to %out.
251 ; CHECK-LABEL: {{^}}v4i16_to_v2i32:
252 define amdgpu_kernel void @v4i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
253 %load = load <4 x i16>, ptr addrspace(1) %in, align 4
254 %add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
255 %bc = bitcast <4 x i16> %add.v4i16 to <2 x i32>
256 %add.bitcast = add <2 x i32> %bc, <i32 1, i32 1>
257 store <2 x i32> %add.bitcast, ptr addrspace(1) %out
; <4 x half> -> <2 x i32>: adds 4.0 to each loaded lane, reinterprets the
; vector as <2 x i32>, adds 1 to each lane and stores the result to %out.
261 ; CHECK-LABEL: {{^}}v4f16_to_v2i32:
262 define amdgpu_kernel void @v4f16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
263 %load = load <4 x half>, ptr addrspace(1) %in, align 4
264 %add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
265 %bc = bitcast <4 x half> %add.v4half to <2 x i32>
266 %add.bitcast = add <2 x i32> %bc, <i32 1, i32 1>
267 store <2 x i32> %add.bitcast, ptr addrspace(1) %out
; <2 x i32> -> <4 x i16>: adds <2, 4> to the loaded vector, reinterprets it as
; <4 x i16>, adds <1, 2, 3, 4> lane-wise and stores the result to %out.
271 ; CHECK-LABEL: {{^}}v2i32_to_v4i16:
272 define amdgpu_kernel void @v2i32_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
273 %load = load <2 x i32>, ptr addrspace(1) %in, align 4
274 %add.v2i32 = add <2 x i32> %load, <i32 2, i32 4>
275 %bc = bitcast <2 x i32> %add.v2i32 to <4 x i16>
276 %add.bitcast = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
277 store <4 x i16> %add.bitcast, ptr addrspace(1) %out
; <2 x i32> -> <4 x half>: adds <2, 4> to the loaded vector, reinterprets it as
; <4 x half>, adds <1.0, 2.0, 4.0, 8.0> lane-wise and stores to %out.
281 ; CHECK-LABEL: {{^}}v2i32_to_v4f16:
282 define amdgpu_kernel void @v2i32_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
283 %load = load <2 x i32>, ptr addrspace(1) %in, align 4
284 %add.v2i32 = add <2 x i32> %load, <i32 2, i32 4>
285 %bc = bitcast <2 x i32> %add.v2i32 to <4 x half>
286 %add.bitcast = fadd <4 x half> %bc, <half 1.0, half 2.0, half 4.0, half 8.0>
287 store <4 x half> %add.bitcast, ptr addrspace(1) %out
; Declares the s.buffer.load intrinsic used below. The function reinterprets
; the intrinsic's <4 x float> result as <2 x i64> and uses it as the dividend
; of a udiv by %arg. The check directive expects an s_buffer_load_dwordx4 or
; s_buffer_load_b128 instruction in the output. The resource operand of the
; call is undef.
291 declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
293 ; CHECK-LABEL: {{^}}bitcast_v4f32_to_v2i64:
294 ; CHECK: s_buffer_load_{{dwordx4|b128}}
295 define <2 x i64> @bitcast_v4f32_to_v2i64(<2 x i64> %arg) {
296 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> undef, i32 0, i32 0)
297 %cast = bitcast <4 x float> %val to <2 x i64>
298 %div = udiv <2 x i64> %cast, %arg
; Declares the f16 canonicalize intrinsic used below. The function
; canonicalizes the half constant 0xH03F0 (with arcp/afn flags), extends it to
; float, reinterprets the float as <1 x i32>, extracts element 0 and stores it
; to %out.
302 declare half @llvm.canonicalize.f16(half)
304 ; CHECK-LABEL: {{^}}bitcast_f32_to_v1i32:
305 define amdgpu_kernel void @bitcast_f32_to_v1i32(ptr addrspace(1) %out) {
306 %f16 = call arcp afn half @llvm.canonicalize.f16(half 0xH03F0)
307 %f32 = fpext half %f16 to float
308 %v = bitcast float %f32 to <1 x i32>
309 %v1 = extractelement <1 x i32> %v, i32 0
310 store i32 %v1, ptr addrspace(1) %out
; <4 x i64> -> <16 x i16> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; Branches test %cond against 0 and then against 1.
314 ; CHECK-LABEL: {{^}}bitcast_v4i64_to_v16i16:
315 define amdgpu_kernel void @bitcast_v4i64_to_v16i16(i32 %cond, ptr addrspace(1) %out, <4 x i64> %value) {
317 %cmp0 = icmp eq i32 %cond, 0
318 br i1 %cmp0, label %if, label %end
321 %phi_value = phi <4 x i64> [zeroinitializer, %entry], [%value, %if]
322 %cast = bitcast <4 x i64> %phi_value to <16 x i16>
323 %cmp1 = icmp eq i32 %cond, 1
324 br i1 %cmp1, label %if, label %end
327 %phi_cast = phi <16 x i16> [zeroinitializer, %entry], [%cast, %if]
328 store <16 x i16> %phi_cast, ptr addrspace(1) %out
; <4 x double> -> <16 x half> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; Branches test %cond against 0 and then against 1.
332 ; CHECK-LABEL: {{^}}bitcast_v4f64_to_v16f16:
333 define amdgpu_kernel void @bitcast_v4f64_to_v16f16(i32 %cond, ptr addrspace(1) %out, <4 x double> %value) {
335 %cmp0 = icmp eq i32 %cond, 0
336 br i1 %cmp0, label %if, label %end
339 %phi_value = phi <4 x double> [zeroinitializer, %entry], [%value, %if]
340 %cast = bitcast <4 x double> %phi_value to <16 x half>
341 %cmp1 = icmp eq i32 %cond, 1
342 br i1 %cmp1, label %if, label %end
345 %phi_cast = phi <16 x half> [zeroinitializer, %entry], [%cast, %if]
346 store <16 x half> %phi_cast, ptr addrspace(1) %out
; <16 x i16> -> <4 x i64> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; Branches test %cond against 0 and then against 1.
350 ; CHECK-LABEL: {{^}}bitcast_v16i16_to_v4i64:
351 define amdgpu_kernel void @bitcast_v16i16_to_v4i64(i32 %cond, ptr addrspace(1) %out, <16 x i16> %value) {
353 %cmp0 = icmp eq i32 %cond, 0
354 br i1 %cmp0, label %if, label %end
357 %phi_value = phi <16 x i16> [zeroinitializer, %entry], [%value, %if]
358 %cast = bitcast <16 x i16> %phi_value to <4 x i64>
359 %cmp1 = icmp eq i32 %cond, 1
360 br i1 %cmp1, label %if, label %end
363 %phi_cast = phi <4 x i64> [zeroinitializer, %entry], [%cast, %if]
364 store <4 x i64> %phi_cast, ptr addrspace(1) %out
; <16 x half> -> <4 x double> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; Branches test %cond against 0 and then against 1.
368 ; CHECK-LABEL: {{^}}bitcast_v16f16_to_v4f64:
369 define amdgpu_kernel void @bitcast_v16f16_to_v4f64(i32 %cond, ptr addrspace(1) %out, <16 x half> %value) {
371 %cmp0 = icmp eq i32 %cond, 0
372 br i1 %cmp0, label %if, label %end
375 %phi_value = phi <16 x half> [zeroinitializer, %entry], [%value, %if]
376 %cast = bitcast <16 x half> %phi_value to <4 x double>
377 %cmp1 = icmp eq i32 %cond, 1
378 br i1 %cmp1, label %if, label %end
381 %phi_cast = phi <4 x double> [zeroinitializer, %entry], [%cast, %if]
382 store <4 x double> %phi_cast, ptr addrspace(1) %out
; <20 x half> -> <5 x double> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
386 ; CHECK-LABEL: {{^}}bitcast_v20f16_to_v5f64:
387 ; CHECK: ScratchSize: 0
388 define amdgpu_kernel void @bitcast_v20f16_to_v5f64(i32 %cond, ptr addrspace(1) %out, <20 x half> %value) {
390 %cmp0 = icmp eq i32 %cond, 0
391 br i1 %cmp0, label %if, label %end
394 %phi_value = phi <20 x half> [zeroinitializer, %entry], [%value, %if]
395 %cast = bitcast <20 x half> %phi_value to <5 x double>
396 %cmp1 = icmp eq i32 %cond, 1
397 br i1 %cmp1, label %if, label %end
400 %phi_cast = phi <5 x double> [zeroinitializer, %entry], [%cast, %if]
401 store <5 x double> %phi_cast, ptr addrspace(1) %out
; <10 x float> -> <5 x double> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
405 ; CHECK-LABEL: {{^}}bitcast_v10f32_to_v5f64:
406 ; CHECK: ScratchSize: 0
407 define amdgpu_kernel void @bitcast_v10f32_to_v5f64(i32 %cond, ptr addrspace(1) %out, <10 x float> %value) {
409 %cmp0 = icmp eq i32 %cond, 0
410 br i1 %cmp0, label %if, label %end
413 %phi_value = phi <10 x float> [zeroinitializer, %entry], [%value, %if]
414 %cast = bitcast <10 x float> %phi_value to <5 x double>
415 %cmp1 = icmp eq i32 %cond, 1
416 br i1 %cmp1, label %if, label %end
419 %phi_cast = phi <5 x double> [zeroinitializer, %entry], [%cast, %if]
420 store <5 x double> %phi_cast, ptr addrspace(1) %out
; <10 x i32> -> <5 x double> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
424 ; CHECK-LABEL: {{^}}bitcast_v10i32_to_v5f64:
425 ; CHECK: ScratchSize: 0
426 define amdgpu_kernel void @bitcast_v10i32_to_v5f64(i32 %cond, ptr addrspace(1) %out, <10 x i32> %value) {
428 %cmp0 = icmp eq i32 %cond, 0
429 br i1 %cmp0, label %if, label %end
432 %phi_value = phi <10 x i32> [zeroinitializer, %entry], [%value, %if]
433 %cast = bitcast <10 x i32> %phi_value to <5 x double>
434 %cmp1 = icmp eq i32 %cond, 1
435 br i1 %cmp1, label %if, label %end
438 %phi_cast = phi <5 x double> [zeroinitializer, %entry], [%cast, %if]
439 store <5 x double> %phi_cast, ptr addrspace(1) %out
; <10 x float> -> <5 x i64> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
443 ; CHECK-LABEL: {{^}}bitcast_v10f32_to_v5i64:
444 ; CHECK: ScratchSize: 0
445 define amdgpu_kernel void @bitcast_v10f32_to_v5i64(i32 %cond, ptr addrspace(1) %out, <10 x float> %value) {
447 %cmp0 = icmp eq i32 %cond, 0
448 br i1 %cmp0, label %if, label %end
451 %phi_value = phi <10 x float> [zeroinitializer, %entry], [%value, %if]
452 %cast = bitcast <10 x float> %phi_value to <5 x i64>
453 %cmp1 = icmp eq i32 %cond, 1
454 br i1 %cmp1, label %if, label %end
457 %phi_cast = phi <5 x i64> [zeroinitializer, %entry], [%cast, %if]
458 store <5 x i64> %phi_cast, ptr addrspace(1) %out
; <10 x i32> -> <5 x i64> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
462 ; CHECK-LABEL: {{^}}bitcast_v10i32_to_v5i64:
463 ; CHECK: ScratchSize: 0
464 define amdgpu_kernel void @bitcast_v10i32_to_v5i64(i32 %cond, ptr addrspace(1) %out, <10 x i32> %value) {
466 %cmp0 = icmp eq i32 %cond, 0
467 br i1 %cmp0, label %if, label %end
470 %phi_value = phi <10 x i32> [zeroinitializer, %entry], [%value, %if]
471 %cast = bitcast <10 x i32> %phi_value to <5 x i64>
472 %cmp1 = icmp eq i32 %cond, 1
473 br i1 %cmp1, label %if, label %end
476 %phi_cast = phi <5 x i64> [zeroinitializer, %entry], [%cast, %if]
477 store <5 x i64> %phi_cast, ptr addrspace(1) %out
; <40 x i8> -> <5 x double> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
481 ; CHECK-LABEL: {{^}}bitcast_v40i8_to_v5f64:
482 ; CHECK: ScratchSize: 0
483 define amdgpu_kernel void @bitcast_v40i8_to_v5f64(i32 %cond, ptr addrspace(1) %out, <40 x i8> %value) {
485 %cmp0 = icmp eq i32 %cond, 0
486 br i1 %cmp0, label %if, label %end
489 %phi_value = phi <40 x i8> [zeroinitializer, %entry], [%value, %if]
490 %cast = bitcast <40 x i8> %phi_value to <5 x double>
491 %cmp1 = icmp eq i32 %cond, 1
492 br i1 %cmp1, label %if, label %end
495 %phi_cast = phi <5 x double> [zeroinitializer, %entry], [%cast, %if]
496 store <5 x double> %phi_cast, ptr addrspace(1) %out
; <40 x i8> -> <5 x i64> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
500 ; CHECK-LABEL: {{^}}bitcast_v40i8_to_v5i64:
501 ; CHECK: ScratchSize: 0
502 define amdgpu_kernel void @bitcast_v40i8_to_v5i64(i32 %cond, ptr addrspace(1) %out, <40 x i8> %value) {
504 %cmp0 = icmp eq i32 %cond, 0
505 br i1 %cmp0, label %if, label %end
508 %phi_value = phi <40 x i8> [zeroinitializer, %entry], [%value, %if]
509 %cast = bitcast <40 x i8> %phi_value to <5 x i64>
510 %cmp1 = icmp eq i32 %cond, 1
511 br i1 %cmp1, label %if, label %end
514 %phi_cast = phi <5 x i64> [zeroinitializer, %entry], [%cast, %if]
515 store <5 x i64> %phi_cast, ptr addrspace(1) %out
; <5 x double> -> <10 x float> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
519 ; CHECK-LABEL: {{^}}bitcast_v5f64_to_v10f32:
520 ; CHECK: ScratchSize: 0
521 define amdgpu_kernel void @bitcast_v5f64_to_v10f32(i32 %cond, ptr addrspace(1) %out, <5 x double> %value) {
523 %cmp0 = icmp eq i32 %cond, 0
524 br i1 %cmp0, label %if, label %end
527 %phi_value = phi <5 x double> [zeroinitializer, %entry], [%value, %if]
528 %cast = bitcast <5 x double> %phi_value to <10 x float>
529 %cmp1 = icmp eq i32 %cond, 1
530 br i1 %cmp1, label %if, label %end
533 %phi_cast = phi <10 x float> [zeroinitializer, %entry], [%cast, %if]
534 store <10 x float> %phi_cast, ptr addrspace(1) %out
; <5 x double> -> <10 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
538 ; CHECK-LABEL: {{^}}bitcast_v5f64_to_v10i32:
539 ; CHECK: ScratchSize: 0
540 define amdgpu_kernel void @bitcast_v5f64_to_v10i32(i32 %cond, ptr addrspace(1) %out, <5 x double> %value) {
542 %cmp0 = icmp eq i32 %cond, 0
543 br i1 %cmp0, label %if, label %end
546 %phi_value = phi <5 x double> [zeroinitializer, %entry], [%value, %if]
547 %cast = bitcast <5 x double> %phi_value to <10 x i32>
548 %cmp1 = icmp eq i32 %cond, 1
549 br i1 %cmp1, label %if, label %end
552 %phi_cast = phi <10 x i32> [zeroinitializer, %entry], [%cast, %if]
553 store <10 x i32> %phi_cast, ptr addrspace(1) %out
; <5 x i64> -> <10 x float> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
557 ; CHECK-LABEL: {{^}}bitcast_v5i64_to_v10f32:
558 ; CHECK: ScratchSize: 0
559 define amdgpu_kernel void @bitcast_v5i64_to_v10f32(i32 %cond, ptr addrspace(1) %out, <5 x i64> %value) {
561 %cmp0 = icmp eq i32 %cond, 0
562 br i1 %cmp0, label %if, label %end
565 %phi_value = phi <5 x i64> [zeroinitializer, %entry], [%value, %if]
566 %cast = bitcast <5 x i64> %phi_value to <10 x float>
567 %cmp1 = icmp eq i32 %cond, 1
568 br i1 %cmp1, label %if, label %end
571 %phi_cast = phi <10 x float> [zeroinitializer, %entry], [%cast, %if]
572 store <10 x float> %phi_cast, ptr addrspace(1) %out
; <5 x i64> -> <10 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
576 ; CHECK-LABEL: {{^}}bitcast_v5i64_to_v10i32:
577 ; CHECK: ScratchSize: 0
578 define amdgpu_kernel void @bitcast_v5i64_to_v10i32(i32 %cond, ptr addrspace(1) %out, <5 x i64> %value) {
580 %cmp0 = icmp eq i32 %cond, 0
581 br i1 %cmp0, label %if, label %end
584 %phi_value = phi <5 x i64> [zeroinitializer, %entry], [%value, %if]
585 %cast = bitcast <5 x i64> %phi_value to <10 x i32>
586 %cmp1 = icmp eq i32 %cond, 1
587 br i1 %cmp1, label %if, label %end
590 %phi_cast = phi <10 x i32> [zeroinitializer, %entry], [%cast, %if]
591 store <10 x i32> %phi_cast, ptr addrspace(1) %out
; <6 x double> -> <12 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
595 ; CHECK-LABEL: {{^}}bitcast_v6f64_to_v12i32:
596 ; CHECK: ScratchSize: 0
597 define amdgpu_kernel void @bitcast_v6f64_to_v12i32(i32 %cond, ptr addrspace(1) %out, <6 x double> %value) {
599 %cmp0 = icmp eq i32 %cond, 0
600 br i1 %cmp0, label %if, label %end
603 %phi_value = phi <6 x double> [zeroinitializer, %entry], [%value, %if]
604 %cast = bitcast <6 x double> %phi_value to <12 x i32>
605 %cmp1 = icmp eq i32 %cond, 1
606 br i1 %cmp1, label %if, label %end
609 %phi_cast = phi <12 x i32> [zeroinitializer, %entry], [%cast, %if]
610 store <12 x i32> %phi_cast, ptr addrspace(1) %out
; <6 x double> -> <12 x float> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
614 ; CHECK-LABEL: {{^}}bitcast_v6f64_to_v12f32:
615 ; CHECK: ScratchSize: 0
616 define amdgpu_kernel void @bitcast_v6f64_to_v12f32(i32 %cond, ptr addrspace(1) %out, <6 x double> %value) {
618 %cmp0 = icmp eq i32 %cond, 0
619 br i1 %cmp0, label %if, label %end
622 %phi_value = phi <6 x double> [zeroinitializer, %entry], [%value, %if]
623 %cast = bitcast <6 x double> %phi_value to <12 x float>
624 %cmp1 = icmp eq i32 %cond, 1
625 br i1 %cmp1, label %if, label %end
628 %phi_cast = phi <12 x float> [zeroinitializer, %entry], [%cast, %if]
629 store <12 x float> %phi_cast, ptr addrspace(1) %out
; <12 x i32> -> <6 x i64> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
633 ; CHECK-LABEL: {{^}}bitcast_v12i32_to_v6i64:
634 ; CHECK: ScratchSize: 0
635 define amdgpu_kernel void @bitcast_v12i32_to_v6i64(i32 %cond, ptr addrspace(1) %out, <12 x i32> %value) {
637 %cmp0 = icmp eq i32 %cond, 0
638 br i1 %cmp0, label %if, label %end
641 %phi_value = phi <12 x i32> [zeroinitializer, %entry], [%value, %if]
642 %cast = bitcast <12 x i32> %phi_value to <6 x i64>
643 %cmp1 = icmp eq i32 %cond, 1
644 br i1 %cmp1, label %if, label %end
647 %phi_cast = phi <6 x i64> [zeroinitializer, %entry], [%cast, %if]
648 store <6 x i64> %phi_cast, ptr addrspace(1) %out
; <12 x i32> -> <6 x double> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
652 ; CHECK-LABEL: {{^}}bitcast_v12i32_to_v6f64:
653 ; CHECK: ScratchSize: 0
654 define amdgpu_kernel void @bitcast_v12i32_to_v6f64(i32 %cond, ptr addrspace(1) %out, <12 x i32> %value) {
656 %cmp0 = icmp eq i32 %cond, 0
657 br i1 %cmp0, label %if, label %end
660 %phi_value = phi <12 x i32> [zeroinitializer, %entry], [%value, %if]
661 %cast = bitcast <12 x i32> %phi_value to <6 x double>
662 %cmp1 = icmp eq i32 %cond, 1
663 br i1 %cmp1, label %if, label %end
666 %phi_cast = phi <6 x double> [zeroinitializer, %entry], [%cast, %if]
667 store <6 x double> %phi_cast, ptr addrspace(1) %out
; <6 x i64> -> <12 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
671 ; CHECK-LABEL: {{^}}bitcast_v6i64_to_v12i32:
672 ; CHECK: ScratchSize: 0
673 define amdgpu_kernel void @bitcast_v6i64_to_v12i32(i32 %cond, ptr addrspace(1) %out, <6 x i64> %value) {
675 %cmp0 = icmp eq i32 %cond, 0
676 br i1 %cmp0, label %if, label %end
679 %phi_value = phi <6 x i64> [zeroinitializer, %entry], [%value, %if]
680 %cast = bitcast <6 x i64> %phi_value to <12 x i32>
681 %cmp1 = icmp eq i32 %cond, 1
682 br i1 %cmp1, label %if, label %end
685 %phi_cast = phi <12 x i32> [zeroinitializer, %entry], [%cast, %if]
686 store <12 x i32> %phi_cast, ptr addrspace(1) %out
; <7 x i64> -> <14 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
690 ; CHECK-LABEL: {{^}}bitcast_v7i64_to_v14i32:
691 ; CHECK: ScratchSize: 0
692 define amdgpu_kernel void @bitcast_v7i64_to_v14i32(i32 %cond, ptr addrspace(1) %out, <7 x i64> %value) {
694 %cmp0 = icmp eq i32 %cond, 0
695 br i1 %cmp0, label %if, label %end
698 %phi_value = phi <7 x i64> [zeroinitializer, %entry], [%value, %if]
699 %cast = bitcast <7 x i64> %phi_value to <14 x i32>
700 %cmp1 = icmp eq i32 %cond, 1
701 br i1 %cmp1, label %if, label %end
704 %phi_cast = phi <14 x i32> [zeroinitializer, %entry], [%cast, %if]
705 store <14 x i32> %phi_cast, ptr addrspace(1) %out
; <7 x double> -> <14 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
709 ; CHECK-LABEL: {{^}}bitcast_v7f64_to_v14i32:
710 ; CHECK: ScratchSize: 0
711 define amdgpu_kernel void @bitcast_v7f64_to_v14i32(i32 %cond, ptr addrspace(1) %out, <7 x double> %value) {
713 %cmp0 = icmp eq i32 %cond, 0
714 br i1 %cmp0, label %if, label %end
717 %phi_value = phi <7 x double> [zeroinitializer, %entry], [%value, %if]
718 %cast = bitcast <7 x double> %phi_value to <14 x i32>
719 %cmp1 = icmp eq i32 %cond, 1
720 br i1 %cmp1, label %if, label %end
723 %phi_cast = phi <14 x i32> [zeroinitializer, %entry], [%cast, %if]
724 store <14 x i32> %phi_cast, ptr addrspace(1) %out
; <9 x i64> -> <18 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
728 ; CHECK-LABEL: {{^}}bitcast_v9i64_to_v18i32:
729 ; CHECK: ScratchSize: 0
730 define amdgpu_kernel void @bitcast_v9i64_to_v18i32(i32 %cond, ptr addrspace(1) %out, <9 x i64> %value) {
732 %cmp0 = icmp eq i32 %cond, 0
733 br i1 %cmp0, label %if, label %end
736 %phi_value = phi <9 x i64> [zeroinitializer, %entry], [%value, %if]
737 %cast = bitcast <9 x i64> %phi_value to <18 x i32>
738 %cmp1 = icmp eq i32 %cond, 1
739 br i1 %cmp1, label %if, label %end
742 %phi_cast = phi <18 x i32> [zeroinitializer, %entry], [%cast, %if]
743 store <18 x i32> %phi_cast, ptr addrspace(1) %out
; <10 x i64> -> <20 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
747 ; CHECK-LABEL: {{^}}bitcast_v10i64_to_v20i32:
748 ; CHECK: ScratchSize: 0
749 define amdgpu_kernel void @bitcast_v10i64_to_v20i32(i32 %cond, ptr addrspace(1) %out, <10 x i64> %value) {
751 %cmp0 = icmp eq i32 %cond, 0
752 br i1 %cmp0, label %if, label %end
755 %phi_value = phi <10 x i64> [zeroinitializer, %entry], [%value, %if]
756 %cast = bitcast <10 x i64> %phi_value to <20 x i32>
757 %cmp1 = icmp eq i32 %cond, 1
758 br i1 %cmp1, label %if, label %end
761 %phi_cast = phi <20 x i32> [zeroinitializer, %entry], [%cast, %if]
762 store <20 x i32> %phi_cast, ptr addrspace(1) %out
; <11 x i64> -> <22 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
; NOTE(review): the function name says v20i32 but the cast produces
; <22 x i32>; the name looks stale. Renaming requires updating the label
; directive and the define together.
766 ; CHECK-LABEL: {{^}}bitcast_v11i64_to_v20i32:
767 ; CHECK: ScratchSize: 0
768 define amdgpu_kernel void @bitcast_v11i64_to_v20i32(i32 %cond, ptr addrspace(1) %out, <11 x i64> %value) {
770 %cmp0 = icmp eq i32 %cond, 0
771 br i1 %cmp0, label %if, label %end
774 %phi_value = phi <11 x i64> [zeroinitializer, %entry], [%value, %if]
775 %cast = bitcast <11 x i64> %phi_value to <22 x i32>
776 %cmp1 = icmp eq i32 %cond, 1
777 br i1 %cmp1, label %if, label %end
780 %phi_cast = phi <22 x i32> [zeroinitializer, %entry], [%cast, %if]
781 store <22 x i32> %phi_cast, ptr addrspace(1) %out
; <12 x i64> -> <24 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
; NOTE(review): the function name says v22i32 but the cast produces
; <24 x i32>; the name looks stale. Renaming requires updating the label
; directive and the define together.
785 ; CHECK-LABEL: {{^}}bitcast_v12i64_to_v22i32:
786 ; CHECK: ScratchSize: 0
787 define amdgpu_kernel void @bitcast_v12i64_to_v22i32(i32 %cond, ptr addrspace(1) %out, <12 x i64> %value) {
789 %cmp0 = icmp eq i32 %cond, 0
790 br i1 %cmp0, label %if, label %end
793 %phi_value = phi <12 x i64> [zeroinitializer, %entry], [%value, %if]
794 %cast = bitcast <12 x i64> %phi_value to <24 x i32>
795 %cmp1 = icmp eq i32 %cond, 1
796 br i1 %cmp1, label %if, label %end
799 %phi_cast = phi <24 x i32> [zeroinitializer, %entry], [%cast, %if]
800 store <24 x i32> %phi_cast, ptr addrspace(1) %out
; <13 x i64> -> <26 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
; NOTE(review): the function name says v24i32 but the cast produces
; <26 x i32>; the name looks stale. Renaming requires updating the label
; directive and the define together.
804 ; CHECK-LABEL: {{^}}bitcast_v13i64_to_v24i32:
805 ; CHECK: ScratchSize: 0
806 define amdgpu_kernel void @bitcast_v13i64_to_v24i32(i32 %cond, ptr addrspace(1) %out, <13 x i64> %value) {
808 %cmp0 = icmp eq i32 %cond, 0
809 br i1 %cmp0, label %if, label %end
812 %phi_value = phi <13 x i64> [zeroinitializer, %entry], [%value, %if]
813 %cast = bitcast <13 x i64> %phi_value to <26 x i32>
814 %cmp1 = icmp eq i32 %cond, 1
815 br i1 %cmp1, label %if, label %end
818 %phi_cast = phi <26 x i32> [zeroinitializer, %entry], [%cast, %if]
819 store <26 x i32> %phi_cast, ptr addrspace(1) %out
; <14 x i64> -> <28 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
; NOTE(review): the function name says v26i32 but the cast produces
; <28 x i32>; the name looks stale. Renaming requires updating the label
; directive and the define together.
823 ; CHECK-LABEL: {{^}}bitcast_v14i64_to_v26i32:
824 ; CHECK: ScratchSize: 0
825 define amdgpu_kernel void @bitcast_v14i64_to_v26i32(i32 %cond, ptr addrspace(1) %out, <14 x i64> %value) {
827 %cmp0 = icmp eq i32 %cond, 0
828 br i1 %cmp0, label %if, label %end
831 %phi_value = phi <14 x i64> [zeroinitializer, %entry], [%value, %if]
832 %cast = bitcast <14 x i64> %phi_value to <28 x i32>
833 %cmp1 = icmp eq i32 %cond, 1
834 br i1 %cmp1, label %if, label %end
837 %phi_cast = phi <28 x i32> [zeroinitializer, %entry], [%cast, %if]
838 store <28 x i32> %phi_cast, ptr addrspace(1) %out
; <15 x i64> -> <30 x i32> between two phi nodes: %phi_value merges
; zeroinitializer (from %entry) with %value (from %if); %phi_cast merges
; zeroinitializer (from %entry) with the cast (from %if) and is stored to %out.
; The check directive expects the output to report ScratchSize: 0.
; NOTE(review): the function name says v26i32 but the cast produces
; <30 x i32>; the name looks stale. Renaming requires updating the label
; directive and the define together.
842 ; CHECK-LABEL: {{^}}bitcast_v15i64_to_v26i32:
843 ; CHECK: ScratchSize: 0
844 define amdgpu_kernel void @bitcast_v15i64_to_v26i32(i32 %cond, ptr addrspace(1) %out, <15 x i64> %value) {
846 %cmp0 = icmp eq i32 %cond, 0
847 br i1 %cmp0, label %if, label %end
850 %phi_value = phi <15 x i64> [zeroinitializer, %entry], [%value, %if]
851 %cast = bitcast <15 x i64> %phi_value to <30 x i32>
852 %cmp1 = icmp eq i32 %cond, 1
853 br i1 %cmp1, label %if, label %end
856 %phi_cast = phi <30 x i32> [zeroinitializer, %entry], [%cast, %if]
857 store <30 x i32> %phi_cast, ptr addrspace(1) %out