1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
5 ; This test just checks that the compiler doesn't crash.
7 ; FUNC-LABEL: {{^}}v32i8_to_v8i32:
; Pixel-shader entry point taking an inreg pointer to <32 x i8> in addrspace(4).
; The loaded vector is reinterpreted as <8 x i32>, element 1 is compared with
; zero, and the comparison selects 0.0 (element non-zero) or 1.0 (element zero).
; NOTE(review): attribute group #0, the block label and the return/closing
; lines are not visible in this excerpt - confirm against the full file.
8 define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(4)* inreg) #0 {
10 %1 = load <32 x i8>, <32 x i8> addrspace(4)* %0
; Same 256 bits viewed as eight 32-bit lanes instead of thirty-two bytes.
11 %2 = bitcast <32 x i8> %1 to <8 x i32>
12 %3 = extractelement <8 x i32> %2, i32 1
13 %4 = icmp ne i32 %3, 0
14 %5 = select i1 %4, float 0.0, float 1.0
18 ; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
; Kernel that reinterprets an i8 pointer in addrspace(1) as a pointer to
; <16 x i8>, loads the 16-byte vector through it and stores it to %out.
; Here the bitcast is on the pointer, not on the loaded value.
20 define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
22 %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
23 %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
24 store <16 x i8> %1, <16 x i8> addrspace(1)* %out
; Kernel covering a float -> <2 x i16> bitcast. The loaded float has 1.0 added,
; is reinterpreted as two 16-bit integers, each lane has 2 added, and the
; result is stored to %out.
; NOTE(review): the arithmetic on both sides presumably keeps the cast between a
; float producer and an integer-vector consumer - confirm intent.
28 define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
29 %load = load float, float addrspace(1)* %in, align 4
30 %fadd32 = fadd float %load, 1.0
31 %bc = bitcast float %fadd32 to <2 x i16>
32 %add.bitcast = add <2 x i16> %bc, <i16 2, i16 2>
33 store <2 x i16> %add.bitcast, <2 x i16> addrspace(1)* %out
; Kernel covering a <2 x i16> -> float bitcast. Each loaded lane has 2 added,
; the 32-bit pair is reinterpreted as one float, 1.0 is added and the result is
; stored to %out.
37 define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
38 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
39 %add.v2i16 = add <2 x i16> %load, <i16 2, i16 2>
40 %bc = bitcast <2 x i16> %add.v2i16 to float
41 %fadd.bitcast = fadd float %bc, 1.0
42 store float %fadd.bitcast, float addrspace(1)* %out
; Kernel covering a float -> <2 x half> bitcast. The loaded float has 1.0
; added, is reinterpreted as two half values, each lane has 2.0 added, and the
; result is stored to %out.
46 define amdgpu_kernel void @f32_to_v2f16(<2 x half> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
47 %load = load float, float addrspace(1)* %in, align 4
48 %fadd32 = fadd float %load, 1.0
49 %bc = bitcast float %fadd32 to <2 x half>
50 %add.bitcast = fadd <2 x half> %bc, <half 2.0, half 2.0>
51 store <2 x half> %add.bitcast, <2 x half> addrspace(1)* %out
; Kernel covering a <2 x half> -> float bitcast. Each loaded half lane has 2.0
; added, the pair is reinterpreted as one float, 1.0 is added and the result is
; stored to %out.
55 define amdgpu_kernel void @v2f16_to_f32(float addrspace(1)* %out, <2 x half> addrspace(1)* %in) nounwind {
56 %load = load <2 x half>, <2 x half> addrspace(1)* %in, align 4
57 %add.v2f16 = fadd <2 x half> %load, <half 2.0, half 2.0>
58 %bc = bitcast <2 x half> %add.v2f16 to float
59 %fadd.bitcast = fadd float %bc, 1.0
60 store float %fadd.bitcast, float addrspace(1)* %out
; Kernel covering a <4 x i8> -> i32 bitcast with no surrounding arithmetic:
; the loaded byte vector is reinterpreted as one 32-bit integer and stored.
64 define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
65 %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
66 %bc = bitcast <4 x i8> %load to i32
67 store i32 %bc, i32 addrspace(1)* %out, align 4
; Kernel covering an i32 -> <4 x i8> bitcast with no surrounding arithmetic:
; the loaded 32-bit integer is reinterpreted as four bytes and stored.
71 define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
72 %load = load i32, i32 addrspace(1)* %in, align 4
73 %bc = bitcast i32 %load to <4 x i8>
74 store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
78 ; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
; Kernel covering a <2 x i32> -> double bitcast. The loaded lanes have 4 and 9
; added respectively, the 64-bit pair is reinterpreted as a double, 1.0 is
; added and the result is stored to %out.
80 define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
81 %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
82 %add = add <2 x i32> %val, <i32 4, i32 9>
83 %bc = bitcast <2 x i32> %add to double
84 %fadd.bc = fadd double %bc, 1.0
85 store double %fadd.bc, double addrspace(1)* %out, align 8
89 ; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
; Kernel covering a double -> <2 x i32> bitcast. The loaded double has 4.0
; added, is reinterpreted as two 32-bit integers and stored to %out; no
; integer arithmetic follows the cast.
91 define amdgpu_kernel void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
92 %val = load double, double addrspace(1)* %in, align 8
93 %add = fadd double %val, 4.0
94 %bc = bitcast double %add to <2 x i32>
95 store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
99 ; FUNC-LABEL: {{^}}bitcast_v2i64_to_v2f64:
; Kernel covering a <2 x i64> -> <2 x double> bitcast whose result feeds a phi.
; %value is passed by value as a kernel argument. When %cond equals 0 control
; goes to %if, where the cast is made; otherwise it goes straight to %end. The
; phi merges the cast value with zeroinitializer arriving from %entry, and the
; merged vector is stored to %out.
; NOTE(review): the entry/if/end label lines and the branch leaving %if are not
; visible in this excerpt - confirm against the full file.
100 define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, <2 x double> addrspace(1)* %out, <2 x i64> %value) {
102 %cmp0 = icmp eq i32 %cond, 0
103 br i1 %cmp0, label %if, label %end
106 %cast = bitcast <2 x i64> %value to <2 x double>
110 %phi = phi <2 x double> [zeroinitializer, %entry], [%cast, %if]
111 store <2 x double> %phi, <2 x double> addrspace(1)* %out
115 ; FUNC-LABEL: {{^}}bitcast_v2f64_to_v2i64:
; Kernel covering a <2 x double> -> <2 x i64> bitcast whose result feeds a phi.
; %value is passed by value as a kernel argument. When %cond equals 0 control
; goes to %if, where the cast is made; otherwise it goes straight to %end. The
; phi merges the cast value with zeroinitializer arriving from %entry, and the
; merged vector is stored to %out.
; NOTE(review): the entry/if/end label lines and the branch leaving %if are not
; visible in this excerpt - confirm against the full file.
116 define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, <2 x i64> addrspace(1)* %out, <2 x double> %value) {
118 %cmp0 = icmp eq i32 %cond, 0
119 br i1 %cmp0, label %if, label %end
122 %cast = bitcast <2 x double> %value to <2 x i64>
126 %phi = phi <2 x i64> [zeroinitializer, %entry], [%cast, %if]
127 store <2 x i64> %phi, <2 x i64> addrspace(1)* %out
131 ; FUNC-LABEL: {{^}}v4i16_to_f64:
; Kernel covering a <4 x i16> -> double bitcast. Each loaded lane has 4 added,
; the 64 bits are reinterpreted as a double, 1.0 is added and the result is
; stored to %out. The vector load is marked align 4.
132 define amdgpu_kernel void @v4i16_to_f64(double addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
133 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
134 %add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
135 %bc = bitcast <4 x i16> %add.v4i16 to double
136 %fadd.bitcast = fadd double %bc, 1.0
137 store double %fadd.bitcast, double addrspace(1)* %out
141 ; FUNC-LABEL: {{^}}v4f16_to_f64:
; Kernel covering a <4 x half> -> double bitcast. Each loaded half lane has 4.0
; added, the 64 bits are reinterpreted as a double, 1.0 is added and the result
; is stored to %out. The vector load is marked align 4.
142 define amdgpu_kernel void @v4f16_to_f64(double addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind {
143 %load = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
144 %add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
145 %bc = bitcast <4 x half> %add.v4half to double
146 %fadd.bitcast = fadd double %bc, 1.0
147 store double %fadd.bitcast, double addrspace(1)* %out
151 ; FUNC-LABEL: {{^}}f64_to_v4f16:
; Kernel covering a double -> <4 x half> bitcast. The loaded double has 1.0
; added, is reinterpreted as four half values, each lane has 2.0 added, and the
; result is stored to %out.
; Note: the double is loaded with align 4, and the value named %fadd32 is
; actually a double (64-bit) add.
152 define amdgpu_kernel void @f64_to_v4f16(<4 x half> addrspace(1)* %out, double addrspace(1)* %in) nounwind {
153 %load = load double, double addrspace(1)* %in, align 4
154 %fadd32 = fadd double %load, 1.0
155 %bc = bitcast double %fadd32 to <4 x half>
156 %add.bitcast = fadd <4 x half> %bc, <half 2.0, half 2.0, half 2.0, half 2.0>
157 store <4 x half> %add.bitcast, <4 x half> addrspace(1)* %out
161 ; FUNC-LABEL: {{^}}f64_to_v4i16:
; Kernel covering a double -> <4 x i16> bitcast. The loaded double has 1.0
; added, is reinterpreted as four 16-bit integers, each lane has 2 added, and
; the result is stored to %out.
; Note: the double is loaded with align 4, and the value named %fadd32 is
; actually a double (64-bit) add.
162 define amdgpu_kernel void @f64_to_v4i16(<4 x i16> addrspace(1)* %out, double addrspace(1)* %in) nounwind {
163 %load = load double, double addrspace(1)* %in, align 4
164 %fadd32 = fadd double %load, 1.0
165 %bc = bitcast double %fadd32 to <4 x i16>
166 %add.bitcast = add <4 x i16> %bc, <i16 2, i16 2, i16 2, i16 2>
167 store <4 x i16> %add.bitcast, <4 x i16> addrspace(1)* %out
171 ; FUNC-LABEL: {{^}}v4i16_to_i64:
; Kernel covering a <4 x i16> -> i64 bitcast. Each loaded lane has 4 added, the
; 64 bits are reinterpreted as one i64, 1 is added and the result is stored to
; %out.
172 define amdgpu_kernel void @v4i16_to_i64(i64 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
173 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
174 %add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
175 %bc = bitcast <4 x i16> %add.v4i16 to i64
176 %add.bitcast = add i64 %bc, 1
177 store i64 %add.bitcast, i64 addrspace(1)* %out
181 ; FUNC-LABEL: {{^}}v4f16_to_i64:
; Kernel covering a <4 x half> -> i64 bitcast. Each loaded half lane has 4.0
; added, the 64 bits are reinterpreted as one i64, 1 is added and the result is
; stored to %out.
182 define amdgpu_kernel void @v4f16_to_i64(i64 addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind {
183 %load = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
184 %add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
185 %bc = bitcast <4 x half> %add.v4half to i64
186 %add.bitcast = add i64 %bc, 1
187 store i64 %add.bitcast, i64 addrspace(1)* %out
191 ; FUNC-LABEL: {{^}}bitcast_i64_to_v4i16:
; Kernel covering an i64 -> <4 x i16> bitcast. The loaded i64 has 4 added, is
; reinterpreted as four 16-bit integers, the lanes have 1, 2, 3 and 4 added
; respectively, and the result is stored to %out.
192 define amdgpu_kernel void @bitcast_i64_to_v4i16(<4 x i16> addrspace(1)* %out, i64 addrspace(1)* %in) {
193 %val = load i64, i64 addrspace(1)* %in, align 8
194 %add = add i64 %val, 4
195 %bc = bitcast i64 %add to <4 x i16>
196 %add.v4i16 = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
197 store <4 x i16> %add.v4i16, <4 x i16> addrspace(1)* %out, align 8
201 ; FUNC-LABEL: {{^}}bitcast_i64_to_v4f16:
; Kernel covering an i64 -> <4 x half> bitcast. The loaded i64 has 4 added, is
; reinterpreted as four half values, the lanes have 1.0, 2.0, 4.0 and 8.0 added
; respectively, and the result is stored to %out.
; Note: the value named %add.v4i16 is a <4 x half> floating-point add despite
; its name.
202 define amdgpu_kernel void @bitcast_i64_to_v4f16(<4 x half> addrspace(1)* %out, i64 addrspace(1)* %in) {
203 %val = load i64, i64 addrspace(1)* %in, align 8
204 %add = add i64 %val, 4
205 %bc = bitcast i64 %add to <4 x half>
206 %add.v4i16 = fadd <4 x half> %bc, <half 1.0, half 2.0, half 4.0, half 8.0>
207 store <4 x half> %add.v4i16, <4 x half> addrspace(1)* %out, align 8
211 ; FUNC-LABEL: {{^}}v4i16_to_v2f32:
; Kernel covering a <4 x i16> -> <2 x float> bitcast. Each loaded lane has 4
; added, the 64 bits are reinterpreted as two floats, 1.0 is added to each and
; the result is stored to %out.
212 define amdgpu_kernel void @v4i16_to_v2f32(<2 x float> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
213 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
214 %add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
215 %bc = bitcast <4 x i16> %add.v4i16 to <2 x float>
216 %fadd.bitcast = fadd <2 x float> %bc, <float 1.0, float 1.0>
217 store <2 x float> %fadd.bitcast, <2 x float> addrspace(1)* %out
221 ; FUNC-LABEL: {{^}}v4f16_to_v2f32:
; Kernel covering a <4 x half> -> <2 x float> bitcast. Each loaded half lane
; has 4.0 added, the 64 bits are reinterpreted as two floats, 1.0 is added to
; each and the result is stored to %out.
222 define amdgpu_kernel void @v4f16_to_v2f32(<2 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind {
223 %load = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
224 %add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
225 %bc = bitcast <4 x half> %add.v4half to <2 x float>
226 %fadd.bitcast = fadd <2 x float> %bc, <float 1.0, float 1.0>
227 store <2 x float> %fadd.bitcast, <2 x float> addrspace(1)* %out
231 ; FUNC-LABEL: {{^}}v2f32_to_v4i16:
; Kernel covering a <2 x float> -> <4 x i16> bitcast. The loaded floats have
; 2.0 and 4.0 added, the 64 bits are reinterpreted as four 16-bit integers, the
; lanes have 1, 2, 3 and 4 added, and the result is stored to %out.
232 define amdgpu_kernel void @v2f32_to_v4i16(<4 x i16> addrspace(1)* %out, <2 x float> addrspace(1)* %in) nounwind {
233 %load = load <2 x float>, <2 x float> addrspace(1)* %in, align 4
234 %add.v2f32 = fadd <2 x float> %load, <float 2.0, float 4.0>
235 %bc = bitcast <2 x float> %add.v2f32 to <4 x i16>
236 %add.bitcast = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
237 store <4 x i16> %add.bitcast, <4 x i16> addrspace(1)* %out
241 ; FUNC-LABEL: {{^}}v2f32_to_v4f16:
; Kernel covering a <2 x float> -> <4 x half> bitcast. The loaded floats have
; 2.0 and 4.0 added, the 64 bits are reinterpreted as four half values, the
; lanes have 1.0, 2.0, 4.0 and 8.0 added, and the result is stored to %out.
242 define amdgpu_kernel void @v2f32_to_v4f16(<4 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) nounwind {
243 %load = load <2 x float>, <2 x float> addrspace(1)* %in, align 4
244 %add.v2f32 = fadd <2 x float> %load, <float 2.0, float 4.0>
245 %bc = bitcast <2 x float> %add.v2f32 to <4 x half>
246 %add.bitcast = fadd <4 x half> %bc, <half 1.0, half 2.0, half 4.0, half 8.0>
247 store <4 x half> %add.bitcast, <4 x half> addrspace(1)* %out
251 ; FUNC-LABEL: {{^}}v4i16_to_v2i32:
; Kernel covering a <4 x i16> -> <2 x i32> bitcast. Each loaded lane has 4
; added, the 64 bits are reinterpreted as two 32-bit integers, 1 is added to
; each and the result is stored to %out.
252 define amdgpu_kernel void @v4i16_to_v2i32(<2 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
253 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
254 %add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
255 %bc = bitcast <4 x i16> %add.v4i16 to <2 x i32>
256 %add.bitcast = add <2 x i32> %bc, <i32 1, i32 1>
257 store <2 x i32> %add.bitcast, <2 x i32> addrspace(1)* %out
261 ; FUNC-LABEL: {{^}}v4f16_to_v2i32:
; Kernel covering a <4 x half> -> <2 x i32> bitcast. Each loaded half lane has
; 4.0 added, the 64 bits are reinterpreted as two 32-bit integers, 1 is added
; to each and the result is stored to %out.
262 define amdgpu_kernel void @v4f16_to_v2i32(<2 x i32> addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind {
263 %load = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
264 %add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
265 %bc = bitcast <4 x half> %add.v4half to <2 x i32>
266 %add.bitcast = add <2 x i32> %bc, <i32 1, i32 1>
267 store <2 x i32> %add.bitcast, <2 x i32> addrspace(1)* %out
271 ; FUNC-LABEL: {{^}}v2i32_to_v4i16:
; Kernel covering a <2 x i32> -> <4 x i16> bitcast. The loaded lanes have 2 and
; 4 added, the 64 bits are reinterpreted as four 16-bit integers, the lanes
; have 1, 2, 3 and 4 added, and the result is stored to %out.
272 define amdgpu_kernel void @v2i32_to_v4i16(<4 x i16> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
273 %load = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 4
274 %add.v2i32 = add <2 x i32> %load, <i32 2, i32 4>
275 %bc = bitcast <2 x i32> %add.v2i32 to <4 x i16>
276 %add.bitcast = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
277 store <4 x i16> %add.bitcast, <4 x i16> addrspace(1)* %out
281 ; FUNC-LABEL: {{^}}v2i32_to_v4f16:
; Kernel covering a <2 x i32> -> <4 x half> bitcast. The loaded lanes have 2
; and 4 added, the 64 bits are reinterpreted as four half values, the lanes
; have 1.0, 2.0, 4.0 and 8.0 added, and the result is stored to %out.
282 define amdgpu_kernel void @v2i32_to_v4f16(<4 x half> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
283 %load = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 4
284 %add.v2i32 = add <2 x i32> %load, <i32 2, i32 4>
285 %bc = bitcast <2 x i32> %add.v2i32 to <4 x half>
286 %add.bitcast = fadd <4 x half> %bc, <half 1.0, half 2.0, half 4.0, half 8.0>
287 store <4 x half> %add.bitcast, <4 x half> addrspace(1)* %out
291 declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
293 ; FUNC-LABEL: {{^}}bitcast_v4f32_to_v2i64:
294 ; SI: s_buffer_load_dwordx4
; Function with the default calling convention (no amdgpu_kernel or amdgpu_ps
; on the define) covering a <4 x float> -> <2 x i64> bitcast. The float vector
; comes from llvm.amdgcn.s.buffer.load.v4f32 called with an undef <4 x i32>
; first operand and 0 for both i32 operands; after the cast it is divided
; lane-wise (unsigned) by %arg.
; NOTE(review): the return of %div and the closing brace are not visible in
; this excerpt - confirm against the full file.
295 define <2 x i64> @bitcast_v4f32_to_v2i64(<2 x i64> %arg) {
296 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> undef, i32 0, i32 0)
297 %cast = bitcast <4 x float> %val to <2 x i64>
298 %div = udiv <2 x i64> %cast, %arg
302 declare half @llvm.canonicalize.f16(half)
304 ; FUNC-LABEL: {{^}}bitcast_f32_to_v1i32:
; Kernel covering a float -> <1 x i32> bitcast fed by a constant expression
; chain: the half constant 0xH03F0 goes through llvm.canonicalize.f16 (with the
; arcp and afn fast-math flags), is extended to float, reinterpreted as a
; one-element i32 vector, and element 0 is stored to %out.
; NOTE(review): 0xH03F0 has a zero exponent field, i.e. it is a subnormal half;
; whether that choice matters to the scenario is not shown here - confirm.
305 define amdgpu_kernel void @bitcast_f32_to_v1i32(i32 addrspace(1)* %out) {
306 %f16 = call arcp afn half @llvm.canonicalize.f16(half 0xH03F0)
307 %f32 = fpext half %f16 to float
308 %v = bitcast float %f32 to <1 x i32>
309 %v1 = extractelement <1 x i32> %v, i32 0
310 store i32 %v1, i32 addrspace(1)* %out