1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
; Pixel-shader entry point that issues two exp.f32 calls to target 0 with an
; enable mask of 0 and all-zero operands; only the second call passes `true`
; for the first i1 flag, which shows up as `done` on the second export.
; The expected output brackets both exports with `s_setprio 2` / `s_setprio 0`.
4 define amdgpu_ps void @test_export_zeroes_f32() #0 {
5 ; GCN-LABEL: test_export_zeroes_f32:
7 ; GCN-NEXT: s_setprio 2
8 ; GCN-NEXT: v_mov_b32_e32 v0, 0
9 ; GCN-NEXT: exp mrt0 off, off, off, off
10 ; GCN-NEXT: exp mrt0 off, off, off, off done
11 ; GCN-NEXT: s_setprio 0
15 call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
16 call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
; Pixel-shader export with enable mask 1: all four constants are materialised
; in VGPRs, but only the first operand (1.0, held in v3) is expected in the
; export; the remaining three slots print as `off`.
; The export is expected between `s_setprio 2` and `s_setprio 0`.
20 define amdgpu_ps void @test_export_en_src0_f32() #0 {
21 ; GCN-LABEL: test_export_en_src0_f32:
23 ; GCN-NEXT: s_setprio 2
24 ; GCN-NEXT: v_mov_b32_e32 v0, 4.0
25 ; GCN-NEXT: v_mov_b32_e32 v1, 0.5
26 ; GCN-NEXT: v_mov_b32_e32 v2, 2.0
27 ; GCN-NEXT: v_mov_b32_e32 v3, 1.0
28 ; GCN-NEXT: exp mrt0 v3, off, off, off done
29 ; GCN-NEXT: s_setprio 0
33 call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Same export pattern under the amdgpu_gs calling convention, with enable
; mask 2: only the second operand (2.0, held in v2) is expected in the export.
; The checks expect the same `s_setprio 2` ... `s_setprio 0` bracketing as the
; amdgpu_ps tests.
37 define amdgpu_gs void @test_export_gs() #0 {
38 ; GCN-LABEL: test_export_gs:
40 ; GCN-NEXT: s_setprio 2
41 ; GCN-NEXT: v_mov_b32_e32 v0, 4.0
42 ; GCN-NEXT: v_mov_b32_e32 v1, 0.5
43 ; GCN-NEXT: v_mov_b32_e32 v2, 2.0
44 ; GCN-NEXT: v_mov_b32_e32 v3, 1.0
45 ; GCN-NEXT: exp mrt0 off, v2, off, off done
46 ; GCN-NEXT: s_setprio 0
50 call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; amdgpu_hs variant of test_export_gs: identical export call (enable mask 2)
; and identical expected instruction sequence, including the
; `s_setprio 2` ... `s_setprio 0` bracketing around the export.
54 define amdgpu_hs void @test_export_hs() #0 {
55 ; GCN-LABEL: test_export_hs:
57 ; GCN-NEXT: s_setprio 2
58 ; GCN-NEXT: v_mov_b32_e32 v0, 4.0
59 ; GCN-NEXT: v_mov_b32_e32 v1, 0.5
60 ; GCN-NEXT: v_mov_b32_e32 v2, 2.0
61 ; GCN-NEXT: v_mov_b32_e32 v3, 1.0
62 ; GCN-NEXT: exp mrt0 off, v2, off, off done
63 ; GCN-NEXT: s_setprio 0
67 call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Callable (amdgpu_gfx) function containing an export; it is also the callee
; used by the test_export_in_callee* tests. With enable mask 2 the incoming
; %v occupies the first operand slot and is not exported.
; The visible checks show no `s_setprio` ahead of the export. After the export
; they expect `s_setprio 0`, an export-counter wait, and then `s_setprio 2`
; before the return through s_setpc_b64.
71 define amdgpu_gfx void @test_export_gfx(float %v) #0 {
72 ; GCN-LABEL: test_export_gfx:
74 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; GCN-NEXT: v_mov_b32_e32 v1, 4.0
76 ; GCN-NEXT: v_mov_b32_e32 v2, 0.5
77 ; GCN-NEXT: v_mov_b32_e32 v3, 2.0
78 ; GCN-NEXT: exp mrt0 off, v3, off, off done
79 ; GCN-NEXT: s_setprio 0
80 ; GCN-NEXT: s_waitcnt_expcnt null, 0x0
83 ; GCN-NEXT: s_setprio 2
84 ; GCN-NEXT: s_waitcnt expcnt(0)
85 ; GCN-NEXT: s_setpc_b64 s[30:31]
86 call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Same export call under the amdgpu_cs calling convention. Unlike the
; ps/gs/hs tests, the visible checks contain no `s_setprio` instructions
; around the export.
90 define amdgpu_cs void @test_export_cs() #0 {
91 ; GCN-LABEL: test_export_cs:
93 ; GCN-NEXT: v_mov_b32_e32 v0, 4.0
94 ; GCN-NEXT: v_mov_b32_e32 v1, 0.5
95 ; GCN-NEXT: v_mov_b32_e32 v2, 2.0
96 ; GCN-NEXT: v_mov_b32_e32 v3, 1.0
97 ; GCN-NEXT: exp mrt0 off, v2, off, off done
99 call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Same export call inside an amdgpu_kernel. As with test_export_cs, the
; visible checks contain no `s_setprio` instructions around the export.
103 define amdgpu_kernel void @test_export_kernel() #0 {
104 ; GCN-LABEL: test_export_kernel:
106 ; GCN-NEXT: v_mov_b32_e32 v0, 4.0
107 ; GCN-NEXT: v_mov_b32_e32 v1, 0.5
108 ; GCN-NEXT: v_mov_b32_e32 v2, 2.0
109 ; GCN-NEXT: v_mov_b32_e32 v3, 1.0
110 ; GCN-NEXT: exp mrt0 off, v2, off, off done
112 call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; Callable (amdgpu_gfx) function with no export call visible in its body.
; The expected output is just the entry wait and the return; no `s_setprio`
; appears in the checks.
116 define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
117 ; GCN-LABEL: test_no_export_gfx:
119 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GCN-NEXT: s_setpc_b64 s[30:31]
; Pixel-shader counterpart of test_no_export_gfx.
; NOTE(review): judging by the name this covers a shader without any export;
; confirm against the full body and its remaining check lines.
124 define amdgpu_ps void @test_no_export_ps(float %v) #0 {
125 ; GCN-LABEL: test_no_export_ps:
; Export inside a conditionally executed block: the branch reaches %exp only
; when %flag is non-zero, and the export (enable mask 15, no flags set) sends
; %x, %y, %z, %w.
; Expected output: `s_setprio 2` at entry; inside the %exp block the export is
; followed by `s_setprio 0`, an export-counter wait, and `s_setprio 2` again
; before falling through to %end.
131 define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
132 ; GCN-LABEL: test_if_export_f32:
134 ; GCN-NEXT: s_setprio 2
135 ; GCN-NEXT: s_mov_b32 s0, exec_lo
136 ; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
137 ; GCN-NEXT: s_cbranch_execz .LBB9_2
138 ; GCN-NEXT: ; %bb.1: ; %exp
139 ; GCN-NEXT: exp mrt0 v1, v2, v3, v4
140 ; GCN-NEXT: s_setprio 0
141 ; GCN-NEXT: s_waitcnt_expcnt null, 0x0
144 ; GCN-NEXT: s_setprio 2
145 ; GCN-NEXT: .LBB9_2: ; %end
147 %cc = icmp eq i32 %flag, 0
148 br i1 %cc, label %end, label %exp
151 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
; Variant of test_if_export_f32 where the export passes `true` for the last
; i1 flag only. The expected export line carries no extra suffix, and the
; surrounding priority sequence (`s_setprio 0`, export-counter wait,
; `s_setprio 2`) matches the unflagged conditional test.
158 define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
159 ; GCN-LABEL: test_if_export_vm_f32:
161 ; GCN-NEXT: s_setprio 2
162 ; GCN-NEXT: s_mov_b32 s0, exec_lo
163 ; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
164 ; GCN-NEXT: s_cbranch_execz .LBB10_2
165 ; GCN-NEXT: ; %bb.1: ; %exp
166 ; GCN-NEXT: exp mrt0 v1, v2, v3, v4
167 ; GCN-NEXT: s_setprio 0
168 ; GCN-NEXT: s_waitcnt_expcnt null, 0x0
171 ; GCN-NEXT: s_setprio 2
172 ; GCN-NEXT: .LBB10_2: ; %end
174 %cc = icmp eq i32 %flag, 0
175 br i1 %cc, label %end, label %exp
178 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
; Variant of test_if_export_f32 where the export passes `true` for the first
; i1 flag, so the expected export is marked `done`. The priority sequence
; after the export (`s_setprio 0`, export-counter wait, `s_setprio 2`) is the
; same as in the unflagged conditional test.
185 define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
186 ; GCN-LABEL: test_if_export_done_f32:
188 ; GCN-NEXT: s_setprio 2
189 ; GCN-NEXT: s_mov_b32 s0, exec_lo
190 ; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
191 ; GCN-NEXT: s_cbranch_execz .LBB11_2
192 ; GCN-NEXT: ; %bb.1: ; %exp
193 ; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
194 ; GCN-NEXT: s_setprio 0
195 ; GCN-NEXT: s_waitcnt_expcnt null, 0x0
198 ; GCN-NEXT: s_setprio 2
199 ; GCN-NEXT: .LBB11_2: ; %end
201 %cc = icmp eq i32 %flag, 0
202 br i1 %cc, label %end, label %exp
205 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
; Variant of test_if_export_f32 where the export passes `true` for both i1
; flags. The expected export line shows only the `done` marker, and the
; priority sequence after it is unchanged from the other conditional tests.
212 define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
213 ; GCN-LABEL: test_if_export_vm_done_f32:
215 ; GCN-NEXT: s_setprio 2
216 ; GCN-NEXT: s_mov_b32 s0, exec_lo
217 ; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
218 ; GCN-NEXT: s_cbranch_execz .LBB12_2
219 ; GCN-NEXT: ; %bb.1: ; %exp
220 ; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
221 ; GCN-NEXT: s_setprio 0
222 ; GCN-NEXT: s_waitcnt_expcnt null, 0x0
225 ; GCN-NEXT: s_setprio 2
226 ; GCN-NEXT: .LBB12_2: ; %end
228 %cc = icmp eq i32 %flag, 0
229 br i1 %cc, label %end, label %exp
232 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
; Export ordering across a buffer load. In the IR the two exports to targets
; 32 and 33 come first, then a buffer load, then the export to target 12
; (printed as pos0) that consumes the loaded value and sets the done flag.
; The expected assembly places the pos0 export first, right after the wait for
; the load, followed by the target 32/33 exports (printed as
; invalid_target_32/33) and a single trailing `s_setprio 0`.
; The buffer resource is a placeholder, so `poison` is used rather than the
; deprecated `undef`.
239 define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
240 ; GCN-LABEL: test_export_pos_before_param_across_load:
242 ; GCN-NEXT: s_setprio 2
243 ; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
244 ; GCN-NEXT: v_mov_b32_e32 v1, 0
245 ; GCN-NEXT: v_mov_b32_e32 v2, 1.0
246 ; GCN-NEXT: v_mov_b32_e32 v3, 0.5
247 ; GCN-NEXT: s_waitcnt vmcnt(0)
248 ; GCN-NEXT: exp pos0 v1, v1, v1, v0 done
249 ; GCN-NEXT: exp invalid_target_32 v2, v2, v2, v2
250 ; GCN-NEXT: exp invalid_target_33 v2, v2, v2, v3
251 ; GCN-NEXT: s_setprio 0
255 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
256 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
257 %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0)
258 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
; Export ordering across private-memory (addrspace(5)) accesses. %v is stored
; through a pointer selected between two allocas, a position export (target
; 12, done) follows, then a float is loaded from %data0 and exported to
; targets 32 and 33.
; The expected output issues the scratch store and load first, then the pos0
; export followed by `s_setprio 0`, an export-counter wait and `s_setprio 2`;
; it then waits for the load before the two remaining exports and ends with
; `s_setprio 0`.
262 define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
263 ; GCN-LABEL: test_export_across_store_load:
265 ; GCN-NEXT: s_setprio 2
266 ; GCN-NEXT: v_mov_b32_e32 v2, 16
267 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
268 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
269 ; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
270 ; GCN-NEXT: v_mov_b32_e32 v2, 0
271 ; GCN-NEXT: scratch_store_b32 v0, v1, off
272 ; GCN-NEXT: scratch_load_b32 v0, off, off
273 ; GCN-NEXT: v_mov_b32_e32 v1, 1.0
274 ; GCN-NEXT: exp pos0 v2, v2, v2, v1 done
275 ; GCN-NEXT: s_setprio 0
276 ; GCN-NEXT: s_waitcnt_expcnt null, 0x0
279 ; GCN-NEXT: s_setprio 2
280 ; GCN-NEXT: s_waitcnt vmcnt(0)
281 ; GCN-NEXT: exp invalid_target_32 v0, v2, v1, v2
282 ; GCN-NEXT: exp invalid_target_33 v0, v2, v1, v2
283 ; GCN-NEXT: s_setprio 0
287 %data0 = alloca <4 x float>, align 8, addrspace(5)
288 %data1 = alloca <4 x float>, align 8, addrspace(5)
289 %cmp = icmp eq i32 %idx, 1
290 %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
291 store float %v, ptr addrspace(5) %data, align 8
292 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
293 %load0 = load float, ptr addrspace(5) %data0, align 8
294 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
295 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
; Pixel shader with no export of its own that calls @test_export_gfx, which
; contains one. The checks expect `s_setprio 2` at shader entry, ahead of the
; GOT-relative address computation and the s_swappc_b64 call; no
; `s_setprio 0` appears in the visible checks for the caller.
299 define amdgpu_ps void @test_export_in_callee(float %v) #0 {
300 ; GCN-LABEL: test_export_in_callee:
302 ; GCN-NEXT: s_setprio 2
303 ; GCN-NEXT: s_getpc_b64 s[0:1]
304 ; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
305 ; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
306 ; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
307 ; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
308 ; GCN-NEXT: s_mov_b32 s32, 0
309 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
310 ; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
312 %x = fadd float %v, 1.0
313 call void @test_export_gfx(float %x)
; Like test_export_in_callee, but the IR explicitly requests priority 0 via
; llvm.amdgcn.s.setprio before calling @test_export_gfx. The checks expect
; `s_setprio 2` at entry and a second `s_setprio 2` after the add; the
; requested priority 0 does not appear anywhere in the expected output.
317 define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
318 ; GCN-LABEL: test_export_in_callee_prio:
320 ; GCN-NEXT: s_setprio 2
321 ; GCN-NEXT: s_mov_b32 s32, 0
322 ; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
323 ; GCN-NEXT: s_setprio 2
324 ; GCN-NEXT: s_getpc_b64 s[0:1]
325 ; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
326 ; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
327 ; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
328 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
329 ; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
331 %x = fadd float %v, 1.0
332 call void @llvm.amdgcn.s.setprio(i16 0)
333 call void @test_export_gfx(float %x)
; Intrinsic declarations used by the tests in this file.
; NOTE(review): no call to @llvm.amdgcn.exp.i32 is visible in this file;
; confirm whether the declaration is still needed.
337 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
338 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
339 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
340 declare void @llvm.amdgcn.s.setprio(i16)

; Attribute groups: #0 is applied to every test function, #1 to the export
; intrinsics, and #2 to the buffer-load intrinsic.
342 attributes #0 = { nounwind }
343 attributes #1 = { nounwind inaccessiblememonly }
344 attributes #2 = { nounwind readnone }