1 ; RUN: llc -mtriple=amdgcn -amdgpu-set-wave-priority=true -o - %s | \
; Baseline case: the shader body shown here only returns a constant
; <2 x float> zero vector and calls no buffer-load intrinsic. The visible
; directive only requires the shader-part-epilog comment to be emitted.
4 ; CHECK-LABEL: no_setprio:
6 ; CHECK: ; return to shader part epilog
7 define amdgpu_ps <2 x float> @no_setprio() {
8 ret <2 x float> <float 0.0, float 0.0>
; A single buffer load is issued from the descriptor %p. The directives
; expect `s_setprio 0` on the line immediately after the emitted
; buffer_load_dwordx2, followed later by the shader-part-epilog comment.
11 ; CHECK-LABEL: vmem_in_exit_block:
13 ; CHECK: buffer_load_dwordx2
14 ; CHECK-NEXT: s_setprio 0
15 ; CHECK: ; return to shader part epilog
16 define amdgpu_ps <2 x float> @vmem_in_exit_block(<4 x i32> inreg %p) {
17 %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
; Two-way branch on (%i == 0) to blocks %a and %b. Of the visible body lines,
; one returns a constant zero vector and one issues a buffer load.
; Expected output: `s_setprio 0` directly after the buffer load, a second
; `s_setprio 0` later on, and two `s_branch` instructions that target the
; same exit label, which must directly follow the second branch.
; NOTE(review): the block labels are not visible in this excerpt, so which of
; %a / %b holds the load should be confirmed against the full file.
21 ; CHECK-LABEL: branch:
23 ; CHECK: s_cbranch_scc0 [[A:.*]]
25 ; CHECK: buffer_load_dwordx2
26 ; CHECK-NEXT: s_setprio 0
27 ; CHECK: s_branch [[EXIT:.*]]
29 ; CHECK-NEXT: s_setprio 0
30 ; CHECK: s_branch [[EXIT]]
31 ; CHECK-NEXT: [[EXIT]]:
32 define amdgpu_ps <2 x float> @branch(<4 x i32> inreg %p, i32 inreg %i) {
33 %cond = icmp eq i32 %i, 0
34 br i1 %cond, label %a, label %b
37 ret <2 x float> <float 0.0, float 0.0>
40 %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
; Two buffer loads separated by a conditional branch: %v1 is loaded first,
; then (%i != 0) selects %a or %c; %v2 is loaded and (%i != 1) selects %b or
; %c. The phi %v3 merges %v1 (from %entry) with %v2 (from %a).
; Expected output: both conditional branches target the same label,
; `s_setprio 0` comes directly after the second buffer load, a stretch with
; no s_setprio precedes the first `s_branch` to the exit label, and a further
; `s_setprio 0` precedes the second `s_branch` to that label.
; NOTE(review): block labels and some directives are not visible in this
; excerpt; confirm the exact regions against the full file.
44 ; CHECK-LABEL: setprio_follows_setprio:
46 ; CHECK: buffer_load_dwordx2
47 ; CHECK: s_cbranch_scc1 [[C:.*]]
49 ; CHECK: buffer_load_dwordx2
50 ; CHECK-NEXT: s_setprio 0
51 ; CHECK: s_cbranch_scc1 [[C]]
53 ; CHECK-NOT: s_setprio
54 ; CHECK: s_branch [[EXIT:.*]]
56 ; CHECK-NEXT: s_setprio 0
57 ; CHECK: s_branch [[EXIT]]
59 define amdgpu_ps <2 x float> @setprio_follows_setprio(<4 x i32> inreg %p, i32 inreg %i) {
61 %v1 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
62 %cond1 = icmp ne i32 %i, 0
63 br i1 %cond1, label %a, label %c
66 %v2 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 1, i32 0)
67 %cond2 = icmp ne i32 %i, 1
68 br i1 %cond2, label %b, label %c
74 %v3 = phi <2 x float> [%v1, %entry], [%v2, %a]
75 %v4 = fadd <2 x float> %v1, %v3
; Buffer load inside a loop: each iteration loads with %i as the index
; operand, accumulates into %sum2, and repeats while %i2 < 5 (unsigned).
; Expected output: no s_setprio from the %loop label through the back-edge
; `s_cbranch_scc1`, the %exit label on the very next line after that branch,
; and `s_setprio 0` as the first instruction of %exit.
80 ; CHECK: {{.*}}: ; %entry
82 ; CHECK-NOT: s_setprio
83 ; CHECK: [[LOOP:.*]]: ; %loop
84 ; CHECK-NOT: s_setprio
85 ; CHECK: buffer_load_dwordx2
86 ; CHECK-NOT: s_setprio
87 ; CHECK: s_cbranch_scc1 [[LOOP]]
88 ; CHECK-NEXT: {{.*}}: ; %exit
89 ; CHECK-NEXT: s_setprio 0
90 define amdgpu_ps <2 x float> @loop(<4 x i32> inreg %p) {
95 %i = phi i32 [0, %entry], [%i2, %loop]
96 %sum = phi <2 x float> [<float 0.0, float 0.0>, %entry], [%sum2, %loop]
100 %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 %i, i32 0, i32 0, i32 0)
101 %sum2 = fadd <2 x float> %sum, %v
103 %cond = icmp ult i32 %i2, 5
104 br i1 %cond, label %loop, label %exit
107 ret <2 x float> %sum2
; The entry block loads %v and then branches on (%x != 0) either to %loop,
; which repeatedly multiplies by %v while %i2 < 5 (unsigned), or to
; %another_load, which issues a second buffer load and adds it to %v.
; Expected output:
;   - no s_setprio between the first load and the branch to %another_load;
;   - `s_setprio 0` as the first instruction of %loop.preheader;
;   - no s_setprio from the %loop label through its back-edge branch;
;   - the %exit label is emitted, with no s_setprio between it and the
;     `s_branch` to the common return label;
;   - in %another_load, `s_setprio 0` directly after the second load, then an
;     `s_branch` to the same return label.
110 ; CHECK-LABEL: edge_split:
112 ; CHECK: buffer_load_dwordx2
113 ; CHECK-NOT: s_setprio
114 ; CHECK: s_cbranch_scc1 [[ANOTHER_LOAD:.*]]
115 ; CHECK: {{.*}}: ; %loop.preheader
116 ; CHECK-NEXT: s_setprio 0
117 ; CHECK: [[LOOP:.*]]: ; %loop
118 ; CHECK-NOT: s_setprio
119 ; CHECK: s_cbranch_scc1 [[LOOP]]
120 ; CHECK: {{.*}}: ; %exit
121 ; CHECK-NOT: s_setprio
122 ; CHECK: s_branch [[RET:.*]]
123 ; CHECK: [[ANOTHER_LOAD]]: ; %another_load
124 ; CHECK: buffer_load_dwordx2
125 ; CHECK-NEXT: s_setprio 0
126 ; CHECK: s_branch [[RET]]
128 define amdgpu_ps <2 x float> @edge_split(<4 x i32> inreg %p, i32 inreg %x) {
130 %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
131 %cond = icmp ne i32 %x, 0
132 br i1 %cond, label %loop, label %another_load
135 %i = phi i32 [0, %entry], [%i2, %loop]
136 %mul = phi <2 x float> [%v, %entry], [%mul2, %loop]
139 %mul2 = fmul <2 x float> %mul, %v
141 %cond2 = icmp ult i32 %i2, 5
142 br i1 %cond2, label %loop, label %exit
145 ret <2 x float> %mul2
148 %v2 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 1, i32 0)
149 %sum = fadd <2 x float> %v, %v2
; Declaration of the buffer-load intrinsic called by the test functions
; above: it takes a <4 x i32> operand followed by four i32 operands and
; returns <2 x float>.
153 declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) nounwind