1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-SAFE,GCN,FUNC %s
2 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NONAN,GCN-NONAN,GCN,FUNC %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN,FUNC %s
5 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NONAN,GCN-NONAN,GCN,FUNC %s
7 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope --check-prefixes=EG,FUNC %s
; Intrinsic declaration; the per-workitem tests in this file call it and use
; the returned i32 as the element index into their %in pointer.
9 declare i32 @llvm.amdgcn.workitem.id.x() #1
11 ; The two inputs to the instruction are different SGPRs from the same
12 ; super register, so we can't fold both SGPR operands even though they
13 ; both belong to the same super register.
15 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_subreg_inputs_f32:
17 ; SI-SAFE: v_min_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
19 ; SI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
21 ; VI-SAFE: v_cmp_nlt_f32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
23 ; VI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; Kernel under test: extracts elements 0 and 1 of the <4 x float> kernel
; argument, compares them with `fcmp uge`, and stores the selected value
; (%r1 when the compare is true, otherwise %r0) to %out.
24 define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(ptr addrspace(1) %out, <4 x float> %reg0) #0 {
; Both compare operands are elements of the same vector argument.
25 %r0 = extractelement <4 x float> %reg0, i32 0
26 %r1 = extractelement <4 x float> %reg0, i32 1
; uge is an unordered predicate: the compare is also true if either operand is NaN,
; in which case %r1 is selected.
27 %r2 = fcmp uge float %r0, %r1
28 %r3 = select i1 %r2, float %r1, float %r0
29 store float %r3, ptr addrspace(1) %out
33 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
34 ; GCN-DAG: s_load_dwordx4 s[[[#LOAD:]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, {{0x9|0x24}}
36 ; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[#LOAD + 2]]
38 ; GCN-NONAN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[#LOAD + 3]]
40 ; VI-SAFE: v_mov_b32_e32 [[VB:v[0-9]+]], s[[#LOAD + 3]]
42 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, s[[#LOAD + 3]], [[VA]]
44 ; VI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[#LOAD + 2]]
45 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, s[[#LOAD + 2]], [[VB]]
46 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[VB]], [[VA]]
48 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, s[[#LOAD + 2]], [[VB]]
; Kernel under test: stores (a ule b) ? a : b to %out, where %a and %b are
; scalar float kernel arguments.
49 define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
; ule is unordered-or-less-or-equal: a NaN operand makes %cmp true and selects %a.
50 %cmp = fcmp ule float %a, %b
51 %val = select i1 %cmp, float %a, float %b
52 store float %val, ptr addrspace(1) %out, align 4
57 ; FIXME: Should separate tests
58 ; GCN-LABEL: {{^}}s_test_fmin_legacy_ule_f32_nnan_src:
59 ; GCN: s_load_dwordx4 s[[[#LOAD:]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, {{0x9|0x24}}
61 ; GCN-DAG: v_add_f32_e64 [[ADD_A:v[0-9]+]], s[[#LOAD + 2]], 1.0
62 ; GCN-DAG: v_add_f32_e64 [[ADD_B:v[0-9]+]], s[[#LOAD + 3]], 2.0
64 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
66 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
67 ; VI-SAFE: v_cndmask_b32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]], vcc
69 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
; Kernel under test: same ule compare+select shape as the plain scalar ule
; test, but both compare operands are results of `fadd nnan` rather than the
; raw kernel arguments. The selected value is stored to %out.
70 define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(ptr addrspace(1) %out, float %a, float %b) #0 {
; The nnan flag is on the fadd instructions only; the fcmp below carries no
; fast-math flags.
71 %a.nnan = fadd nnan float %a, 1.0
72 %b.nnan = fadd nnan float %b, 2.0
73 %cmp = fcmp ule float %a.nnan, %b.nnan
74 %val = select i1 %cmp, float %a.nnan, float %b.nnan
75 store float %val, ptr addrspace(1) %out, align 4
79 ; FUNC-LABEL: {{^}}test_fmin_legacy_ule_f32:
80 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
81 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
83 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
85 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[A]], [[B]]
86 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
88 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; Kernel under test: loads two adjacent floats from %in (indexed by the
; workitem id) and stores (a ule b) ? a : b to %out.
89 define amdgpu_kernel void @test_fmin_legacy_ule_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
90 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
91 %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
; %gep.1 addresses the float immediately after %gep.0.
92 %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
; NOTE(review): volatile presumably keeps these as two separate, ordered
; loads so the check lines can bind A and then B -- confirm.
94 %a = load volatile float, ptr addrspace(1) %gep.0, align 4
95 %b = load volatile float, ptr addrspace(1) %gep.1, align 4
; ule is unordered-or-less-or-equal: a NaN operand makes %cmp true and selects %a.
97 %cmp = fcmp ule float %a, %b
98 %val = select i1 %cmp, float %a, float %b
99 store float %val, ptr addrspace(1) %out, align 4
103 ; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32:
104 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
105 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
107 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
109 ; VI-SAFE: v_cmp_le_f32_e32 vcc, [[A]], [[B]]
110 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
112 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; Kernel under test: loads two adjacent floats from %in (indexed by the
; workitem id) and stores (a ole b) ? a : b to %out.
113 define amdgpu_kernel void @test_fmin_legacy_ole_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
114 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
115 %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
; %gep.1 addresses the float immediately after %gep.0.
116 %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
; NOTE(review): volatile presumably keeps these as two separate, ordered
; loads so the check lines can bind A and then B -- confirm.
118 %a = load volatile float, ptr addrspace(1) %gep.0, align 4
119 %b = load volatile float, ptr addrspace(1) %gep.1, align 4
; ole is ordered-and-less-or-equal: a NaN operand makes %cmp false and selects %b.
121 %cmp = fcmp ole float %a, %b
122 %val = select i1 %cmp, float %a, float %b
123 store float %val, ptr addrspace(1) %out, align 4
127 ; FUNC-LABEL: {{^}}test_fmin_legacy_olt_f32:
128 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
129 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
131 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
133 ; VI-SAFE: v_cmp_lt_f32_e32 vcc, [[A]], [[B]]
134 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
136 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; Kernel under test: loads two adjacent floats from %in (indexed by the
; workitem id) and stores (a olt b) ? a : b to %out.
137 define amdgpu_kernel void @test_fmin_legacy_olt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
138 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
139 %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
; %gep.1 addresses the float immediately after %gep.0.
140 %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
; NOTE(review): volatile presumably keeps these as two separate, ordered
; loads so the check lines can bind A and then B -- confirm.
142 %a = load volatile float, ptr addrspace(1) %gep.0, align 4
143 %b = load volatile float, ptr addrspace(1) %gep.1, align 4
; olt is ordered-and-less-than: a NaN operand makes %cmp false and selects %b.
145 %cmp = fcmp olt float %a, %b
146 %val = select i1 %cmp, float %a, float %b
147 store float %val, ptr addrspace(1) %out, align 4
151 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_f32:
152 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
153 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
155 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
157 ; VI-SAFE: v_cmp_nge_f32_e32 vcc, [[A]], [[B]]
158 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
160 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; Kernel under test: loads two adjacent floats from %in (indexed by the
; workitem id) and stores (a ult b) ? a : b to %out.
161 define amdgpu_kernel void @test_fmin_legacy_ult_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
162 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
163 %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
; %gep.1 addresses the float immediately after %gep.0.
164 %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
; NOTE(review): volatile presumably keeps these as two separate, ordered
; loads so the check lines can bind A and then B -- confirm.
166 %a = load volatile float, ptr addrspace(1) %gep.0, align 4
167 %b = load volatile float, ptr addrspace(1) %gep.1, align 4
; ult is unordered-or-less-than: a NaN operand makes %cmp true and selects %a.
169 %cmp = fcmp ult float %a, %b
170 %val = select i1 %cmp, float %a, float %b
171 store float %val, ptr addrspace(1) %out, align 4
175 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
176 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
177 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
179 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
181 ; VI-SAFE: v_cmp_nge_f32_e32 vcc, [[A]], [[B]]
182 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
184 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; Kernel under test: the ult compare+select pattern on <1 x float> values.
; Loads two adjacent <1 x float> elements from %in (indexed by the workitem
; id) and stores the selected vector to %out.
185 define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
186 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
187 %gep.0 = getelementptr <1 x float>, ptr addrspace(1) %in, i32 %tid
; %gep.1 addresses the <1 x float> element immediately after %gep.0.
188 %gep.1 = getelementptr <1 x float>, ptr addrspace(1) %gep.0, i32 1
; These loads carry no explicit alignment, unlike the scalar float tests.
190 %a = load volatile <1 x float>, ptr addrspace(1) %gep.0
191 %b = load volatile <1 x float>, ptr addrspace(1) %gep.1
; Vector fcmp yields a <1 x i1> mask; the select picks %a where the mask is true.
193 %cmp = fcmp ult <1 x float> %a, %b
194 %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
195 store <1 x float> %val, ptr addrspace(1) %out
199 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
200 ; GCN: {{buffer|flat}}_load_dwordx2
201 ; GCN: {{buffer|flat}}_load_dwordx2
202 ; SI-SAFE: v_min_legacy_f32_e32
203 ; SI-SAFE: v_min_legacy_f32_e32
205 ; VI-SAFE: v_cmp_nge_f32_e32
206 ; VI-SAFE: v_cndmask_b32_e32
207 ; VI-SAFE: v_cmp_nge_f32_e32
208 ; VI-SAFE: v_cndmask_b32_e32
210 ; GCN-NONAN: v_min_f32_e32
211 ; GCN-NONAN: v_min_f32_e32
; Kernel under test: the ult compare+select pattern on <2 x float> values.
; Loads two adjacent <2 x float> elements from %in (indexed by the workitem
; id) and stores the element-wise selected vector to %out.
212 define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
213 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
214 %gep.0 = getelementptr <2 x float>, ptr addrspace(1) %in, i32 %tid
; %gep.1 addresses the <2 x float> element immediately after %gep.0.
215 %gep.1 = getelementptr <2 x float>, ptr addrspace(1) %gep.0, i32 1
217 %a = load volatile <2 x float>, ptr addrspace(1) %gep.0
218 %b = load volatile <2 x float>, ptr addrspace(1) %gep.1
; Vector fcmp yields a <2 x i1> mask; each lane of the select picks from %a
; where its mask bit is true and from %b otherwise.
220 %cmp = fcmp ult <2 x float> %a, %b
221 %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
222 store <2 x float> %val, ptr addrspace(1) %out
226 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
227 ; SI-SAFE: v_min_legacy_f32_e32
228 ; SI-SAFE: v_min_legacy_f32_e32
229 ; SI-SAFE: v_min_legacy_f32_e32
230 ; SI-SAFE-NOT: v_min_
232 ; VI-SAFE: v_cmp_nge_f32_e32
233 ; VI-SAFE: v_cndmask_b32_e32
234 ; VI-SAFE: v_cmp_nge_f32_e32
235 ; VI-SAFE: v_cndmask_b32_e32
236 ; VI-SAFE: v_cmp_nge_f32_e32
237 ; VI-SAFE: v_cndmask_b32_e32
241 ; GCN-NONAN: v_min_f32_e32
242 ; GCN-NONAN: v_min_f32_e32
243 ; GCN-NONAN: v_min_f32_e32
244 ; GCN-NONAN-NOT: v_min_
; Kernel under test: the ult compare+select pattern on <3 x float> values.
; Loads two adjacent <3 x float> elements from %in (indexed by the workitem
; id) and stores the element-wise selected vector to %out.
245 define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
246 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
247 %gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
; %gep.1 addresses the <3 x float> element immediately after %gep.0.
248 %gep.1 = getelementptr <3 x float>, ptr addrspace(1) %gep.0, i32 1
; NOTE(review): these two loads are not volatile, unlike the loads in the
; other per-workitem tests in this file -- confirm that is intentional.
250 %a = load <3 x float>, ptr addrspace(1) %gep.0
251 %b = load <3 x float>, ptr addrspace(1) %gep.1
; Vector fcmp yields a <3 x i1> mask; each lane of the select picks from %a
; where its mask bit is true and from %b otherwise.
253 %cmp = fcmp ult <3 x float> %a, %b
254 %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
255 store <3 x float> %val, ptr addrspace(1) %out
259 ; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32_multi_use:
260 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
261 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
264 ; GCN-NEXT: v_cndmask_b32
; Kernel under test: the ole compare+select pattern where the compare result
; has a second use. The selected float is stored to %out0 and the raw i1
; compare result is stored to %out1.
; NOTE(review): the check lines visible for this kernel pin only the two
; loads and a NEXT-style match on v_cndmask_b32; nothing visible rejects a
; v_min instruction or matches the compare -- confirm those checks exist.
267 define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
268 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
269 %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
; %gep.1 addresses the float immediately after %gep.0.
270 %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
272 %a = load volatile float, ptr addrspace(1) %gep.0, align 4
273 %b = load volatile float, ptr addrspace(1) %gep.1, align 4
; ole is ordered-and-less-or-equal: a NaN operand makes %cmp false and selects %b.
275 %cmp = fcmp ole float %a, %b
276 %val0 = select i1 %cmp, float %a, float %b
277 store float %val0, ptr addrspace(1) %out0, align 4
; Second use of %cmp: the i1 itself is written out, so the compare result
; must exist as a value and not only as the select condition.
278 store i1 %cmp, ptr addrspace(1) %out1
; Attribute groups: #0 is attached to every kernel definition in this file;
; #1 is attached to the workitem-id intrinsic declaration and its call sites.
282 attributes #0 = { nounwind }
283 attributes #1 = { nounwind readnone }