1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN-SAFE,SI-SAFE,GCN,FUNC %s
2 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NONAN,GCN-NONAN,GCN,FUNC %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN-SAFE,GCN,FUNC %s
5 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NONAN,GCN-NONAN,GCN,FUNC %s
7 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
9 declare i32 @llvm.r600.read.tidig.x() #1
11 ; The two inputs to the instruction are different SGPRs from the same
12 ; super register, so we can't fold both SGPR operands even though they
13 ; are both the same register.
15 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_subreg_inputs_f32:
17 ; SI-SAFE: v_min_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
19 ; SI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
21 ; VI-SAFE: v_cmp_nlt_f32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
23 ; VI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(float addrspace(1)* %out, <4 x float> %reg0) #0 {
  %r0 = extractelement <4 x float> %reg0, i32 0
  %r1 = extractelement <4 x float> %reg0, i32 1
  %r2 = fcmp uge float %r0, %r1
  %r3 = select i1 %r2, float %r1, float %r0
  store float %r3, float addrspace(1)* %out
  ret void
}
33 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
34 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
36 ; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
38 ; SI-SAFE: v_min_legacy_f32_e64 {{v[0-9]+}}, [[VB]], s[[A]]
40 ; VI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
41 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, s[[A]], [[VB]]
42 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[VB]], [[VA]]
44 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, s[[A]], [[VB]]
define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %cmp = fcmp ule float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
53 ; FIXME: Should separate tests
54 ; GCN-LABEL: {{^}}s_test_fmin_legacy_ule_f32_nnan_src:
55 ; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
57 ; GCN-DAG: v_add_f32_e64 [[ADD_A:v[0-9]+]], s[[A]], 1.0
58 ; GCN-DAG: v_add_f32_e64 [[ADD_B:v[0-9]+]], s[[B]], 2.0
60 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
62 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
63 ; VI-SAFE: v_cndmask_b32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]], vcc
65 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(float addrspace(1)* %out, float %a, float %b) #0 {
  %a.nnan = fadd nnan float %a, 1.0
  %b.nnan = fadd nnan float %b, 2.0
  %cmp = fcmp ule float %a.nnan, %b.nnan
  %val = select i1 %cmp, float %a.nnan, float %b.nnan
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
75 ; FUNC-LABEL: {{^}}test_fmin_legacy_ule_f32:
76 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
77 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
79 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
81 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[A]], [[B]]
82 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
84 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ule float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
99 ; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32:
100 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
101 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
103 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; VI-SAFE: v_cmp_le_f32_e32 vcc, [[A]], [[B]]
106 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
108 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ole float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
123 ; FUNC-LABEL: {{^}}test_fmin_legacy_olt_f32:
124 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
125 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
127 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; VI-SAFE: v_cmp_lt_f32_e32 vcc, [[A]], [[B]]
130 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
132 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp olt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
147 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_f32:
148 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
149 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
151 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; VI-SAFE: v_cmp_lt_f32_e32 vcc, [[A]], [[B]]
154 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
156 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ult float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
171 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
172 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
173 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
175 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; VI-SAFE: v_cmp_lt_f32_e32 vcc, [[A]], [[B]]
178 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
180 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1

  %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
  %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <1 x float> %a, %b
  %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
  store <1 x float> %val, <1 x float> addrspace(1)* %out
  ret void
}
195 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
196 ; GCN: {{buffer|flat}}_load_dwordx2
197 ; GCN: {{buffer|flat}}_load_dwordx2
198 ; SI-SAFE: v_min_legacy_f32_e32
199 ; SI-SAFE: v_min_legacy_f32_e32
; VI-SAFE: v_cmp_lt_f32_e32
202 ; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE: v_cmp_lt_f32_e32
204 ; VI-SAFE: v_cndmask_b32_e32
206 ; GCN-NONAN: v_min_f32_e32
207 ; GCN-NONAN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1

  %a = load <2 x float>, <2 x float> addrspace(1)* %gep.0
  %b = load <2 x float>, <2 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <2 x float> %a, %b
  %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
  store <2 x float> %val, <2 x float> addrspace(1)* %out
  ret void
}
222 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
223 ; SI-SAFE: v_min_legacy_f32_e32
224 ; SI-SAFE: v_min_legacy_f32_e32
225 ; SI-SAFE: v_min_legacy_f32_e32
226 ; SI-SAFE-NOT: v_min_
228 ; VI-SAFE: v_cmp_nge_f32_e32
229 ; VI-SAFE: v_cndmask_b32_e32
230 ; VI-SAFE: v_cmp_nge_f32_e32
231 ; VI-SAFE: v_cndmask_b32_e32
232 ; VI-SAFE: v_cmp_nge_f32_e32
233 ; VI-SAFE: v_cndmask_b32_e32
237 ; GCN-NONAN: v_min_f32_e32
238 ; GCN-NONAN: v_min_f32_e32
239 ; GCN-NONAN: v_min_f32_e32
240 ; GCN-NONAN-NOT: v_min_
define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1

  %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
  %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1

  %cmp = fcmp ult <3 x float> %a, %b
  %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
  store <3 x float> %val, <3 x float> addrspace(1)* %out
  ret void
}
255 ; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32_multi_use:
256 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
257 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
260 ; GCN-NEXT: v_cndmask_b32
define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ole float %a, %b
  %val0 = select i1 %cmp, float %a, float %b
  store float %val0, float addrspace(1)* %out0, align 4
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}
278 attributes #0 = { nounwind }
279 attributes #1 = { nounwind readnone }