1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN-SAFE,SI-SAFE,GCN,FUNC %s
2 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NONAN,GCN-NONAN,GCN,FUNC %s
4 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN-SAFE,GCN,FUNC %s
5 ; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NONAN,GCN-NONAN,GCN,FUNC %s
7 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
; Intrinsic whose i32 result the memory-operand kernels below use as the
; element index into their input buffer (see the getelementptr on %in).
9 declare i32 @llvm.r600.read.tidig.x() #1
; Selects between elements 0 and 1 of a single <4 x float> kernel argument
; using an fcmp uge whose select picks the other operand on true.
11 ; The two inputs to the instruction are different SGPRs from the same
12 ; super register, so we can't fold both SGPR operands even though they
13 ; both come from that one super register.
; Every run below therefore expects one s-register and one v-register source.
15 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_subreg_inputs_f32:
17 ; SI-SAFE: v_min_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
19 ; SI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
21 ; VI-SAFE: v_cmp_nlt_f32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
23 ; VI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
24 define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(float addrspace(1)* %out, <4 x float> %reg0) #0 {
25 %r0 = extractelement <4 x float> %reg0, i32 0
26 %r1 = extractelement <4 x float> %reg0, i32 1
27 %r2 = fcmp uge float %r0, %r1
28 %r3 = select i1 %r2, float %r1, float %r0
29 store float %r3, float addrspace(1)* %out
; Unordered less-or-equal compare plus select on two float kernel arguments,
; which the checks expect to arrive through one s_load_dwordx2.
; Expected selection per run - v_min_legacy_f32 with swapped operands (SI
; without the no-NaNs flags), v_cmp_ngt + v_cndmask (VI without the no-NaNs
; flags), plain v_min_f32 (both no-NaNs runs).
33 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
34 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
36 ; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
38 ; GCN-NONAN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
40 ; VI-SAFE: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
42 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, s[[B]], [[VA]]
44 ; VI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
45 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, s[[A]], [[VB]]
46 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[VB]], [[VA]]
48 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, s[[A]], [[VB]]
49 define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(float addrspace(1)* %out, float %a, float %b) #0 {
50 %cmp = fcmp ule float %a, %b
51 %val = select i1 %cmp, float %a, float %b
52 store float %val, float addrspace(1)* %out, align 4
; Same ule compare + select as the scalar test, but both operands are first
; produced by fadd instructions carrying the nnan flag. The runs without the
; global no-NaNs flags are still expected to use the legacy min (SI) or an
; explicit compare and v_cndmask (VI); only the no-NaNs runs expect v_min_f32.
; The label uses the FUNC prefix like every other test in this file, so the
; R600 run anchors on this function too.
57 ; FIXME: Should separate tests
58 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32_nnan_src:
59 ; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
61 ; GCN-DAG: v_add_f32_e64 [[ADD_A:v[0-9]+]], s[[A]], 1.0
62 ; GCN-DAG: v_add_f32_e64 [[ADD_B:v[0-9]+]], s[[B]], 2.0
64 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
66 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
67 ; VI-SAFE: v_cndmask_b32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]], vcc
69 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
70 define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(float addrspace(1)* %out, float %a, float %b) #0 {
71 %a.nnan = fadd nnan float %a, 1.0
72 %b.nnan = fadd nnan float %b, 2.0
73 %cmp = fcmp ule float %a.nnan, %b.nnan
74 %val = select i1 %cmp, float %a.nnan, float %b.nnan
75 store float %val, float addrspace(1)* %out, align 4
; Unordered less-or-equal compare plus select on two floats read with
; volatile loads from consecutive elements of %in (index taken from the
; tidig.x intrinsic). SI without the no-NaNs flags expects the legacy min with
; swapped operands; VI without them expects v_cmp_ngt + v_cndmask; the
; no-NaNs runs expect v_min_f32.
79 ; FUNC-LABEL: {{^}}test_fmin_legacy_ule_f32:
80 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
81 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
83 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
85 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, [[A]], [[B]]
86 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
88 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
89 define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
90 %tid = call i32 @llvm.r600.read.tidig.x() #1
91 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
92 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
94 %a = load volatile float, float addrspace(1)* %gep.0, align 4
95 %b = load volatile float, float addrspace(1)* %gep.1, align 4
97 %cmp = fcmp ule float %a, %b
98 %val = select i1 %cmp, float %a, float %b
99 store float %val, float addrspace(1)* %out, align 4
; Ordered less-or-equal compare plus select on two floats read with volatile
; loads from consecutive elements of %in. SI without the no-NaNs flags expects
; the legacy min in source operand order; VI without them expects an explicit
; v_cmp_le followed by v_cndmask; the no-NaNs runs expect v_min_f32.
; The v_cmp_le line is a live directive here (prefix terminated by a colon),
; so the VI compare is actually verified.
103 ; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32:
104 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
105 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
107 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
109 ; VI-SAFE: v_cmp_le_f32_e32 vcc, [[A]], [[B]]
110 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
112 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
113 define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
114 %tid = call i32 @llvm.r600.read.tidig.x() #1
115 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
116 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
118 %a = load volatile float, float addrspace(1)* %gep.0, align 4
119 %b = load volatile float, float addrspace(1)* %gep.1, align 4
121 %cmp = fcmp ole float %a, %b
122 %val = select i1 %cmp, float %a, float %b
123 store float %val, float addrspace(1)* %out, align 4
; Ordered less-than compare plus select on two floats read with volatile
; loads from consecutive elements of %in. SI without the no-NaNs flags expects
; the legacy min in source operand order; VI without them expects an explicit
; v_cmp_lt followed by v_cndmask; the no-NaNs runs expect v_min_f32.
; The v_cmp_lt line is a live directive here (prefix terminated by a colon),
; so the VI compare is actually verified.
127 ; FUNC-LABEL: {{^}}test_fmin_legacy_olt_f32:
128 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
129 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
131 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
133 ; VI-SAFE: v_cmp_lt_f32_e32 vcc, [[A]], [[B]]
134 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
136 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
137 define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
138 %tid = call i32 @llvm.r600.read.tidig.x() #1
139 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
140 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
142 %a = load volatile float, float addrspace(1)* %gep.0, align 4
143 %b = load volatile float, float addrspace(1)* %gep.1, align 4
145 %cmp = fcmp olt float %a, %b
146 %val = select i1 %cmp, float %a, float %b
147 store float %val, float addrspace(1)* %out, align 4
; Unordered less-than compare plus select on two floats read with volatile
; loads from consecutive elements of %in. SI without the no-NaNs flags expects
; the legacy min with swapped operands; the no-NaNs runs expect v_min_f32.
; For VI without the no-NaNs flags the compare must stay true on NaN inputs,
; so the expected instruction is the negated ordered compare v_cmp_nge (the
; same mnemonic the v3f32 ult test checks), followed by v_cndmask.
151 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_f32:
152 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
153 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
155 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
157 ; VI-SAFE: v_cmp_nge_f32_e32 vcc, [[A]], [[B]]
158 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
160 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
161 define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
162 %tid = call i32 @llvm.r600.read.tidig.x() #1
163 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
164 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
166 %a = load volatile float, float addrspace(1)* %gep.0, align 4
167 %b = load volatile float, float addrspace(1)* %gep.1, align 4
169 %cmp = fcmp ult float %a, %b
170 %val = select i1 %cmp, float %a, float %b
171 store float %val, float addrspace(1)* %out, align 4
; Single-element vector form of the ult compare + select, using ordinary
; (non-volatile) <1 x float> loads. Expected selection matches the scalar ult
; case - legacy min with swapped operands on SI without the no-NaNs flags,
; v_min_f32 on the no-NaNs runs.
; For VI without the no-NaNs flags the unordered compare is expected as the
; negated ordered compare v_cmp_nge (the same mnemonic the v3f32 ult test
; checks), followed by v_cndmask.
175 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
176 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
177 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
179 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
181 ; VI-SAFE: v_cmp_nge_f32_e32 vcc, [[A]], [[B]]
182 ; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
184 ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
185 define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
186 %tid = call i32 @llvm.r600.read.tidig.x() #1
187 %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
188 %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
190 %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
191 %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1
193 %cmp = fcmp ult <1 x float> %a, %b
194 %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
195 store <1 x float> %val, <1 x float> addrspace(1)* %out
; Two-element vector form of the ult compare + select. Each run expects one
; min-style operation per element - two legacy mins on SI without the no-NaNs
; flags, two v_min_f32 on the no-NaNs runs.
; For VI without the no-NaNs flags each element is expected as a negated
; ordered compare v_cmp_nge (the same mnemonic the v3f32 ult test checks)
; followed by its v_cndmask.
199 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
200 ; GCN: {{buffer|flat}}_load_dwordx2
201 ; GCN: {{buffer|flat}}_load_dwordx2
202 ; SI-SAFE: v_min_legacy_f32_e32
203 ; SI-SAFE: v_min_legacy_f32_e32
205 ; VI-SAFE: v_cmp_nge_f32_e32
206 ; VI-SAFE: v_cndmask_b32_e32
207 ; VI-SAFE: v_cmp_nge_f32_e32
208 ; VI-SAFE: v_cndmask_b32_e32
210 ; GCN-NONAN: v_min_f32_e32
211 ; GCN-NONAN: v_min_f32_e32
212 define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
213 %tid = call i32 @llvm.r600.read.tidig.x() #1
214 %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
215 %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1
217 %a = load <2 x float>, <2 x float> addrspace(1)* %gep.0
218 %b = load <2 x float>, <2 x float> addrspace(1)* %gep.1
220 %cmp = fcmp ult <2 x float> %a, %b
221 %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
222 store <2 x float> %val, <2 x float> addrspace(1)* %out
; Three-element vector form of the ult compare + select. Each run expects
; exactly three per-element operations - three legacy mins (SI without the
; no-NaNs flags), three v_cmp_nge + v_cndmask pairs (VI without them), or
; three v_min_f32 (no-NaNs runs). The -NOT directives reject any further
; v_min_ instruction after the third one on the SI and no-NaNs runs.
226 ; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
227 ; SI-SAFE: v_min_legacy_f32_e32
228 ; SI-SAFE: v_min_legacy_f32_e32
229 ; SI-SAFE: v_min_legacy_f32_e32
230 ; SI-SAFE-NOT: v_min_
232 ; VI-SAFE: v_cmp_nge_f32_e32
233 ; VI-SAFE: v_cndmask_b32_e32
234 ; VI-SAFE: v_cmp_nge_f32_e32
235 ; VI-SAFE: v_cndmask_b32_e32
236 ; VI-SAFE: v_cmp_nge_f32_e32
237 ; VI-SAFE: v_cndmask_b32_e32
241 ; GCN-NONAN: v_min_f32_e32
242 ; GCN-NONAN: v_min_f32_e32
243 ; GCN-NONAN: v_min_f32_e32
244 ; GCN-NONAN-NOT: v_min_
245 define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
246 %tid = call i32 @llvm.r600.read.tidig.x() #1
247 %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
248 %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
250 %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
251 %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1
253 %cmp = fcmp ult <3 x float> %a, %b
254 %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
255 store <3 x float> %val, <3 x float> addrspace(1)* %out
; Ordered less-or-equal compare whose i1 result has two uses - it drives the
; select and is also stored to %out1. All GCN runs share one expectation
; here, a v_cndmask_b32 after the two loads.
; NOTE(review) - the -NEXT directive below requires v_cndmask_b32 on the
; output line directly after the previous match; confirm that no intermediate
; check lines are missing between the second load check and it.
259 ; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32_multi_use:
260 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
261 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
264 ; GCN-NEXT: v_cndmask_b32
267 define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
268 %tid = call i32 @llvm.r600.read.tidig.x() #1
269 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
270 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
272 %a = load volatile float, float addrspace(1)* %gep.0, align 4
273 %b = load volatile float, float addrspace(1)* %gep.1, align 4
275 %cmp = fcmp ole float %a, %b
276 %val0 = select i1 %cmp, float %a, float %b
277 store float %val0, float addrspace(1)* %out0, align 4
278 store i1 %cmp, i1 addrspace(1)* %out1
; Attribute groups: #0 is attached to the kernel definitions above; #1 to the
; llvm.r600.read.tidig.x declaration and its call sites.
282 attributes #0 = { nounwind }
283 attributes #1 = { nounwind readnone }