; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
declare float @llvm.fabs.f32(float) #0

declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
declare half @llvm.fabs.f16(half) #0
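
; The i32 operand of llvm.amdgcn.fcmp selects the compare condition and uses
; LLVM's fcmp predicate encoding, as the test names below indicate:
; oeq = 1, ogt = 2, oge = 3, olt = 4, ole = 5, one = 6,
; ueq = 9, ugt = 10, uge = 11, ult = 12, ule = 13, une = 14.
; Each test checks that the matching VOPC compare writing an SGPR pair is selected.
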
; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
  %temp = call float @llvm.fabs.f32(float %a)
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs:
; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|
define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
  %temp = call float @llvm.fabs.f32(float %a)
  %src_input = call float @llvm.fabs.f32(float %src)
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src_input, float %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

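; An out-of-range condition code (-1 here) is not a valid fcmp predicate, so
; the intrinsic is expected to fold away and no compare should be selected.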
; GCN-LABEL: {{^}}v_fcmp_f32:
; GCN-NOT: v_cmp_eq_f32_e64
define amdgpu_kernel void @v_fcmp_f32(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_oeq:
; GCN: v_cmp_eq_f32_e64
define amdgpu_kernel void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_one:
; GCN: v_cmp_neq_f32_e64
define amdgpu_kernel void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ogt:
; GCN: v_cmp_gt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_oge:
; GCN: v_cmp_ge_f32_e64
define amdgpu_kernel void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_olt:
; GCN: v_cmp_lt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ole:
; GCN: v_cmp_le_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ueq:
; GCN: v_cmp_nlg_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_une:
; GCN: v_cmp_neq_f32_e64
define amdgpu_kernel void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ugt:
; GCN: v_cmp_nle_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_uge:
; GCN: v_cmp_nlt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ult:
; GCN: v_cmp_nge_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f32_ule:
; GCN: v_cmp_ngt_f32_e64
define amdgpu_kernel void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

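; The f64 variants use the same condition codes and should select the
; corresponding 64-bit compares (v_cmp_*_f64).
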
; GCN-LABEL: {{^}}v_fcmp_f64_oeq:
; GCN: v_cmp_eq_f64_e64
define amdgpu_kernel void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_one:
; GCN: v_cmp_neq_f64_e64
define amdgpu_kernel void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ogt:
; GCN: v_cmp_gt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_oge:
; GCN: v_cmp_ge_f64_e64
define amdgpu_kernel void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_olt:
; GCN: v_cmp_lt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ole:
; GCN: v_cmp_le_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ueq:
; GCN: v_cmp_nlg_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_une:
; GCN: v_cmp_neq_f64_e64
define amdgpu_kernel void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ugt:
; GCN: v_cmp_nle_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_uge:
; GCN: v_cmp_nlt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ult:
; GCN: v_cmp_nge_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f64_ule:
; GCN: v_cmp_ngt_f64_e64
define amdgpu_kernel void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

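; f16 compares: VI selects the native v_cmp_*_f16 instructions. SI has no f16
; VOPC compares, so the half operands are first extended with v_cvt_f32_f16
; and the compare is done in f32, as the SI check lines below expect.
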
; GCN-LABEL: {{^}}v_fcmp_f16_oeq_with_fabs:
; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|

; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], s{{[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT1:v[0-9]+]], |s{{[0-9]+}}|
; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT0]], [[CVT1]]
define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
  %temp = call half @llvm.fabs.f16(half %a)
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oeq_both_operands_with_fabs:
; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|

; SI: v_cvt_f32_f16_e64 [[CVT0:v[0-9]+]], |s{{[0-9]+}}|
; SI: v_cvt_f32_f16_e64 [[CVT1:v[0-9]+]], |s{{[0-9]+}}|
; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT0]], [[CVT1]]
define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
  %temp = call half @llvm.fabs.f16(half %a)
  %src_input = call half @llvm.fabs.f16(half %src)
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src_input, half %temp, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16:
; GCN-NOT: v_cmp
define amdgpu_kernel void @v_fcmp_f16(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 -1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

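; In the remaining f16 tests the constant 100.0 is legalized on SI to the f32
; immediate 0x42c80000 in an SGPR. Because that constant ends up as the first
; compare operand, the SI checks use the mirrored predicate (e.g. ogt on the
; IR operands selects v_cmp_lt_f32 with [[K]] as the first operand).
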
; GCN-LABEL: {{^}}v_fcmp_f16_oeq:
; VI: v_cmp_eq_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_oeq(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 1)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_one:
; VI: v_cmp_neq_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_one(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 6)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ogt:
; VI: v_cmp_gt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ogt(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 2)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_oge:
; VI: v_cmp_ge_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_le_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_oge(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 3)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_olt:
; VI: v_cmp_lt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_gt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_olt(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 4)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ole:
; VI: v_cmp_le_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_ge_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ole(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 5)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ueq:
; VI: v_cmp_nlg_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nlg_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ueq(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 9)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_une:
; VI: v_cmp_neq_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_une(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 14)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ugt:
; VI: v_cmp_nle_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nge_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ugt(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 10)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_uge:
; VI: v_cmp_nlt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_ngt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_uge(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 11)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ult:
; VI: v_cmp_nge_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nle_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ult(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 12)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fcmp_f16_ule:
; VI: v_cmp_ngt_f16_e64

; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x42c80000
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_nlt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[CVT]]
define amdgpu_kernel void @v_fcmp_f16_ule(i64 addrspace(1)* %out, half %src) {
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 13)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind readnone convergent }