1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; f32 case: an fmul by 2.0 feeds fmed3(x, 0.0, 1.0). The fmed3 call carries the
; nnan flag and the function uses attribute group #0 ("amdgpu-ieee"="true").
; Both gfx1010 and gfx1200 are expected to emit a single v_mul_f32_e64 with the
; clamp modifier and no separate med3 instruction.
5 define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 {
6 ; GFX10-LABEL: test_fmed3_f32_known_nnan_ieee_true:
8 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
10 ; GFX10-NEXT: s_setpc_b64 s[30:31]
12 ; GFX12-LABEL: test_fmed3_f32_known_nnan_ieee_true:
14 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
15 ; GFX12-NEXT: s_wait_expcnt 0x0
16 ; GFX12-NEXT: s_wait_samplecnt 0x0
17 ; GFX12-NEXT: s_wait_bvhcnt 0x0
18 ; GFX12-NEXT: s_wait_kmcnt 0x0
19 ; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
20 ; GFX12-NEXT: s_setpc_b64 s[30:31]
21 %fmul = fmul float %a, 2.0
22 %fmed = call nnan float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
; f16 case: an fmul by 2.0 feeds fmed3(x, 0.0, 1.0) with the nnan flag on the
; fmed3 call, under attribute group #1 ("amdgpu-ieee"="false").
; Both gfx1010 and gfx1200 are expected to emit a single v_mul_f16_e64 with the
; clamp modifier and no separate med3 instruction.
26 define half @test_fmed3_f16_known_nnan_ieee_false(half %a) #1 {
27 ; GFX10-LABEL: test_fmed3_f16_known_nnan_ieee_false:
29 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
31 ; GFX10-NEXT: s_setpc_b64 s[30:31]
33 ; GFX12-LABEL: test_fmed3_f16_known_nnan_ieee_false:
35 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
36 ; GFX12-NEXT: s_wait_expcnt 0x0
37 ; GFX12-NEXT: s_wait_samplecnt 0x0
38 ; GFX12-NEXT: s_wait_bvhcnt 0x0
39 ; GFX12-NEXT: s_wait_kmcnt 0x0
40 ; GFX12-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
41 ; GFX12-NEXT: s_setpc_b64 s[30:31]
42 %fmul = fmul half %a, 2.0
43 %fmed = call nnan half @llvm.amdgcn.fmed3.f16(half %fmul, half 0.0, half 1.0)
47 ; %fmin is known non-SNaN because fmin inputs are fcanonicalized
; llvm.minnum(%a, 10.0) feeds fmed3(x, 0.0, 1.0) under attribute group #2
; (ieee=true, dx10-clamp=true); the fmed3 call has no nnan flag.
; Expected output is a v_max of the input with itself (presumably the
; canonicalize mentioned above) followed by a v_min against 0x41200000 (10.0 as
; an f32 bit pattern) that carries the clamp modifier. gfx1200 uses the _num
; mnemonics and has an s_delay_alu between the two instructions.
48 define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
49 ; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
51 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
53 ; GFX10-NEXT: v_min_f32_e64 v0, 0x41200000, v0 clamp
54 ; GFX10-NEXT: s_setpc_b64 s[30:31]
56 ; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
58 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
59 ; GFX12-NEXT: s_wait_expcnt 0x0
60 ; GFX12-NEXT: s_wait_samplecnt 0x0
61 ; GFX12-NEXT: s_wait_bvhcnt 0x0
62 ; GFX12-NEXT: s_wait_kmcnt 0x0
63 ; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
64 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
65 ; GFX12-NEXT: v_min_num_f32_e64 v0, 0x41200000, v0 clamp
66 ; GFX12-NEXT: s_setpc_b64 s[30:31]
67 %fmin = call float @llvm.minnum.f32(float %a, float 10.0)
68 %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmin, float 0.0, float 1.0)
72 ; input may be SNaN. It's safe to clamp since third operand in fmed3 is 0.0
; Note the constant order here: the call is fmed3(%fmul, 1.0, 0.0), so 0.0 is
; the third operand. The function uses attribute group #2 (ieee=true,
; dx10-clamp=true) and the fmed3 call has no nnan flag.
; Both gfx1010 and gfx1200 are expected to fold everything into one
; v_mul_f32_e64 with the clamp modifier.
73 define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true(float %a) #2 {
74 ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
76 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
78 ; GFX10-NEXT: s_setpc_b64 s[30:31]
80 ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
82 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
83 ; GFX12-NEXT: s_wait_expcnt 0x0
84 ; GFX12-NEXT: s_wait_samplecnt 0x0
85 ; GFX12-NEXT: s_wait_bvhcnt 0x0
86 ; GFX12-NEXT: s_wait_kmcnt 0x0
87 ; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
88 ; GFX12-NEXT: s_setpc_b64 s[30:31]
89 %fmul = fmul float %a, 2.0
90 %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
94 ; global nnan function attribute always forces clamp combine
; Here the no-NaN guarantee comes from attribute group #3
; ("no-nans-fp-math"="true") rather than from an nnan flag on the fmed3 call.
; The IR is an fmul by 2.0 feeding fmed3(x, 0.0, 1.0); both gfx1010 and gfx1200
; are expected to emit one v_mul_f32_e64 with the clamp modifier.
96 define float @test_fmed3_global_nnan(float %a) #3 {
97 ; GFX10-LABEL: test_fmed3_global_nnan:
99 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
101 ; GFX10-NEXT: s_setpc_b64 s[30:31]
103 ; GFX12-LABEL: test_fmed3_global_nnan:
105 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
106 ; GFX12-NEXT: s_wait_expcnt 0x0
107 ; GFX12-NEXT: s_wait_samplecnt 0x0
108 ; GFX12-NEXT: s_wait_bvhcnt 0x0
109 ; GFX12-NEXT: s_wait_kmcnt 0x0
110 ; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
111 ; GFX12-NEXT: s_setpc_b64 s[30:31]
112 %fmul = fmul float %a, 2.0
113 %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
117 ; ------------------------------------------------------------------------------
119 ; ------------------------------------------------------------------------------
121 ; ieee=false requires known never NaN input
; The function uses attribute group #1 ("amdgpu-ieee"="false") and the
; fmed3(%fmul, 1.0, 0.0) call has no nnan flag, so nothing marks the input as
; NaN-free.
; The two targets differ in the recorded output: gfx1010 keeps a separate
; v_mul_f32_e32 and v_med3_f32, while gfx1200 folds the sequence into a single
; v_mul_f32_e64 with the clamp modifier.
; NOTE(review): presumably the gfx1200 result reflects that target not honoring
; the ieee mode attribute -- confirm against the AMDGPU backend documentation.
122 define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
123 ; GFX10-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
125 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
127 ; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
128 ; GFX10-NEXT: s_setpc_b64 s[30:31]
130 ; GFX12-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
132 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
133 ; GFX12-NEXT: s_wait_expcnt 0x0
134 ; GFX12-NEXT: s_wait_samplecnt 0x0
135 ; GFX12-NEXT: s_wait_bvhcnt 0x0
136 ; GFX12-NEXT: s_wait_kmcnt 0x0
137 ; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
138 ; GFX12-NEXT: s_setpc_b64 s[30:31]
139 %fmul = fmul float %a, 2.0
140 %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
144 ; ieee=true input is known non-SNaN but dx10_clamp=false
; Same IR as the dx10clamp_true variant -- llvm.minnum(%a, 10.0) feeding
; fmed3(x, 0.0, 1.0) -- but under attribute group #4, which sets
; "amdgpu-dx10-clamp"="false".
; The two targets differ in the recorded output: gfx1010 keeps a separate
; v_med3_f32 after the v_max/v_min pair, while gfx1200 puts the clamp modifier
; on the v_min_num and emits no med3.
; NOTE(review): presumably gfx1200 does not honor the dx10-clamp attribute --
; confirm against the AMDGPU backend documentation.
145 define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false(float %a) #4 {
146 ; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
148 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
150 ; GFX10-NEXT: v_min_f32_e32 v0, 0x41200000, v0
151 ; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
152 ; GFX10-NEXT: s_setpc_b64 s[30:31]
154 ; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
156 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
157 ; GFX12-NEXT: s_wait_expcnt 0x0
158 ; GFX12-NEXT: s_wait_samplecnt 0x0
159 ; GFX12-NEXT: s_wait_bvhcnt 0x0
160 ; GFX12-NEXT: s_wait_kmcnt 0x0
161 ; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
162 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
163 ; GFX12-NEXT: v_min_num_f32_e64 v0, 0x41200000, v0 clamp
164 ; GFX12-NEXT: s_setpc_b64 s[30:31]
165 %fmin = call float @llvm.minnum.f32(float %a, float 10.0)
166 %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmin, float 0.0, float 1.0)
170 ; ieee=true dx10_clamp=true but input may be SNaN, clamp requires third operand in fmed3 to be 0.0
; The call is fmed3(%fmul, 0.0, 1.0), so the third operand is 1.0, not 0.0.
; The function uses attribute group #2 (ieee=true, dx10-clamp=true) and the
; call has no nnan flag.
; NOTE(review): despite the requirement stated above, the recorded checks for
; both gfx1010 and gfx1200 show the clamp folded into a single v_mul_f32_e64.
; Presumably the fmul result is treated as already free of signaling NaNs --
; confirm that this output is intended before relying on this test as a
; negative case.
171 define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
172 ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
174 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175 ; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
176 ; GFX10-NEXT: s_setpc_b64 s[30:31]
178 ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
180 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
181 ; GFX12-NEXT: s_wait_expcnt 0x0
182 ; GFX12-NEXT: s_wait_samplecnt 0x0
183 ; GFX12-NEXT: s_wait_bvhcnt 0x0
184 ; GFX12-NEXT: s_wait_kmcnt 0x0
185 ; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
186 ; GFX12-NEXT: s_setpc_b64 s[30:31]
187 %fmul = fmul float %a, 2.0
188 %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
; Intrinsics called by the test functions in this file.
192 declare half @llvm.amdgcn.fmed3.f16(half, half, half)
193 declare float @llvm.amdgcn.fmed3.f32(float, float, float)
194 declare float @llvm.minnum.f32(float, float)
; Attribute groups referenced by the test functions:
;   #0 - ieee mode enabled
;   #1 - ieee mode disabled
;   #2 - ieee mode enabled, dx10-clamp enabled
;   #3 - function-wide no-NaNs FP math
;   #4 - ieee mode enabled, dx10-clamp disabled
196 attributes #0 = {"amdgpu-ieee"="true"}
197 attributes #1 = {"amdgpu-ieee"="false"}
198 attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
199 attributes #3 = {"no-nans-fp-math"="true"}
200 attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}