1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
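2 ; Check that fast math flags on fdiv are interpreted consistently between
3 ; implementations: SelectionDAG vs. GlobalISel (-global-isel=0/1), each with the amdgpu-codegenprepare fdiv expansion enabled and disabled.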
5 ; RUN: llc -global-isel=0 -amdgpu-codegenprepare-disable-fdiv-expansion=0 -march=amdgcn -mcpu=hawaii -denormal-fp-math=ieee < %s | FileCheck -check-prefixes=CHECK,IEEE,CODEGEN-IEEE-SDAG %s
6 ; RUN: llc -global-isel=1 -amdgpu-codegenprepare-disable-fdiv-expansion=0 -march=amdgcn -mcpu=hawaii -denormal-fp-math=ieee < %s | FileCheck -check-prefixes=CHECK,IEEE,CODEGEN-IEEE-GISEL %s
7 ; RUN: llc -global-isel=0 -amdgpu-codegenprepare-disable-fdiv-expansion=1 -march=amdgcn -mcpu=hawaii -denormal-fp-math=ieee < %s | FileCheck -check-prefixes=CHECK,IEEE,IR-IEEE,IR-IEEE-SDAG %s
8 ; RUN: llc -global-isel=1 -amdgpu-codegenprepare-disable-fdiv-expansion=1 -march=amdgcn -mcpu=hawaii -denormal-fp-math=ieee < %s | FileCheck -check-prefixes=CHECK,IEEE,IR-IEEE,IR-IEEE-GISEL %s
10 ; RUN: llc -global-isel=0 -amdgpu-codegenprepare-disable-fdiv-expansion=0 -march=amdgcn -mcpu=hawaii -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefixes=CHECK,DAZ,CODEGEN-DAZ,CODEGEN-DAZ-SDAG %s
11 ; RUN: llc -global-isel=1 -amdgpu-codegenprepare-disable-fdiv-expansion=0 -march=amdgcn -mcpu=hawaii -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefixes=CHECK,DAZ,CODEGEN-DAZ,CODEGEN-DAZ-GISEL %s
12 ; RUN: llc -global-isel=0 -amdgpu-codegenprepare-disable-fdiv-expansion=1 -march=amdgcn -mcpu=hawaii -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefixes=CHECK,DAZ,IR-DAZ,IR-DAZ-SDAG %s
13 ; RUN: llc -global-isel=1 -amdgpu-codegenprepare-disable-fdiv-expansion=1 -march=amdgcn -mcpu=hawaii -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefixes=CHECK,DAZ,IR-DAZ,IR-DAZ-GISEL %s
; Baseline: plain fdiv of two variable operands with no fast-math flags.
; Each of the four denormal-fp-math=ieee configurations checks the full
; expansion (two v_div_scale_f32, v_rcp_f32, a v_fma_f32/v_mul_f32 chain,
; v_div_fmas_f32, v_div_fixup_f32). The SDAG and GISEL expectations differ only
; in where the second v_div_scale_f32 is scheduled and which temporary VGPR the
; first v_fma_f32 writes.
; The four preserve-sign configurations agree with each other and share the DAZ
; prefix: same sequence, but with the FMA chain bracketed by s_setreg_imm32_b32
; writes to hwreg(HW_REG_MODE, 4, 2) (3 before, 0 after).
; NOTE(review): presumably this toggles the FP32 denormal mode around the
; refinement steps -- confirm against the ISA documentation.
15 define float @v_fdiv_f32(float %x, float %y) {
16 ; CODEGEN-IEEE-SDAG-LABEL: v_fdiv_f32:
17 ; CODEGEN-IEEE-SDAG: ; %bb.0:
18 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; CODEGEN-IEEE-SDAG-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
20 ; CODEGEN-IEEE-SDAG-NEXT: v_rcp_f32_e32 v3, v2
21 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v4, -v2, v3, 1.0
22 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v3, v4, v3, v3
23 ; CODEGEN-IEEE-SDAG-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
24 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v5, v4, v3
25 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v6, -v2, v5, v4
26 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v5, v6, v3, v5
27 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v2, -v2, v5, v4
28 ; CODEGEN-IEEE-SDAG-NEXT: v_div_fmas_f32 v2, v2, v3, v5
29 ; CODEGEN-IEEE-SDAG-NEXT: v_div_fixup_f32 v0, v2, v1, v0
30 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
32 ; CODEGEN-IEEE-GISEL-LABEL: v_fdiv_f32:
33 ; CODEGEN-IEEE-GISEL: ; %bb.0:
34 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
36 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v3, v2
37 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
38 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v2, v3, 1.0
39 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v3, v5, v3, v3
40 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v5, v4, v3
41 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v6, -v2, v5, v4
42 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, v6, v3, v5
43 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v2, -v2, v5, v4
44 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fmas_f32 v2, v2, v3, v5
45 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v2, v1, v0
46 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
48 ; IR-IEEE-SDAG-LABEL: v_fdiv_f32:
49 ; IR-IEEE-SDAG: ; %bb.0:
50 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
52 ; IR-IEEE-SDAG-NEXT: v_rcp_f32_e32 v3, v2
53 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v4, -v2, v3, 1.0
54 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v3, v4, v3, v3
55 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
56 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v5, v4, v3
57 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v6, -v2, v5, v4
58 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v5, v6, v3, v5
59 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v2, -v2, v5, v4
60 ; IR-IEEE-SDAG-NEXT: v_div_fmas_f32 v2, v2, v3, v5
61 ; IR-IEEE-SDAG-NEXT: v_div_fixup_f32 v0, v2, v1, v0
62 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
64 ; IR-IEEE-GISEL-LABEL: v_fdiv_f32:
65 ; IR-IEEE-GISEL: ; %bb.0:
66 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
68 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v3, v2
69 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
70 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v2, v3, 1.0
71 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v3, v5, v3, v3
72 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v5, v4, v3
73 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v6, -v2, v5, v4
74 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, v6, v3, v5
75 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v2, -v2, v5, v4
76 ; IR-IEEE-GISEL-NEXT: v_div_fmas_f32 v2, v2, v3, v5
77 ; IR-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v2, v1, v0
78 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
80 ; DAZ-LABEL: v_fdiv_f32:
82 ; DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; DAZ-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
84 ; DAZ-NEXT: v_rcp_f32_e32 v3, v2
85 ; DAZ-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
86 ; DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
87 ; DAZ-NEXT: v_fma_f32 v5, -v2, v3, 1.0
88 ; DAZ-NEXT: v_fma_f32 v3, v5, v3, v3
89 ; DAZ-NEXT: v_mul_f32_e32 v5, v4, v3
90 ; DAZ-NEXT: v_fma_f32 v6, -v2, v5, v4
91 ; DAZ-NEXT: v_fma_f32 v5, v6, v3, v5
92 ; DAZ-NEXT: v_fma_f32 v2, -v2, v5, v4
93 ; DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
94 ; DAZ-NEXT: v_div_fmas_f32 v2, v2, v3, v5
95 ; DAZ-NEXT: v_div_fixup_f32 v0, v2, v1, v0
96 ; DAZ-NEXT: s_setpc_b64 s[30:31]
97 %fdiv = fdiv float %x, %y
; fdiv with the afn flag on two variable operands. Every llc configuration
; shares the common prefix and expects the short form: v_rcp_f32 of the
; denominator (v1) followed by v_mul_f32 with the numerator (v0).
101 define float @v_fdiv_f32_afn(float %x, float %y) {
102 ; CHECK-LABEL: v_fdiv_f32_afn:
104 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; CHECK-NEXT: v_rcp_f32_e32 v1, v1
106 ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
107 ; CHECK-NEXT: s_setpc_b64 s[30:31]
108 %fdiv = fdiv afn float %x, %y
; fdiv with only the arcp flag on two variable operands. For every prefix the
; expected instructions are the same full expansion that is checked for the
; unflagged @v_fdiv_f32: arcp on its own does not select the
; v_rcp_f32 + v_mul_f32 form seen in the afn tests.
112 define float @v_fdiv_f32_arcp(float %x, float %y) {
113 ; CODEGEN-IEEE-SDAG-LABEL: v_fdiv_f32_arcp:
114 ; CODEGEN-IEEE-SDAG: ; %bb.0:
115 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116 ; CODEGEN-IEEE-SDAG-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
117 ; CODEGEN-IEEE-SDAG-NEXT: v_rcp_f32_e32 v3, v2
118 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v4, -v2, v3, 1.0
119 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v3, v4, v3, v3
120 ; CODEGEN-IEEE-SDAG-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
121 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v5, v4, v3
122 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v6, -v2, v5, v4
123 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v5, v6, v3, v5
124 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v2, -v2, v5, v4
125 ; CODEGEN-IEEE-SDAG-NEXT: v_div_fmas_f32 v2, v2, v3, v5
126 ; CODEGEN-IEEE-SDAG-NEXT: v_div_fixup_f32 v0, v2, v1, v0
127 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
129 ; CODEGEN-IEEE-GISEL-LABEL: v_fdiv_f32_arcp:
130 ; CODEGEN-IEEE-GISEL: ; %bb.0:
131 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
133 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v3, v2
134 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
135 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v2, v3, 1.0
136 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v3, v5, v3, v3
137 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v5, v4, v3
138 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v6, -v2, v5, v4
139 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, v6, v3, v5
140 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v2, -v2, v5, v4
141 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fmas_f32 v2, v2, v3, v5
142 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v2, v1, v0
143 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
145 ; IR-IEEE-SDAG-LABEL: v_fdiv_f32_arcp:
146 ; IR-IEEE-SDAG: ; %bb.0:
147 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
149 ; IR-IEEE-SDAG-NEXT: v_rcp_f32_e32 v3, v2
150 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v4, -v2, v3, 1.0
151 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v3, v4, v3, v3
152 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
153 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v5, v4, v3
154 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v6, -v2, v5, v4
155 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v5, v6, v3, v5
156 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v2, -v2, v5, v4
157 ; IR-IEEE-SDAG-NEXT: v_div_fmas_f32 v2, v2, v3, v5
158 ; IR-IEEE-SDAG-NEXT: v_div_fixup_f32 v0, v2, v1, v0
159 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
161 ; IR-IEEE-GISEL-LABEL: v_fdiv_f32_arcp:
162 ; IR-IEEE-GISEL: ; %bb.0:
163 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
165 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v3, v2
166 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
167 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v2, v3, 1.0
168 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v3, v5, v3, v3
169 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v5, v4, v3
170 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v6, -v2, v5, v4
171 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, v6, v3, v5
172 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v2, -v2, v5, v4
173 ; IR-IEEE-GISEL-NEXT: v_div_fmas_f32 v2, v2, v3, v5
174 ; IR-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v2, v1, v0
175 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
177 ; DAZ-LABEL: v_fdiv_f32_arcp:
179 ; DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; DAZ-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
181 ; DAZ-NEXT: v_rcp_f32_e32 v3, v2
182 ; DAZ-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
183 ; DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
184 ; DAZ-NEXT: v_fma_f32 v5, -v2, v3, 1.0
185 ; DAZ-NEXT: v_fma_f32 v3, v5, v3, v3
186 ; DAZ-NEXT: v_mul_f32_e32 v5, v4, v3
187 ; DAZ-NEXT: v_fma_f32 v6, -v2, v5, v4
188 ; DAZ-NEXT: v_fma_f32 v5, v6, v3, v5
189 ; DAZ-NEXT: v_fma_f32 v2, -v2, v5, v4
190 ; DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
191 ; DAZ-NEXT: v_div_fmas_f32 v2, v2, v3, v5
192 ; DAZ-NEXT: v_div_fixup_f32 v0, v2, v1, v0
193 ; DAZ-NEXT: s_setpc_b64 s[30:31]
194 %fdiv = fdiv arcp float %x, %y
; fdiv with both arcp and afn on two variable operands. Every llc configuration
; shares the common prefix and expects the same two instructions as the
; afn-only test: v_rcp_f32 of the denominator, then v_mul_f32.
198 define float @v_fdiv_f32_arcp_afn(float %x, float %y) {
199 ; CHECK-LABEL: v_fdiv_f32_arcp_afn:
201 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202 ; CHECK-NEXT: v_rcp_f32_e32 v1, v1
203 ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
204 ; CHECK-NEXT: s_setpc_b64 s[30:31]
205 %fdiv = fdiv arcp afn float %x, %y
; Reciprocal form, fdiv of the constant 1.0 by %x, with no fast-math flags.
; The four ieee configurations agree and share the IEEE prefix; the four
; preserve-sign configurations agree and share the DAZ prefix. Both expect the
; full expansion with 1.0 used directly as the numerator operand; the DAZ
; sequence additionally brackets the FMA chain with s_setreg_imm32_b32 writes
; to hwreg(HW_REG_MODE, 4, 2).
209 define float @v_fdiv_recip_f32(float %x) {
210 ; IEEE-LABEL: v_fdiv_recip_f32:
212 ; IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213 ; IEEE-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
214 ; IEEE-NEXT: v_rcp_f32_e32 v2, v1
215 ; IEEE-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
216 ; IEEE-NEXT: v_fma_f32 v4, -v1, v2, 1.0
217 ; IEEE-NEXT: v_fma_f32 v2, v4, v2, v2
218 ; IEEE-NEXT: v_mul_f32_e32 v4, v3, v2
219 ; IEEE-NEXT: v_fma_f32 v5, -v1, v4, v3
220 ; IEEE-NEXT: v_fma_f32 v4, v5, v2, v4
221 ; IEEE-NEXT: v_fma_f32 v1, -v1, v4, v3
222 ; IEEE-NEXT: v_div_fmas_f32 v1, v1, v2, v4
223 ; IEEE-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
224 ; IEEE-NEXT: s_setpc_b64 s[30:31]
226 ; DAZ-LABEL: v_fdiv_recip_f32:
228 ; DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229 ; DAZ-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
230 ; DAZ-NEXT: v_rcp_f32_e32 v2, v1
231 ; DAZ-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
232 ; DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
233 ; DAZ-NEXT: v_fma_f32 v4, -v1, v2, 1.0
234 ; DAZ-NEXT: v_fma_f32 v2, v4, v2, v2
235 ; DAZ-NEXT: v_mul_f32_e32 v4, v3, v2
236 ; DAZ-NEXT: v_fma_f32 v5, -v1, v4, v3
237 ; DAZ-NEXT: v_fma_f32 v4, v5, v2, v4
238 ; DAZ-NEXT: v_fma_f32 v1, -v1, v4, v3
239 ; DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
240 ; DAZ-NEXT: v_div_fmas_f32 v1, v1, v2, v4
241 ; DAZ-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
242 ; DAZ-NEXT: s_setpc_b64 s[30:31]
243 %fdiv = fdiv float 1.0, %x
; Reciprocal form (1.0 / %x) with the afn flag. Every llc configuration shares
; the common prefix and expects a single v_rcp_f32 on v0 before the return.
247 define float @v_fdiv_recip_f32_afn(float %x) {
248 ; CHECK-LABEL: v_fdiv_recip_f32_afn:
250 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251 ; CHECK-NEXT: v_rcp_f32_e32 v0, v0
252 ; CHECK-NEXT: s_setpc_b64 s[30:31]
253 %fdiv = fdiv afn float 1.0, %x
; Reciprocal form (1.0 / %x) with only the arcp flag. The expectations under
; both the IEEE and DAZ prefixes are the same full expansion that is checked
; for the unflagged @v_fdiv_recip_f32; arcp alone does not reduce this to a
; bare v_rcp_f32.
257 define float @v_fdiv_recip_f32_arcp(float %x) {
258 ; IEEE-LABEL: v_fdiv_recip_f32_arcp:
260 ; IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261 ; IEEE-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
262 ; IEEE-NEXT: v_rcp_f32_e32 v2, v1
263 ; IEEE-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
264 ; IEEE-NEXT: v_fma_f32 v4, -v1, v2, 1.0
265 ; IEEE-NEXT: v_fma_f32 v2, v4, v2, v2
266 ; IEEE-NEXT: v_mul_f32_e32 v4, v3, v2
267 ; IEEE-NEXT: v_fma_f32 v5, -v1, v4, v3
268 ; IEEE-NEXT: v_fma_f32 v4, v5, v2, v4
269 ; IEEE-NEXT: v_fma_f32 v1, -v1, v4, v3
270 ; IEEE-NEXT: v_div_fmas_f32 v1, v1, v2, v4
271 ; IEEE-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
272 ; IEEE-NEXT: s_setpc_b64 s[30:31]
274 ; DAZ-LABEL: v_fdiv_recip_f32_arcp:
276 ; DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277 ; DAZ-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
278 ; DAZ-NEXT: v_rcp_f32_e32 v2, v1
279 ; DAZ-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
280 ; DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
281 ; DAZ-NEXT: v_fma_f32 v4, -v1, v2, 1.0
282 ; DAZ-NEXT: v_fma_f32 v2, v4, v2, v2
283 ; DAZ-NEXT: v_mul_f32_e32 v4, v3, v2
284 ; DAZ-NEXT: v_fma_f32 v5, -v1, v4, v3
285 ; DAZ-NEXT: v_fma_f32 v4, v5, v2, v4
286 ; DAZ-NEXT: v_fma_f32 v1, -v1, v4, v3
287 ; DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
288 ; DAZ-NEXT: v_div_fmas_f32 v1, v1, v2, v4
289 ; DAZ-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
290 ; DAZ-NEXT: s_setpc_b64 s[30:31]
291 %fdiv = fdiv arcp float 1.0, %x
; Reciprocal form (1.0 / %x) with both arcp and afn. Every llc configuration
; shares the common prefix and expects a single v_rcp_f32 on v0, matching the
; afn-only reciprocal test.
295 define float @v_fdiv_recip_f32_arcp_afn(float %x) {
296 ; CHECK-LABEL: v_fdiv_recip_f32_arcp_afn:
298 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299 ; CHECK-NEXT: v_rcp_f32_e32 v0, v0
300 ; CHECK-NEXT: s_setpc_b64 s[30:31]
301 %fdiv = fdiv arcp afn float 1.0, %x
; 1.0 / sqrt(%x) with no fast-math flags on either the llvm.sqrt.f32 call or
; the fdiv. Each of the eight llc configurations is checked under its own
; prefix.
; The ieee configurations expect a sequence built around v_sqrt_f32 (with
; v_add_i32 -1 / +1 candidates selected through v_cndmask_b32); the
; preserve-sign configurations expect a sequence built around v_rsq_f32 and a
; v_fma_f32 chain instead. In every configuration that result then feeds the
; full fdiv expansion with 1.0 as numerator -- none of them reduces the
; sqrt + fdiv pair to a lone v_rsq_f32.
; SDAG and GISEL expectations differ in how the constant 0xf800000 is
; materialized (s_mov_b32 into s4 vs. v_mov_b32_e32 into v1) and in
; instruction order / temporary registers.
305 define float @v_fdiv_recip_sqrt_f32(float %x) {
306 ; CODEGEN-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32:
307 ; CODEGEN-IEEE-SDAG: ; %bb.0:
308 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309 ; CODEGEN-IEEE-SDAG-NEXT: s_mov_b32 s4, 0xf800000
310 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
311 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
312 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
313 ; CODEGEN-IEEE-SDAG-NEXT: v_sqrt_f32_e32 v1, v0
314 ; CODEGEN-IEEE-SDAG-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
315 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v3, -v2, v1, v0
316 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
317 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
318 ; CODEGEN-IEEE-SDAG-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
319 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v1, -v3, v1, v0
320 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
321 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
322 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
323 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
324 ; CODEGEN-IEEE-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
325 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
326 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
327 ; CODEGEN-IEEE-SDAG-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
328 ; CODEGEN-IEEE-SDAG-NEXT: v_rcp_f32_e32 v2, v1
329 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v3, -v1, v2, 1.0
330 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v2, v3, v2, v2
331 ; CODEGEN-IEEE-SDAG-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
332 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v4, v3, v2
333 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v5, -v1, v4, v3
334 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v4, v5, v2, v4
335 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v1, -v1, v4, v3
336 ; CODEGEN-IEEE-SDAG-NEXT: v_div_fmas_f32 v1, v1, v2, v4
337 ; CODEGEN-IEEE-SDAG-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
338 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
340 ; CODEGEN-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32:
341 ; CODEGEN-IEEE-GISEL: ; %bb.0:
342 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
344 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
345 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
346 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
347 ; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
348 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
349 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
350 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
351 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
352 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
353 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
354 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
355 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
356 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
357 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
358 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
359 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
360 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
361 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
362 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v2, v1
363 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
364 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
365 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
366 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
367 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
368 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
369 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
370 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
371 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
372 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
374 ; IR-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32:
375 ; IR-IEEE-SDAG: ; %bb.0:
376 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; IR-IEEE-SDAG-NEXT: s_mov_b32 s4, 0xf800000
378 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
379 ; IR-IEEE-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
380 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
381 ; IR-IEEE-SDAG-NEXT: v_sqrt_f32_e32 v1, v0
382 ; IR-IEEE-SDAG-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
383 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v3, -v2, v1, v0
384 ; IR-IEEE-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
385 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
386 ; IR-IEEE-SDAG-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
387 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v1, -v3, v1, v0
388 ; IR-IEEE-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
389 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
390 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
391 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
392 ; IR-IEEE-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
393 ; IR-IEEE-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
394 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
395 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
396 ; IR-IEEE-SDAG-NEXT: v_rcp_f32_e32 v2, v1
397 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v3, -v1, v2, 1.0
398 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v2, v3, v2, v2
399 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
400 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v4, v3, v2
401 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v5, -v1, v4, v3
402 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v4, v5, v2, v4
403 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v1, -v1, v4, v3
404 ; IR-IEEE-SDAG-NEXT: v_div_fmas_f32 v1, v1, v2, v4
405 ; IR-IEEE-SDAG-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
406 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
408 ; IR-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32:
409 ; IR-IEEE-GISEL: ; %bb.0:
410 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
411 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
412 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
413 ; IR-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
414 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
415 ; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
416 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
417 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
418 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
419 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
420 ; IR-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
421 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
422 ; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
423 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
424 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
425 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
426 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
427 ; IR-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
428 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
429 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
430 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v2, v1
431 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
432 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
433 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
434 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
435 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
436 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
437 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
438 ; IR-IEEE-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
439 ; IR-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
440 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
442 ; CODEGEN-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32:
443 ; CODEGEN-DAZ-SDAG: ; %bb.0:
444 ; CODEGEN-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445 ; CODEGEN-DAZ-SDAG-NEXT: s_mov_b32 s4, 0xf800000
446 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
447 ; CODEGEN-DAZ-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
448 ; CODEGEN-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
449 ; CODEGEN-DAZ-SDAG-NEXT: v_rsq_f32_e32 v1, v0
450 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, v0, v1
451 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0.5, v1
452 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v3, -v1, v2, 0.5
453 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v2, v2, v3, v2
454 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v4, -v2, v2, v0
455 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v1, v1, v3, v1
456 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v1, v4, v1, v2
457 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
458 ; CODEGEN-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
459 ; CODEGEN-DAZ-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
460 ; CODEGEN-DAZ-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
461 ; CODEGEN-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
462 ; CODEGEN-DAZ-SDAG-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
463 ; CODEGEN-DAZ-SDAG-NEXT: v_rcp_f32_e32 v2, v1
464 ; CODEGEN-DAZ-SDAG-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
465 ; CODEGEN-DAZ-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
466 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v4, -v1, v2, 1.0
467 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v2, v4, v2, v2
468 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v4, v3, v2
469 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v5, -v1, v4, v3
470 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v4, v5, v2, v4
471 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v1, -v1, v4, v3
472 ; CODEGEN-DAZ-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
473 ; CODEGEN-DAZ-SDAG-NEXT: v_div_fmas_f32 v1, v1, v2, v4
474 ; CODEGEN-DAZ-SDAG-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
475 ; CODEGEN-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
477 ; CODEGEN-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32:
478 ; CODEGEN-DAZ-GISEL: ; %bb.0:
479 ; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
481 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
482 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
483 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
484 ; CODEGEN-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
485 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
486 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
487 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
488 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
489 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
490 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
491 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
492 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
493 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
494 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
495 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
496 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
497 ; CODEGEN-DAZ-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
498 ; CODEGEN-DAZ-GISEL-NEXT: v_rcp_f32_e32 v2, v1
499 ; CODEGEN-DAZ-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
500 ; CODEGEN-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
501 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
502 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
503 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
504 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
505 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
506 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
507 ; CODEGEN-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
508 ; CODEGEN-DAZ-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
509 ; CODEGEN-DAZ-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
510 ; CODEGEN-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
512 ; IR-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32:
513 ; IR-DAZ-SDAG: ; %bb.0:
514 ; IR-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515 ; IR-DAZ-SDAG-NEXT: s_mov_b32 s4, 0xf800000
516 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
517 ; IR-DAZ-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
518 ; IR-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
519 ; IR-DAZ-SDAG-NEXT: v_rsq_f32_e32 v1, v0
520 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, v0, v1
521 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0.5, v1
522 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v3, -v1, v2, 0.5
523 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v2, v2, v3, v2
524 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v4, -v2, v2, v0
525 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v1, v1, v3, v1
526 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v1, v4, v1, v2
527 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
528 ; IR-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
529 ; IR-DAZ-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
530 ; IR-DAZ-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
531 ; IR-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
532 ; IR-DAZ-SDAG-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
533 ; IR-DAZ-SDAG-NEXT: v_rcp_f32_e32 v2, v1
534 ; IR-DAZ-SDAG-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
535 ; IR-DAZ-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
536 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v4, -v1, v2, 1.0
537 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v2, v4, v2, v2
538 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v4, v3, v2
539 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v5, -v1, v4, v3
540 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v4, v5, v2, v4
541 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v1, -v1, v4, v3
542 ; IR-DAZ-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
543 ; IR-DAZ-SDAG-NEXT: v_div_fmas_f32 v1, v1, v2, v4
544 ; IR-DAZ-SDAG-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
545 ; IR-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
547 ; IR-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32:
548 ; IR-DAZ-GISEL: ; %bb.0:
549 ; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
551 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
552 ; IR-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
553 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
554 ; IR-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
555 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
556 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
557 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
558 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
559 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
560 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
561 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
562 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
563 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
564 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
565 ; IR-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
566 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
567 ; IR-DAZ-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
568 ; IR-DAZ-GISEL-NEXT: v_rcp_f32_e32 v2, v1
569 ; IR-DAZ-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
570 ; IR-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
571 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
572 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
573 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
574 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
575 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
576 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
577 ; IR-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
578 ; IR-DAZ-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
579 ; IR-DAZ-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
580 ; IR-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
581 %sqrt = call float @llvm.sqrt.f32(float %x)
582 %fdiv = fdiv float 1.0, %sqrt
; 1.0 / sqrt(%x) with afn on both the llvm.sqrt.f32 call and the fdiv. Every
; llc configuration shares the common prefix and expects v_sqrt_f32 followed by
; v_rcp_f32, i.e. the two operations are kept as separate instructions here.
586 define float @v_fdiv_recip_sqrt_f32_afn(float %x) {
587 ; CHECK-LABEL: v_fdiv_recip_sqrt_f32_afn:
589 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590 ; CHECK-NEXT: v_sqrt_f32_e32 v0, v0
591 ; CHECK-NEXT: v_rcp_f32_e32 v0, v0
592 ; CHECK-NEXT: s_setpc_b64 s[30:31]
593 %sqrt = call afn float @llvm.sqrt.f32(float %x)
594 %fdiv = fdiv afn float 1.0, %sqrt
598 define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
599 ; CODEGEN-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp:
600 ; CODEGEN-IEEE-SDAG: ; %bb.0:
601 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
602 ; CODEGEN-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
603 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
605 ; CODEGEN-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp:
606 ; CODEGEN-IEEE-GISEL: ; %bb.0:
607 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
609 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
610 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
611 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
612 ; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
613 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
614 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
615 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
616 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
617 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
618 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
619 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
620 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
621 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
622 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
623 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
624 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
625 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
626 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
627 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v2, v1
628 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
629 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
630 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
631 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
632 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
633 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
634 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
635 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
636 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
637 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
639 ; IR-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp:
640 ; IR-IEEE-SDAG: ; %bb.0:
641 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642 ; IR-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
643 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
645 ; IR-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp:
646 ; IR-IEEE-GISEL: ; %bb.0:
647 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
649 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
650 ; IR-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
651 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
652 ; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
653 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
654 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
655 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
656 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
657 ; IR-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
658 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
659 ; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
660 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
661 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
662 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
663 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
664 ; IR-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
665 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
666 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
667 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v2, v1
668 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
669 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
670 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
671 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
672 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
673 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
674 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
675 ; IR-IEEE-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
676 ; IR-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
677 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
679 ; CODEGEN-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp:
680 ; CODEGEN-DAZ-SDAG: ; %bb.0:
681 ; CODEGEN-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682 ; CODEGEN-DAZ-SDAG-NEXT: v_rsq_f32_e32 v0, v0
683 ; CODEGEN-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
685 ; CODEGEN-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp:
686 ; CODEGEN-DAZ-GISEL: ; %bb.0:
687 ; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
689 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
690 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
691 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
692 ; CODEGEN-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
693 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
694 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
695 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
696 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
697 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
698 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
699 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
700 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
701 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
702 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
703 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
704 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
705 ; CODEGEN-DAZ-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
706 ; CODEGEN-DAZ-GISEL-NEXT: v_rcp_f32_e32 v2, v1
707 ; CODEGEN-DAZ-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
708 ; CODEGEN-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
709 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
710 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
711 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
712 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
713 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
714 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
715 ; CODEGEN-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
716 ; CODEGEN-DAZ-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
717 ; CODEGEN-DAZ-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
718 ; CODEGEN-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
720 ; IR-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp:
721 ; IR-DAZ-SDAG: ; %bb.0:
722 ; IR-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723 ; IR-DAZ-SDAG-NEXT: v_rsq_f32_e32 v0, v0
724 ; IR-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
726 ; IR-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp:
727 ; IR-DAZ-GISEL: ; %bb.0:
728 ; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
729 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
730 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
731 ; IR-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
732 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
733 ; IR-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
734 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
735 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
736 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
737 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
738 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
739 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
740 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
741 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
742 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
743 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
744 ; IR-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
745 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
746 ; IR-DAZ-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
747 ; IR-DAZ-GISEL-NEXT: v_rcp_f32_e32 v2, v1
748 ; IR-DAZ-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
749 ; IR-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
750 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
751 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
752 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
753 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
754 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
755 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
756 ; IR-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
757 ; IR-DAZ-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
758 ; IR-DAZ-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
759 ; IR-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
760 %sqrt = call arcp float @llvm.sqrt.f32(float %x)
761 %fdiv = fdiv arcp float 1.0, %sqrt
; Reciprocal square root, 1.0 / sqrt(x), where both the llvm.sqrt.f32 call and
; the fdiv carry the `arcp afn` fast-math flags.
;
; Expected output recorded below: every SelectionDAG configuration folds the
; pair into a single v_rsq_f32, while every GlobalISel configuration emits
; v_sqrt_f32 followed by v_rcp_f32. The recorded output is the same for both
; denormal modes and for both settings of the codegenprepare fdiv expansion
; option used by the RUN lines.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
765 define float @v_fdiv_recip_sqrt_f32_arcp_afn(float %x) {
766 ; CODEGEN-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn:
767 ; CODEGEN-IEEE-SDAG: ; %bb.0:
768 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769 ; CODEGEN-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
770 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
772 ; CODEGEN-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn:
773 ; CODEGEN-IEEE-GISEL: ; %bb.0:
774 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
775 ; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0
776 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v0, v0
777 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
779 ; IR-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn:
780 ; IR-IEEE-SDAG: ; %bb.0:
781 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
782 ; IR-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
783 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
785 ; IR-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn:
786 ; IR-IEEE-GISEL: ; %bb.0:
787 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
788 ; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0
789 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v0, v0
790 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
792 ; CODEGEN-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn:
793 ; CODEGEN-DAZ-SDAG: ; %bb.0:
794 ; CODEGEN-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
795 ; CODEGEN-DAZ-SDAG-NEXT: v_rsq_f32_e32 v0, v0
796 ; CODEGEN-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
798 ; CODEGEN-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn:
799 ; CODEGEN-DAZ-GISEL: ; %bb.0:
800 ; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801 ; CODEGEN-DAZ-GISEL-NEXT: v_sqrt_f32_e32 v0, v0
802 ; CODEGEN-DAZ-GISEL-NEXT: v_rcp_f32_e32 v0, v0
803 ; CODEGEN-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
805 ; IR-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn:
806 ; IR-DAZ-SDAG: ; %bb.0:
807 ; IR-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
808 ; IR-DAZ-SDAG-NEXT: v_rsq_f32_e32 v0, v0
809 ; IR-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
811 ; IR-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn:
812 ; IR-DAZ-GISEL: ; %bb.0:
813 ; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814 ; IR-DAZ-GISEL-NEXT: v_sqrt_f32_e32 v0, v0
815 ; IR-DAZ-GISEL-NEXT: v_rcp_f32_e32 v0, v0
816 ; IR-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
; Input under test: `arcp afn` is present on the sqrt call and on the fdiv.
817 %sqrt = call arcp afn float @llvm.sqrt.f32(float %x)
818 %fdiv = fdiv arcp afn float 1.0, %sqrt
; Reciprocal square root, 1.0 / sqrt(x), where only the fdiv carries `arcp`;
; the llvm.sqrt.f32 call has no fast-math flags.
;
; Expected output recorded below:
;   * SelectionDAG, all four configurations - a single v_rsq_f32.
;   * GlobalISel with ieee denormals - sqrt built from v_sqrt_f32 (input scaled
;     by 0x4f800000 when below 0x0f800000, result rescaled by 0x37800000, plus
;     a correction step choosing between the result and its two integer-adjacent
;     bit patterns), followed by the v_div_scale / v_rcp / v_fma / v_div_fmas /
;     v_div_fixup division sequence.
;   * GlobalISel with preserve-sign denormals - sqrt built from v_rsq_f32
;     refined with v_fma, followed by the same division sequence with the fma
;     chain bracketed by two s_setreg_imm32_b32 writes to HW_REG_MODE.
;
; NOTE(review): the two instruction selectors produce very different code for
; this input; presumably this records a known inconsistency rather than an
; intended difference - confirm before relying on either form.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
822 define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
823 ; CODEGEN-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_fdiv_only:
824 ; CODEGEN-IEEE-SDAG: ; %bb.0:
825 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826 ; CODEGEN-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
827 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
829 ; CODEGEN-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_fdiv_only:
830 ; CODEGEN-IEEE-GISEL: ; %bb.0:
831 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
833 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
834 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
835 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
836 ; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
837 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
838 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
839 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
840 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
841 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
842 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
843 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
844 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
845 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
846 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
847 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
848 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
849 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
850 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
851 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v2, v1
852 ; CODEGEN-IEEE-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
853 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
854 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
855 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
856 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
857 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
858 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
859 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
860 ; CODEGEN-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
861 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
863 ; IR-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_fdiv_only:
864 ; IR-IEEE-SDAG: ; %bb.0:
865 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866 ; IR-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
867 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
869 ; IR-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_fdiv_only:
870 ; IR-IEEE-GISEL: ; %bb.0:
871 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
873 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
874 ; IR-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
875 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
876 ; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
877 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
878 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
879 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
880 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
881 ; IR-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
882 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
883 ; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
884 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
885 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
886 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
887 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
888 ; IR-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
889 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
890 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
891 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v2, v1
892 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
893 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
894 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
895 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
896 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
897 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
898 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
899 ; IR-IEEE-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
900 ; IR-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
901 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
903 ; CODEGEN-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_fdiv_only:
904 ; CODEGEN-DAZ-SDAG: ; %bb.0:
905 ; CODEGEN-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
906 ; CODEGEN-DAZ-SDAG-NEXT: v_rsq_f32_e32 v0, v0
907 ; CODEGEN-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
909 ; CODEGEN-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_fdiv_only:
910 ; CODEGEN-DAZ-GISEL: ; %bb.0:
911 ; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
913 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
914 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
915 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
916 ; CODEGEN-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
917 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
918 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
919 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
920 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
921 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
922 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
923 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
924 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
925 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
926 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
927 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
928 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
929 ; CODEGEN-DAZ-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
930 ; CODEGEN-DAZ-GISEL-NEXT: v_rcp_f32_e32 v2, v1
931 ; CODEGEN-DAZ-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
932 ; CODEGEN-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
933 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
934 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
935 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
936 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
937 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
938 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
939 ; CODEGEN-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
940 ; CODEGEN-DAZ-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
941 ; CODEGEN-DAZ-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
942 ; CODEGEN-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
944 ; IR-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_fdiv_only:
945 ; IR-DAZ-SDAG: ; %bb.0:
946 ; IR-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
947 ; IR-DAZ-SDAG-NEXT: v_rsq_f32_e32 v0, v0
948 ; IR-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
950 ; IR-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_fdiv_only:
951 ; IR-DAZ-GISEL: ; %bb.0:
952 ; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
953 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
954 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
955 ; IR-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
956 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
957 ; IR-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
958 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
959 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
960 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
961 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
962 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
963 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
964 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
965 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
966 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
967 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
968 ; IR-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
969 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
970 ; IR-DAZ-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
971 ; IR-DAZ-GISEL-NEXT: v_rcp_f32_e32 v2, v1
972 ; IR-DAZ-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
973 ; IR-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
974 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
975 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
976 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
977 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
978 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
979 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
980 ; IR-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
981 ; IR-DAZ-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
982 ; IR-DAZ-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
983 ; IR-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
; Input under test: the sqrt call is unflagged; only the fdiv has `arcp`.
984 %sqrt = call float @llvm.sqrt.f32(float %x)
985 %fdiv = fdiv arcp float 1.0, %sqrt
; Reciprocal square root, 1.0 / sqrt(x), where only the fdiv carries `afn`;
; the llvm.sqrt.f32 call has no fast-math flags.
;
; Expected output recorded below: in all eight configurations the sqrt is
; expanded on its own and the division becomes a single trailing v_rcp_f32;
; none of them returns a bare v_rsq_f32 of the input.
;   * With ieee denormals the sqrt starts from v_sqrt_f32 and then selects
;     between that result and its two integer-adjacent bit patterns.
;   * With preserve-sign denormals the sqrt starts from v_rsq_f32 and is
;     refined with a chain of v_fma instructions.
; In every sequence an input below 0x0f800000 is first multiplied by
; 0x4f800000, and the sqrt result is then multiplied by 0x37800000.
; SelectionDAG and GlobalISel differ only in instruction order and register
; assignment, and in holding the threshold in an SGPR versus a VGPR.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
989 define float @v_fdiv_recip_sqrt_f32_afn_fdiv_only(float %x) {
990 ; CODEGEN-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_afn_fdiv_only:
991 ; CODEGEN-IEEE-SDAG: ; %bb.0:
992 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
993 ; CODEGEN-IEEE-SDAG-NEXT: s_mov_b32 s4, 0xf800000
994 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
995 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
996 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
997 ; CODEGEN-IEEE-SDAG-NEXT: v_sqrt_f32_e32 v1, v0
998 ; CODEGEN-IEEE-SDAG-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
999 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v3, -v2, v1, v0
1000 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
1001 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
1002 ; CODEGEN-IEEE-SDAG-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
1003 ; CODEGEN-IEEE-SDAG-NEXT: v_fma_f32 v1, -v3, v1, v0
1004 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
1005 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
1006 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1007 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1008 ; CODEGEN-IEEE-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
1009 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1010 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1011 ; CODEGEN-IEEE-SDAG-NEXT: v_rcp_f32_e32 v0, v0
1012 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1014 ; CODEGEN-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_afn_fdiv_only:
1015 ; CODEGEN-IEEE-GISEL: ; %bb.0:
1016 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1017 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1018 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1019 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1020 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1021 ; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
1022 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
1023 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
1024 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
1025 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
1026 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
1027 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
1028 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
1029 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
1030 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1031 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1032 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1033 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1034 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1035 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v0, v0
1036 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1038 ; IR-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_afn_fdiv_only:
1039 ; IR-IEEE-SDAG: ; %bb.0:
1040 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1041 ; IR-IEEE-SDAG-NEXT: s_mov_b32 s4, 0xf800000
1042 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
1043 ; IR-IEEE-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1044 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1045 ; IR-IEEE-SDAG-NEXT: v_sqrt_f32_e32 v1, v0
1046 ; IR-IEEE-SDAG-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
1047 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v3, -v2, v1, v0
1048 ; IR-IEEE-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
1049 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
1050 ; IR-IEEE-SDAG-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
1051 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v1, -v3, v1, v0
1052 ; IR-IEEE-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
1053 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
1054 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1055 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1056 ; IR-IEEE-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
1057 ; IR-IEEE-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1058 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1059 ; IR-IEEE-SDAG-NEXT: v_rcp_f32_e32 v0, v0
1060 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1062 ; IR-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_afn_fdiv_only:
1063 ; IR-IEEE-GISEL: ; %bb.0:
1064 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1065 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1066 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1067 ; IR-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1068 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1069 ; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
1070 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
1071 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
1072 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
1073 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
1074 ; IR-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
1075 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
1076 ; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
1077 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
1078 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1079 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1080 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1081 ; IR-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1082 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1083 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v0, v0
1084 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1086 ; CODEGEN-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_afn_fdiv_only:
1087 ; CODEGEN-DAZ-SDAG: ; %bb.0:
1088 ; CODEGEN-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1089 ; CODEGEN-DAZ-SDAG-NEXT: s_mov_b32 s4, 0xf800000
1090 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
1091 ; CODEGEN-DAZ-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1092 ; CODEGEN-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1093 ; CODEGEN-DAZ-SDAG-NEXT: v_rsq_f32_e32 v1, v0
1094 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, v0, v1
1095 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0.5, v1
1096 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v3, -v1, v2, 0.5
1097 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v2, v2, v3, v2
1098 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v4, -v2, v2, v0
1099 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v1, v1, v3, v1
1100 ; CODEGEN-DAZ-SDAG-NEXT: v_fma_f32 v1, v4, v1, v2
1101 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1102 ; CODEGEN-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1103 ; CODEGEN-DAZ-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
1104 ; CODEGEN-DAZ-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1105 ; CODEGEN-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1106 ; CODEGEN-DAZ-SDAG-NEXT: v_rcp_f32_e32 v0, v0
1107 ; CODEGEN-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
1109 ; CODEGEN-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_afn_fdiv_only:
1110 ; CODEGEN-DAZ-GISEL: ; %bb.0:
1111 ; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1112 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1113 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1114 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1115 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1116 ; CODEGEN-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
1117 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
1118 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
1119 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
1120 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
1121 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
1122 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
1123 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
1124 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1125 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1126 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1127 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1128 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1129 ; CODEGEN-DAZ-GISEL-NEXT: v_rcp_f32_e32 v0, v0
1130 ; CODEGEN-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
1132 ; IR-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_afn_fdiv_only:
1133 ; IR-DAZ-SDAG: ; %bb.0:
1134 ; IR-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1135 ; IR-DAZ-SDAG-NEXT: s_mov_b32 s4, 0xf800000
1136 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
1137 ; IR-DAZ-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1138 ; IR-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1139 ; IR-DAZ-SDAG-NEXT: v_rsq_f32_e32 v1, v0
1140 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, v0, v1
1141 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0.5, v1
1142 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v3, -v1, v2, 0.5
1143 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v2, v2, v3, v2
1144 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v4, -v2, v2, v0
1145 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v1, v1, v3, v1
1146 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v1, v4, v1, v2
1147 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1148 ; IR-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1149 ; IR-DAZ-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
1150 ; IR-DAZ-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1151 ; IR-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1152 ; IR-DAZ-SDAG-NEXT: v_rcp_f32_e32 v0, v0
1153 ; IR-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
1155 ; IR-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_afn_fdiv_only:
1156 ; IR-DAZ-GISEL: ; %bb.0:
1157 ; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1158 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1159 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1160 ; IR-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1161 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1162 ; IR-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
1163 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
1164 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
1165 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
1166 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
1167 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
1168 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
1169 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
1170 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1171 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1172 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1173 ; IR-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1174 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1175 ; IR-DAZ-GISEL-NEXT: v_rcp_f32_e32 v0, v0
1176 ; IR-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
; Input under test: the sqrt call is unflagged; only the fdiv has `afn`.
1177 %sqrt = call float @llvm.sqrt.f32(float %x)
1178 %fdiv = fdiv afn float 1.0, %sqrt
1182 define float @v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only(float %x) {
1183 ; CODEGEN-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only:
1184 ; CODEGEN-IEEE-SDAG: ; %bb.0:
1185 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1186 ; CODEGEN-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
1187 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1189 ; CODEGEN-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only:
1190 ; CODEGEN-IEEE-GISEL: ; %bb.0:
1191 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1192 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1193 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1194 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1195 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1196 ; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
1197 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
1198 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
1199 ; CODEGEN-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
1200 ; CODEGEN-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
1201 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
1202 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
1203 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
1204 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
1205 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1206 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1207 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1208 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1209 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1210 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v0, v0
1211 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1213 ; IR-IEEE-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only:
1214 ; IR-IEEE-SDAG: ; %bb.0:
1215 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1216 ; IR-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
1217 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1219 ; IR-IEEE-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only:
1220 ; IR-IEEE-GISEL: ; %bb.0:
1221 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1223 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1224 ; IR-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1225 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1226 ; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
1227 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
1228 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
1229 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
1230 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
1231 ; IR-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
1232 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
1233 ; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
1234 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
1235 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1236 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1237 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1238 ; IR-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1239 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1240 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v0, v0
1241 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1243 ; CODEGEN-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only:
1244 ; CODEGEN-DAZ-SDAG: ; %bb.0:
1245 ; CODEGEN-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246 ; CODEGEN-DAZ-SDAG-NEXT: v_rsq_f32_e32 v0, v0
1247 ; CODEGEN-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
1249 ; CODEGEN-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only:
1250 ; CODEGEN-DAZ-GISEL: ; %bb.0:
1251 ; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1252 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1253 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1254 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1255 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1256 ; CODEGEN-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
1257 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
1258 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
1259 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
1260 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
1261 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
1262 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
1263 ; CODEGEN-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
1264 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1265 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1266 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1267 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1268 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1269 ; CODEGEN-DAZ-GISEL-NEXT: v_rcp_f32_e32 v0, v0
1270 ; CODEGEN-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
1272 ; IR-DAZ-SDAG-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only:
1273 ; IR-DAZ-SDAG: ; %bb.0:
1274 ; IR-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275 ; IR-DAZ-SDAG-NEXT: v_rsq_f32_e32 v0, v0
1276 ; IR-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
1278 ; IR-DAZ-GISEL-LABEL: v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only:
1279 ; IR-DAZ-GISEL: ; %bb.0:
1280 ; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1281 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1282 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1283 ; IR-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1284 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1285 ; IR-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
1286 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
1287 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
1288 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
1289 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
1290 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
1291 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
1292 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
1293 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1294 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1295 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1296 ; IR-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1297 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1298 ; IR-DAZ-GISEL-NEXT: v_rcp_f32_e32 v0, v0
1299 ; IR-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
1300 %sqrt = call float @llvm.sqrt.f32(float %x)
1301 %fdiv = fdiv arcp afn float 1.0, %sqrt
; v_fdiv_f32_ulp25: f32 fdiv of %x by %y with no fast-math flags, tagged with
; !fpmath !0 (the float 2.5 node defined at the end of this file).
; Expected output per llc configuration, as recorded by the checks below:
;  * fdiv-expansion flag =0, -denormal-fp-math=ieee: the operands are split
;    with v_frexp_mant/v_frexp_exp, v_rcp_f32 is applied to the denominator
;    mantissa, and the result is rebuilt with v_ldexp_f32.
;  * fdiv-expansion flag =0, -denormal-fp-math=preserve-sign: v_rcp_f32 on a
;    denominator multiplied by a conditionally selected scale factor; the
;    same factor is multiplied back into the quotient.
;  * fdiv-expansion flag =1 (either denormal mode): the
;    v_div_scale/v_div_fmas/v_div_fixup sequence; the preserve-sign runs also
;    wrap the FMA chain in two s_setreg_imm32_b32 writes to HW_REG_MODE.
1305 define float @v_fdiv_f32_ulp25(float %x, float %y) {
1306 ; CODEGEN-IEEE-SDAG-LABEL: v_fdiv_f32_ulp25:
1307 ; CODEGEN-IEEE-SDAG: ; %bb.0:
1308 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1309 ; CODEGEN-IEEE-SDAG-NEXT: v_frexp_mant_f32_e32 v2, v1
1310 ; CODEGEN-IEEE-SDAG-NEXT: v_rcp_f32_e32 v2, v2
1311 ; CODEGEN-IEEE-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
1312 ; CODEGEN-IEEE-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v3, v0
1313 ; CODEGEN-IEEE-SDAG-NEXT: v_frexp_mant_f32_e32 v0, v0
1314 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
1315 ; CODEGEN-IEEE-SDAG-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
1316 ; CODEGEN-IEEE-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
1317 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1319 ; CODEGEN-IEEE-GISEL-LABEL: v_fdiv_f32_ulp25:
1320 ; CODEGEN-IEEE-GISEL: ; %bb.0:
1321 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322 ; CODEGEN-IEEE-GISEL-NEXT: v_frexp_mant_f32_e32 v2, v1
1323 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v2, v2
1324 ; CODEGEN-IEEE-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
1325 ; CODEGEN-IEEE-GISEL-NEXT: v_frexp_mant_f32_e32 v3, v0
1326 ; CODEGEN-IEEE-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
1327 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, v3, v2
1328 ; CODEGEN-IEEE-GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
1329 ; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v2, v0
1330 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1332 ; IR-IEEE-SDAG-LABEL: v_fdiv_f32_ulp25:
1333 ; IR-IEEE-SDAG: ; %bb.0:
1334 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1335 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
1336 ; IR-IEEE-SDAG-NEXT: v_rcp_f32_e32 v3, v2
1337 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v4, -v2, v3, 1.0
1338 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v3, v4, v3, v3
1339 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
1340 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v5, v4, v3
1341 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v6, -v2, v5, v4
1342 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v5, v6, v3, v5
1343 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v2, -v2, v5, v4
1344 ; IR-IEEE-SDAG-NEXT: v_div_fmas_f32 v2, v2, v3, v5
1345 ; IR-IEEE-SDAG-NEXT: v_div_fixup_f32 v0, v2, v1, v0
1346 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1348 ; IR-IEEE-GISEL-LABEL: v_fdiv_f32_ulp25:
1349 ; IR-IEEE-GISEL: ; %bb.0:
1350 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1351 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
1352 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v3, v2
1353 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
1354 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v2, v3, 1.0
1355 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v3, v5, v3, v3
1356 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v5, v4, v3
1357 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v6, -v2, v5, v4
1358 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, v6, v3, v5
1359 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v2, -v2, v5, v4
1360 ; IR-IEEE-GISEL-NEXT: v_div_fmas_f32 v2, v2, v3, v5
1361 ; IR-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v2, v1, v0
1362 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1364 ; CODEGEN-DAZ-SDAG-LABEL: v_fdiv_f32_ulp25:
1365 ; CODEGEN-DAZ-SDAG: ; %bb.0:
1366 ; CODEGEN-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367 ; CODEGEN-DAZ-SDAG-NEXT: s_mov_b32 s4, 0x6f800000
1368 ; CODEGEN-DAZ-SDAG-NEXT: v_mov_b32_e32 v2, 0x2f800000
1369 ; CODEGEN-DAZ-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4
1370 ; CODEGEN-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
1371 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
1372 ; CODEGEN-DAZ-SDAG-NEXT: v_rcp_f32_e32 v1, v1
1373 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1374 ; CODEGEN-DAZ-SDAG-NEXT: v_mul_f32_e32 v0, v2, v0
1375 ; CODEGEN-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
1377 ; CODEGEN-DAZ-GISEL-LABEL: v_fdiv_f32_ulp25:
1378 ; CODEGEN-DAZ-GISEL: ; %bb.0:
1379 ; CODEGEN-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x6f800000
1381 ; CODEGEN-DAZ-GISEL-NEXT: v_mov_b32_e32 v3, 0x2f800000
1382 ; CODEGEN-DAZ-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, v2
1383 ; CODEGEN-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
1384 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2
1385 ; CODEGEN-DAZ-GISEL-NEXT: v_rcp_f32_e32 v1, v1
1386 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1387 ; CODEGEN-DAZ-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
1388 ; CODEGEN-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
1390 ; IR-DAZ-LABEL: v_fdiv_f32_ulp25:
1392 ; IR-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1393 ; IR-DAZ-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
1394 ; IR-DAZ-NEXT: v_rcp_f32_e32 v3, v2
1395 ; IR-DAZ-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
1396 ; IR-DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1397 ; IR-DAZ-NEXT: v_fma_f32 v5, -v2, v3, 1.0
1398 ; IR-DAZ-NEXT: v_fma_f32 v3, v5, v3, v3
1399 ; IR-DAZ-NEXT: v_mul_f32_e32 v5, v4, v3
1400 ; IR-DAZ-NEXT: v_fma_f32 v6, -v2, v5, v4
1401 ; IR-DAZ-NEXT: v_fma_f32 v5, v6, v3, v5
1402 ; IR-DAZ-NEXT: v_fma_f32 v2, -v2, v5, v4
1403 ; IR-DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1404 ; IR-DAZ-NEXT: v_div_fmas_f32 v2, v2, v3, v5
1405 ; IR-DAZ-NEXT: v_div_fixup_f32 v0, v2, v1, v0
1406 ; IR-DAZ-NEXT: s_setpc_b64 s[30:31]
1407 %fdiv = fdiv float %x, %y, !fpmath !0
; v_fdiv_f32_afn_ulp25: same division as v_fdiv_f32_ulp25 but the fdiv also
; carries the afn fast-math flag. All llc configurations share one set of
; checks here: v_rcp_f32 of the denominator followed by a single v_mul_f32.
1411 define float @v_fdiv_f32_afn_ulp25(float %x, float %y) {
1412 ; CHECK-LABEL: v_fdiv_f32_afn_ulp25:
1414 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1415 ; CHECK-NEXT: v_rcp_f32_e32 v1, v1
1416 ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
1417 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1418 %fdiv = fdiv afn float %x, %y, !fpmath !0
; v_recip_f32_ulp25: reciprocal written as fdiv 1.0, %x with no fast-math
; flags and !fpmath !0.
; Expected output per llc configuration, as recorded by the checks below:
;  * fdiv-expansion flag =0, -denormal-fp-math=ieee: v_frexp_mant, v_rcp_f32
;    on the mantissa, negated v_frexp_exp exponent, then v_ldexp_f32.
;  * fdiv-expansion flag =0, -denormal-fp-math=preserve-sign: a lone
;    v_rcp_f32.
;  * fdiv-expansion flag =1: the v_div_scale/v_div_fmas/v_div_fixup sequence
;    with 1.0 as the numerator; the preserve-sign runs also wrap the FMA chain
;    in two s_setreg_imm32_b32 writes to HW_REG_MODE.
1422 define float @v_recip_f32_ulp25(float %x) {
1423 ; CODEGEN-IEEE-SDAG-LABEL: v_recip_f32_ulp25:
1424 ; CODEGEN-IEEE-SDAG: ; %bb.0:
1425 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1426 ; CODEGEN-IEEE-SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0
1427 ; CODEGEN-IEEE-SDAG-NEXT: v_rcp_f32_e32 v1, v1
1428 ; CODEGEN-IEEE-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
1429 ; CODEGEN-IEEE-SDAG-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
1430 ; CODEGEN-IEEE-SDAG-NEXT: v_ldexp_f32_e32 v0, v1, v0
1431 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1433 ; CODEGEN-IEEE-GISEL-LABEL: v_recip_f32_ulp25:
1434 ; CODEGEN-IEEE-GISEL: ; %bb.0:
1435 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1436 ; CODEGEN-IEEE-GISEL-NEXT: v_frexp_mant_f32_e32 v1, v0
1437 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v1, v1
1438 ; CODEGEN-IEEE-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
1439 ; CODEGEN-IEEE-GISEL-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
1440 ; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v1, v0
1441 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1443 ; IR-IEEE-LABEL: v_recip_f32_ulp25:
1445 ; IR-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446 ; IR-IEEE-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
1447 ; IR-IEEE-NEXT: v_rcp_f32_e32 v2, v1
1448 ; IR-IEEE-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
1449 ; IR-IEEE-NEXT: v_fma_f32 v4, -v1, v2, 1.0
1450 ; IR-IEEE-NEXT: v_fma_f32 v2, v4, v2, v2
1451 ; IR-IEEE-NEXT: v_mul_f32_e32 v4, v3, v2
1452 ; IR-IEEE-NEXT: v_fma_f32 v5, -v1, v4, v3
1453 ; IR-IEEE-NEXT: v_fma_f32 v4, v5, v2, v4
1454 ; IR-IEEE-NEXT: v_fma_f32 v1, -v1, v4, v3
1455 ; IR-IEEE-NEXT: v_div_fmas_f32 v1, v1, v2, v4
1456 ; IR-IEEE-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
1457 ; IR-IEEE-NEXT: s_setpc_b64 s[30:31]
1459 ; CODEGEN-DAZ-LABEL: v_recip_f32_ulp25:
1460 ; CODEGEN-DAZ: ; %bb.0:
1461 ; CODEGEN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1462 ; CODEGEN-DAZ-NEXT: v_rcp_f32_e32 v0, v0
1463 ; CODEGEN-DAZ-NEXT: s_setpc_b64 s[30:31]
1465 ; IR-DAZ-LABEL: v_recip_f32_ulp25:
1467 ; IR-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1468 ; IR-DAZ-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
1469 ; IR-DAZ-NEXT: v_rcp_f32_e32 v2, v1
1470 ; IR-DAZ-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
1471 ; IR-DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1472 ; IR-DAZ-NEXT: v_fma_f32 v4, -v1, v2, 1.0
1473 ; IR-DAZ-NEXT: v_fma_f32 v2, v4, v2, v2
1474 ; IR-DAZ-NEXT: v_mul_f32_e32 v4, v3, v2
1475 ; IR-DAZ-NEXT: v_fma_f32 v5, -v1, v4, v3
1476 ; IR-DAZ-NEXT: v_fma_f32 v4, v5, v2, v4
1477 ; IR-DAZ-NEXT: v_fma_f32 v1, -v1, v4, v3
1478 ; IR-DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1479 ; IR-DAZ-NEXT: v_div_fmas_f32 v1, v1, v2, v4
1480 ; IR-DAZ-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
1481 ; IR-DAZ-NEXT: s_setpc_b64 s[30:31]
1482 %fdiv = fdiv float 1.0, %x, !fpmath !0
; v_recip_f32_afn_ulp25: reciprocal (fdiv 1.0, %x) with the afn flag and
; !fpmath !0. All llc configurations share one set of checks: a lone
; v_rcp_f32.
1486 define float @v_recip_f32_afn_ulp25(float %x) {
1487 ; CHECK-LABEL: v_recip_f32_afn_ulp25:
1489 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490 ; CHECK-NEXT: v_rcp_f32_e32 v0, v0
1491 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1492 %fdiv = fdiv afn float 1.0, %x, !fpmath !0
; v_recip_sqrt_f32_ulp25: 1.0 / sqrt(%x) where both the llvm.sqrt.f32 call and
; the fdiv carry !fpmath !0 and neither has fast-math flags.
; Expected output per llc configuration, as recorded by the checks below:
;  * -denormal-fp-math=ieee: the input is conditionally rescaled with
;    v_ldexp_f32 around v_sqrt_f32 (compare against 0x800000); the division
;    that follows is the frexp/v_rcp_f32/ldexp form when the fdiv-expansion
;    flag is =0 and the v_div_scale/v_div_fmas/v_div_fixup form when it is =1.
;  * -denormal-fp-math=preserve-sign: a bare v_sqrt_f32 followed by v_rcp_f32
;    (flag =0) or by the v_div_scale/v_div_fmas/v_div_fixup form (flag =1).
; None of the expected sequences in this function contains v_rsq_f32.
1496 define float @v_recip_sqrt_f32_ulp25(float %x) {
1497 ; CODEGEN-IEEE-SDAG-LABEL: v_recip_sqrt_f32_ulp25:
1498 ; CODEGEN-IEEE-SDAG: ; %bb.0:
1499 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1500 ; CODEGEN-IEEE-SDAG-NEXT: s_mov_b32 s4, 0x800000
1501 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1502 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1503 ; CODEGEN-IEEE-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1
1504 ; CODEGEN-IEEE-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
1505 ; CODEGEN-IEEE-SDAG-NEXT: v_sqrt_f32_e32 v0, v0
1506 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
1507 ; CODEGEN-IEEE-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
1508 ; CODEGEN-IEEE-SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0
1509 ; CODEGEN-IEEE-SDAG-NEXT: v_rcp_f32_e32 v1, v1
1510 ; CODEGEN-IEEE-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
1511 ; CODEGEN-IEEE-SDAG-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
1512 ; CODEGEN-IEEE-SDAG-NEXT: v_ldexp_f32_e32 v0, v1, v0
1513 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1515 ; CODEGEN-IEEE-GISEL-LABEL: v_recip_sqrt_f32_ulp25:
1516 ; CODEGEN-IEEE-GISEL: ; %bb.0:
1517 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1518 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
1519 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1520 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
1521 ; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
1522 ; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0
1523 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
1524 ; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
1525 ; CODEGEN-IEEE-GISEL-NEXT: v_frexp_mant_f32_e32 v1, v0
1526 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v1, v1
1527 ; CODEGEN-IEEE-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
1528 ; CODEGEN-IEEE-GISEL-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
1529 ; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v1, v0
1530 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1532 ; IR-IEEE-SDAG-LABEL: v_recip_sqrt_f32_ulp25:
1533 ; IR-IEEE-SDAG: ; %bb.0:
1534 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535 ; IR-IEEE-SDAG-NEXT: s_mov_b32 s4, 0x800000
1536 ; IR-IEEE-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1537 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1538 ; IR-IEEE-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1
1539 ; IR-IEEE-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
1540 ; IR-IEEE-SDAG-NEXT: v_sqrt_f32_e32 v0, v0
1541 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
1542 ; IR-IEEE-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
1543 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
1544 ; IR-IEEE-SDAG-NEXT: v_rcp_f32_e32 v2, v1
1545 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
1546 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v4, -v1, v2, 1.0
1547 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v2, v4, v2, v2
1548 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v4, v3, v2
1549 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v5, -v1, v4, v3
1550 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v4, v5, v2, v4
1551 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v1, -v1, v4, v3
1552 ; IR-IEEE-SDAG-NEXT: v_div_fmas_f32 v1, v1, v2, v4
1553 ; IR-IEEE-SDAG-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
1554 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1556 ; IR-IEEE-GISEL-LABEL: v_recip_sqrt_f32_ulp25:
1557 ; IR-IEEE-GISEL: ; %bb.0:
1558 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1559 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
1560 ; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1561 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
1562 ; IR-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
1563 ; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0
1564 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc
1565 ; IR-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
1566 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
1567 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v2, v1
1568 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
1569 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
1570 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
1571 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
1572 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
1573 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
1574 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
1575 ; IR-IEEE-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
1576 ; IR-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
1577 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1579 ; CODEGEN-DAZ-LABEL: v_recip_sqrt_f32_ulp25:
1580 ; CODEGEN-DAZ: ; %bb.0:
1581 ; CODEGEN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1582 ; CODEGEN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
1583 ; CODEGEN-DAZ-NEXT: v_rcp_f32_e32 v0, v0
1584 ; CODEGEN-DAZ-NEXT: s_setpc_b64 s[30:31]
1586 ; IR-DAZ-LABEL: v_recip_sqrt_f32_ulp25:
1588 ; IR-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1589 ; IR-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
1590 ; IR-DAZ-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
1591 ; IR-DAZ-NEXT: v_rcp_f32_e32 v2, v1
1592 ; IR-DAZ-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
1593 ; IR-DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1594 ; IR-DAZ-NEXT: v_fma_f32 v4, -v1, v2, 1.0
1595 ; IR-DAZ-NEXT: v_fma_f32 v2, v4, v2, v2
1596 ; IR-DAZ-NEXT: v_mul_f32_e32 v4, v3, v2
1597 ; IR-DAZ-NEXT: v_fma_f32 v5, -v1, v4, v3
1598 ; IR-DAZ-NEXT: v_fma_f32 v4, v5, v2, v4
1599 ; IR-DAZ-NEXT: v_fma_f32 v1, -v1, v4, v3
1600 ; IR-DAZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1601 ; IR-DAZ-NEXT: v_div_fmas_f32 v1, v1, v2, v4
1602 ; IR-DAZ-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
1603 ; IR-DAZ-NEXT: s_setpc_b64 s[30:31]
1604 %sqrt = call float @llvm.sqrt.f32(float %x), !fpmath !0
1605 %fdiv = fdiv float 1.0, %sqrt, !fpmath !0
; v_recip_sqrt_f32_ulp25_contract: 1.0 / sqrt(%x) where both the llvm.sqrt.f32
; call and the fdiv carry the contract flag in addition to !fpmath !0.
; Expected output per llc configuration, as recorded by the checks below:
;  * fdiv-expansion flag =0, -denormal-fp-math=ieee: a single v_rsq_f32 whose
;    input and result are each multiplied by a conditionally selected
;    constant (compare against 0x800000).
;  * fdiv-expansion flag =0, -denormal-fp-math=preserve-sign: a lone
;    v_rsq_f32.
;  * fdiv-expansion flag =1: no fused reciprocal-sqrt; a multi-instruction
;    sqrt expansion (built on v_sqrt_f32 for ieee, on v_rsq_f32 plus FMA
;    refinement for preserve-sign) is followed by the
;    v_div_scale/v_div_fmas/v_div_fixup division sequence.
1609 define float @v_recip_sqrt_f32_ulp25_contract(float %x) {
1610 ; CODEGEN-IEEE-SDAG-LABEL: v_recip_sqrt_f32_ulp25_contract:
1611 ; CODEGEN-IEEE-SDAG: ; %bb.0:
1612 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1613 ; CODEGEN-IEEE-SDAG-NEXT: s_mov_b32 s4, 0x800000
1614 ; CODEGEN-IEEE-SDAG-NEXT: v_mov_b32_e32 v1, 0x4b800000
1615 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1616 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1617 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1618 ; CODEGEN-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
1619 ; CODEGEN-IEEE-SDAG-NEXT: v_mov_b32_e32 v1, 0x45800000
1620 ; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1621 ; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1622 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1624 ; CODEGEN-IEEE-GISEL-LABEL: v_recip_sqrt_f32_ulp25_contract:
1625 ; CODEGEN-IEEE-GISEL: ; %bb.0:
1626 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1627 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
1628 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x4b800000
1629 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1630 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
1631 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1632 ; CODEGEN-IEEE-GISEL-NEXT: v_rsq_f32_e32 v0, v0
1633 ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x45800000
1634 ; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1635 ; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1636 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1638 ; IR-IEEE-SDAG-LABEL: v_recip_sqrt_f32_ulp25_contract:
1639 ; IR-IEEE-SDAG: ; %bb.0:
1640 ; IR-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1641 ; IR-IEEE-SDAG-NEXT: s_mov_b32 s4, 0xf800000
1642 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
1643 ; IR-IEEE-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1644 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1645 ; IR-IEEE-SDAG-NEXT: v_sqrt_f32_e32 v1, v0
1646 ; IR-IEEE-SDAG-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
1647 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v3, -v2, v1, v0
1648 ; IR-IEEE-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
1649 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
1650 ; IR-IEEE-SDAG-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1
1651 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v1, -v3, v1, v0
1652 ; IR-IEEE-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1
1653 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5]
1654 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1655 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1656 ; IR-IEEE-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
1657 ; IR-IEEE-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1658 ; IR-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1659 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
1660 ; IR-IEEE-SDAG-NEXT: v_rcp_f32_e32 v2, v1
1661 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v3, -v1, v2, 1.0
1662 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v2, v3, v2, v2
1663 ; IR-IEEE-SDAG-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
1664 ; IR-IEEE-SDAG-NEXT: v_mul_f32_e32 v4, v3, v2
1665 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v5, -v1, v4, v3
1666 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v4, v5, v2, v4
1667 ; IR-IEEE-SDAG-NEXT: v_fma_f32 v1, -v1, v4, v3
1668 ; IR-IEEE-SDAG-NEXT: v_div_fmas_f32 v1, v1, v2, v4
1669 ; IR-IEEE-SDAG-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
1670 ; IR-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1672 ; IR-IEEE-GISEL-LABEL: v_recip_sqrt_f32_ulp25_contract:
1673 ; IR-IEEE-GISEL: ; %bb.0:
1674 ; IR-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1675 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1676 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1677 ; IR-IEEE-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1678 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1679 ; IR-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v1, v0
1680 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1
1681 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v3, -v2, v1, v0
1682 ; IR-IEEE-GISEL-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1
1683 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v4, v1, v0
1684 ; IR-IEEE-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3
1685 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
1686 ; IR-IEEE-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5
1687 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5]
1688 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1689 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1690 ; IR-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1691 ; IR-IEEE-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1692 ; IR-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1693 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
1694 ; IR-IEEE-GISEL-NEXT: v_rcp_f32_e32 v2, v1
1695 ; IR-IEEE-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
1696 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
1697 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
1698 ; IR-IEEE-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
1699 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
1700 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
1701 ; IR-IEEE-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
1702 ; IR-IEEE-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
1703 ; IR-IEEE-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
1704 ; IR-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1706 ; CODEGEN-DAZ-LABEL: v_recip_sqrt_f32_ulp25_contract:
1707 ; CODEGEN-DAZ: ; %bb.0:
1708 ; CODEGEN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1709 ; CODEGEN-DAZ-NEXT: v_rsq_f32_e32 v0, v0
1710 ; CODEGEN-DAZ-NEXT: s_setpc_b64 s[30:31]
1712 ; IR-DAZ-SDAG-LABEL: v_recip_sqrt_f32_ulp25_contract:
1713 ; IR-DAZ-SDAG: ; %bb.0:
1714 ; IR-DAZ-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1715 ; IR-DAZ-SDAG-NEXT: s_mov_b32 s4, 0xf800000
1716 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0
1717 ; IR-DAZ-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1718 ; IR-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1719 ; IR-DAZ-SDAG-NEXT: v_rsq_f32_e32 v1, v0
1720 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, v0, v1
1721 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v1, 0.5, v1
1722 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v3, -v1, v2, 0.5
1723 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v2, v2, v3, v2
1724 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v4, -v2, v2, v0
1725 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v1, v1, v3, v1
1726 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v1, v4, v1, v2
1727 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1728 ; IR-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1729 ; IR-DAZ-SDAG-NEXT: v_mov_b32_e32 v2, 0x260
1730 ; IR-DAZ-SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1731 ; IR-DAZ-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1732 ; IR-DAZ-SDAG-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
1733 ; IR-DAZ-SDAG-NEXT: v_rcp_f32_e32 v2, v1
1734 ; IR-DAZ-SDAG-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
1735 ; IR-DAZ-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1736 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v4, -v1, v2, 1.0
1737 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v2, v4, v2, v2
1738 ; IR-DAZ-SDAG-NEXT: v_mul_f32_e32 v4, v3, v2
1739 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v5, -v1, v4, v3
1740 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v4, v5, v2, v4
1741 ; IR-DAZ-SDAG-NEXT: v_fma_f32 v1, -v1, v4, v3
1742 ; IR-DAZ-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1743 ; IR-DAZ-SDAG-NEXT: v_div_fmas_f32 v1, v1, v2, v4
1744 ; IR-DAZ-SDAG-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
1745 ; IR-DAZ-SDAG-NEXT: s_setpc_b64 s[30:31]
1747 ; IR-DAZ-GISEL-LABEL: v_recip_sqrt_f32_ulp25_contract:
1748 ; IR-DAZ-GISEL: ; %bb.0:
1749 ; IR-DAZ-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1750 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000
1751 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
1752 ; IR-DAZ-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v0
1753 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1754 ; IR-DAZ-GISEL-NEXT: v_rsq_f32_e32 v1, v0
1755 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
1756 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1
1757 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5
1758 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v2, v3, v2
1759 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v1, v3, v1
1760 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0
1761 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, v3, v1, v2
1762 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1
1763 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1764 ; IR-DAZ-GISEL-NEXT: v_mov_b32_e32 v2, 0x260
1765 ; IR-DAZ-GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2
1766 ; IR-DAZ-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1767 ; IR-DAZ-GISEL-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
1768 ; IR-DAZ-GISEL-NEXT: v_rcp_f32_e32 v2, v1
1769 ; IR-DAZ-GISEL-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
1770 ; IR-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1771 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v4, -v1, v2, 1.0
1772 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v2, v4, v2, v2
1773 ; IR-DAZ-GISEL-NEXT: v_mul_f32_e32 v4, v3, v2
1774 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v5, -v1, v4, v3
1775 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v4, v5, v2, v4
1776 ; IR-DAZ-GISEL-NEXT: v_fma_f32 v1, -v1, v4, v3
1777 ; IR-DAZ-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1778 ; IR-DAZ-GISEL-NEXT: v_div_fmas_f32 v1, v1, v2, v4
1779 ; IR-DAZ-GISEL-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
1780 ; IR-DAZ-GISEL-NEXT: s_setpc_b64 s[30:31]
1781 %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !0
1782 %fdiv = fdiv contract float 1.0, %sqrt, !fpmath !0
; v_recip_sqrt_f32_afn_ulp25: 1.0 / sqrt(%x) where both the llvm.sqrt.f32 call
; and the fdiv carry afn (but not contract) plus !fpmath !0. All llc
; configurations share one set of checks: v_sqrt_f32 followed by v_rcp_f32,
; i.e. the two operations stay separate.
1786 define float @v_recip_sqrt_f32_afn_ulp25(float %x) {
1787 ; CHECK-LABEL: v_recip_sqrt_f32_afn_ulp25:
1789 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1790 ; CHECK-NEXT: v_sqrt_f32_e32 v0, v0
1791 ; CHECK-NEXT: v_rcp_f32_e32 v0, v0
1792 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1793 %sqrt = call afn float @llvm.sqrt.f32(float %x), !fpmath !0
1794 %fdiv = fdiv afn float 1.0, %sqrt, !fpmath !0
; v_recip_sqrt_f32_afn_ulp25_contract: 1.0 / sqrt(%x) where both the
; llvm.sqrt.f32 call and the fdiv carry contract and afn plus !fpmath !0.
; Expected output per llc configuration, as recorded by the checks below:
;  * fdiv-expansion flag =0 (either denormal mode): a lone v_rsq_f32.
;  * fdiv-expansion flag =1 (either denormal mode): v_sqrt_f32 followed by
;    v_rcp_f32.
1798 define float @v_recip_sqrt_f32_afn_ulp25_contract(float %x) {
1799 ; CODEGEN-IEEE-SDAG-LABEL: v_recip_sqrt_f32_afn_ulp25_contract:
1800 ; CODEGEN-IEEE-SDAG: ; %bb.0:
1801 ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1802 ; CODEGEN-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0
1803 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31]
1805 ; CODEGEN-IEEE-GISEL-LABEL: v_recip_sqrt_f32_afn_ulp25_contract:
1806 ; CODEGEN-IEEE-GISEL: ; %bb.0:
1807 ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1808 ; CODEGEN-IEEE-GISEL-NEXT: v_rsq_f32_e32 v0, v0
1809 ; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
1811 ; IR-IEEE-LABEL: v_recip_sqrt_f32_afn_ulp25_contract:
1813 ; IR-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1814 ; IR-IEEE-NEXT: v_sqrt_f32_e32 v0, v0
1815 ; IR-IEEE-NEXT: v_rcp_f32_e32 v0, v0
1816 ; IR-IEEE-NEXT: s_setpc_b64 s[30:31]
1818 ; CODEGEN-DAZ-LABEL: v_recip_sqrt_f32_afn_ulp25_contract:
1819 ; CODEGEN-DAZ: ; %bb.0:
1820 ; CODEGEN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1821 ; CODEGEN-DAZ-NEXT: v_rsq_f32_e32 v0, v0
1822 ; CODEGEN-DAZ-NEXT: s_setpc_b64 s[30:31]
1824 ; IR-DAZ-LABEL: v_recip_sqrt_f32_afn_ulp25_contract:
1826 ; IR-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1827 ; IR-DAZ-NEXT: v_sqrt_f32_e32 v0, v0
1828 ; IR-DAZ-NEXT: v_rcp_f32_e32 v0, v0
1829 ; IR-DAZ-NEXT: s_setpc_b64 s[30:31]
1830 %sqrt = call contract afn float @llvm.sqrt.f32(float %x), !fpmath !0
1831 %fdiv = fdiv contract afn float 1.0, %sqrt, !fpmath !0
; Square-root intrinsic called by the recip-sqrt tests in this file.
1835 declare float @llvm.sqrt.f32(float)
; Operand node for the !fpmath attachments on the *_ulp25 tests. Per the LLVM
; LangRef description of fpmath metadata, the float is the maximum acceptable
; error of the result in ULPs, so this node requests 2.5 ULP.
1837 !0 = !{float 2.500000e+00}