1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX78,GFX7 %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GFX78,GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
; llvm.powi.f16.i32 with a variable i32 exponent. The half operand and the
; half result travel through the function signature as i16 and are bitcast.
; Expected code on all three run lines: widen the base to f32, convert the
; exponent to f32, then v_log_f32 -> multiply -> v_exp_f32 and narrow back to
; f16. The multiply is v_mul_legacy_f32 on hawaii/fiji and v_mul_dx9_zero_f32
; on gfx1100; gfx1100 additionally expects s_delay_alu / s_waitcnt_depctr.
6 define i16 @v_powi_f16(i16 %l, i32 %r) {
7 ; GFX78-LABEL: v_powi_f16:
9 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; GFX78-NEXT: v_cvt_f32_f16_e32 v0, v0
11 ; GFX78-NEXT: v_cvt_f32_i32_e32 v1, v1
12 ; GFX78-NEXT: v_log_f32_e32 v0, v0
13 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
14 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
15 ; GFX78-NEXT: v_cvt_f16_f32_e32 v0, v0
16 ; GFX78-NEXT: s_setpc_b64 s[30:31]
18 ; GFX11-LABEL: v_powi_f16:
20 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
22 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
23 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
24 ; GFX11-NEXT: v_log_f32_e32 v0, v0
25 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
26 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
27 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
28 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
29 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
30 ; GFX11-NEXT: s_setpc_b64 s[30:31]
31 %l.cast = bitcast i16 %l to half
32 %res = call half @llvm.powi.f16.i32(half %l.cast, i32 %r)
33 %res.cast = bitcast half %res to i16
; llvm.powi.f32.i32 with a variable i32 exponent. Expected code: v_log_f32 on
; the base, v_cvt_f32_i32 on the exponent, a multiply of the two
; (v_mul_legacy_f32 on hawaii/fiji, v_mul_dx9_zero_f32 on gfx1100), then
; v_exp_f32 on the product.
37 define float @v_powi_f32(float %l, i32 %r) {
38 ; GFX78-LABEL: v_powi_f32:
40 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX78-NEXT: v_log_f32_e32 v0, v0
42 ; GFX78-NEXT: v_cvt_f32_i32_e32 v1, v1
43 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
44 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
45 ; GFX78-NEXT: s_setpc_b64 s[30:31]
47 ; GFX11-LABEL: v_powi_f32:
49 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX11-NEXT: v_log_f32_e32 v0, v0
51 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
52 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
53 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
54 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
55 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
56 ; GFX11-NEXT: s_setpc_b64 s[30:31]
57 %res = call float @llvm.powi.f32.i32(float %l, i32 %r)
; Constant exponent 0: every run line expects the call to fold away, leaving
; only a move of the constant 1.0 into v0 before the return.
61 define float @v_powi_0_f32(float %l) {
62 ; GFX78-LABEL: v_powi_0_f32:
64 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65 ; GFX78-NEXT: v_mov_b32_e32 v0, 1.0
66 ; GFX78-NEXT: s_setpc_b64 s[30:31]
68 ; GFX11-LABEL: v_powi_0_f32:
70 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71 ; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
72 ; GFX11-NEXT: s_setpc_b64 s[30:31]
73 %res = call float @llvm.powi.f32.i32(float %l, i32 0)
; Constant exponent 1: every run line expects no arithmetic at all; only the
; entry s_waitcnt and the return are emitted, so v0 is returned unmodified.
77 define float @v_powi_1_f32(float %l) {
78 ; GFX78-LABEL: v_powi_1_f32:
80 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81 ; GFX78-NEXT: s_setpc_b64 s[30:31]
83 ; GFX11-LABEL: v_powi_1_f32:
85 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX11-NEXT: s_setpc_b64 s[30:31]
87 %res = call float @llvm.powi.f32.i32(float %l, i32 1)
; Constant exponent -1: no multiplies are expected; the input feeds straight
; into an f32 division sequence against the constant 1.0 (v_div_scale_f32,
; v_rcp_f32, v_fma_f32 refinement steps, v_div_fmas_f32, v_div_fixup_f32).
; hawaii and fiji are checked under separate prefixes here because the
; expected instruction order and register assignment differ between them.
; gfx1100 uses v_fmac_f32, a null scalar destination and vcc_lo, with
; s_delay_alu hints between dependent instructions.
91 define float @v_powi_neg1_f32(float %l) {
92 ; GFX7-LABEL: v_powi_neg1_f32:
94 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
96 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1
97 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
98 ; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
99 ; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
100 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
101 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
102 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
103 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
104 ; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
105 ; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
106 ; GFX7-NEXT: s_setpc_b64 s[30:31]
108 ; GFX8-LABEL: v_powi_neg1_f32:
110 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111 ; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
112 ; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
113 ; GFX8-NEXT: v_rcp_f32_e32 v3, v1
114 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
115 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
116 ; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
117 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
118 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
119 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
120 ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
121 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
122 ; GFX8-NEXT: s_setpc_b64 s[30:31]
124 ; GFX11-LABEL: v_powi_neg1_f32:
126 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127 ; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
128 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
129 ; GFX11-NEXT: v_rcp_f32_e32 v2, v1
130 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
131 ; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0
132 ; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2
133 ; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
134 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
135 ; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2
136 ; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3
137 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
138 ; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2
139 ; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3
140 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
141 ; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4
142 ; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
143 ; GFX11-NEXT: s_setpc_b64 s[30:31]
144 %res = call float @llvm.powi.f32.i32(float %l, i32 -1)
; Constant exponent 2: every run line expects a single v_mul_f32 that
; multiplies v0 by itself.
148 define float @v_powi_2_f32(float %l) {
149 ; GFX78-LABEL: v_powi_2_f32:
151 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
153 ; GFX78-NEXT: s_setpc_b64 s[30:31]
155 ; GFX11-LABEL: v_powi_2_f32:
157 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
159 ; GFX11-NEXT: s_setpc_b64 s[30:31]
160 %res = call float @llvm.powi.f32.i32(float %l, i32 2)
; Constant exponent -2: the expected code first squares v0 with one
; v_mul_f32 and then runs the f32 division sequence against the constant 1.0
; on the squared value (v_div_scale_f32, v_rcp_f32, v_fma_f32 refinement,
; v_div_fmas_f32, v_div_fixup_f32). hawaii and fiji have separate prefixes
; because their expected instruction order and registers differ.
164 define float @v_powi_neg2_f32(float %l) {
165 ; GFX7-LABEL: v_powi_neg2_f32:
167 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
169 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
170 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1
171 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
172 ; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
173 ; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
174 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
175 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
176 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
177 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
178 ; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
179 ; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
180 ; GFX7-NEXT: s_setpc_b64 s[30:31]
182 ; GFX8-LABEL: v_powi_neg2_f32:
184 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
186 ; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
187 ; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
188 ; GFX8-NEXT: v_rcp_f32_e32 v3, v1
189 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
190 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
191 ; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
192 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
193 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
194 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
195 ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
196 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
197 ; GFX8-NEXT: s_setpc_b64 s[30:31]
199 ; GFX11-LABEL: v_powi_neg2_f32:
201 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
203 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
204 ; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
205 ; GFX11-NEXT: v_rcp_f32_e32 v2, v1
206 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
207 ; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0
208 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
209 ; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2
210 ; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
211 ; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2
212 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
213 ; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3
214 ; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2
215 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
216 ; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3
217 ; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4
218 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
219 ; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
220 ; GFX11-NEXT: s_setpc_b64 s[30:31]
221 %res = call float @llvm.powi.f32.i32(float %l, i32 -2)
; Constant exponent 4: every run line expects two back-to-back self-multiplies
; of v0 (repeated squaring). gfx1100 additionally expects an s_delay_alu hint
; between the two dependent multiplies.
225 define float @v_powi_4_f32(float %l) {
226 ; GFX78-LABEL: v_powi_4_f32:
228 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
230 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
231 ; GFX78-NEXT: s_setpc_b64 s[30:31]
233 ; GFX11-LABEL: v_powi_4_f32:
235 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
237 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
238 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
239 ; GFX11-NEXT: s_setpc_b64 s[30:31]
240 %res = call float @llvm.powi.f32.i32(float %l, i32 4)
; Constant exponent 8: every run line expects three successive
; self-multiplies of v0. gfx1100 interleaves one s_delay_alu hint covering the
; dependent multiplies.
244 define float @v_powi_8_f32(float %l) {
245 ; GFX78-LABEL: v_powi_8_f32:
247 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
249 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
250 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
251 ; GFX78-NEXT: s_setpc_b64 s[30:31]
253 ; GFX11-LABEL: v_powi_8_f32:
255 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
257 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
258 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
259 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
260 ; GFX11-NEXT: s_setpc_b64 s[30:31]
261 %res = call float @llvm.powi.f32.i32(float %l, i32 8)
; Constant exponent 16: every run line expects four successive
; self-multiplies of v0. gfx1100 adds s_delay_alu hints between the dependent
; multiplies.
265 define float @v_powi_16_f32(float %l) {
266 ; GFX78-LABEL: v_powi_16_f32:
268 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
270 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
271 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
272 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
273 ; GFX78-NEXT: s_setpc_b64 s[30:31]
275 ; GFX11-LABEL: v_powi_16_f32:
277 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
279 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
280 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
281 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
282 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
283 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
284 ; GFX11-NEXT: s_setpc_b64 s[30:31]
285 %res = call float @llvm.powi.f32.i32(float %l, i32 16)
; Constant exponent 128: every run line expects seven successive
; self-multiplies of v0 and no log/exp sequence. gfx1100 adds s_delay_alu
; hints between the dependent multiplies.
289 define float @v_powi_128_f32(float %l) {
290 ; GFX78-LABEL: v_powi_128_f32:
292 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
294 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
295 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
296 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
297 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
298 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
299 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
300 ; GFX78-NEXT: s_setpc_b64 s[30:31]
302 ; GFX11-LABEL: v_powi_128_f32:
304 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
306 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
307 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
308 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
309 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
310 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
311 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
312 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
313 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
314 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
315 ; GFX11-NEXT: s_setpc_b64 s[30:31]
316 %res = call float @llvm.powi.f32.i32(float %l, i32 128)
; Constant exponent -128: the expected code is seven successive
; self-multiplies of v0 followed by the f32 division sequence against the
; constant 1.0 (v_div_scale_f32, v_rcp_f32, v_fma_f32 refinement,
; v_div_fmas_f32, v_div_fixup_f32). hawaii and fiji have separate prefixes
; because the expected order and registers in the division part differ.
320 define float @v_powi_neg128_f32(float %l) {
321 ; GFX7-LABEL: v_powi_neg128_f32:
323 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
325 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
326 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
327 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
328 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
329 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
330 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
331 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
332 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1
333 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
334 ; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
335 ; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
336 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
337 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
338 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
339 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
340 ; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
341 ; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
342 ; GFX7-NEXT: s_setpc_b64 s[30:31]
344 ; GFX8-LABEL: v_powi_neg128_f32:
346 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
348 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
349 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
350 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
351 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
352 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
353 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
354 ; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
355 ; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
356 ; GFX8-NEXT: v_rcp_f32_e32 v3, v1
357 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
358 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
359 ; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
360 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
361 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
362 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
363 ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
364 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
365 ; GFX8-NEXT: s_setpc_b64 s[30:31]
367 ; GFX11-LABEL: v_powi_neg128_f32:
369 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
371 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
372 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
373 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
374 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
375 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
376 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
377 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
378 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
379 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
380 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
381 ; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
382 ; GFX11-NEXT: v_rcp_f32_e32 v2, v1
383 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
384 ; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0
385 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
386 ; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2
387 ; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
388 ; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2
389 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
390 ; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3
391 ; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2
392 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
393 ; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3
394 ; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4
395 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
396 ; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
397 ; GFX11-NEXT: s_setpc_b64 s[30:31]
398 %res = call float @llvm.powi.f32.i32(float %l, i32 -128)
403 ; define double @v_powi_f64(double %l, i32 %r) {
404 ; %res = call double @llvm.powi.f64.i32(double %l, i32 %r)
; Declarations of the llvm.powi overloads referenced by this file. The f64
; overload is declared even though the only f64 function in this file is
; commented out. All three declarations share attribute group #0.
408 declare half @llvm.powi.f16.i32(half, i32) #0
409 declare float @llvm.powi.f32.i32(float, i32) #0
410 declare double @llvm.powi.f64.i32(double, i32) #0
412 attributes #0 = { nounwind readnone speculatable willreturn }