1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX78,GFX7 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GFX78,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
; powi on half with a variable i32 exponent. The half operand arrives as i16
; and is bitcast to half before the @llvm.powi.f16.i32 call; the result is
; bitcast back to i16.
; Per the checks below, the hawaii run converts the base to f32 and goes
; through v_log_f32 / v_mul_legacy_f32 / v_exp_f32, while the fiji and gfx1100
; runs use v_log_f16 and v_exp_f16 with the multiply performed in f32.
6 define i16 @v_powi_f16(i16 %l, i32 %r) {
7 ; GFX7-LABEL: v_powi_f16:
9 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
11 ; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
12 ; GFX7-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
13 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x42800000
14 ; GFX7-NEXT: v_log_f32_e32 v0, v0
15 ; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
16 ; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
17 ; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
18 ; GFX7-NEXT: v_add_f32_e32 v0, v0, v1
19 ; GFX7-NEXT: v_exp_f32_e32 v0, v0
20 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x1f800000
21 ; GFX7-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
22 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
23 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
24 ; GFX7-NEXT: s_setpc_b64 s[30:31]
26 ; GFX8-LABEL: v_powi_f16:
28 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29 ; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
30 ; GFX8-NEXT: v_log_f16_e32 v0, v0
31 ; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v1
32 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
33 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
34 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
35 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
36 ; GFX8-NEXT: v_exp_f16_e32 v0, v0
37 ; GFX8-NEXT: s_setpc_b64 s[30:31]
39 ; GFX11-LABEL: v_powi_f16:
41 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42 ; GFX11-NEXT: v_log_f16_e32 v0, v0
43 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
44 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
45 ; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
46 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
47 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
48 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
49 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
50 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
51 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
52 ; GFX11-NEXT: v_exp_f16_e32 v0, v0
53 ; GFX11-NEXT: s_setpc_b64 s[30:31]
54 %l.cast = bitcast i16 %l to half
55 %res = call half @llvm.powi.f16.i32(half %l.cast, i32 %r)
56 %res.cast = bitcast half %res to i16
; powi on float with a variable i32 exponent. hawaii and fiji share one set of
; checks (the GFX78 prefix); gfx1100 has its own.
; Expected shape on all targets: the exponent is converted with
; v_cvt_f32_i32, the base goes through v_log_f32, the two are multiplied
; (v_mul_legacy_f32 on GFX78, v_mul_dx9_zero_f32 on GFX11) and the product is
; fed to v_exp_f32.
; Literal values as f32: 0x800000 = 2^-126 (smallest normal), 0x4f800000 = 2^32,
; 0x42000000 = 32.0, 0xc2fc0000 = -126.0, 0x42800000 = 64.0, 0x1f800000 = 2^-64.
; NOTE(review): these look like range-scaling fixups around the log/exp
; instructions - confirm against the AMDGPU lowering.
60 define float @v_powi_f32(float %l, i32 %r) {
61 ; GFX78-LABEL: v_powi_f32:
63 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x800000
65 ; GFX78-NEXT: v_mov_b32_e32 v3, 0x4f800000
66 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
67 ; GFX78-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
68 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v2
69 ; GFX78-NEXT: v_log_f32_e32 v0, v0
70 ; GFX78-NEXT: v_cvt_f32_i32_e32 v1, v1
71 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42000000
72 ; GFX78-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
73 ; GFX78-NEXT: v_sub_f32_e32 v0, v0, v2
74 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
75 ; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
76 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
77 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
78 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
79 ; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
80 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
81 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
82 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
83 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
84 ; GFX78-NEXT: s_setpc_b64 s[30:31]
86 ; GFX11-LABEL: v_powi_f32:
88 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
90 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
91 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
92 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
93 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2
94 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
95 ; GFX11-NEXT: v_log_f32_e32 v0, v0
96 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
97 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
98 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
99 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
100 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
101 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
102 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
103 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
104 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
105 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
106 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
107 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
108 ; GFX11-NEXT: s_setpc_b64 s[30:31]
109 %res = call float @llvm.powi.f32.i32(float %l, i32 %r)
; powi on float with constant exponent 0. The checks expect the call to fold
; away entirely: the only instruction besides the wait and the return is a
; move of the constant 1.0 into v0.
113 define float @v_powi_0_f32(float %l) {
114 ; GFX78-LABEL: v_powi_0_f32:
116 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; GFX78-NEXT: v_mov_b32_e32 v0, 1.0
118 ; GFX78-NEXT: s_setpc_b64 s[30:31]
120 ; GFX11-LABEL: v_powi_0_f32:
122 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
124 ; GFX11-NEXT: s_setpc_b64 s[30:31]
125 %res = call float @llvm.powi.f32.i32(float %l, i32 0)
; powi on float with constant exponent 1. The checks expect no arithmetic at
; all: only the entry wait and the return, leaving v0 untouched.
129 define float @v_powi_1_f32(float %l) {
130 ; GFX78-LABEL: v_powi_1_f32:
132 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133 ; GFX78-NEXT: s_setpc_b64 s[30:31]
135 ; GFX11-LABEL: v_powi_1_f32:
137 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX11-NEXT: s_setpc_b64 s[30:31]
139 %res = call float @llvm.powi.f32.i32(float %l, i32 1)
; powi on float with constant exponent -1. Unlike the 0 and 1 cases, the
; checks expect the full log / multiply / exp sequence, with the exponent
; folded into the multiply as the inline constant -1.0.
143 define float @v_powi_neg1_f32(float %l) {
144 ; GFX78-LABEL: v_powi_neg1_f32:
146 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x800000
148 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x4f800000
149 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
150 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
151 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
152 ; GFX78-NEXT: v_log_f32_e32 v0, v0
153 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
154 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
155 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
156 ; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
157 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, -1.0, v0
158 ; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
159 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
160 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
161 ; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
162 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
163 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
164 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
165 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
166 ; GFX78-NEXT: s_setpc_b64 s[30:31]
168 ; GFX11-LABEL: v_powi_neg1_f32:
170 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
172 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
173 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
174 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
175 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
176 ; GFX11-NEXT: v_log_f32_e32 v0, v0
177 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
178 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
179 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
180 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, -1.0, v0
181 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
182 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
183 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
184 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
185 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
186 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
187 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
188 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
189 ; GFX11-NEXT: s_setpc_b64 s[30:31]
190 %res = call float @llvm.powi.f32.i32(float %l, i32 -1)
; powi on float with constant exponent 2. The checks expect the generic
; log / multiply / exp sequence, with the exponent folded into the multiply as
; the inline constant 2.0.
194 define float @v_powi_2_f32(float %l) {
195 ; GFX78-LABEL: v_powi_2_f32:
197 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x800000
199 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x4f800000
200 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
201 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
202 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
203 ; GFX78-NEXT: v_log_f32_e32 v0, v0
204 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
205 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
206 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
207 ; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
208 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
209 ; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
210 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
211 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
212 ; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
213 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
214 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
215 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
216 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
217 ; GFX78-NEXT: s_setpc_b64 s[30:31]
219 ; GFX11-LABEL: v_powi_2_f32:
221 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
223 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
224 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
225 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
226 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
227 ; GFX11-NEXT: v_log_f32_e32 v0, v0
228 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
229 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
230 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
231 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 2.0, v0
232 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
233 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
234 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
235 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
236 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
237 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
238 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
239 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
240 ; GFX11-NEXT: s_setpc_b64 s[30:31]
241 %res = call float @llvm.powi.f32.i32(float %l, i32 2)
; powi on float with constant exponent -2. The checks expect the generic
; log / multiply / exp sequence, with the exponent folded into the multiply as
; the inline constant -2.0.
245 define float @v_powi_neg2_f32(float %l) {
246 ; GFX78-LABEL: v_powi_neg2_f32:
248 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x800000
250 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x4f800000
251 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
252 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
253 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
254 ; GFX78-NEXT: v_log_f32_e32 v0, v0
255 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
256 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
257 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
258 ; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
259 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, -2.0, v0
260 ; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
261 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
262 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
263 ; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
264 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
265 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
266 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
267 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
268 ; GFX78-NEXT: s_setpc_b64 s[30:31]
270 ; GFX11-LABEL: v_powi_neg2_f32:
272 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
274 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
275 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
276 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
277 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
278 ; GFX11-NEXT: v_log_f32_e32 v0, v0
279 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
280 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
281 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
282 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, -2.0, v0
283 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
284 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
285 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
286 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
287 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
288 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
289 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
290 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
291 ; GFX11-NEXT: s_setpc_b64 s[30:31]
292 %res = call float @llvm.powi.f32.i32(float %l, i32 -2)
; powi on float with constant exponent 4. The checks expect the generic
; log / multiply / exp sequence, with the exponent folded into the multiply as
; the inline constant 4.0.
296 define float @v_powi_4_f32(float %l) {
297 ; GFX78-LABEL: v_powi_4_f32:
299 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
300 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x800000
301 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x4f800000
302 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
303 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
304 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
305 ; GFX78-NEXT: v_log_f32_e32 v0, v0
306 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
307 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
308 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
309 ; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
310 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 4.0, v0
311 ; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
312 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
313 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
314 ; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
315 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
316 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
317 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
318 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
319 ; GFX78-NEXT: s_setpc_b64 s[30:31]
321 ; GFX11-LABEL: v_powi_4_f32:
323 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
325 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
326 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
327 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
328 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
329 ; GFX11-NEXT: v_log_f32_e32 v0, v0
330 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
331 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
332 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
333 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 4.0, v0
334 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
335 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
336 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
337 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
338 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
339 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
340 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
341 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
342 ; GFX11-NEXT: s_setpc_b64 s[30:31]
343 %res = call float @llvm.powi.f32.i32(float %l, i32 4)
; powi on float with constant exponent 8. The checks expect the generic
; log / multiply / exp sequence; the exponent appears in the multiply as the
; 32-bit literal 0x41000000, which is 8.0 as f32.
347 define float @v_powi_8_f32(float %l) {
348 ; GFX78-LABEL: v_powi_8_f32:
350 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x800000
352 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x4f800000
353 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
354 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
355 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
356 ; GFX78-NEXT: v_log_f32_e32 v0, v0
357 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
358 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
359 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
360 ; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
361 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 0x41000000, v0
362 ; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
363 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
364 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
365 ; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
366 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
367 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
368 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
369 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
370 ; GFX78-NEXT: s_setpc_b64 s[30:31]
372 ; GFX11-LABEL: v_powi_8_f32:
374 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
376 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
377 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
378 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
379 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
380 ; GFX11-NEXT: v_log_f32_e32 v0, v0
381 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
382 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
383 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
384 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0x41000000, v0
385 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
386 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
387 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
388 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
389 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
390 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
391 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
392 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
393 ; GFX11-NEXT: s_setpc_b64 s[30:31]
394 %res = call float @llvm.powi.f32.i32(float %l, i32 8)
; powi on float with constant exponent 16. The checks expect the generic
; log / multiply / exp sequence; the exponent appears in the multiply as the
; 32-bit literal 0x41800000, which is 16.0 as f32.
398 define float @v_powi_16_f32(float %l) {
399 ; GFX78-LABEL: v_powi_16_f32:
401 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x800000
403 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x4f800000
404 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
405 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
406 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
407 ; GFX78-NEXT: v_log_f32_e32 v0, v0
408 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
409 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
410 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
411 ; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
412 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 0x41800000, v0
413 ; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
414 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
415 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
416 ; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
417 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
418 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
419 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
420 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
421 ; GFX78-NEXT: s_setpc_b64 s[30:31]
423 ; GFX11-LABEL: v_powi_16_f32:
425 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
426 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
427 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
428 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
429 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
430 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
431 ; GFX11-NEXT: v_log_f32_e32 v0, v0
432 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
433 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
434 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
435 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0x41800000, v0
436 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
437 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
438 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
439 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
440 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
441 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
442 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
443 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
444 ; GFX11-NEXT: s_setpc_b64 s[30:31]
445 %res = call float @llvm.powi.f32.i32(float %l, i32 16)
; powi on float with constant exponent 128. The checks expect the generic
; log / multiply / exp sequence; the exponent appears in the multiply as the
; 32-bit literal 0x43000000, which is 128.0 as f32.
449 define float @v_powi_128_f32(float %l) {
450 ; GFX78-LABEL: v_powi_128_f32:
452 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x800000
454 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x4f800000
455 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
456 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
457 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
458 ; GFX78-NEXT: v_log_f32_e32 v0, v0
459 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
460 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
461 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
462 ; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
463 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 0x43000000, v0
464 ; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
465 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
466 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
467 ; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
468 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
469 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
470 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
471 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
472 ; GFX78-NEXT: s_setpc_b64 s[30:31]
474 ; GFX11-LABEL: v_powi_128_f32:
476 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
478 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
479 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
480 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
481 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
482 ; GFX11-NEXT: v_log_f32_e32 v0, v0
483 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
484 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
485 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
486 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0x43000000, v0
487 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
488 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
489 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
490 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
491 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
492 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
493 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
494 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
495 ; GFX11-NEXT: s_setpc_b64 s[30:31]
496 %res = call float @llvm.powi.f32.i32(float %l, i32 128)
; powi on float with constant exponent -128. The checks expect the generic
; log / multiply / exp sequence; the exponent appears in the multiply as the
; 32-bit literal 0xc3000000, which is -128.0 as f32.
500 define float @v_powi_neg128_f32(float %l) {
501 ; GFX78-LABEL: v_powi_neg128_f32:
503 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
504 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x800000
505 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x4f800000
506 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
507 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
508 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
509 ; GFX78-NEXT: v_log_f32_e32 v0, v0
510 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
511 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
512 ; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
513 ; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
514 ; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 0xc3000000, v0
515 ; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
516 ; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
517 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
518 ; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
519 ; GFX78-NEXT: v_exp_f32_e32 v0, v0
520 ; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
521 ; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
522 ; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
523 ; GFX78-NEXT: s_setpc_b64 s[30:31]
525 ; GFX11-LABEL: v_powi_neg128_f32:
527 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
529 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
530 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
531 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
532 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
533 ; GFX11-NEXT: v_log_f32_e32 v0, v0
534 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
535 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
536 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
537 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0xc3000000, v0
538 ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
539 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
540 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
541 ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
542 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
543 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
544 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
545 ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
546 ; GFX11-NEXT: s_setpc_b64 s[30:31]
547 %res = call float @llvm.powi.f32.i32(float %l, i32 -128)
; NOTE(review): the f64 variant below is commented out, so it is not compiled
; or checked. The reason is not visible in this chunk - confirm before
; re-enabling it.
552 ; define double @v_powi_f64(double %l, i32 %r) {
553 ; %res = call double @llvm.powi.f64.i32(double %l, i32 %r)
; Declarations of the powi intrinsic overloads used by this file. Within this
; chunk the f64 overload is referenced only by the commented-out test.
557 declare half @llvm.powi.f16.i32(half, i32) #0
558 declare float @llvm.powi.f32.i32(float, i32) #0
559 declare double @llvm.powi.f64.i32(double, i32) #0
; Attribute group #0, attached to all three intrinsic declarations.
561 attributes #0 = { nounwind readnone speculatable willreturn }