1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
6 ; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; Scalar f32 case: calls @llvm.pow.f32 on %x and %y. The expected output for
; every target below is a v_log_f32 of the first argument, a multiply by the
; second argument (v_mul_legacy_f32, or v_mul_dx9_zero_f32 in the GFX11 checks),
; and a v_exp_f32 of the product.
9 define float @v_pow_f32(float %x, float %y) {
10 ; GFX6-LABEL: v_pow_f32:
12 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13 ; GFX6-NEXT: v_log_f32_e32 v0, v0
14 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
15 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
16 ; GFX6-NEXT: s_setpc_b64 s[30:31]
18 ; GFX8-LABEL: v_pow_f32:
20 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21 ; GFX8-NEXT: v_log_f32_e32 v0, v0
22 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
23 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
24 ; GFX8-NEXT: s_setpc_b64 s[30:31]
26 ; GFX9-LABEL: v_pow_f32:
28 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29 ; GFX9-NEXT: v_log_f32_e32 v0, v0
30 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
31 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
32 ; GFX9-NEXT: s_setpc_b64 s[30:31]
34 ; GFX90A-LABEL: v_pow_f32:
36 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
38 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
39 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
40 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
42 ; GFX10-LABEL: v_pow_f32:
44 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45 ; GFX10-NEXT: v_log_f32_e32 v0, v0
46 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
47 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
48 ; GFX10-NEXT: s_setpc_b64 s[30:31]
50 ; GFX11-LABEL: v_pow_f32:
52 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53 ; GFX11-NEXT: v_log_f32_e32 v0, v0
54 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
55 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
56 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
57 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
58 ; GFX11-NEXT: s_setpc_b64 s[30:31]
59 %pow = call float @llvm.pow.f32(float %x, float %y)
; <2 x float> case: calls @llvm.pow.v2f32. The checks show the same
; log / multiply / exp sequence as the scalar test, emitted once per element
; (v0 with v2, v1 with v3). In the GFX11 checks the two multiplies appear as a
; single v_dual_mul_dx9_zero_f32 line.
63 define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
64 ; GFX6-LABEL: v_pow_v2f32:
66 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX6-NEXT: v_log_f32_e32 v0, v0
68 ; GFX6-NEXT: v_log_f32_e32 v1, v1
69 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v0
70 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v3, v1
71 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
72 ; GFX6-NEXT: v_exp_f32_e32 v1, v1
73 ; GFX6-NEXT: s_setpc_b64 s[30:31]
75 ; GFX8-LABEL: v_pow_v2f32:
77 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78 ; GFX8-NEXT: v_log_f32_e32 v0, v0
79 ; GFX8-NEXT: v_log_f32_e32 v1, v1
80 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v2, v0
81 ; GFX8-NEXT: v_mul_legacy_f32_e32 v1, v3, v1
82 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
83 ; GFX8-NEXT: v_exp_f32_e32 v1, v1
84 ; GFX8-NEXT: s_setpc_b64 s[30:31]
86 ; GFX9-LABEL: v_pow_v2f32:
88 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; GFX9-NEXT: v_log_f32_e32 v0, v0
90 ; GFX9-NEXT: v_log_f32_e32 v1, v1
91 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v2, v0
92 ; GFX9-NEXT: v_mul_legacy_f32_e32 v1, v3, v1
93 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
94 ; GFX9-NEXT: v_exp_f32_e32 v1, v1
95 ; GFX9-NEXT: s_setpc_b64 s[30:31]
97 ; GFX90A-LABEL: v_pow_v2f32:
99 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
101 ; GFX90A-NEXT: v_log_f32_e32 v1, v1
102 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v2, v0
103 ; GFX90A-NEXT: v_mul_legacy_f32 v1, v3, v1
104 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
105 ; GFX90A-NEXT: v_exp_f32_e32 v1, v1
106 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
108 ; GFX10-LABEL: v_pow_v2f32:
110 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111 ; GFX10-NEXT: v_log_f32_e32 v0, v0
112 ; GFX10-NEXT: v_log_f32_e32 v1, v1
113 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v2, v0
114 ; GFX10-NEXT: v_mul_legacy_f32_e32 v1, v3, v1
115 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
116 ; GFX10-NEXT: v_exp_f32_e32 v1, v1
117 ; GFX10-NEXT: s_setpc_b64 s[30:31]
119 ; GFX11-LABEL: v_pow_v2f32:
121 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122 ; GFX11-NEXT: v_log_f32_e32 v0, v0
123 ; GFX11-NEXT: v_log_f32_e32 v1, v1
124 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
125 ; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v0, v2, v0 :: v_dual_mul_dx9_zero_f32 v1, v3, v1
126 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
127 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
128 ; GFX11-NEXT: v_exp_f32_e32 v1, v1
129 ; GFX11-NEXT: s_setpc_b64 s[30:31]
130 %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
; Scalar f16 case: calls @llvm.pow.f16. The checks show the computation done
; in f32: both operands go through v_cvt_f32_f16, then log / multiply / exp.
; From GFX8 onward the result is converted back with v_cvt_f16_f32; the GFX6
; checks instead start with v_cvt_f16_f32 on both inputs and have no final
; conversion after the v_exp_f32.
134 define half @v_pow_f16(half %x, half %y) {
135 ; GFX6-LABEL: v_pow_f16:
137 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
139 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
140 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
141 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
142 ; GFX6-NEXT: v_log_f32_e32 v0, v0
143 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
144 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
145 ; GFX6-NEXT: s_setpc_b64 s[30:31]
147 ; GFX8-LABEL: v_pow_f16:
149 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
151 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
152 ; GFX8-NEXT: v_log_f32_e32 v0, v0
153 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
154 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
155 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
156 ; GFX8-NEXT: s_setpc_b64 s[30:31]
158 ; GFX9-LABEL: v_pow_f16:
160 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
162 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
163 ; GFX9-NEXT: v_log_f32_e32 v0, v0
164 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
165 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
166 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
167 ; GFX9-NEXT: s_setpc_b64 s[30:31]
169 ; GFX90A-LABEL: v_pow_f16:
171 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, v0
173 ; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, v1
174 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
175 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
176 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
177 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
178 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
180 ; GFX10-LABEL: v_pow_f16:
182 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
184 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1
185 ; GFX10-NEXT: v_log_f32_e32 v0, v0
186 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
187 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
188 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
189 ; GFX10-NEXT: s_setpc_b64 s[30:31]
191 ; GFX11-LABEL: v_pow_f16:
193 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
195 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
196 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
197 ; GFX11-NEXT: v_log_f32_e32 v0, v0
198 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
199 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
200 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
201 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
202 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
203 ; GFX11-NEXT: s_setpc_b64 s[30:31]
204 %pow = call half @llvm.pow.f16(half %x, half %y)
; <2 x half> case: calls @llvm.pow.v2f16. The checks show each half element
; widened to f32 and run through log / multiply / exp separately. The high
; half is read with an SDWA WORD_1 source select in the GFX8, GFX9, GFX90A and
; GFX10 checks, and with a v_lshrrev_b32 by 16 in the GFX11 checks. The two
; results are recombined with v_or_b32 (GFX8) or v_pack_b32_f16 (GFX9 and
; later). The GFX6 checks operate on four separate registers (v0-v3) and have
; no repacking step.
208 define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
209 ; GFX6-LABEL: v_pow_v2f16:
211 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
213 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
214 ; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
215 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
216 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
217 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
218 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
219 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
220 ; GFX6-NEXT: v_log_f32_e32 v0, v0
221 ; GFX6-NEXT: v_log_f32_e32 v1, v1
222 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v0
223 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v3, v1
224 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
225 ; GFX6-NEXT: v_exp_f32_e32 v1, v1
226 ; GFX6-NEXT: s_setpc_b64 s[30:31]
228 ; GFX8-LABEL: v_pow_v2f16:
230 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
231 ; GFX8-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
232 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
233 ; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
234 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
235 ; GFX8-NEXT: v_log_f32_e32 v2, v2
236 ; GFX8-NEXT: v_log_f32_e32 v0, v0
237 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
238 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
239 ; GFX8-NEXT: v_exp_f32_e32 v1, v2
240 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
241 ; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
242 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
243 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
244 ; GFX8-NEXT: s_setpc_b64 s[30:31]
246 ; GFX9-LABEL: v_pow_v2f16:
248 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
250 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
251 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
252 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
253 ; GFX9-NEXT: v_log_f32_e32 v2, v2
254 ; GFX9-NEXT: v_log_f32_e32 v0, v0
255 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
256 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
257 ; GFX9-NEXT: v_exp_f32_e32 v1, v2
258 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
259 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
260 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
261 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
262 ; GFX9-NEXT: s_setpc_b64 s[30:31]
264 ; GFX90A-LABEL: v_pow_v2f16:
266 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267 ; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
268 ; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, v0
269 ; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
270 ; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, v1
271 ; GFX90A-NEXT: v_log_f32_e32 v2, v2
272 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
273 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
274 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
275 ; GFX90A-NEXT: v_exp_f32_e32 v1, v2
276 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
277 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
278 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
279 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
280 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
282 ; GFX10-LABEL: v_pow_v2f16:
284 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
286 ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
287 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
288 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1
289 ; GFX10-NEXT: v_log_f32_e32 v2, v2
290 ; GFX10-NEXT: v_log_f32_e32 v0, v0
291 ; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
292 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
293 ; GFX10-NEXT: v_exp_f32_e32 v1, v2
294 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
295 ; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1
296 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
297 ; GFX10-NEXT: v_pack_b32_f16 v0, v0, v1
298 ; GFX10-NEXT: s_setpc_b64 s[30:31]
300 ; GFX11-LABEL: v_pow_v2f16:
302 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
304 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
305 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
306 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
307 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
308 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
309 ; GFX11-NEXT: v_log_f32_e32 v0, v0
310 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
311 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
312 ; GFX11-NEXT: v_log_f32_e32 v2, v2
313 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
314 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
315 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v2, v3, v2
316 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
317 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
318 ; GFX11-NEXT: v_exp_f32_e32 v1, v2
319 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
320 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
321 ; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
322 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
323 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
324 ; GFX11-NEXT: s_setpc_b64 s[30:31]
325 %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
; <2 x half> pow with the first operand negated (fneg of %x before the call).
; From GFX8 onward the checks show the negation as a "-" source modifier on
; the v_cvt_f32_f16 instructions that widen %x. The GFX6 checks instead pack
; the two halves of %x into one register (v_lshlrev_b32 + v_or_b32) and flip
; both sign bits with v_xor_b32 0x80008000 before widening.
329 define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
330 ; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
332 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
334 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
335 ; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
336 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
337 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
338 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
339 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v3
340 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0
341 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
342 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
343 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
344 ; GFX6-NEXT: v_log_f32_e32 v3, v3
345 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
346 ; GFX6-NEXT: v_log_f32_e32 v4, v0
347 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v3
348 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
349 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v4
350 ; GFX6-NEXT: v_exp_f32_e32 v1, v1
351 ; GFX6-NEXT: s_setpc_b64 s[30:31]
353 ; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
355 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
356 ; GFX8-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
357 ; GFX8-NEXT: v_cvt_f32_f16_e64 v0, -v0
358 ; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
359 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
360 ; GFX8-NEXT: v_log_f32_e32 v2, v2
361 ; GFX8-NEXT: v_log_f32_e32 v0, v0
362 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
363 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
364 ; GFX8-NEXT: v_exp_f32_e32 v1, v2
365 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
366 ; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
367 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
368 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
369 ; GFX8-NEXT: s_setpc_b64 s[30:31]
371 ; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
373 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
375 ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -v0
376 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
377 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
378 ; GFX9-NEXT: v_log_f32_e32 v2, v2
379 ; GFX9-NEXT: v_log_f32_e32 v0, v0
380 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
381 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
382 ; GFX9-NEXT: v_exp_f32_e32 v1, v2
383 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
384 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
385 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
386 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
387 ; GFX9-NEXT: s_setpc_b64 s[30:31]
389 ; GFX90A-LABEL: v_pow_v2f16_fneg_lhs:
391 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392 ; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
393 ; GFX90A-NEXT: v_cvt_f32_f16_e64 v0, -v0
394 ; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
395 ; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, v1
396 ; GFX90A-NEXT: v_log_f32_e32 v2, v2
397 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
398 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
399 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
400 ; GFX90A-NEXT: v_exp_f32_e32 v1, v2
401 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
402 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
403 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
404 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
405 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
407 ; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
409 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
411 ; GFX10-NEXT: v_cvt_f32_f16_e64 v0, -v0
412 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
413 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1
414 ; GFX10-NEXT: v_log_f32_e32 v2, v2
415 ; GFX10-NEXT: v_log_f32_e32 v0, v0
416 ; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
417 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
418 ; GFX10-NEXT: v_exp_f32_e32 v1, v2
419 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
420 ; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1
421 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
422 ; GFX10-NEXT: v_pack_b32_f16 v0, v0, v1
423 ; GFX10-NEXT: s_setpc_b64 s[30:31]
425 ; GFX11-LABEL: v_pow_v2f16_fneg_lhs:
427 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
429 ; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -v0
430 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
431 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
432 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
433 ; GFX11-NEXT: v_cvt_f32_f16_e64 v2, -v2
434 ; GFX11-NEXT: v_log_f32_e32 v0, v0
435 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
436 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
437 ; GFX11-NEXT: v_log_f32_e32 v2, v2
438 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
439 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
440 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v2, v3, v2
441 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
442 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
443 ; GFX11-NEXT: v_exp_f32_e32 v1, v2
444 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
445 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
446 ; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
447 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
448 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
449 ; GFX11-NEXT: s_setpc_b64 s[30:31]
450 %x.fneg = fneg <2 x half> %x
451 %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
; <2 x half> pow with the second operand negated (fneg of %y before the call).
; From GFX8 onward the checks show the negation as a "-" source modifier on
; the v_cvt_f32_f16 instructions that widen %y. The GFX6 checks instead pack
; the two halves of %y into one register and flip both sign bits with
; v_xor_b32 0x80008000 before widening.
455 define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
456 ; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
458 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
460 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
461 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
462 ; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
463 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
464 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
465 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
466 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
467 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
468 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
469 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
470 ; GFX6-NEXT: v_log_f32_e32 v0, v0
471 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
472 ; GFX6-NEXT: v_log_f32_e32 v1, v1
473 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v0
474 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
475 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v3, v1
476 ; GFX6-NEXT: v_exp_f32_e32 v1, v1
477 ; GFX6-NEXT: s_setpc_b64 s[30:31]
479 ; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
481 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482 ; GFX8-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
483 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
484 ; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
485 ; GFX8-NEXT: v_cvt_f32_f16_e64 v1, -v1
486 ; GFX8-NEXT: v_log_f32_e32 v2, v2
487 ; GFX8-NEXT: v_log_f32_e32 v0, v0
488 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
489 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
490 ; GFX8-NEXT: v_exp_f32_e32 v1, v2
491 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
492 ; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
493 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
494 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
495 ; GFX8-NEXT: s_setpc_b64 s[30:31]
497 ; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
499 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
501 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
502 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
503 ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -v1
504 ; GFX9-NEXT: v_log_f32_e32 v2, v2
505 ; GFX9-NEXT: v_log_f32_e32 v0, v0
506 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
507 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
508 ; GFX9-NEXT: v_exp_f32_e32 v1, v2
509 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
510 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
511 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
512 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
513 ; GFX9-NEXT: s_setpc_b64 s[30:31]
515 ; GFX90A-LABEL: v_pow_v2f16_fneg_rhs:
517 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518 ; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
519 ; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, v0
520 ; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
521 ; GFX90A-NEXT: v_cvt_f32_f16_e64 v1, -v1
522 ; GFX90A-NEXT: v_log_f32_e32 v2, v2
523 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
524 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
525 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
526 ; GFX90A-NEXT: v_exp_f32_e32 v1, v2
527 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
528 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
529 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
530 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
531 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
533 ; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
535 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
537 ; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
538 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
539 ; GFX10-NEXT: v_cvt_f32_f16_e64 v1, -v1
540 ; GFX10-NEXT: v_log_f32_e32 v2, v2
541 ; GFX10-NEXT: v_log_f32_e32 v0, v0
542 ; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
543 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
544 ; GFX10-NEXT: v_exp_f32_e32 v1, v2
545 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
546 ; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1
547 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
548 ; GFX10-NEXT: v_pack_b32_f16 v0, v0, v1
549 ; GFX10-NEXT: s_setpc_b64 s[30:31]
551 ; GFX11-LABEL: v_pow_v2f16_fneg_rhs:
553 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
555 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
556 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
557 ; GFX11-NEXT: v_cvt_f32_f16_e64 v1, -v1
558 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
559 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
560 ; GFX11-NEXT: v_log_f32_e32 v0, v0
561 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
562 ; GFX11-NEXT: v_cvt_f32_f16_e64 v3, -v3
563 ; GFX11-NEXT: v_log_f32_e32 v2, v2
564 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
565 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
566 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v2, v3, v2
567 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
568 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
569 ; GFX11-NEXT: v_exp_f32_e32 v1, v2
570 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
571 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
572 ; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
573 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
574 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
575 ; GFX11-NEXT: s_setpc_b64 s[30:31]
576 %y.fneg = fneg <2 x half> %y
577 %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
; <2 x half> pow with both operands negated (fneg of %x and of %y before the
; call). From GFX8 onward the checks show a "-" source modifier on every
; v_cvt_f32_f16 that widens an input. The GFX6 checks pack each operand's two
; halves into one register and apply v_xor_b32 0x80008000 to each packed
; value before widening.
581 define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
582 ; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
584 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
586 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
587 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
588 ; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
589 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
590 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
591 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
592 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
593 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
594 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
595 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
596 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
597 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
598 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
599 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
600 ; GFX6-NEXT: v_log_f32_e32 v0, v0
601 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
602 ; GFX6-NEXT: v_log_f32_e32 v1, v1
603 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v0
604 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
605 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v3, v1
606 ; GFX6-NEXT: v_exp_f32_e32 v1, v1
607 ; GFX6-NEXT: s_setpc_b64 s[30:31]
609 ; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
611 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612 ; GFX8-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
613 ; GFX8-NEXT: v_cvt_f32_f16_e64 v0, -v0
614 ; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
615 ; GFX8-NEXT: v_cvt_f32_f16_e64 v1, -v1
616 ; GFX8-NEXT: v_log_f32_e32 v2, v2
617 ; GFX8-NEXT: v_log_f32_e32 v0, v0
618 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
619 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
620 ; GFX8-NEXT: v_exp_f32_e32 v1, v2
621 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
622 ; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
623 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
624 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
625 ; GFX8-NEXT: s_setpc_b64 s[30:31]
627 ; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
629 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
631 ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -v0
632 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
633 ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -v1
634 ; GFX9-NEXT: v_log_f32_e32 v2, v2
635 ; GFX9-NEXT: v_log_f32_e32 v0, v0
636 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
637 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
638 ; GFX9-NEXT: v_exp_f32_e32 v1, v2
639 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
640 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
641 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
642 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
643 ; GFX9-NEXT: s_setpc_b64 s[30:31]
645 ; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs:
647 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648 ; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
649 ; GFX90A-NEXT: v_cvt_f32_f16_e64 v0, -v0
650 ; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
651 ; GFX90A-NEXT: v_cvt_f32_f16_e64 v1, -v1
652 ; GFX90A-NEXT: v_log_f32_e32 v2, v2
653 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
654 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
655 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
656 ; GFX90A-NEXT: v_exp_f32_e32 v1, v2
657 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
658 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
659 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
660 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
661 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
663 ; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
665 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
667 ; GFX10-NEXT: v_cvt_f32_f16_e64 v0, -v0
668 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
669 ; GFX10-NEXT: v_cvt_f32_f16_e64 v1, -v1
670 ; GFX10-NEXT: v_log_f32_e32 v2, v2
671 ; GFX10-NEXT: v_log_f32_e32 v0, v0
672 ; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
673 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
674 ; GFX10-NEXT: v_exp_f32_e32 v1, v2
675 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
676 ; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1
677 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
678 ; GFX10-NEXT: v_pack_b32_f16 v0, v0, v1
679 ; GFX10-NEXT: s_setpc_b64 s[30:31]
681 ; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs:
683 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
685 ; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -v0
686 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
687 ; GFX11-NEXT: v_cvt_f32_f16_e64 v1, -v1
688 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
689 ; GFX11-NEXT: v_cvt_f32_f16_e64 v2, -v2
690 ; GFX11-NEXT: v_log_f32_e32 v0, v0
691 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
692 ; GFX11-NEXT: v_cvt_f32_f16_e64 v3, -v3
693 ; GFX11-NEXT: v_log_f32_e32 v2, v2
694 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
695 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
696 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v2, v3, v2
697 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
698 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
699 ; GFX11-NEXT: v_exp_f32_e32 v1, v2
700 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
701 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
702 ; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
703 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
704 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
705 ; GFX11-NEXT: s_setpc_b64 s[30:31]
706 %x.fneg = fneg <2 x half> %x
707 %y.fneg = fneg <2 x half> %y
708 %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
713 ; define double @v_pow_f64(double %x, double %y) {
714 ; %pow = call double @llvm.pow.f64(double %x, double %y)
; Scalar f32 pow with the first operand wrapped in @llvm.fabs.f32. On every
; target the checks show the absolute value as a |v0| source modifier on
; v_log_f32_e64, followed by the usual multiply and v_exp_f32.
718 define float @v_pow_f32_fabs_lhs(float %x, float %y) {
719 ; GFX6-LABEL: v_pow_f32_fabs_lhs:
721 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722 ; GFX6-NEXT: v_log_f32_e64 v0, |v0|
723 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
724 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
725 ; GFX6-NEXT: s_setpc_b64 s[30:31]
727 ; GFX8-LABEL: v_pow_f32_fabs_lhs:
729 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730 ; GFX8-NEXT: v_log_f32_e64 v0, |v0|
731 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
732 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
733 ; GFX8-NEXT: s_setpc_b64 s[30:31]
735 ; GFX9-LABEL: v_pow_f32_fabs_lhs:
737 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
738 ; GFX9-NEXT: v_log_f32_e64 v0, |v0|
739 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
740 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
741 ; GFX9-NEXT: s_setpc_b64 s[30:31]
743 ; GFX90A-LABEL: v_pow_f32_fabs_lhs:
745 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
746 ; GFX90A-NEXT: v_log_f32_e64 v0, |v0|
747 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
748 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
749 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
751 ; GFX10-LABEL: v_pow_f32_fabs_lhs:
753 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754 ; GFX10-NEXT: v_log_f32_e64 v0, |v0|
755 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
756 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
757 ; GFX10-NEXT: s_setpc_b64 s[30:31]
759 ; GFX11-LABEL: v_pow_f32_fabs_lhs:
761 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762 ; GFX11-NEXT: v_log_f32_e64 v0, |v0|
763 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
764 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
765 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
766 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
767 ; GFX11-NEXT: s_setpc_b64 s[30:31]
768 %fabs.x = call float @llvm.fabs.f32(float %x)
769 %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
; Scalar f32 pow with the second operand wrapped in @llvm.fabs.f32. On every
; target the checks show the absolute value as a |v1| source modifier on the
; multiply instruction; the v_log_f32 and v_exp_f32 lines are unmodified.
773 define float @v_pow_f32_fabs_rhs(float %x, float %y) {
774 ; GFX6-LABEL: v_pow_f32_fabs_rhs:
776 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777 ; GFX6-NEXT: v_log_f32_e32 v0, v0
778 ; GFX6-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
779 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
780 ; GFX6-NEXT: s_setpc_b64 s[30:31]
782 ; GFX8-LABEL: v_pow_f32_fabs_rhs:
784 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
785 ; GFX8-NEXT: v_log_f32_e32 v0, v0
786 ; GFX8-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
787 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
788 ; GFX8-NEXT: s_setpc_b64 s[30:31]
790 ; GFX9-LABEL: v_pow_f32_fabs_rhs:
792 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
793 ; GFX9-NEXT: v_log_f32_e32 v0, v0
794 ; GFX9-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
795 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
796 ; GFX9-NEXT: s_setpc_b64 s[30:31]
798 ; GFX90A-LABEL: v_pow_f32_fabs_rhs:
800 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
802 ; GFX90A-NEXT: v_mul_legacy_f32 v0, |v1|, v0
803 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
804 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
806 ; GFX10-LABEL: v_pow_f32_fabs_rhs:
808 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
809 ; GFX10-NEXT: v_log_f32_e32 v0, v0
810 ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
811 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
812 ; GFX10-NEXT: s_setpc_b64 s[30:31]
814 ; GFX11-LABEL: v_pow_f32_fabs_rhs:
816 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817 ; GFX11-NEXT: v_log_f32_e32 v0, v0
818 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
819 ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v1|, v0
820 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
821 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
822 ; GFX11-NEXT: s_setpc_b64 s[30:31]
823 %fabs.y = call float @llvm.fabs.f32(float %y)
824 %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
; Test: llvm.pow.f32 where both the base (%x) and the exponent (%y) go through
; llvm.fabs.f32 first.
; Every target is checked for the log in its _e64 form reading |v0| and for
; the multiply reading |v1|. All lines after the label are -NEXT checks, so no
; standalone instruction is accepted for either fabs.
; The gfx90a checks spell the multiply without an encoding suffix, and the
; gfx1100 checks use v_mul_dx9_zero_f32 plus s_waitcnt_depctr / s_delay_alu.
828 define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
829 ; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
831 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832 ; GFX6-NEXT: v_log_f32_e64 v0, |v0|
833 ; GFX6-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
834 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
835 ; GFX6-NEXT: s_setpc_b64 s[30:31]
837 ; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
839 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
840 ; GFX8-NEXT: v_log_f32_e64 v0, |v0|
841 ; GFX8-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
842 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
843 ; GFX8-NEXT: s_setpc_b64 s[30:31]
845 ; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
847 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
848 ; GFX9-NEXT: v_log_f32_e64 v0, |v0|
849 ; GFX9-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
850 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
851 ; GFX9-NEXT: s_setpc_b64 s[30:31]
853 ; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs:
855 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
856 ; GFX90A-NEXT: v_log_f32_e64 v0, |v0|
857 ; GFX90A-NEXT: v_mul_legacy_f32 v0, |v1|, v0
858 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
859 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
861 ; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
863 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864 ; GFX10-NEXT: v_log_f32_e64 v0, |v0|
865 ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0
866 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
867 ; GFX10-NEXT: s_setpc_b64 s[30:31]
869 ; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
871 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872 ; GFX11-NEXT: v_log_f32_e64 v0, |v0|
873 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
874 ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v1|, v0
875 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
876 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
877 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; Absolute value of both operands before the pow call.
878 %fabs.x = call float @llvm.fabs.f32(float %x)
879 %fabs.y = call float @llvm.fabs.f32(float %y)
880 %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
; Test: llvm.pow.f32 in an amdgpu_ps function where the base (%x) is marked
; inreg and the exponent (%y) is not.
; The checks expect the log to read s0 directly and write v1, and the multiply
; to combine v0 with that result. No copy of s0 into a vector register is
; accepted, because every line after the label is a -NEXT check.
; These checks have no leading s_waitcnt and end with the "return to shader
; part epilog" comment rather than s_setpc_b64.
884 define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
885 ; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
887 ; GFX6-NEXT: v_log_f32_e32 v1, s0
888 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
889 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
890 ; GFX6-NEXT: ; return to shader part epilog
892 ; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
894 ; GFX8-NEXT: v_log_f32_e32 v1, s0
895 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
896 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
897 ; GFX8-NEXT: ; return to shader part epilog
899 ; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
901 ; GFX9-NEXT: v_log_f32_e32 v1, s0
902 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
903 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
904 ; GFX9-NEXT: ; return to shader part epilog
906 ; GFX90A-LABEL: v_pow_f32_sgpr_vgpr:
908 ; GFX90A-NEXT: v_log_f32_e32 v1, s0
909 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v0, v1
910 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
911 ; GFX90A-NEXT: ; return to shader part epilog
913 ; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
915 ; GFX10-NEXT: v_log_f32_e32 v1, s0
916 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
917 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
918 ; GFX10-NEXT: ; return to shader part epilog
920 ; GFX11-LABEL: v_pow_f32_sgpr_vgpr:
922 ; GFX11-NEXT: v_log_f32_e32 v1, s0
923 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
924 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
925 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
926 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
927 ; GFX11-NEXT: ; return to shader part epilog
928 %pow = call float @llvm.pow.f32(float %x, float %y)
; Test: llvm.pow.f32 in an amdgpu_ps function where the exponent (%y) is
; marked inreg and the base (%x) is not.
; The checks expect the log to operate on v0 in place and the multiply to read
; s0 directly as its first source. No copy of s0 into a vector register is
; accepted, because every line after the label is a -NEXT check.
; These checks have no leading s_waitcnt and end with the "return to shader
; part epilog" comment rather than s_setpc_b64.
932 define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
933 ; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
935 ; GFX6-NEXT: v_log_f32_e32 v0, v0
936 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
937 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
938 ; GFX6-NEXT: ; return to shader part epilog
940 ; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
942 ; GFX8-NEXT: v_log_f32_e32 v0, v0
943 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
944 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
945 ; GFX8-NEXT: ; return to shader part epilog
947 ; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
949 ; GFX9-NEXT: v_log_f32_e32 v0, v0
950 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
951 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
952 ; GFX9-NEXT: ; return to shader part epilog
954 ; GFX90A-LABEL: v_pow_f32_vgpr_sgpr:
956 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
957 ; GFX90A-NEXT: v_mul_legacy_f32 v0, s0, v0
958 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
959 ; GFX90A-NEXT: ; return to shader part epilog
961 ; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
963 ; GFX10-NEXT: v_log_f32_e32 v0, v0
964 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
965 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
966 ; GFX10-NEXT: ; return to shader part epilog
968 ; GFX11-LABEL: v_pow_f32_vgpr_sgpr:
970 ; GFX11-NEXT: v_log_f32_e32 v0, v0
971 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
972 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0
973 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
974 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
975 ; GFX11-NEXT: ; return to shader part epilog
976 %pow = call float @llvm.pow.f32(float %x, float %y)
; Test: llvm.pow.f32 in an amdgpu_ps function where both the base (%x) and
; the exponent (%y) are marked inreg.
; The checks expect the log to read s0 and write v0, and the multiply to read
; s1 as its first source with that v0 as the second. No copy of either scalar
; register is accepted, because every line after the label is a -NEXT check.
; These checks have no leading s_waitcnt and end with the "return to shader
; part epilog" comment rather than s_setpc_b64.
980 define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
981 ; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
983 ; GFX6-NEXT: v_log_f32_e32 v0, s0
984 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
985 ; GFX6-NEXT: v_exp_f32_e32 v0, v0
986 ; GFX6-NEXT: ; return to shader part epilog
988 ; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
990 ; GFX8-NEXT: v_log_f32_e32 v0, s0
991 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
992 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
993 ; GFX8-NEXT: ; return to shader part epilog
995 ; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
997 ; GFX9-NEXT: v_log_f32_e32 v0, s0
998 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
999 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
1000 ; GFX9-NEXT: ; return to shader part epilog
1002 ; GFX90A-LABEL: v_pow_f32_sgpr_sgpr:
1004 ; GFX90A-NEXT: v_log_f32_e32 v0, s0
1005 ; GFX90A-NEXT: v_mul_legacy_f32 v0, s1, v0
1006 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
1007 ; GFX90A-NEXT: ; return to shader part epilog
1009 ; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
1011 ; GFX10-NEXT: v_log_f32_e32 v0, s0
1012 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
1013 ; GFX10-NEXT: v_exp_f32_e32 v0, v0
1014 ; GFX10-NEXT: ; return to shader part epilog
1016 ; GFX11-LABEL: v_pow_f32_sgpr_sgpr:
1018 ; GFX11-NEXT: v_log_f32_e32 v0, s0
1019 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
1020 ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s1, v0
1021 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1022 ; GFX11-NEXT: v_exp_f32_e32 v0, v0
1023 ; GFX11-NEXT: ; return to shader part epilog
1024 %pow = call float @llvm.pow.f32(float %x, float %y)
; Intrinsic declarations for the test functions in this file.
; The f32 pow and fabs declarations are the ones called by the f32 tests
; above. NOTE(review): the f16, f64 and vector variants are presumably called
; by tests earlier in the file; confirm before removing any of them.
; Scalar pow overloads.
1028 declare half @llvm.pow.f16(half, half)
1029 declare float @llvm.pow.f32(float, float)
1030 declare double @llvm.pow.f64(double, double)
; Scalar fabs overloads.
1032 declare half @llvm.fabs.f16(half)
1033 declare float @llvm.fabs.f32(float)
; Two-element vector pow overloads.
1035 declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
1036 declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)