1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; powi on a half value with a variable i32 exponent. The half operand is
; passed and returned as i16 and bitcast on both sides of the intrinsic call.
; Both RUN lines share one set of checks here: the input is converted to f32,
; the exponent is converted from i32 to f32, and the result is produced by a
; log / legacy-multiply / exp sequence before converting back to f16.
5 define i16 @v_powi_f16(i16 %l, i32 %r) {
6 ; GCN-LABEL: v_powi_f16:
8 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
10 ; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1
11 ; GCN-NEXT: v_log_f32_e32 v0, v0
12 ; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
13 ; GCN-NEXT: v_exp_f32_e32 v0, v0
14 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
15 ; GCN-NEXT: s_setpc_b64 s[30:31]
; Reinterpret the i16 argument as half, call the intrinsic, and reinterpret
; the half result as i16 again.
16 %l.cast = bitcast i16 %l to half
17 %res = call half @llvm.powi.f16.i32(half %l.cast, i32 %r)
18 %res.cast = bitcast half %res to i16
; powi on a float with a variable i32 exponent. Both RUN lines share one set
; of checks: the exponent is converted from i32 to f32 and the result is
; produced by a log / legacy-multiply / exp sequence.
22 define float @v_powi_f32(float %l, i32 %r) {
23 ; GCN-LABEL: v_powi_f32:
25 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26 ; GCN-NEXT: v_log_f32_e32 v0, v0
27 ; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1
28 ; GCN-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
29 ; GCN-NEXT: v_exp_f32_e32 v0, v0
30 ; GCN-NEXT: s_setpc_b64 s[30:31]
31 %res = call float @llvm.powi.f32.i32(float %l, i32 %r)
; powi with a constant exponent of 0. The checks expect the call to fold away
; entirely: the only value-producing instruction moves the constant 1.0 into
; v0, and the input is never read.
35 define float @v_powi_0_f32(float %l) {
36 ; GCN-LABEL: v_powi_0_f32:
38 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GCN-NEXT: v_mov_b32_e32 v0, 1.0
40 ; GCN-NEXT: s_setpc_b64 s[30:31]
41 %res = call float @llvm.powi.f32.i32(float %l, i32 0)
; powi with a constant exponent of 1. The checks expect no arithmetic at all:
; only the entry wait and the return are emitted, so v0 is left untouched.
45 define float @v_powi_1_f32(float %l) {
46 ; GCN-LABEL: v_powi_1_f32:
48 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; GCN-NEXT: s_setpc_b64 s[30:31]
50 %res = call float @llvm.powi.f32.i32(float %l, i32 1)
; powi with a constant exponent of -1. The checks expect a division expansion
; (div_scale / rcp / fma refinement / div_fmas / div_fixup) that uses 1.0 as
; the constant operand and the input in v0 as the other operand.
; The two targets are checked separately because the instruction order and
; temporary register numbers differ: the fiji checks place the second
; div_scale directly after the first, the hawaii checks place it after the
; first two fma instructions.
54 define float @v_powi_neg1_f32(float %l) {
55 ; GFX7-LABEL: v_powi_neg1_f32:
57 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
59 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1
60 ; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
61 ; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
62 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
63 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
64 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
65 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
66 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
67 ; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
68 ; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
69 ; GFX7-NEXT: s_setpc_b64 s[30:31]
71 ; GFX8-LABEL: v_powi_neg1_f32:
73 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74 ; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
75 ; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
76 ; GFX8-NEXT: v_rcp_f32_e32 v3, v1
77 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
78 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
79 ; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
80 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
81 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
82 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
83 ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
84 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
85 ; GFX8-NEXT: s_setpc_b64 s[30:31]
86 %res = call float @llvm.powi.f32.i32(float %l, i32 -1)
; powi with a constant exponent of 2. The checks expect a single multiply of
; v0 by itself on both targets.
90 define float @v_powi_2_f32(float %l) {
91 ; GCN-LABEL: v_powi_2_f32:
93 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
95 ; GCN-NEXT: s_setpc_b64 s[30:31]
96 %res = call float @llvm.powi.f32.i32(float %l, i32 2)
; powi with a constant exponent of -2. The checks expect one multiply of v0
; by itself, followed by the same division expansion (div_scale / rcp / fma /
; div_fmas / div_fixup with a 1.0 operand) applied to the squared value.
; The two targets are checked separately because the instruction order and
; temporary register numbers of the division expansion differ.
100 define float @v_powi_neg2_f32(float %l) {
101 ; GFX7-LABEL: v_powi_neg2_f32:
103 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
105 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
106 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1
107 ; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
108 ; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
109 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
110 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
111 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
112 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
113 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
114 ; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
115 ; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
116 ; GFX7-NEXT: s_setpc_b64 s[30:31]
118 ; GFX8-LABEL: v_powi_neg2_f32:
120 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
122 ; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
123 ; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
124 ; GFX8-NEXT: v_rcp_f32_e32 v3, v1
125 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
126 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
127 ; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
128 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
129 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
130 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
131 ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
132 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
133 ; GFX8-NEXT: s_setpc_b64 s[30:31]
134 %res = call float @llvm.powi.f32.i32(float %l, i32 -2)
; powi with a constant exponent of 4. The checks expect two successive
; multiplies of v0 by itself (repeated squaring) on both targets.
138 define float @v_powi_4_f32(float %l) {
139 ; GCN-LABEL: v_powi_4_f32:
141 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
143 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
144 ; GCN-NEXT: s_setpc_b64 s[30:31]
145 %res = call float @llvm.powi.f32.i32(float %l, i32 4)
; powi with a constant exponent of 8. The checks expect three successive
; multiplies of v0 by itself (repeated squaring) on both targets.
149 define float @v_powi_8_f32(float %l) {
150 ; GCN-LABEL: v_powi_8_f32:
152 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
154 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
155 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
156 ; GCN-NEXT: s_setpc_b64 s[30:31]
157 %res = call float @llvm.powi.f32.i32(float %l, i32 8)
; powi with a constant exponent of 16. The checks expect four successive
; multiplies of v0 by itself (repeated squaring) on both targets.
161 define float @v_powi_16_f32(float %l) {
162 ; GCN-LABEL: v_powi_16_f32:
164 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
166 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
167 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
168 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
169 ; GCN-NEXT: s_setpc_b64 s[30:31]
170 %res = call float @llvm.powi.f32.i32(float %l, i32 16)
; powi with a constant exponent of 128. The checks expect seven successive
; multiplies of v0 by itself (repeated squaring) on both targets.
174 define float @v_powi_128_f32(float %l) {
175 ; GCN-LABEL: v_powi_128_f32:
177 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
179 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
180 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
181 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
182 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
183 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
184 ; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
185 ; GCN-NEXT: s_setpc_b64 s[30:31]
186 %res = call float @llvm.powi.f32.i32(float %l, i32 128)
; powi with a constant exponent of -128. The checks expect seven successive
; multiplies of v0 by itself (repeated squaring), followed by the division
; expansion (div_scale / rcp / fma / div_fmas / div_fixup with a 1.0 operand)
; applied to that product.
; The two targets are checked separately because the instruction order and
; temporary register numbers of the division expansion differ.
190 define float @v_powi_neg128_f32(float %l) {
191 ; GFX7-LABEL: v_powi_neg128_f32:
193 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
195 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
196 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
197 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
198 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
199 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
200 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
201 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
202 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1
203 ; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
204 ; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
205 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
206 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
207 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
208 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
209 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
210 ; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
211 ; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
212 ; GFX7-NEXT: s_setpc_b64 s[30:31]
214 ; GFX8-LABEL: v_powi_neg128_f32:
216 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
218 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
219 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
220 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
221 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
222 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
223 ; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
224 ; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
225 ; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
226 ; GFX8-NEXT: v_rcp_f32_e32 v3, v1
227 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
228 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
229 ; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
230 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
231 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
232 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
233 ; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
234 ; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
235 ; GFX8-NEXT: s_setpc_b64 s[30:31]
236 %res = call float @llvm.powi.f32.i32(float %l, i32 -128)
241 ; define double @v_powi_f64(double %l, i32 %r) {
242 ; %res = call double @llvm.powi.f64.i32(double %l, i32 %r)
; Declarations of the powi intrinsic overloads referenced by this test, all
; sharing attribute group #0. In the visible part of the file the f64
; overload is referenced only by the commented-out v_powi_f64 test.
246 declare half @llvm.powi.f16.i32(half, i32) #0
247 declare float @llvm.powi.f32.i32(float, i32) #0
248 declare double @llvm.powi.f64.i32(double, i32) #0
; Attribute group applied to every powi declaration above.
250 attributes #0 = { nounwind readnone speculatable willreturn }