1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-SDAG,SI-SDAG %s
3 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-GISEL,SI-GISEL %s
4 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-SDAG,VI-SDAG %s
5 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-GISEL,VI-GISEL %s
6 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-SDAG,GFX900-SDAG %s
7 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-GISEL,GFX900-GISEL %s
9 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
10 ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
; Scalar f32 case. The kernel calls llvm.exp2.f32 on the float argument %in
; and stores the result through %out.
; The expected code for each checked target wraps the hardware exp in a range
; fixup. When %in is below -126.0 (0xc2fc0000) it adds 64.0 (0x42800000) to
; the input and multiplies the exp result by 0x1f800000 (about 5.42e-20, that
; is 2^-64); otherwise it adds 0 and multiplies by 1.0.
; NOTE(review) presumably this keeps results that land in the denormal range
; accurate; confirm against the lowering code.
12 define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) {
13 ; SI-SDAG-LABEL: s_exp2_f32:
15 ; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb
16 ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
17 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
18 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
19 ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
20 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
21 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
22 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
23 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
24 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
25 ; SI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
26 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
27 ; SI-SDAG-NEXT: s_mov_b32 s2, -1
28 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
29 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
30 ; SI-SDAG-NEXT: s_endpgm
32 ; SI-GISEL-LABEL: s_exp2_f32:
34 ; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb
35 ; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
36 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
37 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
38 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
39 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
40 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
41 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
42 ; SI-GISEL-NEXT: v_add_f32_e32 v0, s2, v0
43 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
44 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
45 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
46 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
47 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
48 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
49 ; SI-GISEL-NEXT: s_endpgm
51 ; VI-SDAG-LABEL: s_exp2_f32:
53 ; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
54 ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
55 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
56 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
57 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
58 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
59 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
60 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
61 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
62 ; VI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
63 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
64 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, v1, v0
65 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
66 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
67 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
68 ; VI-SDAG-NEXT: s_endpgm
70 ; VI-GISEL-LABEL: s_exp2_f32:
72 ; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
73 ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
74 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
75 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
76 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
77 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
78 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
79 ; VI-GISEL-NEXT: v_add_f32_e32 v0, s2, v0
80 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
81 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
82 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
83 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
84 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
85 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
86 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
87 ; VI-GISEL-NEXT: s_endpgm
89 ; GFX900-SDAG-LABEL: s_exp2_f32:
90 ; GFX900-SDAG: ; %bb.0:
91 ; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
92 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
93 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
94 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
95 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
96 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
97 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
98 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
99 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
100 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
101 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
102 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
103 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
104 ; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[2:3]
105 ; GFX900-SDAG-NEXT: s_endpgm
107 ; GFX900-GISEL-LABEL: s_exp2_f32:
108 ; GFX900-GISEL: ; %bb.0:
109 ; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
110 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
111 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
112 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
113 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
114 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
115 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
116 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s2, v0
117 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
118 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
119 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
120 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
121 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
122 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
123 ; GFX900-GISEL-NEXT: s_endpgm
125 ; R600-LABEL: s_exp2_f32:
127 ; R600-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[]
128 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
131 ; R600-NEXT: ALU clause starting at 4:
132 ; R600-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
133 ; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
134 ; R600-NEXT: CNDE * T1.W, PV.W, 0.0, literal.x,
135 ; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
136 ; R600-NEXT: ADD T1.W, KC0[2].Z, PV.W,
137 ; R600-NEXT: CNDE * T0.W, T0.W, 1.0, literal.x,
138 ; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
139 ; R600-NEXT: EXP_IEEE * T0.X, PV.W,
140 ; R600-NEXT: MUL_IEEE T0.X, PS, T0.W,
141 ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
142 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
144 ; CM-LABEL: s_exp2_f32:
146 ; CM-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
147 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
150 ; CM-NEXT: ALU clause starting at 4:
151 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
152 ; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
153 ; CM-NEXT: CNDE * T1.W, PV.W, 0.0, literal.x,
154 ; CM-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
155 ; CM-NEXT: CNDE T0.Z, T0.W, 1.0, literal.x,
156 ; CM-NEXT: ADD * T0.W, KC0[2].Z, PV.W,
157 ; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
158 ; CM-NEXT: EXP_IEEE T0.X, T0.W,
159 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
160 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
161 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
162 ; CM-NEXT: MUL_IEEE * T0.X, PV.X, T0.Z,
163 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
164 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
165 %result = call float @llvm.exp2.f32(float %in)
166 store float %result, ptr addrspace(1) %out
170 ; FIXME: We should be able to merge these packets together on Cayman so we
171 ; have a maximum of 4 instructions.
; <2 x float> case. The kernel calls llvm.exp2.v2f32 on %in and stores the
; result through %out.
; The checks expect the range-fixup sequence once per element (compare against
; -126.0, conditionally add 64.0, exp, conditionally multiply by 0x1f800000)
; followed by a single store of both results.
; In the Cayman checks each EXP_IEEE occupies four slots, three of them MASKED.
172 define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
173 ; SI-SDAG-LABEL: s_exp2_v2f32:
175 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
176 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
177 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
178 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
179 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
180 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
181 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
182 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
183 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
184 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
185 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
186 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
187 ; SI-SDAG-NEXT: v_add_f32_e32 v4, s3, v4
188 ; SI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
189 ; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
190 ; SI-SDAG-NEXT: v_exp_f32_e32 v3, v1
191 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
192 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
193 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
194 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v2
195 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v3, v0
196 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
197 ; SI-SDAG-NEXT: s_endpgm
199 ; SI-GISEL-LABEL: s_exp2_v2f32:
201 ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
202 ; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
203 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
204 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
205 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
206 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6
207 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, s7
208 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
209 ; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
210 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
211 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
212 ; SI-GISEL-NEXT: v_add_f32_e32 v2, s6, v2
213 ; SI-GISEL-NEXT: v_add_f32_e32 v0, s7, v0
214 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
215 ; SI-GISEL-NEXT: v_exp_f32_e32 v3, v0
216 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
217 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
218 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
219 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
220 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
221 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
222 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
223 ; SI-GISEL-NEXT: s_endpgm
225 ; VI-SDAG-LABEL: s_exp2_v2f32:
227 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
228 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
229 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
230 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
231 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
232 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
233 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
234 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
235 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
236 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
237 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
238 ; VI-SDAG-NEXT: v_add_f32_e32 v4, s3, v4
239 ; VI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
240 ; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4
241 ; VI-SDAG-NEXT: v_exp_f32_e32 v2, v1
242 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3
243 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v2, v0
244 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1
245 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0
246 ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
247 ; VI-SDAG-NEXT: s_endpgm
249 ; VI-GISEL-LABEL: s_exp2_v2f32:
251 ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
252 ; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
253 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
254 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
255 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
256 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s6
257 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s7
258 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
259 ; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
260 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
261 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
262 ; VI-GISEL-NEXT: v_add_f32_e32 v2, s6, v2
263 ; VI-GISEL-NEXT: v_add_f32_e32 v0, s7, v0
264 ; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
265 ; VI-GISEL-NEXT: v_exp_f32_e32 v3, v0
266 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
267 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
268 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
269 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
270 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
271 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
272 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
273 ; VI-GISEL-NEXT: s_endpgm
275 ; GFX900-SDAG-LABEL: s_exp2_v2f32:
276 ; GFX900-SDAG: ; %bb.0:
277 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
278 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
279 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
280 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
281 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0
282 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
283 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
284 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
285 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
286 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
287 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
288 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
289 ; GFX900-SDAG-NEXT: v_add_f32_e32 v4, s3, v4
290 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
291 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v4, v4
292 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v1
293 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3
294 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v2, v0
295 ; GFX900-SDAG-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1]
296 ; GFX900-SDAG-NEXT: s_endpgm
298 ; GFX900-GISEL-LABEL: s_exp2_v2f32:
299 ; GFX900-GISEL: ; %bb.0:
300 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
301 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
302 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
303 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
304 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
305 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6
306 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, s7
307 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
308 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
309 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
310 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
311 ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, s6, v2
312 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s7, v0
313 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
314 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v0
315 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
316 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
317 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
318 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
319 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
320 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
321 ; GFX900-GISEL-NEXT: s_endpgm
323 ; R600-LABEL: s_exp2_v2f32:
325 ; R600-NEXT: ALU 18, @4, KC0[CB0:0-32], KC1[]
326 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
329 ; R600-NEXT: ALU clause starting at 4:
330 ; R600-NEXT: SETGT T0.W, literal.x, KC0[3].X,
331 ; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].W,
332 ; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
333 ; R600-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
334 ; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
335 ; R600-NEXT: ADD T2.W, KC0[3].X, PV.W,
336 ; R600-NEXT: CNDE * T3.W, T1.W, 0.0, literal.x,
337 ; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
338 ; R600-NEXT: ADD T0.Z, KC0[2].W, PS,
339 ; R600-NEXT: CNDE T0.W, T0.W, 1.0, literal.x,
340 ; R600-NEXT: EXP_IEEE * T0.X, PV.W,
341 ; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
342 ; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
343 ; R600-NEXT: CNDE T0.W, T1.W, 1.0, literal.x,
344 ; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
345 ; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
346 ; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
347 ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
348 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
350 ; CM-LABEL: s_exp2_v2f32:
352 ; CM-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[]
353 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
356 ; CM-NEXT: ALU clause starting at 4:
357 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].X,
358 ; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
359 ; CM-NEXT: CNDE T0.Z, PV.W, 0.0, literal.x,
360 ; CM-NEXT: SETGT * T1.W, literal.y, KC0[2].W,
361 ; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
362 ; CM-NEXT: CNDE T0.Y, PV.W, 0.0, literal.x,
363 ; CM-NEXT: CNDE T1.Z, T0.W, 1.0, literal.y,
364 ; CM-NEXT: ADD * T0.W, KC0[3].X, PV.Z,
365 ; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
366 ; CM-NEXT: EXP_IEEE T0.X, T0.W,
367 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
368 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
369 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
370 ; CM-NEXT: MUL_IEEE T1.Y, PV.X, T1.Z,
371 ; CM-NEXT: CNDE T0.Z, T1.W, 1.0, literal.x,
372 ; CM-NEXT: ADD * T0.W, KC0[2].W, T0.Y,
373 ; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
374 ; CM-NEXT: EXP_IEEE T0.X, T0.W,
375 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
376 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
377 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
378 ; CM-NEXT: MUL_IEEE * T1.X, PV.X, T0.Z,
379 ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
380 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
381 %result = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in)
382 store <2 x float> %result, ptr addrspace(1) %out
; <3 x float> case. The kernel calls llvm.exp2.v3f32 on %in and stores the
; result through %out.
; The checks expect the range-fixup sequence once per element (compare against
; -126.0, conditionally add 64.0, exp, conditionally multiply by 0x1f800000).
; The tahiti checks split the result store into a dwordx2 plus a dword at
; offset 8, and the R600/Cayman checks also use two stores, while the tonga
; and gfx900 checks use one dwordx3 store.
386 define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
387 ; SI-SDAG-LABEL: s_exp2_v3f32:
389 ; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
390 ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
391 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
392 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
393 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
394 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
395 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
396 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
397 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
398 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
399 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
400 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
401 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
402 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
403 ; SI-SDAG-NEXT: v_add_f32_e32 v4, s5, v4
404 ; SI-SDAG-NEXT: v_add_f32_e32 v0, s6, v0
405 ; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
406 ; SI-SDAG-NEXT: v_add_f32_e32 v6, s4, v6
407 ; SI-SDAG-NEXT: v_exp_f32_e32 v3, v0
408 ; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6
409 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc
410 ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
411 ; SI-SDAG-NEXT: s_mov_b32 s2, -1
412 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v2
413 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, v3, v7
414 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v6, v5
415 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
416 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
417 ; SI-SDAG-NEXT: s_endpgm
419 ; SI-GISEL-LABEL: s_exp2_v3f32:
421 ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
422 ; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
423 ; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
424 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
425 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
426 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
427 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
428 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
429 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
430 ; SI-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
431 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
432 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
433 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
434 ; SI-GISEL-NEXT: s_mov_b32 s10, -1
435 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
436 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, s5
437 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
438 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
439 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v2
440 ; SI-GISEL-NEXT: v_add_f32_e32 v4, s5, v4
441 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
442 ; SI-GISEL-NEXT: v_exp_f32_e32 v4, v4
443 ; SI-GISEL-NEXT: v_add_f32_e32 v1, s6, v1
444 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v1
445 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
446 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1
447 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
448 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
449 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
450 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
451 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:8
452 ; SI-GISEL-NEXT: s_endpgm
454 ; VI-SDAG-LABEL: s_exp2_v3f32:
456 ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
457 ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
458 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
459 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
460 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
461 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
462 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
463 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
464 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
465 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
466 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
467 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
468 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
469 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
470 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
471 ; VI-SDAG-NEXT: v_add_f32_e32 v4, s6, v4
472 ; VI-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
473 ; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4
474 ; VI-SDAG-NEXT: v_add_f32_e32 v6, s5, v6
475 ; VI-SDAG-NEXT: v_exp_f32_e32 v3, v1
476 ; VI-SDAG-NEXT: v_exp_f32_e32 v6, v6
477 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, v4, v2
478 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v3, v0
479 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1
480 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, v6, v5
481 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0
482 ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
483 ; VI-SDAG-NEXT: s_endpgm
485 ; VI-GISEL-LABEL: s_exp2_v3f32:
487 ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
488 ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
489 ; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
490 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
491 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
492 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
493 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
494 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
495 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
496 ; VI-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
497 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
498 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
499 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
500 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
501 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
502 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
503 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v2
504 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
505 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
506 ; VI-GISEL-NEXT: v_add_f32_e32 v4, s5, v4
507 ; VI-GISEL-NEXT: v_add_f32_e32 v1, s6, v1
508 ; VI-GISEL-NEXT: v_exp_f32_e32 v4, v4
509 ; VI-GISEL-NEXT: v_exp_f32_e32 v2, v1
510 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
511 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
512 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1
513 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
514 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3
515 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s2
516 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
517 ; VI-GISEL-NEXT: s_endpgm
519 ; GFX900-SDAG-LABEL: s_exp2_v3f32:
520 ; GFX900-SDAG: ; %bb.0:
521 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
522 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
523 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
524 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
525 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
526 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
527 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
528 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
529 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
530 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
531 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
532 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
533 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
534 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
535 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
536 ; GFX900-SDAG-NEXT: v_add_f32_e32 v4, s6, v4
537 ; GFX900-SDAG-NEXT: v_add_f32_e32 v6, s5, v6
538 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
539 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v4, v4
540 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v6, v6
541 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v1
542 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0
543 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v4, v2
544 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v6, v5
545 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v3, v0
546 ; GFX900-SDAG-NEXT: global_store_dwordx3 v7, v[0:2], s[2:3]
547 ; GFX900-SDAG-NEXT: s_endpgm
549 ; GFX900-GISEL-LABEL: s_exp2_v3f32:
550 ; GFX900-GISEL: ; %bb.0:
551 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
552 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
553 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
554 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
555 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
556 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
557 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
558 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
559 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
560 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
561 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
562 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
563 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
564 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
565 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s5
566 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
567 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v2
568 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
569 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
570 ; GFX900-GISEL-NEXT: v_add_f32_e32 v4, s5, v4
571 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s6, v1
572 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v4, v4
573 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v1
574 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
575 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
576 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1
577 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
578 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
579 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
580 ; GFX900-GISEL-NEXT: s_endpgm
582 ; R600-LABEL: s_exp2_v3f32:
584 ; R600-NEXT: ALU 29, @4, KC0[CB0:0-32], KC1[]
585 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
586 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
588 ; R600-NEXT: ALU clause starting at 4:
589 ; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z,
590 ; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].Y,
591 ; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
592 ; R600-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
593 ; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
594 ; R600-NEXT: ADD T2.W, KC0[3].Z, PV.W,
595 ; R600-NEXT: CNDE * T3.W, T1.W, 0.0, literal.x,
596 ; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
597 ; R600-NEXT: ADD T0.Y, KC0[3].Y, PS,
598 ; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].W,
599 ; R600-NEXT: CNDE T0.W, T0.W, 1.0, literal.y,
600 ; R600-NEXT: EXP_IEEE * T0.X, PV.W,
601 ; R600-NEXT: -1023672320(-1.260000e+02), 528482304(5.421011e-20)
602 ; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W,
603 ; R600-NEXT: CNDE T1.Z, PV.Z, 0.0, literal.x,
604 ; R600-NEXT: CNDE T0.W, T1.W, 1.0, literal.y,
605 ; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
606 ; R600-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
607 ; R600-NEXT: MUL_IEEE T1.X, PS, PV.W,
608 ; R600-NEXT: ADD T0.W, KC0[3].W, PV.Z,
609 ; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
610 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
611 ; R600-NEXT: CNDE T1.W, T0.Z, 1.0, literal.x,
612 ; R600-NEXT: EXP_IEEE * T0.Y, PV.W,
613 ; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
614 ; R600-NEXT: MUL_IEEE T2.X, PS, PV.W,
615 ; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
616 ; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00)
617 ; R600-NEXT: LSHR * T3.X, PV.W, literal.x,
618 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
620 ; CM-LABEL: s_exp2_v3f32:
622 ; CM-NEXT: ALU 35, @4, KC0[CB0:0-32], KC1[]
623 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
624 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T2.X
626 ; CM-NEXT: ALU clause starting at 4:
627 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].W,
628 ; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
629 ; CM-NEXT: CNDE T0.Y, PV.W, 0.0, literal.x,
630 ; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z,
631 ; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Y,
632 ; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
633 ; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x,
634 ; CM-NEXT: CNDE T1.Y, PV.Z, 0.0, literal.x,
635 ; CM-NEXT: CNDE T1.Z, T0.W, 1.0, literal.y,
636 ; CM-NEXT: ADD * T0.W, KC0[3].W, PV.Y,
637 ; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
638 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
639 ; CM-NEXT: EXP_IEEE T0.Y, T0.W,
640 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
641 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
642 ; CM-NEXT: MUL_IEEE T1.X, PV.Y, T1.Z,
643 ; CM-NEXT: CNDE T0.Y, T0.Z, 1.0, literal.x,
644 ; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
645 ; CM-NEXT: ADD * T0.W, KC0[3].Z, T1.Y,
646 ; CM-NEXT: 528482304(5.421011e-20), 8(1.121039e-44)
647 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
648 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
649 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
650 ; CM-NEXT: EXP_IEEE * T0.W, T0.W,
651 ; CM-NEXT: LSHR T2.X, T0.Z, literal.x,
652 ; CM-NEXT: MUL_IEEE T0.Y, PV.W, T0.Y,
653 ; CM-NEXT: CNDE T0.Z, T1.W, 1.0, literal.y,
654 ; CM-NEXT: ADD * T0.W, KC0[3].Y, T0.X,
655 ; CM-NEXT: 2(2.802597e-45), 528482304(5.421011e-20)
656 ; CM-NEXT: EXP_IEEE T0.X, T0.W,
657 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
658 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
659 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
660 ; CM-NEXT: MUL_IEEE * T0.X, PV.X, T0.Z,
661 ; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
662 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
663 %result = call <3 x float> @llvm.exp2.v3f32(<3 x float> %in)
664 store <3 x float> %result, ptr addrspace(1) %out
668 ; FIXME: We should be able to merge these packets together on Cayman so we
669 ; have a maximum of 4 instructions.
; Kernel test: applies @llvm.exp2.v4f32 to the <4 x float> kernel argument %in
; and stores the result through %out.
; For each element the GCN check lines expect a compare of the input against
; 0xc2fc0000 (-126.0), a select of 0x42800000 (64.0) or 0 that is added to the
; input before v_exp_f32, and a final multiply by 0x1f800000 (2^-64) or 1.0
; chosen by the same compare. The R600 and CM check lines show the same three
; literals in decimal form next to SETGT / CNDE / EXP_IEEE / MUL_IEEE.
; NOTE(review): the input offset and result rescale presumably exist to handle
; results that would otherwise be denormal -- confirm against the AMDGPU exp2
; lowering.
670 define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
671 ; SI-SDAG-LABEL: s_exp2_v4f32:
673 ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
674 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
675 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
676 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
677 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
678 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
679 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
680 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
681 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
682 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
683 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
684 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
685 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
686 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0
687 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc
688 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v3, vcc
689 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
690 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
691 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
692 ; SI-SDAG-NEXT: v_add_f32_e32 v4, s3, v4
693 ; SI-SDAG-NEXT: v_add_f32_e32 v6, s2, v6
694 ; SI-SDAG-NEXT: v_add_f32_e32 v8, s1, v8
695 ; SI-SDAG-NEXT: v_add_f32_e32 v1, s0, v1
696 ; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
697 ; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6
698 ; SI-SDAG-NEXT: v_exp_f32_e32 v8, v8
699 ; SI-SDAG-NEXT: v_exp_f32_e32 v9, v1
700 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
701 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, v4, v2
702 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, v6, v5
703 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, v8, v7
704 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v9, v0
705 ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
706 ; SI-SDAG-NEXT: s_endpgm
708 ; SI-GISEL-LABEL: s_exp2_v4f32:
710 ; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
711 ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
712 ; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
713 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
714 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
715 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
716 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8
717 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
718 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, s9
719 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
720 ; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
721 ; SI-GISEL-NEXT: v_add_f32_e32 v0, s8, v0
722 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v3, s[0:1]
723 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
724 ; SI-GISEL-NEXT: v_add_f32_e32 v1, s9, v1
725 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
726 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
727 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
728 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
729 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1]
730 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
731 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
732 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5
733 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
734 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
735 ; SI-GISEL-NEXT: v_add_f32_e32 v5, s10, v5
736 ; SI-GISEL-NEXT: v_add_f32_e32 v2, s11, v2
737 ; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5
738 ; SI-GISEL-NEXT: v_exp_f32_e32 v3, v2
739 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
740 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
741 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2
742 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
743 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
744 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
745 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
746 ; SI-GISEL-NEXT: s_endpgm
748 ; VI-SDAG-LABEL: s_exp2_v4f32:
750 ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
751 ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
752 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
753 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
754 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
755 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
756 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
757 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
758 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
759 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
760 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
761 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
762 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
763 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc
764 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v3, vcc
765 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
766 ; VI-SDAG-NEXT: v_add_f32_e32 v4, s7, v4
767 ; VI-SDAG-NEXT: v_add_f32_e32 v6, s6, v6
768 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
769 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
770 ; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4
771 ; VI-SDAG-NEXT: v_exp_f32_e32 v6, v6
772 ; VI-SDAG-NEXT: v_add_f32_e32 v8, s5, v8
773 ; VI-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
774 ; VI-SDAG-NEXT: v_exp_f32_e32 v8, v8
775 ; VI-SDAG-NEXT: v_exp_f32_e32 v9, v1
776 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, v4, v2
777 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, v6, v5
778 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1
779 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, v8, v7
780 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v9, v0
781 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0
782 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
783 ; VI-SDAG-NEXT: s_endpgm
785 ; VI-GISEL-LABEL: s_exp2_v4f32:
787 ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
788 ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
789 ; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
790 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
791 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
792 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
793 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
794 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
795 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5
796 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
797 ; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
798 ; VI-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
799 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v3, s[0:1]
800 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
801 ; VI-GISEL-NEXT: v_add_f32_e32 v1, s5, v1
802 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
803 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
804 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
805 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
806 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1]
807 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
808 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
809 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5
810 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
811 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
812 ; VI-GISEL-NEXT: v_add_f32_e32 v5, s6, v5
813 ; VI-GISEL-NEXT: v_add_f32_e32 v2, s7, v2
814 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
815 ; VI-GISEL-NEXT: v_exp_f32_e32 v3, v2
816 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
817 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
818 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2
819 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
820 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3
821 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2
822 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
823 ; VI-GISEL-NEXT: s_endpgm
825 ; GFX900-SDAG-LABEL: s_exp2_v4f32:
826 ; GFX900-SDAG: ; %bb.0:
827 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
828 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
829 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
830 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
831 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
832 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
833 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
834 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
835 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
836 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
837 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc
838 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v3, vcc
839 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
840 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v8, 1.0, v1, vcc
841 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v3, vcc
842 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
843 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
844 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
845 ; GFX900-SDAG-NEXT: v_add_f32_e32 v5, s7, v5
846 ; GFX900-SDAG-NEXT: v_add_f32_e32 v7, s6, v7
847 ; GFX900-SDAG-NEXT: v_add_f32_e32 v9, s5, v9
848 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
849 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v5, v5
850 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v7, v7
851 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v9, v9
852 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v10, v1
853 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
854 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, v5, v2
855 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v7, v6
856 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v9, v8
857 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v10, v0
858 ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
859 ; GFX900-SDAG-NEXT: s_endpgm
861 ; GFX900-GISEL-LABEL: s_exp2_v4f32:
862 ; GFX900-GISEL: ; %bb.0:
863 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
864 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
865 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
866 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
867 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
868 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
869 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
870 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
871 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s5
872 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
873 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
874 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
875 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v3, s[0:1]
876 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
877 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s5, v1
878 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
879 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
880 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
881 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
882 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1]
883 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
884 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
885 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5
886 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
887 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
888 ; GFX900-GISEL-NEXT: v_add_f32_e32 v5, s6, v5
889 ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, s7, v2
890 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v5
891 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v2
892 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
893 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
894 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2
895 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
896 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
897 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
898 ; GFX900-GISEL-NEXT: s_endpgm
900 ; R600-LABEL: s_exp2_v4f32:
902 ; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[]
903 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
906 ; R600-NEXT: ALU clause starting at 4:
907 ; R600-NEXT: SETGT T0.W, literal.x, KC0[4].X,
908 ; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].W,
909 ; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
910 ; R600-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
911 ; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
912 ; R600-NEXT: ADD T0.Z, KC0[4].X, PV.W,
913 ; R600-NEXT: SETGT T2.W, literal.x, KC0[3].Z,
914 ; R600-NEXT: CNDE * T3.W, T1.W, 0.0, literal.y,
915 ; R600-NEXT: -1023672320(-1.260000e+02), 1115684864(6.400000e+01)
916 ; R600-NEXT: ADD T0.X, KC0[3].W, PS,
917 ; R600-NEXT: CNDE T0.Y, T0.W, 1.0, literal.x,
918 ; R600-NEXT: SETGT T1.Z, literal.y, KC0[3].Y,
919 ; R600-NEXT: CNDE T0.W, PV.W, 0.0, literal.z,
920 ; R600-NEXT: EXP_IEEE * T0.Z, PV.Z,
921 ; R600-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02)
922 ; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
923 ; R600-NEXT: ADD T1.X, KC0[3].Z, PV.W,
924 ; R600-NEXT: CNDE T1.Y, T1.W, 1.0, literal.x,
925 ; R600-NEXT: CNDE T2.Z, PV.Z, 0.0, literal.y,
926 ; R600-NEXT: MUL_IEEE T0.W, PS, PV.Y,
927 ; R600-NEXT: EXP_IEEE * T0.X, PV.X,
928 ; R600-NEXT: 528482304(5.421011e-20), 1115684864(6.400000e+01)
929 ; R600-NEXT: ADD T2.Y, KC0[3].Y, PV.Z,
930 ; R600-NEXT: MUL_IEEE T0.Z, PS, PV.Y,
931 ; R600-NEXT: CNDE T1.W, T2.W, 1.0, literal.x,
932 ; R600-NEXT: EXP_IEEE * T0.X, PV.X,
933 ; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
934 ; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
935 ; R600-NEXT: CNDE T1.W, T1.Z, 1.0, literal.x,
936 ; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
937 ; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
938 ; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
939 ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
940 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
942 ; CM-LABEL: s_exp2_v4f32:
944 ; CM-NEXT: ALU 43, @4, KC0[CB0:0-32], KC1[]
945 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
948 ; CM-NEXT: ALU clause starting at 4:
949 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[4].X,
950 ; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
951 ; CM-NEXT: CNDE T0.Y, PV.W, 0.0, literal.x,
952 ; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W,
953 ; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Z,
954 ; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
955 ; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x,
956 ; CM-NEXT: CNDE T1.Y, T0.W, 1.0, literal.y,
957 ; CM-NEXT: CNDE T1.Z, PV.Z, 0.0, literal.x,
958 ; CM-NEXT: ADD * T0.W, KC0[4].X, PV.Y,
959 ; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
960 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
961 ; CM-NEXT: EXP_IEEE T0.Y, T0.W,
962 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
963 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
964 ; CM-NEXT: CNDE T1.X, T0.Z, 1.0, literal.x,
965 ; CM-NEXT: SETGT T2.Y, literal.y, KC0[3].Y,
966 ; CM-NEXT: ADD T0.Z, KC0[3].W, T1.Z,
967 ; CM-NEXT: MUL_IEEE * T0.W, PV.Y, T1.Y,
968 ; CM-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02)
969 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.Z,
970 ; CM-NEXT: EXP_IEEE T0.Y, T0.Z,
971 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.Z,
972 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.Z,
973 ; CM-NEXT: CNDE T2.X, T2.Y, 0.0, literal.x,
974 ; CM-NEXT: CNDE T1.Y, T1.W, 1.0, literal.y,
975 ; CM-NEXT: MUL_IEEE T0.Z, PV.Y, T1.X,
976 ; CM-NEXT: ADD * T1.W, KC0[3].Z, T0.X, BS:VEC_021/SCL_122
977 ; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
978 ; CM-NEXT: EXP_IEEE T0.X, T1.W,
979 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
980 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
981 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
982 ; CM-NEXT: MUL_IEEE T0.Y, PV.X, T1.Y,
983 ; CM-NEXT: CNDE T1.Z, T2.Y, 1.0, literal.x,
984 ; CM-NEXT: ADD * T1.W, KC0[3].Y, T2.X,
985 ; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
986 ; CM-NEXT: EXP_IEEE T0.X, T1.W,
987 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
988 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
989 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
990 ; CM-NEXT: MUL_IEEE * T0.X, PV.X, T1.Z,
991 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
992 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
993 %result = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
994 store <4 x float> %result, ptr addrspace(1) %out
; Baseline scalar test: @llvm.exp2.f32 on the float argument %in, with no
; fast-math flags on the call and no function attributes.
; Both GCN selectors are expected to compare the input against 0xc2fc0000
; (-126.0), add 0x42800000 (64.0) or 0 before v_exp_f32, and then multiply by
; 0x1f800000 (2^-64) or 1.0 depending on that compare.
998 define float @v_exp2_f32(float %in) {
999 ; GCN-SDAG-LABEL: v_exp2_f32:
1000 ; GCN-SDAG: ; %bb.0:
1001 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1002 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1003 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1004 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1005 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1006 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1007 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1008 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1009 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1010 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1011 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1013 ; GCN-GISEL-LABEL: v_exp2_f32:
1014 ; GCN-GISEL: ; %bb.0:
1015 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1016 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1017 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1018 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1019 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1020 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1021 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1022 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1023 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1024 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1025 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1027 ; R600-LABEL: v_exp2_f32:
1032 ; CM-LABEL: v_exp2_f32:
1036 %result = call float @llvm.exp2.f32(float %in)
; exp2 of fabs(%in): @llvm.fabs.f32 feeds @llvm.exp2.f32.
; The GCN check lines expect the fabs to appear as a |v0| source modifier on
; both the compare against 0xc2fc0000 (-126.0) and the add that precedes
; v_exp_f32, rather than as a separate instruction.
1040 define float @v_exp2_fabs_f32(float %in) {
1041 ; GCN-SDAG-LABEL: v_exp2_fabs_f32:
1042 ; GCN-SDAG: ; %bb.0:
1043 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1045 ; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
1046 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1047 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1048 ; GCN-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2
1049 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1050 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1051 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1052 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1053 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1055 ; GCN-GISEL-LABEL: v_exp2_fabs_f32:
1056 ; GCN-GISEL: ; %bb.0:
1057 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1059 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1060 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
1061 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1062 ; GCN-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
1063 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1064 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1065 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1066 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1067 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1069 ; R600-LABEL: v_exp2_fabs_f32:
1074 ; CM-LABEL: v_exp2_fabs_f32:
1078 %fabs = call float @llvm.fabs.f32(float %in)
1079 %result = call float @llvm.exp2.f32(float %fabs)
; exp2 of -fabs(%in): fabs, then fneg, then @llvm.exp2.f32.
; The two selectors are expected to fold the negation differently. The SDAG
; lines compare |v0| > 0x42fc0000 (126.0), while the GISEL lines compare
; -|v0| < 0xc2fc0000 (-126.0). Both then subtract |v0| from the selected
; offset (v_sub_f32) instead of adding a negated input.
1083 define float @v_exp2_fneg_fabs_f32(float %in) {
1084 ; GCN-SDAG-LABEL: v_exp2_fneg_fabs_f32:
1085 ; GCN-SDAG: ; %bb.0:
1086 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1087 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000
1088 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
1089 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1090 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1091 ; GCN-SDAG-NEXT: v_sub_f32_e64 v0, v2, |v0|
1092 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1093 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1094 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1095 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1096 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1098 ; GCN-GISEL-LABEL: v_exp2_fneg_fabs_f32:
1099 ; GCN-GISEL: ; %bb.0:
1100 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1102 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1103 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
1104 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1105 ; GCN-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0|
1106 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1107 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1108 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1109 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1110 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1112 ; R600-LABEL: v_exp2_fneg_fabs_f32:
1117 ; CM-LABEL: v_exp2_fneg_fabs_f32:
1121 %fabs = call float @llvm.fabs.f32(float %in)
1122 %fneg.fabs = fneg float %fabs
1123 %result = call float @llvm.exp2.f32(float %fneg.fabs)
; exp2 of -%in: fneg feeds @llvm.exp2.f32.
; The SDAG lines expect the negation folded into the threshold, comparing
; 0x42fc0000 (126.0) < v0. The GISEL lines expect a -v0 source modifier on the
; compare against 0xc2fc0000 (-126.0). Both expect v_sub_f32 of v0 from the
; selected offset before v_exp_f32.
1127 define float @v_exp2_fneg_f32(float %in) {
1128 ; GCN-SDAG-LABEL: v_exp2_fneg_f32:
1129 ; GCN-SDAG: ; %bb.0:
1130 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1131 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000
1132 ; GCN-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
1133 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1134 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1135 ; GCN-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
1136 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1137 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1138 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1139 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1140 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1142 ; GCN-GISEL-LABEL: v_exp2_fneg_f32:
1143 ; GCN-GISEL: ; %bb.0:
1144 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1145 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1146 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1147 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
1148 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1149 ; GCN-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
1150 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1151 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1152 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1153 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1154 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1156 ; R600-LABEL: v_exp2_fneg_f32:
1161 ; CM-LABEL: v_exp2_fneg_f32:
1165 %fneg = fneg float %in
1166 %result = call float @llvm.exp2.f32(float %fneg)
; @llvm.exp2.f32 with the `fast` flag on the call.
; The GCN check lines still expect the full compare / select / add sequence
; before v_exp_f32 and the select / multiply after it, so `fast` alone is not
; expected to drop the input offset and result rescale.
1170 define float @v_exp2_f32_fast(float %in) {
1171 ; GCN-SDAG-LABEL: v_exp2_f32_fast:
1172 ; GCN-SDAG: ; %bb.0:
1173 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1174 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1175 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1176 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1177 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1178 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1179 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1180 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1181 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1182 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1183 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1185 ; GCN-GISEL-LABEL: v_exp2_f32_fast:
1186 ; GCN-GISEL: ; %bb.0:
1187 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1188 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1189 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1190 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1191 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1192 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1193 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1194 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1195 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1196 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1197 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1199 ; R600-LABEL: v_exp2_f32_fast:
1204 ; CM-LABEL: v_exp2_f32_fast:
1208 %result = call fast float @llvm.exp2.f32(float %in)
; @llvm.exp2.f32 with no flags on the call, inside a function carrying the
; "unsafe-fp-math"="true" attribute.
; The GCN check lines still expect the compare / select / add before v_exp_f32
; and the select / multiply after it.
1212 define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
1213 ; GCN-SDAG-LABEL: v_exp2_f32_unsafe_math_attr:
1214 ; GCN-SDAG: ; %bb.0:
1215 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1216 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1217 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1218 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1219 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1220 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1221 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1222 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1223 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1224 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1225 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1227 ; GCN-GISEL-LABEL: v_exp2_f32_unsafe_math_attr:
1228 ; GCN-GISEL: ; %bb.0:
1229 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1231 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1232 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1233 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1234 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1235 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1236 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1237 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1238 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1239 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1241 ; R600-LABEL: v_exp2_f32_unsafe_math_attr:
1246 ; CM-LABEL: v_exp2_f32_unsafe_math_attr:
1250 %result = call float @llvm.exp2.f32(float %in)
; @llvm.exp2.f32 with no flags on the call, inside a function carrying the
; "approx-func-fp-math"="true" attribute.
; The GCN check lines still expect the compare / select / add before v_exp_f32
; and the select / multiply after it.
1254 define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
1255 ; GCN-SDAG-LABEL: v_exp2_f32_approx_fn_attr:
1256 ; GCN-SDAG: ; %bb.0:
1257 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1258 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1259 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1260 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1261 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1262 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1263 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1264 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1265 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1266 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1267 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1269 ; GCN-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
1270 ; GCN-GISEL: ; %bb.0:
1271 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1272 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1273 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1274 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1275 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1276 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1277 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1278 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1279 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1280 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1281 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1283 ; R600-LABEL: v_exp2_f32_approx_fn_attr:
1288 ; CM-LABEL: v_exp2_f32_approx_fn_attr:
1292 %result = call float @llvm.exp2.f32(float %in)
; @llvm.exp2.f32 with the `ninf` flag on the call and no function attributes.
; The GCN check lines still expect the compare / select / add before v_exp_f32
; and the select / multiply after it.
1296 define float @v_exp2_f32_ninf(float %in) {
1297 ; GCN-SDAG-LABEL: v_exp2_f32_ninf:
1298 ; GCN-SDAG: ; %bb.0:
1299 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1300 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1301 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1302 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1303 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1304 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1305 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1306 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1307 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1308 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1309 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1311 ; GCN-GISEL-LABEL: v_exp2_f32_ninf:
1312 ; GCN-GISEL: ; %bb.0:
1313 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1314 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1315 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1316 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1317 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1318 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1319 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1320 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1321 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1322 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1323 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1325 ; R600-LABEL: v_exp2_f32_ninf:
1330 ; CM-LABEL: v_exp2_f32_ninf:
1334 %result = call ninf float @llvm.exp2.f32(float %in)
; @llvm.exp2.f32 with the `afn` flag on the call and no function attributes.
; The GCN check lines still expect the compare / select / add before v_exp_f32
; and the select / multiply after it.
1338 define float @v_exp2_f32_afn(float %in) {
1339 ; GCN-SDAG-LABEL: v_exp2_f32_afn:
1340 ; GCN-SDAG: ; %bb.0:
1341 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1343 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1344 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1345 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1346 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1347 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1348 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1349 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1350 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1351 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1353 ; GCN-GISEL-LABEL: v_exp2_f32_afn:
1354 ; GCN-GISEL: ; %bb.0:
1355 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1357 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1358 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1359 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1360 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1361 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1362 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1363 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1364 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1365 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1367 ; R600-LABEL: v_exp2_f32_afn:
1372 ; CM-LABEL: v_exp2_f32_afn:
1376 %result = call afn float @llvm.exp2.f32(float %in)
; @llvm.exp2.f32 with the `afn` flag, inside a function using attribute
; group #0.
; The shared GCN check lines expect a bare v_exp_f32 with no compare, select,
; add or multiply around it.
; NOTE(review): attribute group #0 is defined outside this excerpt; the _daz
; suffix suggests a denormal-flushing mode -- confirm against the definition.
1380 define float @v_exp2_f32_afn_daz(float %in) #0 {
1381 ; GCN-LABEL: v_exp2_f32_afn_daz:
1383 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1384 ; GCN-NEXT: v_exp_f32_e32 v0, v0
1385 ; GCN-NEXT: s_setpc_b64 s[30:31]
1387 ; R600-LABEL: v_exp2_f32_afn_daz:
1392 ; CM-LABEL: v_exp2_f32_afn_daz:
1396 %result = call afn float @llvm.exp2.f32(float %in)
; @llvm.exp2.f32 with the `afn` flag, inside a function using attribute
; group #1.
; The GCN check lines expect the full compare / select / add before v_exp_f32
; and the select / multiply after it.
; NOTE(review): attribute group #1 is defined outside this excerpt; the
; _dynamic suffix suggests a dynamic denormal mode -- confirm against the
; definition.
1400 define float @v_exp2_f32_afn_dynamic(float %in) #1 {
1401 ; GCN-SDAG-LABEL: v_exp2_f32_afn_dynamic:
1402 ; GCN-SDAG: ; %bb.0:
1403 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1405 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1406 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1407 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1408 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1409 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1410 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1411 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1412 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1413 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1415 ; GCN-GISEL-LABEL: v_exp2_f32_afn_dynamic:
1416 ; GCN-GISEL: ; %bb.0:
1417 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1418 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1419 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1420 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1421 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1422 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1423 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1424 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1425 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1426 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1427 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1429 ; R600-LABEL: v_exp2_f32_afn_dynamic:
1434 ; CM-LABEL: v_exp2_f32_afn_dynamic:
1438 %result = call afn float @llvm.exp2.f32(float %in)
; exp2 of fabs(%in) with the `afn` flag on the exp2 call (the fabs call has no
; flags).
; The GCN check lines expect |v0| source modifiers on the compare against
; 0xc2fc0000 (-126.0) and on the add before v_exp_f32, followed by the select /
; multiply rescale.
1442 define float @v_fabs_exp2_f32_afn(float %in) {
1443 ; GCN-SDAG-LABEL: v_fabs_exp2_f32_afn:
1444 ; GCN-SDAG: ; %bb.0:
1445 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1447 ; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
1448 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1449 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1450 ; GCN-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2
1451 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1452 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1453 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1454 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1455 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1457 ; GCN-GISEL-LABEL: v_fabs_exp2_f32_afn:
1458 ; GCN-GISEL: ; %bb.0:
1459 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1460 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1461 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1462 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
1463 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1464 ; GCN-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
1465 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1466 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1467 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1468 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1469 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1471 ; R600-LABEL: v_fabs_exp2_f32_afn:
1476 ; CM-LABEL: v_fabs_exp2_f32_afn:
1480 %fabs = call float @llvm.fabs.f32(float %in)
1481 %result = call afn float @llvm.exp2.f32(float %fabs)
; @llvm.exp2.f32 with no flags on the call, inside a function using attribute
; group #0.
; The shared GCN check lines expect a bare v_exp_f32 with no compare, select,
; add or multiply around it.
; NOTE(review): attribute group #0 is defined outside this excerpt; the _daz
; suffix suggests a denormal-flushing mode -- confirm against the definition.
1485 define float @v_exp2_f32_daz(float %in) #0 {
1486 ; GCN-LABEL: v_exp2_f32_daz:
1488 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1489 ; GCN-NEXT: v_exp_f32_e32 v0, v0
1490 ; GCN-NEXT: s_setpc_b64 s[30:31]
1492 ; R600-LABEL: v_exp2_f32_daz:
1497 ; CM-LABEL: v_exp2_f32_daz:
1501 %result = call float @llvm.exp2.f32(float %in)
; @llvm.exp2.f32 with the `nnan` flag on the call and no function attributes.
; The GCN check lines still expect the compare / select / add before v_exp_f32
; and the select / multiply after it.
1505 define float @v_exp2_f32_nnan(float %in) {
1506 ; GCN-SDAG-LABEL: v_exp2_f32_nnan:
1507 ; GCN-SDAG: ; %bb.0:
1508 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1509 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1510 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1511 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1512 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1513 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1514 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1515 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1516 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1517 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1518 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1520 ; GCN-GISEL-LABEL: v_exp2_f32_nnan:
1521 ; GCN-GISEL: ; %bb.0:
1522 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1524 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1525 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1526 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1527 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1528 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1529 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1530 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1531 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1532 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1534 ; R600-LABEL: v_exp2_f32_nnan:
1539 ; CM-LABEL: v_exp2_f32_nnan:
1543 %result = call nnan float @llvm.exp2.f32(float %in)
; @llvm.exp2.f32 with the `nnan` flag, inside a function using attribute
; group #0.
; The shared GCN check lines expect a bare v_exp_f32 with no compare, select,
; add or multiply around it.
; NOTE(review): attribute group #0 is defined outside this excerpt; the _daz
; suffix suggests a denormal-flushing mode -- confirm against the definition.
1547 define float @v_exp2_f32_nnan_daz(float %in) #0 {
1548 ; GCN-LABEL: v_exp2_f32_nnan_daz:
1550 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1551 ; GCN-NEXT: v_exp_f32_e32 v0, v0
1552 ; GCN-NEXT: s_setpc_b64 s[30:31]
1554 ; R600-LABEL: v_exp2_f32_nnan_daz:
1559 ; CM-LABEL: v_exp2_f32_nnan_daz:
1563 %result = call nnan float @llvm.exp2.f32(float %in)
; @llvm.exp2.f32 with the `nnan` flag, inside a function using attribute
; group #1.
; The GCN check lines expect the full compare / select / add before v_exp_f32
; and the select / multiply after it.
; NOTE(review): attribute group #1 is defined outside this excerpt; the
; _dynamic suffix suggests a dynamic denormal mode -- confirm against the
; definition.
1567 define float @v_exp2_f32_nnan_dynamic(float %in) #1 {
1568 ; GCN-SDAG-LABEL: v_exp2_f32_nnan_dynamic:
1569 ; GCN-SDAG: ; %bb.0:
1570 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1571 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1572 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1573 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1574 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1575 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1576 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1577 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1578 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1579 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1580 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1582 ; GCN-GISEL-LABEL: v_exp2_f32_nnan_dynamic:
1583 ; GCN-GISEL: ; %bb.0:
1584 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1585 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1586 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1587 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1588 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1589 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1590 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1591 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1592 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1593 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1594 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1596 ; R600-LABEL: v_exp2_f32_nnan_dynamic:
1601 ; CM-LABEL: v_exp2_f32_nnan_dynamic:
1605 %result = call nnan float @llvm.exp2.f32(float %in)
; exp2 on f32 with the ninf flag under attribute group #0 (defined outside
; this excerpt; the _daz suffix suggests flushed denormals -- TODO confirm).
; Every GCN run is expected to emit a bare v_exp_f32 with no scaling.
1609 define float @v_exp2_f32_ninf_daz(float %in) #0 {
1610 ; GCN-LABEL: v_exp2_f32_ninf_daz:
1612 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1613 ; GCN-NEXT: v_exp_f32_e32 v0, v0
1614 ; GCN-NEXT: s_setpc_b64 s[30:31]
1616 ; R600-LABEL: v_exp2_f32_ninf_daz:
1621 ; CM-LABEL: v_exp2_f32_ninf_daz:
1625 %result = call ninf float @llvm.exp2.f32(float %in)
; exp2 on f32 with the ninf flag under attribute group #1 (defined outside
; this excerpt; the _dynamic suffix suggests a dynamic denormal mode --
; TODO confirm). Both selectors are expected to keep the range-scaling
; sequence: add 0x42800000 (64.0) to inputs below 0xc2fc0000 (-126.0), then
; multiply the v_exp_f32 result by 0x1f800000 (2^-64).
1629 define float @v_exp2_f32_ninf_dynamic(float %in) #1 {
1630 ; GCN-SDAG-LABEL: v_exp2_f32_ninf_dynamic:
1631 ; GCN-SDAG: ; %bb.0:
1632 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1633 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1634 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1635 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1636 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1637 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1638 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1639 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1640 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1641 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1642 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1644 ; GCN-GISEL-LABEL: v_exp2_f32_ninf_dynamic:
1645 ; GCN-GISEL: ; %bb.0:
1646 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1647 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1648 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1649 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1650 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1651 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1652 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1653 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1654 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1655 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1656 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1658 ; R600-LABEL: v_exp2_f32_ninf_dynamic:
1663 ; CM-LABEL: v_exp2_f32_ninf_dynamic:
1667 %result = call ninf float @llvm.exp2.f32(float %in)
; exp2 on f32 with both nnan and ninf flags and no attribute group on the
; function. The expected output still contains the range-scaling sequence on
; both selectors: add 0x42800000 (64.0) to inputs below 0xc2fc0000 (-126.0),
; then multiply the v_exp_f32 result by 0x1f800000 (2^-64).
1671 define float @v_exp2_f32_nnan_ninf(float %in) {
1672 ; GCN-SDAG-LABEL: v_exp2_f32_nnan_ninf:
1673 ; GCN-SDAG: ; %bb.0:
1674 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1675 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1676 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1677 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1678 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1679 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1680 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1681 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1682 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1683 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1684 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1686 ; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf:
1687 ; GCN-GISEL: ; %bb.0:
1688 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1690 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1691 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1692 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1693 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1694 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1695 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1696 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1697 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1698 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1700 ; R600-LABEL: v_exp2_f32_nnan_ninf:
1705 ; CM-LABEL: v_exp2_f32_nnan_ninf:
1709 %result = call nnan ninf float @llvm.exp2.f32(float %in)
; exp2 on f32 with nnan and ninf flags under attribute group #0 (defined
; outside this excerpt; the _daz suffix suggests flushed denormals -- TODO
; confirm). Every GCN run is expected to emit a bare v_exp_f32.
1713 define float @v_exp2_f32_nnan_ninf_daz(float %in) #0 {
1714 ; GCN-LABEL: v_exp2_f32_nnan_ninf_daz:
1716 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1717 ; GCN-NEXT: v_exp_f32_e32 v0, v0
1718 ; GCN-NEXT: s_setpc_b64 s[30:31]
1720 ; R600-LABEL: v_exp2_f32_nnan_ninf_daz:
1725 ; CM-LABEL: v_exp2_f32_nnan_ninf_daz:
1729 %result = call nnan ninf float @llvm.exp2.f32(float %in)
; exp2 on f32 with nnan and ninf flags under attribute group #1 (defined
; outside this excerpt; the _dynamic suffix suggests a dynamic denormal
; mode -- TODO confirm). Both selectors are expected to keep the
; range-scaling sequence: add 0x42800000 (64.0) to inputs below 0xc2fc0000
; (-126.0), then multiply the v_exp_f32 result by 0x1f800000 (2^-64).
1733 define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 {
1734 ; GCN-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic:
1735 ; GCN-SDAG: ; %bb.0:
1736 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1737 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1738 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1739 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1740 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1741 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1742 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1743 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1744 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1745 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1746 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1748 ; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic:
1749 ; GCN-GISEL: ; %bb.0:
1750 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1751 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1752 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1753 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1754 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1755 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1756 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1757 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1758 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1759 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1760 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1762 ; R600-LABEL: v_exp2_f32_nnan_ninf_dynamic:
1767 ; CM-LABEL: v_exp2_f32_nnan_ninf_dynamic:
1771 %result = call nnan ninf float @llvm.exp2.f32(float %in)
; exp2 on f32 with the full `fast` flag set under attribute group #0
; (defined outside this excerpt; the _daz suffix suggests flushed
; denormals -- TODO confirm). Every GCN run is expected to emit a bare
; v_exp_f32.
1775 define float @v_exp2_f32_fast_daz(float %in) #0 {
1776 ; GCN-LABEL: v_exp2_f32_fast_daz:
1778 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1779 ; GCN-NEXT: v_exp_f32_e32 v0, v0
1780 ; GCN-NEXT: s_setpc_b64 s[30:31]
1782 ; R600-LABEL: v_exp2_f32_fast_daz:
1787 ; CM-LABEL: v_exp2_f32_fast_daz:
1791 %result = call fast float @llvm.exp2.f32(float %in)
; exp2 on f32 with no fast-math flags under attribute group #1 (defined
; outside this excerpt; the name suggests a dynamic denormal mode -- TODO
; confirm). Both selectors are expected to keep the range-scaling sequence:
; add 0x42800000 (64.0) to inputs below 0xc2fc0000 (-126.0), then multiply
; the v_exp_f32 result by 0x1f800000 (2^-64).
1795 define float @v_exp2_f32_dynamic_mode(float %in) #1 {
1796 ; GCN-SDAG-LABEL: v_exp2_f32_dynamic_mode:
1797 ; GCN-SDAG: ; %bb.0:
1798 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1799 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1800 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1801 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1802 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1803 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1804 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1805 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1806 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1807 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1808 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1810 ; GCN-GISEL-LABEL: v_exp2_f32_dynamic_mode:
1811 ; GCN-GISEL: ; %bb.0:
1812 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1813 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1814 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1815 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1816 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1817 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1818 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1819 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1820 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1821 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1822 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1824 ; R600-LABEL: v_exp2_f32_dynamic_mode:
1829 ; CM-LABEL: v_exp2_f32_dynamic_mode:
1833 %result = call float @llvm.exp2.f32(float %in)
; exp2 of an undef f32 operand. The SelectionDAG runs are expected to emit a
; single v_exp_f32 on the literal 0x7fc00000. The GlobalISel runs are
; expected to keep the whole compare/scale sequence and read s4, which is
; never written in the checked instructions.
; NOTE(review): the GlobalISel output looks like a missed fold of the undef
; operand -- confirm whether that is intentional.
1837 define float @v_exp2_f32_undef() {
1838 ; GCN-SDAG-LABEL: v_exp2_f32_undef:
1839 ; GCN-SDAG: ; %bb.0:
1840 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1841 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, 0x7fc00000
1842 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1844 ; GCN-GISEL-LABEL: v_exp2_f32_undef:
1845 ; GCN-GISEL: ; %bb.0:
1846 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1847 ; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
1848 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
1849 ; GCN-GISEL-NEXT: v_add_f32_e32 v1, s4, v1
1850 ; GCN-GISEL-NEXT: v_add_f32_e64 v2, s4, 0
1851 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
1852 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
1853 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1854 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1855 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1856 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1857 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1859 ; R600-LABEL: v_exp2_f32_undef:
1864 ; CM-LABEL: v_exp2_f32_undef:
1868 %result = call float @llvm.exp2.f32(float undef)
; exp2 of the constant 0.0. Every GCN run is expected to fold the call away
; and return the inline constant 1.0 through a single v_mov_b32.
1872 define float @v_exp2_f32_0() {
1873 ; GCN-LABEL: v_exp2_f32_0:
1875 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1876 ; GCN-NEXT: v_mov_b32_e32 v0, 1.0
1877 ; GCN-NEXT: s_setpc_b64 s[30:31]
1879 ; R600-LABEL: v_exp2_f32_0:
1884 ; CM-LABEL: v_exp2_f32_0:
1898 %result = call float @llvm.exp2.f32(float 0.0)
; exp2 on an f32 produced by fpext from half (the half itself is a bitcast
; of the i16 argument). Every GCN run is expected to emit v_cvt_f32_f16
; followed by a bare v_exp_f32, with no compare/scale sequence.
1902 define float @v_exp2_f32_from_fpext_f16(i16 %src.i) {
1903 ; GCN-LABEL: v_exp2_f32_from_fpext_f16:
1905 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1906 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
1907 ; GCN-NEXT: v_exp_f32_e32 v0, v0
1908 ; GCN-NEXT: s_setpc_b64 s[30:31]
1910 ; R600-LABEL: v_exp2_f32_from_fpext_f16:
1915 ; CM-LABEL: v_exp2_f32_from_fpext_f16:
1919 %src = bitcast i16 %src.i to half
1920 %fpext = fpext half %src to float
1921 %result = call float @llvm.exp2.f32(float %fpext)
; exp2 on an f32 produced by fpext of a half fadd. Expected output differs
; per target and selector:
;  - tahiti SelectionDAG performs the add in f32 and keeps the compare/scale
;    sequence around v_exp_f32;
;  - tahiti GlobalISel rounds the sum through f16 and uses a bare v_exp_f32;
;  - tonga and gfx900 use v_add_f16, v_cvt_f32_f16 and a bare v_exp_f32.
1925 define float @v_exp2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
1926 ; SI-SDAG-LABEL: v_exp2_f32_from_fpext_math_f16:
1928 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1929 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1930 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1931 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1932 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
1933 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
1934 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1935 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
1936 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
1937 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
1938 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1939 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1940 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1941 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
1943 ; SI-GISEL-LABEL: v_exp2_f32_from_fpext_math_f16:
1944 ; SI-GISEL: ; %bb.0:
1945 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1946 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1947 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1948 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1949 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1950 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1951 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
1952 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
1954 ; VI-LABEL: v_exp2_f32_from_fpext_math_f16:
1956 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1957 ; VI-NEXT: v_add_f16_e32 v0, v0, v1
1958 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
1959 ; VI-NEXT: v_exp_f32_e32 v0, v0
1960 ; VI-NEXT: s_setpc_b64 s[30:31]
1962 ; GFX900-LABEL: v_exp2_f32_from_fpext_math_f16:
1964 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1965 ; GFX900-NEXT: v_add_f16_e32 v0, v0, v1
1966 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
1967 ; GFX900-NEXT: v_exp_f32_e32 v0, v0
1968 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1970 ; R600-LABEL: v_exp2_f32_from_fpext_math_f16:
1975 ; CM-LABEL: v_exp2_f32_from_fpext_math_f16:
1986 %src0 = bitcast i16 %src0.i to half
1987 %src1 = bitcast i16 %src1.i to half
1988 %fadd = fadd half %src0, %src1
1989 %fpext = fpext half %fadd to float
1990 %result = call float @llvm.exp2.f32(float %fpext)
; exp2 on an f32 produced by fpext from a bfloat argument. The SelectionDAG
; runs are expected to apply the compare/scale sequence directly to v0 with
; no conversion instruction. The GlobalISel runs are expected to emit
; v_cvt_f32_f16 followed by a bare v_exp_f32.
; NOTE(review): the GlobalISel checks convert the bfloat input with a
; half-precision conversion, which looks like a selector limitation rather
; than intended codegen -- confirm before relying on those lines.
1994 define float @v_exp2_f32_from_fpext_bf16(bfloat %src) {
1995 ; GCN-SDAG-LABEL: v_exp2_f32_from_fpext_bf16:
1996 ; GCN-SDAG: ; %bb.0:
1997 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1999 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2000 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
2001 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
2002 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
2003 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
2004 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
2005 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2006 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
2007 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
2009 ; GCN-GISEL-LABEL: v_exp2_f32_from_fpext_bf16:
2010 ; GCN-GISEL: ; %bb.0:
2011 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2012 ; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2013 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
2014 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
2016 ; R600-LABEL: v_exp2_f32_from_fpext_bf16:
2021 ; CM-LABEL: v_exp2_f32_from_fpext_bf16:
2036 %fpext = fpext bfloat %src to float
2037 %result = call float @llvm.exp2.f32(float %fpext)
2041 ; FIXME: Fold out fp16_to_fp (FP_TO_FP16) on no-f16 targets
; exp2 on a scalar half with no fast-math flags. tahiti is expected to
; compute through v_exp_f32 with f16<->f32 conversions around it (the
; SelectionDAG output carries an extra conversion pair on entry and exit);
; tonga and gfx900 are expected to use a single v_exp_f16.
2042 define half @v_exp2_f16(half %in) {
2043 ; SI-SDAG-LABEL: v_exp2_f16:
2045 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2046 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2047 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2048 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2049 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2050 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2051 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2053 ; SI-GISEL-LABEL: v_exp2_f16:
2054 ; SI-GISEL: ; %bb.0:
2055 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2056 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2057 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2058 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2059 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2061 ; VI-LABEL: v_exp2_f16:
2063 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2064 ; VI-NEXT: v_exp_f16_e32 v0, v0
2065 ; VI-NEXT: s_setpc_b64 s[30:31]
2067 ; GFX900-LABEL: v_exp2_f16:
2069 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070 ; GFX900-NEXT: v_exp_f16_e32 v0, v0
2071 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2073 ; R600-LABEL: v_exp2_f16:
2078 ; CM-LABEL: v_exp2_f16:
2087 %result = call half @llvm.exp2.f16(half %in)
; exp2 of fabs on a scalar half. The absolute value is expected to fold into
; a |v0| source modifier: on v_cvt_f32_f16 for tahiti (which then uses
; v_exp_f32), and directly on v_exp_f16 for tonga and gfx900.
2091 define half @v_exp2_fabs_f16(half %in) {
2092 ; SI-SDAG-LABEL: v_exp2_fabs_f16:
2094 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2095 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2096 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
2097 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2098 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2099 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2100 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2102 ; SI-GISEL-LABEL: v_exp2_fabs_f16:
2103 ; SI-GISEL: ; %bb.0:
2104 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2105 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
2106 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2107 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2108 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2110 ; VI-LABEL: v_exp2_fabs_f16:
2112 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2113 ; VI-NEXT: v_exp_f16_e64 v0, |v0|
2114 ; VI-NEXT: s_setpc_b64 s[30:31]
2116 ; GFX900-LABEL: v_exp2_fabs_f16:
2118 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2119 ; GFX900-NEXT: v_exp_f16_e64 v0, |v0|
2120 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2122 ; R600-LABEL: v_exp2_fabs_f16:
2127 ; CM-LABEL: v_exp2_fabs_f16:
2136 %fabs = call half @llvm.fabs.f16(half %in)
2137 %result = call half @llvm.exp2.f16(half %fabs)
; exp2 of fneg(fabs(x)) on a scalar half. The combined operation is expected
; to fold into a -|v0| source modifier: on v_cvt_f32_f16 for tahiti (which
; then uses v_exp_f32), and directly on v_exp_f16 for tonga and gfx900.
2141 define half @v_exp2_fneg_fabs_f16(half %in) {
2142 ; SI-SDAG-LABEL: v_exp2_fneg_fabs_f16:
2144 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2145 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2146 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
2147 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2148 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2149 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2150 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2152 ; SI-GISEL-LABEL: v_exp2_fneg_fabs_f16:
2153 ; SI-GISEL: ; %bb.0:
2154 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2155 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
2156 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2157 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2158 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2160 ; VI-LABEL: v_exp2_fneg_fabs_f16:
2162 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2163 ; VI-NEXT: v_exp_f16_e64 v0, -|v0|
2164 ; VI-NEXT: s_setpc_b64 s[30:31]
2166 ; GFX900-LABEL: v_exp2_fneg_fabs_f16:
2168 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169 ; GFX900-NEXT: v_exp_f16_e64 v0, -|v0|
2170 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2172 ; R600-LABEL: v_exp2_fneg_fabs_f16:
2177 ; CM-LABEL: v_exp2_fneg_fabs_f16:
2186 %fabs = call half @llvm.fabs.f16(half %in)
2187 %fneg.fabs = fneg half %fabs
2188 %result = call half @llvm.exp2.f16(half %fneg.fabs)
; exp2 of fneg on a scalar half. The negation is expected to fold into a -v0
; source modifier: on a conversion instruction for tahiti (v_cvt_f16_f32 for
; SelectionDAG, v_cvt_f32_f16 for GlobalISel) ahead of v_exp_f32, and
; directly on v_exp_f16 for tonga and gfx900.
2192 define half @v_exp2_fneg_f16(half %in) {
2193 ; SI-SDAG-LABEL: v_exp2_fneg_f16:
2195 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2196 ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
2197 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2198 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2199 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2200 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2201 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2203 ; SI-GISEL-LABEL: v_exp2_fneg_f16:
2204 ; SI-GISEL: ; %bb.0:
2205 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2206 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
2207 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2208 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2209 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2211 ; VI-LABEL: v_exp2_fneg_f16:
2213 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2214 ; VI-NEXT: v_exp_f16_e64 v0, -v0
2215 ; VI-NEXT: s_setpc_b64 s[30:31]
2217 ; GFX900-LABEL: v_exp2_fneg_f16:
2219 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2220 ; GFX900-NEXT: v_exp_f16_e64 v0, -v0
2221 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2223 ; R600-LABEL: v_exp2_fneg_f16:
2228 ; CM-LABEL: v_exp2_fneg_f16:
2237 %fneg = fneg half %in
2238 %result = call half @llvm.exp2.f16(half %fneg)
; exp2 on a scalar half with the full `fast` flag set. tahiti is expected to
; compute through v_exp_f32 with f16<->f32 conversions around it; tonga and
; gfx900 are expected to use a single v_exp_f16.
2242 define half @v_exp2_f16_fast(half %in) {
2243 ; SI-SDAG-LABEL: v_exp2_f16_fast:
2245 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2246 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2247 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2248 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2249 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2250 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2251 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2253 ; SI-GISEL-LABEL: v_exp2_f16_fast:
2254 ; SI-GISEL: ; %bb.0:
2255 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2256 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2257 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2258 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2259 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2261 ; VI-LABEL: v_exp2_f16_fast:
2263 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2264 ; VI-NEXT: v_exp_f16_e32 v0, v0
2265 ; VI-NEXT: s_setpc_b64 s[30:31]
2267 ; GFX900-LABEL: v_exp2_f16_fast:
2269 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2270 ; GFX900-NEXT: v_exp_f16_e32 v0, v0
2271 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2273 ; R600-LABEL: v_exp2_f16_fast:
2278 ; CM-LABEL: v_exp2_f16_fast:
2287 %result = call fast half @llvm.exp2.f16(half %in)
; exp2 on a <2 x half> vector. Expected output:
;  - tahiti handles each element in its own register through v_exp_f32 with
;    f16<->f32 conversions;
;  - tonga uses v_exp_f16 for the low half and an SDWA v_exp_f16 writing
;    WORD_1 for the high half, then combines them with v_or_b32;
;  - gfx900 uses the same pair of exp instructions but repacks with
;    v_pack_b32_f16 (SelectionDAG) or v_lshl_or_b32 (GlobalISel).
2291 define <2 x half> @v_exp2_v2f16(<2 x half> %in) {
2292 ; SI-SDAG-LABEL: v_exp2_v2f16:
2294 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2295 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2296 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2297 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2298 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2299 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2300 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2301 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2302 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2303 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2304 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2305 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2307 ; SI-GISEL-LABEL: v_exp2_v2f16:
2308 ; SI-GISEL: ; %bb.0:
2309 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2310 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2311 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
2312 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2313 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2314 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2315 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
2316 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2318 ; VI-SDAG-LABEL: v_exp2_v2f16:
2320 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2321 ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2322 ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
2323 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
2324 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2326 ; VI-GISEL-LABEL: v_exp2_v2f16:
2327 ; VI-GISEL: ; %bb.0:
2328 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2329 ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0
2330 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2331 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
2332 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2334 ; GFX900-SDAG-LABEL: v_exp2_v2f16:
2335 ; GFX900-SDAG: ; %bb.0:
2336 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2337 ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2338 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
2339 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
2340 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2342 ; GFX900-GISEL-LABEL: v_exp2_v2f16:
2343 ; GFX900-GISEL: ; %bb.0:
2344 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2345 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0
2346 ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2347 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2348 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2350 ; R600-LABEL: v_exp2_v2f16:
2355 ; CM-LABEL: v_exp2_v2f16:
2359 %result = call <2 x half> @llvm.exp2.v2f16(<2 x half> %in)
2360 ret <2 x half> %result
; exp2 of fabs on a <2 x half> vector. The SelectionDAG runs are expected to
; fold the absolute value into |v0| / |v1| source modifiers. The GlobalISel
; runs are expected to clear both sign bits up front with an AND against
; 0x7fff7fff (tahiti first packs the two elements into one register) and
; then use unmodified operands.
2363 define <2 x half> @v_exp2_fabs_v2f16(<2 x half> %in) {
2364 ; SI-SDAG-LABEL: v_exp2_fabs_v2f16:
2366 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2367 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2368 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2369 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
2370 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
2371 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2372 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2373 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2374 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2375 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2376 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2377 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2379 ; SI-GISEL-LABEL: v_exp2_fabs_v2f16:
2380 ; SI-GISEL: ; %bb.0:
2381 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2382 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2383 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2384 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
2385 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2386 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2387 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2388 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
2389 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2390 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2391 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2392 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
2393 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2395 ; VI-SDAG-LABEL: v_exp2_fabs_v2f16:
2397 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2398 ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, |v0| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2399 ; VI-SDAG-NEXT: v_exp_f16_e64 v0, |v0|
2400 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
2401 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2403 ; VI-GISEL-LABEL: v_exp2_fabs_v2f16:
2404 ; VI-GISEL: ; %bb.0:
2405 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2406 ; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2407 ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0
2408 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2409 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
2410 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2412 ; GFX900-SDAG-LABEL: v_exp2_fabs_v2f16:
2413 ; GFX900-SDAG: ; %bb.0:
2414 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2415 ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2416 ; GFX900-SDAG-NEXT: v_exp_f16_e64 v0, |v0|
2417 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
2418 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2420 ; GFX900-GISEL-LABEL: v_exp2_fabs_v2f16:
2421 ; GFX900-GISEL: ; %bb.0:
2422 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2423 ; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
2424 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0
2425 ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2426 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2427 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2429 ; R600-LABEL: v_exp2_fabs_v2f16:
2434 ; CM-LABEL: v_exp2_fabs_v2f16:
2438 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
2439 %result = call <2 x half> @llvm.exp2.v2f16(<2 x half> %fabs)
2440 ret <2 x half> %result
; exp2 of fneg(fabs(x)) on a <2 x half> vector. The tonga and gfx900
; SelectionDAG runs are expected to fold the operation into -|v0| source
; modifiers. tahiti SelectionDAG and every GlobalISel run are expected to
; set both sign bits explicitly with an OR against 0x80008000 before the
; exp instructions.
2443 define <2 x half> @v_exp2_fneg_fabs_v2f16(<2 x half> %in) {
2444 ; SI-SDAG-LABEL: v_exp2_fneg_fabs_v2f16:
2446 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2447 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2448 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2449 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2450 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
2451 ; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2452 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2453 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2454 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2455 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2456 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2457 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2458 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2459 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2460 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2461 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2463 ; SI-GISEL-LABEL: v_exp2_fneg_fabs_v2f16:
2464 ; SI-GISEL: ; %bb.0:
2465 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2466 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2467 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2468 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
2469 ; SI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2470 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2471 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2472 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
2473 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2474 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2475 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2476 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
2477 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2479 ; VI-SDAG-LABEL: v_exp2_fneg_fabs_v2f16:
2481 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2482 ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, -|v0| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2483 ; VI-SDAG-NEXT: v_exp_f16_e64 v0, -|v0|
2484 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
2485 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2487 ; VI-GISEL-LABEL: v_exp2_fneg_fabs_v2f16:
2488 ; VI-GISEL: ; %bb.0:
2489 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2490 ; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2491 ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0
2492 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2493 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
2494 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2496 ; GFX900-SDAG-LABEL: v_exp2_fneg_fabs_v2f16:
2497 ; GFX900-SDAG: ; %bb.0:
2498 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2499 ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2500 ; GFX900-SDAG-NEXT: v_exp_f16_e64 v0, -|v0|
2501 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
2502 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2504 ; GFX900-GISEL-LABEL: v_exp2_fneg_fabs_v2f16:
2505 ; GFX900-GISEL: ; %bb.0:
2506 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2507 ; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
2508 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0
2509 ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2510 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2511 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2513 ; R600-LABEL: v_exp2_fneg_fabs_v2f16:
2518 ; CM-LABEL: v_exp2_fneg_fabs_v2f16:
2522 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
2523 %fneg.fabs = fneg <2 x half> %fabs
2524 %result = call <2 x half> @llvm.exp2.v2f16(<2 x half> %fneg.fabs)
2525 ret <2 x half> %result
; Test: llvm.exp2.v2f16 applied to the fneg of a <2 x half> argument.
; What the autogenerated checks below expect:
;  * tahiti (SI prefixes), both selectors - the two halves are packed, the
;    sign bits are flipped with an explicit v_xor_b32 0x80008000, and each
;    element is evaluated with the f32 v_exp_f32.
;  * tonga / gfx900 with SelectionDAG - the negation is folded into the
;    source modifier of v_exp_f16 (operand printed as -v0).
;  * tonga / gfx900 with GlobalISel - the negation stays a separate
;    v_xor_b32 0x80008000 ahead of the two v_exp_f16 instructions.
2528 define <2 x half> @v_exp2_fneg_v2f16(<2 x half> %in) {
2529 ; SI-SDAG-LABEL: v_exp2_fneg_v2f16:
2531 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2532 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2533 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2534 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2535 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
2536 ; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2537 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2538 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2539 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2540 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2541 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2542 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2543 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2544 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2545 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2546 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2548 ; SI-GISEL-LABEL: v_exp2_fneg_v2f16:
2549 ; SI-GISEL: ; %bb.0:
2550 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2551 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2552 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2553 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
2554 ; SI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2555 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2556 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2557 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
2558 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2559 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2560 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2561 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
2562 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2564 ; VI-SDAG-LABEL: v_exp2_fneg_v2f16:
2566 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2567 ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2568 ; VI-SDAG-NEXT: v_exp_f16_e64 v0, -v0
2569 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
2570 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2572 ; VI-GISEL-LABEL: v_exp2_fneg_v2f16:
2573 ; VI-GISEL: ; %bb.0:
2574 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2575 ; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2576 ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0
2577 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2578 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
2579 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2581 ; GFX900-SDAG-LABEL: v_exp2_fneg_v2f16:
2582 ; GFX900-SDAG: ; %bb.0:
2583 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2584 ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2585 ; GFX900-SDAG-NEXT: v_exp_f16_e64 v0, -v0
2586 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
2587 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2589 ; GFX900-GISEL-LABEL: v_exp2_fneg_v2f16:
2590 ; GFX900-GISEL: ; %bb.0:
2591 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2592 ; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
2593 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0
2594 ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2595 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2596 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2598 ; R600-LABEL: v_exp2_fneg_v2f16:
2603 ; CM-LABEL: v_exp2_fneg_v2f16:
2607 %fneg = fneg <2 x half> %in
2608 %result = call <2 x half> @llvm.exp2.v2f16(<2 x half> %fneg)
2609 ret <2 x half> %result
; Test: llvm.exp2.v2f16 called with the `fast` fast-math flag.
; What the autogenerated checks below expect:
;  * tahiti (SI prefixes) - each element is converted to f32, evaluated with
;    v_exp_f32 and converted back to f16.
;  * tonga / gfx900 - two v_exp_f16 instructions, the high half going through
;    the SDWA form (src0_sel:WORD_1), then the halves are recombined
;    (v_or_b32, v_pack_b32_f16 or v_lshl_or_b32 depending on the run line).
2612 define <2 x half> @v_exp2_v2f16_fast(<2 x half> %in) {
2613 ; SI-SDAG-LABEL: v_exp2_v2f16_fast:
2615 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2616 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2617 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2618 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2619 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2620 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2621 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2622 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2623 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2624 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2625 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2626 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2628 ; SI-GISEL-LABEL: v_exp2_v2f16_fast:
2629 ; SI-GISEL: ; %bb.0:
2630 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2631 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2632 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
2633 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2634 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2635 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2636 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
2637 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2639 ; VI-SDAG-LABEL: v_exp2_v2f16_fast:
2641 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2642 ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2643 ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
2644 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
2645 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2647 ; VI-GISEL-LABEL: v_exp2_v2f16_fast:
2648 ; VI-GISEL: ; %bb.0:
2649 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2650 ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0
2651 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2652 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
2653 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2655 ; GFX900-SDAG-LABEL: v_exp2_v2f16_fast:
2656 ; GFX900-SDAG: ; %bb.0:
2657 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2658 ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2659 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
2660 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
2661 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2663 ; GFX900-GISEL-LABEL: v_exp2_v2f16_fast:
2664 ; GFX900-GISEL: ; %bb.0:
2665 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2666 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0
2667 ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2668 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2669 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2671 ; R600-LABEL: v_exp2_v2f16_fast:
2676 ; CM-LABEL: v_exp2_v2f16_fast:
2680 %result = call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %in)
2681 ret <2 x half> %result
; Test: llvm.exp2.v3f16 on a <3 x half> argument with no fast-math flags.
; What the autogenerated checks below expect:
;  * tahiti (SI prefixes) - three f32 v_exp_f32 evaluations bracketed by
;    f16 <-> f32 conversions.
;  * tonga / gfx900 - three v_exp_f16 instructions; elements 0 and 1 share
;    v0 (the high half uses the SDWA form) and element 2 is computed in v1.
; NOTE(review): the function name has no "exp2" in it, unlike the other
; functions in this part of the file (e.g. v_exp2_v3f16_afn) - presumably it
; was meant to be v_exp2_v3f16. Renaming would require regenerating the check
; lines with utils/update_llc_test_checks.py; confirm before changing.
2684 define <3 x half> @v_exp_v3f16(<3 x half> %in) {
2685 ; SI-SDAG-LABEL: v_exp_v3f16:
2687 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2688 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2689 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2690 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
2691 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2692 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2693 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
2694 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2695 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2696 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
2697 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2698 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2699 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
2700 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2701 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2702 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
2703 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2705 ; SI-GISEL-LABEL: v_exp_v3f16:
2706 ; SI-GISEL: ; %bb.0:
2707 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2708 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2709 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
2710 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
2711 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2712 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2713 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
2714 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2715 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
2716 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
2717 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2719 ; VI-SDAG-LABEL: v_exp_v3f16:
2721 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2722 ; VI-SDAG-NEXT: v_exp_f16_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2723 ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
2724 ; VI-SDAG-NEXT: v_exp_f16_e32 v1, v1
2725 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
2726 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2728 ; VI-GISEL-LABEL: v_exp_v3f16:
2729 ; VI-GISEL: ; %bb.0:
2730 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2731 ; VI-GISEL-NEXT: v_exp_f16_e32 v2, v0
2732 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2733 ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
2734 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
2735 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2737 ; GFX900-SDAG-LABEL: v_exp_v3f16:
2738 ; GFX900-SDAG: ; %bb.0:
2739 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2740 ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2741 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
2742 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1
2743 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
2744 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2746 ; GFX900-GISEL-LABEL: v_exp_v3f16:
2747 ; GFX900-GISEL: ; %bb.0:
2748 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2749 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v0
2750 ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2751 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
2752 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
2753 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2755 ; R600-LABEL: v_exp_v3f16:
2760 ; CM-LABEL: v_exp_v3f16:
2764 %result = call <3 x half> @llvm.exp2.v3f16(<3 x half> %in)
2765 ret <3 x half> %result
; Test: llvm.exp2.v3f16 called with the `afn` fast-math flag.
; The instruction sequences expected by the autogenerated checks below are
; the same as those of the flag-less <3 x half> test (v_exp_v3f16); only the
; label differs. tahiti (SI prefixes) goes through f32 v_exp_f32, while
; tonga / gfx900 use v_exp_f16 (SDWA form for the high half of v0).
2768 define <3 x half> @v_exp2_v3f16_afn(<3 x half> %in) {
2769 ; SI-SDAG-LABEL: v_exp2_v3f16_afn:
2771 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2772 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2773 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2774 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
2775 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2776 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2777 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
2778 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
2779 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2780 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
2781 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2782 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
2783 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
2784 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
2785 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
2786 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
2787 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2789 ; SI-GISEL-LABEL: v_exp2_v3f16_afn:
2790 ; SI-GISEL: ; %bb.0:
2791 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2792 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2793 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
2794 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
2795 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2796 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2797 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
2798 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
2799 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
2800 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
2801 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2803 ; VI-SDAG-LABEL: v_exp2_v3f16_afn:
2805 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2806 ; VI-SDAG-NEXT: v_exp_f16_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2807 ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
2808 ; VI-SDAG-NEXT: v_exp_f16_e32 v1, v1
2809 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
2810 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2812 ; VI-GISEL-LABEL: v_exp2_v3f16_afn:
2813 ; VI-GISEL: ; %bb.0:
2814 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2815 ; VI-GISEL-NEXT: v_exp_f16_e32 v2, v0
2816 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
2817 ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
2818 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
2819 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2821 ; GFX900-SDAG-LABEL: v_exp2_v3f16_afn:
2822 ; GFX900-SDAG: ; %bb.0:
2823 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2824 ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2825 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
2826 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1
2827 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
2828 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2830 ; GFX900-GISEL-LABEL: v_exp2_v3f16_afn:
2831 ; GFX900-GISEL: ; %bb.0:
2832 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2833 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v0
2834 ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2835 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
2836 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
2837 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2839 ; R600-LABEL: v_exp2_v3f16_afn:
2844 ; CM-LABEL: v_exp2_v3f16_afn:
2848 %result = call afn <3 x half> @llvm.exp2.v3f16(<3 x half> %in)
2849 ret <3 x half> %result
; Test: scalar llvm.exp2.f32 called with only the `contract` fast-math flag.
; The GCN-SDAG and GCN-GISEL checks below expect the range-scaled sequence
; around v_exp_f32 (constants decoded as IEEE-754 single precision):
;   0xc2fc0000 = -126.0  threshold the input is compared against
;   0x42800000 =   64.0  added to the input when it is below the threshold
;   0x1f800000 = 2^-64   the result is multiplied by this in that case
;                        (otherwise by 1.0)
; Presumably this keeps results that would be denormal representable -
; TODO confirm against the AMDGPU exp2 lowering.
2852 define float @v_exp2_f32_contract(float %in) {
2853 ; GCN-SDAG-LABEL: v_exp2_f32_contract:
2854 ; GCN-SDAG: ; %bb.0:
2855 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2856 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
2857 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2858 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
2859 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
2860 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
2861 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
2862 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
2863 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2864 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
2865 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
2867 ; GCN-GISEL-LABEL: v_exp2_f32_contract:
2868 ; GCN-GISEL: ; %bb.0:
2869 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2870 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
2871 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
2872 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
2873 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
2874 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
2875 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
2876 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
2877 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2878 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
2879 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
2881 ; R600-LABEL: v_exp2_f32_contract:
2886 ; CM-LABEL: v_exp2_f32_contract:
2890 %result = call contract float @llvm.exp2.f32(float %in)
; Test: `contract` llvm.exp2.f32 in a function carrying attribute group #0,
; i.e. "denormal-fp-math-f32"="ieee,preserve-sign".
; With that attribute the shared GCN checks expect a bare v_exp_f32: no
; compare, add or multiply around it, unlike v_exp2_f32_contract.
2894 define float @v_exp2_f32_contract_daz(float %in) #0 {
2895 ; GCN-LABEL: v_exp2_f32_contract_daz:
2897 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2898 ; GCN-NEXT: v_exp_f32_e32 v0, v0
2899 ; GCN-NEXT: s_setpc_b64 s[30:31]
2901 ; R600-LABEL: v_exp2_f32_contract_daz:
2906 ; CM-LABEL: v_exp2_f32_contract_daz:
2910 %result = call contract float @llvm.exp2.f32(float %in)
; Test: llvm.exp2.f32 called with the `contract nnan ninf` fast-math flags.
; The checks below expect the same range-scaled sequence as the
; `contract`-only test: compare against 0xc2fc0000 (-126.0), conditionally
; add 0x42800000 (64.0) before v_exp_f32, then multiply the result by
; 0x1f800000 (2^-64) or 1.0. The extra nnan/ninf flags do not change the
; expected code.
2914 define float @v_exp2_f32_contract_nnan_ninf(float %in) {
2915 ; GCN-SDAG-LABEL: v_exp2_f32_contract_nnan_ninf:
2916 ; GCN-SDAG: ; %bb.0:
2917 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2918 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
2919 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2920 ; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
2921 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
2922 ; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
2923 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
2924 ; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
2925 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2926 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
2927 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
2929 ; GCN-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf:
2930 ; GCN-GISEL: ; %bb.0:
2931 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2932 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
2933 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
2934 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
2935 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
2936 ; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
2937 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
2938 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
2939 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2940 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
2941 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
2943 ; R600-LABEL: v_exp2_f32_contract_nnan_ninf:
2948 ; CM-LABEL: v_exp2_f32_contract_nnan_ninf:
2952 %result = call contract nnan ninf float @llvm.exp2.f32(float %in)
; Declarations of the llvm.fabs and llvm.exp2 intrinsic overloads (f32 and
; f16, scalar and vector). Every declaration carries attribute group #2.
2956 declare float @llvm.fabs.f32(float) #2
2957 declare float @llvm.exp2.f32(float) #2
2958 declare <2 x float> @llvm.exp2.v2f32(<2 x float>) #2
2959 declare <3 x float> @llvm.exp2.v3f32(<3 x float>) #2
2960 declare <4 x float> @llvm.exp2.v4f32(<4 x float>) #2
2961 declare half @llvm.fabs.f16(half) #2
2962 declare half @llvm.exp2.f16(half) #2
2963 declare <2 x half> @llvm.exp2.v2f16(<2 x half>) #2
2964 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
2965 declare <3 x half> @llvm.exp2.v3f16(<3 x half>) #2
; Attribute groups:
;   #0 - f32 denormal mode "ieee,preserve-sign"; applied to
;        v_exp2_f32_contract_daz.
;   #1 - f32 denormal mode "dynamic,dynamic"; no user is visible in this part
;        of the file - presumably referenced by an earlier test, verify
;        before removing.
;   #2 - attributes attached to the intrinsic declarations above.
2967 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
2968 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
2969 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
2970 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: