1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,VI,VI-SDAG %s
3 ; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,VI,VI-GISEL %s
4 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,GFX900,GFX900-SDAG %s
5 ; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,GFX900,GFX900-GISEL %s
6 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
7 ; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-GISEL %s
9 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
10 ; RUN: llc -mtriple=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
; s_exp_f32 -- kernel that calls @llvm.exp.f32 on the scalar float kernel
; argument %in and stores the result through the addrspace(1) pointer %out.
;
; Every check line in this function is autogenerated (see the note on the
; first line of the file); regenerate with utils/update_llc_test_checks.py
; instead of editing the expectations by hand.
;
; Check prefix families, as selected by the run lines at the top of the file,
;   VI family      -> -mtriple=amdgcn -mcpu=tonga
;   GFX900 family  -> -mtriple=amdgcn -mcpu=gfx900
;   SI family      -> -mtriple=amdgcn -mcpu=tahiti
;   R600 family    -> -mtriple=r600 -mcpu=redwood
;   CM family      -> -mtriple=r600 -mcpu=cayman
; with the SDAG / GISEL suffixes standing for -global-isel=0 / -global-isel=1.
;
; In the expected output the value fed to the hardware exp instruction is
; finally passed through two selects against literal bounds: the R600 dump
; prints them as -1.032789e+02 (0xc2ce8ed0 in the GCN checks, selecting 0.0
; when the input is below it) and 8.872284e+01 (0x42b17218, selecting INF /
; 0x7f800000 when the input is above it).
; NOTE(review) presumably these are the underflow / overflow cut-offs of the
; f32 exp expansion -- confirm against the AMDGPU lowering code.
12 define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
13 ; VI-SDAG-LABEL: s_exp_f32:
15 ; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
16 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
17 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
18 ; VI-SDAG-NEXT: s_and_b32 s0, s2, 0xfffff000
19 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s0
20 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, s2, v1
21 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
22 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
23 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0
24 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
25 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x39a3b295
26 ; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v0
27 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, s0, v3
28 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
29 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
30 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
31 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
32 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v2
33 ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
34 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
35 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
36 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
37 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1
38 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
39 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
40 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1
41 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
42 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
43 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
44 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
45 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
46 ; VI-SDAG-NEXT: s_endpgm
48 ; VI-GISEL-LABEL: s_exp_f32:
50 ; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
51 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
52 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295
53 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
54 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
55 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
56 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2
57 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v2
58 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
59 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
60 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
61 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s0, v1
62 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
63 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v0
64 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
65 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
66 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
67 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
68 ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
69 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
70 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
71 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
72 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
73 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
74 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
75 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
76 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
77 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
78 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
79 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
80 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
81 ; VI-GISEL-NEXT: s_endpgm
83 ; GFX900-SDAG-LABEL: s_exp_f32:
84 ; GFX900-SDAG: ; %bb.0:
85 ; GFX900-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
86 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
87 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
88 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
89 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
90 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
91 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
92 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v2
93 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
94 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
95 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
96 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3
97 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
98 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x7f800000
99 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
100 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
101 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
102 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1
103 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
104 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
105 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1
106 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
107 ; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1]
108 ; GFX900-SDAG-NEXT: s_endpgm
110 ; GFX900-GISEL-LABEL: s_exp_f32:
111 ; GFX900-GISEL: ; %bb.0:
112 ; GFX900-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
113 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
114 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
115 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
116 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
117 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0
118 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s2, v0, -v2
119 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
120 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s2, v1, v0
121 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3
122 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
123 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
124 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
125 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
126 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
127 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
128 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
129 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
130 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
131 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
132 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
133 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
134 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
135 ; GFX900-GISEL-NEXT: s_endpgm
137 ; SI-SDAG-LABEL: s_exp_f32:
139 ; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb
140 ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
141 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
142 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
143 ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
144 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
145 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
146 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
147 ; SI-SDAG-NEXT: v_fma_f32 v0, s6, v0, -v2
148 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
149 ; SI-SDAG-NEXT: v_fma_f32 v0, s6, v1, v0
150 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
151 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
152 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3
153 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
154 ; SI-SDAG-NEXT: s_mov_b32 s2, -1
155 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
156 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
157 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v1
158 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
159 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
160 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v1
161 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
162 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
163 ; SI-SDAG-NEXT: s_endpgm
165 ; SI-GISEL-LABEL: s_exp_f32:
167 ; SI-GISEL-NEXT: s_load_dword s2, s[4:5], 0xb
168 ; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
169 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
170 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
171 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
172 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
173 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0
174 ; SI-GISEL-NEXT: v_fma_f32 v0, s2, v0, -v2
175 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
176 ; SI-GISEL-NEXT: v_fma_f32 v0, s2, v1, v0
177 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3
178 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
179 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
180 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
181 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
182 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
183 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
184 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
185 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
186 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
187 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
188 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
189 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
190 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
191 ; SI-GISEL-NEXT: s_endpgm
193 ; R600-LABEL: s_exp_f32:
195 ; R600-NEXT: ALU 59, @4, KC0[CB0:0-32], KC1[]
196 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
199 ; R600-NEXT: ALU clause starting at 4:
200 ; R600-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
201 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
202 ; R600-NEXT: ADD T1.W, KC0[2].Z, -PV.W,
203 ; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
204 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
205 ; R600-NEXT: RNDNE T3.W, PS,
206 ; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
207 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
208 ; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
209 ; R600-NEXT: TRUNC * T4.W, PV.W,
210 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
211 ; R600-NEXT: FLT_TO_INT T0.Z, PS,
212 ; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
213 ; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
214 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
215 ; R600-NEXT: ADD T1.Z, PS, PV.W,
216 ; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x,
217 ; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
218 ; R600-NEXT: -330(nan), 381(5.338947e-43)
219 ; R600-NEXT: ADD_INT T0.X, PS, literal.x,
220 ; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y,
221 ; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
222 ; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w,
223 ; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
224 ; R600-NEXT: -254(nan), 204(2.858649e-43)
225 ; R600-NEXT: 102(1.429324e-43), -229(nan)
226 ; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x,
227 ; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y,
228 ; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
229 ; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x,
230 ; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
231 ; R600-NEXT: -127(nan), 254(3.559298e-43)
232 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
233 ; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
234 ; R600-NEXT: MUL_IEEE T0.Y, T1.X, literal.y,
235 ; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
236 ; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
237 ; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
238 ; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
239 ; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
240 ; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
241 ; R600-NEXT: MUL_IEEE T3.W, PV.Y, literal.x,
242 ; R600-NEXT: CNDE_INT * T0.W, T0.W, PV.X, T2.W,
243 ; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
244 ; R600-NEXT: CNDE_INT T1.Z, T1.W, PS, T1.X,
245 ; R600-NEXT: CNDE_INT T0.W, T1.Y, T0.Y, PV.W,
246 ; R600-NEXT: LSHL * T1.W, PV.Z, literal.x,
247 ; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
248 ; R600-NEXT: ADD_INT T1.W, PS, literal.x,
249 ; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.Z, PV.W,
250 ; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
251 ; R600-NEXT: MUL_IEEE T0.W, PS, PV.W,
252 ; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].Z,
253 ; R600-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
254 ; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0,
255 ; R600-NEXT: SETGT * T1.W, KC0[2].Z, literal.x,
256 ; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
257 ; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x,
258 ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
259 ; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
261 ; CM-LABEL: s_exp_f32:
263 ; CM-NEXT: ALU 62, @4, KC0[CB0:0-32], KC1[]
264 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
267 ; CM-NEXT: ALU clause starting at 4:
268 ; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
269 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
270 ; CM-NEXT: ADD * T1.W, KC0[2].Z, -PV.W,
271 ; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
272 ; CM-NEXT: MUL_IEEE * T2.W, T0.W, literal.y,
273 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
274 ; CM-NEXT: RNDNE T1.Z, PV.W,
275 ; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
276 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
277 ; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
278 ; CM-NEXT: ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
279 ; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
280 ; CM-NEXT: TRUNC T1.Z, T1.Z,
281 ; CM-NEXT: ADD * T0.W, PV.W, PV.Z,
282 ; CM-NEXT: EXP_IEEE T0.X, T0.W,
283 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
284 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
285 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
286 ; CM-NEXT: FLT_TO_INT T0.Z, T1.Z,
287 ; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.x,
288 ; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
289 ; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
290 ; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
291 ; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
292 ; CM-NEXT: 209715200(1.972152e-31), -330(nan)
293 ; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
294 ; CM-NEXT: ADD_INT T1.X, PV.W, literal.x,
295 ; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.y,
296 ; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
297 ; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
298 ; CM-NEXT: -254(nan), 204(2.858649e-43)
299 ; CM-NEXT: 102(1.429324e-43), -229(nan)
300 ; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
301 ; CM-NEXT: SETGT_UINT T2.Y, T0.Z, literal.y,
302 ; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
303 ; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
304 ; CM-NEXT: -127(nan), 254(3.559298e-43)
305 ; CM-NEXT: MUL_IEEE T3.X, T0.X, literal.x,
306 ; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
307 ; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
308 ; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
309 ; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
310 ; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
311 ; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.x,
312 ; CM-NEXT: CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
313 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
314 ; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.W, T0.X,
315 ; CM-NEXT: CNDE_INT T0.Z, T2.Y, T3.X, PV.Z,
316 ; CM-NEXT: LSHL * T0.W, PV.Y, literal.x,
317 ; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
318 ; CM-NEXT: ADD_INT T1.Z, PV.W, literal.x,
319 ; CM-NEXT: CNDE_INT * T0.W, T3.W, PV.Y, PV.Z,
320 ; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
321 ; CM-NEXT: MUL_IEEE T0.Z, PV.W, PV.Z,
322 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
323 ; CM-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
324 ; CM-NEXT: CNDE T0.Z, PV.W, PV.Z, 0.0,
325 ; CM-NEXT: SETGT * T0.W, KC0[2].Z, literal.x,
326 ; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
327 ; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
328 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
329 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
330 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test -- the only two statements that are actual input to llc here.
331 %result = call float @llvm.exp.f32(float %in)
332 store float %result, ptr addrspace(1) %out
336 ; FIXME: We should be able to merge these packets together on Cayman so we
337 ; have a maximum of 4 instructions.
338 define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
339 ; VI-SDAG-LABEL: s_exp_v2f32:
341 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
342 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
343 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
344 ; VI-SDAG-NEXT: s_and_b32 s4, s3, 0xfffff000
345 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4
346 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, s3, v2
347 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
348 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
349 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
350 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295
351 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v0
352 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v4
353 ; VI-SDAG-NEXT: s_and_b32 s4, s2, 0xfffff000
354 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
355 ; VI-SDAG-NEXT: v_mov_b32_e32 v6, s4
356 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
357 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2
358 ; VI-SDAG-NEXT: v_sub_f32_e32 v6, s2, v6
359 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
360 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
361 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v6
362 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6
363 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
364 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
365 ; VI-SDAG-NEXT: v_rndne_f32_e32 v5, v0
366 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v6, v7
367 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, s4, v4
368 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5
369 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
370 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
371 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
372 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v5
373 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
374 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
375 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v2
376 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218
377 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
378 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x7f800000
379 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3
380 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
381 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4
382 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2
383 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
384 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3
385 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1
386 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
387 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0
388 ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
389 ; VI-SDAG-NEXT: s_endpgm
391 ; VI-GISEL-LABEL: s_exp_v2f32:
393 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
394 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
395 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295
396 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
397 ; VI-GISEL-NEXT: s_and_b32 s4, s2, 0xfffff000
398 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
399 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2
400 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
401 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
402 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, s4, v0
403 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
404 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, s4, v1
405 ; VI-GISEL-NEXT: s_and_b32 s4, s3, 0xfffff000
406 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s4
407 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v4, v2
408 ; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3
409 ; VI-GISEL-NEXT: v_sub_f32_e32 v5, s3, v5
410 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
411 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x39a3b295, v5
412 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5
413 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
414 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
415 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
416 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
417 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
418 ; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
419 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
420 ; VI-GISEL-NEXT: v_rndne_f32_e32 v5, v0
421 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
422 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
423 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v5
424 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0
425 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
426 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
427 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
428 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
429 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
430 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
431 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
432 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
433 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
434 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3
435 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
436 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4
437 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
438 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
439 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
440 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
441 ; VI-GISEL-NEXT: s_endpgm
443 ; GFX900-SDAG-LABEL: s_exp_v2f32:
444 ; GFX900-SDAG: ; %bb.0:
445 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
446 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
447 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
448 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
449 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
450 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
451 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
452 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2
453 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
454 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4
455 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s2, v0
456 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
457 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6
458 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v6
459 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
460 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
461 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7
462 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
463 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v8, v0
464 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
465 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v7
466 ; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3
467 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5
468 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218
469 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
470 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000
471 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3
472 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
473 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v6
474 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5
475 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
476 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3
477 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
478 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
479 ; GFX900-SDAG-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
480 ; GFX900-SDAG-NEXT: s_endpgm
482 ; GFX900-GISEL-LABEL: s_exp_v2f32:
483 ; GFX900-GISEL: ; %bb.0:
484 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
485 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
486 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
487 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
488 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
489 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0
490 ; GFX900-GISEL-NEXT: v_fma_f32 v3, s2, v0, -v2
491 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2
492 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s3, v0
493 ; GFX900-GISEL-NEXT: v_fma_f32 v3, s2, v1, v3
494 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
495 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s3, v0, -v5
496 ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
497 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s3, v1, v0
498 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v5
499 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
500 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
501 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v5, v5, v1
502 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v5, v0
503 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
504 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0
505 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
506 ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
507 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
508 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
509 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
510 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
511 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
512 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
513 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
514 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
515 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
516 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
517 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
518 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
519 ; GFX900-GISEL-NEXT: s_endpgm
521 ; SI-SDAG-LABEL: s_exp_v2f32:
523 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
524 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
525 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
526 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
527 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
528 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
529 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
530 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
531 ; SI-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2
532 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
533 ; SI-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4
534 ; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
535 ; SI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v0
536 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
537 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
538 ; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5
539 ; SI-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v5
540 ; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6
541 ; SI-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
542 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v7, v0
543 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
544 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v5, v6
545 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3
546 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
547 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v3
548 ; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218
549 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
550 ; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x7f800000
551 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v4
552 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc
553 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v5
554 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3
555 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
556 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v4
557 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
558 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
559 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
560 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
561 ; SI-SDAG-NEXT: s_endpgm
563 ; SI-GISEL-LABEL: s_exp_v2f32:
565 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
566 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
567 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
568 ; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
569 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
570 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0
571 ; SI-GISEL-NEXT: v_fma_f32 v3, s2, v0, -v2
572 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
573 ; SI-GISEL-NEXT: v_mul_f32_e32 v5, s3, v0
574 ; SI-GISEL-NEXT: v_fma_f32 v3, s2, v1, v3
575 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
576 ; SI-GISEL-NEXT: v_fma_f32 v0, s3, v0, -v5
577 ; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
578 ; SI-GISEL-NEXT: v_fma_f32 v0, s3, v1, v0
579 ; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v5
580 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
581 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
582 ; SI-GISEL-NEXT: v_sub_f32_e32 v5, v5, v1
583 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v5, v0
584 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
585 ; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0
586 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
587 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3
588 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
589 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
590 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
591 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
592 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
593 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v5, v1
594 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
595 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
596 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
597 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
598 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
599 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
600 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
601 ; SI-GISEL-NEXT: s_endpgm
603 ; R600-LABEL: s_exp_v2f32:
605 ; R600-NEXT: ALU 96, @4, KC0[CB0:0-32], KC1[]
606 ; R600-NEXT: ALU 12, @101, KC0[CB0:0-32], KC1[]
607 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
609 ; R600-NEXT: ALU clause starting at 4:
610 ; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x,
611 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
612 ; R600-NEXT: ADD * T1.W, KC0[3].X, -PV.W,
613 ; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.x,
614 ; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.y,
615 ; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
616 ; R600-NEXT: RNDNE T0.Z, PS,
617 ; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
618 ; R600-NEXT: AND_INT * T2.W, KC0[2].W, literal.y,
619 ; R600-NEXT: 1069064192(1.442383e+00), -4096(nan)
620 ; R600-NEXT: ADD T1.Z, KC0[2].W, -PS,
621 ; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
622 ; R600-NEXT: ADD * T1.W, T3.W, -PV.Z,
623 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
624 ; R600-NEXT: ADD T2.Z, PS, PV.W,
625 ; R600-NEXT: MUL_IEEE T0.W, PV.Z, literal.x,
626 ; R600-NEXT: MUL_IEEE * T1.W, T2.W, literal.y,
627 ; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
628 ; R600-NEXT: RNDNE T0.Y, PS,
629 ; R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.x, PV.W,
630 ; R600-NEXT: TRUNC T0.W, T0.Z, BS:VEC_120/SCL_212
631 ; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
632 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
633 ; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
634 ; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x,
635 ; R600-NEXT: MULADD_IEEE T0.W, T2.W, literal.y, PV.Z,
636 ; R600-NEXT: ADD * T1.W, T1.W, -PV.Y,
637 ; R600-NEXT: 209715200(1.972152e-31), 967029397(3.122284e-04)
638 ; R600-NEXT: ADD T1.Z, PS, PV.W,
639 ; R600-NEXT: MUL_IEEE T0.W, PV.Z, literal.x,
640 ; R600-NEXT: SETGT_UINT * T1.W, PV.Y, literal.y,
641 ; R600-NEXT: 209715200(1.972152e-31), -229(nan)
642 ; R600-NEXT: CNDE_INT T0.Z, PS, PV.W, T0.Z,
643 ; R600-NEXT: SETGT_INT T0.W, T1.Y, literal.x,
644 ; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
645 ; R600-NEXT: -127(nan), 0(0.000000e+00)
646 ; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.X,
647 ; R600-NEXT: MAX_INT T2.W, T1.Y, literal.x,
648 ; R600-NEXT: MUL_IEEE * T3.W, PS, literal.y,
649 ; R600-NEXT: -330(nan), 209715200(1.972152e-31)
650 ; R600-NEXT: MUL_IEEE T2.X, PS, literal.x,
651 ; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y,
652 ; R600-NEXT: ADD_INT T1.Z, T1.Y, literal.z,
653 ; R600-NEXT: MIN_INT T2.W, T1.Y, literal.w,
654 ; R600-NEXT: TRUNC * T4.W, T0.Y,
655 ; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43)
656 ; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43)
657 ; R600-NEXT: FLT_TO_INT T3.X, PS,
658 ; R600-NEXT: ADD_INT T0.Y, PV.W, literal.x,
659 ; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.y,
660 ; R600-NEXT: SETGT_UINT T2.W, T1.Y, literal.z,
661 ; R600-NEXT: CNDE_INT * T1.W, T1.W, PV.Y, PV.Z,
662 ; R600-NEXT: -254(nan), -127(nan)
663 ; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
664 ; R600-NEXT: MUL_IEEE T4.X, T1.X, literal.x,
665 ; R600-NEXT: MUL_IEEE T2.Y, T0.X, literal.x, BS:VEC_120/SCL_212
666 ; R600-NEXT: CNDE_INT T1.Z, T0.W, PS, T1.Y,
667 ; R600-NEXT: CNDE_INT T0.W, PV.W, PV.Z, PV.Y,
668 ; R600-NEXT: MAX_INT * T1.W, PV.X, literal.y,
669 ; R600-NEXT: 2130706432(1.701412e+38), -330(nan)
670 ; R600-NEXT: SETGT_INT T0.X, T1.Y, literal.x,
671 ; R600-NEXT: ADD_INT T0.Y, PS, literal.y,
672 ; R600-NEXT: ADD_INT T2.Z, T3.X, literal.z,
673 ; R600-NEXT: SETGT_UINT * T1.W, T3.X, literal.w,
674 ; R600-NEXT: 127(1.779649e-43), 204(2.858649e-43)
675 ; R600-NEXT: 102(1.429324e-43), -229(nan)
676 ; R600-NEXT: MIN_INT * T4.W, T3.X, literal.x,
677 ; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
678 ; R600-NEXT: ADD_INT T5.X, PV.W, literal.x,
679 ; R600-NEXT: ADD_INT T1.Y, T3.X, literal.y,
680 ; R600-NEXT: SETGT_UINT T3.Z, T3.X, literal.z,
681 ; R600-NEXT: CNDE_INT T4.W, T1.W, T0.Y, T2.Z,
682 ; R600-NEXT: SETGT_INT * T5.W, T3.X, literal.y,
683 ; R600-NEXT: -254(nan), -127(nan)
684 ; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
685 ; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T3.X,
686 ; R600-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, PV.X,
687 ; R600-NEXT: SETGT_INT T2.Z, T3.X, literal.x,
688 ; R600-NEXT: CNDE_INT T0.W, T0.X, T1.Z, T0.W, BS:VEC_120/SCL_212
689 ; R600-NEXT: MUL_IEEE * T4.W, T2.Y, literal.y,
690 ; R600-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
691 ; R600-NEXT: CNDE_INT T3.X, T2.W, T2.Y, PS, BS:VEC_120/SCL_212
692 ; R600-NEXT: LSHL T1.Y, PV.W, literal.x,
693 ; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
694 ; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.y,
695 ; R600-NEXT: CNDE_INT * T1.W, T1.W, T2.X, T3.W,
696 ; R600-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
697 ; R600-NEXT: CNDE_INT T1.X, T5.W, PS, T1.X, BS:VEC_021/SCL_122
698 ; R600-NEXT: CNDE_INT T0.Y, T3.Z, T4.X, PV.W, BS:VEC_201
699 ; R600-NEXT: LSHL T1.Z, PV.Z, literal.x,
700 ; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
701 ; R600-NEXT: CNDE_INT * T1.W, T0.X, T0.Z, PV.X,
702 ; R600-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
703 ; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W,
704 ; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].X,
705 ; R600-NEXT: ADD_INT * T0.W, PV.Z, literal.y,
706 ; R600-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
707 ; R600-NEXT: ALU clause starting at 101:
708 ; R600-NEXT: CNDE_INT * T1.W, T2.Z, T1.X, T0.Y,
709 ; R600-NEXT: MUL_IEEE T0.Y, PV.W, T0.W,
710 ; R600-NEXT: SETGT T1.Z, literal.x, KC0[2].W,
711 ; R600-NEXT: CNDE T0.W, T0.Z, T1.Y, 0.0,
712 ; R600-NEXT: SETGT * T1.W, KC0[3].X, literal.y,
713 ; R600-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
714 ; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x,
715 ; R600-NEXT: CNDE T0.W, PV.Z, PV.Y, 0.0,
716 ; R600-NEXT: SETGT * T1.W, KC0[2].W, literal.y,
717 ; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
718 ; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x,
719 ; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y,
720 ; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
722 ; CM-LABEL: s_exp_v2f32:
724 ; CM-NEXT: ALU 98, @4, KC0[CB0:0-32], KC1[]
725 ; CM-NEXT: ALU 18, @103, KC0[CB0:0-32], KC1[]
726 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
728 ; CM-NEXT: ALU clause starting at 4:
729 ; CM-NEXT: AND_INT * T0.W, KC0[2].W, literal.x,
730 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
731 ; CM-NEXT: ADD * T1.W, KC0[2].W, -PV.W,
732 ; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
733 ; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y,
734 ; CM-NEXT: AND_INT * T2.W, KC0[3].X, literal.z,
735 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
736 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
737 ; CM-NEXT: ADD T1.Y, KC0[3].X, -PV.W,
738 ; CM-NEXT: RNDNE T1.Z, PV.Z,
739 ; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y,
740 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
741 ; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
742 ; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
743 ; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
744 ; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
745 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
746 ; CM-NEXT: TRUNC T1.X, T1.Z,
747 ; CM-NEXT: RNDNE T2.Y, PV.W,
748 ; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
749 ; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
750 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
751 ; CM-NEXT: EXP_IEEE T0.X, T1.W,
752 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
753 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
754 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
755 ; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
756 ; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
757 ; CM-NEXT: FLT_TO_INT T0.Z, T1.X,
758 ; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
759 ; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
760 ; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
761 ; CM-NEXT: SETGT_UINT T1.Y, PV.Z, literal.y,
762 ; CM-NEXT: TRUNC T1.Z, T2.Y,
763 ; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
764 ; CM-NEXT: 209715200(1.972152e-31), -229(nan)
765 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T1.W,
766 ; CM-NEXT: EXP_IEEE T0.Y, T1.W,
767 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
768 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
769 ; CM-NEXT: FLT_TO_INT T2.X, T1.Z,
770 ; CM-NEXT: MUL_IEEE T2.Y, PV.Y, literal.x,
771 ; CM-NEXT: CNDE_INT T1.Z, T1.Y, T1.X, T0.W,
772 ; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
773 ; CM-NEXT: 209715200(1.972152e-31), -127(nan)
774 ; CM-NEXT: CNDE_INT T1.X, PV.W, PV.Z, T0.X,
775 ; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
776 ; CM-NEXT: SETGT_UINT T1.Z, PV.X, literal.y,
777 ; CM-NEXT: MAX_INT * T1.W, T0.Z, literal.z,
778 ; CM-NEXT: 209715200(1.972152e-31), -229(nan)
779 ; CM-NEXT: -330(nan), 0(0.000000e+00)
780 ; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
781 ; CM-NEXT: ADD_INT T4.Y, T0.Z, literal.y,
782 ; CM-NEXT: CNDE_INT T2.Z, PV.Z, PV.Y, T2.Y,
783 ; CM-NEXT: SETGT_INT * T1.W, T2.X, literal.z,
784 ; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
785 ; CM-NEXT: -127(nan), 0(0.000000e+00)
786 ; CM-NEXT: CNDE_INT T4.X, PV.W, PV.Z, T0.Y,
787 ; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.x,
788 ; CM-NEXT: MAX_INT T2.Z, T2.X, literal.y, BS:VEC_120/SCL_212
789 ; CM-NEXT: CNDE_INT * T2.W, T1.Y, PV.X, PV.Y,
790 ; CM-NEXT: 2130706432(1.701412e+38), -330(nan)
791 ; CM-NEXT: CNDE_INT T0.X, T0.W, PV.W, T0.Z,
792 ; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.x,
793 ; CM-NEXT: ADD_INT T2.Z, T2.X, literal.y,
794 ; CM-NEXT: MIN_INT * T0.W, T2.X, literal.z,
795 ; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
796 ; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
797 ; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
798 ; CM-NEXT: ADD_INT T3.Y, T2.X, literal.y,
799 ; CM-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
800 ; CM-NEXT: CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
801 ; CM-NEXT: -254(nan), -127(nan)
802 ; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
803 ; CM-NEXT: MUL_IEEE T5.X, T0.Y, literal.x,
804 ; CM-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T2.X,
805 ; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
806 ; CM-NEXT: MIN_INT * T0.W, T0.Z, literal.y,
807 ; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
808 ; CM-NEXT: SETGT_INT T2.X, T2.X, literal.x,
809 ; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
810 ; CM-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
811 ; CM-NEXT: SETGT_UINT * T0.W, T0.Z, literal.w,
812 ; CM-NEXT: 127(1.779649e-43), -254(nan)
813 ; CM-NEXT: -127(nan), 254(3.559298e-43)
814 ; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, PV.Y,
815 ; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x,
816 ; CM-NEXT: CNDE_INT T0.Z, PV.X, T0.Y, T1.Z,
817 ; CM-NEXT: MUL_IEEE * T1.W, T5.X, literal.y,
818 ; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
819 ; CM-NEXT: CNDE_INT T5.X, T3.Z, T5.X, PV.W,
820 ; CM-NEXT: LSHL T0.Y, PV.Z, literal.x,
821 ; CM-NEXT: CNDE_INT T0.Z, PV.Y, T0.X, PV.X, BS:VEC_021/SCL_122
822 ; CM-NEXT: MUL_IEEE * T1.W, T2.Y, literal.y,
823 ; CM-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
824 ; CM-NEXT: CNDE_INT T0.X, T0.W, T2.Y, PV.W,
825 ; CM-NEXT: LSHL T2.Y, PV.Z, literal.x,
826 ; CM-NEXT: ADD_INT * T0.Z, PV.Y, literal.y,
827 ; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
828 ; CM-NEXT: ALU clause starting at 103:
829 ; CM-NEXT: CNDE_INT * T0.W, T2.X, T4.X, T5.X,
830 ; CM-NEXT: MUL_IEEE T2.X, PV.W, T0.Z,
831 ; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].X,
832 ; CM-NEXT: ADD_INT T0.Z, T2.Y, literal.y,
833 ; CM-NEXT: CNDE_INT * T0.W, T1.Y, T1.X, T0.X, BS:VEC_120/SCL_212
834 ; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
835 ; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
836 ; CM-NEXT: SETGT T1.Y, literal.x, KC0[2].W,
837 ; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
838 ; CM-NEXT: SETGT * T0.W, KC0[3].X, literal.y,
839 ; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
840 ; CM-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.x,
841 ; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
842 ; CM-NEXT: SETGT * T0.W, KC0[2].W, literal.y,
843 ; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
844 ; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
845 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
846 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
847 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
848 %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %in)
849 store <2 x float> %result, ptr addrspace(1) %out
853 define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
854 ; VI-SDAG-LABEL: s_exp_v3f32:
856 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
857 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
858 ; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
859 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
860 ; VI-SDAG-NEXT: s_and_b32 s3, s2, 0xfffff000
861 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s3
862 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, s2, v2
863 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
864 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
865 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, s3, v0
866 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
867 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295
868 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
869 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, s3, v4
870 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
871 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2
872 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
873 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
874 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
875 ; VI-SDAG-NEXT: s_and_b32 s3, s1, 0xfffff000
876 ; VI-SDAG-NEXT: v_mov_b32_e32 v7, s3
877 ; VI-SDAG-NEXT: v_sub_f32_e32 v7, s1, v7
878 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
879 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
880 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7
881 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7
882 ; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v2
883 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8
884 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, s3, v4
885 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6
886 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7
887 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7
888 ; VI-SDAG-NEXT: v_exp_f32_e32 v7, v2
889 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6
890 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
891 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3
892 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218
893 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
894 ; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
895 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v5
896 ; VI-SDAG-NEXT: s_and_b32 s2, s0, 0xfffff000
897 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc
898 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v7, v6
899 ; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2
900 ; VI-SDAG-NEXT: v_sub_f32_e32 v7, s0, v7
901 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
902 ; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v7
903 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7
904 ; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v0
905 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v9
906 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4
907 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v6
908 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
909 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
910 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
911 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v6
912 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v3
913 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
914 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v5
915 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
916 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4
917 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v3
918 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
919 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v5
920 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s4
921 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
922 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s5
923 ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
924 ; VI-SDAG-NEXT: s_endpgm
926 ; VI-GISEL-LABEL: s_exp_v3f32:
928 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
929 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000
930 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295
931 ; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
932 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
933 ; VI-GISEL-NEXT: s_and_b32 s3, s0, 0xfffff000
934 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
935 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, s0, v0
936 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0
937 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
938 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, s3, v1
939 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4
940 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, s3, v2
941 ; VI-GISEL-NEXT: s_and_b32 s3, s1, 0xfffff000
942 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3
943 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0
944 ; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3
945 ; VI-GISEL-NEXT: v_sub_f32_e32 v5, s1, v5
946 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
947 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v5
948 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5
949 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0
950 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, s3, v1
951 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7
952 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, s3, v2
953 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
954 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
955 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v7, v5
956 ; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6
957 ; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7
958 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v6, v5
959 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
960 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
961 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
962 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
963 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v3
964 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
965 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
966 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
967 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
968 ; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6
969 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0
970 ; VI-GISEL-NEXT: v_sub_f32_e32 v6, s2, v6
971 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v6
972 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6
973 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s0, v1
974 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
975 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, s0, v2
976 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v6
977 ; VI-GISEL-NEXT: v_rndne_f32_e32 v6, v1
978 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
979 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
980 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6
981 ; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1
982 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
983 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
984 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v3
985 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
986 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v4
987 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
988 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
989 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
990 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
991 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
992 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4
993 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
994 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
995 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
996 ; VI-GISEL-NEXT: s_endpgm
998 ; GFX900-SDAG-LABEL: s_exp_v3f32:
999 ; GFX900-SDAG: ; %bb.0:
1000 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
1001 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
1002 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
1003 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
1004 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218
1005 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1006 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s1, v0
1007 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6
1008 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7
1009 ; GFX900-SDAG-NEXT: v_fma_f32 v6, s1, v0, -v6
1010 ; GFX900-SDAG-NEXT: v_fma_f32 v6, s1, v1, v6
1011 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
1012 ; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
1013 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
1014 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s2, v0, -v2
1015 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v6, v6
1016 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
1017 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
1018 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s2, v1, v4
1019 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
1020 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
1021 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
1022 ; GFX900-SDAG-NEXT: v_ldexp_f32 v6, v6, v7
1023 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s0, v0
1024 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v9, v7
1025 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s0, v0, -v7
1026 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9
1027 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s0, v1, v0
1028 ; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3
1029 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
1030 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v10, v0
1031 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3
1032 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
1033 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9
1034 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1035 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
1036 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v5
1037 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
1038 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v3
1039 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc
1040 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v5
1041 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
1042 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v7
1043 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v3
1044 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
1045 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v5
1046 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
1047 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
1048 ; GFX900-SDAG-NEXT: global_store_dwordx3 v4, v[0:2], s[6:7]
1049 ; GFX900-SDAG-NEXT: s_endpgm
1051 ; GFX900-GISEL-LABEL: s_exp_v3f32:
1052 ; GFX900-GISEL: ; %bb.0:
1053 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
1054 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
1055 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
1056 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
1057 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1058 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s1, v1
1059 ; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v1, -v5
1060 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v7, v5
1061 ; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, v6
1062 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v5, v5, v7
1063 ; GFX900-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
1064 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
1065 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v5
1066 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v1
1067 ; GFX900-GISEL-NEXT: v_fma_f32 v3, s0, v1, -v0
1068 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v0
1069 ; GFX900-GISEL-NEXT: v_fma_f32 v3, s0, v2, v3
1070 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
1071 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
1072 ; GFX900-GISEL-NEXT: v_ldexp_f32 v5, v5, v6
1073 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s2, v1
1074 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
1075 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
1076 ; GFX900-GISEL-NEXT: v_fma_f32 v1, s2, v1, -v6
1077 ; GFX900-GISEL-NEXT: v_fma_f32 v1, s2, v2, v1
1078 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v6
1079 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v6, v6, v2
1080 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
1081 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
1082 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
1083 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
1084 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
1085 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
1086 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v1
1087 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1088 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
1089 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
1090 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
1091 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
1092 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
1093 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
1094 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1095 ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
1096 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
1097 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
1098 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
1099 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
1100 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
1101 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
1102 ; GFX900-GISEL-NEXT: s_endpgm
1104 ; SI-SDAG-LABEL: s_exp_v3f32:
1106 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd
1107 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
1108 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x32a5705f
1109 ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
1110 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
1111 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1112 ; SI-SDAG-NEXT: v_mul_f32_e32 v5, s0, v0
1113 ; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5
1114 ; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6
1115 ; SI-SDAG-NEXT: v_fma_f32 v5, s0, v0, -v5
1116 ; SI-SDAG-NEXT: v_fma_f32 v5, s0, v2, v5
1117 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, s1, v0
1118 ; SI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5
1119 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
1120 ; SI-SDAG-NEXT: v_fma_f32 v4, s1, v0, -v1
1121 ; SI-SDAG-NEXT: v_exp_f32_e32 v5, v5
1122 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6
1123 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
1124 ; SI-SDAG-NEXT: v_fma_f32 v4, s1, v2, v4
1125 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
1126 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
1127 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
1128 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, v5, v6
1129 ; SI-SDAG-NEXT: v_mul_f32_e32 v6, s2, v0
1130 ; SI-SDAG-NEXT: v_rndne_f32_e32 v8, v6
1131 ; SI-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v6
1132 ; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v8
1133 ; SI-SDAG-NEXT: v_fma_f32 v0, s2, v2, v0
1134 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3
1135 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
1136 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v9, v0
1137 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v3
1138 ; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218
1139 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0
1140 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v8
1141 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
1142 ; SI-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000
1143 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v4
1144 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
1145 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v3
1146 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc
1147 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v4
1148 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
1149 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v6
1150 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3
1151 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1152 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v4
1153 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
1154 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
1155 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
1156 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1157 ; SI-SDAG-NEXT: s_endpgm
1159 ; SI-GISEL-LABEL: s_exp_v3f32:
1160 ; SI-GISEL: ; %bb.0:
1161 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd
1162 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
1163 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
1164 ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
1165 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
1166 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1167 ; SI-GISEL-NEXT: v_mul_f32_e32 v5, s1, v1
1168 ; SI-GISEL-NEXT: v_fma_f32 v6, s1, v1, -v5
1169 ; SI-GISEL-NEXT: v_rndne_f32_e32 v7, v5
1170 ; SI-GISEL-NEXT: v_fma_f32 v6, s1, v2, v6
1171 ; SI-GISEL-NEXT: v_sub_f32_e32 v5, v5, v7
1172 ; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
1173 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
1174 ; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5
1175 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v1
1176 ; SI-GISEL-NEXT: v_fma_f32 v3, s0, v1, -v0
1177 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v0
1178 ; SI-GISEL-NEXT: v_fma_f32 v3, s0, v2, v3
1179 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
1180 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
1181 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v5, v5, v6
1182 ; SI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v1
1183 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
1184 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
1185 ; SI-GISEL-NEXT: v_fma_f32 v1, s2, v1, -v6
1186 ; SI-GISEL-NEXT: v_fma_f32 v1, s2, v2, v1
1187 ; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v6
1188 ; SI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v2
1189 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
1190 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
1191 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3
1192 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
1193 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
1194 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
1195 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v1
1196 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1197 ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
1198 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
1199 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
1200 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
1201 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
1202 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
1203 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1204 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v6, v2
1205 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
1206 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
1207 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
1208 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
1209 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
1210 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1211 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
1212 ; SI-GISEL-NEXT: s_endpgm
1214 ; R600-LABEL: s_exp_v3f32:
1216 ; R600-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[]
1217 ; R600-NEXT: ALU 69, @106, KC0[CB0:0-32], KC1[]
1218 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1219 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1222 ; R600-NEXT: ALU clause starting at 6:
1223 ; R600-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
1224 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
1225 ; R600-NEXT: MUL_IEEE T1.W, PV.W, literal.x,
1226 ; R600-NEXT: ADD * T2.W, KC0[3].Y, -PV.W,
1227 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1228 ; R600-NEXT: RNDNE * T3.W, PV.W,
1229 ; R600-NEXT: TRUNC T4.W, PV.W,
1230 ; R600-NEXT: MUL_IEEE * T5.W, T2.W, literal.x,
1231 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
1232 ; R600-NEXT: MULADD_IEEE T2.W, T2.W, literal.x, PS,
1233 ; R600-NEXT: FLT_TO_INT * T4.W, PV.W,
1234 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1235 ; R600-NEXT: MAX_INT T0.Z, PS, literal.x,
1236 ; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.W,
1237 ; R600-NEXT: ADD * T1.W, T1.W, -T3.W,
1238 ; R600-NEXT: -330(nan), 967029397(3.122284e-04)
1239 ; R600-NEXT: ADD T0.Y, PS, PV.W,
1240 ; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.x,
1241 ; R600-NEXT: ADD_INT T0.W, T4.W, literal.y,
1242 ; R600-NEXT: SETGT_UINT * T1.W, T4.W, literal.z,
1243 ; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
1244 ; R600-NEXT: -229(nan), 0(0.000000e+00)
1245 ; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
1246 ; R600-NEXT: SETGT_INT T0.W, T4.W, literal.x,
1247 ; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
1248 ; R600-NEXT: -127(nan), 0(0.000000e+00)
1249 ; R600-NEXT: MUL_IEEE T1.X, PS, literal.x,
1250 ; R600-NEXT: CNDE_INT T0.Y, PV.W, PV.Z, T4.W,
1251 ; R600-NEXT: MIN_INT T0.Z, T4.W, literal.y,
1252 ; R600-NEXT: AND_INT T2.W, KC0[3].W, literal.z,
1253 ; R600-NEXT: MUL_IEEE * T3.W, PS, literal.w,
1254 ; R600-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
1255 ; R600-NEXT: -4096(nan), 209715200(1.972152e-31)
1256 ; R600-NEXT: MUL_IEEE T2.X, PS, literal.x,
1257 ; R600-NEXT: ADD T1.Y, KC0[3].W, -PV.W,
1258 ; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
1259 ; R600-NEXT: ADD_INT T5.W, T4.W, literal.z,
1260 ; R600-NEXT: SETGT_UINT * T6.W, T4.W, literal.w,
1261 ; R600-NEXT: 209715200(1.972152e-31), -254(nan)
1262 ; R600-NEXT: -127(nan), 254(3.559298e-43)
1263 ; R600-NEXT: CNDE_INT T3.X, PS, PV.W, PV.Z,
1264 ; R600-NEXT: SETGT_INT T2.Y, T4.W, literal.x,
1265 ; R600-NEXT: MUL_IEEE T0.Z, PV.Y, literal.y,
1266 ; R600-NEXT: MUL_IEEE * T4.W, T2.W, literal.z, BS:VEC_120/SCL_212
1267 ; R600-NEXT: 127(1.779649e-43), 967029397(3.122284e-04)
1268 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1269 ; R600-NEXT: CNDE_INT * T1.W, T1.W, T2.X, T3.W,
1270 ; R600-NEXT: CNDE_INT T0.X, T0.W, PV.W, T0.X, BS:VEC_021/SCL_122
1271 ; R600-NEXT: RNDNE T3.Y, T4.W, BS:VEC_120/SCL_212
1272 ; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, T0.Z,
1273 ; R600-NEXT: CNDE_INT T0.W, T2.Y, T0.Y, T3.X, BS:VEC_120/SCL_212
1274 ; R600-NEXT: MUL_IEEE * T1.W, T1.X, literal.y,
1275 ; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
1276 ; R600-NEXT: CNDE_INT T1.X, T6.W, T1.X, PS,
1277 ; R600-NEXT: LSHL T0.Y, PV.W, literal.x,
1278 ; R600-NEXT: AND_INT T1.Z, KC0[3].Z, literal.y,
1279 ; R600-NEXT: MULADD_IEEE T0.W, T2.W, literal.z, PV.Z, BS:VEC_120/SCL_212
1280 ; R600-NEXT: ADD * T1.W, T4.W, -PV.Y,
1281 ; R600-NEXT: 23(3.222986e-44), -4096(nan)
1282 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
1283 ; R600-NEXT: ADD T1.Y, PS, PV.W,
1284 ; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.x,
1285 ; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
1286 ; R600-NEXT: CNDE_INT * T1.W, T2.Y, T0.X, PV.X,
1287 ; R600-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00)
1288 ; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
1289 ; R600-NEXT: ADD T0.Y, KC0[3].Z, -T1.Z,
1290 ; R600-NEXT: RNDNE T2.Z, PV.Z,
1291 ; R600-NEXT: TRUNC T0.W, T3.Y,
1292 ; R600-NEXT: EXP_IEEE * T1.X, PV.Y,
1293 ; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Y,
1294 ; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
1295 ; R600-NEXT: TRUNC T3.Z, PV.Z,
1296 ; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y,
1297 ; R600-NEXT: MUL_IEEE * T1.W, PS, literal.z,
1298 ; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04)
1299 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
1300 ; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
1301 ; R600-NEXT: MUL_IEEE T2.Y, T1.X, literal.y,
1302 ; R600-NEXT: MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W,
1303 ; R600-NEXT: FLT_TO_INT T0.W, PV.Z,
1304 ; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.w,
1305 ; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
1306 ; R600-NEXT: 1069064192(1.442383e+00), 381(5.338947e-43)
1307 ; R600-NEXT: ADD_INT T4.X, PS, literal.x,
1308 ; R600-NEXT: MAX_INT T0.Y, PV.W, literal.y,
1309 ; R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.z, PV.Z,
1310 ; R600-NEXT: ADD T2.W, T0.Z, -T2.Z, BS:VEC_120/SCL_212
1311 ; R600-NEXT: MIN_INT * T3.W, PV.W, literal.w,
1312 ; R600-NEXT: -254(nan), -330(nan)
1313 ; R600-NEXT: 967029397(3.122284e-04), 381(5.338947e-43)
1314 ; R600-NEXT: ADD_INT T5.X, PS, literal.x,
1315 ; R600-NEXT: ADD T3.Y, PV.W, PV.Z,
1316 ; R600-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
1317 ; R600-NEXT: ADD_INT T2.W, T0.W, literal.z,
1318 ; R600-NEXT: SETGT_UINT * T3.W, T0.W, literal.w,
1319 ; R600-NEXT: -254(nan), 204(2.858649e-43)
1320 ; R600-NEXT: 102(1.429324e-43), -229(nan)
1321 ; R600-NEXT: ADD_INT * T6.X, T0.W, literal.x,
1322 ; R600-NEXT: -127(nan), 0(0.000000e+00)
1323 ; R600-NEXT: ALU clause starting at 106:
1324 ; R600-NEXT: SETGT_UINT T0.Y, T0.W, literal.x,
1325 ; R600-NEXT: CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221
1326 ; R600-NEXT: SETGT_INT T2.W, T0.W, literal.y,
1327 ; R600-NEXT: EXP_IEEE * T1.Z, T3.Y,
1328 ; R600-NEXT: 254(3.559298e-43), -127(nan)
1329 ; R600-NEXT: ADD_INT T7.X, T1.Y, literal.x,
1330 ; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y,
1331 ; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.W,
1332 ; R600-NEXT: CNDE_INT T4.W, PV.Y, T6.X, T5.X,
1333 ; R600-NEXT: SETGT_INT * T0.W, T0.W, literal.z,
1334 ; R600-NEXT: -127(nan), 209715200(1.972152e-31)
1335 ; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
1336 ; R600-NEXT: SETGT_UINT T5.X, T1.Y, literal.x,
1337 ; R600-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
1338 ; R600-NEXT: MAX_INT T0.Z, T1.Y, literal.y,
1339 ; R600-NEXT: MUL_IEEE T4.W, PV.Y, literal.z,
1340 ; R600-NEXT: MUL_IEEE * T5.W, T1.Z, literal.w,
1341 ; R600-NEXT: 254(3.559298e-43), -330(nan)
1342 ; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
1343 ; R600-NEXT: MUL_IEEE T6.X, PS, literal.x,
1344 ; R600-NEXT: CNDE_INT T3.Y, T3.W, PV.W, T3.Y, BS:VEC_021/SCL_122
1345 ; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
1346 ; R600-NEXT: ADD_INT T3.W, T1.Y, literal.z,
1347 ; R600-NEXT: SETGT_UINT * T4.W, T1.Y, literal.w,
1348 ; R600-NEXT: 2130706432(1.701412e+38), 204(2.858649e-43)
1349 ; R600-NEXT: 102(1.429324e-43), -229(nan)
1350 ; R600-NEXT: CNDE_INT T8.X, PS, PV.Z, PV.W,
1351 ; R600-NEXT: SETGT_INT T5.Y, T1.Y, literal.x,
1352 ; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, T1.Z,
1353 ; R600-NEXT: CNDE_INT T2.W, T0.Y, T5.W, PV.X, BS:VEC_120/SCL_212
1354 ; R600-NEXT: LSHL * T3.W, T4.Y, literal.y,
1355 ; R600-NEXT: -127(nan), 23(3.222986e-44)
1356 ; R600-NEXT: ADD_INT T6.X, PS, literal.x,
1357 ; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.Z, PV.W,
1358 ; R600-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T1.Y,
1359 ; R600-NEXT: CNDE_INT T0.W, T5.X, T7.X, T4.X,
1360 ; R600-NEXT: SETGT_INT * T2.W, T1.Y, literal.y,
1361 ; R600-NEXT: 1065353216(1.000000e+00), 127(1.779649e-43)
1362 ; R600-NEXT: CNDE_INT T4.X, PS, PV.Z, PV.W,
1363 ; R600-NEXT: MUL_IEEE T0.Y, PV.Y, PV.X,
1364 ; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].Z,
1365 ; R600-NEXT: MUL_IEEE T0.W, T2.Y, literal.y,
1366 ; R600-NEXT: CNDE_INT * T1.W, T4.W, T3.X, T1.W,
1367 ; R600-NEXT: -1026650416(-1.032789e+02), 2130706432(1.701412e+38)
1368 ; R600-NEXT: CNDE_INT T1.X, T5.Y, PS, T1.X,
1369 ; R600-NEXT: CNDE_INT T1.Y, T5.X, T2.Y, PV.W,
1370 ; R600-NEXT: CNDE T0.Z, PV.Z, PV.Y, 0.0,
1371 ; R600-NEXT: SETGT T0.W, KC0[3].Z, literal.x,
1372 ; R600-NEXT: LSHL * T1.W, PV.X, literal.y,
1373 ; R600-NEXT: 1118925336(8.872284e+01), 23(3.222986e-44)
1374 ; R600-NEXT: ADD_INT T3.X, PS, literal.x,
1375 ; R600-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.y,
1376 ; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.X, PV.Y,
1377 ; R600-NEXT: CNDE T0.W, T2.X, T0.X, 0.0,
1378 ; R600-NEXT: SETGT * T1.W, KC0[3].Y, literal.z,
1379 ; R600-NEXT: 1065353216(1.000000e+00), 2139095040(INF)
1380 ; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
1381 ; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x,
1382 ; R600-NEXT: MUL_IEEE T0.W, PV.Z, PV.X,
1383 ; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
1384 ; R600-NEXT: 2139095040(INF), -1026650416(-1.032789e+02)
1385 ; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
1386 ; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0,
1387 ; R600-NEXT: SETGT * T1.W, KC0[3].W, literal.y,
1388 ; R600-NEXT: 2(2.802597e-45), 1118925336(8.872284e+01)
1389 ; R600-NEXT: CNDE T2.X, PS, PV.W, literal.x,
1390 ; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
1391 ; R600-NEXT: 2139095040(INF), 8(1.121039e-44)
1392 ; R600-NEXT: LSHR * T3.X, PV.W, literal.x,
1393 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1395 ; CM-LABEL: s_exp_v3f32:
1397 ; CM-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[]
1398 ; CM-NEXT: ALU 77, @108, KC0[CB0:0-32], KC1[]
1399 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
1400 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T3.X
1403 ; CM-NEXT: ALU clause starting at 6:
1404 ; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
1405 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
1406 ; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W,
1407 ; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
1408 ; CM-NEXT: MUL_IEEE * T2.W, T0.W, literal.y,
1409 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
1410 ; CM-NEXT: RNDNE T1.Z, PV.W,
1411 ; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
1412 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1413 ; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
1414 ; CM-NEXT: ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
1415 ; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
1416 ; CM-NEXT: TRUNC T1.Z, T1.Z,
1417 ; CM-NEXT: ADD * T0.W, PV.W, PV.Z,
1418 ; CM-NEXT: EXP_IEEE T0.X, T0.W,
1419 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
1420 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
1421 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
1422 ; CM-NEXT: FLT_TO_INT T0.Z, T1.Z,
1423 ; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.x,
1424 ; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
1425 ; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
1426 ; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
1427 ; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
1428 ; CM-NEXT: 209715200(1.972152e-31), -330(nan)
1429 ; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
1430 ; CM-NEXT: ADD_INT T1.X, PV.W, literal.x,
1431 ; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.y,
1432 ; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
1433 ; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
1434 ; CM-NEXT: -254(nan), 204(2.858649e-43)
1435 ; CM-NEXT: 102(1.429324e-43), -229(nan)
1436 ; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
1437 ; CM-NEXT: SETGT_UINT T2.Y, T0.Z, literal.y,
1438 ; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
1439 ; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
1440 ; CM-NEXT: -127(nan), 254(3.559298e-43)
1441 ; CM-NEXT: MUL_IEEE T3.X, T0.X, literal.x,
1442 ; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
1443 ; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
1444 ; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
1445 ; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
1446 ; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
1447 ; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.x,
1448 ; CM-NEXT: CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
1449 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
1450 ; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
1451 ; CM-NEXT: CNDE_INT T0.Y, T2.Y, T3.X, PV.Z,
1452 ; CM-NEXT: LSHL T0.Z, PV.Y, literal.x,
1453 ; CM-NEXT: AND_INT * T0.W, KC0[3].Z, literal.y,
1454 ; CM-NEXT: 23(3.222986e-44), -4096(nan)
1455 ; CM-NEXT: ADD T1.Y, KC0[3].Z, -PV.W,
1456 ; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.x,
1457 ; CM-NEXT: CNDE_INT * T1.W, T3.W, PV.X, PV.Y,
1458 ; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
1459 ; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
1460 ; CM-NEXT: MUL_IEEE T0.Y, PV.Y, literal.x,
1461 ; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y,
1462 ; CM-NEXT: AND_INT * T1.W, KC0[3].W, literal.z,
1463 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
1464 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
1465 ; CM-NEXT: SETGT T1.X, literal.x, KC0[3].Y,
1466 ; CM-NEXT: ADD T2.Y, KC0[3].W, -PV.W,
1467 ; CM-NEXT: RNDNE T1.Z, PV.Z,
1468 ; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.y, PV.Y,
1469 ; CM-NEXT: -1026650416(-1.032789e+02), 1069064192(1.442383e+00)
1470 ; CM-NEXT: MULADD_IEEE T2.X, T0.W, literal.x, PV.W,
1471 ; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
1472 ; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
1473 ; CM-NEXT: MUL_IEEE * T0.W, T1.W, literal.y, BS:VEC_120/SCL_212
1474 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
1475 ; CM-NEXT: TRUNC T3.X, T1.Z,
1476 ; CM-NEXT: RNDNE T1.Y, PV.W,
1477 ; CM-NEXT: MULADD_IEEE T0.Z, T2.Y, literal.x, PV.Z,
1478 ; CM-NEXT: ADD * T2.W, PV.Y, PV.X,
1479 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1480 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T2.W,
1481 ; CM-NEXT: EXP_IEEE T0.Y, T2.W,
1482 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T2.W,
1483 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T2.W,
1484 ; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, T0.Z,
1485 ; CM-NEXT: ADD T2.Y, T0.W, -T1.Y, BS:VEC_120/SCL_212
1486 ; CM-NEXT: FLT_TO_INT T0.Z, T3.X,
1487 ; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.y,
1488 ; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
1489 ; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
1490 ; CM-NEXT: SETGT_UINT T3.Y, PV.Z, literal.y,
1491 ; CM-NEXT: TRUNC T1.Z, T1.Y,
1492 ; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
1493 ; CM-NEXT: 209715200(1.972152e-31), -229(nan)
1494 ; CM-NEXT: EXP_IEEE T1.X (MASKED), T1.W,
1495 ; CM-NEXT: EXP_IEEE T1.Y, T1.W,
1496 ; CM-NEXT: EXP_IEEE T1.Z (MASKED), T1.W,
1497 ; CM-NEXT: EXP_IEEE * T1.W (MASKED), T1.W,
1498 ; CM-NEXT: FLT_TO_INT T2.X, T1.Z,
1499 ; CM-NEXT: MUL_IEEE T2.Y, PV.Y, literal.x,
1500 ; CM-NEXT: CNDE_INT T1.Z, T3.Y, T3.X, T0.W,
1501 ; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
1502 ; CM-NEXT: 209715200(1.972152e-31), -127(nan)
1503 ; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, T0.Y,
1504 ; CM-NEXT: MUL_IEEE * T4.Y, PV.Y, literal.x,
1505 ; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
1506 ; CM-NEXT: ALU clause starting at 108:
1507 ; CM-NEXT: SETGT_UINT T1.Z, T2.X, literal.x,
1508 ; CM-NEXT: MAX_INT * T1.W, T0.Z, literal.y,
1509 ; CM-NEXT: -229(nan), -330(nan)
1510 ; CM-NEXT: ADD_INT T4.X, PV.W, literal.x,
1511 ; CM-NEXT: ADD_INT T5.Y, T0.Z, literal.y,
1512 ; CM-NEXT: CNDE_INT T2.Z, PV.Z, T4.Y, T2.Y,
1513 ; CM-NEXT: SETGT_INT * T1.W, T2.X, literal.z,
1514 ; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
1515 ; CM-NEXT: -127(nan), 0(0.000000e+00)
1516 ; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T1.Y,
1517 ; CM-NEXT: MUL_IEEE T0.Y, T0.Y, literal.x,
1518 ; CM-NEXT: MAX_INT T2.Z, T2.X, literal.y,
1519 ; CM-NEXT: CNDE_INT * T2.W, T3.Y, PV.X, PV.Y, BS:VEC_120/SCL_212
1520 ; CM-NEXT: 2130706432(1.701412e+38), -330(nan)
1521 ; CM-NEXT: CNDE_INT T4.X, T0.W, PV.W, T0.Z,
1522 ; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
1523 ; CM-NEXT: ADD_INT T2.Z, T2.X, literal.y,
1524 ; CM-NEXT: MIN_INT * T0.W, T2.X, literal.z,
1525 ; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
1526 ; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
1527 ; CM-NEXT: ADD_INT T6.X, PV.W, literal.x,
1528 ; CM-NEXT: ADD_INT T3.Y, T2.X, literal.y,
1529 ; CM-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
1530 ; CM-NEXT: CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
1531 ; CM-NEXT: -254(nan), -127(nan)
1532 ; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
1533 ; CM-NEXT: MUL_IEEE T7.X, T1.Y, literal.x,
1534 ; CM-NEXT: CNDE_INT T1.Y, T1.W, PV.W, T2.X,
1535 ; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
1536 ; CM-NEXT: MIN_INT * T0.W, T0.Z, literal.y,
1537 ; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
1538 ; CM-NEXT: SETGT_INT T2.X, T2.X, literal.x,
1539 ; CM-NEXT: ADD_INT T2.Y, PV.W, literal.y,
1540 ; CM-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
1541 ; CM-NEXT: SETGT_UINT * T0.W, T0.Z, literal.w,
1542 ; CM-NEXT: 127(1.779649e-43), -254(nan)
1543 ; CM-NEXT: -127(nan), 254(3.559298e-43)
1544 ; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
1545 ; CM-NEXT: SETGT_INT T2.Y, T0.Z, literal.x,
1546 ; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.Y, T1.Z,
1547 ; CM-NEXT: MUL_IEEE * T1.W, T7.X, literal.y,
1548 ; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
1549 ; CM-NEXT: CNDE_INT T7.X, T3.Z, T7.X, PV.W,
1550 ; CM-NEXT: LSHL T1.Y, PV.Z, literal.x,
1551 ; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.X, PV.X, BS:VEC_021/SCL_122
1552 ; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.y,
1553 ; CM-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
1554 ; CM-NEXT: CNDE_INT T4.X, T0.W, T0.Y, PV.W,
1555 ; CM-NEXT: LSHL T0.Y, PV.Z, literal.x,
1556 ; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
1557 ; CM-NEXT: CNDE_INT * T0.W, T2.X, T5.X, PV.X,
1558 ; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
1559 ; CM-NEXT: MUL_IEEE T2.X, PV.W, PV.Z,
1560 ; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].W,
1561 ; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
1562 ; CM-NEXT: CNDE_INT * T0.W, T2.Y, T3.X, PV.X,
1563 ; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
1564 ; CM-NEXT: MUL_IEEE T3.X, PV.W, PV.Z,
1565 ; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].Z,
1566 ; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
1567 ; CM-NEXT: SETGT * T0.W, KC0[3].W, literal.y,
1568 ; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
1569 ; CM-NEXT: CNDE T2.X, PV.W, PV.Z, literal.x,
1570 ; CM-NEXT: CNDE T0.Y, PV.Y, PV.X, 0.0,
1571 ; CM-NEXT: SETGT T0.Z, KC0[3].Z, literal.y,
1572 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
1573 ; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
1574 ; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
1575 ; CM-NEXT: LSHR T3.X, PV.W, literal.x,
1576 ; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, literal.y,
1577 ; CM-NEXT: CNDE T0.Z, T1.X, T0.X, 0.0,
1578 ; CM-NEXT: SETGT * T0.W, KC0[3].Y, literal.z,
1579 ; CM-NEXT: 2(2.802597e-45), 2139095040(INF)
1580 ; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
1581 ; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
1582 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
1583 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
1584 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1585 %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %in)
1586 store <3 x float> %result, ptr addrspace(1) %out
1590 ; FIXME: We should be able to merge these packets together on Cayman so we
1591 ; have a maximum of 4 instructions.
1592 define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
1593 ; VI-SDAG-LABEL: s_exp_v4f32:
1595 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
1596 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
1597 ; VI-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218
1598 ; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
1599 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1600 ; VI-SDAG-NEXT: s_and_b32 s6, s3, 0xfffff000
1601 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s6
1602 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, s3, v2
1603 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
1604 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
1605 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, s6, v0
1606 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
1607 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295
1608 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
1609 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, s6, v4
1610 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
1611 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2
1612 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
1613 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
1614 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
1615 ; VI-SDAG-NEXT: s_and_b32 s6, s2, 0xfffff000
1616 ; VI-SDAG-NEXT: v_mov_b32_e32 v7, s6
1617 ; VI-SDAG-NEXT: v_sub_f32_e32 v7, s2, v7
1618 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
1619 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
1620 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7
1621 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7
1622 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
1623 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8
1624 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, s6, v4
1625 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
1626 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7
1627 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
1628 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7
1629 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5
1630 ; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
1631 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v3
1632 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
1633 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v6
1634 ; VI-SDAG-NEXT: s_and_b32 s3, s1, 0xfffff000
1635 ; VI-SDAG-NEXT: v_mov_b32_e32 v9, s3
1636 ; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
1637 ; VI-SDAG-NEXT: v_sub_f32_e32 v9, s1, v9
1638 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v1, vcc
1639 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v2, v7
1640 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
1641 ; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9
1642 ; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9
1643 ; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v2
1644 ; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10
1645 ; VI-SDAG-NEXT: v_mul_f32_e32 v10, s3, v4
1646 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v7
1647 ; VI-SDAG-NEXT: v_add_f32_e32 v9, v10, v9
1648 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v9
1649 ; VI-SDAG-NEXT: v_exp_f32_e32 v9, v2
1650 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
1651 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5
1652 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
1653 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v6
1654 ; VI-SDAG-NEXT: s_and_b32 s2, s0, 0xfffff000
1655 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc
1656 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v9, v7
1657 ; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2
1658 ; VI-SDAG-NEXT: v_sub_f32_e32 v9, s0, v9
1659 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
1660 ; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9
1661 ; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9
1662 ; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v0
1663 ; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10
1664 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4
1665 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v7
1666 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v9
1667 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
1668 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
1669 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v7
1670 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v5
1671 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
1672 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v6
1673 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
1674 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4
1675 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v5
1676 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
1677 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v6
1678 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s4
1679 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
1680 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s5
1681 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1682 ; VI-SDAG-NEXT: s_endpgm
1684 ; VI-GISEL-LABEL: s_exp_v4f32:
1685 ; VI-GISEL: ; %bb.0:
1686 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
1687 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8a000
1688 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x39a3b295
1689 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218
1690 ; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
1691 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1692 ; VI-GISEL-NEXT: s_and_b32 s6, s0, 0xfffff000
1693 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s6
1694 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, s0, v0
1695 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0
1696 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
1697 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v2
1698 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4
1699 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, s6, v3
1700 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0
1701 ; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v1
1702 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
1703 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
1704 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
1705 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
1706 ; VI-GISEL-NEXT: s_and_b32 s6, s1, 0xfffff000
1707 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2
1708 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
1709 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
1710 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s6
1711 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, s1, v1
1712 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v1
1713 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
1714 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
1715 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3
1716 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v7, v1
1717 ; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6
1718 ; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7
1719 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
1720 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
1721 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
1722 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
1723 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1724 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
1725 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
1726 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
1727 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0
1728 ; VI-GISEL-NEXT: v_sub_f32_e32 v6, s2, v6
1729 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v6
1730 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6
1731 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, s0, v2
1732 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v9
1733 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, s0, v3
1734 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v9, v6
1735 ; VI-GISEL-NEXT: v_rndne_f32_e32 v9, v8
1736 ; VI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v9
1737 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v8, v6
1738 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
1739 ; VI-GISEL-NEXT: v_exp_f32_e32 v6, v6
1740 ; VI-GISEL-NEXT: s_and_b32 s0, s3, 0xfffff000
1741 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, s0, v2
1742 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, s0, v3
1743 ; VI-GISEL-NEXT: v_ldexp_f32 v6, v6, v8
1744 ; VI-GISEL-NEXT: v_mov_b32_e32 v8, s0
1745 ; VI-GISEL-NEXT: v_sub_f32_e32 v8, s3, v8
1746 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v8
1747 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3fb8a000, v8
1748 ; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
1749 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v8
1750 ; VI-GISEL-NEXT: v_rndne_f32_e32 v8, v2
1751 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
1752 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
1753 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
1754 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
1755 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
1756 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8
1757 ; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2
1758 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1759 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
1760 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1761 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
1762 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
1763 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
1764 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
1765 ; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
1766 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
1767 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
1768 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
1769 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
1770 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
1771 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
1772 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1773 ; VI-GISEL-NEXT: s_endpgm
1775 ; GFX900-SDAG-LABEL: s_exp_v4f32:
1776 ; GFX900-SDAG: ; %bb.0:
1777 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
1778 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
1779 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
1780 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
1781 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
1782 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1783 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
1784 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
1785 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2
1786 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
1787 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4
1788 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
1789 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
1790 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
1791 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5
1792 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218
1793 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v9, 0x7f800000
1794 ; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3
1795 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s2, v0
1796 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v3
1797 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v3, v7
1798 ; GFX900-SDAG-NEXT: v_fma_f32 v3, s2, v0, -v3
1799 ; GFX900-SDAG-NEXT: v_fma_f32 v3, s2, v1, v3
1800 ; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v8, v3
1801 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v8, v3
1802 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
1803 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1804 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v6
1805 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, v9, v2, vcc
1806 ; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v8, v7
1807 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s1, v0
1808 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v8, v7
1809 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v8
1810 ; GFX900-SDAG-NEXT: v_fma_f32 v7, s1, v0, -v7
1811 ; GFX900-SDAG-NEXT: v_fma_f32 v7, s1, v1, v7
1812 ; GFX900-SDAG-NEXT: v_add_f32_e32 v7, v10, v7
1813 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v7, v7
1814 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v8
1815 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5
1816 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1817 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v6
1818 ; GFX900-SDAG-NEXT: v_ldexp_f32 v7, v7, v8
1819 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v8, s0, v0
1820 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v10, v8
1821 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s0, v0, -v8
1822 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v11, v8, v10
1823 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s0, v1, v0
1824 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v11, v0
1825 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
1826 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v10
1827 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
1828 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v5
1829 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
1830 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v6
1831 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
1832 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v8
1833 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v5
1834 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
1835 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v6
1836 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
1837 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc
1838 ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
1839 ; GFX900-SDAG-NEXT: s_endpgm
1841 ; GFX900-GISEL-LABEL: s_exp_v4f32:
1842 ; GFX900-GISEL: ; %bb.0:
1843 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
1844 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
1845 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b
1846 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
1847 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218
1848 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1849 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v2
1850 ; GFX900-GISEL-NEXT: v_fma_f32 v1, s0, v2, -v0
1851 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v0
1852 ; GFX900-GISEL-NEXT: v_fma_f32 v1, s0, v3, v1
1853 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
1854 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1855 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
1856 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
1857 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
1858 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
1859 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
1860 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2
1861 ; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1
1862 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v7, v1
1863 ; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v3, v6
1864 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7
1865 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
1866 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
1867 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
1868 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1869 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
1870 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
1871 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
1872 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
1873 ; GFX900-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6
1874 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v9, v6
1875 ; GFX900-GISEL-NEXT: v_fma_f32 v8, s2, v3, v8
1876 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v6, v6, v9
1877 ; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
1878 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
1879 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6
1880 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
1881 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
1882 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1883 ; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v8
1884 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2
1885 ; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8
1886 ; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v3, v2
1887 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v8
1888 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3
1889 ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
1890 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
1891 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2
1892 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
1893 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1894 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
1895 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
1896 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
1897 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
1898 ; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
1899 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
1900 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
1901 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
1902 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
1903 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
1904 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
1905 ; GFX900-GISEL-NEXT: s_endpgm
1907 ; SI-SDAG-LABEL: s_exp_v4f32:
1909 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd
1910 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
1911 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
1912 ; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218
1913 ; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
1914 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1915 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
1916 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
1917 ; SI-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2
1918 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
1919 ; SI-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4
1920 ; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
1921 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
1922 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
1923 ; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
1924 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4
1925 ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
1926 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3
1927 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, s2, v0
1928 ; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v3
1929 ; SI-SDAG-NEXT: v_sub_f32_e32 v7, v3, v6
1930 ; SI-SDAG-NEXT: v_fma_f32 v3, s2, v0, -v3
1931 ; SI-SDAG-NEXT: v_fma_f32 v3, s2, v1, v3
1932 ; SI-SDAG-NEXT: v_add_f32_e32 v3, v7, v3
1933 ; SI-SDAG-NEXT: v_exp_f32_e32 v7, v3
1934 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6
1935 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1936 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v5
1937 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc
1938 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v7, v6
1939 ; SI-SDAG-NEXT: v_mul_f32_e32 v6, s1, v0
1940 ; SI-SDAG-NEXT: v_rndne_f32_e32 v7, v6
1941 ; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v7
1942 ; SI-SDAG-NEXT: v_fma_f32 v6, s1, v0, -v6
1943 ; SI-SDAG-NEXT: v_fma_f32 v6, s1, v1, v6
1944 ; SI-SDAG-NEXT: v_add_f32_e32 v6, v9, v6
1945 ; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6
1946 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
1947 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4
1948 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1949 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v5
1950 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v6, v6, v7
1951 ; SI-SDAG-NEXT: v_mul_f32_e32 v7, s0, v0
1952 ; SI-SDAG-NEXT: v_rndne_f32_e32 v9, v7
1953 ; SI-SDAG-NEXT: v_fma_f32 v0, s0, v0, -v7
1954 ; SI-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9
1955 ; SI-SDAG-NEXT: v_fma_f32 v0, s0, v1, v0
1956 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v10, v0
1957 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
1958 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9
1959 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
1960 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4
1961 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc
1962 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v5
1963 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
1964 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v7
1965 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4
1966 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
1967 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v5
1968 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
1969 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
1970 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
1971 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1972 ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1973 ; SI-SDAG-NEXT: s_endpgm
1975 ; SI-GISEL-LABEL: s_exp_v4f32:
1976 ; SI-GISEL: ; %bb.0:
1977 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd
1978 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b
1979 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
1980 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218
1981 ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
1982 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1983 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v2
1984 ; SI-GISEL-NEXT: v_fma_f32 v1, s0, v2, -v0
1985 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v0
1986 ; SI-GISEL-NEXT: v_fma_f32 v1, s0, v3, v1
1987 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
1988 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1989 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
1990 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
1991 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
1992 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
1993 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
1994 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
1995 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2
1996 ; SI-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1
1997 ; SI-GISEL-NEXT: v_rndne_f32_e32 v7, v1
1998 ; SI-GISEL-NEXT: v_fma_f32 v6, s1, v3, v6
1999 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7
2000 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
2001 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
2002 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2003 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
2004 ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
2005 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
2006 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v6
2007 ; SI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
2008 ; SI-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6
2009 ; SI-GISEL-NEXT: v_rndne_f32_e32 v9, v6
2010 ; SI-GISEL-NEXT: v_fma_f32 v8, s2, v3, v8
2011 ; SI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v9
2012 ; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
2013 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
2014 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6
2015 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
2016 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
2017 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
2018 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v8
2019 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2
2020 ; SI-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8
2021 ; SI-GISEL-NEXT: v_fma_f32 v2, s3, v3, v2
2022 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v8
2023 ; SI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3
2024 ; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
2025 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
2026 ; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2
2027 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
2028 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
2029 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
2030 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
2031 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
2032 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
2033 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3
2034 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
2035 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
2036 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
2037 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
2038 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
2039 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2040 ; SI-GISEL-NEXT: s_endpgm
2042 ; R600-LABEL: s_exp_v4f32:
2044 ; R600-NEXT: ALU 98, @6, KC0[CB0:0-32], KC1[]
2045 ; R600-NEXT: ALU 95, @105, KC0[CB0:0-32], KC1[]
2046 ; R600-NEXT: ALU 24, @201, KC0[CB0:0-32], KC1[]
2047 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
2050 ; R600-NEXT: ALU clause starting at 6:
2051 ; R600-NEXT: AND_INT * T0.W, KC0[3].Z, literal.x,
2052 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
2053 ; R600-NEXT: ADD * T1.W, KC0[3].Z, -PV.W,
2054 ; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.x,
2055 ; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.y,
2056 ; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
2057 ; R600-NEXT: RNDNE T4.W, PS,
2058 ; R600-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.W, BS:VEC_021/SCL_122
2059 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
2060 ; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PS,
2061 ; R600-NEXT: ADD * T1.W, T3.W, -PV.W,
2062 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
2063 ; R600-NEXT: ADD T0.W, PS, PV.W,
2064 ; R600-NEXT: TRUNC * T1.W, T4.W,
2065 ; R600-NEXT: FLT_TO_INT T1.W, PS,
2066 ; R600-NEXT: EXP_IEEE * T0.X, PV.W,
2067 ; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x,
2068 ; R600-NEXT: MAX_INT T0.W, PV.W, literal.y,
2069 ; R600-NEXT: MIN_INT * T2.W, PV.W, literal.z,
2070 ; R600-NEXT: 209715200(1.972152e-31), -330(nan)
2071 ; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
2072 ; R600-NEXT: ADD_INT T1.X, PS, literal.x,
2073 ; R600-NEXT: AND_INT T0.Y, KC0[4].X, literal.y,
2074 ; R600-NEXT: ADD_INT T1.Z, PV.W, literal.z,
2075 ; R600-NEXT: ADD_INT * T0.W, T1.W, literal.w,
2076 ; R600-NEXT: -254(nan), -4096(nan)
2077 ; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
2078 ; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.x,
2079 ; R600-NEXT: -229(nan), 0(0.000000e+00)
2080 ; R600-NEXT: ADD_INT T2.X, T1.W, literal.x,
2081 ; R600-NEXT: SETGT_UINT T1.Y, T1.W, literal.y,
2082 ; R600-NEXT: CNDE_INT T1.Z, PV.W, T1.Z, T0.W,
2083 ; R600-NEXT: SETGT_INT T0.W, T1.W, literal.x,
2084 ; R600-NEXT: ADD * T3.W, KC0[4].X, -T0.Y,
2085 ; R600-NEXT: -127(nan), 254(3.559298e-43)
2086 ; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
2087 ; R600-NEXT: MUL_IEEE T2.Y, T0.Y, literal.y,
2088 ; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T1.W,
2089 ; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T1.X,
2090 ; R600-NEXT: SETGT_INT * T1.W, T1.W, literal.z,
2091 ; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
2092 ; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
2093 ; R600-NEXT: CNDE_INT T1.X, PS, PV.Z, PV.W,
2094 ; R600-NEXT: RNDNE T3.Y, PV.Y,
2095 ; R600-NEXT: MULADD_IEEE T1.Z, T3.W, literal.x, PV.X,
2096 ; R600-NEXT: MUL_IEEE T3.W, T0.Z, literal.y,
2097 ; R600-NEXT: MUL_IEEE * T4.W, T0.X, literal.z,
2098 ; R600-NEXT: 1069064192(1.442383e+00), 209715200(1.972152e-31)
2099 ; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
2100 ; R600-NEXT: MUL_IEEE T2.X, PS, literal.x,
2101 ; R600-NEXT: CNDE_INT T4.Y, T2.W, PV.W, T0.Z,
2102 ; R600-NEXT: MULADD_IEEE T0.Z, T0.Y, literal.y, PV.Z,
2103 ; R600-NEXT: ADD T2.W, T2.Y, -PV.Y, BS:VEC_120/SCL_212
2104 ; R600-NEXT: AND_INT * T3.W, KC0[3].Y, literal.z,
2105 ; R600-NEXT: 2130706432(1.701412e+38), 967029397(3.122284e-04)
2106 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
2107 ; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
2108 ; R600-NEXT: ADD T0.Y, PV.W, PV.Z,
2109 ; R600-NEXT: CNDE_INT T0.Z, T0.W, PV.Y, T0.X, BS:VEC_021/SCL_122
2110 ; R600-NEXT: CNDE_INT T0.W, T1.Y, T4.W, PV.X,
2111 ; R600-NEXT: LSHL * T2.W, T1.X, literal.y,
2112 ; R600-NEXT: 1069064192(1.442383e+00), 23(3.222986e-44)
2113 ; R600-NEXT: AND_INT T0.X, KC0[3].W, literal.x,
2114 ; R600-NEXT: TRUNC T1.Y, T3.Y,
2115 ; R600-NEXT: ADD_INT T1.Z, PS, literal.y,
2116 ; R600-NEXT: CNDE_INT T0.W, T1.W, PV.Z, PV.W,
2117 ; R600-NEXT: EXP_IEEE * T0.Y, PV.Y,
2118 ; R600-NEXT: -4096(nan), 1065353216(1.000000e+00)
2119 ; R600-NEXT: MUL_IEEE T1.X, PV.W, PV.Z,
2120 ; R600-NEXT: FLT_TO_INT T1.Y, PV.Y,
2121 ; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x,
2122 ; R600-NEXT: ADD T0.W, KC0[3].W, -PV.X,
2123 ; R600-NEXT: RNDNE * T1.W, T3.X,
2124 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2125 ; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Z,
2126 ; R600-NEXT: TRUNC T2.Y, PS,
2127 ; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.y,
2128 ; R600-NEXT: MUL_IEEE T2.W, PV.Z, literal.z,
2129 ; R600-NEXT: MAX_INT * T4.W, PV.Y, literal.w,
2130 ; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04)
2131 ; R600-NEXT: 209715200(1.972152e-31), -330(nan)
2132 ; R600-NEXT: ADD T4.X, KC0[3].Y, -T3.W,
2133 ; R600-NEXT: ADD_INT T3.Y, PS, literal.x,
2134 ; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.y,
2135 ; R600-NEXT: SETGT_UINT T4.W, T1.Y, literal.z,
2136 ; R600-NEXT: MIN_INT * T5.W, T1.Y, literal.w,
2137 ; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
2138 ; R600-NEXT: -229(nan), 381(5.338947e-43)
2139 ; R600-NEXT: ADD_INT T5.X, PS, literal.x,
2140 ; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y,
2141 ; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z,
2142 ; R600-NEXT: CNDE_INT T5.W, PV.W, PV.Y, PV.Z,
2143 ; R600-NEXT: SETGT_INT * T6.W, T1.Y, literal.y,
2144 ; R600-NEXT: -254(nan), -127(nan)
2145 ; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
2146 ; R600-NEXT: MUL_IEEE T6.X, T0.Y, literal.x,
2147 ; R600-NEXT: CNDE_INT T3.Y, PS, PV.W, T1.Y,
2148 ; R600-NEXT: CNDE_INT * T2.Z, PV.Z, PV.Y, PV.X,
2149 ; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
2150 ; R600-NEXT: ALU clause starting at 105:
2151 ; R600-NEXT: SETGT_INT T5.W, T1.Y, literal.x,
2152 ; R600-NEXT: MUL_IEEE * T7.W, T4.X, literal.y,
2153 ; R600-NEXT: 127(1.779649e-43), 967029397(3.122284e-04)
2154 ; R600-NEXT: MUL_IEEE T5.X, T0.X, literal.x,
2155 ; R600-NEXT: MULADD_IEEE T1.Y, T4.X, literal.x, PS, BS:VEC_120/SCL_212
2156 ; R600-NEXT: CNDE_INT T2.Z, PV.W, T3.Y, T2.Z,
2157 ; R600-NEXT: MUL_IEEE T7.W, T6.X, literal.y, BS:VEC_201
2158 ; R600-NEXT: CNDE_INT * T2.W, T4.W, T2.W, T0.Z,
2159 ; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
2160 ; R600-NEXT: CNDE_INT T4.X, T6.W, PS, T0.Y,
2161 ; R600-NEXT: CNDE_INT T0.Y, T3.Z, T6.X, PV.W,
2162 ; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
2163 ; R600-NEXT: MULADD_IEEE T2.W, T3.W, literal.y, PV.Y, BS:VEC_201
2164 ; R600-NEXT: ADD * T1.W, T3.X, -T1.W,
2165 ; R600-NEXT: 23(3.222986e-44), 967029397(3.122284e-04)
2166 ; R600-NEXT: ADD T3.X, PS, PV.W,
2167 ; R600-NEXT: ADD_INT T1.Y, PV.Z, literal.x,
2168 ; R600-NEXT: CNDE_INT T0.Z, T5.W, PV.X, PV.Y,
2169 ; R600-NEXT: RNDNE T1.W, T5.X,
2170 ; R600-NEXT: MULADD_IEEE * T0.W, T0.W, literal.y, T1.Z, BS:VEC_021/SCL_122
2171 ; R600-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00)
2172 ; R600-NEXT: MULADD_IEEE T0.X, T0.X, literal.x, PS,
2173 ; R600-NEXT: ADD T0.Y, T5.X, -PV.W, BS:VEC_120/SCL_212
2174 ; R600-NEXT: MUL_IEEE T0.Z, PV.Z, PV.Y,
2175 ; R600-NEXT: SETGT T0.W, literal.y, KC0[4].X,
2176 ; R600-NEXT: EXP_IEEE * T1.Y, PV.X,
2177 ; R600-NEXT: 967029397(3.122284e-04), -1026650416(-1.032789e+02)
2178 ; R600-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0,
2179 ; R600-NEXT: ADD T0.Y, PV.Y, PV.X,
2180 ; R600-NEXT: FLT_TO_INT T0.Z, T2.Y,
2181 ; R600-NEXT: TRUNC T0.W, T1.W,
2182 ; R600-NEXT: MUL_IEEE * T1.W, PS, literal.x,
2183 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2184 ; R600-NEXT: SETGT T0.X, KC0[4].X, literal.x,
2185 ; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
2186 ; R600-NEXT: FLT_TO_INT T1.Z, PV.W,
2187 ; R600-NEXT: MAX_INT T0.W, PV.Z, literal.z,
2188 ; R600-NEXT: EXP_IEEE * T0.Y, PV.Y,
2189 ; R600-NEXT: 1118925336(8.872284e+01), 209715200(1.972152e-31)
2190 ; R600-NEXT: -330(nan), 0(0.000000e+00)
2191 ; R600-NEXT: MUL_IEEE T4.X, T1.Y, literal.x,
2192 ; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y,
2193 ; R600-NEXT: ADD_INT T2.Z, PV.W, literal.z,
2194 ; R600-NEXT: ADD_INT * T0.W, T0.Z, literal.w,
2195 ; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
2196 ; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
2197 ; R600-NEXT: MAX_INT * T2.W, T1.Z, literal.x,
2198 ; R600-NEXT: -330(nan), 0(0.000000e+00)
2199 ; R600-NEXT: SETGT_UINT T5.X, T0.Z, literal.x,
2200 ; R600-NEXT: ADD_INT T4.Y, PV.W, literal.y,
2201 ; R600-NEXT: ADD_INT T3.Z, T1.Z, literal.z, BS:VEC_120/SCL_212
2202 ; R600-NEXT: SETGT_UINT T2.W, T1.Z, literal.x, BS:VEC_120/SCL_212
2203 ; R600-NEXT: MIN_INT * T3.W, T1.Z, literal.w,
2204 ; R600-NEXT: -229(nan), 204(2.858649e-43)
2205 ; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43)
2206 ; R600-NEXT: ADD_INT T6.X, PS, literal.x,
2207 ; R600-NEXT: ADD_INT T5.Y, T1.Z, literal.y,
2208 ; R600-NEXT: SETGT_UINT T4.Z, T1.Z, literal.z,
2209 ; R600-NEXT: CNDE_INT T3.W, PV.W, PV.Y, PV.Z,
2210 ; R600-NEXT: SETGT_INT * T4.W, T1.Z, literal.y,
2211 ; R600-NEXT: -254(nan), -127(nan)
2212 ; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
2213 ; R600-NEXT: CNDE_INT T7.X, PS, PV.W, T1.Z, BS:VEC_021/SCL_122
2214 ; R600-NEXT: CNDE_INT T4.Y, PV.Z, PV.Y, PV.X,
2215 ; R600-NEXT: SETGT_INT T1.Z, T1.Z, literal.x, BS:VEC_120/SCL_212
2216 ; R600-NEXT: CNDE_INT T0.W, T5.X, T2.Z, T0.W, BS:VEC_102/SCL_221
2217 ; R600-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
2218 ; R600-NEXT: 127(1.779649e-43), -127(nan)
2219 ; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T0.Z,
2220 ; R600-NEXT: CNDE_INT T4.Y, PV.Z, PV.X, PV.Y,
2221 ; R600-NEXT: MIN_INT T2.Z, T0.Z, literal.x,
2222 ; R600-NEXT: MUL_IEEE T0.W, T3.Y, literal.y,
2223 ; R600-NEXT: MUL_IEEE * T5.W, T0.Y, literal.z,
2224 ; R600-NEXT: 381(5.338947e-43), 209715200(1.972152e-31)
2225 ; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
2226 ; R600-NEXT: MUL_IEEE T7.X, PS, literal.x,
2227 ; R600-NEXT: CNDE_INT T3.Y, T2.W, PV.W, T3.Y,
2228 ; R600-NEXT: ADD_INT T2.Z, PV.Z, literal.y,
2229 ; R600-NEXT: ADD_INT T0.W, T0.Z, literal.z,
2230 ; R600-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w,
2231 ; R600-NEXT: 2130706432(1.701412e+38), -254(nan)
2232 ; R600-NEXT: -127(nan), 254(3.559298e-43)
2233 ; R600-NEXT: CNDE_INT T8.X, PS, PV.W, PV.Z,
2234 ; R600-NEXT: SETGT_INT T5.Y, T0.Z, literal.x,
2235 ; R600-NEXT: CNDE_INT T0.Z, T4.W, PV.Y, T0.Y, BS:VEC_021/SCL_122
2236 ; R600-NEXT: CNDE_INT T0.W, T4.Z, T5.W, PV.X, BS:VEC_120/SCL_212
2237 ; R600-NEXT: LSHL * T4.W, T4.Y, literal.y,
2238 ; R600-NEXT: 127(1.779649e-43), 23(3.222986e-44)
2239 ; R600-NEXT: ADD_INT T7.X, PS, literal.x,
2240 ; R600-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, PV.W,
2241 ; R600-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
2242 ; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.y,
2243 ; R600-NEXT: CNDE_INT * T1.W, T5.X, T2.Y, T1.W,
2244 ; R600-NEXT: 1065353216(1.000000e+00), 2130706432(1.701412e+38)
2245 ; R600-NEXT: CNDE_INT T5.X, T3.W, PS, T1.Y,
2246 ; R600-NEXT: CNDE_INT * T1.Y, T2.W, T4.X, PV.W, BS:VEC_120/SCL_212
2247 ; R600-NEXT: ALU clause starting at 201:
2248 ; R600-NEXT: LSHL T0.Z, T0.Z, literal.x,
2249 ; R600-NEXT: MUL_IEEE T0.W, T0.Y, T7.X,
2250 ; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
2251 ; R600-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02)
2252 ; R600-NEXT: CNDE T4.X, PS, PV.W, 0.0,
2253 ; R600-NEXT: SETGT T0.Y, KC0[3].W, literal.x,
2254 ; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
2255 ; R600-NEXT: CNDE_INT T0.W, T5.Y, T5.X, T1.Y, BS:VEC_102/SCL_221
2256 ; R600-NEXT: CNDE * T1.W, T0.X, T3.X, literal.z,
2257 ; R600-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00)
2258 ; R600-NEXT: 2139095040(INF), 0(0.000000e+00)
2259 ; R600-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
2260 ; R600-NEXT: SETGT T2.Y, literal.x, KC0[3].Y,
2261 ; R600-NEXT: CNDE T1.Z, PV.Y, PV.X, literal.y,
2262 ; R600-NEXT: CNDE T0.W, T2.X, T1.X, 0.0,
2263 ; R600-NEXT: SETGT * T2.W, KC0[3].Z, literal.z,
2264 ; R600-NEXT: -1026650416(-1.032789e+02), 2139095040(INF)
2265 ; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
2266 ; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x,
2267 ; R600-NEXT: CNDE T0.W, PV.Y, PV.X, 0.0,
2268 ; R600-NEXT: SETGT * T2.W, KC0[3].Y, literal.y,
2269 ; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
2270 ; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x,
2271 ; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y,
2272 ; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
2274 ; CM-LABEL: s_exp_v4f32:
2276 ; CM-NEXT: ALU 97, @6, KC0[CB0:0-32], KC1[]
2277 ; CM-NEXT: ALU 97, @104, KC0[CB0:0-32], KC1[]
2278 ; CM-NEXT: ALU 35, @202, KC0[CB0:0-32], KC1[]
2279 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
2282 ; CM-NEXT: ALU clause starting at 6:
2283 ; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
2284 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
2285 ; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W,
2286 ; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
2287 ; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y,
2288 ; CM-NEXT: AND_INT * T2.W, KC0[3].W, literal.z,
2289 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
2290 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
2291 ; CM-NEXT: ADD T1.Y, KC0[3].W, -PV.W,
2292 ; CM-NEXT: RNDNE T1.Z, PV.Z,
2293 ; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y,
2294 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
2295 ; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
2296 ; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
2297 ; CM-NEXT: MUL_IEEE T0.Z, T2.W, literal.y, BS:VEC_120/SCL_212
2298 ; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x,
2299 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
2300 ; CM-NEXT: TRUNC T1.X, T1.Z,
2301 ; CM-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.x, PV.W,
2302 ; CM-NEXT: RNDNE T1.Z, PV.Z,
2303 ; CM-NEXT: ADD * T0.W, PV.Y, PV.X,
2304 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
2305 ; CM-NEXT: EXP_IEEE T0.X, T0.W,
2306 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
2307 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
2308 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
2309 ; CM-NEXT: TRUNC T2.X, T1.Z,
2310 ; CM-NEXT: MULADD_IEEE T0.Y, T2.W, literal.x, T1.Y,
2311 ; CM-NEXT: FLT_TO_INT T2.Z, T1.X,
2312 ; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
2313 ; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
2314 ; CM-NEXT: ADD T1.X, T0.Z, -T1.Z,
2315 ; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.x,
2316 ; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y,
2317 ; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
2318 ; CM-NEXT: 209715200(1.972152e-31), -330(nan)
2319 ; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
2320 ; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
2321 ; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y,
2322 ; CM-NEXT: ADD_INT T0.Z, T2.Z, literal.z,
2323 ; CM-NEXT: SETGT_UINT * T1.W, T2.Z, literal.w,
2324 ; CM-NEXT: -254(nan), 204(2.858649e-43)
2325 ; CM-NEXT: 102(1.429324e-43), -229(nan)
2326 ; CM-NEXT: ADD_INT T4.X, T2.Z, literal.x,
2327 ; CM-NEXT: SETGT_UINT T3.Y, T2.Z, literal.y,
2328 ; CM-NEXT: CNDE_INT T0.Z, PV.W, PV.Y, PV.Z,
2329 ; CM-NEXT: SETGT_INT * T2.W, T2.Z, literal.x,
2330 ; CM-NEXT: -127(nan), 254(3.559298e-43)
2331 ; CM-NEXT: MUL_IEEE T5.X, T0.X, literal.x,
2332 ; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Z, T2.Z,
2333 ; CM-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T3.X,
2334 ; CM-NEXT: SETGT_INT * T3.W, T2.Z, literal.y,
2335 ; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
2336 ; CM-NEXT: AND_INT T3.X, KC0[3].Z, literal.x,
2337 ; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Y, PV.Z,
2338 ; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y,
2339 ; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.Y, T0.W,
2340 ; CM-NEXT: -4096(nan), 2130706432(1.701412e+38)
2341 ; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
2342 ; CM-NEXT: CNDE_INT T1.Y, T3.Y, T5.X, PV.Z,
2343 ; CM-NEXT: LSHL T0.Z, PV.Y, literal.x,
2344 ; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
2345 ; CM-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00)
2346 ; CM-NEXT: RNDNE T4.X, PV.W,
2347 ; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
2348 ; CM-NEXT: CNDE_INT T0.Z, T3.W, PV.X, PV.Y,
2349 ; CM-NEXT: ADD * T1.W, T1.X, T0.Y,
2350 ; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
2351 ; CM-NEXT: EXP_IEEE T0.X, T1.W,
2352 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
2353 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
2354 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
2355 ; CM-NEXT: MUL_IEEE T1.X, T0.Z, T2.Y,
2356 ; CM-NEXT: TRUNC T0.Y, T4.X,
2357 ; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
2358 ; CM-NEXT: MUL_IEEE * T1.W, PV.X, literal.x,
2359 ; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2360 ; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x,
2361 ; CM-NEXT: MUL_IEEE T1.Y, T0.X, literal.y,
2362 ; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.z,
2363 ; CM-NEXT: MIN_INT * T2.W, PV.Z, literal.w,
2364 ; CM-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
2365 ; CM-NEXT: -330(nan), 381(5.338947e-43)
2366 ; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
2367 ; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y,
2368 ; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
2369 ; CM-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w,
2370 ; CM-NEXT: -254(nan), 204(2.858649e-43)
2371 ; CM-NEXT: 102(1.429324e-43), -229(nan)
2372 ; CM-NEXT: ADD_INT T6.X, T0.Z, literal.x,
2373 ; CM-NEXT: SETGT_UINT T3.Y, T0.Z, literal.y,
2374 ; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
2375 ; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.x,
2376 ; CM-NEXT: -127(nan), 254(3.559298e-43)
2377 ; CM-NEXT: CNDE_INT T7.X, PV.W, PV.Z, T0.Z,
2378 ; CM-NEXT: CNDE_INT T2.Y, PV.Y, PV.X, T5.X,
2379 ; CM-NEXT: SETGT_INT * T0.Z, T0.Z, literal.x,
2380 ; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
2381 ; CM-NEXT: ALU clause starting at 104:
2382 ; CM-NEXT: ADD * T4.W, KC0[3].Z, -T3.X,
2383 ; CM-NEXT: MUL_IEEE T5.X, PV.W, literal.x,
2384 ; CM-NEXT: CNDE_INT T2.Y, T0.Z, T7.X, T2.Y,
2385 ; CM-NEXT: MUL_IEEE T1.Z, T1.Y, literal.y,
2386 ; CM-NEXT: CNDE_INT * T1.W, T2.W, T2.X, T1.W, BS:VEC_021/SCL_122
2387 ; CM-NEXT: 967029397(3.122284e-04), 2130706432(1.701412e+38)
2388 ; CM-NEXT: CNDE_INT T0.X, T3.W, PV.W, T0.X,
2389 ; CM-NEXT: CNDE_INT T1.Y, T3.Y, T1.Y, PV.Z,
2390 ; CM-NEXT: LSHL T1.Z, PV.Y, literal.x,
2391 ; CM-NEXT: MULADD_IEEE * T1.W, T4.W, literal.y, PV.X, BS:VEC_120/SCL_212
2392 ; CM-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00)
2393 ; CM-NEXT: MULADD_IEEE T2.X, T3.X, literal.x, PV.W,
2394 ; CM-NEXT: ADD T2.Y, T0.W, -T4.X,
2395 ; CM-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
2396 ; CM-NEXT: CNDE_INT * T0.W, T0.Z, PV.X, PV.Y,
2397 ; CM-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00)
2398 ; CM-NEXT: AND_INT T0.X, KC0[4].X, literal.x,
2399 ; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z,
2400 ; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W,
2401 ; CM-NEXT: ADD * T0.W, PV.Y, PV.X,
2402 ; CM-NEXT: -4096(nan), -1026650416(-1.032789e+02)
2403 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
2404 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
2405 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
2406 ; CM-NEXT: EXP_IEEE * T0.W, T0.W,
2407 ; CM-NEXT: CNDE T2.X, T0.Z, T1.Y, 0.0,
2408 ; CM-NEXT: ADD T1.Y, KC0[4].X, -T0.X,
2409 ; CM-NEXT: FLT_TO_INT T0.Z, T0.Y,
2410 ; CM-NEXT: MUL_IEEE * T1.W, PV.W, literal.x,
2411 ; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2412 ; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
2413 ; CM-NEXT: SETGT_UINT T0.Y, PV.Z, literal.y,
2414 ; CM-NEXT: MUL_IEEE T1.Z, PV.Y, literal.z,
2415 ; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.w,
2416 ; CM-NEXT: 209715200(1.972152e-31), -229(nan)
2417 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
2418 ; CM-NEXT: RNDNE T4.X, PV.W,
2419 ; CM-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.x, PV.Z,
2420 ; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.W,
2421 ; CM-NEXT: SETGT_INT * T1.W, T0.Z, literal.y,
2422 ; CM-NEXT: 1069064192(1.442383e+00), -127(nan)
2423 ; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, T0.W,
2424 ; CM-NEXT: MULADD_IEEE T1.Y, T0.X, literal.x, PV.Y,
2425 ; CM-NEXT: ADD T1.Z, T2.W, -PV.X,
2426 ; CM-NEXT: MAX_INT * T2.W, T0.Z, literal.y,
2427 ; CM-NEXT: 967029397(3.122284e-04), -330(nan)
2428 ; CM-NEXT: ADD_INT T0.X, PV.W, literal.x,
2429 ; CM-NEXT: ADD_INT T2.Y, T0.Z, literal.y,
2430 ; CM-NEXT: TRUNC T2.Z, T4.X,
2431 ; CM-NEXT: ADD * T2.W, PV.Z, PV.Y,
2432 ; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
2433 ; CM-NEXT: EXP_IEEE T1.X (MASKED), T2.W,
2434 ; CM-NEXT: EXP_IEEE T1.Y, T2.W,
2435 ; CM-NEXT: EXP_IEEE T1.Z (MASKED), T2.W,
2436 ; CM-NEXT: EXP_IEEE * T1.W (MASKED), T2.W,
2437 ; CM-NEXT: MUL_IEEE T4.X, T0.W, literal.x,
2438 ; CM-NEXT: FLT_TO_INT T3.Y, T2.Z,
2439 ; CM-NEXT: MUL_IEEE T1.Z, PV.Y, literal.y,
2440 ; CM-NEXT: CNDE_INT * T0.W, T0.Y, T0.X, T2.Y,
2441 ; CM-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
2442 ; CM-NEXT: CNDE_INT T0.X, T1.W, PV.W, T0.Z,
2443 ; CM-NEXT: MUL_IEEE T0.Y, PV.Z, literal.x,
2444 ; CM-NEXT: MAX_INT T2.Z, PV.Y, literal.y,
2445 ; CM-NEXT: MIN_INT * T0.W, PV.Y, literal.z,
2446 ; CM-NEXT: 209715200(1.972152e-31), -330(nan)
2447 ; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
2448 ; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
2449 ; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y,
2450 ; CM-NEXT: ADD_INT T2.Z, T3.Y, literal.z,
2451 ; CM-NEXT: SETGT_UINT * T0.W, T3.Y, literal.w,
2452 ; CM-NEXT: -254(nan), 204(2.858649e-43)
2453 ; CM-NEXT: 102(1.429324e-43), -229(nan)
2454 ; CM-NEXT: ADD_INT T6.X, T3.Y, literal.x,
2455 ; CM-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y,
2456 ; CM-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
2457 ; CM-NEXT: SETGT_INT * T1.W, T3.Y, literal.x,
2458 ; CM-NEXT: -127(nan), 254(3.559298e-43)
2459 ; CM-NEXT: MUL_IEEE T7.X, T1.Y, literal.x,
2460 ; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Z, T3.Y,
2461 ; CM-NEXT: CNDE_INT T2.Z, PV.Y, PV.X, T5.X,
2462 ; CM-NEXT: MIN_INT * T2.W, T0.Z, literal.y,
2463 ; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
2464 ; CM-NEXT: SETGT_INT T5.X, T3.Y, literal.x,
2465 ; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y,
2466 ; CM-NEXT: ADD_INT T3.Z, T0.Z, literal.z,
2467 ; CM-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w,
2468 ; CM-NEXT: 127(1.779649e-43), -254(nan)
2469 ; CM-NEXT: -127(nan), 254(3.559298e-43)
2470 ; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
2471 ; CM-NEXT: CNDE_INT T2.Y, PV.X, T2.Y, T2.Z,
2472 ; CM-NEXT: MUL_IEEE T2.Z, T7.X, literal.x,
2473 ; CM-NEXT: CNDE_INT * T0.W, T0.W, T0.Y, T1.Z, BS:VEC_021/SCL_122
2474 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
2475 ; CM-NEXT: SETGT_INT T8.X, T0.Z, literal.x,
2476 ; CM-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.Y,
2477 ; CM-NEXT: CNDE_INT T0.Z, T4.Y, T7.X, PV.Z,
2478 ; CM-NEXT: LSHL * T0.W, PV.Y, literal.y,
2479 ; CM-NEXT: 127(1.779649e-43), 23(3.222986e-44)
2480 ; CM-NEXT: ALU clause starting at 202:
2481 ; CM-NEXT: ADD_INT T7.X, T0.W, literal.x,
2482 ; CM-NEXT: CNDE_INT * T0.Y, T5.X, T0.Y, T0.Z,
2483 ; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
2484 ; CM-NEXT: CNDE_INT * T0.Z, T8.X, T0.X, T6.X,
2485 ; CM-NEXT: MUL_IEEE * T0.W, T4.X, literal.x,
2486 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
2487 ; CM-NEXT: CNDE_INT T0.X, T2.W, T4.X, PV.W,
2488 ; CM-NEXT: LSHL T1.Y, T0.Z, literal.x,
2489 ; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T7.X, BS:VEC_021/SCL_122
2490 ; CM-NEXT: SETGT * T0.W, literal.y, KC0[4].X,
2491 ; CM-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02)
2492 ; CM-NEXT: CNDE T4.X, PV.W, PV.Z, 0.0,
2493 ; CM-NEXT: SETGT T0.Y, KC0[4].X, literal.x,
2494 ; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
2495 ; CM-NEXT: CNDE_INT * T0.W, T8.X, T3.X, PV.X,
2496 ; CM-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00)
2497 ; CM-NEXT: SETGT T0.X, KC0[3].W, literal.x,
2498 ; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z,
2499 ; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z,
2500 ; CM-NEXT: CNDE * T0.W, PV.Y, PV.X, literal.z,
2501 ; CM-NEXT: 1118925336(8.872284e+01), -1026650416(-1.032789e+02)
2502 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
2503 ; CM-NEXT: SETGT T3.X, literal.x, KC0[3].Y,
2504 ; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, 0.0,
2505 ; CM-NEXT: CNDE T0.Z, PV.X, T2.X, literal.y,
2506 ; CM-NEXT: SETGT * T1.W, KC0[3].Z, literal.z,
2507 ; CM-NEXT: -1026650416(-1.032789e+02), 2139095040(INF)
2508 ; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
2509 ; CM-NEXT: CNDE T0.Y, PV.W, PV.Y, literal.x,
2510 ; CM-NEXT: CNDE T1.Z, PV.X, T1.X, 0.0,
2511 ; CM-NEXT: SETGT * T1.W, KC0[3].Y, literal.y,
2512 ; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
2513 ; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
2514 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
2515 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
2516 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2517 %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %in)
2518 store <4 x float> %result, ptr addrspace(1) %out
; Function-argument variant of the exp test: a plain (non-amdgpu_kernel)
; function that takes one float, %in, and calls @llvm.exp.f32 on it.
;
; Every per-target block below is an autogenerated FileCheck expectation (see
; the NOTE on the first line of this file). Regenerate with
; utils/update_llc_test_checks.py instead of editing the expectations by hand.
;
; Constants that recur in the expected code, decoded using the float values
; printed next to the same bit patterns in the R600/Cayman expectations of
; this file:
;   0x3fb8a000 = 1.442383e+00 and 0x39a3b295 = 3.122284e-04
;       multipliers used on the tonga path, applied both to the input masked
;       with 0xfffff000 and to the remainder (input minus masked input)
;   0xc2ce8ed0 = -1.032789e+02
;       inputs that compare below this value select 0 as the result
;   0x42b17218 = 8.872284e+01
;       inputs that compare above this value select 0x7f800000 (+inf)
; NOTE(review): 0x3fb8aa3b / 0x32a5705f on the fma-based paths look like the
; high/low split of log2(e); confirm against the lowering code before relying
; on this.
2522 define float @v_exp_f32(float %in) {
2523 ; VI-SDAG-LABEL: v_exp_f32:
2525 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2526 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2527 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
2528 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
2529 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
2530 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
2531 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
2532 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
2533 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2534 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
2535 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
2536 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
2537 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2538 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
2539 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2540 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
2541 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2542 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2543 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2544 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2545 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
2546 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2547 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2549 ; VI-GISEL-LABEL: v_exp_f32:
2550 ; VI-GISEL: ; %bb.0:
2551 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2552 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2553 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
2554 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
2555 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
2556 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
2557 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
2558 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2559 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2560 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
2561 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
2562 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
2563 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
2564 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2565 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2566 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2567 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2568 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
2569 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2570 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
2571 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
2572 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2573 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2575 ; GFX900-SDAG-LABEL: v_exp_f32:
2576 ; GFX900-SDAG: ; %bb.0:
2577 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2578 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
2579 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2580 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2581 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2582 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
2583 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
2584 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
2585 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2586 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
2587 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2588 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2589 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
2590 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2591 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2592 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2593 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2594 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
2595 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2596 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2598 ; GFX900-GISEL-LABEL: v_exp_f32:
2599 ; GFX900-GISEL: ; %bb.0:
2600 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2601 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
2602 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
2603 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
2604 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2605 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
2606 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
2607 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
2608 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
2609 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2610 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
2611 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2612 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2613 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2614 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
2615 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2616 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
2617 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
2618 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2619 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2621 ; SI-SDAG-LABEL: v_exp_f32:
2623 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2624 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
2625 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2626 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2627 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2628 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
2629 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
2630 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
2631 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2632 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2633 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2634 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2635 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
2636 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2637 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
2638 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2639 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2640 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
2641 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2642 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2644 ; SI-GISEL-LABEL: v_exp_f32:
2645 ; SI-GISEL: ; %bb.0:
2646 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2647 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
2648 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
2649 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
2650 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2651 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
2652 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
2653 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
2654 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
2655 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2656 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2657 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2658 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
2659 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2660 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
2661 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2662 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
2663 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
2664 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2665 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2667 ; R600-LABEL: v_exp_f32:
2672 ; CM-LABEL: v_exp_f32:
2676 %result = call float @llvm.exp.f32(float %in)
; llvm.exp.f32 applied to llvm.fabs.f32(%in). In the amdgcn expected output
; below, the instructions that read the input use the |v0| source modifier;
; the tonga runs additionally clear the sign bit with v_and_b32 0x7fffffff.
2680 define float @v_exp_fabs_f32(float %in) {
2681 ; VI-SDAG-LABEL: v_exp_fabs_f32:
2683 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2684 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
2685 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
2686 ; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1
2687 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
2688 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
2689 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
2690 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
2691 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
2692 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2693 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
2694 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
2695 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
2696 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2697 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
2698 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2699 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2700 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2701 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2702 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2703 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2704 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2705 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2706 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2708 ; VI-GISEL-LABEL: v_exp_fabs_f32:
2709 ; VI-GISEL: ; %bb.0:
2710 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2711 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
2712 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
2713 ; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1
2714 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
2715 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
2716 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
2717 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
2718 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2719 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2720 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
2721 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
2722 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
2723 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
2724 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2725 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2726 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2727 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2728 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2729 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2730 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2731 ; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
2732 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2733 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2735 ; GFX900-SDAG-LABEL: v_exp_fabs_f32:
2736 ; GFX900-SDAG: ; %bb.0:
2737 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2738 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2739 ; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
2740 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2741 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2742 ; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
2743 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
2744 ; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
2745 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2746 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
2747 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2748 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2749 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2750 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2751 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2752 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2753 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2754 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2755 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2756 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2758 ; GFX900-GISEL-LABEL: v_exp_fabs_f32:
2759 ; GFX900-GISEL: ; %bb.0:
2760 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2761 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
2762 ; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, |v0|, v1
2763 ; GFX900-GISEL-NEXT: v_fma_f32 v1, |v0|, v1, -v2
2764 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2765 ; GFX900-GISEL-NEXT: v_fma_f32 v1, |v0|, v3, v1
2766 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
2767 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
2768 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
2769 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2770 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
2771 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2772 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2773 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2774 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2775 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2776 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2777 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
2778 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2779 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2781 ; SI-SDAG-LABEL: v_exp_fabs_f32:
2783 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2784 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2785 ; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
2786 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2787 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2788 ; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
2789 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
2790 ; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
2791 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2792 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2793 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2794 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2795 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2796 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2797 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
2798 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2799 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2800 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2801 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2802 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2804 ; SI-GISEL-LABEL: v_exp_fabs_f32:
2805 ; SI-GISEL: ; %bb.0:
2806 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2807 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
2808 ; SI-GISEL-NEXT: v_mul_f32_e64 v2, |v0|, v1
2809 ; SI-GISEL-NEXT: v_fma_f32 v1, |v0|, v1, -v2
2810 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2811 ; SI-GISEL-NEXT: v_fma_f32 v1, |v0|, v3, v1
2812 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
2813 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
2814 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
2815 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2816 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2817 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2818 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
2819 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2820 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2821 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2822 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2823 ; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
2824 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2825 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2827 ; R600-LABEL: v_exp_fabs_f32:
2832 ; CM-LABEL: v_exp_fabs_f32:
2836 %fabs = call float @llvm.fabs.f32(float %in)
2837 %result = call float @llvm.exp.f32(float %fabs)
; llvm.exp.f32 applied to fneg(fabs(%in)). In the expected output below, the
; GlobalISel runs carry the negation as a -|v0| source modifier, while the
; gfx900 and tahiti SelectionDAG runs read |v0| and use sign-flipped constants
; instead (e.g. 0xbfb8aa3b where the GlobalISel output has 0x3fb8aa3b).
2841 define float @v_exp_fneg_fabs_f32(float %in) {
2842 ; VI-SDAG-LABEL: v_exp_fneg_fabs_f32:
2844 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2845 ; VI-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v0
2846 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
2847 ; VI-SDAG-NEXT: v_sub_f32_e64 v4, -|v0|, v1
2848 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
2849 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
2850 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
2851 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
2852 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
2853 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2854 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
2855 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
2856 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
2857 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2858 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
2859 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
2860 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2861 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
2862 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2863 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2864 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2865 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2866 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2867 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2869 ; VI-GISEL-LABEL: v_exp_fneg_fabs_f32:
2870 ; VI-GISEL: ; %bb.0:
2871 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2872 ; VI-GISEL-NEXT: v_or_b32_e32 v1, 0x80000000, v0
2873 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
2874 ; VI-GISEL-NEXT: v_sub_f32_e64 v2, -|v0|, v1
2875 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
2876 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
2877 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
2878 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
2879 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2880 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2881 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
2882 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
2883 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
2884 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
2885 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2886 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2887 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2888 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2889 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
2890 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2891 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2892 ; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
2893 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2894 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2896 ; GFX900-SDAG-LABEL: v_exp_fneg_fabs_f32:
2897 ; GFX900-SDAG: ; %bb.0:
2898 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2899 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
2900 ; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
2901 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2902 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2903 ; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
2904 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
2905 ; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
2906 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2907 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
2908 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2909 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
2910 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2911 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
2912 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2913 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2914 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2915 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2916 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2917 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2919 ; GFX900-GISEL-LABEL: v_exp_fneg_fabs_f32:
2920 ; GFX900-GISEL: ; %bb.0:
2921 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2922 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
2923 ; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, -|v0|, v1
2924 ; GFX900-GISEL-NEXT: v_fma_f32 v1, -|v0|, v1, -v2
2925 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2926 ; GFX900-GISEL-NEXT: v_fma_f32 v1, -|v0|, v3, v1
2927 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
2928 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
2929 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
2930 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2931 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
2932 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2933 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2934 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2935 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
2936 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2937 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2938 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
2939 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2940 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2942 ; SI-SDAG-LABEL: v_exp_fneg_fabs_f32:
2944 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2945 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
2946 ; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
2947 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2948 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2949 ; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
2950 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
2951 ; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
2952 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2953 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2954 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2955 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
2956 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2957 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
2958 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
2959 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2960 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2961 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2962 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2963 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2965 ; SI-GISEL-LABEL: v_exp_fneg_fabs_f32:
2966 ; SI-GISEL: ; %bb.0:
2967 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2968 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
2969 ; SI-GISEL-NEXT: v_mul_f32_e64 v2, -|v0|, v1
2970 ; SI-GISEL-NEXT: v_fma_f32 v1, -|v0|, v1, -v2
2971 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2972 ; SI-GISEL-NEXT: v_fma_f32 v1, -|v0|, v3, v1
2973 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
2974 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
2975 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
2976 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2977 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2978 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2979 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
2980 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2981 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
2982 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2983 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2984 ; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
2985 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2986 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2988 ; R600-LABEL: v_exp_fneg_fabs_f32:
2993 ; CM-LABEL: v_exp_fneg_fabs_f32:
2997 %fabs = call float @llvm.fabs.f32(float %in)
2998 %fneg.fabs = fneg float %fabs
2999 %result = call float @llvm.exp.f32(float %fneg.fabs)
; llvm.exp.f32 applied to fneg(%in). In the expected output below, the
; GlobalISel runs carry the negation as a -v0 source modifier, while the gfx900
; and tahiti SelectionDAG runs read plain v0 and use sign-flipped constants
; (0xbfb8aa3b, 0xb2a5705f) together with adjusted range-check comparisons.
3003 define float @v_exp_fneg_f32(float %in) {
3004 ; VI-SDAG-LABEL: v_exp_fneg_f32:
3006 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3007 ; VI-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
3008 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
3009 ; VI-SDAG-NEXT: v_sub_f32_e64 v4, -v0, v1
3010 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
3011 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
3012 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
3013 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
3014 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
3015 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3016 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
3017 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
3018 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
3019 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3020 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
3021 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
3022 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3023 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
3024 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3025 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3026 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3027 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3028 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3029 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3031 ; VI-GISEL-LABEL: v_exp_fneg_f32:
3032 ; VI-GISEL: ; %bb.0:
3033 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3034 ; VI-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
3035 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
3036 ; VI-GISEL-NEXT: v_sub_f32_e64 v2, -v0, v1
3037 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
3038 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
3039 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
3040 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
3041 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3042 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3043 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
3044 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
3045 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3046 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
3047 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3048 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3049 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3050 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3051 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
3052 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3053 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
3054 ; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
3055 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3056 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3058 ; GFX900-SDAG-LABEL: v_exp_fneg_f32:
3059 ; GFX900-SDAG: ; %bb.0:
3060 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3061 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0
3062 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
3063 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3064 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3065 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3066 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
3067 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3068 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3069 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
3070 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3071 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
3072 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3073 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
3074 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3075 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3076 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3077 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3078 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3079 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3081 ; GFX900-GISEL-LABEL: v_exp_fneg_f32:
3082 ; GFX900-GISEL: ; %bb.0:
3083 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3084 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
3085 ; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, -v0, v1
3086 ; GFX900-GISEL-NEXT: v_fma_f32 v1, -v0, v1, -v2
3087 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3088 ; GFX900-GISEL-NEXT: v_fma_f32 v1, -v0, v3, v1
3089 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
3090 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
3091 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
3092 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3093 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
3094 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3095 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3096 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3097 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
3098 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3099 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
3100 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
3101 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3102 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3104 ; SI-SDAG-LABEL: v_exp_fneg_f32:
3106 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3107 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0
3108 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
3109 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3110 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3111 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3112 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
3113 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3114 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3115 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3116 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3117 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
3118 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3119 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
3120 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
3121 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3122 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3123 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3124 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3125 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3127 ; SI-GISEL-LABEL: v_exp_fneg_f32:
3128 ; SI-GISEL: ; %bb.0:
3129 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3130 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
3131 ; SI-GISEL-NEXT: v_mul_f32_e64 v2, -v0, v1
3132 ; SI-GISEL-NEXT: v_fma_f32 v1, -v0, v1, -v2
3133 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3134 ; SI-GISEL-NEXT: v_fma_f32 v1, -v0, v3, v1
3135 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
3136 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
3137 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
3138 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3139 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3140 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3141 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
3142 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3143 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
3144 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3145 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
3146 ; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
3147 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3148 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3150 ; R600-LABEL: v_exp_fneg_f32:
3155 ; CM-LABEL: v_exp_fneg_f32:
3159 %fneg = fneg float %in
3160 %result = call float @llvm.exp.f32(float %fneg)
; llvm.exp.f32 called with the 'fast' flag. The amdgcn expected output below is
; a short sequence: compare the input against 0xc2aeac50, conditionally add
; 0x42800000, multiply by 0x3fb8aa3b, v_exp_f32, then conditionally multiply
; the result by 0x114b4ea4 (both selects use the same vcc from the compare).
3164 define float @v_exp_f32_fast(float %in) {
3165 ; GCN-SDAG-LABEL: v_exp_f32_fast:
3166 ; GCN-SDAG: ; %bb.0:
3167 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3168 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3169 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3170 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3171 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3172 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3173 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3174 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3175 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3176 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3178 ; GCN-GISEL-LABEL: v_exp_f32_fast:
3179 ; GCN-GISEL: ; %bb.0:
3180 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3181 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3182 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3183 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3184 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3185 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3186 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3187 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3188 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3189 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3191 ; SI-SDAG-LABEL: v_exp_f32_fast:
3193 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3194 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3195 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3196 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3197 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3198 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3199 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3200 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3201 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3202 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3204 ; SI-GISEL-LABEL: v_exp_f32_fast:
3205 ; SI-GISEL: ; %bb.0:
3206 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3207 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3208 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3209 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3210 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3211 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3212 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3213 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3214 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3215 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3217 ; R600-LABEL: v_exp_f32_fast:
3222 ; CM-LABEL: v_exp_f32_fast:
3226 %result = call fast float @llvm.exp.f32(float %in)
; llvm.exp.f32 called without fast-math flags on the call itself, inside a
; function carrying the "unsafe-fp-math"="true" attribute. The amdgcn expected
; output below is the same instruction sequence that is checked for
; v_exp_f32_fast.
3230 define float @v_exp_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
3231 ; GCN-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
3232 ; GCN-SDAG: ; %bb.0:
3233 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3234 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3235 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3236 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3237 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3238 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3239 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3240 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3241 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3242 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3244 ; GCN-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
3245 ; GCN-GISEL: ; %bb.0:
3246 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3247 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3248 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3249 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3250 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3251 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3252 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3253 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3254 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3255 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3257 ; SI-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
3259 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3260 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3261 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3262 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3263 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3264 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3265 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3266 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3267 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3268 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3270 ; SI-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
3271 ; SI-GISEL: ; %bb.0:
3272 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3273 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3274 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3275 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3276 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3277 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3278 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3279 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3280 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3281 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3283 ; R600-LABEL: v_exp_f32_unsafe_math_attr:
3288 ; CM-LABEL: v_exp_f32_unsafe_math_attr:
3292 %result = call float @llvm.exp.f32(float %in)
; llvm.exp.f32 called without fast-math flags on the call itself, inside a
; function carrying the "approx-func-fp-math"="true" attribute. The amdgcn
; expected output below is the same instruction sequence that is checked for
; v_exp_f32_fast.
3296 define float @v_exp_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
3297 ; GCN-SDAG-LABEL: v_exp_f32_approx_fn_attr:
3298 ; GCN-SDAG: ; %bb.0:
3299 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3300 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3301 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3302 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3303 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3304 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3305 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3306 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3307 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3308 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3310 ; GCN-GISEL-LABEL: v_exp_f32_approx_fn_attr:
3311 ; GCN-GISEL: ; %bb.0:
3312 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3313 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3314 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3315 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3316 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3317 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3318 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3319 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3320 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3321 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3323 ; SI-SDAG-LABEL: v_exp_f32_approx_fn_attr:
3325 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3326 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3327 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3328 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3329 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3330 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3331 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3332 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3333 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3334 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3336 ; SI-GISEL-LABEL: v_exp_f32_approx_fn_attr:
3337 ; SI-GISEL: ; %bb.0:
3338 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3339 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3340 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3341 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3342 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3343 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3344 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3345 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3346 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3347 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3349 ; R600-LABEL: v_exp_f32_approx_fn_attr:
3354 ; CM-LABEL: v_exp_f32_approx_fn_attr:
3358 %result = call float @llvm.exp.f32(float %in)
3362 define float @v_exp_f32_ninf(float %in) {
3363 ; VI-SDAG-LABEL: v_exp_f32_ninf:
3365 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3366 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3367 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
3368 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
3369 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
3370 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
3371 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
3372 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
3373 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3374 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
3375 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
3376 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
3377 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3378 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
3379 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3380 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3381 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3382 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
3383 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3385 ; VI-GISEL-LABEL: v_exp_f32_ninf:
3386 ; VI-GISEL: ; %bb.0:
3387 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3388 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3389 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
3390 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
3391 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
3392 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
3393 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
3394 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3395 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3396 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
3397 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
3398 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3399 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
3400 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3401 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3402 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3403 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3404 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
3405 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3407 ; GFX900-SDAG-LABEL: v_exp_f32_ninf:
3408 ; GFX900-SDAG: ; %bb.0:
3409 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3410 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3411 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3412 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3413 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3414 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3415 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3416 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3417 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3418 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
3419 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3420 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3421 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3422 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3423 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
3424 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3426 ; GFX900-GISEL-LABEL: v_exp_f32_ninf:
3427 ; GFX900-GISEL: ; %bb.0:
3428 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3429 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
3430 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
3431 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
3432 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3433 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
3434 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
3435 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
3436 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
3437 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3438 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
3439 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3440 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3441 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3442 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
3443 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3445 ; SI-SDAG-LABEL: v_exp_f32_ninf:
3447 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3448 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3449 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3450 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3451 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3452 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3453 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3454 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3455 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3456 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3457 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3458 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3459 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3460 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
3461 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
3462 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3464 ; SI-GISEL-LABEL: v_exp_f32_ninf:
3465 ; SI-GISEL: ; %bb.0:
3466 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3467 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
3468 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
3469 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
3470 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3471 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
3472 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
3473 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
3474 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
3475 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3476 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3477 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
3478 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3479 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3480 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
3481 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3483 ; R600-LABEL: v_exp_f32_ninf:
3488 ; CM-LABEL: v_exp_f32_ninf:
3492 %result = call ninf float @llvm.exp.f32(float %in)
; v_exp_f32_afn: llvm.exp.f32 on a float argument with the `afn` fast-math
; flag; the function has no attribute group.
; The GCN-SDAG, GCN-GISEL, SI-SDAG and SI-GISEL expectations below share one
; shape: compare the input with 0xc2aeac50 (about -87.34), select between the
; input and input + 0x42800000 (64.0), multiply by 0x3fb8aa3b (about log2(e)),
; issue v_exp_f32, then select between that result and the result multiplied
; by 0x114b4ea4 (about e^-64) using the same condition.
; NOTE(review): the add-64 / scale-by-e^-64 pair presumably keeps the hardware
; exp result out of the denormal range; confirm against the AMDGPU lowering.
; The assertions are autogenerated (see the note at the top of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
3496 define float @v_exp_f32_afn(float %in) {
3497 ; GCN-SDAG-LABEL: v_exp_f32_afn:
3498 ; GCN-SDAG: ; %bb.0:
3499 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3500 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3501 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3502 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3503 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3504 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3505 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3506 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3507 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3508 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3510 ; GCN-GISEL-LABEL: v_exp_f32_afn:
3511 ; GCN-GISEL: ; %bb.0:
3512 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3513 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3514 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3515 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3516 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3517 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3518 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3519 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3520 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3521 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3523 ; SI-SDAG-LABEL: v_exp_f32_afn:
3525 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3526 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3527 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3528 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3529 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3530 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3531 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3532 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3533 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3534 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3536 ; SI-GISEL-LABEL: v_exp_f32_afn:
3537 ; SI-GISEL: ; %bb.0:
3538 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3539 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3540 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3541 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3542 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3543 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3544 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3545 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3546 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3547 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3549 ; R600-LABEL: v_exp_f32_afn:
3554 ; CM-LABEL: v_exp_f32_afn:
3558 %result = call afn float @llvm.exp.f32(float %in)
; v_exp_f32_afn_daz: an `afn` llvm.exp.f32 call in a function carrying
; attribute group #0.
; NOTE(review): #0 is defined outside this part of the file; going by the
; function name it presumably selects a denormals-as-zero f32 mode -- confirm.
; For the GCN and SI prefixes the expected output is only a multiply by
; 0x3fb8aa3b (about log2(e)) followed by v_exp_f32; no compare/select sequence
; around the exp is expected here.
3562 define float @v_exp_f32_afn_daz(float %in) #0 {
3563 ; GCN-LABEL: v_exp_f32_afn_daz:
3565 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3566 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3567 ; GCN-NEXT: v_exp_f32_e32 v0, v0
3568 ; GCN-NEXT: s_setpc_b64 s[30:31]
3570 ; SI-LABEL: v_exp_f32_afn_daz:
3572 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3573 ; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3574 ; SI-NEXT: v_exp_f32_e32 v0, v0
3575 ; SI-NEXT: s_setpc_b64 s[30:31]
3577 ; R600-LABEL: v_exp_f32_afn_daz:
3582 ; CM-LABEL: v_exp_f32_afn_daz:
3586 %result = call afn float @llvm.exp.f32(float %in)
; v_exp_f32_afn_dynamic: an `afn` llvm.exp.f32 call in a function carrying
; attribute group #1.
; NOTE(review): #1 is defined outside this part of the file; going by the
; function name it presumably selects a dynamic denormal mode -- confirm.
; The expected output for every prefix below keeps the guarded form: compare
; the input with 0xc2aeac50 (about -87.34), conditionally add 0x42800000
; (64.0), multiply by 0x3fb8aa3b (about log2(e)), v_exp_f32, then conditionally
; multiply the result by 0x114b4ea4 (about e^-64).
3590 define float @v_exp_f32_afn_dynamic(float %in) #1 {
3591 ; GCN-SDAG-LABEL: v_exp_f32_afn_dynamic:
3592 ; GCN-SDAG: ; %bb.0:
3593 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3594 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3595 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3596 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3597 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3598 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3599 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3600 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3601 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3602 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3604 ; GCN-GISEL-LABEL: v_exp_f32_afn_dynamic:
3605 ; GCN-GISEL: ; %bb.0:
3606 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3607 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3608 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3609 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3610 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3611 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3612 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3613 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3614 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3615 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3617 ; SI-SDAG-LABEL: v_exp_f32_afn_dynamic:
3619 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3620 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3621 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3622 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3623 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3624 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3625 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3626 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3627 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3628 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3630 ; SI-GISEL-LABEL: v_exp_f32_afn_dynamic:
3631 ; SI-GISEL: ; %bb.0:
3632 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3633 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3634 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3635 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3636 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3637 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3638 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3639 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3640 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3641 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3643 ; R600-LABEL: v_exp_f32_afn_dynamic:
3648 ; CM-LABEL: v_exp_f32_afn_dynamic:
3652 %result = call afn float @llvm.exp.f32(float %in)
; v_fabs_exp_f32_afn: feeds llvm.fabs.f32 of the argument into an `afn`
; llvm.exp.f32 call; the function has no attribute group.
; The expectations show the fabs being applied as an |v0| source modifier on
; the VOP3 (_e64) forms of v_add_f32, v_cmp_lt_f32 and v_cndmask_b32 rather
; than as a separate instruction. The remaining sequence is: multiply by
; 0x3fb8aa3b (about log2(e)), v_exp_f32, then a conditional multiply by
; 0x114b4ea4 (about e^-64).
; NOTE(review): the compare is |v0| < 0xc2aeac50 (about -87.34), which looks
; like it can never be true for an absolute value; the checks simply record
; current codegen -- confirm whether folding it away is a known missed
; optimization.
3656 define float @v_fabs_exp_f32_afn(float %in) {
3657 ; GCN-SDAG-LABEL: v_fabs_exp_f32_afn:
3658 ; GCN-SDAG: ; %bb.0:
3659 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3660 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3661 ; GCN-SDAG-NEXT: s_mov_b32 s5, 0x42800000
3662 ; GCN-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5
3663 ; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
3664 ; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc
3665 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3666 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3667 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3668 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3669 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3671 ; GCN-GISEL-LABEL: v_fabs_exp_f32_afn:
3672 ; GCN-GISEL: ; %bb.0:
3673 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3674 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3675 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
3676 ; GCN-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2
3677 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
3678 ; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
3679 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3680 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3681 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3682 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3683 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3685 ; SI-SDAG-LABEL: v_fabs_exp_f32_afn:
3687 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3688 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3689 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x42800000
3690 ; SI-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5
3691 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
3692 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc
3693 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3694 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3695 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3696 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3697 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3699 ; SI-GISEL-LABEL: v_fabs_exp_f32_afn:
3700 ; SI-GISEL: ; %bb.0:
3701 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3702 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3703 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
3704 ; SI-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2
3705 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
3706 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
3707 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3708 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3709 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3710 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3711 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3713 ; R600-LABEL: v_fabs_exp_f32_afn:
3718 ; CM-LABEL: v_fabs_exp_f32_afn:
3722 %fabs = call float @llvm.fabs.f32(float %in)
3723 %result = call afn float @llvm.exp.f32(float %fabs)
; v_exp_f32_daz: llvm.exp.f32 with no fast-math flags in a function carrying
; attribute group #0.
; NOTE(review): #0 is defined outside this part of the file; going by the
; function name it presumably selects a denormals-as-zero f32 mode -- confirm.
; Expected shape of the full-precision expansion:
;  * VI prefixes: the input is split with the mask 0xfffff000 into a high part
;    and a remainder, and the products with 0x3fb8a000 and 0x39a3b295 are
;    combined with v_mul_f32/v_add_f32 (no v_fma_f32 appears in these checks).
;  * GFX900 and SI prefixes: v_fma_f32 with 0x3fb8aa3b (about log2(e)) and
;    0x32a5705f is used instead.
;  * All prefixes: v_rndne_f32, v_exp_f32 on the remainder, v_cvt_i32_f32 and
;    v_ldexp_f32, then two selects that yield 0 when the input is below
;    0xc2ce8ed0 (about -103.28) and 0x7f800000 (+infinity) when the input is
;    above 0x42b17218 (about 88.72).
3727 define float @v_exp_f32_daz(float %in) #0 {
3728 ; VI-SDAG-LABEL: v_exp_f32_daz:
3730 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3731 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3732 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
3733 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
3734 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
3735 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
3736 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
3737 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
3738 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3739 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
3740 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
3741 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
3742 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3743 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
3744 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3745 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3746 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
3747 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3748 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3749 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3750 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3751 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3752 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3754 ; VI-GISEL-LABEL: v_exp_f32_daz:
3755 ; VI-GISEL: ; %bb.0:
3756 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3757 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3758 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
3759 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
3760 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
3761 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
3762 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
3763 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3764 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3765 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
3766 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
3767 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3768 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
3769 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3770 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3771 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3772 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3773 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3774 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3775 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
3776 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
3777 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3778 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3780 ; GFX900-SDAG-LABEL: v_exp_f32_daz:
3781 ; GFX900-SDAG: ; %bb.0:
3782 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3783 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3784 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3785 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3786 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3787 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3788 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3789 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3790 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3791 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
3792 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3793 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3794 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3795 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
3796 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3797 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3798 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3799 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3800 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3801 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3803 ; GFX900-GISEL-LABEL: v_exp_f32_daz:
3804 ; GFX900-GISEL: ; %bb.0:
3805 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3806 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
3807 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
3808 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
3809 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3810 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
3811 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
3812 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
3813 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
3814 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3815 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
3816 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3817 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3818 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3819 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3820 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3821 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
3822 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
3823 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3824 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3826 ; SI-SDAG-LABEL: v_exp_f32_daz:
3828 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3829 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3830 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3831 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3832 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3833 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3834 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3835 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3836 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3837 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3838 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3839 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3840 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3841 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
3842 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
3843 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3844 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3845 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3846 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3847 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3849 ; SI-GISEL-LABEL: v_exp_f32_daz:
3850 ; SI-GISEL: ; %bb.0:
3851 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3852 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
3853 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
3854 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
3855 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3856 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
3857 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
3858 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
3859 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
3860 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3861 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3862 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3863 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
3864 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3865 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3866 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3867 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
3868 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
3869 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3870 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3872 ; R600-LABEL: v_exp_f32_daz:
3877 ; CM-LABEL: v_exp_f32_daz:
3881 %result = call float @llvm.exp.f32(float %in)
; v_exp_f32_nnan: llvm.exp.f32 with only the `nnan` fast-math flag; the
; function has no attribute group.
; The expectations keep the full-precision expansion: an operand split with
; the mask 0xfffff000 plus v_mul_f32/v_add_f32 for the VI prefixes, v_fma_f32
; with 0x3fb8aa3b and 0x32a5705f for the GFX900 and SI prefixes, followed by
; v_rndne_f32, v_exp_f32, v_cvt_i32_f32 and v_ldexp_f32.
; Both range selects are still expected with `nnan`: 0 when the input is below
; 0xc2ce8ed0 (about -103.28) and 0x7f800000 (+infinity) when it is above
; 0x42b17218 (about 88.72).
3885 define float @v_exp_f32_nnan(float %in) {
3886 ; VI-SDAG-LABEL: v_exp_f32_nnan:
3888 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3889 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3890 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
3891 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
3892 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
3893 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
3894 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
3895 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
3896 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3897 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
3898 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
3899 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
3900 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3901 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
3902 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3903 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3904 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
3905 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3906 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3907 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3908 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3909 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3910 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3912 ; VI-GISEL-LABEL: v_exp_f32_nnan:
3913 ; VI-GISEL: ; %bb.0:
3914 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3915 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3916 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
3917 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
3918 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
3919 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
3920 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
3921 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3922 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3923 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
3924 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
3925 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3926 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
3927 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3928 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3929 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3930 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3931 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3932 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3933 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
3934 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
3935 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3936 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3938 ; GFX900-SDAG-LABEL: v_exp_f32_nnan:
3939 ; GFX900-SDAG: ; %bb.0:
3940 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3941 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3942 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3943 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3944 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3945 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3946 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3947 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3948 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3949 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
3950 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3951 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3952 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3953 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
3954 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3955 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3956 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3957 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3958 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3959 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3961 ; GFX900-GISEL-LABEL: v_exp_f32_nnan:
3962 ; GFX900-GISEL: ; %bb.0:
3963 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3964 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
3965 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
3966 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
3967 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3968 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
3969 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
3970 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
3971 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
3972 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3973 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
3974 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3975 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3976 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3977 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3978 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3979 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
3980 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
3981 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3982 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3984 ; SI-SDAG-LABEL: v_exp_f32_nnan:
3986 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3987 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3988 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3989 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3990 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3991 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3992 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3993 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3994 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3995 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3996 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3997 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3998 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3999 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4000 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4001 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4002 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4003 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4004 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4005 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4007 ; SI-GISEL-LABEL: v_exp_f32_nnan:
4008 ; SI-GISEL: ; %bb.0:
4009 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4010 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4011 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4012 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4013 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4014 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4015 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4016 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4017 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4018 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4019 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4020 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4021 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4022 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4023 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4024 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4025 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4026 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4027 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4028 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4030 ; R600-LABEL: v_exp_f32_nnan:
4035 ; CM-LABEL: v_exp_f32_nnan:
4039 %result = call nnan float @llvm.exp.f32(float %in)
; v_exp_f32_nnan_daz: llvm.exp.f32 with the `nnan` fast-math flag in a
; function carrying attribute group #0.
; NOTE(review): #0 is defined outside this part of the file; going by the
; function name it presumably selects a denormals-as-zero f32 mode -- confirm.
; The expectations keep the full-precision expansion: an operand split with
; the mask 0xfffff000 plus v_mul_f32/v_add_f32 for the VI prefixes, v_fma_f32
; with 0x3fb8aa3b and 0x32a5705f for the GFX900 and SI prefixes, followed by
; v_rndne_f32, v_exp_f32, v_cvt_i32_f32 and v_ldexp_f32.
; Two range selects follow: 0 when the input is below 0xc2ce8ed0 (about
; -103.28) and 0x7f800000 (+infinity) when it is above 0x42b17218 (about
; 88.72).
4043 define float @v_exp_f32_nnan_daz(float %in) #0 {
4044 ; VI-SDAG-LABEL: v_exp_f32_nnan_daz:
4046 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4047 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4048 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4049 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4050 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4051 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4052 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4053 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4054 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4055 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4056 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4057 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4058 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4059 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4060 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4061 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4062 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4063 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4064 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4065 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4066 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4067 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4068 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4070 ; VI-GISEL-LABEL: v_exp_f32_nnan_daz:
4071 ; VI-GISEL: ; %bb.0:
4072 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4073 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4074 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4075 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4076 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4077 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4078 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4079 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4080 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4081 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4082 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4083 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4084 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4085 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4086 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4087 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4088 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4089 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4090 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4091 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4092 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4093 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4094 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4096 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_daz:
4097 ; GFX900-SDAG: ; %bb.0:
4098 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4099 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4100 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4101 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4102 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4103 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4104 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4105 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4106 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4107 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4108 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4109 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4110 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4111 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4112 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4113 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4114 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4115 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4116 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4117 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4119 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_daz:
4120 ; GFX900-GISEL: ; %bb.0:
4121 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4122 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4123 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4124 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4125 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4126 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4127 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4128 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4129 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4130 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4131 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4132 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4133 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4134 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4135 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4136 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4137 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4138 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4139 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4140 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4142 ; SI-SDAG-LABEL: v_exp_f32_nnan_daz:
4144 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4145 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4146 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4147 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4148 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4149 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4150 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4151 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4152 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4153 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4154 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4155 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4156 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4157 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4158 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4159 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4160 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4161 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4162 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4163 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4165 ; SI-GISEL-LABEL: v_exp_f32_nnan_daz:
4166 ; SI-GISEL: ; %bb.0:
4167 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4168 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4169 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4170 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4171 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4172 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4173 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4174 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4175 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4176 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4177 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4178 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4179 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4180 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4181 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4182 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4183 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4184 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4185 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4186 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4188 ; R600-LABEL: v_exp_f32_nnan_daz:
4193 ; CM-LABEL: v_exp_f32_nnan_daz:
4197 %result = call nnan float @llvm.exp.f32(float %in)
; Checks llvm.exp.f32 with only the nnan flag, on a function that uses
; attribute group #1 (defined outside this excerpt; the "_dynamic" suffix
; presumably means a dynamic denormal mode -- confirm against the attribute
; list). Each of the six amdgcn sequences below still carries both range
; selects: an input below 0xc2ce8ed0 selects 0, and an input above 0x42b17218
; selects 0x7f800000 (the f32 +infinity bit pattern).
4201 define float @v_exp_f32_nnan_dynamic(float %in) #1 {
4202 ; VI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
4204 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4205 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4206 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4207 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4208 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4209 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4210 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4211 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4212 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4213 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4214 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4215 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4216 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4217 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4218 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4219 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4220 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4221 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4222 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4223 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4224 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4225 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4226 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4228 ; VI-GISEL-LABEL: v_exp_f32_nnan_dynamic:
4229 ; VI-GISEL: ; %bb.0:
4230 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4231 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4232 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4233 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4234 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4235 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4236 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4237 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4238 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4239 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4240 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4241 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4242 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4243 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4244 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4245 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4246 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4247 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4248 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4249 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4250 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4251 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4252 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4254 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_dynamic:
4255 ; GFX900-SDAG: ; %bb.0:
4256 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4257 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4258 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4259 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4260 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4261 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4262 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4263 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4264 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4265 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4266 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4267 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4268 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4269 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4270 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4271 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4272 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4273 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4274 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4275 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4277 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_dynamic:
4278 ; GFX900-GISEL: ; %bb.0:
4279 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4280 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4281 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4282 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4283 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4284 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4285 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4286 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4287 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4288 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4289 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4290 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4291 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4292 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4293 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4294 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4295 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4296 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4297 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4298 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4300 ; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
4302 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4303 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4304 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4305 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4306 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4307 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4308 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4309 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4310 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4311 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4312 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4313 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4314 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4315 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4316 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4317 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4318 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4319 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4320 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4321 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4323 ; SI-GISEL-LABEL: v_exp_f32_nnan_dynamic:
4324 ; SI-GISEL: ; %bb.0:
4325 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4326 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4327 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4328 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4329 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4330 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4331 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4332 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4333 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4334 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4335 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4336 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4337 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4338 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4339 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4340 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4341 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4342 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4343 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4344 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4346 ; R600-LABEL: v_exp_f32_nnan_dynamic:
4351 ; CM-LABEL: v_exp_f32_nnan_dynamic:
; The call under test; nnan is its only fast-math flag.
4355 %result = call nnan float @llvm.exp.f32(float %in)
; Checks llvm.exp.f32 with only the ninf flag, on a function that uses
; attribute group #0 (defined outside this excerpt; the "_daz" suffix
; presumably means denormals-are-zero -- confirm against the attribute list).
; In each of the six amdgcn sequences below only the lower-bound select
; remains (an input below 0xc2ce8ed0 selects 0); none of them compares
; against 0x42b17218 or materializes 0x7f800000.
4359 define float @v_exp_f32_ninf_daz(float %in) #0 {
4360 ; VI-SDAG-LABEL: v_exp_f32_ninf_daz:
4362 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4363 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4364 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4365 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4366 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4367 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4368 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4369 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4370 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4371 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4372 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4373 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4374 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4375 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4376 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4377 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4378 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4379 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4380 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4382 ; VI-GISEL-LABEL: v_exp_f32_ninf_daz:
4383 ; VI-GISEL: ; %bb.0:
4384 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4385 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4386 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4387 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4388 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4389 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4390 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4391 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4392 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4393 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4394 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4395 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4396 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4397 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4398 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4399 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4400 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4401 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4402 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4404 ; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz:
4405 ; GFX900-SDAG: ; %bb.0:
4406 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4407 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4408 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4409 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4410 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4411 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4412 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4413 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4414 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4415 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4416 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4417 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4418 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4419 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4420 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4421 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4423 ; GFX900-GISEL-LABEL: v_exp_f32_ninf_daz:
4424 ; GFX900-GISEL: ; %bb.0:
4425 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4426 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4427 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4428 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4429 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4430 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4431 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4432 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4433 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4434 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4435 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4436 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4437 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4438 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4439 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4440 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4442 ; SI-SDAG-LABEL: v_exp_f32_ninf_daz:
4444 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4445 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4446 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4447 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4448 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4449 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4450 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4451 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4452 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4453 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4454 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4455 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4456 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4457 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4458 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4459 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4461 ; SI-GISEL-LABEL: v_exp_f32_ninf_daz:
4462 ; SI-GISEL: ; %bb.0:
4463 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4464 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4465 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4466 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4467 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4468 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4469 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4470 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4471 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4472 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4473 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4474 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4475 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4476 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4477 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4478 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4480 ; R600-LABEL: v_exp_f32_ninf_daz:
4485 ; CM-LABEL: v_exp_f32_ninf_daz:
; The call under test; ninf is its only fast-math flag.
4489 %result = call ninf float @llvm.exp.f32(float %in)
; Checks llvm.exp.f32 with only the ninf flag, on a function that uses
; attribute group #1 (defined outside this excerpt; the "_dynamic" suffix
; presumably means a dynamic denormal mode -- confirm against the attribute
; list). In each of the six amdgcn sequences below only the lower-bound
; select remains (an input below 0xc2ce8ed0 selects 0); none of them compares
; against 0x42b17218 or materializes 0x7f800000.
4493 define float @v_exp_f32_ninf_dynamic(float %in) #1 {
4494 ; VI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
4496 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4497 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4498 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4499 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4500 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4501 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4502 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4503 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4504 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4505 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4506 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4507 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4508 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4509 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4510 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4511 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4512 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4513 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4514 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4516 ; VI-GISEL-LABEL: v_exp_f32_ninf_dynamic:
4517 ; VI-GISEL: ; %bb.0:
4518 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4519 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4520 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4521 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4522 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4523 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4524 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4525 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4526 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4527 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4528 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4529 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4530 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4531 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4532 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4533 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4534 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4535 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4536 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4538 ; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic:
4539 ; GFX900-SDAG: ; %bb.0:
4540 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4541 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4542 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4543 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4544 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4545 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4546 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4547 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4548 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4549 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4550 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4551 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4552 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4553 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4554 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4555 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4557 ; GFX900-GISEL-LABEL: v_exp_f32_ninf_dynamic:
4558 ; GFX900-GISEL: ; %bb.0:
4559 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4560 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4561 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4562 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4563 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4564 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4565 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4566 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4567 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4568 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4569 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4570 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4571 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4572 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4573 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4574 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4576 ; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
4578 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4579 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4580 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4581 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4582 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4583 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4584 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4585 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4586 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4587 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4588 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4589 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4590 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4591 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4592 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4593 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4595 ; SI-GISEL-LABEL: v_exp_f32_ninf_dynamic:
4596 ; SI-GISEL: ; %bb.0:
4597 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4598 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4599 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4600 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4601 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4602 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4603 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4604 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4605 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4606 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4607 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4608 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4609 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4610 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4611 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4612 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4614 ; R600-LABEL: v_exp_f32_ninf_dynamic:
4619 ; CM-LABEL: v_exp_f32_ninf_dynamic:
; The call under test; ninf is its only fast-math flag.
4623 %result = call ninf float @llvm.exp.f32(float %in)
; Checks llvm.exp.f32 with both nnan and ninf on the call; the function
; itself references no attribute group. In each of the six amdgcn sequences
; below only the lower-bound select remains (an input below 0xc2ce8ed0
; selects 0); none of them compares against 0x42b17218 or materializes
; 0x7f800000.
4627 define float @v_exp_f32_nnan_ninf(float %in) {
4628 ; VI-SDAG-LABEL: v_exp_f32_nnan_ninf:
4630 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4631 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4632 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4633 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4634 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4635 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4636 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4637 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4638 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4639 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4640 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4641 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4642 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4643 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4644 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4645 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4646 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4647 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4648 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4650 ; VI-GISEL-LABEL: v_exp_f32_nnan_ninf:
4651 ; VI-GISEL: ; %bb.0:
4652 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4653 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4654 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4655 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4656 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4657 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4658 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4659 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4660 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4661 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4662 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4663 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4664 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4665 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4666 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4667 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4668 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4669 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4670 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4672 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf:
4673 ; GFX900-SDAG: ; %bb.0:
4674 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4675 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4676 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4677 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4678 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4679 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4680 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4681 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4682 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4683 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4684 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4685 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4686 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4687 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4688 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4689 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4691 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf:
4692 ; GFX900-GISEL: ; %bb.0:
4693 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4694 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4695 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4696 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4697 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4698 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4699 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4700 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4701 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4702 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4703 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4704 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4705 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4706 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4707 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4708 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4710 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf:
4712 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4713 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4714 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4715 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4716 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4717 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4718 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4719 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4720 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4721 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4722 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4723 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4724 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4725 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4726 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4727 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4729 ; SI-GISEL-LABEL: v_exp_f32_nnan_ninf:
4730 ; SI-GISEL: ; %bb.0:
4731 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4732 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4733 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4734 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4735 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4736 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4737 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4738 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4739 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4740 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4741 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4742 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4743 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4744 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4745 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4746 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4748 ; R600-LABEL: v_exp_f32_nnan_ninf:
4753 ; CM-LABEL: v_exp_f32_nnan_ninf:
; The call under test; it carries both the nnan and ninf fast-math flags.
4757 %result = call nnan ninf float @llvm.exp.f32(float %in)
; Checks llvm.exp.f32 with both nnan and ninf on the call, on a function that
; uses attribute group #0 (defined outside this excerpt; the "_daz" suffix
; presumably means denormals-are-zero -- confirm against the attribute list).
; In each of the six amdgcn sequences below only the lower-bound select
; remains (an input below 0xc2ce8ed0 selects 0); none of them compares
; against 0x42b17218 or materializes 0x7f800000.
4761 define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
4762 ; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
4764 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4765 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4766 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4767 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4768 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4769 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4770 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4771 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4772 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4773 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4774 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4775 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4776 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4777 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4778 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4779 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4780 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4781 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4782 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4784 ; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz:
4785 ; VI-GISEL: ; %bb.0:
4786 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4787 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4788 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4789 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4790 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4791 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4792 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4793 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4794 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4795 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4796 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4797 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4798 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4799 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4800 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4801 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4802 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4803 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4804 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4806 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
4807 ; GFX900-SDAG: ; %bb.0:
4808 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4809 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4810 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4811 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4812 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4813 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4814 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4815 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4816 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4817 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4818 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4819 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4820 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4821 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4822 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4823 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4825 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_daz:
4826 ; GFX900-GISEL: ; %bb.0:
4827 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4828 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4829 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4830 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4831 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4832 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4833 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4834 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4835 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4836 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4837 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4838 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4839 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4840 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4841 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4842 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4844 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
4846 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4847 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4848 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4849 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4850 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4851 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4852 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4853 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4854 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4855 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4856 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4857 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4858 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4859 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4860 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4861 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4863 ; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz:
4864 ; SI-GISEL: ; %bb.0:
4865 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4866 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4867 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4868 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4869 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4870 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4871 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4872 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4873 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4874 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4875 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4876 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4877 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4878 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4879 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4880 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4882 ; R600-LABEL: v_exp_f32_nnan_ninf_daz:
4887 ; CM-LABEL: v_exp_f32_nnan_ninf_daz:
; The call under test; it carries both the nnan and ninf fast-math flags.
4891 %result = call nnan ninf float @llvm.exp.f32(float %in)
4895 define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
4896 ; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
4898 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4899 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4900 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4901 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4902 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4903 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4904 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4905 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4906 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4907 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4908 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4909 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4910 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4911 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4912 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4913 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4914 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4915 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4916 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4918 ; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
4919 ; VI-GISEL: ; %bb.0:
4920 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4921 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4922 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4923 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4924 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4925 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4926 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4927 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4928 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4929 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4930 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4931 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4932 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4933 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4934 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4935 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4936 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4937 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4938 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4940 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
4941 ; GFX900-SDAG: ; %bb.0:
4942 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4943 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4944 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4945 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4946 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4947 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4948 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4949 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4950 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4951 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4952 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4953 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4954 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4955 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4956 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4957 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4959 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
4960 ; GFX900-GISEL: ; %bb.0:
4961 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4962 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
4963 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
4964 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
4965 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4966 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
4967 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
4968 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
4969 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
4970 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4971 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4972 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4973 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4974 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4975 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4976 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4978 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
4980 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4981 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4982 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4983 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4984 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4985 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4986 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4987 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4988 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4989 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4990 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4991 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4992 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4993 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4994 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4995 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4997 ; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
4998 ; SI-GISEL: ; %bb.0:
4999 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5000 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
5001 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5002 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
5003 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
5004 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
5005 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
5006 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
5007 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5008 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
5009 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5010 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
5011 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5012 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5013 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
5014 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5016 ; R600-LABEL: v_exp_f32_nnan_ninf_dynamic:
5021 ; CM-LABEL: v_exp_f32_nnan_ninf_dynamic:
5025 %result = call nnan ninf float @llvm.exp.f32(float %in)
; Checks lowering of llvm.exp.f32 when the call carries the `fast` flag.
; For the GCN and SI check prefixes the expected code is just a multiply by
; 0x3fb8aa3b (about 1.442695, i.e. log2(e) as an f32 bit pattern) feeding
; v_exp_f32; no compare/select clamp sequence is expected.
; NOTE(review): attribute group #0 is defined outside this view; the "daz" in
; the function name suggests a denormal-flushing mode - confirm.
5029 define float @v_exp_f32_fast_daz(float %in) #0 {
5030 ; GCN-LABEL: v_exp_f32_fast_daz:
5032 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5033 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5034 ; GCN-NEXT: v_exp_f32_e32 v0, v0
5035 ; GCN-NEXT: s_setpc_b64 s[30:31]
5037 ; SI-LABEL: v_exp_f32_fast_daz:
5039 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5040 ; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5041 ; SI-NEXT: v_exp_f32_e32 v0, v0
5042 ; SI-NEXT: s_setpc_b64 s[30:31]
5044 ; R600-LABEL: v_exp_f32_fast_daz:
5049 ; CM-LABEL: v_exp_f32_fast_daz:
5053 %result = call fast float @llvm.exp.f32(float %in)
; Checks lowering of llvm.exp.f32 with no fast-math flags on the call.
; Every SelectionDAG and GlobalISel expectation below finishes with two
; compare+select steps on the original input: one against 0xc2ce8ed0 that
; selects 0, and one against 0x42b17218 that selects 0x7f800000 (the f32
; +infinity bit pattern).
; NOTE(review): attribute group #1 is defined outside this view; the function
; name suggests a dynamic denormal mode - confirm.
5057 define float @v_exp_f32_dynamic_mode(float %in) #1 {
5058 ; VI-SDAG-LABEL: v_exp_f32_dynamic_mode:
5060 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5061 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5062 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
5063 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
5064 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
5065 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
5066 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
5067 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
5068 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5069 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
5070 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
5071 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
5072 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5073 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5074 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5075 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5076 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5077 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5078 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5079 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5080 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5081 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5082 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5084 ; VI-GISEL-LABEL: v_exp_f32_dynamic_mode:
5085 ; VI-GISEL: ; %bb.0:
5086 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5087 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5088 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5089 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5090 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5091 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
5092 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
5093 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5094 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5095 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5096 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5097 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5098 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5099 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5100 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5101 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5102 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5103 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5104 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5105 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5106 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5107 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5108 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5110 ; GFX900-SDAG-LABEL: v_exp_f32_dynamic_mode:
5111 ; GFX900-SDAG: ; %bb.0:
5112 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5113 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5114 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5115 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5116 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
5117 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
5118 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5119 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5120 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5121 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5122 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5123 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5124 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5125 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5126 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5127 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5128 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5129 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5130 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5131 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5133 ; GFX900-GISEL-LABEL: v_exp_f32_dynamic_mode:
5134 ; GFX900-GISEL: ; %bb.0:
5135 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5136 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
5137 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5138 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
5139 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
5140 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
5141 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
5142 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
5143 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5144 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
5145 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5146 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5147 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5148 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5149 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5150 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5151 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5152 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5153 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5154 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5156 ; SI-SDAG-LABEL: v_exp_f32_dynamic_mode:
5158 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5159 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5160 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5161 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5162 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
5163 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
5164 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5165 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5166 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5167 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5168 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5169 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5170 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5171 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5172 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5173 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5174 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5175 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5176 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5177 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5179 ; SI-GISEL-LABEL: v_exp_f32_dynamic_mode:
5180 ; SI-GISEL: ; %bb.0:
5181 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5182 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
5183 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5184 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
5185 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
5186 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
5187 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
5188 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
5189 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5190 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
5191 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5192 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5193 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
5194 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5195 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5196 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5197 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5198 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5199 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5200 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5202 ; R600-LABEL: v_exp_f32_dynamic_mode:
5207 ; CM-LABEL: v_exp_f32_dynamic_mode:
5211 %result = call float @llvm.exp.f32(float %in)
; Checks lowering of llvm.exp.f32 applied to a literal `undef` operand.
; The SelectionDAG expectations contain no compare/select clamp sequence, while
; the GlobalISel expectations keep it. All expectations except the VI
; SelectionDAG one read s4 although no instruction in the function writes it
; first.
; NOTE(review): newer LLVM tests generally prefer `poison` over `undef`;
; changing the operand would require regenerating these assertions - confirm
; before touching.
5215 define float @v_exp_f32_undef() {
5216 ; VI-SDAG-LABEL: v_exp_f32_undef:
5218 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5219 ; VI-SDAG-NEXT: v_rndne_f32_e32 v0, 0
5220 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7fc00000
5221 ; VI-SDAG-NEXT: v_add_f32_e64 v1, -v0, s4
5222 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5223 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0
5224 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
5225 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5227 ; VI-GISEL-LABEL: v_exp_f32_undef:
5228 ; VI-GISEL: ; %bb.0:
5229 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5230 ; VI-GISEL-NEXT: v_sub_f32_e64 v0, s4, 0
5231 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000
5232 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295
5233 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v0
5234 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
5235 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
5236 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
5237 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0, v2
5238 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v2, v0
5239 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v1
5240 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
5241 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5242 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
5243 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
5244 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5245 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
5246 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
5247 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
5248 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
5249 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
5250 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
5251 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5252 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5254 ; GFX900-SDAG-LABEL: v_exp_f32_undef:
5255 ; GFX900-SDAG: ; %bb.0:
5256 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5257 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
5258 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000
5259 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1
5260 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5261 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
5262 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000
5263 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1
5264 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
5265 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
5266 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
5267 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
5268 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5270 ; GFX900-GISEL-LABEL: v_exp_f32_undef:
5271 ; GFX900-GISEL: ; %bb.0:
5272 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5273 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
5274 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s4, v0
5275 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v0, -v1
5276 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5277 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0
5278 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v1
5279 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
5280 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5281 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
5282 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
5283 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5284 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
5285 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
5286 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
5287 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
5288 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
5289 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
5290 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5291 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5293 ; SI-SDAG-LABEL: v_exp_f32_undef:
5295 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5296 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
5297 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000
5298 ; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1
5299 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5300 ; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
5301 ; SI-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000
5302 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1
5303 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
5304 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
5305 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
5306 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
5307 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5309 ; SI-GISEL-LABEL: v_exp_f32_undef:
5310 ; SI-GISEL: ; %bb.0:
5311 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5312 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
5313 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v0
5314 ; SI-GISEL-NEXT: v_fma_f32 v0, s4, v0, -v1
5315 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5316 ; SI-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0
5317 ; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v1
5318 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
5319 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5320 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
5321 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
5322 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5323 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
5324 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
5325 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
5326 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
5327 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
5328 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
5329 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5330 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5332 ; R600-LABEL: v_exp_f32_undef:
5337 ; CM-LABEL: v_exp_f32_undef:
5341 %result = call float @llvm.exp.f32(float undef)
; Checks that llvm.exp.f32 of the constant 0.0 is folded at compile time: apart
; from the entry s_waitcnt and the return, the GCN and SI expectations consist
; of a single v_mov_b32 that places 1.0 in v0.
5345 define float @v_exp_f32_0() {
5346 ; GCN-LABEL: v_exp_f32_0:
5348 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5349 ; GCN-NEXT: v_mov_b32_e32 v0, 1.0
5350 ; GCN-NEXT: s_setpc_b64 s[30:31]
5352 ; SI-LABEL: v_exp_f32_0:
5354 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5355 ; SI-NEXT: v_mov_b32_e32 v0, 1.0
5356 ; SI-NEXT: s_setpc_b64 s[30:31]
5358 ; R600-LABEL: v_exp_f32_0:
5363 ; CM-LABEL: v_exp_f32_0:
5367 %result = call float @llvm.exp.f32(float 0.0)
; Checks llvm.exp.f32 on a value obtained by bitcasting the i16 argument to
; half and extending it to float. Each SelectionDAG/GlobalISel expectation
; performs the extension with v_cvt_f32_f16 and then emits the full expansion,
; including both compare+select clamp steps (against 0xc2ce8ed0 and
; 0x42b17218).
5371 define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
5372 ; VI-SDAG-LABEL: v_exp_f32_from_fpext_f16:
5374 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5375 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5376 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5377 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5378 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
5379 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
5380 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v3
5381 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v3
5382 ; VI-SDAG-NEXT: v_rndne_f32_e32 v4, v2
5383 ; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
5384 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5385 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4
5386 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
5387 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
5388 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v4
5389 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5390 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5391 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5392 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5393 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5394 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5395 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5396 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5397 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5399 ; VI-GISEL-LABEL: v_exp_f32_from_fpext_f16:
5400 ; VI-GISEL: ; %bb.0:
5401 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5402 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5403 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5404 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5405 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5406 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5407 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
5408 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5409 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
5410 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5411 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5412 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5413 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5414 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5415 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5416 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5417 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5418 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5419 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5420 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5421 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5422 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5423 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5424 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5426 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_f16:
5427 ; GFX900-SDAG: ; %bb.0:
5428 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5429 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5430 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5431 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
5432 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5433 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5434 ; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
5435 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2
5436 ; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3
5437 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
5438 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5439 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5440 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5441 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5442 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5443 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5444 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5445 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5446 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5447 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5448 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5450 ; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_f16:
5451 ; GFX900-GISEL: ; %bb.0:
5452 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5453 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5454 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
5455 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5456 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0
5457 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3
5458 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v3
5459 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
5460 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4
5461 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5462 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
5463 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5464 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
5465 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
5466 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5467 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5468 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5469 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5470 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5471 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5472 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5474 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_f16:
5476 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5477 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5478 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5479 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
5480 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5481 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5482 ; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
5483 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2
5484 ; SI-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3
5485 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
5486 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5487 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5488 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5489 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5490 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5491 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5492 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5493 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5494 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5495 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5496 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5498 ; SI-GISEL-LABEL: v_exp_f32_from_fpext_f16:
5499 ; SI-GISEL: ; %bb.0:
5500 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5501 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5502 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
5503 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5504 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0
5505 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3
5506 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v3
5507 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
5508 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4
5509 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5510 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
5511 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5512 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
5513 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
5514 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5515 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
5516 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5517 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5518 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5519 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5520 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5522 ; R600-LABEL: v_exp_f32_from_fpext_f16:
5527 ; CM-LABEL: v_exp_f32_from_fpext_f16:
5531 %src = bitcast i16 %src.i to half
5532 %fpext = fpext half %src to float
5533 %result = call float @llvm.exp.f32(float %fpext)
; Checks llvm.exp.f32 on the float extension of a half-precision fadd of the
; two i16 arguments (each bitcast to half first).
; The VI and GFX900 expectations add with v_add_f16 before converting to f32.
; The SI expectations convert both inputs to f32 and add there; only the SI
; GlobalISel output then rounds the sum through v_cvt_f16_f32 / v_cvt_f32_f16
; before starting the expansion.
5537 define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
5538 ; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
5540 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5541 ; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
5542 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5543 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5544 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5545 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
5546 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5547 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5548 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
5549 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
5550 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
5551 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
5552 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5553 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5554 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5555 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5556 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5557 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5558 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5559 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5560 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5561 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5562 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5563 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5564 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5566 ; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
5567 ; VI-GISEL: ; %bb.0:
5568 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5569 ; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
5570 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5571 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5572 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5573 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5574 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5575 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
5576 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
5577 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5578 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5579 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5580 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5581 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5582 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5583 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5584 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5585 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5586 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5587 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5588 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5589 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5590 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5591 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5592 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5594 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
5595 ; GFX900-SDAG: ; %bb.0:
5596 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5597 ; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
5598 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5599 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5600 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
5601 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5602 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5603 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5604 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
5605 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5606 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5607 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5608 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5609 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5610 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5611 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5612 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5613 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5614 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5615 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5616 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5617 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5619 ; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
5620 ; GFX900-GISEL: ; %bb.0:
5621 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5622 ; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
5623 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5624 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
5625 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5626 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0
5627 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3
5628 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
5629 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5630 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5631 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5632 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5633 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5634 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5635 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5636 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5637 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5638 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5639 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5640 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5641 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5642 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5644 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
5646 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5647 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5648 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
5649 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5650 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
5651 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5652 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5653 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5654 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5655 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
5656 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5657 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5658 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5659 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5660 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5661 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5662 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5663 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5664 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5665 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5666 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5667 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5668 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5670 ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
5671 ; SI-GISEL: ; %bb.0:
5672 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5673 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5674 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
5675 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5676 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
5677 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
5678 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
5679 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
5680 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5681 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0
5682 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3
5683 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v3
5684 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
5685 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4
5686 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5687 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
5688 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5689 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5690 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
5691 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5692 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
5693 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5694 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
5695 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
5696 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5698 ; R600-LABEL: v_exp_f32_from_fpext_math_f16:
5703 ; CM-LABEL: v_exp_f32_from_fpext_math_f16:
5707 %src0 = bitcast i16 %src0.i to half
5708 %src1 = bitcast i16 %src1.i to half
5709 %fadd = fadd half %src0, %src1
5710 %fpext = fpext half %fadd to float
5711 %result = call float @llvm.exp.f32(float %fpext)
; Exercises llvm.exp.f32 when its operand is a bfloat argument widened to
; float with fpext.
5715 define float @v_exp_f32_from_fpext_bf16(bfloat %src) {
5716 ; VI-LABEL: v_exp_f32_from_fpext_bf16:
5718 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5719 ; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
5720 ; VI-NEXT: v_sub_f32_e32 v3, v0, v0
5721 ; VI-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v0
5722 ; VI-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v3
5723 ; VI-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v3
5724 ; VI-NEXT: v_rndne_f32_e32 v2, v1
5725 ; VI-NEXT: v_add_f32_e32 v3, v3, v4
5726 ; VI-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0
5727 ; VI-NEXT: v_sub_f32_e32 v1, v1, v2
5728 ; VI-NEXT: v_add_f32_e32 v3, v4, v3
5729 ; VI-NEXT: v_add_f32_e32 v1, v1, v3
5730 ; VI-NEXT: v_exp_f32_e32 v1, v1
5731 ; VI-NEXT: v_cvt_i32_f32_e32 v2, v2
5732 ; VI-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5733 ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5734 ; VI-NEXT: s_mov_b32 s4, 0x42b17218
5735 ; VI-NEXT: v_ldexp_f32 v1, v1, v2
5736 ; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5737 ; VI-NEXT: v_mov_b32_e32 v2, 0x7f800000
5738 ; VI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5739 ; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5740 ; VI-NEXT: s_setpc_b64 s[30:31]
5742 ; GFX900-LABEL: v_exp_f32_from_fpext_bf16:
5744 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5745 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
5746 ; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5747 ; GFX900-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5748 ; GFX900-NEXT: v_rndne_f32_e32 v2, v1
5749 ; GFX900-NEXT: v_sub_f32_e32 v3, v1, v2
5750 ; GFX900-NEXT: v_fma_f32 v1, v0, s4, -v1
5751 ; GFX900-NEXT: s_mov_b32 s4, 0x32a5705f
5752 ; GFX900-NEXT: v_fma_f32 v1, v0, s4, v1
5753 ; GFX900-NEXT: v_add_f32_e32 v1, v3, v1
5754 ; GFX900-NEXT: v_exp_f32_e32 v1, v1
5755 ; GFX900-NEXT: v_cvt_i32_f32_e32 v2, v2
5756 ; GFX900-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5757 ; GFX900-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5758 ; GFX900-NEXT: s_mov_b32 s4, 0x42b17218
5759 ; GFX900-NEXT: v_ldexp_f32 v1, v1, v2
5760 ; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5761 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000
5762 ; GFX900-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5763 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5764 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5766 ; SI-LABEL: v_exp_f32_from_fpext_bf16:
5768 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5769 ; SI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5770 ; SI-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5771 ; SI-NEXT: v_rndne_f32_e32 v2, v1
5772 ; SI-NEXT: v_sub_f32_e32 v3, v1, v2
5773 ; SI-NEXT: v_fma_f32 v1, v0, s4, -v1
5774 ; SI-NEXT: s_mov_b32 s4, 0x32a5705f
5775 ; SI-NEXT: v_fma_f32 v1, v0, s4, v1
5776 ; SI-NEXT: v_add_f32_e32 v1, v3, v1
5777 ; SI-NEXT: v_exp_f32_e32 v1, v1
5778 ; SI-NEXT: v_cvt_i32_f32_e32 v2, v2
5779 ; SI-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5780 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5781 ; SI-NEXT: s_mov_b32 s4, 0x42b17218
5782 ; SI-NEXT: v_ldexp_f32_e32 v1, v1, v2
5783 ; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5784 ; SI-NEXT: v_mov_b32_e32 v2, 0x7f800000
5785 ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5786 ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5787 ; SI-NEXT: s_setpc_b64 s[30:31]
5789 ; R600-LABEL: v_exp_f32_from_fpext_bf16:
5794 ; CM-LABEL: v_exp_f32_from_fpext_bf16:
5798 %fpext = fpext bfloat %src to float
5799 %result = call float @llvm.exp.f32(float %fpext)
; llvm.exp.f32 called with the `fast` flag on the fpext of a half fadd.
; Both operands arrive as i16 and are bitcast to half before the add.
5803 define float @v_exp_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) {
5804 ; GCN-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5806 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5807 ; GCN-NEXT: v_add_f16_e32 v0, v0, v1
5808 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
5809 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5810 ; GCN-NEXT: v_exp_f32_e32 v0, v0
5811 ; GCN-NEXT: s_setpc_b64 s[30:31]
5813 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5815 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5816 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5817 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
5818 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
5819 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
5820 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
5821 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
5822 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5823 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5824 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
5825 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
5826 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5827 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5829 ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5830 ; SI-GISEL: ; %bb.0:
5831 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5832 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5833 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
5834 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
5835 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
5836 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5837 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5838 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
5839 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5841 ; R600-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5846 ; CM-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5850 %src0 = bitcast i16 %src0.i to half
5851 %src1 = bitcast i16 %src1.i to half
5852 %fadd = fadd half %src0, %src1
5853 %fpext = fpext half %fadd to float
5854 %result = call fast float @llvm.exp.f32(float %fpext)
; llvm.exp.f32 on the fpext of a half fadd, compiled under attribute group #0.
; NOTE(review): #0 is defined outside this excerpt; the _daz suffix suggests it
; selects a denormal-flushing mode -- confirm against the attribute definition.
5858 define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
5859 ; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
5861 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5862 ; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
5863 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5864 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5865 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5866 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
5867 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5868 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5869 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
5870 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
5871 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
5872 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
5873 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5874 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5875 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5876 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5877 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5878 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5879 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5880 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5881 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5882 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5883 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5884 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5885 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5887 ; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
5888 ; VI-GISEL: ; %bb.0:
5889 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5890 ; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
5891 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5892 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5893 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5894 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5895 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5896 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
5897 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
5898 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5899 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5900 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5901 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5902 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5903 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5904 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5905 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5906 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5907 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5908 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5909 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5910 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5911 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5912 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5913 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5915 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
5916 ; GFX900-SDAG: ; %bb.0:
5917 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5918 ; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
5919 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5920 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5921 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
5922 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5923 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5924 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5925 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
5926 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5927 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5928 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5929 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5930 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5931 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5932 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5933 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5934 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5935 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5936 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5937 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5938 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5940 ; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
5941 ; GFX900-GISEL: ; %bb.0:
5942 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5943 ; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
5944 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5945 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
5946 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5947 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0
5948 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3
5949 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
5950 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5951 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5952 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5953 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5954 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5955 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5956 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5957 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5958 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5959 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5960 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5961 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5962 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5963 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5965 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
5967 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5968 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5969 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
5970 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5971 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
5972 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5973 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5974 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5975 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5976 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
5977 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5978 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5979 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5980 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5981 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5982 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5983 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5984 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5985 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5986 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5987 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5988 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5989 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5991 ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
5992 ; SI-GISEL: ; %bb.0:
5993 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5994 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5995 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
5996 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5997 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
5998 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
5999 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6000 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
6001 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6002 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0
6003 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3
6004 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v3
6005 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
6006 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4
6007 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
6008 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
6009 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6010 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
6011 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
6012 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
6013 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
6014 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
6015 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
6016 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
6017 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6019 ; R600-LABEL: v_exp_f32_from_fpext_math_f16_daz:
6024 ; CM-LABEL: v_exp_f32_from_fpext_math_f16_daz:
6028 %src0 = bitcast i16 %src0.i to half
6029 %src1 = bitcast i16 %src1.i to half
6030 %fadd = fadd half %src0, %src1
6031 %fpext = fpext half %fadd to float
6032 %result = call float @llvm.exp.f32(float %fpext)
6036 ; FIXME: Fold out fp16_to_fp (FP_TO_FP16) on no-f16 targets
; Baseline scalar case: llvm.exp.f16 applied directly to a half argument.
6037 define half @v_exp_f16(half %in) {
6038 ; GCN-LABEL: v_exp_f16:
6040 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6041 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
6042 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6043 ; GCN-NEXT: v_exp_f32_e32 v0, v0
6044 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
6045 ; GCN-NEXT: s_setpc_b64 s[30:31]
6047 ; SI-SDAG-LABEL: v_exp_f16:
6049 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6050 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6051 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6052 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6053 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6054 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6055 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6056 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6058 ; SI-GISEL-LABEL: v_exp_f16:
6059 ; SI-GISEL: ; %bb.0:
6060 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6061 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6062 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6063 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6064 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6065 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6067 ; R600-LABEL: v_exp_f16:
6072 ; CM-LABEL: v_exp_f16:
6076 %result = call half @llvm.exp.f16(half %in)
; llvm.exp.f16 of llvm.fabs.f16. The checks below expect the abs to show up as
; a |v0| source modifier on the f16-to-f32 convert, not a separate instruction.
6080 define half @v_exp_fabs_f16(half %in) {
6081 ; GCN-LABEL: v_exp_fabs_f16:
6083 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6084 ; GCN-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6085 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6086 ; GCN-NEXT: v_exp_f32_e32 v0, v0
6087 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
6088 ; GCN-NEXT: s_setpc_b64 s[30:31]
6090 ; SI-SDAG-LABEL: v_exp_fabs_f16:
6092 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6093 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6094 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6095 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6096 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6097 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6098 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6099 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6101 ; SI-GISEL-LABEL: v_exp_fabs_f16:
6102 ; SI-GISEL: ; %bb.0:
6103 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6104 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6105 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6106 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6107 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6108 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6110 ; R600-LABEL: v_exp_fabs_f16:
6115 ; CM-LABEL: v_exp_fabs_f16:
6119 %fabs = call half @llvm.fabs.f16(half %in)
6120 %result = call half @llvm.exp.f16(half %fabs)
; llvm.exp.f16 of fneg(fabs(x)). In the checks below the SelectionDAG runs keep
; |v0| on the convert and negate the multiplier (0xbfb8aa3b), while the
; GlobalISel runs use a -|v0| source modifier with the positive multiplier.
6124 define half @v_exp_fneg_fabs_f16(half %in) {
6125 ; GCN-SDAG-LABEL: v_exp_fneg_fabs_f16:
6126 ; GCN-SDAG: ; %bb.0:
6127 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6128 ; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6129 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
6130 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
6131 ; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6132 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
6134 ; GCN-GISEL-LABEL: v_exp_fneg_fabs_f16:
6135 ; GCN-GISEL: ; %bb.0:
6136 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6137 ; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
6138 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6139 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
6140 ; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6141 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
6143 ; SI-SDAG-LABEL: v_exp_fneg_fabs_f16:
6145 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6146 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6147 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6148 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
6149 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6150 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6151 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6152 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6154 ; SI-GISEL-LABEL: v_exp_fneg_fabs_f16:
6155 ; SI-GISEL: ; %bb.0:
6156 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6157 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
6158 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6159 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6160 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6161 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6163 ; R600-LABEL: v_exp_fneg_fabs_f16:
6168 ; CM-LABEL: v_exp_fneg_fabs_f16:
6172 %fabs = call half @llvm.fabs.f16(half %in)
6173 %fneg.fabs = fneg half %fabs
6174 %result = call half @llvm.exp.f16(half %fneg.fabs)
; llvm.exp.f16 of fneg(x). The expected output folds the negation either into
; the multiplier constant (0xbfb8aa3b) or into a -v0 source modifier on a
; convert, depending on the target and instruction selector.
6178 define half @v_exp_fneg_f16(half %in) {
6179 ; GCN-SDAG-LABEL: v_exp_fneg_f16:
6180 ; GCN-SDAG: ; %bb.0:
6181 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6182 ; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6183 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
6184 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
6185 ; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6186 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
6188 ; GCN-GISEL-LABEL: v_exp_fneg_f16:
6189 ; GCN-GISEL: ; %bb.0:
6190 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6191 ; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
6192 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6193 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
6194 ; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6195 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
6197 ; SI-SDAG-LABEL: v_exp_fneg_f16:
6199 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6200 ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
6201 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6202 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6203 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6204 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6205 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6206 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6208 ; SI-GISEL-LABEL: v_exp_fneg_f16:
6209 ; SI-GISEL: ; %bb.0:
6210 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6211 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
6212 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6213 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6214 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6215 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6217 ; R600-LABEL: v_exp_fneg_f16:
6222 ; CM-LABEL: v_exp_fneg_f16:
6226 %fneg = fneg half %in
6227 %result = call half @llvm.exp.f16(half %fneg)
; llvm.exp.f16 called with the `fast` flag. For the tonga and gfx900 runs the
; checks expect a v_mul_f16 by 0x3dc5 feeding v_exp_f16, with no f32 round trip.
6231 define half @v_exp_f16_fast(half %in) {
6232 ; GCN-LABEL: v_exp_f16_fast:
6234 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6235 ; GCN-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
6236 ; GCN-NEXT: v_exp_f16_e32 v0, v0
6237 ; GCN-NEXT: s_setpc_b64 s[30:31]
6239 ; SI-SDAG-LABEL: v_exp_f16_fast:
6241 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6242 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6243 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6244 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
6245 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6246 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6247 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6248 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6250 ; SI-GISEL-LABEL: v_exp_f16_fast:
6251 ; SI-GISEL: ; %bb.0:
6252 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6253 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6254 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 0x3dc5
6255 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
6256 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6257 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6258 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6259 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6260 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6262 ; R600-LABEL: v_exp_f16_fast:
6267 ; CM-LABEL: v_exp_f16_fast:
6271 %result = call fast half @llvm.exp.f16(half %in)
; Vector case: llvm.exp.v2f16 applied directly to a <2 x half> argument.
6275 define <2 x half> @v_exp_v2f16(<2 x half> %in) {
6276 ; VI-SDAG-LABEL: v_exp_v2f16:
6278 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6279 ; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6280 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6281 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6282 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6283 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6284 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6285 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6286 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6287 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6288 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6290 ; VI-GISEL-LABEL: v_exp_v2f16:
6291 ; VI-GISEL: ; %bb.0:
6292 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6293 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6294 ; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6295 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6296 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6297 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6298 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6299 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6300 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6301 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6302 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6304 ; GFX900-LABEL: v_exp_v2f16:
6306 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6307 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v0
6308 ; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6309 ; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6310 ; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6311 ; GFX900-NEXT: v_exp_f32_e32 v1, v1
6312 ; GFX900-NEXT: v_exp_f32_e32 v0, v0
6313 ; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
6314 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
6315 ; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0
6316 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6318 ; SI-SDAG-LABEL: v_exp_v2f16:
6320 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6321 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6322 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6323 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6324 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6325 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6326 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6327 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6328 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6329 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6330 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6331 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6332 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6333 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6335 ; SI-GISEL-LABEL: v_exp_v2f16:
6336 ; SI-GISEL: ; %bb.0:
6337 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6338 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6339 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6340 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6341 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6342 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6343 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6344 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6345 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6346 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6348 ; R600-LABEL: v_exp_v2f16:
6353 ; CM-LABEL: v_exp_v2f16:
6357 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %in)
6358 ret <2 x half> %result
; llvm.exp.v2f16 of llvm.fabs.v2f16 on a <2 x half> argument.
6361 define <2 x half> @v_exp_fabs_v2f16(<2 x half> %in) {
6362 ; VI-SDAG-LABEL: v_exp_fabs_v2f16:
6364 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6365 ; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6366 ; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6367 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6368 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6369 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6370 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6371 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6372 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6373 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6374 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6376 ; VI-GISEL-LABEL: v_exp_fabs_v2f16:
6377 ; VI-GISEL: ; %bb.0:
6378 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6379 ; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6380 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6381 ; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6382 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6383 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6384 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6385 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6386 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6387 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6388 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6389 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6391 ; GFX900-SDAG-LABEL: v_exp_fabs_v2f16:
6392 ; GFX900-SDAG: ; %bb.0:
6393 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6394 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v0|
6395 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6396 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6397 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6398 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
6399 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
6400 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6401 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6402 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6403 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6405 ; GFX900-GISEL-LABEL: v_exp_fabs_v2f16:
6406 ; GFX900-GISEL: ; %bb.0:
6407 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6408 ; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6409 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6410 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6411 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6412 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6413 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
6414 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
6415 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6416 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6417 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
6418 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6420 ; SI-SDAG-LABEL: v_exp_fabs_v2f16:
6422 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6423 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6424 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6425 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6426 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
6427 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6428 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6429 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6430 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6431 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6432 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6433 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6434 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6435 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6437 ; SI-GISEL-LABEL: v_exp_fabs_v2f16:
6438 ; SI-GISEL: ; %bb.0:
6439 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6440 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6441 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6442 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6443 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6444 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6445 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6446 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6447 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6448 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6449 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6450 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
6451 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
6452 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
6453 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6455 ; R600-LABEL: v_exp_fabs_v2f16:
6460 ; CM-LABEL: v_exp_fabs_v2f16:
6464 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
6465 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %fabs)
6466 ret <2 x half> %result
; llvm.exp.v2f16 of fneg(fabs(x)) on a <2 x half> argument.
6469 define <2 x half> @v_exp_fneg_fabs_v2f16(<2 x half> %in) {
6470 ; VI-SDAG-LABEL: v_exp_fneg_fabs_v2f16:
6472 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6473 ; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6474 ; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
6475 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6476 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6477 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6478 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6479 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6480 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6481 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6482 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6484 ; VI-GISEL-LABEL: v_exp_fneg_fabs_v2f16:
6485 ; VI-GISEL: ; %bb.0:
6486 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6487 ; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6488 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6489 ; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6490 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6491 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6492 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6493 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6494 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6495 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6496 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6497 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6499 ; GFX900-SDAG-LABEL: v_exp_fneg_fabs_v2f16:
6500 ; GFX900-SDAG: ; %bb.0:
6501 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6502 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v0|
6503 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6504 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6505 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6506 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
6507 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
6508 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6509 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6510 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6511 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6513 ; GFX900-GISEL-LABEL: v_exp_fneg_fabs_v2f16:
6514 ; GFX900-GISEL: ; %bb.0:
6515 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6516 ; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6517 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6518 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6519 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6520 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6521 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
6522 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
6523 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6524 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6525 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
6526 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6528 ; SI-SDAG-LABEL: v_exp_fneg_fabs_v2f16:
6530 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6531 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6532 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6533 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6534 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6535 ; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6536 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
6537 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6538 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6539 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6540 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6541 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6542 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6543 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6544 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
6545 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
6546 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
6547 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6549 ; SI-GISEL-LABEL: v_exp_fneg_fabs_v2f16:
6550 ; SI-GISEL: ; %bb.0:
6551 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6552 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6553 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6554 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6555 ; SI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6556 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6557 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6558 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6559 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6560 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6561 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6562 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
6563 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
6564 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
6565 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6567 ; R600-LABEL: v_exp_fneg_fabs_v2f16:
6572 ; CM-LABEL: v_exp_fneg_fabs_v2f16:
6576 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
6577 %fneg.fabs = fneg <2 x half> %fabs
6578 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %fneg.fabs)
6579 ret <2 x half> %result
; Lowering of llvm.exp.v2f16 applied to an fneg of the <2 x half> argument.
; In the checks below, the VI and GFX900 SelectionDAG runs fold the negation
; into the source modifier of v_cvt_f32_f16 (-v0), while the GlobalISel runs
; and both tahiti (SI) runs flip the two sign bits with a v_xor_b32 against
; 0x80008000. Every visible sequence then evaluates each half in f32
; (multiply by 0x3fb8aa3b, v_exp_f32) and rounds the result with
; v_cvt_f16_f32.
6582 define <2 x half> @v_exp_fneg_v2f16(<2 x half> %in) {
6583 ; VI-SDAG-LABEL: v_exp_fneg_v2f16:
6585 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6586 ; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6587 ; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -v0
6588 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6589 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6590 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6591 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6592 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6593 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6594 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6595 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6597 ; VI-GISEL-LABEL: v_exp_fneg_v2f16:
6598 ; VI-GISEL: ; %bb.0:
6599 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6600 ; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
6601 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6602 ; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6603 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6604 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6605 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6606 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6607 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6608 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6609 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6610 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6612 ; GFX900-SDAG-LABEL: v_exp_fneg_v2f16:
6613 ; GFX900-SDAG: ; %bb.0:
6614 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6615 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -v0
6616 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6617 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6618 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6619 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
6620 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
6621 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6622 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6623 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6624 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6626 ; GFX900-GISEL-LABEL: v_exp_fneg_v2f16:
6627 ; GFX900-GISEL: ; %bb.0:
6628 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6629 ; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
6630 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6631 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6632 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6633 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6634 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
6635 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
6636 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6637 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6638 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
6639 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6641 ; SI-SDAG-LABEL: v_exp_fneg_v2f16:
6643 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6644 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6645 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6646 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6647 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6648 ; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
6649 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
6650 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6651 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6652 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6653 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6654 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6655 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6656 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6657 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
6658 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
6659 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
6660 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6662 ; SI-GISEL-LABEL: v_exp_fneg_v2f16:
6663 ; SI-GISEL: ; %bb.0:
6664 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6665 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6666 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6667 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6668 ; SI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
6669 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6670 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6671 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6672 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6673 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6674 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6675 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
6676 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
6677 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
6678 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6680 ; R600-LABEL: v_exp_fneg_v2f16:
6685 ; CM-LABEL: v_exp_fneg_v2f16:
6689 %fneg = fneg <2 x half> %in
6690 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %fneg)
6691 ret <2 x half> %result
; llvm.exp.v2f16 called with the `fast` flag set.
; The VI and GFX900 checks stay in f16, multiplying by 0x3dc5 and using
; v_exp_f16 directly. The tahiti (SI) checks go through f32 with v_exp_f32;
; there the SelectionDAG run multiplies by the immediate 0x3fb8a000, whereas
; the GlobalISel run converts 0x3dc5 to f32 with v_cvt_f32_f16 and rounds the
; product through f16 before the v_exp_f32.
6694 define <2 x half> @v_exp_v2f16_fast(<2 x half> %in) {
6695 ; VI-SDAG-LABEL: v_exp_v2f16_fast:
6697 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6698 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5
6699 ; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6700 ; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
6701 ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6702 ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
6703 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6704 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6706 ; VI-GISEL-LABEL: v_exp_v2f16_fast:
6707 ; VI-GISEL: ; %bb.0:
6708 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6709 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5
6710 ; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
6711 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6712 ; VI-GISEL-NEXT: v_exp_f16_e32 v2, v2
6713 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6714 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
6715 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6717 ; GFX900-SDAG-LABEL: v_exp_v2f16_fast:
6718 ; GFX900-SDAG: ; %bb.0:
6719 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6720 ; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
6721 ; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
6722 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0
6723 ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6724 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6725 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6727 ; GFX900-GISEL-LABEL: v_exp_v2f16_fast:
6728 ; GFX900-GISEL: ; %bb.0:
6729 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6730 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5
6731 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
6732 ; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6733 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v2
6734 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
6735 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
6736 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6738 ; SI-SDAG-LABEL: v_exp_v2f16_fast:
6740 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6741 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6742 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6743 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6744 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6745 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
6746 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
6747 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6748 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6749 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6750 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6751 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6752 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6753 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6755 ; SI-GISEL-LABEL: v_exp_v2f16_fast:
6756 ; SI-GISEL: ; %bb.0:
6757 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6758 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6759 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, 0x3dc5
6760 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6761 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
6762 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2
6763 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6764 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6765 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6766 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6767 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6768 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6769 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6770 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6771 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6773 ; R600-LABEL: v_exp_v2f16_fast:
6778 ; CM-LABEL: v_exp_v2f16_fast:
6782 %result = call fast <2 x half> @llvm.exp.v2f16(<2 x half> %in)
6783 ret <2 x half> %result
; llvm.exp.v3f16 with no fast-math flags.
; The VI and GFX900 expectations use the shared per-target prefixes here, so
; one expected sequence per target covers both instruction selectors; only
; the tahiti (SI) output is checked separately for SelectionDAG and
; GlobalISel. All visible sequences compute each element in f32 (multiply by
; 0x3fb8aa3b, then v_exp_f32) and round with v_cvt_f16_f32.
6786 define <3 x half> @v_exp_v3f16(<3 x half> %in) {
6787 ; VI-LABEL: v_exp_v3f16:
6789 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6790 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v0
6791 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6792 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
6793 ; VI-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
6794 ; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6795 ; VI-NEXT: v_exp_f32_e32 v2, v2
6796 ; VI-NEXT: v_exp_f32_e32 v0, v0
6797 ; VI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6798 ; VI-NEXT: v_exp_f32_e32 v1, v1
6799 ; VI-NEXT: v_cvt_f16_f32_e32 v2, v2
6800 ; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6801 ; VI-NEXT: v_cvt_f16_f32_e32 v1, v1
6802 ; VI-NEXT: v_or_b32_e32 v0, v2, v0
6803 ; VI-NEXT: s_setpc_b64 s[30:31]
6805 ; GFX900-LABEL: v_exp_v3f16:
6807 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6808 ; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v0
6809 ; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6810 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
6811 ; GFX900-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
6812 ; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6813 ; GFX900-NEXT: v_exp_f32_e32 v2, v2
6814 ; GFX900-NEXT: v_exp_f32_e32 v0, v0
6815 ; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6816 ; GFX900-NEXT: v_exp_f32_e32 v1, v1
6817 ; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2
6818 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
6819 ; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
6820 ; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0
6821 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6823 ; SI-SDAG-LABEL: v_exp_v3f16:
6825 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6826 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6827 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6828 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
6829 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6830 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6831 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
6832 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6833 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6834 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
6835 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6836 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6837 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
6838 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6839 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6840 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
6841 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6842 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6843 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
6844 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6846 ; SI-GISEL-LABEL: v_exp_v3f16:
6847 ; SI-GISEL: ; %bb.0:
6848 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6849 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6850 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6851 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
6852 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6853 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6854 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
6855 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6856 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6857 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
6858 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6859 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6860 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
6861 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6863 ; R600-LABEL: v_exp_v3f16:
6868 ; CM-LABEL: v_exp_v3f16:
6872 %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %in)
6873 ret <3 x half> %result
; llvm.exp.v3f16 called with the `afn` flag set.
; As checked below, VI and GFX900 multiply each element by 0x3dc5 in f16 and
; use v_exp_f16; the tahiti (SI) runs go through f32 and v_exp_f32 instead
; (SelectionDAG with the immediate 0x3fb8a000, GlobalISel with 0x3dc5
; converted by v_cvt_f32_f16 and the product rounded through f16).
6876 define <3 x half> @v_exp_v3f16_afn(<3 x half> %in) {
6877 ; VI-SDAG-LABEL: v_exp_v3f16_afn:
6879 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6880 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x3dc5
6881 ; VI-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
6882 ; VI-SDAG-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6883 ; VI-SDAG-NEXT: v_exp_f16_e32 v2, v2
6884 ; VI-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6885 ; VI-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
6886 ; VI-SDAG-NEXT: v_exp_f16_e32 v1, v1
6887 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
6888 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6890 ; VI-GISEL-LABEL: v_exp_v3f16_afn:
6891 ; VI-GISEL: ; %bb.0:
6892 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6893 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5
6894 ; VI-GISEL-NEXT: v_mul_f16_e32 v3, 0x3dc5, v0
6895 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6896 ; VI-GISEL-NEXT: v_exp_f16_e32 v3, v3
6897 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6898 ; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
6899 ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
6900 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v3, v0
6901 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6903 ; GFX900-SDAG-LABEL: v_exp_v3f16_afn:
6904 ; GFX900-SDAG: ; %bb.0:
6905 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6906 ; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
6907 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
6908 ; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6909 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2
6910 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
6911 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
6912 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1
6913 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0
6914 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6916 ; GFX900-GISEL-LABEL: v_exp_v3f16_afn:
6917 ; GFX900-GISEL: ; %bb.0:
6918 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6919 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5
6920 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v3, 0x3dc5, v0
6921 ; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6922 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v3, v3
6923 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
6924 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
6925 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
6926 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v3
6927 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6929 ; SI-SDAG-LABEL: v_exp_v3f16_afn:
6931 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6932 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6933 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6934 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
6935 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6936 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6937 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
6938 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
6939 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
6940 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
6941 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6942 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6943 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
6944 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6945 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6946 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
6947 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6948 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6949 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
6950 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6952 ; SI-GISEL-LABEL: v_exp_v3f16_afn:
6953 ; SI-GISEL: ; %bb.0:
6954 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6955 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6956 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 0x3dc5
6957 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6958 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
6959 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v3
6960 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
6961 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
6962 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6963 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6964 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
6965 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6966 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6967 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
6968 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6969 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6970 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
6971 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6972 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6973 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
6974 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6976 ; R600-LABEL: v_exp_v3f16_afn:
6981 ; CM-LABEL: v_exp_v3f16_afn:
6985 %result = call afn <3 x half> @llvm.exp.v3f16(<3 x half> %in)
6986 ret <3 x half> %result
; llvm.exp.f32 called with only the `contract` flag set.
; Every visible sequence is the expanded form: the scaled input is split with
; v_rndne_f32 into an integer part and a remainder, the remainder goes
; through v_exp_f32, and v_ldexp_f32 applies the integer part. The VI checks
; do the scaling with mul/add on an input masked by 0xfffff000 (constants
; 0x3fb8a000 and 0x39a3b295); the GFX900 and tahiti (SI) checks use v_fma_f32
; with 0x3fb8aa3b and 0x32a5705f. Two compares of the input against
; 0xc2ce8ed0 and 0x42b17218 then select 0 or 0x7f800000 in place of the
; computed value.
6989 define float @v_exp_f32_contract(float %in) {
6990 ; VI-SDAG-LABEL: v_exp_f32_contract:
6992 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6993 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
6994 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
6995 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
6996 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
6997 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
6998 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
6999 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
7000 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7001 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
7002 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
7003 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
7004 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7005 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
7006 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7007 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7008 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7009 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7010 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7011 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7012 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7013 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7014 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7016 ; VI-GISEL-LABEL: v_exp_f32_contract:
7017 ; VI-GISEL: ; %bb.0:
7018 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7019 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7020 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
7021 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
7022 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
7023 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
7024 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
7025 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7026 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7027 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
7028 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
7029 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
7030 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
7031 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7032 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7033 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7034 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7035 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7036 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7037 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7038 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7039 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7040 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7042 ; GFX900-SDAG-LABEL: v_exp_f32_contract:
7043 ; GFX900-SDAG: ; %bb.0:
7044 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7045 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7046 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7047 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7048 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7049 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7050 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7051 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7052 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7053 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
7054 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7055 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7056 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7057 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7058 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7059 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7060 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7061 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7062 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7063 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7065 ; GFX900-GISEL-LABEL: v_exp_f32_contract:
7066 ; GFX900-GISEL: ; %bb.0:
7067 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7068 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
7069 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
7070 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
7071 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7072 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
7073 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
7074 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
7075 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
7076 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7077 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
7078 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7079 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7080 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7081 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7082 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7083 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7084 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7085 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7086 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7088 ; SI-SDAG-LABEL: v_exp_f32_contract:
7090 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7091 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7092 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7093 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7094 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7095 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7096 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7097 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7098 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7099 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7100 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7101 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7102 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7103 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7104 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
7105 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7106 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7107 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7108 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7109 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7111 ; SI-GISEL-LABEL: v_exp_f32_contract:
7112 ; SI-GISEL: ; %bb.0:
7113 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7114 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
7115 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
7116 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
7117 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7118 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
7119 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
7120 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
7121 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
7122 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7123 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7124 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7125 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
7126 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7127 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7128 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7129 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7130 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7131 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7132 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7134 ; R600-LABEL: v_exp_f32_contract:
7139 ; CM-LABEL: v_exp_f32_contract:
7143 %result = call contract float @llvm.exp.f32(float %in)
; Same `contract` call to llvm.exp.f32 as v_exp_f32_contract, but the function
; carries attribute group #0.
; NOTE(review): #0 is defined outside this part of the file; the _daz suffix
; suggests a denormal-flushing mode -- confirm against the attributes list.
; The expected sequences below have the same shape as for the plain contract
; case (v_rndne_f32 / v_exp_f32 / v_ldexp_f32 followed by the two compares
; against 0xc2ce8ed0 and 0x42b17218).
7147 define float @v_exp_f32_contract_daz(float %in) #0 {
7148 ; VI-SDAG-LABEL: v_exp_f32_contract_daz:
7150 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7151 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7152 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
7153 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
7154 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
7155 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
7156 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
7157 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
7158 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7159 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
7160 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
7161 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
7162 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7163 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
7164 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7165 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7166 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7167 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7168 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7169 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7170 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7171 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7172 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7174 ; VI-GISEL-LABEL: v_exp_f32_contract_daz:
7175 ; VI-GISEL: ; %bb.0:
7176 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7177 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7178 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
7179 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
7180 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
7181 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
7182 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
7183 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7184 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7185 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
7186 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
7187 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
7188 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
7189 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7190 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7191 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7192 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7193 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7194 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7195 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7196 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7197 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7198 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7200 ; GFX900-SDAG-LABEL: v_exp_f32_contract_daz:
7201 ; GFX900-SDAG: ; %bb.0:
7202 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7203 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7204 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7205 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7206 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7207 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7208 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7209 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7210 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7211 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
7212 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7213 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7214 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7215 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7216 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7217 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7218 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7219 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7220 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7221 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7223 ; GFX900-GISEL-LABEL: v_exp_f32_contract_daz:
7224 ; GFX900-GISEL: ; %bb.0:
7225 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7226 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
7227 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
7228 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
7229 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7230 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
7231 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
7232 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
7233 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
7234 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7235 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
7236 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7237 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7238 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7239 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7240 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7241 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7242 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7243 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7244 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7246 ; SI-SDAG-LABEL: v_exp_f32_contract_daz:
7248 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7249 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7250 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7251 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7252 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7253 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7254 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7255 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7256 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7257 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7258 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7259 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7260 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7261 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7262 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
7263 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7264 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7265 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7266 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7267 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7269 ; SI-GISEL-LABEL: v_exp_f32_contract_daz:
7270 ; SI-GISEL: ; %bb.0:
7271 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7272 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
7273 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
7274 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
7275 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7276 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
7277 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
7278 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
7279 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
7280 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7281 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7282 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7283 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
7284 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7285 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7286 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7287 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7288 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7289 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7290 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7292 ; R600-LABEL: v_exp_f32_contract_daz:
7297 ; CM-LABEL: v_exp_f32_contract_daz:
7301 %result = call contract float @llvm.exp.f32(float %in)
; Test: llvm.exp.f32 on a float argument where the call carries the
; `contract nnan ninf` fast-math flags.
; CHECK lines cover the VI, GFX900 and SI run lines in both the SDAG and
; GISEL flavours; for R600 and CM only the label checks are visible in this view.
; In every GCN variant below the expected sequence ends with a single compare
; of the input against 0xc2ce8ed0 that selects 0, then the return; no
; upper-bound compare/select appears (presumably a consequence of `ninf` --
; confirm against the lowering code).
; NOTE(review): these assertions are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing by hand.
7305 define float @v_exp_f32_contract_nnan_ninf(float %in) {
7306 ; VI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
7308 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7309 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7310 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
7311 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
7312 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
7313 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
7314 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
7315 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
7316 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7317 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
7318 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
7319 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
7320 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7321 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
7322 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7323 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7324 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7325 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
7326 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7328 ; VI-GISEL-LABEL: v_exp_f32_contract_nnan_ninf:
7329 ; VI-GISEL: ; %bb.0:
7330 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7331 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7332 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
7333 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
7334 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
7335 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
7336 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
7337 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7338 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7339 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
7340 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
7341 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
7342 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
7343 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7344 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7345 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7346 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7347 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
7348 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7350 ; GFX900-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
7351 ; GFX900-SDAG: ; %bb.0:
7352 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7353 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7354 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7355 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7356 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7357 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7358 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7359 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7360 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7361 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
7362 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7363 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7364 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7365 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7366 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
7367 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7369 ; GFX900-GISEL-LABEL: v_exp_f32_contract_nnan_ninf:
7370 ; GFX900-GISEL: ; %bb.0:
7371 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7372 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
7373 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
7374 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
7375 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7376 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
7377 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
7378 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
7379 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
7380 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7381 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
7382 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7383 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7384 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7385 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
7386 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7388 ; SI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
7390 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7391 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7392 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7393 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7394 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7395 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7396 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7397 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7398 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7399 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7400 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7401 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7402 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7403 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
7404 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
7405 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7407 ; SI-GISEL-LABEL: v_exp_f32_contract_nnan_ninf:
7408 ; SI-GISEL: ; %bb.0:
7409 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7410 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
7411 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
7412 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2
7413 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7414 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1
7415 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
7416 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
7417 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
7418 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7419 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7420 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
7421 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7422 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7423 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
7424 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7426 ; R600-LABEL: v_exp_f32_contract_nnan_ninf:
7431 ; CM-LABEL: v_exp_f32_contract_nnan_ninf:
7435 %result = call contract nnan ninf float @llvm.exp.f32(float %in)
; Declarations of the llvm.fabs and llvm.exp intrinsics in their f32 and f16
; scalar and vector forms. Every declaration is tagged with attribute group #2.
7439 declare float @llvm.fabs.f32(float) #2
7440 declare float @llvm.exp.f32(float) #2
7441 declare <2 x float> @llvm.exp.v2f32(<2 x float>) #2
7442 declare <3 x float> @llvm.exp.v3f32(<3 x float>) #2
7443 declare <4 x float> @llvm.exp.v4f32(<4 x float>) #2
7444 declare half @llvm.fabs.f16(half) #2
7445 declare half @llvm.exp.f16(half) #2
7446 declare <2 x half> @llvm.exp.v2f16(<2 x half>) #2
7447 declare <3 x half> @llvm.exp.v3f16(<3 x half>) #2
7448 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
; Attribute groups referenced by number elsewhere in this file.
; #0 and #1 set the "denormal-fp-math-f32" string attribute; per the LLVM
; LangRef its value is an "<output mode>,<input mode>" pair for f32 denormals.
; #0: output mode "ieee", input mode "preserve-sign".
7450 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
; #1: both output and input modes are "dynamic".
7451 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
; #2: the attribute set attached to the intrinsic declarations in this file.
7452 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }