1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,VI,VI-SDAG %s
3 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,VI,VI-GISEL %s
4 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,GFX900,GFX900-SDAG %s
5 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,GFX900,GFX900-GISEL %s
6 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
7 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-GISEL %s
9 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
10 ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
; s_exp_f32 - kernel taking an addrspace(1) output pointer and a scalar float
; argument; it calls llvm.exp.f32 on the argument and stores the result to %out.
;
; The autogenerated assertions below pin the instruction sequence llc emits for
; every configuration listed in the run lines at the top of this file (tonga,
; gfx900 and tahiti with both instruction selectors, plus redwood and cayman).
; What the expected output shows:
;   * tonga (VI prefixes): the input is masked with 0xfffff000, the remainder
;     is recovered by subtraction, and the products with 0x3fb8a000 and
;     0x39a3b295 are combined with plain mul/add; no v_fma_f32 is expected.
;   * gfx900 and tahiti: v_fma_f32 is used with 0x3fb8aa3b and 0x32a5705f.
;   * all amdgcn variants then expect v_rndne_f32, v_exp_f32, v_cvt_i32_f32 and
;     v_ldexp_f32, followed by two compare/select pairs: one against 0xc2ce8ed0
;     selecting 0, one against 0x42b17218 selecting 0x7f800000.
;   * the SDAG checks use v_cmp_nlt/v_cmp_ngt with the select operands swapped,
;     while the GISEL checks use v_cmp_lt/v_cmp_gt.
;   * redwood and cayman expect no ldexp instruction; MAX_INT/MIN_INT, LSHL by
;     23 and ADD_INT of 1065353216 appear instead. Their literal annotations
;     give the two compare bounds as -1.032789e+02 and 8.872284e+01 and the
;     final select literal as INF.
; NOTE(review): 0x3fb8aa3b / 0x32a5705f and 0x3fb8a000 / 0x39a3b295 look like
; high/low splits of log2(e) - confirm against the AMDGPU exp lowering.
; Do not hand-edit the assertions; regenerate them with
; utils/update_llc_test_checks.py.
12 define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
13 ; VI-SDAG-LABEL: s_exp_f32:
15 ; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
16 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
17 ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
18 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
19 ; VI-SDAG-NEXT: s_and_b32 s3, s2, 0xfffff000
20 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
21 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, s2, v1
22 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
23 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
24 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s3, v0
25 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
26 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x39a3b295
27 ; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v0
28 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, s3, v3
29 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
30 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
31 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
32 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
33 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v2
34 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
35 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
36 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
37 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1
38 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
39 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
40 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1
41 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
42 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
43 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
44 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
45 ; VI-SDAG-NEXT: s_endpgm
47 ; VI-GISEL-LABEL: s_exp_f32:
49 ; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
50 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
51 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295
52 ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
53 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
54 ; VI-GISEL-NEXT: s_and_b32 s3, s2, 0xfffff000
55 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s3
56 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2
57 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v2
58 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
59 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0
60 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
61 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s3, v1
62 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
63 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v0
64 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
65 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
66 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
67 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
68 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
69 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
70 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
71 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
72 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
73 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
74 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
75 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
76 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
77 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
78 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
79 ; VI-GISEL-NEXT: s_endpgm
81 ; GFX900-SDAG-LABEL: s_exp_f32:
82 ; GFX900-SDAG: ; %bb.0:
83 ; GFX900-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
84 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
85 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
86 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
87 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
88 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
89 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
90 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v2
91 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
92 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
93 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
94 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3
95 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
96 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x7f800000
97 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
98 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
99 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
100 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1
101 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
102 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
103 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1
104 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
105 ; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1]
106 ; GFX900-SDAG-NEXT: s_endpgm
108 ; GFX900-GISEL-LABEL: s_exp_f32:
109 ; GFX900-GISEL: ; %bb.0:
110 ; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
111 ; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x3fb8aa3b
112 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f
113 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
114 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
115 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s2
116 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
117 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v1, s3, -v2
118 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
119 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s2, v0, v1
120 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3
121 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
122 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
123 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
124 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
125 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
126 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
127 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
128 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
129 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
130 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
131 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
132 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
133 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
134 ; GFX900-GISEL-NEXT: s_endpgm
136 ; SI-SDAG-LABEL: s_exp_f32:
138 ; SI-SDAG-NEXT: s_load_dword s4, s[0:1], 0xb
139 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
140 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
141 ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
142 ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
143 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
144 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, s4, v0
145 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
146 ; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v2
147 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
148 ; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
149 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
150 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
151 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3
152 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
153 ; SI-SDAG-NEXT: s_mov_b32 s2, -1
154 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
155 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
156 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1
157 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
158 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
159 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v1
160 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
161 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
162 ; SI-SDAG-NEXT: s_endpgm
164 ; SI-GISEL-LABEL: s_exp_f32:
166 ; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb
167 ; SI-GISEL-NEXT: s_mov_b32 s3, 0x3fb8aa3b
168 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f
169 ; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
170 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
171 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, s2
172 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
173 ; SI-GISEL-NEXT: v_fma_f32 v1, v1, s3, -v2
174 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
175 ; SI-GISEL-NEXT: v_fma_f32 v0, s2, v0, v1
176 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3
177 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
178 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
179 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
180 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
181 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
182 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
183 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
184 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
185 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
186 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
187 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
188 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
189 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
190 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
191 ; SI-GISEL-NEXT: s_endpgm
193 ; R600-LABEL: s_exp_f32:
195 ; R600-NEXT: ALU 59, @4, KC0[CB0:0-32], KC1[]
196 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
199 ; R600-NEXT: ALU clause starting at 4:
200 ; R600-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
201 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
202 ; R600-NEXT: ADD T1.W, KC0[2].Z, -PV.W,
203 ; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
204 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
205 ; R600-NEXT: RNDNE T3.W, PS,
206 ; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
207 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
208 ; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
209 ; R600-NEXT: TRUNC * T4.W, PV.W,
210 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
211 ; R600-NEXT: FLT_TO_INT T0.Z, PS,
212 ; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
213 ; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
214 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
215 ; R600-NEXT: ADD T1.Z, PS, PV.W,
216 ; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x,
217 ; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
218 ; R600-NEXT: -330(nan), 381(5.338947e-43)
219 ; R600-NEXT: ADD_INT T0.X, PS, literal.x,
220 ; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y,
221 ; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
222 ; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w,
223 ; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
224 ; R600-NEXT: -254(nan), 204(2.858649e-43)
225 ; R600-NEXT: 102(1.429324e-43), -229(nan)
226 ; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x,
227 ; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y,
228 ; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
229 ; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x,
230 ; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
231 ; R600-NEXT: -127(nan), 254(3.559298e-43)
232 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
233 ; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
234 ; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y,
235 ; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
236 ; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
237 ; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
238 ; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
239 ; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
240 ; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
241 ; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W,
242 ; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.x,
243 ; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
244 ; R600-NEXT: CNDE_INT T1.Z, T1.Y, T3.X, PS,
245 ; R600-NEXT: CNDE_INT T0.W, T1.W, PV.W, T1.X,
246 ; R600-NEXT: LSHL * T1.W, PV.Z, literal.x,
247 ; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
248 ; R600-NEXT: ADD_INT T1.W, PS, literal.x,
249 ; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.W, PV.Z,
250 ; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
251 ; R600-NEXT: MUL_IEEE T0.W, PS, PV.W,
252 ; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].Z,
253 ; R600-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
254 ; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0,
255 ; R600-NEXT: SETGT * T1.W, KC0[2].Z, literal.x,
256 ; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
257 ; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x,
258 ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
259 ; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
261 ; CM-LABEL: s_exp_f32:
263 ; CM-NEXT: ALU 64, @4, KC0[CB0:0-32], KC1[]
264 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
267 ; CM-NEXT: ALU clause starting at 4:
268 ; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
269 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
270 ; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
271 ; CM-NEXT: ADD * T1.W, KC0[2].Z, -PV.W,
272 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
273 ; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
274 ; CM-NEXT: RNDNE * T2.W, PV.Z,
275 ; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
276 ; CM-NEXT: TRUNC T2.Z, PV.W,
277 ; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
278 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
279 ; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
280 ; CM-NEXT: ADD T0.Z, T0.Z, -T2.W,
281 ; CM-NEXT: FLT_TO_INT * T0.W, PV.Z,
282 ; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
283 ; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x,
284 ; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
285 ; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
286 ; CM-NEXT: EXP_IEEE T0.X, T1.W,
287 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
288 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
289 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
290 ; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x,
291 ; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y,
292 ; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z,
293 ; CM-NEXT: 2130706432(1.701412e+38), -254(nan)
294 ; CM-NEXT: -330(nan), 0(0.000000e+00)
295 ; CM-NEXT: ADD_INT T1.X, T0.W, literal.x,
296 ; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
297 ; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z,
298 ; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w,
299 ; CM-NEXT: -127(nan), 204(2.858649e-43)
300 ; CM-NEXT: 102(1.429324e-43), -229(nan)
301 ; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x,
302 ; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
303 ; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y,
304 ; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z,
305 ; CM-NEXT: 254(3.559298e-43), -127(nan)
306 ; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
307 ; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
308 ; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
309 ; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
310 ; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y,
311 ; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43)
312 ; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
313 ; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W,
314 ; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x,
315 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
316 ; CM-NEXT: CNDE_INT T0.Y, T2.X, T0.Y, PV.W,
317 ; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.X,
318 ; CM-NEXT: LSHL * T1.W, PV.Y, literal.x,
319 ; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
320 ; CM-NEXT: ADD_INT T1.Z, PV.W, literal.x,
321 ; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Z, PV.Y,
322 ; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
323 ; CM-NEXT: MUL_IEEE T0.Z, PV.W, PV.Z,
324 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
325 ; CM-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
326 ; CM-NEXT: CNDE T0.Z, PV.W, PV.Z, 0.0,
327 ; CM-NEXT: SETGT * T0.W, KC0[2].Z, literal.x,
328 ; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
329 ; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
330 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
331 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
332 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: one exp intrinsic call on the scalar argument, result stored
; through the output pointer.
333 %result = call float @llvm.exp.f32(float %in)
334 store float %result, ptr addrspace(1) %out
338 ; FIXME: We should be able to merge these packets together on Cayman so we
339 ; have a maximum of 4 instructions.
340 define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
341 ; VI-SDAG-LABEL: s_exp_v2f32:
343 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
344 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
345 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
346 ; VI-SDAG-NEXT: s_and_b32 s4, s3, 0xfffff000
347 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4
348 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, s3, v2
349 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
350 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
351 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
352 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295
353 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v0
354 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v4
355 ; VI-SDAG-NEXT: s_and_b32 s4, s2, 0xfffff000
356 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
357 ; VI-SDAG-NEXT: v_mov_b32_e32 v6, s4
358 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
359 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2
360 ; VI-SDAG-NEXT: v_sub_f32_e32 v6, s2, v6
361 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
362 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
363 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v6
364 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6
365 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
366 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
367 ; VI-SDAG-NEXT: v_rndne_f32_e32 v5, v0
368 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v6, v7
369 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, s4, v4
370 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5
371 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
372 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
373 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
374 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v5
375 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
376 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
377 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v2
378 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218
379 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
380 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x7f800000
381 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3
382 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
383 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4
384 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2
385 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
386 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3
387 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1
388 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
389 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0
390 ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
391 ; VI-SDAG-NEXT: s_endpgm
393 ; VI-GISEL-LABEL: s_exp_v2f32:
395 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
396 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
397 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295
398 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
399 ; VI-GISEL-NEXT: s_and_b32 s4, s2, 0xfffff000
400 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
401 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2
402 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
403 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
404 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, s4, v0
405 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
406 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, s4, v1
407 ; VI-GISEL-NEXT: s_and_b32 s4, s3, 0xfffff000
408 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s4
409 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v4, v2
410 ; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3
411 ; VI-GISEL-NEXT: v_sub_f32_e32 v5, s3, v5
412 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
413 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x39a3b295, v5
414 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5
415 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
416 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
417 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
418 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
419 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
420 ; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
421 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
422 ; VI-GISEL-NEXT: v_rndne_f32_e32 v5, v0
423 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
424 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
425 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v5
426 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0
427 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
428 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
429 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
430 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
431 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
432 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
433 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
434 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
435 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
436 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3
437 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
438 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4
439 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
440 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
441 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
442 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
443 ; VI-GISEL-NEXT: s_endpgm
445 ; GFX900-SDAG-LABEL: s_exp_v2f32:
446 ; GFX900-SDAG: ; %bb.0:
447 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
448 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
449 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
450 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
451 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
452 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
453 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
454 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2
455 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
456 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4
457 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s2, v0
458 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
459 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6
460 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v6
461 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
462 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
463 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7
464 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
465 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v8, v0
466 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
467 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v7
468 ; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3
469 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5
470 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218
471 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
472 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000
473 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3
474 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
475 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v6
476 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5
477 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
478 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3
479 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
480 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
481 ; GFX900-SDAG-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
482 ; GFX900-SDAG-NEXT: s_endpgm
484 ; GFX900-GISEL-LABEL: s_exp_v2f32:
485 ; GFX900-GISEL: ; %bb.0:
486 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
487 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
488 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f
489 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
490 ; GFX900-GISEL-NEXT: s_mov_b32 s5, 0x42b17218
491 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
492 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s2
493 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
494 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v1, s4, -v2
495 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
496 ; GFX900-GISEL-NEXT: v_fma_f32 v1, s2, v0, v1
497 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
498 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
499 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
500 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
501 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s3, v3
502 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
503 ; GFX900-GISEL-NEXT: v_fma_f32 v3, s3, v3, -v5
504 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s3, v0, v3
505 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v5
506 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v5, v5, v3
507 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v5, v0
508 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
509 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
510 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
511 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0
512 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
513 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
514 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
515 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v2
516 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0xc2ce8ed0
517 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
518 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s3
519 ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v5, v3
520 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
521 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
522 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v1
523 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
524 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
525 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
526 ; GFX900-GISEL-NEXT: s_endpgm
528 ; SI-SDAG-LABEL: s_exp_v2f32:
530 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
531 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
532 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
533 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
534 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
535 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
536 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
537 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
538 ; SI-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2
539 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
540 ; SI-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4
541 ; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
542 ; SI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v0
543 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
544 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
545 ; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5
546 ; SI-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v5
547 ; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6
548 ; SI-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
549 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v7, v0
550 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
551 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v5, v6
552 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3
553 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
554 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v3
555 ; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218
556 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
557 ; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x7f800000
558 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v4
559 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc
560 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v5
561 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3
562 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
563 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v4
564 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
565 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
566 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
567 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
568 ; SI-SDAG-NEXT: s_endpgm
570 ; SI-GISEL-LABEL: s_exp_v2f32:
572 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
573 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
574 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f
575 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
576 ; SI-GISEL-NEXT: s_mov_b32 s5, 0x42b17218
577 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
578 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, s2
579 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
580 ; SI-GISEL-NEXT: v_fma_f32 v1, v1, s4, -v2
581 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
582 ; SI-GISEL-NEXT: v_fma_f32 v1, s2, v0, v1
583 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
584 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
585 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
586 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
587 ; SI-GISEL-NEXT: v_mul_f32_e32 v5, s3, v3
588 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
589 ; SI-GISEL-NEXT: v_fma_f32 v3, s3, v3, -v5
590 ; SI-GISEL-NEXT: v_fma_f32 v0, s3, v0, v3
591 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v5
592 ; SI-GISEL-NEXT: v_sub_f32_e32 v5, v5, v3
593 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v5, v0
594 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
595 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
596 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
597 ; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0
598 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
599 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
600 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
601 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v2
602 ; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2ce8ed0
603 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
604 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, s3
605 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v5, v3
606 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
607 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
608 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v1
609 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
610 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
611 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
612 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
613 ; SI-GISEL-NEXT: s_endpgm
615 ; R600-LABEL: s_exp_v2f32:
617 ; R600-NEXT: ALU 96, @4, KC0[CB0:0-32], KC1[]
618 ; R600-NEXT: ALU 12, @101, KC0[CB0:0-32], KC1[]
619 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
621 ; R600-NEXT: ALU clause starting at 4:
622 ; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x,
623 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
624 ; R600-NEXT: ADD * T1.W, KC0[3].X, -PV.W,
625 ; R600-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
626 ; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.y,
627 ; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.z,
628 ; R600-NEXT: -4096(nan), 967029397(3.122284e-04)
629 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
630 ; R600-NEXT: RNDNE T1.Z, PS,
631 ; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
632 ; R600-NEXT: ADD * T2.W, KC0[2].W, -PV.Z,
633 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
634 ; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x,
635 ; R600-NEXT: MUL_IEEE T2.Z, T0.Z, literal.y,
636 ; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
637 ; R600-NEXT: ADD * T1.W, T3.W, -PV.Z,
638 ; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
639 ; R600-NEXT: ADD T3.Z, PS, PV.W,
640 ; R600-NEXT: RNDNE T0.W, PV.Z,
641 ; R600-NEXT: MULADD_IEEE * T1.W, T2.W, literal.x, PV.Y, BS:VEC_021/SCL_122
642 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
643 ; R600-NEXT: TRUNC T0.Y, T1.Z,
644 ; R600-NEXT: MULADD_IEEE T0.Z, T0.Z, literal.x, PS, BS:VEC_120/SCL_212
645 ; R600-NEXT: ADD T1.W, T2.Z, -PV.W, BS:VEC_201
646 ; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
647 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
648 ; R600-NEXT: ADD T0.Z, PV.W, PV.Z,
649 ; R600-NEXT: FLT_TO_INT T1.W, PV.Y,
650 ; R600-NEXT: MUL_IEEE * T2.W, PS, literal.x,
651 ; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
652 ; R600-NEXT: MUL_IEEE T1.Z, PS, literal.x,
653 ; R600-NEXT: SETGT_UINT T3.W, PV.W, literal.y,
654 ; R600-NEXT: EXP_IEEE * T0.Y, PV.Z,
655 ; R600-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
656 ; R600-NEXT: CNDE_INT T1.X, PV.W, T2.W, PV.Z,
657 ; R600-NEXT: MUL_IEEE T1.Y, PS, literal.x,
658 ; R600-NEXT: MAX_INT T0.Z, T1.W, literal.y,
659 ; R600-NEXT: MIN_INT T2.W, T1.W, literal.z,
660 ; R600-NEXT: TRUNC * T0.W, T0.W,
661 ; R600-NEXT: 2130706432(1.701412e+38), -330(nan)
662 ; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
663 ; R600-NEXT: FLT_TO_INT T2.X, PS,
664 ; R600-NEXT: ADD_INT T2.Y, PV.W, literal.x,
665 ; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
666 ; R600-NEXT: ADD_INT T0.W, T1.W, literal.z,
667 ; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.w,
668 ; R600-NEXT: -254(nan), 204(2.858649e-43)
669 ; R600-NEXT: 102(1.429324e-43), -229(nan)
670 ; R600-NEXT: ADD_INT T3.X, T1.W, literal.x,
671 ; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W,
672 ; R600-NEXT: SETGT_INT T0.Z, T1.W, literal.x,
673 ; R600-NEXT: MUL_IEEE T0.W, T0.X, literal.y,
674 ; R600-NEXT: MUL_IEEE * T4.W, T0.Y, literal.y,
675 ; R600-NEXT: -127(nan), 209715200(1.972152e-31)
676 ; R600-NEXT: MUL_IEEE T4.X, PS, literal.x,
677 ; R600-NEXT: MUL_IEEE T4.Y, PV.W, literal.x,
678 ; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, T1.W,
679 ; R600-NEXT: CNDE_INT T3.W, T3.W, PV.X, T2.Y,
680 ; R600-NEXT: MAX_INT * T5.W, T2.X, literal.y,
681 ; R600-NEXT: 209715200(1.972152e-31), -330(nan)
682 ; R600-NEXT: SETGT_INT T3.X, T1.W, literal.x,
683 ; R600-NEXT: ADD_INT T2.Y, PS, literal.y,
684 ; R600-NEXT: ADD_INT T2.Z, T2.X, literal.z,
685 ; R600-NEXT: SETGT_UINT * T1.W, T2.X, literal.w,
686 ; R600-NEXT: 127(1.779649e-43), 204(2.858649e-43)
687 ; R600-NEXT: 102(1.429324e-43), -229(nan)
688 ; R600-NEXT: MIN_INT * T5.W, T2.X, literal.x,
689 ; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
690 ; R600-NEXT: ADD_INT T5.X, PV.W, literal.x,
691 ; R600-NEXT: ADD_INT T3.Y, T2.X, literal.y,
692 ; R600-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
693 ; R600-NEXT: CNDE_INT T5.W, T1.W, T2.Y, T2.Z,
694 ; R600-NEXT: SETGT_INT * T6.W, T2.X, literal.y,
695 ; R600-NEXT: -254(nan), -127(nan)
696 ; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
697 ; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T2.X,
698 ; R600-NEXT: CNDE_INT T2.Y, PV.Z, PV.Y, PV.X,
699 ; R600-NEXT: SETGT_INT T2.Z, T2.X, literal.x, BS:VEC_120/SCL_212
700 ; R600-NEXT: CNDE_INT T3.W, T3.X, T1.Z, T3.W, BS:VEC_021/SCL_122
701 ; R600-NEXT: CNDE_INT * T0.W, T2.W, T4.Y, T0.W,
702 ; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
703 ; R600-NEXT: CNDE_INT T0.X, T0.Z, PS, T0.X,
704 ; R600-NEXT: LSHL T3.Y, PV.W, literal.x,
705 ; R600-NEXT: CNDE_INT T0.Z, PV.Z, PV.X, PV.Y,
706 ; R600-NEXT: CNDE_INT T0.W, T1.W, T4.X, T4.W,
707 ; R600-NEXT: MUL_IEEE * T1.W, T1.Y, literal.y,
708 ; R600-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
709 ; R600-NEXT: CNDE_INT T2.X, T3.Z, T1.Y, PS,
710 ; R600-NEXT: CNDE_INT T0.Y, T6.W, PV.W, T0.Y,
711 ; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
712 ; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
713 ; R600-NEXT: CNDE_INT * T1.W, T3.X, PV.X, T1.X,
714 ; R600-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
715 ; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W,
716 ; R600-NEXT: SETGT T1.Z, literal.x, KC0[3].X,
717 ; R600-NEXT: ADD_INT * T0.W, PV.Z, literal.y,
718 ; R600-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
719 ; R600-NEXT: ALU clause starting at 101:
720 ; R600-NEXT: CNDE_INT * T1.W, T2.Z, T0.Y, T2.X,
721 ; R600-NEXT: MUL_IEEE T0.Y, PV.W, T0.W,
722 ; R600-NEXT: SETGT T0.Z, literal.x, KC0[2].W,
723 ; R600-NEXT: CNDE T0.W, T1.Z, T1.Y, 0.0,
724 ; R600-NEXT: SETGT * T1.W, KC0[3].X, literal.y,
725 ; R600-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
726 ; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x,
727 ; R600-NEXT: CNDE T0.W, PV.Z, PV.Y, 0.0,
728 ; R600-NEXT: SETGT * T1.W, KC0[2].W, literal.y,
729 ; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
730 ; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x,
731 ; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y,
732 ; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
734 ; CM-LABEL: s_exp_v2f32:
736 ; CM-NEXT: ALU 100, @4, KC0[CB0:0-32], KC1[]
737 ; CM-NEXT: ALU 18, @105, KC0[CB0:0-32], KC1[]
738 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
740 ; CM-NEXT: ALU clause starting at 4:
741 ; CM-NEXT: AND_INT * T0.W, KC0[2].W, literal.x,
742 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
743 ; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
744 ; CM-NEXT: ADD * T1.W, KC0[2].W, -PV.W,
745 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
746 ; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
747 ; CM-NEXT: RNDNE * T2.W, PV.Z,
748 ; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
749 ; CM-NEXT: TRUNC T0.Y, PV.W,
750 ; CM-NEXT: AND_INT T2.Z, KC0[3].X, literal.x,
751 ; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.y, PV.Z,
752 ; CM-NEXT: -4096(nan), 1069064192(1.442383e+00)
753 ; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
754 ; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y,
755 ; CM-NEXT: FLT_TO_INT T1.Z, PV.Y,
756 ; CM-NEXT: ADD * T0.W, KC0[3].X, -PV.Z,
757 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
758 ; CM-NEXT: ADD T1.X, T0.Z, -T2.W,
759 ; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
760 ; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y,
761 ; CM-NEXT: RNDNE * T1.W, PV.Y,
762 ; CM-NEXT: 967029397(3.122284e-04), -330(nan)
763 ; CM-NEXT: TRUNC T2.X, PV.W,
764 ; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
765 ; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.Y,
766 ; CM-NEXT: ADD * T0.W, PV.X, T0.X,
767 ; CM-NEXT: 204(2.858649e-43), 1069064192(1.442383e+00)
768 ; CM-NEXT: EXP_IEEE T0.X, T0.W,
769 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
770 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
771 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
772 ; CM-NEXT: ADD_INT T1.X, T1.Z, literal.x,
773 ; CM-NEXT: MULADD_IEEE T0.Y, T2.Z, literal.y, T0.Z, BS:VEC_102/SCL_221
774 ; CM-NEXT: ADD T0.Z, T1.Y, -T1.W,
775 ; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z,
776 ; CM-NEXT: 102(1.429324e-43), 967029397(3.122284e-04)
777 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
778 ; CM-NEXT: SETGT_UINT T3.X, T1.Z, literal.x,
779 ; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y,
780 ; CM-NEXT: SETGT_UINT T2.Z, T1.Z, literal.z,
781 ; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
782 ; CM-NEXT: -229(nan), 2130706432(1.701412e+38)
783 ; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
784 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T1.W,
785 ; CM-NEXT: EXP_IEEE T0.Y, T1.W,
786 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
787 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
788 ; CM-NEXT: CNDE_INT T4.X, T2.Z, T0.W, T1.Y,
789 ; CM-NEXT: CNDE_INT T1.Y, T3.X, T2.Y, T1.X,
790 ; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
791 ; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x,
792 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
793 ; CM-NEXT: SETGT_INT T1.X, T1.Z, literal.x,
794 ; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y,
795 ; CM-NEXT: MUL_IEEE T3.Z, PV.W, literal.z,
796 ; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w,
797 ; CM-NEXT: -127(nan), 209715200(1.972152e-31)
798 ; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
799 ; CM-NEXT: CNDE_INT T2.X, PV.W, T0.W, PV.Z,
800 ; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
801 ; CM-NEXT: CNDE_INT T3.Z, PV.X, T1.Y, T1.Z,
802 ; CM-NEXT: MAX_INT * T0.W, T0.Z, literal.y,
803 ; CM-NEXT: 209715200(1.972152e-31), -330(nan)
804 ; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
805 ; CM-NEXT: ADD_INT T1.Y, T0.Z, literal.y,
806 ; CM-NEXT: SETGT_UINT T4.Z, T0.Z, literal.z,
807 ; CM-NEXT: MUL_IEEE * T0.W, T0.Y, literal.w,
808 ; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
809 ; CM-NEXT: -229(nan), 209715200(1.972152e-31)
810 ; CM-NEXT: MUL_IEEE T6.X, PV.W, literal.x,
811 ; CM-NEXT: MIN_INT T4.Y, T0.Z, literal.y,
812 ; CM-NEXT: CNDE_INT T5.Z, PV.Z, PV.X, PV.Y,
813 ; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.z,
814 ; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
815 ; CM-NEXT: -127(nan), 0(0.000000e+00)
816 ; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
817 ; CM-NEXT: MIN_INT T1.Y, T1.Z, literal.x,
818 ; CM-NEXT: ADD_INT T5.Z, PV.Y, literal.y,
819 ; CM-NEXT: ADD_INT * T3.W, T0.Z, literal.z, BS:VEC_120/SCL_212
820 ; CM-NEXT: 381(5.338947e-43), -254(nan)
821 ; CM-NEXT: -127(nan), 0(0.000000e+00)
822 ; CM-NEXT: CNDE_INT T7.X, T1.W, PV.W, PV.Z,
823 ; CM-NEXT: SETGT_INT T4.Y, T0.Z, literal.x,
824 ; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
825 ; CM-NEXT: ADD_INT * T1.W, T1.Z, literal.z, BS:VEC_120/SCL_212
826 ; CM-NEXT: 127(1.779649e-43), -254(nan)
827 ; CM-NEXT: -127(nan), 0(0.000000e+00)
828 ; CM-NEXT: CNDE_INT T8.X, T2.Z, PV.W, PV.Z,
829 ; CM-NEXT: SETGT_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
830 ; CM-NEXT: CNDE_INT T0.Z, PV.Y, T5.X, PV.X,
831 ; CM-NEXT: CNDE_INT * T0.W, T4.Z, T6.X, T0.W, BS:VEC_201
832 ; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
833 ; CM-NEXT: CNDE_INT T5.X, T2.W, PV.W, T0.Y,
834 ; CM-NEXT: LSHL T0.Y, PV.Z, literal.x,
835 ; CM-NEXT: CNDE_INT T0.Z, PV.Y, T3.Z, PV.X,
836 ; CM-NEXT: CNDE_INT * T0.W, T3.X, T3.Y, T2.Y, BS:VEC_201
837 ; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
838 ; CM-NEXT: CNDE_INT T0.X, T1.X, PV.W, T0.X,
839 ; CM-NEXT: LSHL T2.Y, PV.Z, literal.x,
840 ; CM-NEXT: ADD_INT * T0.Z, PV.Y, literal.y,
841 ; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
842 ; CM-NEXT: ALU clause starting at 105:
843 ; CM-NEXT: CNDE_INT * T0.W, T4.Y, T5.X, T2.X,
844 ; CM-NEXT: MUL_IEEE T1.X, PV.W, T0.Z,
845 ; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].X,
846 ; CM-NEXT: ADD_INT T0.Z, T2.Y, literal.y,
847 ; CM-NEXT: CNDE_INT * T0.W, T1.Y, T0.X, T4.X, BS:VEC_120/SCL_212
848 ; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
849 ; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
850 ; CM-NEXT: SETGT T1.Y, literal.x, KC0[2].W,
851 ; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
852 ; CM-NEXT: SETGT * T0.W, KC0[3].X, literal.y,
853 ; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
854 ; CM-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.x,
855 ; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
856 ; CM-NEXT: SETGT * T0.W, KC0[2].W, literal.y,
857 ; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
858 ; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
859 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
860 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
861 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
862 %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %in)
863 store <2 x float> %result, ptr addrspace(1) %out
867 define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
868 ; VI-SDAG-LABEL: s_exp_v3f32:
870 ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
871 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
872 ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
873 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
874 ; VI-SDAG-NEXT: s_and_b32 s2, s6, 0xfffff000
875 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
876 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, s6, v2
877 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
878 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
879 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v0
880 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
881 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295
882 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
883 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v4
884 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
885 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2
886 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
887 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
888 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
889 ; VI-SDAG-NEXT: s_and_b32 s2, s5, 0xfffff000
890 ; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2
891 ; VI-SDAG-NEXT: v_sub_f32_e32 v7, s5, v7
892 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
893 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
894 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7
895 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7
896 ; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v2
897 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8
898 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, s2, v4
899 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6
900 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7
901 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7
902 ; VI-SDAG-NEXT: v_exp_f32_e32 v7, v2
903 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6
904 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
905 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3
906 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218
907 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
908 ; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
909 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5
910 ; VI-SDAG-NEXT: s_and_b32 s2, s4, 0xfffff000
911 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc
912 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v7, v6
913 ; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2
914 ; VI-SDAG-NEXT: v_sub_f32_e32 v7, s4, v7
915 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
916 ; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v7
917 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7
918 ; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v0
919 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v9
920 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4
921 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v6
922 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
923 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
924 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
925 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v6
926 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3
927 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
928 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5
929 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
930 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4
931 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3
932 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
933 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5
934 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1
935 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
936 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0
937 ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
938 ; VI-SDAG-NEXT: s_endpgm
940 ; VI-GISEL-LABEL: s_exp_v3f32:
942 ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
943 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000
944 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295
945 ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
946 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
947 ; VI-GISEL-NEXT: s_and_b32 s2, s4, 0xfffff000
948 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
949 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, s4, v0
950 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0
951 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
952 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, s2, v1
953 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4
954 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, s2, v2
955 ; VI-GISEL-NEXT: s_and_b32 s2, s5, 0xfffff000
956 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s2
957 ; VI-GISEL-NEXT: v_sub_f32_e32 v5, s5, v5
958 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v5
959 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5
960 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v1
961 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7
962 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, s2, v2
963 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v7, v5
964 ; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6
965 ; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7
966 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v6, v5
967 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
968 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
969 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0
970 ; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3
971 ; VI-GISEL-NEXT: s_and_b32 s2, s6, 0xfffff000
972 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
973 ; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6
974 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, s2
975 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0
976 ; VI-GISEL-NEXT: v_sub_f32_e32 v6, s6, v6
977 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
978 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
979 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v6
980 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6
981 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1
982 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
983 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
984 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v6
985 ; VI-GISEL-NEXT: v_rndne_f32_e32 v6, v1
986 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
987 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
988 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
989 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
990 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v3
991 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
992 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6
993 ; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1
994 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
995 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
996 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
997 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
998 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v3
999 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
1000 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v4
1001 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1002 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
1003 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3
1004 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
1005 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4
1006 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1
1007 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
1008 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0
1009 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
1010 ; VI-GISEL-NEXT: s_endpgm
1012 ; GFX900-SDAG-LABEL: s_exp_v3f32:
1013 ; GFX900-SDAG: ; %bb.0:
1014 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1015 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
1016 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
1017 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218
1018 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1019 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1020 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v0
1021 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6
1022 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7
1023 ; GFX900-SDAG-NEXT: v_fma_f32 v6, s5, v0, -v6
1024 ; GFX900-SDAG-NEXT: v_fma_f32 v6, s5, v1, v6
1025 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
1026 ; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
1027 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
1028 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s6, v0, -v2
1029 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v6, v6
1030 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
1031 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
1032 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s6, v1, v4
1033 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
1034 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
1035 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
1036 ; GFX900-SDAG-NEXT: v_ldexp_f32 v6, v6, v7
1037 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s4, v0
1038 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v9, v7
1039 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v7
1040 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9
1041 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
1042 ; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3
1043 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
1044 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v10, v0
1045 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3
1046 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
1047 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9
1048 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1049 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
1050 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5
1051 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
1052 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3
1053 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc
1054 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5
1055 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
1056 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v7
1057 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3
1058 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
1059 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5
1060 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
1061 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
1062 ; GFX900-SDAG-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
1063 ; GFX900-SDAG-NEXT: s_endpgm
1065 ; GFX900-GISEL-LABEL: s_exp_v3f32:
1066 ; GFX900-GISEL: ; %bb.0:
1067 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1068 ; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b
1069 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
1070 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
1071 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
1072 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1073 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
1074 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
1075 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v2
1076 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
1077 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v1, v0
1078 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
1079 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v2, v0
1080 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
1081 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
1082 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s5, v3
1083 ; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v6
1084 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v8, v6
1085 ; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v1, v7
1086 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v6, v6, v8
1087 ; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
1088 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8
1089 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6
1090 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
1091 ; GFX900-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0
1092 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v8, s5
1093 ; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v7
1094 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3
1095 ; GFX900-GISEL-NEXT: v_fma_f32 v3, s6, v3, -v7
1096 ; GFX900-GISEL-NEXT: v_fma_f32 v1, s6, v1, v3
1097 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v7
1098 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v7, v7, v3
1099 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v2
1100 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
1101 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v7, v1
1102 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v2
1103 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
1104 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v7, v1
1105 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1106 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
1107 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1108 ; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x42b17218
1109 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
1110 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8
1111 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc
1112 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8
1113 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1114 ; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v7, v3
1115 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
1116 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v3, 0, vcc
1117 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4
1118 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
1119 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
1120 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1121 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
1122 ; GFX900-GISEL-NEXT: s_endpgm
1124 ; SI-SDAG-LABEL: s_exp_v3f32:
1126 ; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
1127 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
1128 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x32a5705f
1129 ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1130 ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
1131 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1132 ; SI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v0
1133 ; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5
1134 ; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6
1135 ; SI-SDAG-NEXT: v_fma_f32 v5, s4, v0, -v5
1136 ; SI-SDAG-NEXT: v_fma_f32 v5, s4, v2, v5
1137 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0
1138 ; SI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5
1139 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
1140 ; SI-SDAG-NEXT: v_fma_f32 v4, s5, v0, -v1
1141 ; SI-SDAG-NEXT: v_exp_f32_e32 v5, v5
1142 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6
1143 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
1144 ; SI-SDAG-NEXT: v_fma_f32 v4, s5, v2, v4
1145 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
1146 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
1147 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
1148 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, v5, v6
1149 ; SI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v0
1150 ; SI-SDAG-NEXT: v_rndne_f32_e32 v8, v6
1151 ; SI-SDAG-NEXT: v_fma_f32 v0, s6, v0, -v6
1152 ; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v8
1153 ; SI-SDAG-NEXT: v_fma_f32 v0, s6, v2, v0
1154 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3
1155 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
1156 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v9, v0
1157 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3
1158 ; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218
1159 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0
1160 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v8
1161 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
1162 ; SI-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000
1163 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v4
1164 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
1165 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3
1166 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc
1167 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v4
1168 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
1169 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v6
1170 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3
1171 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1172 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v4
1173 ; SI-SDAG-NEXT: s_mov_b32 s2, -1
1174 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
1175 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
1176 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1177 ; SI-SDAG-NEXT: s_endpgm
1179 ; SI-GISEL-LABEL: s_exp_v3f32:
1180 ; SI-GISEL: ; %bb.0:
1181 ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
1182 ; SI-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b
1183 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
1184 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
1185 ; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1186 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1187 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
1188 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
1189 ; SI-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v2
1190 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
1191 ; SI-GISEL-NEXT: v_fma_f32 v0, s4, v1, v0
1192 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
1193 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v2, v0
1194 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
1195 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
1196 ; SI-GISEL-NEXT: v_mul_f32_e32 v6, s5, v3
1197 ; SI-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v6
1198 ; SI-GISEL-NEXT: v_rndne_f32_e32 v8, v6
1199 ; SI-GISEL-NEXT: v_fma_f32 v7, s5, v1, v7
1200 ; SI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v8
1201 ; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
1202 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8
1203 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6
1204 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
1205 ; SI-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0
1206 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
1207 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v7
1208 ; SI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3
1209 ; SI-GISEL-NEXT: v_fma_f32 v3, s6, v3, -v7
1210 ; SI-GISEL-NEXT: v_fma_f32 v1, s6, v1, v3
1211 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v7
1212 ; SI-GISEL-NEXT: v_sub_f32_e32 v7, v7, v3
1213 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
1214 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
1215 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v7, v1
1216 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v2
1217 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
1218 ; SI-GISEL-NEXT: v_exp_f32_e32 v7, v1
1219 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1220 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
1221 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, s5
1222 ; SI-GISEL-NEXT: s_mov_b32 s3, 0x42b17218
1223 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
1224 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8
1225 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc
1226 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8
1227 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1228 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v7, v3
1229 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
1230 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v3, 0, vcc
1231 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4
1232 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
1233 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
1234 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
1235 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1236 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
1237 ; SI-GISEL-NEXT: s_endpgm
1239 ; R600-LABEL: s_exp_v3f32:
1241 ; R600-NEXT: ALU 100, @6, KC0[CB0:0-32], KC1[]
1242 ; R600-NEXT: ALU 69, @107, KC0[CB0:0-32], KC1[]
1243 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1244 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1247 ; R600-NEXT: ALU clause starting at 6:
1248 ; R600-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
1249 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
1250 ; R600-NEXT: ADD T1.W, KC0[3].Y, -PV.W,
1251 ; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
1252 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1253 ; R600-NEXT: RNDNE T3.W, PS,
1254 ; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
1255 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
1256 ; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
1257 ; R600-NEXT: TRUNC * T4.W, PV.W,
1258 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1259 ; R600-NEXT: FLT_TO_INT T0.Z, PS,
1260 ; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
1261 ; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
1262 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
1263 ; R600-NEXT: ADD T0.W, PS, PV.W,
1264 ; R600-NEXT: MAX_INT * T1.W, PV.Z, literal.x,
1265 ; R600-NEXT: -330(nan), 0(0.000000e+00)
1266 ; R600-NEXT: ADD_INT T0.Y, PS, literal.x,
1267 ; R600-NEXT: ADD_INT T1.Z, T0.Z, literal.y,
1268 ; R600-NEXT: SETGT_UINT T1.W, T0.Z, literal.z,
1269 ; R600-NEXT: EXP_IEEE * T0.X, PV.W,
1270 ; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
1271 ; R600-NEXT: -229(nan), 0(0.000000e+00)
1272 ; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
1273 ; R600-NEXT: SETGT_INT T0.W, T0.Z, literal.x,
1274 ; R600-NEXT: MUL_IEEE * T2.W, PS, literal.y,
1275 ; R600-NEXT: -127(nan), 209715200(1.972152e-31)
1276 ; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x,
1277 ; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
1278 ; R600-NEXT: MIN_INT T3.W, T0.Z, literal.y,
1279 ; R600-NEXT: AND_INT * T4.W, KC0[3].W, literal.z,
1280 ; R600-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
1281 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
1282 ; R600-NEXT: MUL_IEEE T1.X, T0.X, literal.x,
1283 ; R600-NEXT: ADD T1.Y, KC0[3].W, -PS,
1284 ; R600-NEXT: ADD_INT T2.Z, PV.W, literal.y,
1285 ; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z,
1286 ; R600-NEXT: SETGT_UINT * T5.W, T0.Z, literal.w,
1287 ; R600-NEXT: 2130706432(1.701412e+38), -254(nan)
1288 ; R600-NEXT: -127(nan), 254(3.559298e-43)
1289 ; R600-NEXT: CNDE_INT T2.X, PS, PV.W, PV.Z,
1290 ; R600-NEXT: SETGT_INT T2.Y, T0.Z, literal.x,
1291 ; R600-NEXT: MUL_IEEE T0.Z, PV.Y, literal.y,
1292 ; R600-NEXT: MUL_IEEE T3.W, T4.W, literal.z,
1293 ; R600-NEXT: MUL_IEEE * T6.W, PV.X, literal.w,
1294 ; R600-NEXT: 127(1.779649e-43), 967029397(3.122284e-04)
1295 ; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
1296 ; R600-NEXT: CNDE_INT T1.X, T5.W, T1.X, PS, BS:VEC_120/SCL_212
1297 ; R600-NEXT: RNDNE T3.Y, PV.W,
1298 ; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
1299 ; R600-NEXT: CNDE_INT T5.W, PV.Y, T1.Z, PV.X,
1300 ; R600-NEXT: CNDE_INT * T1.W, T1.W, T0.Y, T2.W,
1301 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1302 ; R600-NEXT: CNDE_INT T0.X, T0.W, PS, T0.X,
1303 ; R600-NEXT: LSHL T0.Y, PV.W, literal.x,
1304 ; R600-NEXT: AND_INT T1.Z, KC0[3].Z, literal.y,
1305 ; R600-NEXT: MULADD_IEEE T0.W, T4.W, literal.z, PV.Z, BS:VEC_120/SCL_212
1306 ; R600-NEXT: ADD * T1.W, T3.W, -PV.Y,
1307 ; R600-NEXT: 23(3.222986e-44), -4096(nan)
1308 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
1309 ; R600-NEXT: ADD T1.Y, PS, PV.W,
1310 ; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.x,
1311 ; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
1312 ; R600-NEXT: CNDE_INT * T1.W, T2.Y, PV.X, T1.X,
1313 ; R600-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00)
1314 ; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
1315 ; R600-NEXT: ADD T0.Y, KC0[3].Z, -T1.Z,
1316 ; R600-NEXT: RNDNE T2.Z, PV.Z,
1317 ; R600-NEXT: TRUNC T0.W, T3.Y,
1318 ; R600-NEXT: EXP_IEEE * T1.X, PV.Y,
1319 ; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Y,
1320 ; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
1321 ; R600-NEXT: TRUNC T3.Z, PV.Z,
1322 ; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y,
1323 ; R600-NEXT: MUL_IEEE * T1.W, PS, literal.z,
1324 ; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04)
1325 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
1326 ; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
1327 ; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
1328 ; R600-NEXT: MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W,
1329 ; R600-NEXT: FLT_TO_INT T0.W, PV.Z,
1330 ; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.w,
1331 ; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
1332 ; R600-NEXT: 1069064192(1.442383e+00), 381(5.338947e-43)
1333 ; R600-NEXT: ADD_INT T4.X, PS, literal.x,
1334 ; R600-NEXT: MAX_INT T0.Y, PV.W, literal.y,
1335 ; R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.z, PV.Z,
1336 ; R600-NEXT: ADD T2.W, T0.Z, -T2.Z, BS:VEC_120/SCL_212
1337 ; R600-NEXT: MIN_INT * T3.W, PV.W, literal.w,
1338 ; R600-NEXT: -254(nan), -330(nan)
1339 ; R600-NEXT: 967029397(3.122284e-04), 381(5.338947e-43)
1340 ; R600-NEXT: ADD_INT T5.X, PS, literal.x,
1341 ; R600-NEXT: ADD T3.Y, PV.W, PV.Z,
1342 ; R600-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
1343 ; R600-NEXT: ADD_INT T2.W, T0.W, literal.z,
1344 ; R600-NEXT: SETGT_UINT * T3.W, T0.W, literal.w,
1345 ; R600-NEXT: -254(nan), 204(2.858649e-43)
1346 ; R600-NEXT: 102(1.429324e-43), -229(nan)
1347 ; R600-NEXT: ADD_INT * T6.X, T0.W, literal.x,
1348 ; R600-NEXT: -127(nan), 0(0.000000e+00)
1349 ; R600-NEXT: ALU clause starting at 107:
1350 ; R600-NEXT: SETGT_UINT T0.Y, T0.W, literal.x,
1351 ; R600-NEXT: CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221
1352 ; R600-NEXT: SETGT_INT T2.W, T0.W, literal.y,
1353 ; R600-NEXT: EXP_IEEE * T1.Z, T3.Y,
1354 ; R600-NEXT: 254(3.559298e-43), -127(nan)
1355 ; R600-NEXT: ADD_INT T7.X, T1.Y, literal.x,
1356 ; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y,
1357 ; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.W,
1358 ; R600-NEXT: CNDE_INT T4.W, PV.Y, T6.X, T5.X,
1359 ; R600-NEXT: SETGT_INT * T0.W, T0.W, literal.z,
1360 ; R600-NEXT: -127(nan), 209715200(1.972152e-31)
1361 ; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
1362 ; R600-NEXT: SETGT_UINT T5.X, T1.Y, literal.x,
1363 ; R600-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
1364 ; R600-NEXT: MAX_INT T0.Z, T1.Y, literal.y,
1365 ; R600-NEXT: MUL_IEEE T4.W, T1.Z, literal.z,
1366 ; R600-NEXT: MUL_IEEE * T5.W, PV.Y, literal.w,
1367 ; R600-NEXT: 254(3.559298e-43), -330(nan)
1368 ; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
1369 ; R600-NEXT: CNDE_INT T6.X, T3.W, PS, T3.Y, BS:VEC_021/SCL_122
1370 ; R600-NEXT: MUL_IEEE T3.Y, PV.W, literal.x,
1371 ; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
1372 ; R600-NEXT: ADD_INT T3.W, T1.Y, literal.z,
1373 ; R600-NEXT: SETGT_UINT * T5.W, T1.Y, literal.w,
1374 ; R600-NEXT: 2130706432(1.701412e+38), 204(2.858649e-43)
1375 ; R600-NEXT: 102(1.429324e-43), -229(nan)
1376 ; R600-NEXT: CNDE_INT T8.X, PS, PV.Z, PV.W,
1377 ; R600-NEXT: SETGT_INT T5.Y, T1.Y, literal.x,
1378 ; R600-NEXT: CNDE_INT T0.Z, T0.Y, T4.W, PV.Y, BS:VEC_120/SCL_212
1379 ; R600-NEXT: CNDE_INT T2.W, T2.W, PV.X, T1.Z,
1380 ; R600-NEXT: LSHL * T3.W, T4.Y, literal.y,
1381 ; R600-NEXT: -127(nan), 23(3.222986e-44)
1382 ; R600-NEXT: ADD_INT T6.X, PS, literal.x,
1383 ; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.W, PV.Z,
1384 ; R600-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T1.Y,
1385 ; R600-NEXT: CNDE_INT T0.W, T5.X, T7.X, T4.X,
1386 ; R600-NEXT: SETGT_INT * T2.W, T1.Y, literal.y,
1387 ; R600-NEXT: 1065353216(1.000000e+00), 127(1.779649e-43)
1388 ; R600-NEXT: CNDE_INT T4.X, PS, PV.Z, PV.W,
1389 ; R600-NEXT: MUL_IEEE T0.Y, PV.Y, PV.X,
1390 ; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].Z,
1391 ; R600-NEXT: CNDE_INT T0.W, T5.W, T2.Y, T1.W,
1392 ; R600-NEXT: MUL_IEEE * T1.W, T3.X, literal.y,
1393 ; R600-NEXT: -1026650416(-1.032789e+02), 2130706432(1.701412e+38)
1394 ; R600-NEXT: CNDE_INT T3.X, T5.X, T3.X, PS,
1395 ; R600-NEXT: CNDE_INT T1.Y, T5.Y, PV.W, T1.X,
1396 ; R600-NEXT: CNDE T0.Z, PV.Z, PV.Y, 0.0,
1397 ; R600-NEXT: SETGT T0.W, KC0[3].Z, literal.x,
1398 ; R600-NEXT: LSHL * T1.W, PV.X, literal.y,
1399 ; R600-NEXT: 1118925336(8.872284e+01), 23(3.222986e-44)
1400 ; R600-NEXT: ADD_INT T1.X, PS, literal.x,
1401 ; R600-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.y,
1402 ; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, PV.X,
1403 ; R600-NEXT: CNDE T0.W, T2.X, T0.X, 0.0,
1404 ; R600-NEXT: SETGT * T1.W, KC0[3].Y, literal.z,
1405 ; R600-NEXT: 1065353216(1.000000e+00), 2139095040(INF)
1406 ; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
1407 ; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x,
1408 ; R600-NEXT: MUL_IEEE T0.W, PV.Z, PV.X,
1409 ; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
1410 ; R600-NEXT: 2139095040(INF), -1026650416(-1.032789e+02)
1411 ; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
1412 ; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0,
1413 ; R600-NEXT: SETGT * T1.W, KC0[3].W, literal.y,
1414 ; R600-NEXT: 2(2.802597e-45), 1118925336(8.872284e+01)
1415 ; R600-NEXT: CNDE T2.X, PS, PV.W, literal.x,
1416 ; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
1417 ; R600-NEXT: 2139095040(INF), 8(1.121039e-44)
1418 ; R600-NEXT: LSHR * T3.X, PV.W, literal.x,
1419 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1421 ; CM-LABEL: s_exp_v3f32:
1423 ; CM-NEXT: ALU 102, @6, KC0[CB0:0-32], KC1[]
1424 ; CM-NEXT: ALU 80, @109, KC0[CB0:0-32], KC1[]
1425 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T3.X
1426 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
1429 ; CM-NEXT: ALU clause starting at 6:
1430 ; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
1431 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
1432 ; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
1433 ; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W,
1434 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1435 ; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
1436 ; CM-NEXT: RNDNE * T2.W, PV.Z,
1437 ; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
1438 ; CM-NEXT: TRUNC T2.Z, PV.W,
1439 ; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
1440 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1441 ; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
1442 ; CM-NEXT: ADD T0.Z, T0.Z, -T2.W,
1443 ; CM-NEXT: FLT_TO_INT * T0.W, PV.Z,
1444 ; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
1445 ; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x,
1446 ; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
1447 ; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
1448 ; CM-NEXT: EXP_IEEE T0.X, T1.W,
1449 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
1450 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
1451 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
1452 ; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x,
1453 ; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y,
1454 ; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z,
1455 ; CM-NEXT: 2130706432(1.701412e+38), -254(nan)
1456 ; CM-NEXT: -330(nan), 0(0.000000e+00)
1457 ; CM-NEXT: ADD_INT T1.X, T0.W, literal.x,
1458 ; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
1459 ; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z,
1460 ; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w,
1461 ; CM-NEXT: -127(nan), 204(2.858649e-43)
1462 ; CM-NEXT: 102(1.429324e-43), -229(nan)
1463 ; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x,
1464 ; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
1465 ; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y,
1466 ; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z,
1467 ; CM-NEXT: 254(3.559298e-43), -127(nan)
1468 ; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
1469 ; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
1470 ; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
1471 ; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
1472 ; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y,
1473 ; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43)
1474 ; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
1475 ; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W,
1476 ; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x,
1477 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
1478 ; CM-NEXT: CNDE_INT T1.X, T2.X, T0.Y, PV.W,
1479 ; CM-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, T0.X,
1480 ; CM-NEXT: LSHL T0.Z, PV.Y, literal.x,
1481 ; CM-NEXT: AND_INT * T1.W, KC0[3].Z, literal.y,
1482 ; CM-NEXT: 23(3.222986e-44), -4096(nan)
1483 ; CM-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
1484 ; CM-NEXT: ADD T1.Y, KC0[3].Z, -PV.W,
1485 ; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
1486 ; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Y, PV.X,
1487 ; CM-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00)
1488 ; CM-NEXT: MUL_IEEE T0.Y, PV.W, PV.Z,
1489 ; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
1490 ; CM-NEXT: RNDNE * T0.W, PV.X,
1491 ; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
1492 ; CM-NEXT: SETGT T1.X, literal.x, KC0[3].Y,
1493 ; CM-NEXT: TRUNC T2.Y, PV.W,
1494 ; CM-NEXT: AND_INT T1.Z, KC0[3].W, literal.y,
1495 ; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.z, PV.Z,
1496 ; CM-NEXT: -1026650416(-1.032789e+02), -4096(nan)
1497 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
1498 ; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, PV.W,
1499 ; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y,
1500 ; CM-NEXT: FLT_TO_INT T0.Z, PV.Y,
1501 ; CM-NEXT: ADD * T1.W, KC0[3].W, -PV.Z,
1502 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
1503 ; CM-NEXT: ADD T0.X, T0.X, -T0.W,
1504 ; CM-NEXT: MUL_IEEE T2.Y, PV.W, literal.x,
1505 ; CM-NEXT: MAX_INT T2.Z, PV.Z, literal.y,
1506 ; CM-NEXT: RNDNE * T0.W, PV.Y,
1507 ; CM-NEXT: 967029397(3.122284e-04), -330(nan)
1508 ; CM-NEXT: TRUNC T3.X, PV.W,
1509 ; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.x,
1510 ; CM-NEXT: MULADD_IEEE T2.Z, T1.W, literal.y, PV.Y,
1511 ; CM-NEXT: ADD * T1.W, PV.X, T2.X,
1512 ; CM-NEXT: 204(2.858649e-43), 1069064192(1.442383e+00)
1513 ; CM-NEXT: EXP_IEEE T0.X, T1.W,
1514 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
1515 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
1516 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
1517 ; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
1518 ; CM-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.y, T2.Z, BS:VEC_102/SCL_221
1519 ; CM-NEXT: ADD T1.Z, T1.Y, -T0.W,
1520 ; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z,
1521 ; CM-NEXT: 102(1.429324e-43), 967029397(3.122284e-04)
1522 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
1523 ; CM-NEXT: SETGT_UINT T4.X, T0.Z, literal.x,
1524 ; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y,
1525 ; CM-NEXT: SETGT_UINT T2.Z, T0.Z, literal.z,
1526 ; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
1527 ; CM-NEXT: -229(nan), 2130706432(1.701412e+38)
1528 ; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
1529 ; CM-NEXT: EXP_IEEE T1.X (MASKED), T1.W,
1530 ; CM-NEXT: EXP_IEEE T1.Y (MASKED), T1.W,
1531 ; CM-NEXT: EXP_IEEE T1.Z, T1.W,
1532 ; CM-NEXT: EXP_IEEE * T1.W (MASKED), T1.W,
1533 ; CM-NEXT: ALU clause starting at 109:
1534 ; CM-NEXT: CNDE_INT T5.X, T2.Z, T0.W, T1.Y,
1535 ; CM-NEXT: CNDE_INT T1.Y, T4.X, T3.Y, T2.X,
1536 ; CM-NEXT: FLT_TO_INT T3.Z, T3.X, BS:VEC_120/SCL_212
1537 ; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.x, BS:VEC_120/SCL_212
1538 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
1539 ; CM-NEXT: SETGT_INT T2.X, T0.Z, literal.x,
1540 ; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y,
1541 ; CM-NEXT: MUL_IEEE T4.Z, PV.W, literal.z,
1542 ; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w,
1543 ; CM-NEXT: -127(nan), 209715200(1.972152e-31)
1544 ; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
1545 ; CM-NEXT: CNDE_INT T3.X, PV.W, T0.W, PV.Z,
1546 ; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
1547 ; CM-NEXT: CNDE_INT T4.Z, PV.X, T1.Y, T0.Z,
1548 ; CM-NEXT: MAX_INT * T0.W, T3.Z, literal.y,
1549 ; CM-NEXT: 209715200(1.972152e-31), -330(nan)
1550 ; CM-NEXT: ADD_INT T6.X, PV.W, literal.x,
1551 ; CM-NEXT: ADD_INT T1.Y, T3.Z, literal.y,
1552 ; CM-NEXT: SETGT_UINT T5.Z, T3.Z, literal.z,
1553 ; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.w, BS:VEC_120/SCL_212
1554 ; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
1555 ; CM-NEXT: -229(nan), 209715200(1.972152e-31)
1556 ; CM-NEXT: MUL_IEEE T7.X, PV.W, literal.x,
1557 ; CM-NEXT: MIN_INT T4.Y, T3.Z, literal.y,
1558 ; CM-NEXT: CNDE_INT T6.Z, PV.Z, PV.X, PV.Y,
1559 ; CM-NEXT: SETGT_INT * T2.W, T3.Z, literal.z,
1560 ; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
1561 ; CM-NEXT: -127(nan), 0(0.000000e+00)
1562 ; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, T3.Z,
1563 ; CM-NEXT: MIN_INT T1.Y, T0.Z, literal.x,
1564 ; CM-NEXT: ADD_INT T6.Z, PV.Y, literal.y,
1565 ; CM-NEXT: ADD_INT * T3.W, T3.Z, literal.z, BS:VEC_120/SCL_212
1566 ; CM-NEXT: 381(5.338947e-43), -254(nan)
1567 ; CM-NEXT: -127(nan), 0(0.000000e+00)
1568 ; CM-NEXT: CNDE_INT T8.X, T1.W, PV.W, PV.Z,
1569 ; CM-NEXT: SETGT_INT T4.Y, T3.Z, literal.x,
1570 ; CM-NEXT: ADD_INT T3.Z, PV.Y, literal.y,
1571 ; CM-NEXT: ADD_INT * T1.W, T0.Z, literal.z, BS:VEC_120/SCL_212
1572 ; CM-NEXT: 127(1.779649e-43), -254(nan)
1573 ; CM-NEXT: -127(nan), 0(0.000000e+00)
1574 ; CM-NEXT: CNDE_INT T9.X, T2.Z, PV.W, PV.Z,
1575 ; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
1576 ; CM-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
1577 ; CM-NEXT: CNDE_INT * T0.W, T5.Z, T7.X, T0.W, BS:VEC_201
1578 ; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
1579 ; CM-NEXT: CNDE_INT T6.X, T2.W, PV.W, T1.Z,
1580 ; CM-NEXT: LSHL T5.Y, PV.Z, literal.x,
1581 ; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.Z, PV.X,
1582 ; CM-NEXT: CNDE_INT * T0.W, T4.X, T3.Y, T2.Y,
1583 ; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
1584 ; CM-NEXT: CNDE_INT T0.X, T2.X, PV.W, T0.X,
1585 ; CM-NEXT: LSHL T2.Y, PV.Z, literal.x,
1586 ; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
1587 ; CM-NEXT: CNDE_INT * T0.W, T4.Y, PV.X, T3.X, BS:VEC_021/SCL_122
1588 ; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
1589 ; CM-NEXT: MUL_IEEE T2.X, PV.W, PV.Z,
1590 ; CM-NEXT: SETGT T3.Y, literal.x, KC0[3].W,
1591 ; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
1592 ; CM-NEXT: CNDE_INT * T0.W, T1.Y, PV.X, T5.X,
1593 ; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
1594 ; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
1595 ; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].Z,
1596 ; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
1597 ; CM-NEXT: SETGT * T0.W, KC0[3].W, literal.y,
1598 ; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
1599 ; CM-NEXT: CNDE T2.X, PV.W, PV.Z, literal.x,
1600 ; CM-NEXT: CNDE T1.Y, PV.Y, PV.X, 0.0,
1601 ; CM-NEXT: SETGT T0.Z, KC0[3].Z, literal.y,
1602 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
1603 ; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
1604 ; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
1605 ; CM-NEXT: LSHR T0.X, PV.W, literal.x,
1606 ; CM-NEXT: CNDE T1.Y, PV.Z, PV.Y, literal.y,
1607 ; CM-NEXT: CNDE T0.Z, T1.X, T0.Y, 0.0,
1608 ; CM-NEXT: SETGT * T0.W, KC0[3].Y, literal.z,
1609 ; CM-NEXT: 2(2.802597e-45), 2139095040(INF)
1610 ; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
1611 ; CM-NEXT: CNDE * T1.X, PV.W, PV.Z, literal.x,
1612 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
1613 ; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
1614 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1615 %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %in)
1616 store <3 x float> %result, ptr addrspace(1) %out
1620 ; FIXME: We should be able to merge these packets together on Cayman so we
1621 ; have a maximum of 4 instructions.
1622 define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
1623 ; VI-SDAG-LABEL: s_exp_v4f32:
1625 ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1626 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
1627 ; VI-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218
1628 ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1629 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1630 ; VI-SDAG-NEXT: s_and_b32 s2, s7, 0xfffff000
1631 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
1632 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, s7, v2
1633 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
1634 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
1635 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v0
1636 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
1637 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295
1638 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
1639 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v4
1640 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
1641 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2
1642 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
1643 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
1644 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
1645 ; VI-SDAG-NEXT: s_and_b32 s2, s6, 0xfffff000
1646 ; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2
1647 ; VI-SDAG-NEXT: v_sub_f32_e32 v7, s6, v7
1648 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
1649 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
1650 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7
1651 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7
1652 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
1653 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8
1654 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, s2, v4
1655 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
1656 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7
1657 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7
1658 ; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
1659 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v3
1660 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
1661 ; VI-SDAG-NEXT: s_and_b32 s2, s5, 0xfffff000
1662 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v5
1663 ; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2
1664 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
1665 ; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
1666 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v6
1667 ; VI-SDAG-NEXT: v_sub_f32_e32 v9, s5, v9
1668 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v1, vcc
1669 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v2, v7
1670 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
1671 ; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9
1672 ; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9
1673 ; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v2
1674 ; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10
1675 ; VI-SDAG-NEXT: v_mul_f32_e32 v10, s2, v4
1676 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v7
1677 ; VI-SDAG-NEXT: v_add_f32_e32 v9, v10, v9
1678 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v9
1679 ; VI-SDAG-NEXT: v_exp_f32_e32 v9, v2
1680 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
1681 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v5
1682 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
1683 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v6
1684 ; VI-SDAG-NEXT: s_and_b32 s2, s4, 0xfffff000
1685 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc
1686 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v9, v7
1687 ; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2
1688 ; VI-SDAG-NEXT: v_sub_f32_e32 v9, s4, v9
1689 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
1690 ; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9
1691 ; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9
1692 ; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v0
1693 ; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10
1694 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4
1695 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v7
1696 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v9
1697 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
1698 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
1699 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v7
1700 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v5
1701 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
1702 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v6
1703 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
1704 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4
1705 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v5
1706 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
1707 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v6
1708 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1
1709 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
1710 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0
1711 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1712 ; VI-SDAG-NEXT: s_endpgm
1714 ; VI-GISEL-LABEL: s_exp_v4f32:
1715 ; VI-GISEL: ; %bb.0:
1716 ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1717 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8a000
1718 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x39a3b295
1719 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218
1720 ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1721 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1722 ; VI-GISEL-NEXT: s_and_b32 s2, s4, 0xfffff000
1723 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
1724 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, s4, v0
1725 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0
1726 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
1727 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v2
1728 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4
1729 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, s2, v3
1730 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0
1731 ; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v1
1732 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
1733 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
1734 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
1735 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
1736 ; VI-GISEL-NEXT: s_and_b32 s2, s5, 0xfffff000
1737 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
1738 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
1739 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
1740 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s2
1741 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, s5, v1
1742 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v1
1743 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
1744 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
1745 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, s2, v3
1746 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v7, v1
1747 ; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6
1748 ; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7
1749 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
1750 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
1751 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
1752 ; VI-GISEL-NEXT: s_and_b32 s2, s6, 0xfffff000
1753 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, s2, v2
1754 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4
1755 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
1756 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, s2
1757 ; VI-GISEL-NEXT: v_sub_f32_e32 v6, s6, v6
1758 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v6
1759 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6
1760 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v9
1761 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, s2, v3
1762 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v9, v6
1763 ; VI-GISEL-NEXT: v_rndne_f32_e32 v9, v8
1764 ; VI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v9
1765 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v8, v6
1766 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
1767 ; VI-GISEL-NEXT: v_exp_f32_e32 v6, v6
1768 ; VI-GISEL-NEXT: s_and_b32 s2, s7, 0xfffff000
1769 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
1770 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, s2, v3
1771 ; VI-GISEL-NEXT: v_ldexp_f32 v6, v6, v8
1772 ; VI-GISEL-NEXT: v_mov_b32_e32 v8, s2
1773 ; VI-GISEL-NEXT: v_sub_f32_e32 v8, s7, v8
1774 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v8
1775 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3fb8a000, v8
1776 ; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
1777 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v8
1778 ; VI-GISEL-NEXT: v_rndne_f32_e32 v8, v2
1779 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
1780 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1781 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
1782 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5
1783 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
1784 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
1785 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v4
1786 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8
1787 ; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2
1788 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1789 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v5
1790 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1791 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4
1792 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
1793 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5
1794 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
1795 ; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
1796 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4
1797 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
1798 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5
1799 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1
1800 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
1801 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0
1802 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1803 ; VI-GISEL-NEXT: s_endpgm
1805 ; GFX900-SDAG-LABEL: s_exp_v4f32:
1806 ; GFX900-SDAG: ; %bb.0:
1807 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1808 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
1809 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
1810 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
1811 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218
1812 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1813 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0
1814 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
1815 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s7, v0, -v2
1816 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
1817 ; GFX900-SDAG-NEXT: v_fma_f32 v4, s7, v1, v4
1818 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
1819 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
1820 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
1821 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v5
1822 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v9, 0x7f800000
1823 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1824 ; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3
1825 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0
1826 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v3
1827 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v3, v7
1828 ; GFX900-SDAG-NEXT: v_fma_f32 v3, s6, v0, -v3
1829 ; GFX900-SDAG-NEXT: v_fma_f32 v3, s6, v1, v3
1830 ; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v8, v3
1831 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v8, v3
1832 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
1833 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1834 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v6
1835 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, v9, v2, vcc
1836 ; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v8, v7
1837 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s5, v0
1838 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v8, v7
1839 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v8
1840 ; GFX900-SDAG-NEXT: v_fma_f32 v7, s5, v0, -v7
1841 ; GFX900-SDAG-NEXT: v_fma_f32 v7, s5, v1, v7
1842 ; GFX900-SDAG-NEXT: v_add_f32_e32 v7, v10, v7
1843 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v7, v7
1844 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v8
1845 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v5
1846 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1847 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v6
1848 ; GFX900-SDAG-NEXT: v_ldexp_f32 v7, v7, v8
1849 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v8, s4, v0
1850 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v10, v8
1851 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v8
1852 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v11, v8, v10
1853 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
1854 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v11, v0
1855 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
1856 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v10
1857 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
1858 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v5
1859 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
1860 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v6
1861 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
1862 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v8
1863 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v5
1864 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
1865 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v6
1866 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
1867 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc
1868 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1869 ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
1870 ; GFX900-SDAG-NEXT: s_endpgm
1872 ; GFX900-GISEL-LABEL: s_exp_v4f32:
1873 ; GFX900-GISEL: ; %bb.0:
1874 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1875 ; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b
1876 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
1877 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
1878 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218
1879 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1880 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
1881 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
1882 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v1
1883 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
1884 ; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0
1885 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
1886 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
1887 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
1888 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
1889 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
1890 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4
1891 ; GFX900-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0
1892 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
1893 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v3
1894 ; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v1
1895 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v8, v1
1896 ; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v2, v7
1897 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
1898 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
1899 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8
1900 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
1901 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1902 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
1903 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5
1904 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v8, s5
1905 ; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x42b17218
1906 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1907 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v7
1908 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3
1909 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8
1910 ; GFX900-GISEL-NEXT: v_fma_f32 v9, s6, v3, -v7
1911 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v10, v7
1912 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1913 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8
1914 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s7, v3
1915 ; GFX900-GISEL-NEXT: v_fma_f32 v9, s6, v2, v9
1916 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v7, v7, v10
1917 ; GFX900-GISEL-NEXT: v_fma_f32 v3, s7, v3, -v8
1918 ; GFX900-GISEL-NEXT: v_add_f32_e32 v7, v7, v9
1919 ; GFX900-GISEL-NEXT: v_fma_f32 v2, s7, v2, v3
1920 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v8
1921 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v9, v10
1922 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v7, v7
1923 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3
1924 ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
1925 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
1926 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2
1927 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1928 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
1929 ; GFX900-GISEL-NEXT: v_ldexp_f32 v7, v7, v9
1930 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4
1931 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc
1932 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5
1933 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
1934 ; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
1935 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4
1936 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
1937 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5
1938 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
1939 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
1940 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1941 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
1942 ; GFX900-GISEL-NEXT: s_endpgm
1944 ; SI-SDAG-LABEL: s_exp_v4f32:
1946 ; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
1947 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
1948 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
1949 ; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218
1950 ; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
1951 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1952 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0
1953 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
1954 ; SI-SDAG-NEXT: v_fma_f32 v4, s7, v0, -v2
1955 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
1956 ; SI-SDAG-NEXT: v_fma_f32 v4, s7, v1, v4
1957 ; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
1958 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
1959 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
1960 ; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
1961 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v4
1962 ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1963 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3
1964 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0
1965 ; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v3
1966 ; SI-SDAG-NEXT: v_sub_f32_e32 v7, v3, v6
1967 ; SI-SDAG-NEXT: v_fma_f32 v3, s6, v0, -v3
1968 ; SI-SDAG-NEXT: v_fma_f32 v3, s6, v1, v3
1969 ; SI-SDAG-NEXT: v_add_f32_e32 v3, v7, v3
1970 ; SI-SDAG-NEXT: v_exp_f32_e32 v7, v3
1971 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6
1972 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1973 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v5
1974 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc
1975 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v7, v6
1976 ; SI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v0
1977 ; SI-SDAG-NEXT: v_rndne_f32_e32 v7, v6
1978 ; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v7
1979 ; SI-SDAG-NEXT: v_fma_f32 v6, s5, v0, -v6
1980 ; SI-SDAG-NEXT: v_fma_f32 v6, s5, v1, v6
1981 ; SI-SDAG-NEXT: v_add_f32_e32 v6, v9, v6
1982 ; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6
1983 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
1984 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v4
1985 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1986 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5
1987 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v6, v6, v7
1988 ; SI-SDAG-NEXT: v_mul_f32_e32 v7, s4, v0
1989 ; SI-SDAG-NEXT: v_rndne_f32_e32 v9, v7
1990 ; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v7
1991 ; SI-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9
1992 ; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
1993 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v10, v0
1994 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
1995 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9
1996 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
1997 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v4
1998 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc
1999 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5
2000 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
2001 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v7
2002 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v4
2003 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
2004 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5
2005 ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
2006 ; SI-SDAG-NEXT: s_mov_b32 s2, -1
2007 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
2008 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2009 ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2010 ; SI-SDAG-NEXT: s_endpgm
2012 ; SI-GISEL-LABEL: s_exp_v4f32:
2013 ; SI-GISEL: ; %bb.0:
2014 ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
2015 ; SI-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b
2016 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
2017 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
2018 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218
2019 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2020 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
2021 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
2022 ; SI-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v1
2023 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
2024 ; SI-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0
2025 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
2026 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
2027 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
2028 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
2029 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
2030 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4
2031 ; SI-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0
2032 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
2033 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v3
2034 ; SI-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v1
2035 ; SI-GISEL-NEXT: v_rndne_f32_e32 v8, v1
2036 ; SI-GISEL-NEXT: v_fma_f32 v7, s5, v2, v7
2037 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
2038 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
2039 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8
2040 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2041 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
2042 ; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
2043 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5
2044 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, s5
2045 ; SI-GISEL-NEXT: s_mov_b32 s3, 0x42b17218
2046 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
2047 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v7
2048 ; SI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3
2049 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8
2050 ; SI-GISEL-NEXT: v_fma_f32 v9, s6, v3, -v7
2051 ; SI-GISEL-NEXT: v_rndne_f32_e32 v10, v7
2052 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
2053 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8
2054 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, s7, v3
2055 ; SI-GISEL-NEXT: v_fma_f32 v9, s6, v2, v9
2056 ; SI-GISEL-NEXT: v_sub_f32_e32 v7, v7, v10
2057 ; SI-GISEL-NEXT: v_fma_f32 v3, s7, v3, -v8
2058 ; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v9
2059 ; SI-GISEL-NEXT: v_fma_f32 v2, s7, v2, v3
2060 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v8
2061 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v9, v10
2062 ; SI-GISEL-NEXT: v_exp_f32_e32 v7, v7
2063 ; SI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3
2064 ; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
2065 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
2066 ; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2
2067 ; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
2068 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
2069 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v7, v7, v9
2070 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4
2071 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc
2072 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5
2073 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2074 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3
2075 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4
2076 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
2077 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5
2078 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
2079 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
2080 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
2081 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2082 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2083 ; SI-GISEL-NEXT: s_endpgm
2085 ; R600-LABEL: s_exp_v4f32:
2087 ; R600-NEXT: ALU 98, @6, KC0[CB0:0-32], KC1[]
2088 ; R600-NEXT: ALU 98, @105, KC0[CB0:0-32], KC1[]
2089 ; R600-NEXT: ALU 24, @204, KC0[CB0:0-32], KC1[]
2090 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
2093 ; R600-NEXT: ALU clause starting at 6:
2094 ; R600-NEXT: AND_INT * T0.W, KC0[3].Z, literal.x,
2095 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
2096 ; R600-NEXT: ADD T1.W, KC0[3].Z, -PV.W,
2097 ; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
2098 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
2099 ; R600-NEXT: RNDNE T3.W, PS,
2100 ; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
2101 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
2102 ; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
2103 ; R600-NEXT: TRUNC * T4.W, PV.W,
2104 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
2105 ; R600-NEXT: FLT_TO_INT T0.Z, PS,
2106 ; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
2107 ; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
2108 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
2109 ; R600-NEXT: ADD T1.Z, PS, PV.W,
2110 ; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x,
2111 ; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
2112 ; R600-NEXT: -330(nan), 381(5.338947e-43)
2113 ; R600-NEXT: ADD_INT T0.X, PS, literal.x,
2114 ; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y,
2115 ; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
2116 ; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w,
2117 ; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
2118 ; R600-NEXT: -254(nan), 204(2.858649e-43)
2119 ; R600-NEXT: 102(1.429324e-43), -229(nan)
2120 ; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x,
2121 ; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y,
2122 ; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
2123 ; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x,
2124 ; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
2125 ; R600-NEXT: -127(nan), 254(3.559298e-43)
2126 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2127 ; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
2128 ; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y,
2129 ; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
2130 ; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
2131 ; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
2132 ; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
2133 ; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
2134 ; R600-NEXT: AND_INT T2.Y, KC0[4].X, literal.x,
2135 ; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
2136 ; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W,
2137 ; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.y,
2138 ; R600-NEXT: -4096(nan), 2130706432(1.701412e+38)
2139 ; R600-NEXT: CNDE_INT T0.X, T1.Y, T3.X, PS,
2140 ; R600-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.X,
2141 ; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
2142 ; R600-NEXT: ADD T0.W, KC0[4].X, -PV.Y,
2143 ; R600-NEXT: MUL_IEEE * T1.W, PV.Y, literal.y,
2144 ; R600-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00)
2145 ; R600-NEXT: RNDNE T1.Y, PS,
2146 ; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
2147 ; R600-NEXT: ADD_INT T2.W, PV.Z, literal.y,
2148 ; R600-NEXT: CNDE_INT * T3.W, T4.W, PV.Y, PV.X,
2149 ; R600-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00)
2150 ; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
2151 ; R600-NEXT: AND_INT T0.Z, KC0[3].W, literal.x,
2152 ; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.Z,
2153 ; R600-NEXT: TRUNC * T2.W, PV.Y,
2154 ; R600-NEXT: -4096(nan), 1069064192(1.442383e+00)
2155 ; R600-NEXT: SETGT T0.X, literal.x, KC0[3].Z,
2156 ; R600-NEXT: FLT_TO_INT T3.Y, PS,
2157 ; R600-NEXT: MULADD_IEEE T1.Z, T2.Y, literal.y, PV.W,
2158 ; R600-NEXT: ADD T0.W, T1.W, -T1.Y,
2159 ; R600-NEXT: MUL_IEEE * T1.W, PV.Z, literal.z,
2160 ; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04)
2161 ; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
2162 ; R600-NEXT: RNDNE T1.X, PS,
2163 ; R600-NEXT: AND_INT T1.Y, KC0[3].Y, literal.x,
2164 ; R600-NEXT: ADD T1.Z, PV.W, PV.Z,
2165 ; R600-NEXT: MAX_INT T0.W, PV.Y, literal.y,
2166 ; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.z,
2167 ; R600-NEXT: -4096(nan), -330(nan)
2168 ; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
2169 ; R600-NEXT: ADD_INT T2.X, PS, literal.x,
2170 ; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y,
2171 ; R600-NEXT: ADD_INT T2.Z, T3.Y, literal.z,
2172 ; R600-NEXT: SETGT_UINT T0.W, T3.Y, literal.w,
2173 ; R600-NEXT: EXP_IEEE * T1.Z, PV.Z,
2174 ; R600-NEXT: -254(nan), 204(2.858649e-43)
2175 ; R600-NEXT: 102(1.429324e-43), -229(nan)
2176 ; R600-NEXT: ADD_INT T3.X, T3.Y, literal.x,
2177 ; R600-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y,
2178 ; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
2179 ; R600-NEXT: SETGT_INT T2.W, T3.Y, literal.x,
2180 ; R600-NEXT: MUL_IEEE * T3.W, PS, literal.z,
2181 ; R600-NEXT: -127(nan), 254(3.559298e-43)
2182 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2183 ; R600-NEXT: MUL_IEEE T4.X, T1.Z, literal.x,
2184 ; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
2185 ; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Z, T3.Y,
2186 ; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T2.X,
2187 ; R600-NEXT: SETGT_INT * T5.W, T3.Y, literal.z,
2188 ; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
2189 ; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
2190 ; R600-NEXT: ADD T2.X, KC0[3].W, -T0.Z,
2191 ; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W,
2192 ; R600-NEXT: CNDE_INT * T2.Z, T0.W, PV.Y, T3.W,
2193 ; R600-NEXT: ALU clause starting at 105:
2194 ; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.x,
2195 ; R600-NEXT: ADD * T3.W, KC0[3].Y, -T1.Y,
2196 ; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
2197 ; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
2198 ; R600-NEXT: MUL_IEEE T2.Y, T1.Y, literal.y,
2199 ; R600-NEXT: CNDE_INT T3.Z, T4.Y, T4.X, PV.W, BS:VEC_120/SCL_212
2200 ; R600-NEXT: CNDE_INT T0.W, T2.W, T2.Z, T1.Z,
2201 ; R600-NEXT: LSHL * T2.W, T3.Y, literal.z,
2202 ; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
2203 ; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
2204 ; R600-NEXT: ADD_INT T4.X, PS, literal.x,
2205 ; R600-NEXT: CNDE_INT T3.Y, T5.W, PV.W, PV.Z,
2206 ; R600-NEXT: RNDNE T1.Z, PV.Y,
2207 ; R600-NEXT: MULADD_IEEE T0.W, T3.W, literal.y, PV.X, BS:VEC_120/SCL_212
2208 ; R600-NEXT: MUL_IEEE * T2.W, T2.X, literal.z,
2209 ; R600-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00)
2210 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
2211 ; R600-NEXT: MULADD_IEEE T2.X, T2.X, literal.x, PS,
2212 ; R600-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.y, PV.W,
2213 ; R600-NEXT: ADD T2.Z, T2.Y, -PV.Z, BS:VEC_120/SCL_212
2214 ; R600-NEXT: MUL_IEEE T0.W, PV.Y, PV.X,
2215 ; R600-NEXT: SETGT * T2.W, literal.z, KC0[4].X,
2216 ; R600-NEXT: 1069064192(1.442383e+00), 967029397(3.122284e-04)
2217 ; R600-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
2218 ; R600-NEXT: CNDE T3.X, PS, PV.W, 0.0,
2219 ; R600-NEXT: ADD T1.Y, PV.Z, PV.Y,
2220 ; R600-NEXT: TRUNC T1.Z, T1.Z,
2221 ; R600-NEXT: MULADD_IEEE T0.W, T0.Z, literal.x, PV.X, BS:VEC_120/SCL_212
2222 ; R600-NEXT: ADD * T1.W, T1.W, -T1.X,
2223 ; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
2224 ; R600-NEXT: SETGT T2.X, KC0[4].X, literal.x,
2225 ; R600-NEXT: ADD T2.Y, PS, PV.W,
2226 ; R600-NEXT: FLT_TO_INT T0.Z, PV.Z,
2227 ; R600-NEXT: TRUNC T0.W, T1.X,
2228 ; R600-NEXT: EXP_IEEE * T1.X, PV.Y,
2229 ; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
2230 ; R600-NEXT: MUL_IEEE T4.X, PS, literal.x,
2231 ; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
2232 ; R600-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
2233 ; R600-NEXT: MUL_IEEE T0.W, PS, literal.z,
2234 ; R600-NEXT: EXP_IEEE * T1.W, PV.Y,
2235 ; R600-NEXT: 2130706432(1.701412e+38), -330(nan)
2236 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2237 ; R600-NEXT: MUL_IEEE T5.X, PV.W, literal.x,
2238 ; R600-NEXT: MUL_IEEE T2.Y, PS, literal.x,
2239 ; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
2240 ; R600-NEXT: ADD_INT T2.W, T0.Z, literal.z,
2241 ; R600-NEXT: MAX_INT * T3.W, PV.Y, literal.w,
2242 ; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43)
2243 ; R600-NEXT: 102(1.429324e-43), -330(nan)
2244 ; R600-NEXT: SETGT_UINT T6.X, T0.Z, literal.x,
2245 ; R600-NEXT: ADD_INT T3.Y, PS, literal.y,
2246 ; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.z,
2247 ; R600-NEXT: SETGT_UINT T3.W, T1.Y, literal.x,
2248 ; R600-NEXT: MIN_INT * T4.W, T1.Y, literal.w,
2249 ; R600-NEXT: -229(nan), 204(2.858649e-43)
2250 ; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43)
2251 ; R600-NEXT: ADD_INT T7.X, PS, literal.x,
2252 ; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y,
2253 ; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z,
2254 ; R600-NEXT: CNDE_INT T4.W, PV.W, PV.Y, PV.Z,
2255 ; R600-NEXT: SETGT_INT * T5.W, T1.Y, literal.y,
2256 ; R600-NEXT: -254(nan), -127(nan)
2257 ; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
2258 ; R600-NEXT: CNDE_INT T8.X, PS, PV.W, T1.Y,
2259 ; R600-NEXT: CNDE_INT T3.Y, PV.Z, PV.Y, PV.X,
2260 ; R600-NEXT: SETGT_INT T2.Z, T1.Y, literal.x,
2261 ; R600-NEXT: CNDE_INT T2.W, T6.X, T1.Z, T2.W,
2262 ; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.y,
2263 ; R600-NEXT: 127(1.779649e-43), -127(nan)
2264 ; R600-NEXT: CNDE_INT T7.X, PS, PV.W, T0.Z,
2265 ; R600-NEXT: CNDE_INT T1.Y, PV.Z, PV.X, PV.Y,
2266 ; R600-NEXT: MIN_INT T1.Z, T0.Z, literal.x,
2267 ; R600-NEXT: MUL_IEEE T2.W, T1.W, literal.y,
2268 ; R600-NEXT: MUL_IEEE * T6.W, T2.Y, literal.z,
2269 ; R600-NEXT: 381(5.338947e-43), 2130706432(1.701412e+38)
2270 ; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2271 ; R600-NEXT: CNDE_INT T8.X, T3.W, PS, T2.Y,
2272 ; R600-NEXT: MUL_IEEE T2.Y, PV.W, literal.x,
2273 ; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
2274 ; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z,
2275 ; R600-NEXT: SETGT_UINT * T6.W, T0.Z, literal.w,
2276 ; R600-NEXT: 2130706432(1.701412e+38), -254(nan)
2277 ; R600-NEXT: -127(nan), 254(3.559298e-43)
2278 ; R600-NEXT: CNDE_INT T9.X, PS, PV.W, PV.Z,
2279 ; R600-NEXT: SETGT_INT T3.Y, T0.Z, literal.x,
2280 ; R600-NEXT: CNDE_INT T0.Z, T3.Z, T2.W, PV.Y, BS:VEC_120/SCL_212
2281 ; R600-NEXT: CNDE_INT T1.W, T5.W, PV.X, T1.W, BS:VEC_021/SCL_122
2282 ; R600-NEXT: LSHL * T2.W, T1.Y, literal.y,
2283 ; R600-NEXT: 127(1.779649e-43), 23(3.222986e-44)
2284 ; R600-NEXT: ADD_INT T8.X, PS, literal.x,
2285 ; R600-NEXT: CNDE_INT T1.Y, T2.Z, PV.W, PV.Z,
2286 ; R600-NEXT: CNDE_INT T0.Z, PV.Y, T7.X, PV.X,
2287 ; R600-NEXT: CNDE_INT * T0.W, T6.X, T5.X, T0.W, BS:VEC_021/SCL_122
2288 ; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
2289 ; R600-NEXT: MUL_IEEE * T1.W, T4.X, literal.x,
2290 ; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
2291 ; R600-NEXT: CNDE_INT T4.X, T6.W, T4.X, PV.W,
2292 ; R600-NEXT: CNDE_INT * T2.Y, T4.W, T0.W, T1.X, BS:VEC_120/SCL_212
2293 ; R600-NEXT: ALU clause starting at 204:
2294 ; R600-NEXT: LSHL T0.Z, T0.Z, literal.x,
2295 ; R600-NEXT: MUL_IEEE T0.W, T1.Y, T8.X,
2296 ; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
2297 ; R600-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02)
2298 ; R600-NEXT: CNDE T1.X, PS, PV.W, 0.0,
2299 ; R600-NEXT: SETGT T1.Y, KC0[3].W, literal.x,
2300 ; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
2301 ; R600-NEXT: CNDE_INT T0.W, T3.Y, T2.Y, T4.X, BS:VEC_120/SCL_212
2302 ; R600-NEXT: CNDE * T1.W, T2.X, T3.X, literal.z,
2303 ; R600-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00)
2304 ; R600-NEXT: 2139095040(INF), 0(0.000000e+00)
2305 ; R600-NEXT: MUL_IEEE T2.X, PV.W, PV.Z,
2306 ; R600-NEXT: SETGT T2.Y, literal.x, KC0[3].Y,
2307 ; R600-NEXT: CNDE T1.Z, PV.Y, PV.X, literal.y,
2308 ; R600-NEXT: CNDE T0.W, T0.X, T0.Y, 0.0,
2309 ; R600-NEXT: SETGT * T2.W, KC0[3].Z, literal.z,
2310 ; R600-NEXT: -1026650416(-1.032789e+02), 2139095040(INF)
2311 ; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
2312 ; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x,
2313 ; R600-NEXT: CNDE T0.W, PV.Y, PV.X, 0.0,
2314 ; R600-NEXT: SETGT * T2.W, KC0[3].Y, literal.y,
2315 ; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
2316 ; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x,
2317 ; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y,
2318 ; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
2320 ; CM-LABEL: s_exp_v4f32:
2322 ; CM-NEXT: ALU 97, @6, KC0[CB0:0-32], KC1[]
2323 ; CM-NEXT: ALU 100, @104, KC0[CB0:0-32], KC1[]
2324 ; CM-NEXT: ALU 36, @205, KC0[CB0:0-32], KC1[]
2325 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
2328 ; CM-NEXT: ALU clause starting at 6:
2329 ; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
2330 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
2331 ; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W,
2332 ; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
2333 ; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y,
2334 ; CM-NEXT: AND_INT * T2.W, KC0[3].W, literal.z,
2335 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
2336 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
2337 ; CM-NEXT: ADD T1.Y, KC0[3].W, -PV.W,
2338 ; CM-NEXT: RNDNE T1.Z, PV.Z,
2339 ; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y,
2340 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
2341 ; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
2342 ; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
2343 ; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
2344 ; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
2345 ; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
2346 ; CM-NEXT: TRUNC T1.X, T1.Z,
2347 ; CM-NEXT: RNDNE T2.Y, PV.W,
2348 ; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
2349 ; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
2350 ; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
2351 ; CM-NEXT: EXP_IEEE T0.X, T1.W,
2352 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
2353 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
2354 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
2355 ; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
2356 ; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
2357 ; CM-NEXT: FLT_TO_INT T0.Z, T1.X,
2358 ; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
2359 ; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
2360 ; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
2361 ; CM-NEXT: MUL_IEEE T1.Y, T0.X, literal.y,
2362 ; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.z,
2363 ; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.w,
2364 ; CM-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
2365 ; CM-NEXT: -330(nan), 381(5.338947e-43)
2366 ; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
2367 ; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.y,
2368 ; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
2369 ; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
2370 ; CM-NEXT: -254(nan), 204(2.858649e-43)
2371 ; CM-NEXT: 102(1.429324e-43), -229(nan)
2372 ; CM-NEXT: ADD_INT T4.X, T0.Z, literal.x,
2373 ; CM-NEXT: SETGT_UINT T4.Y, T0.Z, literal.y,
2374 ; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
2375 ; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
2376 ; CM-NEXT: -127(nan), 254(3.559298e-43)
2377 ; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
2378 ; CM-NEXT: CNDE_INT T3.Y, PV.Y, PV.X, T3.X,
2379 ; CM-NEXT: SETGT_INT T0.Z, T0.Z, literal.x,
2380 ; CM-NEXT: MUL_IEEE * T3.W, T1.Y, literal.y,
2381 ; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
2382 ; CM-NEXT: CNDE_INT T3.X, T4.Y, T1.Y, PV.W,
2383 ; CM-NEXT: AND_INT T1.Y, KC0[3].Z, literal.x,
2384 ; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
2385 ; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.X, T0.W,
2386 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
2387 ; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
2388 ; CM-NEXT: LSHL T3.Y, PV.Z, literal.x,
2389 ; CM-NEXT: TRUNC T1.Z, T2.Y,
2390 ; CM-NEXT: ADD * T0.W, KC0[3].Z, -PV.Y,
2391 ; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
2392 ; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
2393 ; CM-NEXT: FLT_TO_INT T2.Y, PV.Z,
2394 ; CM-NEXT: ADD_INT T1.Z, PV.Y, literal.y,
2395 ; CM-NEXT: CNDE_INT * T1.W, T0.Z, PV.X, T3.X,
2396 ; CM-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00)
2397 ; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
2398 ; CM-NEXT: MIN_INT T3.Y, PV.Y, literal.x,
2399 ; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.X,
2400 ; CM-NEXT: ADD * T0.W, T0.Y, T2.X,
2401 ; CM-NEXT: 381(5.338947e-43), 1069064192(1.442383e+00)
2402 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
2403 ; CM-NEXT: EXP_IEEE T0.Y, T0.W,
2404 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
2405 ; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
2406 ; CM-NEXT: MULADD_IEEE T1.X, T1.Y, literal.x, T0.Z,
2407 ; CM-NEXT: MUL_IEEE T4.Y, PV.Y, literal.y,
2408 ; CM-NEXT: ADD_INT T0.Z, T3.Y, literal.z, BS:VEC_120/SCL_212
2409 ; CM-NEXT: MAX_INT * T0.W, T2.Y, literal.w, BS:VEC_201
2410 ; CM-NEXT: 967029397(3.122284e-04), 2130706432(1.701412e+38)
2411 ; CM-NEXT: -254(nan), -330(nan)
2412 ; CM-NEXT: ADD_INT T2.X, T2.Y, literal.x,
2413 ; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y,
2414 ; CM-NEXT: ADD_INT T1.Z, T2.Y, literal.z,
2415 ; CM-NEXT: SETGT_UINT * T0.W, T2.Y, literal.w,
2416 ; CM-NEXT: -127(nan), 204(2.858649e-43)
2417 ; CM-NEXT: 102(1.429324e-43), -229(nan)
2418 ; CM-NEXT: SETGT_UINT T3.X, T2.Y, literal.x,
2419 ; CM-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
2420 ; CM-NEXT: SETGT_INT T1.Z, T2.Y, literal.y,
2421 ; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.z, BS:VEC_120/SCL_212
2422 ; CM-NEXT: 254(3.559298e-43), -127(nan)
2423 ; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2424 ; CM-NEXT: MUL_IEEE T4.X, PV.W, literal.x,
2425 ; CM-NEXT: CNDE_INT * T3.Y, PV.Z, PV.Y, T2.Y,
2426 ; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
2427 ; CM-NEXT: ALU clause starting at 104:
2428 ; CM-NEXT: CNDE_INT T0.Z, T3.X, T2.X, T0.Z,
2429 ; CM-NEXT: SETGT_INT * T2.W, T2.Y, literal.x,
2430 ; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
2431 ; CM-NEXT: MUL_IEEE T2.X, T1.Y, literal.x,
2432 ; CM-NEXT: CNDE_INT T1.Y, PV.W, T3.Y, PV.Z,
2433 ; CM-NEXT: CNDE_INT T0.Z, T0.W, T4.X, T1.W,
2434 ; CM-NEXT: MUL_IEEE * T0.W, T4.Y, literal.y, BS:VEC_201
2435 ; CM-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
2436 ; CM-NEXT: AND_INT T4.X, KC0[4].X, literal.x,
2437 ; CM-NEXT: CNDE_INT T2.Y, T3.X, T4.Y, PV.W,
2438 ; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.Y,
2439 ; CM-NEXT: LSHL * T0.W, PV.Y, literal.y,
2440 ; CM-NEXT: -4096(nan), 23(3.222986e-44)
2441 ; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
2442 ; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.Z, PV.Y,
2443 ; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y,
2444 ; CM-NEXT: RNDNE * T0.W, T2.X,
2445 ; CM-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00)
2446 ; CM-NEXT: ADD T2.X, T2.X, -PV.W,
2447 ; CM-NEXT: RNDNE T1.Y, PV.Z,
2448 ; CM-NEXT: MUL_IEEE T1.Z, PV.Y, PV.X,
2449 ; CM-NEXT: SETGT * T1.W, literal.x, KC0[3].W,
2450 ; CM-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
2451 ; CM-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0,
2452 ; CM-NEXT: TRUNC T0.Y, T0.W,
2453 ; CM-NEXT: TRUNC T1.Z, PV.Y,
2454 ; CM-NEXT: ADD * T0.W, PV.X, T1.X,
2455 ; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
2456 ; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
2457 ; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
2458 ; CM-NEXT: EXP_IEEE * T0.W, T0.W,
2459 ; CM-NEXT: FLT_TO_INT T1.X, T1.Z,
2460 ; CM-NEXT: FLT_TO_INT T0.Y, T0.Y,
2461 ; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
2462 ; CM-NEXT: ADD * T1.W, KC0[4].X, -T4.X,
2463 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
2464 ; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x,
2465 ; CM-NEXT: MUL_IEEE T2.Y, T0.W, literal.y,
2466 ; CM-NEXT: MUL_IEEE T2.Z, PV.Z, literal.z,
2467 ; CM-NEXT: SETGT_UINT * T2.W, PV.Y, literal.w,
2468 ; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
2469 ; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
2470 ; CM-NEXT: CNDE_INT T5.X, PV.W, T1.Z, PV.Z,
2471 ; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
2472 ; CM-NEXT: MULADD_IEEE T1.Z, T1.W, literal.y, PV.X,
2473 ; CM-NEXT: MAX_INT * T1.W, T1.X, literal.z,
2474 ; CM-NEXT: 209715200(1.972152e-31), 1069064192(1.442383e+00)
2475 ; CM-NEXT: -330(nan), 0(0.000000e+00)
2476 ; CM-NEXT: ADD_INT T2.X, PV.W, literal.x,
2477 ; CM-NEXT: ADD_INT T4.Y, T1.X, literal.y,
2478 ; CM-NEXT: MULADD_IEEE T1.Z, T4.X, literal.z, PV.Z, BS:VEC_120/SCL_212
2479 ; CM-NEXT: MAX_INT * T1.W, T0.Y, literal.w,
2480 ; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
2481 ; CM-NEXT: 967029397(3.122284e-04), -330(nan)
2482 ; CM-NEXT: ADD T4.X, T0.Z, -T1.Y,
2483 ; CM-NEXT: ADD_INT T1.Y, PV.W, literal.x,
2484 ; CM-NEXT: ADD_INT T0.Z, T0.Y, literal.y,
2485 ; CM-NEXT: SETGT_UINT * T1.W, T0.Y, literal.z,
2486 ; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
2487 ; CM-NEXT: -229(nan), 0(0.000000e+00)
2488 ; CM-NEXT: SETGT_UINT T6.X, T1.X, literal.x,
2489 ; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
2490 ; CM-NEXT: SETGT_INT T0.Z, T0.Y, literal.y,
2491 ; CM-NEXT: ADD * T3.W, PV.X, T1.Z,
2492 ; CM-NEXT: -229(nan), -127(nan)
2493 ; CM-NEXT: EXP_IEEE T1.X (MASKED), T3.W,
2494 ; CM-NEXT: EXP_IEEE T1.Y (MASKED), T3.W,
2495 ; CM-NEXT: EXP_IEEE T1.Z, T3.W,
2496 ; CM-NEXT: EXP_IEEE * T1.W (MASKED), T3.W,
2497 ; CM-NEXT: CNDE_INT T4.X, T0.Z, T1.Y, T0.Y,
2498 ; CM-NEXT: CNDE_INT T1.Y, T6.X, T2.X, T4.Y, BS:VEC_120/SCL_212
2499 ; CM-NEXT: SETGT_INT T2.Z, T1.X, literal.x,
2500 ; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.y,
2501 ; CM-NEXT: -127(nan), 209715200(1.972152e-31)
2502 ; CM-NEXT: MUL_IEEE T2.X, T1.Z, literal.x,
2503 ; CM-NEXT: MUL_IEEE T4.Y, PV.W, literal.y,
2504 ; CM-NEXT: CNDE_INT T3.Z, PV.Z, PV.Y, T1.X,
2505 ; CM-NEXT: MIN_INT * T4.W, T1.X, literal.z,
2506 ; CM-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
2507 ; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
2508 ; CM-NEXT: MIN_INT T7.X, T0.Y, literal.x,
2509 ; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
2510 ; CM-NEXT: ADD_INT T4.Z, T1.X, literal.z,
2511 ; CM-NEXT: SETGT_UINT * T4.W, T1.X, literal.w,
2512 ; CM-NEXT: 381(5.338947e-43), -254(nan)
2513 ; CM-NEXT: -127(nan), 254(3.559298e-43)
2514 ; CM-NEXT: CNDE_INT T8.X, PV.W, PV.Z, PV.Y,
2515 ; CM-NEXT: SETGT_INT T1.Y, T1.X, literal.x,
2516 ; CM-NEXT: ADD_INT T4.Z, PV.X, literal.y,
2517 ; CM-NEXT: ADD_INT * T5.W, T0.Y, literal.z,
2518 ; CM-NEXT: 127(1.779649e-43), -254(nan)
2519 ; CM-NEXT: -127(nan), 0(0.000000e+00)
2520 ; CM-NEXT: CNDE_INT T1.X, T2.W, PV.W, PV.Z,
2521 ; CM-NEXT: CNDE_INT T5.Y, PV.Y, T3.Z, PV.X,
2522 ; CM-NEXT: CNDE_INT T3.Z, T6.X, T4.Y, T3.W,
2523 ; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.x, BS:VEC_120/SCL_212
2524 ; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
2525 ; CM-NEXT: SETGT_INT T6.X, T0.Y, literal.x,
2526 ; CM-NEXT: CNDE_INT T0.Y, T4.W, T2.X, PV.W,
2527 ; CM-NEXT: CNDE_INT * T1.Z, T2.Z, PV.Z, T1.Z,
2528 ; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
2529 ; CM-NEXT: ALU clause starting at 205:
2530 ; CM-NEXT: LSHL * T2.W, T5.Y, literal.x,
2531 ; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
2532 ; CM-NEXT: ADD_INT T2.X, PV.W, literal.x,
2533 ; CM-NEXT: CNDE_INT T0.Y, T1.Y, T1.Z, T0.Y,
2534 ; CM-NEXT: CNDE_INT * T1.Z, T6.X, T4.X, T1.X,
2535 ; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
2536 ; CM-NEXT: CNDE_INT * T1.W, T1.W, T3.Y, T2.Y,
2537 ; CM-NEXT: CNDE_INT T1.X, T0.Z, PV.W, T0.W,
2538 ; CM-NEXT: LSHL T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
2539 ; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T2.X,
2540 ; CM-NEXT: SETGT * T0.W, literal.y, KC0[4].X,
2541 ; CM-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02)
2542 ; CM-NEXT: CNDE T2.X, PV.W, PV.Z, 0.0,
2543 ; CM-NEXT: SETGT T0.Y, KC0[4].X, literal.x,
2544 ; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
2545 ; CM-NEXT: CNDE_INT * T0.W, T6.X, PV.X, T5.X,
2546 ; CM-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00)
2547 ; CM-NEXT: SETGT T1.X, KC0[3].W, literal.x,
2548 ; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z,
2549 ; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z,
2550 ; CM-NEXT: CNDE * T0.W, PV.Y, PV.X, literal.z,
2551 ; CM-NEXT: 1118925336(8.872284e+01), -1026650416(-1.032789e+02)
2552 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
2553 ; CM-NEXT: SETGT T2.X, literal.x, KC0[3].Y,
2554 ; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, 0.0,
2555 ; CM-NEXT: CNDE T0.Z, PV.X, T3.X, literal.y,
2556 ; CM-NEXT: SETGT * T1.W, KC0[3].Z, literal.z,
2557 ; CM-NEXT: -1026650416(-1.032789e+02), 2139095040(INF)
2558 ; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
2559 ; CM-NEXT: CNDE T0.Y, PV.W, PV.Y, literal.x,
2560 ; CM-NEXT: CNDE T1.Z, PV.X, T0.X, 0.0,
2561 ; CM-NEXT: SETGT * T1.W, KC0[3].Y, literal.y,
2562 ; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
2563 ; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
2564 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
2565 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
2566 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2567 %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %in)
2568 store <4 x float> %result, ptr addrspace(1) %out
2572 define float @v_exp_f32(float %in) {
2573 ; VI-SDAG-LABEL: v_exp_f32:
2575 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2576 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2577 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
2578 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
2579 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
2580 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
2581 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
2582 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
2583 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2584 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
2585 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
2586 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
2587 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2588 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
2589 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2590 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
2591 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2592 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2593 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2594 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2595 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
2596 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2597 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2599 ; VI-GISEL-LABEL: v_exp_f32:
2600 ; VI-GISEL: ; %bb.0:
2601 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2602 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2603 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
2604 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
2605 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
2606 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
2607 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
2608 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2609 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2610 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
2611 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
2612 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
2613 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
2614 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2615 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2616 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2617 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2618 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
2619 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2620 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
2621 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
2622 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2623 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2625 ; GFX900-SDAG-LABEL: v_exp_f32:
2626 ; GFX900-SDAG: ; %bb.0:
2627 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2628 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
2629 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2630 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2631 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2632 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
2633 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
2634 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
2635 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2636 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
2637 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2638 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2639 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
2640 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2641 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2642 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2643 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2644 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
2645 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2646 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2648 ; GFX900-GISEL-LABEL: v_exp_f32:
2649 ; GFX900-GISEL: ; %bb.0:
2650 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2651 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2652 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
2653 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2654 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2655 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2656 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
2657 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
2658 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2659 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2660 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
2661 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2662 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2663 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2664 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
2665 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2666 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
2667 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
2668 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2669 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2671 ; SI-SDAG-LABEL: v_exp_f32:
2673 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2674 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
2675 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2676 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2677 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2678 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
2679 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
2680 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
2681 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2682 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2683 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2684 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2685 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
2686 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2687 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
2688 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2689 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2690 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
2691 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2692 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2694 ; SI-GISEL-LABEL: v_exp_f32:
2695 ; SI-GISEL: ; %bb.0:
2696 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2697 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2698 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
2699 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2700 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2701 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2702 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
2703 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
2704 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2705 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2706 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2707 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2708 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
2709 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2710 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
2711 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2712 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
2713 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
2714 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2715 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2717 ; R600-LABEL: v_exp_f32:
2722 ; CM-LABEL: v_exp_f32:
2726 %result = call float @llvm.exp.f32(float %in)
; Codegen test for llvm.exp.f32 applied to llvm.fabs.f32(%in) in a plain
; (non-amdgpu_kernel) function that returns the float result.
; In the expected output the fabs appears as the |v0| source modifier on the
; instructions that read the input; the VI checks additionally expect an
; explicit v_and_b32 with 0x7fffffff ahead of the 0xfffff000 mask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
2730 define float @v_exp_fabs_f32(float %in) {
2731 ; VI-SDAG-LABEL: v_exp_fabs_f32:
2733 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2734 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
2735 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
2736 ; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1
2737 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
2738 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
2739 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
2740 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
2741 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
2742 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2743 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
2744 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
2745 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
2746 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2747 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
2748 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2749 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2750 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2751 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2752 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2753 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2754 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2755 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2756 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2758 ; VI-GISEL-LABEL: v_exp_fabs_f32:
2759 ; VI-GISEL: ; %bb.0:
2760 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2761 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
2762 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
2763 ; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1
2764 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
2765 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
2766 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
2767 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
2768 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2769 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2770 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
2771 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
2772 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
2773 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
2774 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2775 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2776 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2777 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2778 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2779 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2780 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2781 ; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
2782 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2783 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2785 ; GFX900-SDAG-LABEL: v_exp_fabs_f32:
2786 ; GFX900-SDAG: ; %bb.0:
2787 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2788 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2789 ; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
2790 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2791 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2792 ; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
2793 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
2794 ; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
2795 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2796 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
2797 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2798 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2799 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2800 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2801 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2802 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2803 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2804 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2805 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2806 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2808 ; GFX900-GISEL-LABEL: v_exp_fabs_f32:
2809 ; GFX900-GISEL: ; %bb.0:
2810 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2811 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2812 ; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4
2813 ; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1
2814 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2815 ; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2
2816 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
2817 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
2818 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2819 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2820 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
2821 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2822 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2823 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2824 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2825 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2826 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2827 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
2828 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2829 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2831 ; SI-SDAG-LABEL: v_exp_fabs_f32:
2833 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2834 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2835 ; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
2836 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2837 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2838 ; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
2839 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
2840 ; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
2841 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2842 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2843 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2844 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
2845 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2846 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
2847 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
2848 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2849 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2850 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2851 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2852 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2854 ; SI-GISEL-LABEL: v_exp_fabs_f32:
2855 ; SI-GISEL: ; %bb.0:
2856 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2857 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2858 ; SI-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4
2859 ; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1
2860 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2861 ; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2
2862 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
2863 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
2864 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2865 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2866 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2867 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2868 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
2869 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2870 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2871 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2872 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2873 ; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
2874 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2875 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2877 ; R600-LABEL: v_exp_fabs_f32:
2882 ; CM-LABEL: v_exp_fabs_f32:
2886 %fabs = call float @llvm.fabs.f32(float %in)
2887 %result = call float @llvm.exp.f32(float %fabs)
; Codegen test for llvm.exp.f32 applied to fneg(llvm.fabs.f32(%in)).
; The expected lowering differs by instruction selector. The SelectionDAG
; checks compare |v0| against sign-flipped range bounds (0x42ce8ed0 and
; 0xc2b17218), and on GFX900 and SI they also multiply |v0| by negated
; constants (0xbfb8aa3b, 0xb2a5705f). The GlobalISel checks keep the positive
; constants and apply the -|v0| source modifier instead.
; Both VI selectors expect the operand to be formed with v_or_b32 0x80000000.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
2891 define float @v_exp_fneg_fabs_f32(float %in) {
2892 ; VI-SDAG-LABEL: v_exp_fneg_fabs_f32:
2894 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2895 ; VI-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v0
2896 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
2897 ; VI-SDAG-NEXT: v_sub_f32_e64 v4, -|v0|, v1
2898 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
2899 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
2900 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
2901 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
2902 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
2903 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2904 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
2905 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
2906 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
2907 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
2908 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
2909 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
2910 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2911 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
2912 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2913 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2914 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2915 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2916 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2917 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2919 ; VI-GISEL-LABEL: v_exp_fneg_fabs_f32:
2920 ; VI-GISEL: ; %bb.0:
2921 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2922 ; VI-GISEL-NEXT: v_or_b32_e32 v1, 0x80000000, v0
2923 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
2924 ; VI-GISEL-NEXT: v_sub_f32_e64 v2, -|v0|, v1
2925 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
2926 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
2927 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
2928 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
2929 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
2930 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2931 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
2932 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
2933 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
2934 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
2935 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
2936 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2937 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2938 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2939 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
2940 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2941 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2942 ; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
2943 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2944 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2946 ; GFX900-SDAG-LABEL: v_exp_fneg_fabs_f32:
2947 ; GFX900-SDAG: ; %bb.0:
2948 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2949 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
2950 ; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
2951 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2952 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2953 ; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
2954 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
2955 ; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
2956 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
2957 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
2958 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
2959 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
2960 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
2961 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
2962 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
2963 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2964 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
2965 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
2966 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
2967 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2969 ; GFX900-GISEL-LABEL: v_exp_fneg_fabs_f32:
2970 ; GFX900-GISEL: ; %bb.0:
2971 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2972 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
2973 ; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, -|v0|, s4
2974 ; GFX900-GISEL-NEXT: v_fma_f32 v2, -|v0|, s4, -v1
2975 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
2976 ; GFX900-GISEL-NEXT: v_fma_f32 v2, -|v0|, v3, v2
2977 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
2978 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
2979 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2980 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
2981 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
2982 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
2983 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
2984 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
2985 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
2986 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
2987 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
2988 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
2989 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
2990 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2992 ; SI-SDAG-LABEL: v_exp_fneg_fabs_f32:
2994 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2995 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
2996 ; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
2997 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
2998 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
2999 ; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
3000 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
3001 ; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
3002 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3003 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3004 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3005 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
3006 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
3007 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
3008 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
3009 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3010 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3011 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
3012 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3013 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3015 ; SI-GISEL-LABEL: v_exp_fneg_fabs_f32:
3016 ; SI-GISEL: ; %bb.0:
3017 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3018 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3019 ; SI-GISEL-NEXT: v_mul_f32_e64 v1, -|v0|, s4
3020 ; SI-GISEL-NEXT: v_fma_f32 v2, -|v0|, s4, -v1
3021 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3022 ; SI-GISEL-NEXT: v_fma_f32 v2, -|v0|, v3, v2
3023 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
3024 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
3025 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3026 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3027 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3028 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3029 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
3030 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3031 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
3032 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3033 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
3034 ; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
3035 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3036 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3038 ; R600-LABEL: v_exp_fneg_fabs_f32:
3043 ; CM-LABEL: v_exp_fneg_fabs_f32:
3047 %fabs = call float @llvm.fabs.f32(float %in)
3048 %fneg.fabs = fneg float %fabs
3049 %result = call float @llvm.exp.f32(float %fneg.fabs)
; Codegen test for llvm.exp.f32 applied to fneg(%in).
; The SelectionDAG checks compare the unmodified v0 against sign-flipped range
; bounds (0x42ce8ed0 and 0xc2b17218), and on GFX900 and SI they multiply v0 by
; negated constants (0xbfb8aa3b, 0xb2a5705f). The GlobalISel checks keep the
; positive constants and apply the -v0 source modifier.
; Both VI selectors expect the operand to be formed with v_xor_b32 0x80000000.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
3053 define float @v_exp_fneg_f32(float %in) {
3054 ; VI-SDAG-LABEL: v_exp_fneg_f32:
3056 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3057 ; VI-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
3058 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
3059 ; VI-SDAG-NEXT: v_sub_f32_e64 v4, -v0, v1
3060 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
3061 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
3062 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
3063 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
3064 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
3065 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3066 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
3067 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
3068 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
3069 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3070 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
3071 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
3072 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3073 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
3074 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3075 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3076 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3077 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3078 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3079 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3081 ; VI-GISEL-LABEL: v_exp_fneg_f32:
3082 ; VI-GISEL: ; %bb.0:
3083 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3084 ; VI-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
3085 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
3086 ; VI-GISEL-NEXT: v_sub_f32_e64 v2, -v0, v1
3087 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
3088 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
3089 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
3090 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
3091 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3092 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3093 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
3094 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
3095 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3096 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
3097 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3098 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3099 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3100 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3101 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
3102 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3103 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
3104 ; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
3105 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3106 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3108 ; GFX900-SDAG-LABEL: v_exp_fneg_f32:
3109 ; GFX900-SDAG: ; %bb.0:
3110 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3111 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0
3112 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
3113 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3114 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3115 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3116 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
3117 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3118 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3119 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
3120 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3121 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
3122 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3123 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
3124 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3125 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3126 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3127 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3128 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3129 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3131 ; GFX900-GISEL-LABEL: v_exp_fneg_f32:
3132 ; GFX900-GISEL: ; %bb.0:
3133 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3134 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3135 ; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, -v0, s4
3136 ; GFX900-GISEL-NEXT: v_fma_f32 v2, -v0, s4, -v1
3137 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3138 ; GFX900-GISEL-NEXT: v_fma_f32 v2, -v0, v3, v2
3139 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
3140 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
3141 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3142 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3143 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
3144 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3145 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3146 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3147 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
3148 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3149 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
3150 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
3151 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3152 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3154 ; SI-SDAG-LABEL: v_exp_fneg_f32:
3156 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3157 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0
3158 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
3159 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3160 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3161 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3162 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
3163 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3164 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3165 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3166 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3167 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
3168 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3169 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
3170 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
3171 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3172 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3173 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3174 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3175 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3177 ; SI-GISEL-LABEL: v_exp_fneg_f32:
3178 ; SI-GISEL: ; %bb.0:
3179 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3180 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3181 ; SI-GISEL-NEXT: v_mul_f32_e64 v1, -v0, s4
3182 ; SI-GISEL-NEXT: v_fma_f32 v2, -v0, s4, -v1
3183 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3184 ; SI-GISEL-NEXT: v_fma_f32 v2, -v0, v3, v2
3185 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
3186 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
3187 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3188 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3189 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3190 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3191 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
3192 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3193 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
3194 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3195 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
3196 ; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
3197 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3198 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3200 ; R600-LABEL: v_exp_fneg_f32:
3205 ; CM-LABEL: v_exp_fneg_f32:
3209 %fneg = fneg float %in
3210 %result = call float @llvm.exp.f32(float %fneg)
; Codegen test for llvm.exp.f32 called with the `fast` flag on the call.
; Every configuration checked below expects the same short sequence: the input
; is multiplied by 0x3fb8aa3b and fed to a single v_exp_f32. When the input
; compares below 0xc2aeac50, 0x42800000 is added to it first and the result is
; multiplied by 0x114b4ea4 afterwards (both selected through the same vcc).
; The tonga and gfx900 runs share the GCN-SDAG / GCN-GISEL prefixes here.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
3214 define float @v_exp_f32_fast(float %in) {
3215 ; GCN-SDAG-LABEL: v_exp_f32_fast:
3216 ; GCN-SDAG: ; %bb.0:
3217 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3218 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3219 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3220 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3221 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3222 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3223 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3224 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3225 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3226 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3228 ; GCN-GISEL-LABEL: v_exp_f32_fast:
3229 ; GCN-GISEL: ; %bb.0:
3230 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3231 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3232 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3233 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3234 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3235 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3236 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3237 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3238 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3239 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3241 ; SI-SDAG-LABEL: v_exp_f32_fast:
3243 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3244 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3245 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3246 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3247 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3248 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3249 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3250 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3251 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3252 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3254 ; SI-GISEL-LABEL: v_exp_f32_fast:
3255 ; SI-GISEL: ; %bb.0:
3256 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3257 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3258 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3259 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3260 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3261 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3262 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3263 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3264 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3265 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3267 ; R600-LABEL: v_exp_f32_fast:
3272 ; CM-LABEL: v_exp_f32_fast:
3276 %result = call fast float @llvm.exp.f32(float %in)
; Codegen test for llvm.exp.f32 where the call itself carries no fast-math
; flags but the enclosing function has the "unsafe-fp-math"="true" attribute.
; The expected output is the short form: multiply by 0x3fb8aa3b, one
; v_exp_f32, and a vcc-selected pre-add of 0x42800000 / post-multiply by
; 0x114b4ea4 for inputs that compare below 0xc2aeac50.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
3280 define float @v_exp_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
3281 ; GCN-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
3282 ; GCN-SDAG: ; %bb.0:
3283 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3284 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3285 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3286 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3287 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3288 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3289 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3290 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3291 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3292 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3294 ; GCN-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
3295 ; GCN-GISEL: ; %bb.0:
3296 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3297 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3298 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3299 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3300 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3301 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3302 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3303 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3304 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3305 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3307 ; SI-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
3309 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3310 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3311 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3312 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3313 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3314 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3315 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3316 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3317 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3318 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3320 ; SI-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
3321 ; SI-GISEL: ; %bb.0:
3322 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3323 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3324 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3325 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3326 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3327 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3328 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3329 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3330 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3331 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3333 ; R600-LABEL: v_exp_f32_unsafe_math_attr:
3338 ; CM-LABEL: v_exp_f32_unsafe_math_attr:
3342 %result = call float @llvm.exp.f32(float %in)
; Codegen test for llvm.exp.f32 where the call itself carries no fast-math
; flags but the enclosing function has the "approx-func-fp-math"="true"
; attribute.
; The expected output is the short form: multiply by 0x3fb8aa3b, one
; v_exp_f32, and a vcc-selected pre-add of 0x42800000 / post-multiply by
; 0x114b4ea4 for inputs that compare below 0xc2aeac50.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
3346 define float @v_exp_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
3347 ; GCN-SDAG-LABEL: v_exp_f32_approx_fn_attr:
3348 ; GCN-SDAG: ; %bb.0:
3349 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3350 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3351 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3352 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3353 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3354 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3355 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3356 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3357 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3358 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3360 ; GCN-GISEL-LABEL: v_exp_f32_approx_fn_attr:
3361 ; GCN-GISEL: ; %bb.0:
3362 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3363 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3364 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3365 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3366 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3367 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3368 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3369 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3370 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3371 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3373 ; SI-SDAG-LABEL: v_exp_f32_approx_fn_attr:
3375 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3376 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3377 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3378 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3379 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3380 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3381 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3382 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3383 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3384 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3386 ; SI-GISEL-LABEL: v_exp_f32_approx_fn_attr:
3387 ; SI-GISEL: ; %bb.0:
3388 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3389 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3390 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3391 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3392 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3393 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3394 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3395 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3396 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3397 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3399 ; R600-LABEL: v_exp_f32_approx_fn_attr:
3404 ; CM-LABEL: v_exp_f32_approx_fn_attr:
3408 %result = call float @llvm.exp.f32(float %in)
; v_exp_f32_ninf: llvm.exp.f32 on a float argument with the 'ninf' fast-math
; flag on the call. The expected code still selects 0 when the input is below
; 0xc2ce8ed0, but (unlike v_exp_f32_daz and v_exp_f32_nnan in this file) there
; is no compare against 0x42b17218 and no select of 0x7f800000 afterwards.
3412 define float @v_exp_f32_ninf(float %in) {
3413 ; VI-SDAG-LABEL: v_exp_f32_ninf:
3415 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3416 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3417 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
3418 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
3419 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
3420 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
3421 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
3422 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
3423 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3424 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
3425 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
3426 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
3427 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3428 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
3429 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3430 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3431 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3432 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
3433 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3435 ; VI-GISEL-LABEL: v_exp_f32_ninf:
3436 ; VI-GISEL: ; %bb.0:
3437 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3438 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3439 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
3440 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
3441 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
3442 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
3443 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
3444 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3445 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3446 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
3447 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
3448 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3449 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
3450 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3451 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3452 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3453 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3454 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
3455 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3457 ; GFX900-SDAG-LABEL: v_exp_f32_ninf:
3458 ; GFX900-SDAG: ; %bb.0:
3459 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3460 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3461 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3462 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3463 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3464 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3465 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3466 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3467 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3468 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
3469 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3470 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3471 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3472 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3473 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
3474 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3476 ; GFX900-GISEL-LABEL: v_exp_f32_ninf:
3477 ; GFX900-GISEL: ; %bb.0:
3478 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3479 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3480 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3481 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
3482 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3483 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
3484 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
3485 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
3486 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3487 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3488 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
3489 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3490 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3491 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3492 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
3493 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3495 ; SI-SDAG-LABEL: v_exp_f32_ninf:
3497 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3498 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3499 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3500 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3501 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3502 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3503 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3504 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3505 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3506 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3507 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3508 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3509 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3510 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
3511 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
3512 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3514 ; SI-GISEL-LABEL: v_exp_f32_ninf:
3515 ; SI-GISEL: ; %bb.0:
3516 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3517 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3518 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3519 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
3520 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3521 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
3522 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
3523 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
3524 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3525 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3526 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3527 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
3528 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3529 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3530 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
3531 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3533 ; R600-LABEL: v_exp_f32_ninf:
3538 ; CM-LABEL: v_exp_f32_ninf:
3542 %result = call ninf float @llvm.exp.f32(float %in)
; v_exp_f32_afn: llvm.exp.f32 with the 'afn' fast-math flag on the call.
; The expected code is one multiply by 0x3fb8aa3b feeding a single v_exp_f32.
; When the input is below 0xc2aeac50 it is first offset by 0x42800000 (64.0)
; and the result is then rescaled by 0x114b4ea4 (presumably exp(-64); confirm),
; both selected on the same vcc.
3546 define float @v_exp_f32_afn(float %in) {
3547 ; GCN-SDAG-LABEL: v_exp_f32_afn:
3548 ; GCN-SDAG: ; %bb.0:
3549 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3550 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3551 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3552 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3553 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3554 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3555 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3556 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3557 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3558 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3560 ; GCN-GISEL-LABEL: v_exp_f32_afn:
3561 ; GCN-GISEL: ; %bb.0:
3562 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3563 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3564 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3565 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3566 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3567 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3568 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3569 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3570 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3571 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3573 ; SI-SDAG-LABEL: v_exp_f32_afn:
3575 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3576 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3577 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3578 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3579 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3580 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3581 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3582 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3583 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3584 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3586 ; SI-GISEL-LABEL: v_exp_f32_afn:
3587 ; SI-GISEL: ; %bb.0:
3588 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3589 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3590 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3591 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3592 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3593 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3594 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3595 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3596 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3597 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3599 ; R600-LABEL: v_exp_f32_afn:
3604 ; CM-LABEL: v_exp_f32_afn:
3608 %result = call afn float @llvm.exp.f32(float %in)
; v_exp_f32_afn_daz: 'afn' llvm.exp.f32 in a function carrying attribute
; group #0 (defined outside this excerpt; the _daz suffix suggests a
; denormal-flushing mode - confirm against the attribute list).
; The expected code is only the multiply by 0x3fb8aa3b and v_exp_f32; the
; range compare, input offset and result rescale seen in v_exp_f32_afn are
; absent.
3612 define float @v_exp_f32_afn_daz(float %in) #0 {
3613 ; GCN-LABEL: v_exp_f32_afn_daz:
3615 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3616 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3617 ; GCN-NEXT: v_exp_f32_e32 v0, v0
3618 ; GCN-NEXT: s_setpc_b64 s[30:31]
3620 ; SI-LABEL: v_exp_f32_afn_daz:
3622 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3623 ; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3624 ; SI-NEXT: v_exp_f32_e32 v0, v0
3625 ; SI-NEXT: s_setpc_b64 s[30:31]
3627 ; R600-LABEL: v_exp_f32_afn_daz:
3632 ; CM-LABEL: v_exp_f32_afn_daz:
3636 %result = call afn float @llvm.exp.f32(float %in)
; v_exp_f32_afn_dynamic: 'afn' llvm.exp.f32 in a function carrying attribute
; group #1 (defined outside this excerpt; the _dynamic suffix suggests a
; dynamic denormal mode - confirm against the attribute list).
; The expected code is the same range-scaled sequence as v_exp_f32_afn:
; compare against 0xc2aeac50, offset by 0x42800000, rescale by 0x114b4ea4.
3640 define float @v_exp_f32_afn_dynamic(float %in) #1 {
3641 ; GCN-SDAG-LABEL: v_exp_f32_afn_dynamic:
3642 ; GCN-SDAG: ; %bb.0:
3643 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3644 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3645 ; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3646 ; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3647 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3648 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3649 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3650 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3651 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3652 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3654 ; GCN-GISEL-LABEL: v_exp_f32_afn_dynamic:
3655 ; GCN-GISEL: ; %bb.0:
3656 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3657 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3658 ; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3659 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3660 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3661 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3662 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3663 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3664 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3665 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3667 ; SI-SDAG-LABEL: v_exp_f32_afn_dynamic:
3669 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3670 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3671 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
3672 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3673 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3674 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3675 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3676 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3677 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3678 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3680 ; SI-GISEL-LABEL: v_exp_f32_afn_dynamic:
3681 ; SI-GISEL: ; %bb.0:
3682 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3683 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3684 ; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
3685 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3686 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
3687 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3688 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3689 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3690 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3691 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3693 ; R600-LABEL: v_exp_f32_afn_dynamic:
3698 ; CM-LABEL: v_exp_f32_afn_dynamic:
3702 %result = call afn float @llvm.exp.f32(float %in)
; v_fabs_exp_f32_afn: 'afn' llvm.exp.f32 applied to llvm.fabs.f32(%in).
; The expected code has no separate fabs instruction: the absolute value shows
; up as the |v0| source modifier on the add, the compare and the first select.
; NOTE(review): the compare is |v0| < 0xc2aeac50, and that constant has its
; sign bit set, so it looks like it can never be true for a non-NaN input. The
; checks below record the current codegen, not an ideal one; confirm before
; relying on the scaling path here.
3706 define float @v_fabs_exp_f32_afn(float %in) {
3707 ; GCN-SDAG-LABEL: v_fabs_exp_f32_afn:
3708 ; GCN-SDAG: ; %bb.0:
3709 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3710 ; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3711 ; GCN-SDAG-NEXT: s_mov_b32 s5, 0x42800000
3712 ; GCN-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5
3713 ; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
3714 ; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc
3715 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3716 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
3717 ; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3718 ; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3719 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
3721 ; GCN-GISEL-LABEL: v_fabs_exp_f32_afn:
3722 ; GCN-GISEL: ; %bb.0:
3723 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3724 ; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3725 ; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
3726 ; GCN-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2
3727 ; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
3728 ; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
3729 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3730 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
3731 ; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3732 ; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3733 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
3735 ; SI-SDAG-LABEL: v_fabs_exp_f32_afn:
3737 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3738 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
3739 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x42800000
3740 ; SI-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5
3741 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
3742 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc
3743 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3744 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
3745 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3746 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3747 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3749 ; SI-GISEL-LABEL: v_fabs_exp_f32_afn:
3750 ; SI-GISEL: ; %bb.0:
3751 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3752 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
3753 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
3754 ; SI-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2
3755 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
3756 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
3757 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
3758 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
3759 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
3760 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3761 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3763 ; R600-LABEL: v_fabs_exp_f32_afn:
3768 ; CM-LABEL: v_fabs_exp_f32_afn:
3772 %fabs = call float @llvm.fabs.f32(float %in)
3773 %result = call afn float @llvm.exp.f32(float %fabs)
; v_exp_f32_daz: llvm.exp.f32 with no fast-math flags, in a function carrying
; attribute group #0 (defined outside this excerpt; the _daz suffix suggests a
; denormal-flushing mode - confirm against the attribute list).
; The expected code is the extended expansion: the tonga checks split the input
; with a 0xfffff000 mask and use separate multiplies/adds, while the gfx900 and
; tahiti checks use v_fma_f32. The result goes through v_ldexp_f32 and then two
; selects: 0 when the input is below 0xc2ce8ed0, and 0x7f800000 when it is
; above 0x42b17218.
3777 define float @v_exp_f32_daz(float %in) #0 {
3778 ; VI-SDAG-LABEL: v_exp_f32_daz:
3780 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3781 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3782 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
3783 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
3784 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
3785 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
3786 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
3787 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
3788 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3789 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
3790 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
3791 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
3792 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3793 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
3794 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3795 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3796 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
3797 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3798 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3799 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3800 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3801 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3802 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3804 ; VI-GISEL-LABEL: v_exp_f32_daz:
3805 ; VI-GISEL: ; %bb.0:
3806 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3807 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3808 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
3809 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
3810 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
3811 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
3812 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
3813 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3814 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3815 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
3816 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
3817 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3818 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
3819 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3820 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3821 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3822 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3823 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3824 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3825 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
3826 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
3827 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3828 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3830 ; GFX900-SDAG-LABEL: v_exp_f32_daz:
3831 ; GFX900-SDAG: ; %bb.0:
3832 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3833 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3834 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3835 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3836 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3837 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3838 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3839 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3840 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3841 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
3842 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3843 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3844 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3845 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
3846 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3847 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3848 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3849 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3850 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3851 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3853 ; GFX900-GISEL-LABEL: v_exp_f32_daz:
3854 ; GFX900-GISEL: ; %bb.0:
3855 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3856 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3857 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3858 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
3859 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3860 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
3861 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
3862 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
3863 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3864 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3865 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
3866 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3867 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3868 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3869 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3870 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3871 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
3872 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
3873 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3874 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3876 ; SI-SDAG-LABEL: v_exp_f32_daz:
3878 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3879 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3880 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3881 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3882 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3883 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3884 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3885 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3886 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3887 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3888 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
3889 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3890 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3891 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
3892 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
3893 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3894 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3895 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3896 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3897 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3899 ; SI-GISEL-LABEL: v_exp_f32_daz:
3900 ; SI-GISEL: ; %bb.0:
3901 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3902 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3903 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3904 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
3905 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
3906 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
3907 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
3908 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
3909 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3910 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
3911 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3912 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3913 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
3914 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3915 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3916 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3917 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
3918 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
3919 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3920 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3922 ; R600-LABEL: v_exp_f32_daz:
3927 ; CM-LABEL: v_exp_f32_daz:
3931 %result = call float @llvm.exp.f32(float %in)
; v_exp_f32_nnan: llvm.exp.f32 with the 'nnan' fast-math flag on the call.
; In the checks below the flag removes nothing: the extended expansion, the
; v_ldexp_f32, and both range selects (0 below 0xc2ce8ed0, 0x7f800000 above
; 0x42b17218) are all still expected, matching the sequence checked for
; v_exp_f32_daz.
3935 define float @v_exp_f32_nnan(float %in) {
3936 ; VI-SDAG-LABEL: v_exp_f32_nnan:
3938 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3939 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3940 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
3941 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
3942 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
3943 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
3944 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
3945 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
3946 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3947 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
3948 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
3949 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
3950 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
3951 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
3952 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
3953 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
3954 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
3955 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
3956 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3957 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
3958 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
3959 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
3960 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3962 ; VI-GISEL-LABEL: v_exp_f32_nnan:
3963 ; VI-GISEL: ; %bb.0:
3964 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3965 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3966 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
3967 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
3968 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
3969 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
3970 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
3971 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
3972 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3973 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
3974 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
3975 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3976 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
3977 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
3978 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
3979 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
3980 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
3981 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
3982 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
3983 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
3984 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
3985 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
3986 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3988 ; GFX900-SDAG-LABEL: v_exp_f32_nnan:
3989 ; GFX900-SDAG: ; %bb.0:
3990 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3991 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
3992 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
3993 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
3994 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
3995 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
3996 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
3997 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
3998 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
3999 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4000 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4001 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4002 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4003 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4004 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4005 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4006 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4007 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4008 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4009 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4011 ; GFX900-GISEL-LABEL: v_exp_f32_nnan:
4012 ; GFX900-GISEL: ; %bb.0:
4013 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4014 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4015 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4016 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4017 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4018 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4019 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4020 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4021 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4022 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4023 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4024 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4025 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4026 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4027 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4028 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4029 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4030 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4031 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4032 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4034 ; SI-SDAG-LABEL: v_exp_f32_nnan:
4036 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4037 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4038 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4039 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4040 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4041 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4042 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4043 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4044 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4045 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4046 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4047 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4048 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4049 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4050 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4051 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4052 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4053 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4054 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4055 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4057 ; SI-GISEL-LABEL: v_exp_f32_nnan:
4058 ; SI-GISEL: ; %bb.0:
4059 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4060 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4061 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4062 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4063 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4064 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4065 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4066 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4067 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4068 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4069 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4070 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4071 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4072 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4073 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4074 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4075 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4076 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4077 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4078 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4080 ; R600-LABEL: v_exp_f32_nnan:
4085 ; CM-LABEL: v_exp_f32_nnan:
4089 %result = call nnan float @llvm.exp.f32(float %in)
4093 define float @v_exp_f32_nnan_daz(float %in) #0 {
4094 ; VI-SDAG-LABEL: v_exp_f32_nnan_daz:
4096 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4097 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4098 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4099 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4100 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4101 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4102 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4103 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4104 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4105 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4106 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4107 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4108 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4109 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4110 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4111 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4112 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4113 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4114 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4115 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4116 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4117 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4118 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4120 ; VI-GISEL-LABEL: v_exp_f32_nnan_daz:
4121 ; VI-GISEL: ; %bb.0:
4122 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4123 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4124 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4125 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4126 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4127 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4128 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4129 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4130 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4131 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4132 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4133 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4134 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4135 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4136 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4137 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4138 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4139 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4140 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4141 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4142 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4143 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4144 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4146 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_daz:
4147 ; GFX900-SDAG: ; %bb.0:
4148 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4149 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4150 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4151 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4152 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4153 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4154 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4155 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4156 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4157 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4158 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4159 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4160 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4161 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4162 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4163 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4164 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4165 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4166 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4167 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4169 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_daz:
4170 ; GFX900-GISEL: ; %bb.0:
4171 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4172 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4173 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4174 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4175 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4176 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4177 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4178 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4179 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4180 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4181 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4182 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4183 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4184 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4185 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4186 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4187 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4188 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4189 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4190 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4192 ; SI-SDAG-LABEL: v_exp_f32_nnan_daz:
4194 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4195 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4196 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4197 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4198 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4199 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4200 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4201 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4202 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4203 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4204 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4205 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4206 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4207 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4208 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4209 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4210 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4211 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4212 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4213 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4215 ; SI-GISEL-LABEL: v_exp_f32_nnan_daz:
4216 ; SI-GISEL: ; %bb.0:
4217 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4218 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4219 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4220 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4221 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4222 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4223 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4224 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4225 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4226 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4227 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4228 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4229 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4230 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4231 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4232 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4233 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4234 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4235 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4236 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4238 ; R600-LABEL: v_exp_f32_nnan_daz:
4243 ; CM-LABEL: v_exp_f32_nnan_daz:
4247 %result = call nnan float @llvm.exp.f32(float %in)
; Lowering of llvm.exp.f32 when the call carries only the nnan fast-math flag.
; The function uses attribute group #1, which is defined outside this chunk;
; the "_dynamic" suffix presumably names its denormal mode (confirm there).
; Each amdgcn expectation below still finishes with two range selects, the
; first yielding 0 after a compare with 0xc2ce8ed0 (about -103.28) and the
; second yielding 0x7f800000 (+inf) after a compare with 0x42b17218 (about
; 88.72).
4251 define float @v_exp_f32_nnan_dynamic(float %in) #1 {
4252 ; VI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
4254 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4255 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4256 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4257 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4258 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4259 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4260 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4261 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4262 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4263 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4264 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4265 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4266 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4267 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4268 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4269 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4270 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4271 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4272 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4273 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4274 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4275 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4276 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4278 ; VI-GISEL-LABEL: v_exp_f32_nnan_dynamic:
4279 ; VI-GISEL: ; %bb.0:
4280 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4281 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4282 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4283 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4284 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4285 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4286 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4287 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4288 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4289 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4290 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4291 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4292 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4293 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4294 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4295 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4296 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4297 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4298 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4299 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4300 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4301 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4302 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4304 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_dynamic:
4305 ; GFX900-SDAG: ; %bb.0:
4306 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4307 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4308 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4309 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4310 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4311 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4312 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4313 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4314 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4315 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4316 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4317 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4318 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4319 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4320 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4321 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4322 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4323 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4324 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4325 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4327 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_dynamic:
4328 ; GFX900-GISEL: ; %bb.0:
4329 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4330 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4331 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4332 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4333 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4334 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4335 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4336 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4337 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4338 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4339 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4340 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4341 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4342 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4343 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4344 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4345 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4346 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4347 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4348 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4350 ; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
4352 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4353 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4354 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4355 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4356 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4357 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4358 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4359 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4360 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4361 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4362 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4363 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4364 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4365 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
4366 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4367 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4368 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
4369 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
4370 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4371 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4373 ; SI-GISEL-LABEL: v_exp_f32_nnan_dynamic:
4374 ; SI-GISEL: ; %bb.0:
4375 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4376 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4377 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4378 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4379 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4380 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4381 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4382 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4383 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4384 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4385 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4386 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
4387 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4388 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4389 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4390 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
4391 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
4392 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
4393 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
4394 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4396 ; R600-LABEL: v_exp_f32_nnan_dynamic:
4401 ; CM-LABEL: v_exp_f32_nnan_dynamic:
4405 %result = call nnan float @llvm.exp.f32(float %in)
; Lowering of llvm.exp.f32 when the call carries only the ninf fast-math flag.
; The function uses attribute group #0, which is defined outside this chunk;
; the "_daz" suffix presumably names its denormal mode (confirm there).
; Each amdgcn expectation below finishes with a single range select that
; yields 0 after a compare with 0xc2ce8ed0 (about -103.28); no select of
; 0x7f800000 (+inf) is expected.
4409 define float @v_exp_f32_ninf_daz(float %in) #0 {
4410 ; VI-SDAG-LABEL: v_exp_f32_ninf_daz:
4412 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4413 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4414 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4415 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4416 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4417 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4418 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4419 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4420 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4421 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4422 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4423 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4424 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4425 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4426 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4427 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4428 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4429 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4430 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4432 ; VI-GISEL-LABEL: v_exp_f32_ninf_daz:
4433 ; VI-GISEL: ; %bb.0:
4434 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4435 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4436 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4437 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4438 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4439 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4440 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4441 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4442 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4443 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4444 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4445 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4446 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4447 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4448 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4449 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4450 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4451 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4452 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4454 ; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz:
4455 ; GFX900-SDAG: ; %bb.0:
4456 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4457 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4458 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4459 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4460 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4461 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4462 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4463 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4464 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4465 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4466 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4467 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4468 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4469 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4470 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4471 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4473 ; GFX900-GISEL-LABEL: v_exp_f32_ninf_daz:
4474 ; GFX900-GISEL: ; %bb.0:
4475 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4476 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4477 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4478 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4479 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4480 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4481 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4482 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4483 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4484 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4485 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4486 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4487 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4488 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4489 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4490 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4492 ; SI-SDAG-LABEL: v_exp_f32_ninf_daz:
4494 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4495 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4496 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4497 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4498 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4499 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4500 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4501 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4502 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4503 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4504 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4505 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4506 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4507 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4508 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4509 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4511 ; SI-GISEL-LABEL: v_exp_f32_ninf_daz:
4512 ; SI-GISEL: ; %bb.0:
4513 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4514 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4515 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4516 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4517 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4518 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4519 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4520 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4521 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4522 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4523 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4524 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4525 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4526 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4527 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4528 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4530 ; R600-LABEL: v_exp_f32_ninf_daz:
4535 ; CM-LABEL: v_exp_f32_ninf_daz:
4539 %result = call ninf float @llvm.exp.f32(float %in)
; Lowering of llvm.exp.f32 when the call carries only the ninf fast-math flag.
; The function uses attribute group #1, which is defined outside this chunk;
; the "_dynamic" suffix presumably names its denormal mode (confirm there).
; Each amdgcn expectation below finishes with a single range select that
; yields 0 after a compare with 0xc2ce8ed0 (about -103.28); no select of
; 0x7f800000 (+inf) is expected.
4543 define float @v_exp_f32_ninf_dynamic(float %in) #1 {
4544 ; VI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
4546 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4547 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4548 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4549 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4550 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4551 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4552 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4553 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4554 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4555 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4556 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4557 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4558 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4559 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4560 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4561 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4562 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4563 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4564 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4566 ; VI-GISEL-LABEL: v_exp_f32_ninf_dynamic:
4567 ; VI-GISEL: ; %bb.0:
4568 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4569 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4570 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4571 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4572 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4573 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4574 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4575 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4576 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4577 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4578 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4579 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4580 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4581 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4582 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4583 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4584 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4585 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4586 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4588 ; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic:
4589 ; GFX900-SDAG: ; %bb.0:
4590 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4591 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4592 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4593 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4594 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4595 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4596 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4597 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4598 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4599 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4600 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4601 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4602 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4603 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4604 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4605 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4607 ; GFX900-GISEL-LABEL: v_exp_f32_ninf_dynamic:
4608 ; GFX900-GISEL: ; %bb.0:
4609 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4610 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4611 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4612 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4613 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4614 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4615 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4616 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4617 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4618 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4619 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4620 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4621 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4622 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4623 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4624 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4626 ; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
4628 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4629 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4630 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4631 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4632 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4633 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4634 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4635 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4636 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4637 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4638 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4639 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4640 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4641 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4642 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4643 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4645 ; SI-GISEL-LABEL: v_exp_f32_ninf_dynamic:
4646 ; SI-GISEL: ; %bb.0:
4647 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4648 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4649 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4650 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4651 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4652 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4653 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4654 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4655 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4656 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4657 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4658 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4659 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4660 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4661 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4662 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4664 ; R600-LABEL: v_exp_f32_ninf_dynamic:
4669 ; CM-LABEL: v_exp_f32_ninf_dynamic:
4673 %result = call ninf float @llvm.exp.f32(float %in)
; Lowering of llvm.exp.f32 when the call carries both the nnan and ninf
; fast-math flags. This define line references no attribute group.
; Each amdgcn expectation below finishes with a single range select that
; yields 0 after a compare with 0xc2ce8ed0 (about -103.28); no select of
; 0x7f800000 (+inf) is expected.
4677 define float @v_exp_f32_nnan_ninf(float %in) {
4678 ; VI-SDAG-LABEL: v_exp_f32_nnan_ninf:
4680 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4681 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4682 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4683 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4684 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4685 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4686 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4687 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4688 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4689 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4690 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4691 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4692 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4693 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4694 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4695 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4696 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4697 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4698 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4700 ; VI-GISEL-LABEL: v_exp_f32_nnan_ninf:
4701 ; VI-GISEL: ; %bb.0:
4702 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4703 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4704 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4705 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4706 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4707 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4708 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4709 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4710 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4711 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4712 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4713 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4714 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4715 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4716 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4717 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4718 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4719 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4720 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4722 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf:
4723 ; GFX900-SDAG: ; %bb.0:
4724 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4725 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4726 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4727 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4728 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4729 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4730 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4731 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4732 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4733 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4734 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4735 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4736 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4737 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4738 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4739 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4741 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf:
4742 ; GFX900-GISEL: ; %bb.0:
4743 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4744 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4745 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4746 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4747 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4748 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4749 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4750 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4751 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4752 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4753 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4754 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4755 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4756 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4757 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4758 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4760 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf:
4762 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4763 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4764 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4765 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4766 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4767 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4768 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4769 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4770 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4771 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4772 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4773 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4774 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4775 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4776 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4777 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4779 ; SI-GISEL-LABEL: v_exp_f32_nnan_ninf:
4780 ; SI-GISEL: ; %bb.0:
4781 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4782 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4783 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4784 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4785 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4786 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4787 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4788 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4789 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4790 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4791 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4792 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4793 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4794 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4795 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4796 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4798 ; R600-LABEL: v_exp_f32_nnan_ninf:
4803 ; CM-LABEL: v_exp_f32_nnan_ninf:
4807 %result = call nnan ninf float @llvm.exp.f32(float %in)
; Lowering of llvm.exp.f32 when the call carries both the nnan and ninf
; fast-math flags. The function uses attribute group #0, which is defined
; outside this chunk; the "_daz" suffix presumably names its denormal mode
; (confirm there).
; Each amdgcn expectation below finishes with a single range select that
; yields 0 after a compare with 0xc2ce8ed0 (about -103.28); no select of
; 0x7f800000 (+inf) is expected.
4811 define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
4812 ; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
4814 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4815 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4816 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4817 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4818 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4819 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4820 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4821 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4822 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4823 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4824 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4825 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4826 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4827 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4828 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4829 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4830 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4831 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4832 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4834 ; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz:
4835 ; VI-GISEL: ; %bb.0:
4836 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4837 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4838 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4839 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4840 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4841 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4842 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4843 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4844 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4845 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4846 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4847 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4848 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4849 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4850 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4851 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4852 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4853 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4854 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4856 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
4857 ; GFX900-SDAG: ; %bb.0:
4858 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4859 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4860 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4861 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4862 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4863 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4864 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4865 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4866 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4867 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
4868 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4869 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4870 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4871 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4872 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4873 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4875 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_daz:
4876 ; GFX900-GISEL: ; %bb.0:
4877 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4878 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4879 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4880 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4881 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4882 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4883 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4884 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4885 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4886 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4887 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
4888 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4889 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4890 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4891 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4892 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4894 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
4896 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4897 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4898 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4899 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4900 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4901 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4902 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4903 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
4904 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
4905 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4906 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
4907 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4908 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4909 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
4910 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4911 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4913 ; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz:
4914 ; SI-GISEL: ; %bb.0:
4915 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4916 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4917 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4918 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4919 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
4920 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4921 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
4922 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
4923 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4924 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
4925 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4926 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
4927 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4928 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4929 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4930 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4932 ; R600-LABEL: v_exp_f32_nnan_ninf_daz:
4937 ; CM-LABEL: v_exp_f32_nnan_ninf_daz:
4941 %result = call nnan ninf float @llvm.exp.f32(float %in)
; exp.f32 called with only the nnan and ninf flags, in a function carrying
; attribute group #1 (defined outside this excerpt; presumably the dynamic
; denormal mode the name refers to -- confirm against the attribute list).
; The GCN/SI checks below still select 0 for inputs below 0xc2ce8ed0, but
; contain no select of 0x7f800000 (+inf) for large inputs.
4945 define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
4946 ; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
4948 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4949 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4950 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
4951 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
4952 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
4953 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
4954 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
4955 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
4956 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4957 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
4958 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
4959 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
4960 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
4961 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
4962 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
4963 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
4964 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
4965 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
4966 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4968 ; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
4969 ; VI-GISEL: ; %bb.0:
4970 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4971 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4972 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4973 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
4974 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
4975 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
4976 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
4977 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
4978 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4979 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
4980 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
4981 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4982 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
4983 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
4984 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
4985 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
4986 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
4987 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
4988 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4990 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
4991 ; GFX900-SDAG: ; %bb.0:
4992 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4993 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
4994 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
4995 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
4996 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
4997 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
4998 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
4999 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5000 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5001 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5002 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5003 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5004 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5005 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5006 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
5007 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5009 ; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
5010 ; GFX900-GISEL: ; %bb.0:
5011 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5012 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5013 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5014 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5015 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
5016 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5017 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
5018 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
5019 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5020 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
5021 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5022 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5023 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5024 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5025 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
5026 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5028 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
5030 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5031 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5032 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5033 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5034 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
5035 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
5036 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5037 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5038 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5039 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5040 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5041 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5042 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5043 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5044 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
5045 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5047 ; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
5048 ; SI-GISEL: ; %bb.0:
5049 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5050 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5051 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5052 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5053 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
5054 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5055 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
5056 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
5057 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5058 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
5059 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5060 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
5061 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5062 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5063 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
5064 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5066 ; R600-LABEL: v_exp_f32_nnan_ninf_dynamic:
5071 ; CM-LABEL: v_exp_f32_nnan_ninf_dynamic:
5075 %result = call nnan ninf float @llvm.exp.f32(float %in)
; exp.f32 with the `fast` flag in a function carrying attribute group #0
; (defined outside this excerpt). Both the GCN and SI prefixes expect just a
; multiply by 0x3fb8aa3b (log2(e) as an f32 bit pattern) feeding v_exp_f32,
; with no range checks.
5079 define float @v_exp_f32_fast_daz(float %in) #0 {
5080 ; GCN-LABEL: v_exp_f32_fast_daz:
5082 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5083 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5084 ; GCN-NEXT: v_exp_f32_e32 v0, v0
5085 ; GCN-NEXT: s_setpc_b64 s[30:31]
5087 ; SI-LABEL: v_exp_f32_fast_daz:
5089 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5090 ; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5091 ; SI-NEXT: v_exp_f32_e32 v0, v0
5092 ; SI-NEXT: s_setpc_b64 s[30:31]
5094 ; R600-LABEL: v_exp_f32_fast_daz:
5099 ; CM-LABEL: v_exp_f32_fast_daz:
5103 %result = call fast float @llvm.exp.f32(float %in)
; exp.f32 with no fast-math flags in a function carrying attribute group #1
; (defined outside this excerpt). Every GCN/SI check sequence ends with two
; selects: 0 when the input is below 0xc2ce8ed0, and 0x7f800000 (+inf) when it
; is above 0x42b17218.
5107 define float @v_exp_f32_dynamic_mode(float %in) #1 {
5108 ; VI-SDAG-LABEL: v_exp_f32_dynamic_mode:
5110 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5111 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5112 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
5113 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
5114 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
5115 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
5116 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
5117 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
5118 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5119 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
5120 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
5121 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
5122 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5123 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5124 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5125 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5126 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5127 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5128 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5129 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5130 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5131 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5132 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5134 ; VI-GISEL-LABEL: v_exp_f32_dynamic_mode:
5135 ; VI-GISEL: ; %bb.0:
5136 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5137 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5138 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5139 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5140 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5141 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
5142 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
5143 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5144 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5145 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5146 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5147 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5148 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5149 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5150 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5151 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5152 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5153 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5154 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5155 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5156 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5157 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5158 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5160 ; GFX900-SDAG-LABEL: v_exp_f32_dynamic_mode:
5161 ; GFX900-SDAG: ; %bb.0:
5162 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5163 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5164 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5165 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5166 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
5167 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
5168 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5169 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5170 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5171 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5172 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5173 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5174 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5175 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5176 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5177 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5178 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5179 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5180 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5181 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5183 ; GFX900-GISEL-LABEL: v_exp_f32_dynamic_mode:
5184 ; GFX900-GISEL: ; %bb.0:
5185 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5186 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5187 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5188 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5189 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
5190 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5191 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
5192 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
5193 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5194 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
5195 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5196 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5197 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5198 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5199 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5200 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5201 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5202 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5203 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5204 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5206 ; SI-SDAG-LABEL: v_exp_f32_dynamic_mode:
5208 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5209 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5210 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5211 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5212 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
5213 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
5214 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5215 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5216 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5217 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5218 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5219 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5220 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5221 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5222 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5223 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5224 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5225 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5226 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5227 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5229 ; SI-GISEL-LABEL: v_exp_f32_dynamic_mode:
5230 ; SI-GISEL: ; %bb.0:
5231 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5232 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5233 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5234 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5235 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
5236 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5237 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
5238 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
5239 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5240 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
5241 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5242 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5243 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
5244 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5245 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5246 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5247 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5248 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5249 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5250 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5252 ; R600-LABEL: v_exp_f32_dynamic_mode:
5257 ; CM-LABEL: v_exp_f32_dynamic_mode:
5261 %result = call float @llvm.exp.f32(float %in)
; exp.f32 applied to an `undef` operand. The SelectionDAG and GlobalISel
; check sequences differ: the SDAG outputs have no compare/select against the
; range limits, while the GISEL outputs keep both selects (0 and 0x7f800000)
; and read s4 in the compares.
5265 define float @v_exp_f32_undef() {
5266 ; VI-SDAG-LABEL: v_exp_f32_undef:
5268 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5269 ; VI-SDAG-NEXT: v_rndne_f32_e32 v0, 0
5270 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7fc00000
5271 ; VI-SDAG-NEXT: v_add_f32_e64 v1, -v0, s4
5272 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5273 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0
5274 ; VI-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
5275 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5277 ; VI-GISEL-LABEL: v_exp_f32_undef:
5278 ; VI-GISEL: ; %bb.0:
5279 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5280 ; VI-GISEL-NEXT: v_sub_f32_e64 v0, s4, 0
5281 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000
5282 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295
5283 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v0
5284 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
5285 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
5286 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
5287 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0, v2
5288 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v2, v0
5289 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v1
5290 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
5291 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5292 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
5293 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
5294 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5295 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
5296 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
5297 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
5298 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
5299 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
5300 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
5301 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5302 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5304 ; GFX900-SDAG-LABEL: v_exp_f32_undef:
5305 ; GFX900-SDAG: ; %bb.0:
5306 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5307 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
5308 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000
5309 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1
5310 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5311 ; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
5312 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000
5313 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1
5314 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
5315 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
5316 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
5317 ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
5318 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5320 ; GFX900-GISEL-LABEL: v_exp_f32_undef:
5321 ; GFX900-GISEL: ; %bb.0:
5322 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5323 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5324 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5325 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v0
5326 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5327 ; GFX900-GISEL-NEXT: v_fma_f32 v1, s4, v2, v1
5328 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v0
5329 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
5330 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
5331 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
5332 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
5333 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5334 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
5335 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
5336 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
5337 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
5338 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
5339 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
5340 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5341 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5343 ; SI-SDAG-LABEL: v_exp_f32_undef:
5345 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5346 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
5347 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000
5348 ; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1
5349 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5350 ; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
5351 ; SI-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000
5352 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1
5353 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
5354 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
5355 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
5356 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
5357 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5359 ; SI-GISEL-LABEL: v_exp_f32_undef:
5360 ; SI-GISEL: ; %bb.0:
5361 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5362 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5363 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5364 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v0
5365 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
5366 ; SI-GISEL-NEXT: v_fma_f32 v1, s4, v2, v1
5367 ; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v0
5368 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
5369 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
5370 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
5371 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
5372 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5373 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
5374 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
5375 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
5376 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
5377 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
5378 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
5379 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5380 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5382 ; R600-LABEL: v_exp_f32_undef:
5387 ; CM-LABEL: v_exp_f32_undef:
5391 %result = call float @llvm.exp.f32(float undef)
; exp.f32 of the constant 0.0: both the GCN and SI prefixes expect the call to
; be reduced to a single move of 1.0 into v0.
5395 define float @v_exp_f32_0() {
5396 ; GCN-LABEL: v_exp_f32_0:
5398 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5399 ; GCN-NEXT: v_mov_b32_e32 v0, 1.0
5400 ; GCN-NEXT: s_setpc_b64 s[30:31]
5402 ; SI-LABEL: v_exp_f32_0:
5404 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5405 ; SI-NEXT: v_mov_b32_e32 v0, 1.0
5406 ; SI-NEXT: s_setpc_b64 s[30:31]
5408 ; R600-LABEL: v_exp_f32_0:
5413 ; CM-LABEL: v_exp_f32_0:
5417 %result = call float @llvm.exp.f32(float 0.0)
; The i16 argument is bitcast to half and extended to float before the
; exp.f32 call. After the entry s_waitcnt, every GCN/SI check sequence begins
; with v_cvt_f32_f16 and then carries the f32 exp expansion, including both
; range selects (0 and 0x7f800000).
5421 define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
5422 ; VI-SDAG-LABEL: v_exp_f32_from_fpext_f16:
5424 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5425 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5426 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5427 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5428 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
5429 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
5430 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v3
5431 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v3
5432 ; VI-SDAG-NEXT: v_rndne_f32_e32 v4, v2
5433 ; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
5434 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5435 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4
5436 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
5437 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
5438 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v4
5439 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5440 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5441 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5442 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5443 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5444 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5445 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5446 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5447 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5449 ; VI-GISEL-LABEL: v_exp_f32_from_fpext_f16:
5450 ; VI-GISEL: ; %bb.0:
5451 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5452 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5453 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5454 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5455 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5456 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5457 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
5458 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5459 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
5460 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5461 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5462 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5463 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5464 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5465 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5466 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5467 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5468 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5469 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5470 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5471 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5472 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5473 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5474 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5476 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_f16:
5477 ; GFX900-SDAG: ; %bb.0:
5478 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5479 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5480 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5481 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
5482 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5483 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5484 ; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
5485 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2
5486 ; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3
5487 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
5488 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5489 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5490 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5491 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5492 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5493 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5494 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5495 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5496 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5497 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5498 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5500 ; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_f16:
5501 ; GFX900-GISEL: ; %bb.0:
5502 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5503 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5504 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5505 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5506 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5507 ; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
5508 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2
5509 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
5510 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
5511 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5512 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
5513 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5514 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
5515 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
5516 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5517 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5518 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5519 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5520 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5521 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5522 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5524 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_f16:
5526 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5527 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5528 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5529 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
5530 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5531 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5532 ; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
5533 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2
5534 ; SI-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3
5535 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
5536 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5537 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5538 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5539 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5540 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5541 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5542 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5543 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5544 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5545 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5546 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5548 ; SI-GISEL-LABEL: v_exp_f32_from_fpext_f16:
5549 ; SI-GISEL: ; %bb.0:
5550 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5551 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5552 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5553 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5554 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5555 ; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
5556 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
5557 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
5558 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
5559 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5560 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
5561 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5562 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
5563 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
5564 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5565 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
5566 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5567 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5568 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5569 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5570 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5572 ; R600-LABEL: v_exp_f32_from_fpext_f16:
5577 ; CM-LABEL: v_exp_f32_from_fpext_f16:
5581 %src = bitcast i16 %src.i to half
5582 %fpext = fpext half %src to float
5583 %result = call float @llvm.exp.f32(float %fpext)
5587 define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
5588 ; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
5590 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5591 ; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
5592 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5593 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5594 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5595 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
5596 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5597 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5598 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
5599 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
5600 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
5601 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
5602 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5603 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5604 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5605 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5606 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5607 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5608 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5609 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5610 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5611 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5612 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5613 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5614 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5616 ; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
5617 ; VI-GISEL: ; %bb.0:
5618 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5619 ; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
5620 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5621 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5622 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5623 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5624 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5625 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
5626 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
5627 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5628 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5629 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5630 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5631 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5632 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5633 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5634 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5635 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5636 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5637 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5638 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5639 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5640 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5641 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5642 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5644 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
5645 ; GFX900-SDAG: ; %bb.0:
5646 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5647 ; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
5648 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5649 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5650 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
5651 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5652 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5653 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5654 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
5655 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5656 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5657 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5658 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5659 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5660 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5661 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5662 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5663 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5664 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5665 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5666 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5667 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5669 ; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
5670 ; GFX900-GISEL: ; %bb.0:
5671 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5672 ; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
5673 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5674 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5675 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5676 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5677 ; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
5678 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
5679 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
5680 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
5681 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5682 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
5683 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5684 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5685 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5686 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5687 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5688 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5689 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5690 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5691 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5692 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5694 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
5696 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5697 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5698 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
5699 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5700 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
5701 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5702 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5703 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5704 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5705 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
5706 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5707 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5708 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5709 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5710 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5711 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5712 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5713 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5714 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5715 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5716 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5717 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5718 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5720 ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
5721 ; SI-GISEL: ; %bb.0:
5722 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5723 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5724 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
5725 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5726 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
5727 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
5728 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
5729 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5730 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5731 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5732 ; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
5733 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
5734 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
5735 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
5736 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5737 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
5738 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5739 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5740 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
5741 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5742 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
5743 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5744 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
5745 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
5746 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5748 ; R600-LABEL: v_exp_f32_from_fpext_math_f16:
5753 ; CM-LABEL: v_exp_f32_from_fpext_math_f16:
5757 %src0 = bitcast i16 %src0.i to half
5758 %src1 = bitcast i16 %src1.i to half
5759 %fadd = fadd half %src0, %src1
5760 %fpext = fpext half %fadd to float
5761 %result = call float @llvm.exp.f32(float %fpext)
; Test input is a bfloat argument widened with fpext to float and passed to
; the f32 exp intrinsic. The assertions below are tool-generated (see the NOTE
; on the first line of the file) and should be regenerated, not hand-edited.
5765 define float @v_exp_f32_from_fpext_bf16(bfloat %src) {
5766 ; VI-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
5768 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5769 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5770 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
5771 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
5772 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
5773 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
5774 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
5775 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
5776 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5777 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
5778 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
5779 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
5780 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5781 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5782 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5783 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5784 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5785 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5786 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5787 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5788 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5789 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5790 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5792 ; VI-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
5793 ; VI-GISEL: ; %bb.0:
5794 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5795 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5796 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5797 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5798 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5799 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5800 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
5801 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5802 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
5803 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5804 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5805 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5806 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5807 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5808 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5809 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5810 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5811 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5812 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5813 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5814 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5815 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5816 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5817 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5819 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
5820 ; GFX900-SDAG: ; %bb.0:
5821 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5822 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5823 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5824 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5825 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
5826 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
5827 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5828 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5829 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5830 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5831 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5832 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5833 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5834 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5835 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5836 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5837 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5838 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5839 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5840 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5842 ; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
5843 ; GFX900-GISEL: ; %bb.0:
5844 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5845 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5846 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5847 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5848 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5849 ; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
5850 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2
5851 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
5852 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
5853 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5854 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
5855 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5856 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
5857 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
5858 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5859 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5860 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5861 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5862 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5863 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5864 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5866 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
5868 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5869 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5870 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5871 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5872 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
5873 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
5874 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5875 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5876 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5877 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5878 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5879 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5880 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5881 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5882 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5883 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5884 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5885 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5886 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5887 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5889 ; SI-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
5890 ; SI-GISEL: ; %bb.0:
5891 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5892 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5893 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5894 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5895 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5896 ; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
5897 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
5898 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
5899 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
5900 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5901 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
5902 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5903 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
5904 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
5905 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5906 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
5907 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5908 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5909 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5910 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5911 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5913 ; R600-LABEL: v_exp_f32_from_fpext_bf16:
5918 ; CM-LABEL: v_exp_f32_from_fpext_bf16:
5922 %fpext = fpext bfloat %src to float
5923 %result = call float @llvm.exp.f32(float %fpext)
; Test input is two i16 arguments bitcast to half, added with fadd, widened
; with fpext to float, then passed to the f32 exp intrinsic with the `fast`
; flag on the call. The assertions below are tool-generated.
5927 define float @v_exp_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) {
5928 ; GCN-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5930 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5931 ; GCN-NEXT: v_add_f16_e32 v0, v0, v1
5932 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
5933 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5934 ; GCN-NEXT: v_exp_f32_e32 v0, v0
5935 ; GCN-NEXT: s_setpc_b64 s[30:31]
5937 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5939 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5940 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5941 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
5942 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
5943 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
5944 ; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
5945 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
5946 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5947 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5948 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
5949 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
5950 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5951 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5953 ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5954 ; SI-GISEL: ; %bb.0:
5955 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5956 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5957 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
5958 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
5959 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
5960 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5961 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
5962 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
5963 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5965 ; R600-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5970 ; CM-LABEL: v_exp_f32_from_fpext_math_f16_fast:
5974 %src0 = bitcast i16 %src0.i to half
5975 %src1 = bitcast i16 %src1.i to half
5976 %fadd = fadd half %src0, %src1
5977 %fpext = fpext half %fadd to float
5978 %result = call fast float @llvm.exp.f32(float %fpext)
; Test input is two i16 arguments bitcast to half, added with fadd, widened
; with fpext to float, then passed to the f32 exp intrinsic (no fast-math
; flags on the call). The function carries attribute group #0, which is
; defined outside this part of the file.
; NOTE(review) - the _daz suffix suggests #0 selects a denormal-flushing
; mode; confirm against the attribute definitions at the end of the file.
5982 define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
5983 ; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
5985 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5986 ; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
5987 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5988 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5989 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5990 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
5991 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5992 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5993 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
5994 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
5995 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
5996 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
5997 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
5998 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
5999 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6000 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6001 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
6002 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
6003 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
6004 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
6005 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
6006 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
6007 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
6008 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
6009 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6011 ; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
6012 ; VI-GISEL: ; %bb.0:
6013 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6014 ; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
6015 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6016 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
6017 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
6018 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
6019 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
6020 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
6021 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
6022 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
6023 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
6024 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
6025 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
6026 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
6027 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
6028 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6029 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
6030 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
6031 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
6032 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
6033 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
6034 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
6035 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
6036 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
6037 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6039 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
6040 ; GFX900-SDAG: ; %bb.0:
6041 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6042 ; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
6043 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6044 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
6045 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
6046 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
6047 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
6048 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1
6049 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
6050 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
6051 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6052 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
6053 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
6054 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
6055 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
6056 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
6057 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
6058 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
6059 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
6060 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
6061 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
6062 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6064 ; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
6065 ; GFX900-GISEL: ; %bb.0:
6066 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6067 ; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
6068 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6069 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
6070 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
6071 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
6072 ; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
6073 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
6074 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
6075 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
6076 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
6077 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
6078 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
6079 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
6080 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
6081 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
6082 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
6083 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
6084 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
6085 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
6086 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
6087 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6089 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
6091 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6092 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6093 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6094 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
6095 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
6096 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
6097 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
6098 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
6099 ; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
6100 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
6101 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
6102 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6103 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6104 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
6105 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
6106 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
6107 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
6108 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
6109 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
6110 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
6111 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
6112 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
6113 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6115 ; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
6116 ; SI-GISEL: ; %bb.0:
6117 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6118 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6119 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6120 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
6121 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
6122 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
6123 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6124 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
6125 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6126 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
6127 ; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
6128 ; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
6129 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
6130 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
6131 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
6132 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
6133 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6134 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
6135 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
6136 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
6137 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
6138 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
6139 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
6140 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
6141 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6143 ; R600-LABEL: v_exp_f32_from_fpext_math_f16_daz:
6148 ; CM-LABEL: v_exp_f32_from_fpext_math_f16_daz:
6152 %src0 = bitcast i16 %src0.i to half
6153 %src1 = bitcast i16 %src1.i to half
6154 %fadd = fadd half %src0, %src1
6155 %fpext = fpext half %fadd to float
6156 %result = call float @llvm.exp.f32(float %fpext)
; Test input is a single half argument passed directly to the f16 exp
; intrinsic with no fast-math flags. The assertions below are tool-generated.
6160 ; FIXME: Fold out fp16_to_fp (FP_TO_FP16) on no-f16 targets
6161 define half @v_exp_f16(half %in) {
6162 ; GCN-LABEL: v_exp_f16:
6164 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6165 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
6166 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6167 ; GCN-NEXT: v_exp_f32_e32 v0, v0
6168 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
6169 ; GCN-NEXT: s_setpc_b64 s[30:31]
6171 ; SI-SDAG-LABEL: v_exp_f16:
6173 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6174 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6175 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6176 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6177 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6178 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6179 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6180 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6182 ; SI-GISEL-LABEL: v_exp_f16:
6183 ; SI-GISEL: ; %bb.0:
6184 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6185 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6186 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6187 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6188 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6189 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6191 ; R600-LABEL: v_exp_f16:
6196 ; CM-LABEL: v_exp_f16:
6200 %result = call half @llvm.exp.f16(half %in)
; Test input is a half argument run through the f16 fabs intrinsic and then
; the f16 exp intrinsic. In the expectations shown here the absolute value
; appears as a |v0| source modifier on the f16-to-f32 convert.
6204 define half @v_exp_fabs_f16(half %in) {
6205 ; GCN-LABEL: v_exp_fabs_f16:
6207 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6208 ; GCN-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6209 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6210 ; GCN-NEXT: v_exp_f32_e32 v0, v0
6211 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
6212 ; GCN-NEXT: s_setpc_b64 s[30:31]
6214 ; SI-SDAG-LABEL: v_exp_fabs_f16:
6216 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6217 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6218 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6219 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6220 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6221 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6222 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6223 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6225 ; SI-GISEL-LABEL: v_exp_fabs_f16:
6226 ; SI-GISEL: ; %bb.0:
6227 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6228 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6229 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6230 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6231 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6232 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6234 ; R600-LABEL: v_exp_fabs_f16:
6239 ; CM-LABEL: v_exp_fabs_f16:
6243 %fabs = call half @llvm.fabs.f16(half %in)
6244 %result = call half @llvm.exp.f16(half %fabs)
; Test input is a half argument run through fabs, then fneg, then the f16 exp
; intrinsic. The SelectionDAG expectations below carry the negation in the
; multiply constant (0xbfb8aa3b) with a plain |v0| convert, while the
; GlobalISel expectations keep 0x3fb8aa3b and use a -|v0| source modifier.
6248 define half @v_exp_fneg_fabs_f16(half %in) {
6249 ; GCN-SDAG-LABEL: v_exp_fneg_fabs_f16:
6250 ; GCN-SDAG: ; %bb.0:
6251 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6252 ; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6253 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
6254 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
6255 ; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6256 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
6258 ; GCN-GISEL-LABEL: v_exp_fneg_fabs_f16:
6259 ; GCN-GISEL: ; %bb.0:
6260 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6261 ; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
6262 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6263 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
6264 ; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6265 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
6267 ; SI-SDAG-LABEL: v_exp_fneg_fabs_f16:
6269 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6270 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6271 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6272 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
6273 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6274 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6275 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6276 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6278 ; SI-GISEL-LABEL: v_exp_fneg_fabs_f16:
6279 ; SI-GISEL: ; %bb.0:
6280 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6281 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
6282 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6283 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6284 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6285 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6287 ; R600-LABEL: v_exp_fneg_fabs_f16:
6292 ; CM-LABEL: v_exp_fneg_fabs_f16:
6296 %fabs = call half @llvm.fabs.f16(half %in)
6297 %fneg.fabs = fneg half %fabs
6298 %result = call half @llvm.exp.f16(half %fneg.fabs)
; Test input is a half argument negated with fneg and passed to the f16 exp
; intrinsic. The tonga/gfx900 SelectionDAG expectations below carry the
; negation in the multiply constant (0xbfb8aa3b); the remaining amdgcn
; expectations keep 0x3fb8aa3b and use a -v0 source modifier on a convert.
6302 define half @v_exp_fneg_f16(half %in) {
6303 ; GCN-SDAG-LABEL: v_exp_fneg_f16:
6304 ; GCN-SDAG: ; %bb.0:
6305 ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6306 ; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6307 ; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
6308 ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
6309 ; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6310 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
6312 ; GCN-GISEL-LABEL: v_exp_fneg_f16:
6313 ; GCN-GISEL: ; %bb.0:
6314 ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6315 ; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
6316 ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6317 ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
6318 ; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6319 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
6321 ; SI-SDAG-LABEL: v_exp_fneg_f16:
6323 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6324 ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
6325 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6326 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6327 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6328 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6329 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6330 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6332 ; SI-GISEL-LABEL: v_exp_fneg_f16:
6333 ; SI-GISEL: ; %bb.0:
6334 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6335 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
6336 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6337 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6338 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6339 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6341 ; R600-LABEL: v_exp_fneg_f16:
6346 ; CM-LABEL: v_exp_fneg_f16:
6350 %fneg = fneg half %in
6351 %result = call half @llvm.exp.f16(half %fneg)
; Test input is a single half argument passed to the f16 exp intrinsic with
; the `fast` flag on the call. The tonga/gfx900 expectations below stay in
; f16 (v_mul_f16 by 0x3dc5, then v_exp_f16); the tahiti expectations go
; through f32 conversions around v_exp_f32.
6355 define half @v_exp_f16_fast(half %in) {
6356 ; GCN-LABEL: v_exp_f16_fast:
6358 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6359 ; GCN-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
6360 ; GCN-NEXT: v_exp_f16_e32 v0, v0
6361 ; GCN-NEXT: s_setpc_b64 s[30:31]
6363 ; SI-SDAG-LABEL: v_exp_f16_fast:
6365 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6366 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6367 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6368 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
6369 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6370 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6371 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6372 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6374 ; SI-GISEL-LABEL: v_exp_f16_fast:
6375 ; SI-GISEL: ; %bb.0:
6376 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6377 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6378 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 0x3dc5
6379 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
6380 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6381 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6382 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6383 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6384 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6386 ; R600-LABEL: v_exp_f16_fast:
6391 ; CM-LABEL: v_exp_f16_fast:
6395 %result = call fast half @llvm.exp.f16(half %in)
; Test input is a <2 x half> argument passed to the v2f16 exp intrinsic with
; no fast-math flags; the vector result is returned directly. In the
; expectations below each element is converted to f32, scaled by 0x3fb8aa3b
; and fed to v_exp_f32 separately.
6399 define <2 x half> @v_exp_v2f16(<2 x half> %in) {
6400 ; VI-SDAG-LABEL: v_exp_v2f16:
6402 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6403 ; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6404 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6405 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6406 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6407 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6408 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6409 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6410 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6411 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6412 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6414 ; VI-GISEL-LABEL: v_exp_v2f16:
6415 ; VI-GISEL: ; %bb.0:
6416 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6417 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6418 ; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6419 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6420 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6421 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6422 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6423 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6424 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6425 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6426 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6428 ; GFX900-LABEL: v_exp_v2f16:
6430 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6431 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v0
6432 ; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6433 ; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6434 ; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6435 ; GFX900-NEXT: v_exp_f32_e32 v1, v1
6436 ; GFX900-NEXT: v_exp_f32_e32 v0, v0
6437 ; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
6438 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
6439 ; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0
6440 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6442 ; SI-SDAG-LABEL: v_exp_v2f16:
6444 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6445 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6446 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6447 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6448 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6449 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6450 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6451 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6452 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6453 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6454 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6455 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6456 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6457 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6459 ; SI-GISEL-LABEL: v_exp_v2f16:
6460 ; SI-GISEL: ; %bb.0:
6461 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6462 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6463 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6464 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6465 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6466 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6467 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6468 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6469 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6470 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6472 ; R600-LABEL: v_exp_v2f16:
6477 ; CM-LABEL: v_exp_v2f16:
6481 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %in)
6482 ret <2 x half> %result
; Test: llvm.exp.v2f16 applied to llvm.fabs.v2f16(%in).
; The checks below show each half element being converted to f32, multiplied
; by 0x3fb8aa3b (f32 ~1.442695, i.e. log2(e)), passed through v_exp_f32 and
; converted back to f16.
; The fabs shows up as a |v0| / |v1| source modifier on v_cvt_f32_f16 in the
; SDAG output (VI, GFX900, SI) and as a v_and_b32 with 0x7fff7fff on the packed
; value in the GlobalISel output.
6485 define <2 x half> @v_exp_fabs_v2f16(<2 x half> %in) {
6486 ; VI-SDAG-LABEL: v_exp_fabs_v2f16:
6488 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6489 ; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6490 ; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6491 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6492 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6493 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6494 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6495 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6496 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6497 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6498 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6500 ; VI-GISEL-LABEL: v_exp_fabs_v2f16:
6501 ; VI-GISEL: ; %bb.0:
6502 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6503 ; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6504 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6505 ; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6506 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6507 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6508 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6509 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6510 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6511 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6512 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6513 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6515 ; GFX900-SDAG-LABEL: v_exp_fabs_v2f16:
6516 ; GFX900-SDAG: ; %bb.0:
6517 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6518 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v0|
6519 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6520 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6521 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6522 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
6523 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
6524 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6525 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6526 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6527 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6529 ; GFX900-GISEL-LABEL: v_exp_fabs_v2f16:
6530 ; GFX900-GISEL: ; %bb.0:
6531 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6532 ; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6533 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6534 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6535 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6536 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6537 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
6538 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
6539 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6540 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6541 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
6542 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6544 ; SI-SDAG-LABEL: v_exp_fabs_v2f16:
6546 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6547 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6548 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6549 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6550 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
6551 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6552 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6553 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6554 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6555 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6556 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6557 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6558 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6559 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6561 ; SI-GISEL-LABEL: v_exp_fabs_v2f16:
6562 ; SI-GISEL: ; %bb.0:
6563 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6564 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6565 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6566 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6567 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6568 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6569 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6570 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6571 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6572 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6573 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6574 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
6575 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
6576 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
6577 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6579 ; R600-LABEL: v_exp_fabs_v2f16:
6584 ; CM-LABEL: v_exp_fabs_v2f16:
6588 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
6589 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %fabs)
6590 ret <2 x half> %result
; Test: llvm.exp.v2f16 applied to fneg(llvm.fabs.v2f16(%in)).
; VI-SDAG and GFX900-SDAG fold the fneg+fabs into a -|v0| source modifier on
; v_cvt_f32_f16. VI-GISEL, GFX900-GISEL, SI-SDAG and SI-GISEL instead set both
; sign bits with v_or_b32 0x80008000 on the packed value.
; The remainder is the f32-promoted expansion: multiply by 0x3fb8aa3b
; (f32 ~1.442695, i.e. log2(e)), v_exp_f32, convert back to f16.
6593 define <2 x half> @v_exp_fneg_fabs_v2f16(<2 x half> %in) {
6594 ; VI-SDAG-LABEL: v_exp_fneg_fabs_v2f16:
6596 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6597 ; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6598 ; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
6599 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6600 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6601 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6602 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6603 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6604 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6605 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6606 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6608 ; VI-GISEL-LABEL: v_exp_fneg_fabs_v2f16:
6609 ; VI-GISEL: ; %bb.0:
6610 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6611 ; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6612 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6613 ; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6614 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6615 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6616 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6617 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6618 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6619 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6620 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6621 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6623 ; GFX900-SDAG-LABEL: v_exp_fneg_fabs_v2f16:
6624 ; GFX900-SDAG: ; %bb.0:
6625 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6626 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v0|
6627 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6628 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6629 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6630 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
6631 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
6632 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6633 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6634 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6635 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6637 ; GFX900-GISEL-LABEL: v_exp_fneg_fabs_v2f16:
6638 ; GFX900-GISEL: ; %bb.0:
6639 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6640 ; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6641 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6642 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6643 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6644 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6645 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
6646 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
6647 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6648 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6649 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
6650 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6652 ; SI-SDAG-LABEL: v_exp_fneg_fabs_v2f16:
6654 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6655 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6656 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6657 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6658 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6659 ; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6660 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
6661 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6662 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6663 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6664 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6665 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6666 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6667 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6668 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
6669 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
6670 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
6671 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6673 ; SI-GISEL-LABEL: v_exp_fneg_fabs_v2f16:
6674 ; SI-GISEL: ; %bb.0:
6675 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6676 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6677 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6678 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6679 ; SI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6680 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6681 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6682 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6683 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6684 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6685 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6686 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
6687 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
6688 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
6689 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6691 ; R600-LABEL: v_exp_fneg_fabs_v2f16:
6696 ; CM-LABEL: v_exp_fneg_fabs_v2f16:
6700 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
6701 %fneg.fabs = fneg <2 x half> %fabs
6702 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %fneg.fabs)
6703 ret <2 x half> %result
; Test: llvm.exp.v2f16 applied to fneg %in.
; VI-SDAG and GFX900-SDAG fold the fneg into a -v0 source modifier on
; v_cvt_f32_f16. VI-GISEL, GFX900-GISEL, SI-SDAG and SI-GISEL instead flip both
; sign bits with v_xor_b32 0x80008000 on the packed value.
; The remainder is the f32-promoted expansion: multiply by 0x3fb8aa3b
; (f32 ~1.442695, i.e. log2(e)), v_exp_f32, convert back to f16.
6706 define <2 x half> @v_exp_fneg_v2f16(<2 x half> %in) {
6707 ; VI-SDAG-LABEL: v_exp_fneg_v2f16:
6709 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6710 ; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6711 ; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -v0
6712 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6713 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6714 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6715 ; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6716 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6717 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6718 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6719 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6721 ; VI-GISEL-LABEL: v_exp_fneg_v2f16:
6722 ; VI-GISEL: ; %bb.0:
6723 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6724 ; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
6725 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6726 ; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6727 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6728 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6729 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6730 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6731 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6732 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6733 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6734 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6736 ; GFX900-SDAG-LABEL: v_exp_fneg_v2f16:
6737 ; GFX900-SDAG: ; %bb.0:
6738 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6739 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -v0
6740 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6741 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6742 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6743 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
6744 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
6745 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6746 ; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6747 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6748 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6750 ; GFX900-GISEL-LABEL: v_exp_fneg_v2f16:
6751 ; GFX900-GISEL: ; %bb.0:
6752 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6753 ; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
6754 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6755 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6756 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6757 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6758 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
6759 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
6760 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6761 ; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6762 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
6763 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6765 ; SI-SDAG-LABEL: v_exp_fneg_v2f16:
6767 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6768 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6769 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6770 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6771 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6772 ; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
6773 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
6774 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6775 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6776 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6777 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6778 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6779 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6780 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6781 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
6782 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
6783 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
6784 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6786 ; SI-GISEL-LABEL: v_exp_fneg_v2f16:
6787 ; SI-GISEL: ; %bb.0:
6788 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6789 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6790 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6791 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6792 ; SI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
6793 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6794 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6795 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6796 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6797 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6798 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6799 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
6800 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
6801 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
6802 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6804 ; R600-LABEL: v_exp_fneg_v2f16:
6809 ; CM-LABEL: v_exp_fneg_v2f16:
6813 %fneg = fneg <2 x half> %in
6814 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %fneg)
6815 ret <2 x half> %result
; Test: llvm.exp.v2f16 called with the `fast` flag.
; On VI and GFX900 the checks show f16 arithmetic with no round trip through
; f32: a multiply by 0x3dc5 (half ~1.4424, i.e. log2(e) rounded to f16)
; followed by v_exp_f16.
; The SI checks still compute in f32: SI-SDAG multiplies by 0x3fb8a000 (the bit
; pattern of half 0x3dc5 widened to f32), while SI-GISEL converts 0x3dc5 with
; v_cvt_f32_f16 and rounds the product to f16 and back before v_exp_f32.
6818 define <2 x half> @v_exp_v2f16_fast(<2 x half> %in) {
6819 ; VI-SDAG-LABEL: v_exp_v2f16_fast:
6821 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6822 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5
6823 ; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6824 ; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
6825 ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6826 ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
6827 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6828 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6830 ; VI-GISEL-LABEL: v_exp_v2f16_fast:
6831 ; VI-GISEL: ; %bb.0:
6832 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6833 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5
6834 ; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
6835 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6836 ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
6837 ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6838 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6839 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6841 ; GFX900-SDAG-LABEL: v_exp_v2f16_fast:
6842 ; GFX900-SDAG: ; %bb.0:
6843 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6844 ; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
6845 ; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
6846 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0
6847 ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6848 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6849 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6851 ; GFX900-GISEL-LABEL: v_exp_v2f16_fast:
6852 ; GFX900-GISEL: ; %bb.0:
6853 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6854 ; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5
6855 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
6856 ; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6857 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
6858 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
6859 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
6860 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6862 ; SI-SDAG-LABEL: v_exp_v2f16_fast:
6864 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6865 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6866 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6867 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6868 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6869 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
6870 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
6871 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6872 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6873 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6874 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6875 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6876 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6877 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6879 ; SI-GISEL-LABEL: v_exp_v2f16_fast:
6880 ; SI-GISEL: ; %bb.0:
6881 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6882 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6883 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, 0x3dc5
6884 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6885 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
6886 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2
6887 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6888 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6889 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6890 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6891 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6892 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6893 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6894 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6895 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6897 ; R600-LABEL: v_exp_v2f16_fast:
6902 ; CM-LABEL: v_exp_v2f16_fast:
6906 %result = call fast <2 x half> @llvm.exp.v2f16(<2 x half> %in)
6907 ret <2 x half> %result
; Test: llvm.exp.v3f16 with no fast-math flags.
; The VI and GFX900 checks use the shared VI / GFX900 prefixes here rather
; than per-selector ones; only SI is split into SI-SDAG and SI-GISEL.
; Every element is converted to f32, multiplied by 0x3fb8aa3b (f32 ~1.442695,
; i.e. log2(e)), run through v_exp_f32 and converted back to f16. On VI and
; GFX900 the two halves of v0 are recombined into v0 and v1 is handled on its
; own.
6910 define <3 x half> @v_exp_v3f16(<3 x half> %in) {
6911 ; VI-LABEL: v_exp_v3f16:
6913 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6914 ; VI-NEXT: v_cvt_f32_f16_e32 v2, v0
6915 ; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6916 ; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
6917 ; VI-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
6918 ; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6919 ; VI-NEXT: v_exp_f32_e32 v2, v2
6920 ; VI-NEXT: v_exp_f32_e32 v0, v0
6921 ; VI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6922 ; VI-NEXT: v_exp_f32_e32 v1, v1
6923 ; VI-NEXT: v_cvt_f16_f32_e32 v2, v2
6924 ; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
6925 ; VI-NEXT: v_cvt_f16_f32_e32 v1, v1
6926 ; VI-NEXT: v_or_b32_e32 v0, v2, v0
6927 ; VI-NEXT: s_setpc_b64 s[30:31]
6929 ; GFX900-LABEL: v_exp_v3f16:
6931 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6932 ; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v0
6933 ; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6934 ; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
6935 ; GFX900-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
6936 ; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6937 ; GFX900-NEXT: v_exp_f32_e32 v2, v2
6938 ; GFX900-NEXT: v_exp_f32_e32 v0, v0
6939 ; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6940 ; GFX900-NEXT: v_exp_f32_e32 v1, v1
6941 ; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2
6942 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
6943 ; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
6944 ; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0
6945 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6947 ; SI-SDAG-LABEL: v_exp_v3f16:
6949 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6950 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6951 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6952 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
6953 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6954 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6955 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
6956 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6957 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6958 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
6959 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
6960 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
6961 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
6962 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6963 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6964 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
6965 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6966 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6967 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
6968 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6970 ; SI-GISEL-LABEL: v_exp_v3f16:
6971 ; SI-GISEL: ; %bb.0:
6972 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6973 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6974 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6975 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
6976 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
6977 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
6978 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
6979 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
6980 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
6981 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
6982 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6983 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6984 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
6985 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6987 ; R600-LABEL: v_exp_v3f16:
6992 ; CM-LABEL: v_exp_v3f16:
6996 %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %in)
6997 ret <3 x half> %result
; Test: llvm.exp.v3f16 called with the `afn` flag.
; VI (shared prefix) and both GFX900 selectors multiply by 0x3dc5 (half
; ~1.4424, i.e. log2(e) rounded to f16) and use v_exp_f16. GFX900-SDAG repacks
; the result with v_pack_b32_f16, GFX900-GISEL with v_lshl_or_b32.
; The SI checks still compute in f32: SI-SDAG multiplies by 0x3fb8a000 (the bit
; pattern of half 0x3dc5 widened to f32), while SI-GISEL converts 0x3dc5 with
; v_cvt_f32_f16 and rounds the product to f16 and back before v_exp_f32.
7000 define <3 x half> @v_exp_v3f16_afn(<3 x half> %in) {
7001 ; VI-LABEL: v_exp_v3f16_afn:
7003 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7004 ; VI-NEXT: v_mov_b32_e32 v3, 0x3dc5
7005 ; VI-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
7006 ; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
7007 ; VI-NEXT: v_exp_f16_e32 v2, v2
7008 ; VI-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
7009 ; VI-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
7010 ; VI-NEXT: v_exp_f16_e32 v1, v1
7011 ; VI-NEXT: v_or_b32_e32 v0, v2, v0
7012 ; VI-NEXT: s_setpc_b64 s[30:31]
7014 ; GFX900-SDAG-LABEL: v_exp_v3f16_afn:
7015 ; GFX900-SDAG: ; %bb.0:
7016 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7017 ; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
7018 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
7019 ; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
7020 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2
7021 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
7022 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
7023 ; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1
7024 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0
7025 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7027 ; GFX900-GISEL-LABEL: v_exp_v3f16_afn:
7028 ; GFX900-GISEL: ; %bb.0:
7029 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7030 ; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5
7031 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
7032 ; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
7033 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v2
7034 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
7035 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
7036 ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
7037 ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
7038 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7040 ; SI-SDAG-LABEL: v_exp_v3f16_afn:
7042 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7043 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7044 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7045 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7046 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7047 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7048 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7049 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
7050 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
7051 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
7052 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
7053 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7054 ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
7055 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7056 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7057 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7058 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7059 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7060 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7061 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7063 ; SI-GISEL-LABEL: v_exp_v3f16_afn:
7064 ; SI-GISEL: ; %bb.0:
7065 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7066 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
7067 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 0x3dc5
7068 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
7069 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7070 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v3
7071 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
7072 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
7073 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
7074 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
7075 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7076 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
7077 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
7078 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7079 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
7080 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7081 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
7082 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
7083 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
7084 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7085 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7087 ; R600-LABEL: v_exp_v3f16_afn:
7092 ; CM-LABEL: v_exp_v3f16_afn:
7096 %result = call afn <3 x half> @llvm.exp.v3f16(<3 x half> %in)
7097 ret <3 x half> %result
; Test: llvm.exp.f32 called with only the `contract` flag.
; The checks show the full f32 expansion rather than a single multiply feeding
; v_exp_f32:
;   - VI masks the input with 0xfffff000 and uses the constant pair
;     0x3fb8a000 / 0x39a3b295 (together ~1.442695, i.e. log2(e)) with
;     v_mul_f32 / v_add_f32 only;
;   - GFX900 and SI use v_fma_f32 with 0x3fb8aa3b and 0x32a5705f instead;
;   - the v_rndne_f32 result is subtracted before v_exp_f32 and re-applied
;     afterwards through v_cvt_i32_f32 + v_ldexp_f32;
;   - the trailing v_cndmask_b32 pair selects 0 when the input is below
;     0xc2ce8ed0 and 0x7f800000 (+inf bit pattern) when it is above 0x42b17218.
7100 define float @v_exp_f32_contract(float %in) {
7101 ; VI-SDAG-LABEL: v_exp_f32_contract:
7103 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7104 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7105 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
7106 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
7107 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
7108 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
7109 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
7110 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
7111 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7112 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
7113 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
7114 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
7115 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7116 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
7117 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7118 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7119 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7120 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7121 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7122 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7123 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7124 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7125 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7127 ; VI-GISEL-LABEL: v_exp_f32_contract:
7128 ; VI-GISEL: ; %bb.0:
7129 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7130 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7131 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
7132 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
7133 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
7134 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
7135 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
7136 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7137 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7138 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
7139 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
7140 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
7141 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
7142 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7143 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7144 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7145 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7146 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7147 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7148 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7149 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7150 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7151 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7153 ; GFX900-SDAG-LABEL: v_exp_f32_contract:
7154 ; GFX900-SDAG: ; %bb.0:
7155 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7156 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7157 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7158 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7159 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7160 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7161 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7162 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7163 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7164 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
7165 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7166 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7167 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7168 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7169 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7170 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7171 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7172 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7173 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7174 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7176 ; GFX900-GISEL-LABEL: v_exp_f32_contract:
7177 ; GFX900-GISEL: ; %bb.0:
7178 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7179 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7180 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7181 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
7182 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7183 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
7184 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
7185 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
7186 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7187 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7188 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
7189 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7190 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7191 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7192 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7193 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7194 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7195 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7196 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7197 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7199 ; SI-SDAG-LABEL: v_exp_f32_contract:
7201 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7202 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7203 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7204 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7205 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7206 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7207 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7208 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7209 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7210 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7211 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7212 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7213 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7214 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7215 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
7216 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7217 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7218 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7219 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7220 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7222 ; SI-GISEL-LABEL: v_exp_f32_contract:
7223 ; SI-GISEL: ; %bb.0:
7224 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7225 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7226 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7227 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
7228 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7229 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
7230 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
7231 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
7232 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7233 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7234 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7235 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7236 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
7237 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7238 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7239 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7240 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7241 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7242 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7243 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7245 ; R600-LABEL: v_exp_f32_contract:
7250 ; CM-LABEL: v_exp_f32_contract:
7254 %result = call contract float @llvm.exp.f32(float %in)
; Test: lowering of a `contract`-flagged llvm.exp.f32 call in a function that
; carries attribute group #0 ("denormal-fp-math-f32"="ieee,preserve-sign").
;
; Shape of the expected code in the check lines below:
;   * VI masks the input with 0xfffff000 and builds the scaled value from
;     v_mul_f32/v_add_f32 using the constants 0x3fb8a000 and 0x39a3b295.
;   * GFX900 and SI multiply by 0x3fb8aa3b (~1.442695, i.e. about log2(e)) and
;     refine the product with two v_fma_f32 (second constant 0x32a5705f).
;   * All targets then use v_rndne_f32 / v_sub_f32 to split off the rounded
;     part, v_exp_f32 on the remainder, v_cvt_i32_f32 + v_ldexp_f32 to rescale.
;   * Two selects clamp the result: 0 when the input is below 0xc2ce8ed0, and
;     0x7f800000 (+inf) when the input is above 0x42b17218.
; SelectionDAG expresses the clamps with v_cmp_ngt/v_cmp_nlt against an SGPR;
; GlobalISel uses v_cmp_lt/v_cmp_gt against a VGPR.
7258 define float @v_exp_f32_contract_daz(float %in) #0 {
7259 ; VI-SDAG-LABEL: v_exp_f32_contract_daz:
7261 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7262 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7263 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
7264 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
7265 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
7266 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
7267 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
7268 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
7269 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7270 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
7271 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
7272 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
7273 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7274 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
7275 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7276 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7277 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7278 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7279 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7280 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7281 ; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7282 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7283 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7285 ; VI-GISEL-LABEL: v_exp_f32_contract_daz:
7286 ; VI-GISEL: ; %bb.0:
7287 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7288 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7289 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
7290 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
7291 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
7292 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
7293 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
7294 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7295 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7296 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
7297 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
7298 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
7299 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
7300 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7301 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7302 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7303 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7304 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7305 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7306 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7307 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7308 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7309 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7311 ; GFX900-SDAG-LABEL: v_exp_f32_contract_daz:
7312 ; GFX900-SDAG: ; %bb.0:
7313 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7314 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7315 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7316 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7317 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7318 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7319 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7320 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7321 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7322 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
7323 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7324 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7325 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7326 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7327 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7328 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7329 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7330 ; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7331 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7332 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7334 ; GFX900-GISEL-LABEL: v_exp_f32_contract_daz:
7335 ; GFX900-GISEL: ; %bb.0:
7336 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7337 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7338 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7339 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
7340 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7341 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
7342 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
7343 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
7344 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7345 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7346 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
7347 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7348 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7349 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7350 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7351 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7352 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7353 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7354 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7355 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7357 ; SI-SDAG-LABEL: v_exp_f32_contract_daz:
7359 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7360 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7361 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7362 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7363 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7364 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7365 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7366 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7367 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7368 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7369 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7370 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7371 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7372 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
7373 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
7374 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
7375 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
7376 ; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
7377 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
7378 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7380 ; SI-GISEL-LABEL: v_exp_f32_contract_daz:
7381 ; SI-GISEL: ; %bb.0:
7382 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7383 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7384 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7385 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
7386 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7387 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
7388 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
7389 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
7390 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7391 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7392 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7393 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
7394 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
7395 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7396 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7397 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
7398 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
7399 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
7400 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
7401 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7403 ; R600-LABEL: v_exp_f32_contract_daz:
7408 ; CM-LABEL: v_exp_f32_contract_daz:
; Call under test: only the `contract` fast-math flag is set.
7412 %result = call contract float @llvm.exp.f32(float %in)
; Test: lowering of an llvm.exp.f32 call flagged `contract nnan ninf`; the
; define carries no attribute group.
;
; The expected sequences use the same arithmetic as the checks for
; v_exp_f32_contract_daz (VI: 0xfffff000 mask plus mul/add with 0x3fb8a000 and
; 0x39a3b295; GFX900/SI: multiply by 0x3fb8aa3b refined with two v_fma_f32),
; followed by v_rndne_f32, v_exp_f32 and v_ldexp_f32.
; The difference visible in the checks: only the select to 0 for inputs below
; 0xc2ce8ed0 remains. There is no compare against 0x42b17218 and no
; 0x7f800000 (+inf) select in any of the prefixes below.
7416 define float @v_exp_f32_contract_nnan_ninf(float %in) {
7417 ; VI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
7419 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7420 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7421 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
7422 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
7423 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
7424 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
7425 ; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
7426 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
7427 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7428 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
7429 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
7430 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
7431 ; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7432 ; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
7433 ; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7434 ; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7435 ; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7436 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
7437 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7439 ; VI-GISEL-LABEL: v_exp_f32_contract_nnan_ninf:
7440 ; VI-GISEL: ; %bb.0:
7441 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7442 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
7443 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
7444 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
7445 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
7446 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
7447 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
7448 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
7449 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7450 ; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
7451 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
7452 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
7453 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
7454 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7455 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7456 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7457 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7458 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
7459 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7461 ; GFX900-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
7462 ; GFX900-SDAG: ; %bb.0:
7463 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7464 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7465 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7466 ; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7467 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7468 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7469 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7470 ; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7471 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7472 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
7473 ; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7474 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7475 ; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7476 ; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
7477 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
7478 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7480 ; GFX900-GISEL-LABEL: v_exp_f32_contract_nnan_ninf:
7481 ; GFX900-GISEL: ; %bb.0:
7482 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7483 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7484 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7485 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
7486 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7487 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
7488 ; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
7489 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
7490 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7491 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7492 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
7493 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
7494 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7495 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7496 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
7497 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7499 ; SI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
7501 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7502 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7503 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7504 ; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
7505 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
7506 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
7507 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
7508 ; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
7509 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
7510 ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
7511 ; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
7512 ; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
7513 ; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
7514 ; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
7515 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
7516 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7518 ; SI-GISEL-LABEL: v_exp_f32_contract_nnan_ninf:
7519 ; SI-GISEL: ; %bb.0:
7520 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7521 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
7522 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
7523 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
7524 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
7525 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
7526 ; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
7527 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
7528 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
7529 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
7530 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
7531 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
7532 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
7533 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
7534 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
7535 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7537 ; R600-LABEL: v_exp_f32_contract_nnan_ninf:
7542 ; CM-LABEL: v_exp_f32_contract_nnan_ninf:
; Call under test: `nnan ninf` are set in addition to `contract`.
7546 %result = call contract nnan ninf float @llvm.exp.f32(float %in)
; Intrinsic declarations used by the tests in this file: llvm.exp and llvm.fabs
; for f32/f16 scalars and vectors. Every declaration uses attribute group #2.
7550 declare float @llvm.fabs.f32(float) #2
7551 declare float @llvm.exp.f32(float) #2
7552 declare <2 x float> @llvm.exp.v2f32(<2 x float>) #2
7553 declare <3 x float> @llvm.exp.v3f32(<3 x float>) #2
7554 declare <4 x float> @llvm.exp.v4f32(<4 x float>) #2
7555 declare half @llvm.fabs.f16(half) #2
7556 declare half @llvm.exp.f16(half) #2
7557 declare <2 x half> @llvm.exp.v2f16(<2 x half>) #2
7558 declare <3 x half> @llvm.exp.v3f16(<3 x half>) #2
7559 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
; Attribute groups.
; #0: f32 denormal mode "ieee,preserve-sign"; applied to the *_daz test
;     functions (e.g. v_exp_f32_contract_daz). Per the LLVM LangRef the value
;     is "<output mode>,<input mode>".
; #1: f32 denormal mode "dynamic,dynamic".
;     NOTE(review): presumably applied to the *_dynamic test functions --
;     confirm against the rest of the file.
; #2: attribute set shared by the intrinsic declarations above.
7561 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
7562 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
7563 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }