1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-SDAG,SI-SDAG %s
3 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s
4 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-SDAG,VI-SDAG %s
5 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s
6 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-SDAG,GFX900-SDAG %s
7 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s
8 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-SDAG %s
9 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s
11 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
12 ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
; s_log_f32 - kernel that takes a scalar float kernel argument %in, calls
; @llvm.log.f32 on it and stores the result through the addrspace(1)
; pointer %out.
;
; The check lines inside this function are autogenerated (see the note on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand. There is
; one group per check prefix used by the llc invocations at the top of the
; file (tahiti, tonga, gfx900 and gfx1100, each with -global-isel=0 and
; -global-isel=1, plus r600 redwood and cayman).
;
; Shape of the expected code, as visible in the checks below:
;   * the input is compared against 0x800000 (annotated 1.175494e-38 in the
;     r600 checks) and multiplied by either 1.0 or 0x4f800000 (4.294967e+09)
;     before the hardware log instruction;
;   * the hardware result is multiplied by 0x3f317217, refined with fma/fmac
;     using 0x3377d1cf (tahiti, gfx900, gfx1100), or the result is masked
;     with 0xfffff000 and combined from 0x3f317000 / 0x3805fdf4 products
;     (tonga, r600, cayman);
;   * the unmodified hardware result is kept unless |result| < 0x7f800000
;     (annotated INF);
;   * finally either 0 or 0x41b17218 (2.218071e+01) is subtracted, selected
;     by the first compare.
; NOTE(review): this looks like denormal-input scaling by 2^32 with a
; matching 32*ln(2) correction, and a split-constant multiply by ln(2) where
; fma is not used - confirm against the AMDGPU log lowering code.
14 define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
15 ; SI-SDAG-LABEL: s_log_f32:
17 ; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb
18 ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
19 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
20 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
21 ; SI-SDAG-NEXT: s_mov_b32 s0, 0x3f317217
22 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
23 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
24 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
25 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
26 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
27 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
28 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
29 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
30 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, -v1
31 ; SI-SDAG-NEXT: s_mov_b32 s0, 0x3377d1cf
32 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, v2
33 ; SI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000
34 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
35 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
36 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
37 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
38 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
39 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
40 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
41 ; SI-SDAG-NEXT: s_endpgm
43 ; SI-GISEL-LABEL: s_log_f32:
45 ; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb
46 ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
47 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
48 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
49 ; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
50 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
51 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
52 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
53 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
54 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
55 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
56 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
57 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
58 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
59 ; SI-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3
60 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
61 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
62 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
63 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
64 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
65 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
66 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
67 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
68 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0
69 ; SI-GISEL-NEXT: s_endpgm
71 ; VI-SDAG-LABEL: s_log_f32:
73 ; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
74 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
75 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
76 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
77 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
78 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
79 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
80 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
81 ; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
82 ; VI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000
83 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
84 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
85 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
86 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v2
87 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
88 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
89 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
90 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
91 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
92 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
93 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
94 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
95 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
96 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
97 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
98 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2
99 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
100 ; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
101 ; VI-SDAG-NEXT: s_endpgm
103 ; VI-GISEL-LABEL: s_log_f32:
105 ; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
106 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
107 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
108 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
109 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
110 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
111 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
112 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
113 ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
114 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
115 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
116 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
117 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
118 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
119 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
120 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
121 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
122 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
123 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
124 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
125 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
126 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
127 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
128 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
129 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
130 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
131 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
132 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
133 ; VI-GISEL-NEXT: s_endpgm
135 ; GFX900-SDAG-LABEL: s_log_f32:
136 ; GFX900-SDAG: ; %bb.0:
137 ; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
138 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
139 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
140 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
141 ; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x3f317217
142 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
143 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
144 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
145 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
146 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
147 ; GFX900-SDAG-NEXT: s_mov_b32 s1, 0x3377d1cf
148 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0
149 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v0
150 ; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s0, -v2
151 ; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s1, v3
152 ; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x7f800000
153 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
154 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
155 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
156 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x41b17218
157 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
158 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
159 ; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3]
160 ; GFX900-SDAG-NEXT: s_endpgm
162 ; GFX900-GISEL-LABEL: s_log_f32:
163 ; GFX900-GISEL: ; %bb.0:
164 ; GFX900-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
165 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
166 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
167 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
168 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
169 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
170 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
171 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
172 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
173 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
174 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
175 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
176 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
177 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3
178 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
179 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
180 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
181 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
182 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
183 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
184 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
185 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
186 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
187 ; GFX900-GISEL-NEXT: s_endpgm
189 ; GFX1100-SDAG-LABEL: s_log_f32:
190 ; GFX1100-SDAG: ; %bb.0:
191 ; GFX1100-SDAG-NEXT: s_clause 0x1
192 ; GFX1100-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
193 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
194 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
195 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2
196 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
197 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
198 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
199 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
200 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
201 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
202 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
203 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
204 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
205 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
206 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
207 ; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
208 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
209 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
210 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s3
211 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
212 ; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
213 ; GFX1100-SDAG-NEXT: s_nop 0
214 ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
215 ; GFX1100-SDAG-NEXT: s_endpgm
217 ; GFX1100-GISEL-LABEL: s_log_f32:
218 ; GFX1100-GISEL: ; %bb.0:
219 ; GFX1100-GISEL-NEXT: s_clause 0x1
220 ; GFX1100-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
221 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
222 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
223 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2
224 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
225 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
226 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
227 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
228 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
229 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
230 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
231 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
232 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
233 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
234 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
235 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
236 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x41b17218, s3
237 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
238 ; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
239 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
240 ; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
241 ; GFX1100-GISEL-NEXT: s_nop 0
242 ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
243 ; GFX1100-GISEL-NEXT: s_endpgm
245 ; R600-LABEL: s_log_f32:
247 ; R600-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[]
248 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
251 ; R600-NEXT: ALU clause starting at 4:
252 ; R600-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
253 ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
254 ; R600-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x,
255 ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
256 ; R600-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W,
257 ; R600-NEXT: LOG_IEEE * T0.X, PV.W,
258 ; R600-NEXT: AND_INT * T1.W, PS, literal.x,
259 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
260 ; R600-NEXT: ADD * T2.W, T0.X, -PV.W,
261 ; R600-NEXT: MUL_IEEE * T3.W, PV.W, literal.x,
262 ; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
263 ; R600-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W,
264 ; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
265 ; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W,
266 ; R600-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00)
267 ; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
268 ; R600-NEXT: SETGT * T2.W, literal.y, |T0.X|,
269 ; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
270 ; R600-NEXT: CNDE T1.W, PS, T0.X, PV.W,
271 ; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
272 ; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
273 ; R600-NEXT: ADD T0.X, PV.W, -PS,
274 ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
275 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
277 ; CM-LABEL: s_log_f32:
279 ; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[]
280 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
283 ; CM-NEXT: ALU clause starting at 4:
284 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
285 ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
286 ; CM-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x,
287 ; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
288 ; CM-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W,
289 ; CM-NEXT: LOG_IEEE T0.X, T1.W,
290 ; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W,
291 ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W,
292 ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W,
293 ; CM-NEXT: AND_INT * T1.W, PV.X, literal.x,
294 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
295 ; CM-NEXT: ADD * T2.W, T0.X, -PV.W,
296 ; CM-NEXT: MUL_IEEE * T3.W, PV.W, literal.x,
297 ; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
298 ; CM-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W,
299 ; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
300 ; CM-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W,
301 ; CM-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00)
302 ; CM-NEXT: MULADD_IEEE T0.Z, T1.W, literal.x, PV.W,
303 ; CM-NEXT: SETGT * T1.W, literal.y, |T0.X|,
304 ; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
305 ; CM-NEXT: CNDE T0.Z, PV.W, T0.X, PV.Z,
306 ; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
307 ; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
308 ; CM-NEXT: ADD * T0.X, PV.Z, -PV.W,
309 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
310 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test - apply the log intrinsic to the scalar argument and write
; the result to the output pointer.
311 %result = call float @llvm.log.f32(float %in)
312 store float %result, ptr addrspace(1) %out
316 ; FIXME: We should be able to merge these packets together on Cayman so we
317 ; have a maximum of 4 instructions.
318 define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
319 ; SI-SDAG-LABEL: s_log_v2f32:
321 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
322 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
323 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
324 ; SI-SDAG-NEXT: s_mov_b32 s8, 0x3377d1cf
325 ; SI-SDAG-NEXT: s_mov_b32 s9, 0x7f800000
326 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
327 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
328 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
329 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v2
330 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
331 ; SI-SDAG-NEXT: s_mov_b32 s3, 0x3f317217
332 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
333 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
334 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2
335 ; SI-SDAG-NEXT: v_fma_f32 v4, v2, s3, -v3
336 ; SI-SDAG-NEXT: v_fma_f32 v4, v2, s8, v4
337 ; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
338 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s9
339 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
340 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0
341 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
342 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
343 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
344 ; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218
345 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
346 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1
347 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v0
348 ; SI-SDAG-NEXT: v_fma_f32 v4, v0, s3, -v2
349 ; SI-SDAG-NEXT: v_fma_f32 v4, v0, s8, v4
350 ; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
351 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s9
352 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
353 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
354 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
355 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
356 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
357 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
358 ; SI-SDAG-NEXT: s_endpgm
360 ; SI-GISEL-LABEL: s_log_v2f32:
362 ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
363 ; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000
364 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
365 ; SI-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
366 ; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
367 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
368 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6
369 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
370 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
371 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
372 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
373 ; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
374 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
375 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
376 ; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
377 ; SI-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4
378 ; SI-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5
379 ; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
380 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3
381 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
382 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
383 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
384 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
385 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
386 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218
387 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
388 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
389 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1
390 ; SI-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
391 ; SI-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3
392 ; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
393 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3
394 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
395 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1]
396 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
397 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
398 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
399 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
400 ; SI-GISEL-NEXT: s_endpgm
402 ; VI-SDAG-LABEL: s_log_v2f32:
404 ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
405 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
406 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
407 ; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000
408 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
409 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
410 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
411 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2
412 ; VI-SDAG-NEXT: v_log_f32_e32 v2, v2
413 ; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
414 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3
415 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
416 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4
417 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4
418 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
419 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
420 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
421 ; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
422 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s2
423 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
424 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
425 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
426 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
427 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
428 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218
429 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
430 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1
431 ; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
432 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v2
433 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v4
434 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4
435 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v2
436 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
437 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
438 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
439 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
440 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s2
441 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
442 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
443 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
444 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4
445 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
446 ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
447 ; VI-SDAG-NEXT: s_endpgm
449 ; VI-GISEL-LABEL: s_log_v2f32:
451 ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
452 ; VI-GISEL-NEXT: s_mov_b32 s0, 0x800000
453 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000
454 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
455 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
456 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s6
457 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1
458 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc
459 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
460 ; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
461 ; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000
462 ; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v1
463 ; VI-GISEL-NEXT: v_sub_f32_e32 v4, v1, v3
464 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
465 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4
466 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
467 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
468 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
469 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
470 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
471 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s0
472 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
473 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
474 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v0, s[0:1]
475 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
476 ; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
477 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x41b17218
478 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
479 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
480 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v2
481 ; VI-GISEL-NEXT: v_sub_f32_e32 v5, v2, v1
482 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v5
483 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v1
484 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v7, v6
485 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
486 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
487 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
488 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
489 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
490 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, v4
491 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
492 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
493 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
494 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
495 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
496 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
497 ; VI-GISEL-NEXT: s_endpgm
499 ; GFX900-SDAG-LABEL: s_log_v2f32:
500 ; GFX900-SDAG: ; %bb.0:
501 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
502 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
503 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
504 ; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3f317217
505 ; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3377d1cf
506 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
507 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
508 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
509 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3
510 ; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3
511 ; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x7f800000
512 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
513 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v3
514 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s2, -v4
515 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s3, v5
516 ; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
517 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v3|, s7
518 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1]
519 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
520 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
521 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
522 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
523 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218
524 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
525 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v3, v1
526 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
527 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s2, -v3
528 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s3, v5
529 ; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
530 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7
531 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
532 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
533 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3
534 ; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
535 ; GFX900-SDAG-NEXT: s_endpgm
537 ; GFX900-GISEL-LABEL: s_log_v2f32:
538 ; GFX900-GISEL: ; %bb.0:
539 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
540 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000
541 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
542 ; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
543 ; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
544 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
545 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6
546 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
547 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
548 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
549 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v2
550 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
551 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
552 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
553 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
554 ; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4
555 ; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5
556 ; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
557 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3
558 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
559 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
560 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
561 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
562 ; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
563 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218
564 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
565 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
566 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1
567 ; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
568 ; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3
569 ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
570 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3
571 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
572 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1]
573 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
574 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
575 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
576 ; GFX900-GISEL-NEXT: s_endpgm
578 ; GFX1100-SDAG-LABEL: s_log_v2f32:
579 ; GFX1100-SDAG: ; %bb.0:
580 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
581 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
582 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3
583 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s2
584 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
585 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
586 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
587 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
588 ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s3, v0 :: v_dual_mul_f32 v1, s2, v1
589 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
590 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
591 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
592 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
593 ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3f317217, v0 :: v_dual_mul_f32 v3, 0x3f317217, v1
594 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
595 ; GFX1100-SDAG-NEXT: v_fma_f32 v4, 0x3f317217, v0, -v2
596 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
597 ; GFX1100-SDAG-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3
598 ; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v4, 0x3377d1cf, v0 :: v_dual_fmac_f32 v5, 0x3377d1cf, v1
599 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
600 ; GFX1100-SDAG-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5
601 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s4
602 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5
603 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
604 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
605 ; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v2, v1, v3 :: v_dual_mov_b32 v3, 0
606 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
607 ; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5
608 ; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1]
609 ; GFX1100-SDAG-NEXT: s_nop 0
610 ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
611 ; GFX1100-SDAG-NEXT: s_endpgm
613 ; GFX1100-GISEL-LABEL: s_log_v2f32:
614 ; GFX1100-GISEL: ; %bb.0:
615 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
616 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
617 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2
618 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s3
619 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
620 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
621 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
622 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
623 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1
624 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
625 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
626 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
627 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
628 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3f317217, v0 :: v_dual_mul_f32 v3, 0x3f317217, v1
629 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
630 ; GFX1100-GISEL-NEXT: v_fma_f32 v4, v0, 0x3f317217, -v2
631 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
632 ; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3
633 ; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v4, 0x3377d1cf, v0 :: v_dual_fmac_f32 v5, 0x3377d1cf, v1
634 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
635 ; GFX1100-GISEL-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5
636 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s4
637 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5
638 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
639 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
640 ; GFX1100-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, v1, v3
641 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
642 ; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
643 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
644 ; GFX1100-GISEL-NEXT: s_nop 0
645 ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
646 ; GFX1100-GISEL-NEXT: s_endpgm
648 ; R600-LABEL: s_log_v2f32:
650 ; R600-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[]
651 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XY, T0.X, 1
654 ; R600-NEXT: ALU clause starting at 4:
655 ; R600-NEXT: SETGT T0.W, literal.x, KC0[3].X,
656 ; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].W,
657 ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
658 ; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x,
659 ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
660 ; R600-NEXT: MUL_IEEE T2.W, KC0[3].X, PV.W,
661 ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x,
662 ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
663 ; R600-NEXT: MUL_IEEE T3.W, KC0[2].W, PS,
664 ; R600-NEXT: LOG_IEEE * T0.X, PV.W,
665 ; R600-NEXT: AND_INT T2.W, PS, literal.x,
666 ; R600-NEXT: LOG_IEEE * T0.Y, PV.W,
667 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
668 ; R600-NEXT: ADD T3.W, T0.X, -PV.W,
669 ; R600-NEXT: AND_INT * T4.W, PS, literal.x,
670 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
671 ; R600-NEXT: ADD T5.W, T0.Y, -PS,
672 ; R600-NEXT: MUL_IEEE * T6.W, PV.W, literal.x,
673 ; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
674 ; R600-NEXT: MULADD_IEEE T6.W, T2.W, literal.x, PS,
675 ; R600-NEXT: MUL_IEEE * T7.W, PV.W, literal.x,
676 ; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
677 ; R600-NEXT: MULADD_IEEE T7.W, T4.W, literal.x, PS,
678 ; R600-NEXT: MULADD_IEEE * T3.W, T3.W, literal.y, PV.W, BS:VEC_021/SCL_122
679 ; R600-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
680 ; R600-NEXT: MULADD_IEEE T0.Z, T2.W, literal.x, PS,
681 ; R600-NEXT: SETGT T2.W, literal.y, |T0.X|,
682 ; R600-NEXT: MULADD_IEEE * T3.W, T5.W, literal.x, PV.W, BS:VEC_021/SCL_122
683 ; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
684 ; R600-NEXT: MULADD_IEEE T1.Y, T4.W, literal.x, PS,
685 ; R600-NEXT: SETGT T1.Z, literal.y, |T0.Y|,
686 ; R600-NEXT: CNDE T2.W, PV.W, T0.X, PV.Z,
687 ; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z,
688 ; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
689 ; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
690 ; R600-NEXT: ADD T2.Y, PV.W, -PS,
691 ; R600-NEXT: CNDE T0.W, PV.Z, T0.Y, PV.Y,
692 ; R600-NEXT: CNDE * T1.W, T1.W, 0.0, literal.x,
693 ; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
694 ; R600-NEXT: ADD T2.X, PV.W, -PS,
695 ; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
696 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
698 ; CM-LABEL: s_log_v2f32:
700 ; CM-NEXT: ALU 47, @4, KC0[CB0:0-32], KC1[]
701 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
704 ; CM-NEXT: ALU clause starting at 4:
705 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].X,
706 ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
707 ; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x,
708 ; CM-NEXT: SETGT * T1.W, literal.y, KC0[2].W,
709 ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
710 ; CM-NEXT: CNDE T1.Z, PV.W, 1.0, literal.x,
711 ; CM-NEXT: MUL_IEEE * T2.W, KC0[3].X, PV.Z,
712 ; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
713 ; CM-NEXT: LOG_IEEE T0.X, T2.W,
714 ; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W,
715 ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
716 ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
717 ; CM-NEXT: AND_INT T0.Z, PV.X, literal.x,
718 ; CM-NEXT: MUL_IEEE * T2.W, KC0[2].W, T1.Z,
719 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
720 ; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W,
721 ; CM-NEXT: LOG_IEEE T0.Y, T2.W,
722 ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
723 ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
724 ; CM-NEXT: ADD T1.Z, T0.X, -T0.Z,
725 ; CM-NEXT: AND_INT * T2.W, PV.Y, literal.x,
726 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
727 ; CM-NEXT: ADD T2.Z, T0.Y, -PV.W,
728 ; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x,
729 ; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
730 ; CM-NEXT: MULADD_IEEE T3.Z, T0.Z, literal.x, PV.W,
731 ; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x,
732 ; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
733 ; CM-NEXT: MULADD_IEEE T4.Z, T2.W, literal.x, PV.W,
734 ; CM-NEXT: MULADD_IEEE * T3.W, T1.Z, literal.y, PV.Z,
735 ; CM-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
736 ; CM-NEXT: MULADD_IEEE T1.Y, T0.Z, literal.x, PV.W,
737 ; CM-NEXT: SETGT T0.Z, literal.y, |T0.X|,
738 ; CM-NEXT: MULADD_IEEE * T3.W, T2.Z, literal.x, PV.Z, BS:VEC_120/SCL_212
739 ; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
740 ; CM-NEXT: MULADD_IEEE T1.X, T2.W, literal.x, PV.W,
741 ; CM-NEXT: SETGT T2.Y, literal.y, |T0.Y|,
742 ; CM-NEXT: CNDE T0.Z, PV.Z, T0.X, PV.Y,
743 ; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z, BS:VEC_120/SCL_212
744 ; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
745 ; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
746 ; CM-NEXT: ADD T1.Y, PV.Z, -PV.W,
747 ; CM-NEXT: CNDE T0.Z, PV.Y, T0.Y, PV.X,
748 ; CM-NEXT: CNDE * T0.W, T1.W, 0.0, literal.x,
749 ; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
750 ; CM-NEXT: ADD * T1.X, PV.Z, -PV.W,
751 ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
752 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
753 %result = call <2 x float> @llvm.log.v2f32(<2 x float> %in)
754 store <2 x float> %result, ptr addrspace(1) %out
758 define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
759 ; SI-SDAG-LABEL: s_log_v3f32:
761 ; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
762 ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
763 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
764 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
765 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
766 ; SI-SDAG-NEXT: s_mov_b32 s11, 0x3377d1cf
767 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0
768 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
769 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, s9, v1
770 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
771 ; SI-SDAG-NEXT: s_mov_b32 s9, 0x3f317217
772 ; SI-SDAG-NEXT: s_mov_b32 s12, 0x7f800000
773 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
774 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v1
775 ; SI-SDAG-NEXT: v_fma_f32 v4, v1, s9, -v3
776 ; SI-SDAG-NEXT: v_fma_f32 v4, v1, s11, v4
777 ; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
778 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s12
779 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
780 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0
781 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1]
782 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, s8, v3
783 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
784 ; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218
785 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
786 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0
787 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
788 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5
789 ; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v3
790 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, s10, v0
791 ; SI-SDAG-NEXT: v_fma_f32 v6, v3, s9, -v5
792 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v0
793 ; SI-SDAG-NEXT: v_fma_f32 v6, v3, s11, v6
794 ; SI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
795 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s12
796 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[2:3]
797 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
798 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3
799 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2
800 ; SI-SDAG-NEXT: v_fma_f32 v5, v2, s9, -v3
801 ; SI-SDAG-NEXT: v_fma_f32 v5, v2, s11, v5
802 ; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
803 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s12
804 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
805 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
806 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
807 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
808 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
809 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
810 ; SI-SDAG-NEXT: s_endpgm
812 ; SI-GISEL-LABEL: s_log_v3f32:
814 ; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
815 ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
816 ; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000
817 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
818 ; SI-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
819 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
820 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8
821 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
822 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
823 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
824 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
825 ; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
826 ; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
827 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
828 ; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
829 ; SI-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4
830 ; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
831 ; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
832 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
833 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
834 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
835 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1]
836 ; SI-GISEL-NEXT: v_mul_f32_e32 v4, s9, v4
837 ; SI-GISEL-NEXT: v_log_f32_e32 v4, v4
838 ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
839 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
840 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
841 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
842 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
843 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
844 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v4
845 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1
846 ; SI-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8
847 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v1
848 ; SI-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9
849 ; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
850 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s3
851 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3]
852 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
853 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
854 ; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
855 ; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf
856 ; SI-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4
857 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
858 ; SI-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3
859 ; SI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
860 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
861 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
862 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
863 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
864 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
865 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
866 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
867 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
868 ; SI-GISEL-NEXT: s_endpgm
870 ; VI-SDAG-LABEL: s_log_v3f32:
872 ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
873 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
874 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
875 ; VI-SDAG-NEXT: s_mov_b32 s8, 0x7f800000
876 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
877 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
878 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
879 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2
880 ; VI-SDAG-NEXT: v_log_f32_e32 v2, v2
881 ; VI-SDAG-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
882 ; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
883 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3
884 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
885 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4
886 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4
887 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
888 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
889 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
890 ; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
891 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s8
892 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
893 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0
894 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
895 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, s5, v3
896 ; VI-SDAG-NEXT: v_log_f32_e32 v3, v3
897 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218
898 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
899 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5
900 ; VI-SDAG-NEXT: v_and_b32_e32 v5, 0xfffff000, v3
901 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
902 ; VI-SDAG-NEXT: v_sub_f32_e32 v6, v3, v5
903 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
904 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6
905 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6
906 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v5
907 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
908 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
909 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
910 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6
911 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
912 ; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
913 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s8
914 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3]
915 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
916 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
917 ; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v0
918 ; VI-SDAG-NEXT: v_sub_f32_e32 v5, v0, v3
919 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v5
920 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v5
921 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v3
922 ; VI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5
923 ; VI-SDAG-NEXT: v_add_f32_e32 v5, v6, v5
924 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
925 ; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
926 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s8
927 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1]
928 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
929 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3
930 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
931 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s6
932 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s7
933 ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
934 ; VI-SDAG-NEXT: s_endpgm
936 ; VI-GISEL-LABEL: s_log_v3f32:
938 ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
939 ; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
940 ; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000
941 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
942 ; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000
943 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
944 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
945 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
946 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
947 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
948 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
949 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
950 ; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v0
951 ; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v3
952 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
953 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4
954 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
955 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
956 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
957 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
958 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
959 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
960 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1]
961 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2
962 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
963 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, s5, v3
964 ; VI-GISEL-NEXT: v_log_f32_e32 v3, v3
965 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218
966 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
967 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
968 ; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
969 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
970 ; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
971 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
972 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7
973 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6
974 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
975 ; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
976 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7
977 ; VI-GISEL-NEXT: v_log_f32_e32 v2, v1
978 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
979 ; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
980 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
981 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
982 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v4
983 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v6, s[2:3]
984 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1]
985 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
986 ; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
987 ; VI-GISEL-NEXT: v_sub_f32_e32 v6, v2, v3
988 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v6
989 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v3
990 ; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7
991 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
992 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
993 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
994 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
995 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v4
996 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
997 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
998 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
999 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s8
1000 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s9
1001 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
1002 ; VI-GISEL-NEXT: s_endpgm
1004 ; GFX900-SDAG-LABEL: s_log_v3f32:
1005 ; GFX900-SDAG: ; %bb.0:
1006 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1007 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
1008 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
1009 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
1010 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1011 ; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3377d1cf
1012 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
1013 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
1014 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2
1015 ; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2
1016 ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x3f317217
1017 ; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x7f800000
1018 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0
1019 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
1020 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s6, -v4
1021 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, v5
1022 ; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
1023 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s10
1024 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
1025 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0
1026 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v1, s[0:1]
1027 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4
1028 ; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4
1029 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218
1030 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
1031 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
1032 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
1033 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6
1034 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317217, v4
1035 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
1036 ; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s6, -v6
1037 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
1038 ; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s7, v7
1039 ; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v6, v7
1040 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s10
1041 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v4, v6, s[2:3]
1042 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
1043 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4
1044 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
1045 ; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s6, -v4
1046 ; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s7, v6
1047 ; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
1048 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s10
1049 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
1050 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
1051 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4
1052 ; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9]
1053 ; GFX900-SDAG-NEXT: s_endpgm
1055 ; GFX900-GISEL-LABEL: s_log_v3f32:
1056 ; GFX900-GISEL: ; %bb.0:
1057 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1058 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
1059 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000
1060 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1061 ; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
1062 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1063 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
1064 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
1065 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
1066 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
1067 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
1068 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
1069 ; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
1070 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
1071 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
1072 ; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4
1073 ; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
1074 ; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
1075 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
1076 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
1077 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v1
1078 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1]
1079 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
1080 ; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v4
1081 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
1082 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
1083 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v1
1084 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
1085 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
1086 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
1087 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v4
1088 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
1089 ; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8
1090 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1
1091 ; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9
1092 ; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
1093 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s3
1094 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3]
1095 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
1096 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
1097 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
1098 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf
1099 ; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4
1100 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
1101 ; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3
1102 ; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
1103 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
1104 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
1105 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
1106 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
1107 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
1108 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9]
1109 ; GFX900-GISEL-NEXT: s_endpgm
1111 ; GFX1100-SDAG-LABEL: s_log_v3f32:
1112 ; GFX1100-SDAG: ; %bb.0:
1113 ; GFX1100-SDAG-NEXT: s_clause 0x1
1114 ; GFX1100-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x34
1115 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1116 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1117 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s6
1118 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s5
1119 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s4
1120 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1121 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
1122 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
1123 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
1124 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
1125 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s2
1126 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s3
1127 ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s6, v0 :: v_dual_mul_f32 v1, s5, v1
1128 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1129 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
1130 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, s4, v2
1131 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
1132 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
1133 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
1134 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2
1135 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1
1136 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
1137 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
1138 ; GFX1100-SDAG-NEXT: v_fma_f32 v6, 0x3f317217, v0, -v3
1139 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
1140 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v2
1141 ; GFX1100-SDAG-NEXT: v_fma_f32 v7, 0x3f317217, v1, -v4
1142 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v6, 0x3377d1cf, v0
1143 ; GFX1100-SDAG-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5
1144 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1145 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v7, 0x3377d1cf, v1
1146 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v3, v3, v6
1147 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7
1148 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1149 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
1150 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1151 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
1152 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1153 ; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v8, 0x3377d1cf, v2 :: v_dual_cndmask_b32 v1, v1, v4
1154 ; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_mov_b32 v4, 0
1155 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
1156 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1157 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v1, v10
1158 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc_lo
1159 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v0, v9
1160 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
1161 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v6
1162 ; GFX1100-SDAG-NEXT: global_store_b96 v4, v[0:2], s[0:1]
1163 ; GFX1100-SDAG-NEXT: s_nop 0
1164 ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1165 ; GFX1100-SDAG-NEXT: s_endpgm
1167 ; GFX1100-GISEL-LABEL: s_log_v3f32:
1168 ; GFX1100-GISEL: ; %bb.0:
1169 ; GFX1100-GISEL-NEXT: s_clause 0x1
1170 ; GFX1100-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x34
1171 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1172 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1173 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s4
1174 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s5
1175 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s6
1176 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1177 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
1178 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
1179 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
1180 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
1181 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s3
1182 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s2
1183 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1
1184 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1185 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
1186 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
1187 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
1188 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
1189 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
1190 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
1191 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1
1192 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
1193 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
1194 ; GFX1100-GISEL-NEXT: v_fma_f32 v6, v0, 0x3f317217, -v3
1195 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
1196 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v2
1197 ; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3f317217, v1, -v4
1198 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v6, 0x3377d1cf, v0
1199 ; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5
1200 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1201 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v7, 0x3377d1cf, v1
1202 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
1203 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7
1204 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1205 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v4, v4, v7
1206 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1207 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
1208 ; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v8, 0x3377d1cf, v2 :: v_dual_mov_b32 v3, 0
1209 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1210 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
1211 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
1212 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v10
1213 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
1214 ; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_sub_f32 v0, v0, v9
1215 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
1216 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1217 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
1218 ; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1]
1219 ; GFX1100-GISEL-NEXT: s_nop 0
1220 ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1221 ; GFX1100-GISEL-NEXT: s_endpgm
1223 ; R600-LABEL: s_log_v3f32:
1225 ; R600-NEXT: ALU 62, @4, KC0[CB0:0-32], KC1[]
1226 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1227 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
1229 ; R600-NEXT: ALU clause starting at 4:
1230 ; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z,
1231 ; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].Y,
1232 ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
1233 ; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x,
1234 ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
1235 ; R600-NEXT: MUL_IEEE T2.W, KC0[3].Z, PV.W,
1236 ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x,
1237 ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
1238 ; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Y, PS,
1239 ; R600-NEXT: SETGT T3.W, literal.x, KC0[3].W,
1240 ; R600-NEXT: LOG_IEEE * T0.X, PV.W,
1241 ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
1242 ; R600-NEXT: AND_INT T1.Z, PS, literal.x,
1243 ; R600-NEXT: CNDE T2.W, PV.W, 1.0, literal.y,
1244 ; R600-NEXT: LOG_IEEE * T0.Y, PV.Z,
1245 ; R600-NEXT: -4096(nan), 1333788672(4.294967e+09)
1246 ; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W,
1247 ; R600-NEXT: ADD T2.W, T0.X, -PV.Z,
1248 ; R600-NEXT: AND_INT * T4.W, PS, literal.x,
1249 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
1250 ; R600-NEXT: ADD T2.Z, T0.Y, -PS,
1251 ; R600-NEXT: MUL_IEEE T5.W, PV.W, literal.x,
1252 ; R600-NEXT: LOG_IEEE * T0.Z, PV.Z,
1253 ; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
1254 ; R600-NEXT: MULADD_IEEE T3.Z, T1.Z, literal.x, PV.W,
1255 ; R600-NEXT: AND_INT T5.W, PS, literal.y,
1256 ; R600-NEXT: MUL_IEEE * T6.W, PV.Z, literal.x,
1257 ; R600-NEXT: 939916788(3.194618e-05), -4096(nan)
1258 ; R600-NEXT: MULADD_IEEE T4.Z, T4.W, literal.x, PS,
1259 ; R600-NEXT: ADD T6.W, T0.Z, -PV.W,
1260 ; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.y, PV.Z, BS:VEC_021/SCL_122
1261 ; R600-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
1262 ; R600-NEXT: MULADD_IEEE T1.Y, T1.Z, literal.x, PS,
1263 ; R600-NEXT: SETGT T1.Z, literal.y, |T0.X|,
1264 ; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.z,
1265 ; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Z, BS:VEC_021/SCL_122
1266 ; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
1267 ; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
1268 ; R600-NEXT: MULADD_IEEE T1.X, T4.W, literal.x, PS,
1269 ; R600-NEXT: SETGT T2.Y, literal.y, |T0.Y|,
1270 ; R600-NEXT: MULADD_IEEE T2.Z, T5.W, literal.z, PV.W, BS:VEC_120/SCL_212
1271 ; R600-NEXT: CNDE T2.W, PV.Z, T0.X, PV.Y,
1272 ; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.w,
1273 ; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
1274 ; R600-NEXT: 939916788(3.194618e-05), 1102148120(2.218071e+01)
1275 ; R600-NEXT: ADD T1.Y, PV.W, -PS,
1276 ; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.x, PV.Z,
1277 ; R600-NEXT: CNDE T0.W, PV.Y, T0.Y, PV.X,
1278 ; R600-NEXT: CNDE * T1.W, T1.W, 0.0, literal.y,
1279 ; R600-NEXT: 1060204544(6.931152e-01), 1102148120(2.218071e+01)
1280 ; R600-NEXT: ADD T1.X, PV.W, -PS,
1281 ; R600-NEXT: MULADD_IEEE T0.W, T5.W, literal.x, PV.Z,
1282 ; R600-NEXT: SETGT * T1.W, literal.y, |T0.Z|,
1283 ; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
1284 ; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
1285 ; R600-NEXT: CNDE T0.W, PS, T0.Z, PV.W,
1286 ; R600-NEXT: CNDE * T1.W, T3.W, 0.0, literal.y,
1287 ; R600-NEXT: 2(2.802597e-45), 1102148120(2.218071e+01)
1288 ; R600-NEXT: ADD T2.X, PV.W, -PS,
1289 ; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
1290 ; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00)
1291 ; R600-NEXT: LSHR * T3.X, PV.W, literal.x,
1292 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1294 ; CM-LABEL: s_log_v3f32:
1296 ; CM-NEXT: ALU 68, @4, KC0[CB0:0-32], KC1[]
1297 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T2.X
1298 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4.X, T1.X
1300 ; CM-NEXT: ALU clause starting at 4:
1301 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y,
1302 ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
1303 ; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x,
1304 ; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
1305 ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
1306 ; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x,
1307 ; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z,
1308 ; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Z,
1309 ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
1310 ; CM-NEXT: LOG_IEEE T0.X, T2.W,
1311 ; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W,
1312 ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
1313 ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
1314 ; CM-NEXT: CNDE T1.Y, T1.Z, 1.0, literal.x,
1315 ; CM-NEXT: AND_INT T0.Z, PV.X, literal.y,
1316 ; CM-NEXT: MUL_IEEE * T2.W, KC0[3].W, T0.Y,
1317 ; CM-NEXT: 1333788672(4.294967e+09), -4096(nan)
1318 ; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W,
1319 ; CM-NEXT: LOG_IEEE T0.Y, T2.W,
1320 ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
1321 ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
1322 ; CM-NEXT: ADD T2.Y, T0.X, -T0.Z,
1323 ; CM-NEXT: AND_INT T2.Z, PV.Y, literal.x,
1324 ; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.Y,
1325 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
1326 ; CM-NEXT: LOG_IEEE T1.X, T2.W,
1327 ; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W,
1328 ; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W,
1329 ; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W,
1330 ; CM-NEXT: ADD T1.Y, T0.Y, -T2.Z,
1331 ; CM-NEXT: AND_INT T3.Z, PV.X, literal.x,
1332 ; CM-NEXT: MUL_IEEE * T2.W, T2.Y, literal.y, BS:VEC_120/SCL_212
1333 ; CM-NEXT: -4096(nan), 939916788(3.194618e-05)
1334 ; CM-NEXT: MULADD_IEEE T3.Y, T0.Z, literal.x, PV.W,
1335 ; CM-NEXT: ADD T4.Z, T1.X, -PV.Z,
1336 ; CM-NEXT: MUL_IEEE * T2.W, PV.Y, literal.x,
1337 ; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
1338 ; CM-NEXT: MULADD_IEEE T4.Y, T2.Z, literal.x, PV.W,
1339 ; CM-NEXT: MUL_IEEE T5.Z, PV.Z, literal.x,
1340 ; CM-NEXT: MULADD_IEEE * T2.W, T2.Y, literal.y, PV.Y,
1341 ; CM-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
1342 ; CM-NEXT: MULADD_IEEE T2.Y, T0.Z, literal.x, PV.W,
1343 ; CM-NEXT: MULADD_IEEE T0.Z, T3.Z, literal.y, PV.Z, BS:VEC_120/SCL_212
1344 ; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.x, PV.Y,
1345 ; CM-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05)
1346 ; CM-NEXT: SETGT T2.X, literal.x, |T0.X|,
1347 ; CM-NEXT: MULADD_IEEE T1.Y, T2.Z, literal.y, PV.W,
1348 ; CM-NEXT: SETGT T2.Z, literal.x, |T0.Y|,
1349 ; CM-NEXT: MULADD_IEEE * T2.W, T4.Z, literal.y, PV.Z, BS:VEC_120/SCL_212
1350 ; CM-NEXT: 2139095040(INF), 1060204544(6.931152e-01)
1351 ; CM-NEXT: MULADD_IEEE T3.X, T3.Z, literal.x, PV.W,
1352 ; CM-NEXT: SETGT T3.Y, literal.y, |T1.X|,
1353 ; CM-NEXT: CNDE T0.Z, PV.Z, T0.Y, PV.Y,
1354 ; CM-NEXT: CNDE * T1.W, T1.W, 0.0, literal.z,
1355 ; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
1356 ; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
1357 ; CM-NEXT: ADD T4.X, PV.Z, -PV.W,
1358 ; CM-NEXT: CNDE T0.Y, PV.Y, T1.X, PV.X,
1359 ; CM-NEXT: CNDE T0.Z, T1.Z, 0.0, literal.x,
1360 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
1361 ; CM-NEXT: 1102148120(2.218071e+01), 8(1.121039e-44)
1362 ; CM-NEXT: LSHR T1.X, PV.W, literal.x,
1363 ; CM-NEXT: ADD T0.Y, PV.Y, -PV.Z,
1364 ; CM-NEXT: CNDE T0.Z, T2.X, T0.X, T2.Y,
1365 ; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.y,
1366 ; CM-NEXT: 2(2.802597e-45), 1102148120(2.218071e+01)
1367 ; CM-NEXT: ADD * T0.X, PV.Z, -PV.W,
1368 ; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
1369 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1370 %result = call <3 x float> @llvm.log.v3f32(<3 x float> %in)
1371 store <3 x float> %result, ptr addrspace(1) %out
1375 ; FIXME: We should be able to merge these packets together on Cayman so we
1376 ; have a maximum of 4 instructions.
1377 define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
1378 ; SI-SDAG-LABEL: s_log_v4f32:
1380 ; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
1381 ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
1382 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
1383 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
1384 ; SI-SDAG-NEXT: s_mov_b32 s12, 0x3377d1cf
1385 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1386 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0
1387 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
1388 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2
1389 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
1390 ; SI-SDAG-NEXT: s_mov_b32 s11, 0x3f317217
1391 ; SI-SDAG-NEXT: s_mov_b32 s13, 0x7f800000
1392 ; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218
1393 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2
1394 ; SI-SDAG-NEXT: v_fma_f32 v4, v2, s11, -v3
1395 ; SI-SDAG-NEXT: v_fma_f32 v4, v2, s12, v4
1396 ; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
1397 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s13
1398 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
1399 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0
1400 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
1401 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3
1402 ; SI-SDAG-NEXT: v_log_f32_e32 v4, v3
1403 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
1404 ; SI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3
1405 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0
1406 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v4
1407 ; SI-SDAG-NEXT: v_fma_f32 v6, v4, s11, -v2
1408 ; SI-SDAG-NEXT: v_fma_f32 v6, v4, s12, v6
1409 ; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6
1410 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc
1411 ; SI-SDAG-NEXT: v_mul_f32_e32 v6, s9, v6
1412 ; SI-SDAG-NEXT: v_log_f32_e32 v6, v6
1413 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s13
1414 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
1415 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
1416 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0
1417 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
1418 ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4
1419 ; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v6
1420 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0
1421 ; SI-SDAG-NEXT: v_fma_f32 v7, v6, s11, -v4
1422 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
1423 ; SI-SDAG-NEXT: v_fma_f32 v7, v6, s12, v7
1424 ; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
1425 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s13
1426 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3]
1427 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
1428 ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4
1429 ; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
1430 ; SI-SDAG-NEXT: v_fma_f32 v6, v0, s11, -v4
1431 ; SI-SDAG-NEXT: v_fma_f32 v6, v0, s12, v6
1432 ; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
1433 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s13
1434 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1435 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
1436 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
1437 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
1438 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4
1439 ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1440 ; SI-SDAG-NEXT: s_endpgm
1442 ; SI-GISEL-LABEL: s_log_v4f32:
1443 ; SI-GISEL: ; %bb.0:
1444 ; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
1445 ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
1446 ; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000
1447 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
1448 ; SI-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
1449 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1450 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8
1451 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
1452 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
1453 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
1454 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
1455 ; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
1456 ; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
1457 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
1458 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
1459 ; SI-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v1
1460 ; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
1461 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
1462 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
1463 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
1464 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
1465 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
1466 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
1467 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
1468 ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
1469 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
1470 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217
1471 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
1472 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1
1473 ; SI-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
1474 ; SI-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9
1475 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
1476 ; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
1477 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
1478 ; SI-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9
1479 ; SI-GISEL-NEXT: v_log_f32_e32 v9, v9
1480 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3
1481 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
1482 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
1483 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
1484 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
1485 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
1486 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9
1487 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
1488 ; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf
1489 ; SI-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
1490 ; SI-GISEL-NEXT: v_log_f32_e32 v3, v2
1491 ; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
1492 ; SI-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10
1493 ; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
1494 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5
1495 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
1496 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
1497 ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
1498 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3
1499 ; SI-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
1500 ; SI-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4
1501 ; SI-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
1502 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5
1503 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
1504 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
1505 ; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
1506 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
1507 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
1508 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1509 ; SI-GISEL-NEXT: s_endpgm
1511 ; VI-SDAG-LABEL: s_log_v4f32:
1513 ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1514 ; VI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
1515 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
1516 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
1517 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1518 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
1519 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
1520 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2
1521 ; VI-SDAG-NEXT: v_log_f32_e32 v2, v2
1522 ; VI-SDAG-NEXT: s_mov_b32 s7, 0x7f800000
1523 ; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
1524 ; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3
1525 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
1526 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4
1527 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4
1528 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
1529 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
1530 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
1531 ; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
1532 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s7
1533 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
1534 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
1535 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
1536 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3
1537 ; VI-SDAG-NEXT: v_log_f32_e32 v4, v3
1538 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218
1539 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
1540 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3
1541 ; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v4
1542 ; VI-SDAG-NEXT: v_sub_f32_e32 v6, v4, v2
1543 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6
1544 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6
1545 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v2
1546 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
1547 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6
1548 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
1549 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
1550 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6
1551 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc
1552 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6
1553 ; VI-SDAG-NEXT: v_log_f32_e32 v6, v6
1554 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s7
1555 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
1556 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
1557 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4
1558 ; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v6
1559 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0
1560 ; VI-SDAG-NEXT: v_sub_f32_e32 v7, v6, v4
1561 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
1562 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3f317000, v7
1563 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v7
1564 ; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v4
1565 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
1566 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v9, v7
1567 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
1568 ; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7
1569 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
1570 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
1571 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s7
1572 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3]
1573 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
1574 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4
1575 ; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v0
1576 ; VI-SDAG-NEXT: v_sub_f32_e32 v6, v0, v4
1577 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6
1578 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6
1579 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v4
1580 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
1581 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6
1582 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
1583 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
1584 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7
1585 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1586 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
1587 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4
1588 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s8
1589 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s9
1590 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1591 ; VI-SDAG-NEXT: s_endpgm
1593 ; VI-GISEL-LABEL: s_log_v4f32:
1594 ; VI-GISEL: ; %bb.0:
1595 ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1596 ; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
1597 ; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000
1598 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1599 ; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000
1600 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1601 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
1602 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
1603 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
1604 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
1605 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
1606 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x800000
1607 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
1608 ; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v1
1609 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v1
1610 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4
1611 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
1612 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
1613 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
1614 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
1615 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
1616 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
1617 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
1618 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v3
1619 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1]
1620 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1
1621 ; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
1622 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218
1623 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
1624 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
1625 ; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v1
1626 ; VI-GISEL-NEXT: v_sub_f32_e32 v7, v1, v6
1627 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7
1628 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6
1629 ; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
1630 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7
1631 ; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
1632 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
1633 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3
1634 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
1635 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 1.0, v2, vcc
1636 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v7
1637 ; VI-GISEL-NEXT: v_log_f32_e32 v7, v7
1638 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
1639 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v4
1640 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3]
1641 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v5, s[0:1]
1642 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
1643 ; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v7
1644 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v3
1645 ; VI-GISEL-NEXT: v_sub_f32_e32 v8, v7, v6
1646 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1]
1647 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v8
1648 ; VI-GISEL-NEXT: v_mul_f32_e32 v10, 0x3805fdf4, v6
1649 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
1650 ; VI-GISEL-NEXT: v_add_f32_e32 v9, v10, v9
1651 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317000, v8
1652 ; VI-GISEL-NEXT: v_log_f32_e32 v3, v2
1653 ; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
1654 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
1655 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
1656 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, v4
1657 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v6, s[2:3]
1658 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
1659 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
1660 ; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
1661 ; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
1662 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7
1663 ; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6
1664 ; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
1665 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7
1666 ; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
1667 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
1668 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
1669 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v4
1670 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
1671 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
1672 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
1673 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s8
1674 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s9
1675 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1676 ; VI-GISEL-NEXT: s_endpgm
1678 ; GFX900-SDAG-LABEL: s_log_v4f32:
1679 ; GFX900-SDAG: ; %bb.0:
1680 ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1681 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
1682 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
1683 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
1684 ; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x3377d1cf
1685 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1686 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
1687 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
1688 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2
1689 ; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2
1690 ; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3f317217
1691 ; GFX900-SDAG-NEXT: s_mov_b32 s11, 0x7f800000
1692 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x41b17218
1693 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2
1694 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, -v3
1695 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s10, v5
1696 ; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
1697 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s11
1698 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
1699 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
1700 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
1701 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3
1702 ; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v3
1703 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc
1704 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3
1705 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
1706 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v5
1707 ; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s7, -v2
1708 ; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s10, v7
1709 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v7
1710 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc
1711 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s5, v7
1712 ; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v7
1713 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s11
1714 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[2:3]
1715 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1]
1716 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0
1717 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
1718 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5
1719 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v7
1720 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
1721 ; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s7, -v5
1722 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
1723 ; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s10, v8
1724 ; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v8
1725 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, s11
1726 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[2:3]
1727 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc
1728 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5
1729 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v0
1730 ; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s7, -v5
1731 ; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s10, v7
1732 ; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v7
1733 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s11
1734 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
1735 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1]
1736 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
1737 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5
1738 ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9]
1739 ; GFX900-SDAG-NEXT: s_endpgm
1741 ; GFX900-GISEL-LABEL: s_log_v4f32:
1742 ; GFX900-GISEL: ; %bb.0:
1743 ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
1744 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
1745 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000
1746 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
1747 ; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
1748 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1749 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
1750 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
1751 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
1752 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
1753 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
1754 ; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
1755 ; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
1756 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
1757 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
1758 ; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v1
1759 ; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
1760 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
1761 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
1762 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
1763 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2
1764 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
1765 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1
1766 ; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1
1767 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
1768 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
1769 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217
1770 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
1771 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1
1772 ; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
1773 ; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9
1774 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
1775 ; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
1776 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
1777 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v9, s6, v9
1778 ; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v9
1779 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3
1780 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
1781 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
1782 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
1783 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
1784 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
1785 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9
1786 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
1787 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf
1788 ; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
1789 ; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2
1790 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
1791 ; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10
1792 ; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
1793 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5
1794 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
1795 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
1796 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
1797 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3
1798 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
1799 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4
1800 ; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
1801 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5
1802 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
1803 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
1804 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
1805 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
1806 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9]
1807 ; GFX900-GISEL-NEXT: s_endpgm
1809 ; GFX1100-SDAG-LABEL: s_log_v4f32:
1810 ; GFX1100-SDAG: ; %bb.0:
1811 ; GFX1100-SDAG-NEXT: s_clause 0x1
1812 ; GFX1100-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x34
1813 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1814 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1815 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s7
1816 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s6
1817 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s5
1818 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s4
1819 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1820 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
1821 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
1822 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1823 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8
1824 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9
1825 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s2
1826 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
1827 ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s7, v0 :: v_dual_mul_f32 v1, s6, v1
1828 ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s5, v2 :: v_dual_mul_f32 v3, s4, v3
1829 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s3
1830 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1831 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
1832 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
1833 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3)
1834 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2
1835 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3
1836 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v14, 0, 0x41b17218, s8
1837 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v15, 0, 0x41b17218, s9
1838 ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_mul_f32 v6, 0x3f317217, v1
1839 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
1840 ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v7, 0x3f317217, v2 :: v_dual_mul_f32 v8, 0x3f317217, v3
1841 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
1842 ; GFX1100-SDAG-NEXT: v_fma_f32 v10, 0x3f317217, v0, -v5
1843 ; GFX1100-SDAG-NEXT: v_fma_f32 v11, 0x3f317217, v1, -v6
1844 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1845 ; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7
1846 ; GFX1100-SDAG-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8
1847 ; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1
1848 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1849 ; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3
1850 ; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
1851 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1852 ; GFX1100-SDAG-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13
1853 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1854 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
1855 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
1856 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo
1857 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
1858 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v5, v2, v7, vcc_lo
1859 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
1860 ; GFX1100-SDAG-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_sub_f32 v2, v1, v9
1861 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v6, v3, v8, vcc_lo
1862 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v3, v0, v4
1863 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
1864 ; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15
1865 ; GFX1100-SDAG-NEXT: global_store_b128 v7, v[0:3], s[0:1]
1866 ; GFX1100-SDAG-NEXT: s_nop 0
1867 ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1868 ; GFX1100-SDAG-NEXT: s_endpgm
1870 ; GFX1100-GISEL-LABEL: s_log_v4f32:
1871 ; GFX1100-GISEL: ; %bb.0:
1872 ; GFX1100-GISEL-NEXT: s_clause 0x1
1873 ; GFX1100-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x34
1874 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1875 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1876 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s4
1877 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s5
1878 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s6
1879 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s7
1880 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1881 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
1882 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
1883 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1884 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8
1885 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9
1886 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s2
1887 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
1888 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1
1889 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, v2 :: v_dual_mul_f32 v3, s7, v3
1890 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s3
1891 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1892 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
1893 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
1894 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3)
1895 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
1896 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3
1897 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 0x41b17218, s8
1898 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 0x41b17218, s9
1899 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_mul_f32 v6, 0x3f317217, v1
1900 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
1901 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v7, 0x3f317217, v2 :: v_dual_mul_f32 v8, 0x3f317217, v3
1902 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
1903 ; GFX1100-GISEL-NEXT: v_fma_f32 v10, v0, 0x3f317217, -v5
1904 ; GFX1100-GISEL-NEXT: v_fma_f32 v11, 0x3f317217, v1, -v6
1905 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1906 ; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7
1907 ; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8
1908 ; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1
1909 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1910 ; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3
1911 ; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
1912 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1913 ; GFX1100-GISEL-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13
1914 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1915 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
1916 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1917 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo
1918 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
1919 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
1920 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
1921 ; GFX1100-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v9
1922 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo
1923 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
1924 ; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v3, v3, v8 :: v_dual_sub_f32 v2, v2, v14
1925 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1926 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15
1927 ; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1]
1928 ; GFX1100-GISEL-NEXT: s_nop 0
1929 ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1930 ; GFX1100-GISEL-NEXT: s_endpgm
1932 ; R600-LABEL: s_log_v4f32:
1934 ; R600-NEXT: ALU 76, @4, KC0[CB0:0-32], KC1[]
1935 ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1
1938 ; R600-NEXT: ALU clause starting at 4:
1939 ; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z,
1940 ; R600-NEXT: SETGT * T1.W, literal.x, KC0[4].X,
1941 ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
1942 ; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x,
1943 ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
1944 ; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Z, PV.W,
1945 ; R600-NEXT: SETGT T2.W, literal.x, KC0[3].W,
1946 ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.y,
1947 ; R600-NEXT: 8388608(1.175494e-38), 1333788672(4.294967e+09)
1948 ; R600-NEXT: MUL_IEEE T1.Z, KC0[4].X, PS,
1949 ; R600-NEXT: CNDE T3.W, PV.W, 1.0, literal.x,
1950 ; R600-NEXT: LOG_IEEE * T0.X, PV.Z,
1951 ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
1952 ; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W,
1953 ; R600-NEXT: SETGT T3.W, literal.x, KC0[3].Y,
1954 ; R600-NEXT: LOG_IEEE * T0.Y, PV.Z,
1955 ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
1956 ; R600-NEXT: AND_INT T1.Y, PS, literal.x,
1957 ; R600-NEXT: AND_INT T1.Z, T0.X, literal.x,
1958 ; R600-NEXT: CNDE T4.W, PV.W, 1.0, literal.y,
1959 ; R600-NEXT: LOG_IEEE * T0.Z, PV.Z,
1960 ; R600-NEXT: -4096(nan), 1333788672(4.294967e+09)
1961 ; R600-NEXT: MUL_IEEE T2.Y, KC0[3].Y, PV.W,
1962 ; R600-NEXT: ADD T2.Z, T0.X, -PV.Z,
1963 ; R600-NEXT: AND_INT T4.W, PS, literal.x,
1964 ; R600-NEXT: ADD * T5.W, T0.Y, -PV.Y,
1965 ; R600-NEXT: -4096(nan), 0(0.000000e+00)
1966 ; R600-NEXT: MUL_IEEE T3.Y, PS, literal.x,
1967 ; R600-NEXT: ADD T3.Z, T0.Z, -PV.W,
1968 ; R600-NEXT: MUL_IEEE T6.W, PV.Z, literal.x,
1969 ; R600-NEXT: LOG_IEEE * T1.X, PV.Y,
1970 ; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
1971 ; R600-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.x, PV.W,
1972 ; R600-NEXT: MUL_IEEE T4.Z, PV.Z, literal.x,
1973 ; R600-NEXT: AND_INT T6.W, PS, literal.y,
1974 ; R600-NEXT: MULADD_IEEE * T7.W, T1.Y, literal.x, PV.Y, BS:VEC_021/SCL_122
1975 ; R600-NEXT: 939916788(3.194618e-05), -4096(nan)
1976 ; R600-NEXT: MULADD_IEEE T3.Y, T5.W, literal.x, PS,
1977 ; R600-NEXT: ADD T5.Z, T1.X, -PV.W,
1978 ; R600-NEXT: MULADD_IEEE T5.W, T4.W, literal.y, PV.Z, BS:VEC_120/SCL_212
1979 ; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Y, BS:VEC_021/SCL_122
1980 ; R600-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05)
1981 ; R600-NEXT: MULADD_IEEE T2.X, T1.Z, literal.x, PS,
1982 ; R600-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.x, PV.W, BS:VEC_120/SCL_212
1983 ; R600-NEXT: MUL_IEEE T1.Z, PV.Z, literal.y,
1984 ; R600-NEXT: MULADD_IEEE T5.W, T1.Y, literal.x, PV.Y,
1985 ; R600-NEXT: SETGT * T7.W, literal.z, |T0.Y|,
1986 ; R600-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05)
1987 ; R600-NEXT: 2139095040(INF), 0(0.000000e+00)
1988 ; R600-NEXT: CNDE T3.X, PS, T0.Y, PV.W,
1989 ; R600-NEXT: CNDE T0.Y, T1.W, 0.0, literal.x,
1990 ; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.y, PV.Z, BS:VEC_120/SCL_212
1991 ; R600-NEXT: MULADD_IEEE T1.W, T4.W, literal.z, PV.Y, BS:VEC_201
1992 ; R600-NEXT: SETGT * T4.W, literal.w, |T0.Z|,
1993 ; R600-NEXT: 1102148120(2.218071e+01), 939916788(3.194618e-05)
1994 ; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
1995 ; R600-NEXT: SETGT T4.X, literal.x, |T0.X|,
1996 ; R600-NEXT: CNDE T1.Y, PS, T0.Z, PV.W,
1997 ; R600-NEXT: CNDE T0.Z, T2.W, 0.0, literal.y,
1998 ; R600-NEXT: MULADD_IEEE T1.W, T5.Z, literal.z, PV.Z,
1999 ; R600-NEXT: ADD * T2.W, PV.X, -PV.Y,
2000 ; R600-NEXT: 2139095040(INF), 1102148120(2.218071e+01)
2001 ; R600-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00)
2002 ; R600-NEXT: MULADD_IEEE T3.X, T6.W, literal.x, PV.W,
2003 ; R600-NEXT: SETGT T0.Y, literal.y, |T1.X|,
2004 ; R600-NEXT: ADD T2.Z, PV.Y, -PV.Z,
2005 ; R600-NEXT: CNDE T1.W, PV.X, T0.X, T2.X, BS:VEC_120/SCL_212
2006 ; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z,
2007 ; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
2008 ; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
2009 ; R600-NEXT: ADD T2.Y, PV.W, -PS,
2010 ; R600-NEXT: CNDE T0.W, PV.Y, T1.X, PV.X,
2011 ; R600-NEXT: CNDE * T1.W, T3.W, 0.0, literal.x,
2012 ; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
2013 ; R600-NEXT: ADD T2.X, PV.W, -PS,
2014 ; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
2015 ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2017 ; CM-LABEL: s_log_v4f32:
2019 ; CM-NEXT: ALU 84, @4, KC0[CB0:0-32], KC1[]
2020 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X
2023 ; CM-NEXT: ALU clause starting at 4:
2024 ; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y,
2025 ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
2026 ; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x,
2027 ; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W,
2028 ; CM-NEXT: SETGT * T1.W, literal.y, KC0[4].X,
2029 ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
2030 ; CM-NEXT: CNDE T0.X, PV.W, 1.0, literal.x,
2031 ; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.x,
2032 ; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z,
2033 ; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Y,
2034 ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
2035 ; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W,
2036 ; CM-NEXT: LOG_IEEE T0.Y, T2.W,
2037 ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
2038 ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
2039 ; CM-NEXT: CNDE T1.X, T1.Z, 1.0, literal.x,
2040 ; CM-NEXT: AND_INT T2.Y, PV.Y, literal.y,
2041 ; CM-NEXT: MUL_IEEE T2.Z, KC0[3].W, T1.Y,
2042 ; CM-NEXT: MUL_IEEE * T2.W, KC0[4].X, T0.X,
2043 ; CM-NEXT: 1333788672(4.294967e+09), -4096(nan)
2044 ; CM-NEXT: LOG_IEEE T0.X, T2.W,
2045 ; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W,
2046 ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
2047 ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
2048 ; CM-NEXT: LOG_IEEE T1.X (MASKED), T2.Z,
2049 ; CM-NEXT: LOG_IEEE T1.Y, T2.Z,
2050 ; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.Z,
2051 ; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.Z,
2052 ; CM-NEXT: ADD T2.X, T0.Y, -T2.Y,
2053 ; CM-NEXT: AND_INT T3.Y, PV.Y, literal.x,
2054 ; CM-NEXT: AND_INT T2.Z, T0.X, literal.x,
2055 ; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.X,
2056 ; CM-NEXT: -4096(nan), 0(0.000000e+00)
2057 ; CM-NEXT: LOG_IEEE T1.X, T2.W,
2058 ; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W,
2059 ; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W,
2060 ; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W,
2061 ; CM-NEXT: ADD T3.X, T0.X, -T2.Z,
2062 ; CM-NEXT: ADD T4.Y, T1.Y, -T3.Y,
2063 ; CM-NEXT: AND_INT T3.Z, PV.X, literal.x,
2064 ; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.y, BS:VEC_120/SCL_212
2065 ; CM-NEXT: -4096(nan), 939916788(3.194618e-05)
2066 ; CM-NEXT: MULADD_IEEE T4.X, T2.Y, literal.x, PV.W,
2067 ; CM-NEXT: ADD T5.Y, T1.X, -PV.Z,
2068 ; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x,
2069 ; CM-NEXT: MUL_IEEE * T2.W, PV.X, literal.x,
2070 ; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
2071 ; CM-NEXT: MULADD_IEEE T5.X, T2.Z, literal.x, PV.W,
2072 ; CM-NEXT: MULADD_IEEE T6.Y, T3.Y, literal.x, PV.Z,
2073 ; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x,
2074 ; CM-NEXT: MULADD_IEEE * T2.W, T2.X, literal.y, PV.X,
2075 ; CM-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
2076 ; CM-NEXT: MULADD_IEEE T2.X, T2.Y, literal.x, PV.W,
2077 ; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.Z,
2078 ; CM-NEXT: MULADD_IEEE T4.Z, T4.Y, literal.x, PV.Y, BS:VEC_120/SCL_212
2079 ; CM-NEXT: MULADD_IEEE * T2.W, T3.X, literal.x, PV.X,
2080 ; CM-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05)
2081 ; CM-NEXT: MULADD_IEEE T3.X, T2.Z, literal.x, PV.W,
2082 ; CM-NEXT: SETGT T4.Y, literal.y, |T0.X|,
2083 ; CM-NEXT: MULADD_IEEE T2.Z, T3.Y, literal.x, PV.Z,
2084 ; CM-NEXT: SETGT * T2.W, literal.y, |T1.Y|,
2085 ; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
2086 ; CM-NEXT: CNDE T4.X, PV.W, T1.Y, PV.Z,
2087 ; CM-NEXT: CNDE T1.Y, PV.Y, T0.X, PV.X,
2088 ; CM-NEXT: CNDE T2.Z, T1.W, 0.0, literal.x,
2089 ; CM-NEXT: MULADD_IEEE * T1.W, T5.Y, literal.y, T2.Y,
2090 ; CM-NEXT: 1102148120(2.218071e+01), 1060204544(6.931152e-01)
2091 ; CM-NEXT: CNDE T0.X, T0.Z, 0.0, literal.x,
2092 ; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.W, BS:VEC_120/SCL_212
2093 ; CM-NEXT: SETGT T0.Z, literal.z, |T1.X|,
2094 ; CM-NEXT: ADD * T2.W, PV.Y, -PV.Z,
2095 ; CM-NEXT: 1102148120(2.218071e+01), 1060204544(6.931152e-01)
2096 ; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
2097 ; CM-NEXT: SETGT T3.X, literal.x, |T0.Y|,
2098 ; CM-NEXT: CNDE T1.Y, PV.Z, T1.X, PV.Y,
2099 ; CM-NEXT: ADD T2.Z, T4.X, -PV.X,
2100 ; CM-NEXT: CNDE * T1.W, T1.Z, 0.0, literal.y,
2101 ; CM-NEXT: 2139095040(INF), 1102148120(2.218071e+01)
2102 ; CM-NEXT: ADD T2.Y, PV.Y, -PV.W,
2103 ; CM-NEXT: CNDE T0.Z, PV.X, T0.Y, T2.X,
2104 ; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
2105 ; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
2106 ; CM-NEXT: ADD * T2.X, PV.Z, -PV.W,
2107 ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
2108 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2109 %result = call <4 x float> @llvm.log.v4f32(<4 x float> %in)
2110 store <4 x float> %result, ptr addrspace(1) %out
; Test function v_log_f32 is a non-kernel function (plain `define float`, no
; amdgpu_kernel) that passes its float argument %in directly to @llvm.log.f32.
;
; The check lines in the body are the autogenerated per-target expectations
; (see the note on the first line of this file); regenerate them with the
; update script instead of editing them by hand. What the amdgcn sequences show
;   * the input is compared against 0x800000 and multiplied by either 1.0 or
;     0x4f800000 before v_log_f32 is issued;
;   * the v_log_f32 result is multiplied by 0x3f317217, with an fma-based
;     correction using 0x3377d1cf on the SI, GFX900 and GFX1100 runs, while the
;     VI runs mask with 0xfffff000 and use the split constants 0x3f317000 and
;     0x3805fdf4 with plain mul/add instead of fma;
;   * the scaled value replaces the raw v_log_f32 result only when
;     |result| compares less than 0x7f800000;
;   * finally 0x41b17218 (or 0) is subtracted, selected by the same condition
;     that chose the initial scale factor.
2114 define float @v_log_f32(float %in) {
2115 ; SI-SDAG-LABEL: v_log_f32:
2117 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2118 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
2119 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2120 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2121 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2122 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
2123 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
2124 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
2125 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2126 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
2127 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
2128 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
2129 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2130 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2131 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2132 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2133 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2134 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2135 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2136 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2138 ; SI-GISEL-LABEL: v_log_f32:
2139 ; SI-GISEL: ; %bb.0:
2140 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2142 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2143 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
2144 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2145 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
2146 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
2147 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
2148 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
2149 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2150 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2151 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2152 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2153 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2154 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2155 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2156 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2157 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2158 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2159 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2161 ; VI-SDAG-LABEL: v_log_f32:
2163 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2164 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
2165 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2166 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2167 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2168 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
2169 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
2170 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2171 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2172 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
2173 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
2174 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
2175 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
2176 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
2177 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
2178 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
2179 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2180 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2181 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2182 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2183 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2184 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2185 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2187 ; VI-GISEL-LABEL: v_log_f32:
2188 ; VI-GISEL: ; %bb.0:
2189 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2190 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2191 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2192 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
2193 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2194 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
2195 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
2196 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2197 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
2198 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
2199 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
2200 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
2201 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
2202 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
2203 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
2204 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2205 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2206 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2207 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2208 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2209 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2210 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2211 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2213 ; GFX900-SDAG-LABEL: v_log_f32:
2214 ; GFX900-SDAG: ; %bb.0:
2215 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2216 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
2217 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2218 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2219 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2220 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
2221 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
2222 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
2223 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2224 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
2225 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
2226 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
2227 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2228 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2229 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2230 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2231 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2232 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2233 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2234 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2236 ; GFX900-GISEL-LABEL: v_log_f32:
2237 ; GFX900-GISEL: ; %bb.0:
2238 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2239 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2240 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2241 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
2242 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2243 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
2244 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
2245 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
2246 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
2247 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2248 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2249 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2250 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2251 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2252 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2253 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2254 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2255 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2256 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2257 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2259 ; GFX1100-SDAG-LABEL: v_log_f32:
2260 ; GFX1100-SDAG: ; %bb.0:
2261 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2262 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
2263 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
2264 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2265 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
2266 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
2267 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
2268 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2269 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
2270 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2271 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
2272 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
2273 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2274 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2275 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
2276 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
2277 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2278 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2279 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2281 ; GFX1100-GISEL-LABEL: v_log_f32:
2282 ; GFX1100-GISEL: ; %bb.0:
2283 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2284 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
2285 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
2286 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2287 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
2288 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
2289 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
2290 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2291 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
2292 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2293 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
2294 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
2295 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2296 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2297 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
2298 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
2299 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
2300 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2301 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2303 ; R600-LABEL: v_log_f32:
2308 ; CM-LABEL: v_log_f32:
2312 %result = call float @llvm.log.f32(float %in)
; Test function v_log_fabs_f32 is a non-kernel function (plain `define float`)
; that applies @llvm.fabs.f32 to %in and feeds the result to @llvm.log.f32.
;
; The check lines in the body are the autogenerated per-target expectations
; (see the note on the first line of this file); regenerate them with the
; update script instead of editing them by hand. The amdgcn sequences show the
; fabs appearing as the |v0| source modifier on the initial compare against
; 0x800000 and on the scaling multiply, with no separate instruction for it;
; everything after v_log_f32 (multiply by 0x3f317217 with the 0x3377d1cf fma
; correction, or the 0xfffff000-masked split-constant form on the VI runs, the
; |result| < 0x7f800000 select, and the final subtraction of 0x41b17218 or 0)
; operates on the v_log_f32 result.
2316 define float @v_log_fabs_f32(float %in) {
2317 ; SI-SDAG-LABEL: v_log_fabs_f32:
2319 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2320 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
2321 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2322 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
2323 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2324 ; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
2325 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
2326 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
2327 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2328 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
2329 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
2330 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
2331 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2332 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2333 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2334 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2335 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2336 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2337 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2338 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2340 ; SI-GISEL-LABEL: v_log_fabs_f32:
2341 ; SI-GISEL: ; %bb.0:
2342 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2343 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2344 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2345 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
2346 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2347 ; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
2348 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
2349 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
2350 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
2351 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2352 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2353 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2354 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2355 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2356 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2357 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2358 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2359 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2360 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2361 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2363 ; VI-SDAG-LABEL: v_log_fabs_f32:
2365 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2366 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
2367 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2368 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
2369 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2370 ; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
2371 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
2372 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2373 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2374 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
2375 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
2376 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
2377 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
2378 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
2379 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
2380 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
2381 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2382 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2383 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2384 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2385 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2386 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2387 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2389 ; VI-GISEL-LABEL: v_log_fabs_f32:
2390 ; VI-GISEL: ; %bb.0:
2391 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2393 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2394 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
2395 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2396 ; VI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
2397 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
2398 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2399 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
2400 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
2401 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
2402 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
2403 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
2404 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
2405 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
2406 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2407 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2408 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2409 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2410 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2411 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2412 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2413 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2415 ; GFX900-SDAG-LABEL: v_log_fabs_f32:
2416 ; GFX900-SDAG: ; %bb.0:
2417 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2418 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
2419 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2420 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
2421 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2422 ; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
2423 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
2424 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
2425 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2426 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
2427 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
2428 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
2429 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2430 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2431 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2432 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2433 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2434 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2435 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2436 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2438 ; GFX900-GISEL-LABEL: v_log_fabs_f32:
2439 ; GFX900-GISEL: ; %bb.0:
2440 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2441 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2442 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2443 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
2444 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2445 ; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
2446 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
2447 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
2448 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
2449 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2450 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2451 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2452 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2453 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2454 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2455 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2456 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2457 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2458 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2459 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2461 ; GFX1100-SDAG-LABEL: v_log_fabs_f32:
2462 ; GFX1100-SDAG: ; %bb.0:
2463 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2464 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
2465 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2466 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
2467 ; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
2468 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
2469 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
2470 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
2471 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2472 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
2473 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
2474 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2475 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
2476 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2477 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2478 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2479 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
2480 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2481 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2483 ; GFX1100-GISEL-LABEL: v_log_fabs_f32:
2484 ; GFX1100-GISEL: ; %bb.0:
2485 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2486 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
2487 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2488 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
2489 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
2490 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
2491 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
2492 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
2493 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2494 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
2495 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
2496 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2497 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
2498 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2499 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2500 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2501 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
2502 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2503 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2505 ; R600-LABEL: v_log_fabs_f32:
2510 ; CM-LABEL: v_log_fabs_f32:
2514 %fabs = call float @llvm.fabs.f32(float %in)
2515 %result = call float @llvm.log.f32(float %fabs)
; v_log_fneg_fabs_f32 -- llvm.log.f32 applied to fneg(fabs(%in)), using the
; default (non-fast) lowering.
;
; In every AMDGPU expectation below the fneg/fabs pair is folded into the
; -|v0| source modifier of the v_mul_f32_e64 that feeds v_log_f32; no separate
; and/xor instruction is expected. The SelectionDAG runs also fold the negate
; into the scaling compare (|v0| compared against 0x80800000), whereas the
; GlobalISel runs compare -|v0| against 0x800000.
;
; Constant decode (IEEE-754 binary32): 0x800000 = 2^-126 (smallest normal),
; 0x4f800000 = 2^32, 0x3f317217 ~= ln(2), 0x41b17218 ~= 32*ln(2),
; 0x7f800000 = +inf.
; NOTE(review): presumably the 2^32 pre-scale and the 32*ln(2) correction are
; there to handle denormal inputs -- confirm against the AMDGPU log lowering.
;
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2519 define float @v_log_fneg_fabs_f32(float %in) {
2520 ; SI-SDAG-LABEL: v_log_fneg_fabs_f32:
2522 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2523 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
2524 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2525 ; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
2526 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2527 ; SI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
2528 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
2529 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
2530 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2531 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
2532 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
2533 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
2534 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2535 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2536 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2537 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2538 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2539 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2540 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2541 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2543 ; SI-GISEL-LABEL: v_log_fneg_fabs_f32:
2544 ; SI-GISEL: ; %bb.0:
2545 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2546 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2547 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2548 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
2549 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2550 ; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
2551 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
2552 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
2553 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
2554 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2555 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2556 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2557 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2558 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2559 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2560 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2561 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2562 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2563 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2564 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2566 ; VI-SDAG-LABEL: v_log_fneg_fabs_f32:
2568 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2569 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
2570 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2571 ; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
2572 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2573 ; VI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
2574 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
2575 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2576 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2577 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
2578 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
2579 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
2580 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
2581 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
2582 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
2583 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
2584 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2585 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2586 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2587 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2588 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2589 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2590 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2592 ; VI-GISEL-LABEL: v_log_fneg_fabs_f32:
2593 ; VI-GISEL: ; %bb.0:
2594 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2595 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2596 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2597 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
2598 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2599 ; VI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
2600 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
2601 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2602 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
2603 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
2604 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
2605 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
2606 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
2607 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
2608 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
2609 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2610 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2611 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2612 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2613 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2614 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2615 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2616 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2618 ; GFX900-SDAG-LABEL: v_log_fneg_fabs_f32:
2619 ; GFX900-SDAG: ; %bb.0:
2620 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2621 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000
2622 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2623 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
2624 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2625 ; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
2626 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
2627 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
2628 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2629 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
2630 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
2631 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
2632 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2633 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2634 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2635 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2636 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2637 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2638 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2639 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2641 ; GFX900-GISEL-LABEL: v_log_fneg_fabs_f32:
2642 ; GFX900-GISEL: ; %bb.0:
2643 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2644 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2645 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2646 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
2647 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2648 ; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
2649 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
2650 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
2651 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
2652 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2653 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2654 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2655 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2656 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2657 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2658 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2659 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2660 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2661 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2662 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2664 ; GFX1100-SDAG-LABEL: v_log_fneg_fabs_f32:
2665 ; GFX1100-SDAG: ; %bb.0:
2666 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2667 ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0|
2668 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2669 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
2670 ; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
2671 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
2672 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
2673 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
2674 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2675 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
2676 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
2677 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2678 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
2679 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2680 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2681 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2682 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
2683 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2684 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2686 ; GFX1100-GISEL-LABEL: v_log_fneg_fabs_f32:
2687 ; GFX1100-GISEL: ; %bb.0:
2688 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2689 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0|
2690 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2691 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
2692 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
2693 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
2694 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
2695 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
2696 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2697 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
2698 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
2699 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2700 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
2701 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2702 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2703 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2704 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
2705 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2706 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2708 ; R600-LABEL: v_log_fneg_fabs_f32:
2713 ; CM-LABEL: v_log_fneg_fabs_f32:
; IR under test -- log(-|%in|).
2717 %fabs = call float @llvm.fabs.f32(float %in)
2718 %fneg.fabs = fneg float %fabs
2719 %result = call float @llvm.log.f32(float %fneg.fabs)
; v_log_fneg_f32 -- llvm.log.f32 applied to fneg(%in), using the default
; (non-fast) lowering.
;
; The AMDGPU expectations fold the negate into the -v0 source modifier of the
; v_mul_f32_e64 that feeds v_log_f32. The SelectionDAG runs additionally fold
; it into the scaling compare (0x80800000 < v0, i.e. the sign-flipped 0x800000
; threshold), while the GlobalISel runs keep the compare as -v0 < 0x800000.
;
; Constant decode (IEEE-754 binary32): 0x800000 = 2^-126 (smallest normal),
; 0x4f800000 = 2^32, 0x3f317217 ~= ln(2), 0x41b17218 ~= 32*ln(2),
; 0x7f800000 = +inf.
;
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2723 define float @v_log_fneg_f32(float %in) {
2724 ; SI-SDAG-LABEL: v_log_fneg_f32:
2726 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2727 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
2728 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2729 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
2730 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2731 ; SI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
2732 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
2733 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
2734 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2735 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
2736 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
2737 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
2738 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2739 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2740 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2741 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2742 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2743 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2744 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2745 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2747 ; SI-GISEL-LABEL: v_log_fneg_f32:
2748 ; SI-GISEL: ; %bb.0:
2749 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2750 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2751 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2752 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
2753 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2754 ; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
2755 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
2756 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
2757 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
2758 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2759 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2760 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2761 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2762 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2763 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2764 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2765 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2766 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2767 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2768 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2770 ; VI-SDAG-LABEL: v_log_fneg_f32:
2772 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2773 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
2774 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2775 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
2776 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2777 ; VI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
2778 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
2779 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2780 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2781 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
2782 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
2783 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
2784 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
2785 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
2786 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
2787 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
2788 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2789 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2790 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2791 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2792 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2793 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2794 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2796 ; VI-GISEL-LABEL: v_log_fneg_f32:
2797 ; VI-GISEL: ; %bb.0:
2798 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2799 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2800 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2801 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
2802 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2803 ; VI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
2804 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
2805 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
2806 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
2807 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
2808 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
2809 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
2810 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
2811 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
2812 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
2813 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2814 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2815 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2816 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2817 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2818 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2819 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2820 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2822 ; GFX900-SDAG-LABEL: v_log_fneg_f32:
2823 ; GFX900-SDAG: ; %bb.0:
2824 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2825 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000
2826 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
2827 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
2828 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2829 ; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
2830 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
2831 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
2832 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2833 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
2834 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
2835 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
2836 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
2837 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2838 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
2839 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2840 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
2841 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2842 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2843 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2845 ; GFX900-GISEL-LABEL: v_log_fneg_f32:
2846 ; GFX900-GISEL: ; %bb.0:
2847 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2848 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2849 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
2850 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
2851 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
2852 ; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
2853 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
2854 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
2855 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
2856 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2857 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
2858 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
2859 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2860 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
2861 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
2862 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
2863 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
2864 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2865 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2866 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2868 ; GFX1100-SDAG-LABEL: v_log_fneg_f32:
2869 ; GFX1100-SDAG: ; %bb.0:
2870 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2871 ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
2872 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
2873 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2874 ; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
2875 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
2876 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
2877 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2878 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
2879 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2880 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
2881 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
2882 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2883 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
2884 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
2885 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
2886 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2887 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
2888 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2890 ; GFX1100-GISEL-LABEL: v_log_fneg_f32:
2891 ; GFX1100-GISEL: ; %bb.0:
2892 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2893 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
2894 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2895 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
2896 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
2897 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
2898 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
2899 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
2900 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
2901 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
2902 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
2903 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2904 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
2905 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
2906 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2907 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2908 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
2909 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
2910 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2912 ; R600-LABEL: v_log_fneg_f32:
2917 ; CM-LABEL: v_log_fneg_f32:
; IR under test -- log(-%in).
2921 %fneg = fneg float %in
2922 %result = call float @llvm.log.f32(float %fneg)
; v_log_f32_fast -- llvm.log.f32 called with the `fast` flag on the call.
;
; The expected code is a short form: v_log_f32 followed by one multiply-add
; with 0x3f317218 (~= ln(2)) and an addend of 0 or 0xc1b17218 (~= -32*ln(2))
; selected by the `%in < 0x800000` compare. The tahiti, gfx900 and gfx1100
; runs expect a fused v_fma_f32 / v_fmac_f32; the tonga runs expect a separate
; v_mul_f32 + v_add_f32.
;
; NOTE(review): the SelectionDAG expectations pre-scale the input by
; 0x4f800000 (2^32) before v_log_f32, but the GlobalISel expectations take
; v_log_f32 of the unscaled v0 while still selecting the -32*ln(2) addend --
; confirm whether that difference is intended or a GlobalISel lowering bug.
;
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2926 define float @v_log_f32_fast(float %in) {
2927 ; SI-SDAG-LABEL: v_log_f32_fast:
2929 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2930 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
2931 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2932 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
2933 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
2934 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
2935 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
2936 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
2937 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2938 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
2939 ; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
2940 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
2942 ; SI-GISEL-LABEL: v_log_f32_fast:
2943 ; SI-GISEL: ; %bb.0:
2944 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2945 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
2946 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2947 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
2948 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
2949 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
2950 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
2951 ; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
2952 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
2954 ; VI-SDAG-LABEL: v_log_f32_fast:
2956 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2957 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
2958 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2959 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
2960 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
2961 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
2962 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
2963 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
2964 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2965 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
2966 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
2967 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
2969 ; VI-GISEL-LABEL: v_log_f32_fast:
2970 ; VI-GISEL: ; %bb.0:
2971 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2972 ; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
2973 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
2974 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
2975 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
2976 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
2977 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
2978 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
2979 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
2981 ; GFX900-SDAG-LABEL: v_log_f32_fast:
2982 ; GFX900-SDAG: ; %bb.0:
2983 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2984 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
2985 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2986 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
2987 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
2988 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
2989 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
2990 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
2991 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
2992 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
2993 ; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
2994 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2996 ; GFX900-GISEL-LABEL: v_log_f32_fast:
2997 ; GFX900-GISEL: ; %bb.0:
2998 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2999 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
3000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3001 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3002 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3003 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3004 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3005 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3006 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3008 ; GFX1100-SDAG-LABEL: v_log_f32_fast:
3009 ; GFX1100-SDAG: ; %bb.0:
3010 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3011 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3012 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
3013 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
3014 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3015 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3016 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
3017 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
3018 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
3019 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
3020 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
3021 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3023 ; GFX1100-GISEL-LABEL: v_log_f32_fast:
3024 ; GFX1100-GISEL: ; %bb.0:
3025 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3026 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
3027 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3028 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
3029 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
3030 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
3031 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3033 ; R600-LABEL: v_log_f32_fast:
3038 ; CM-LABEL: v_log_f32_fast:
; IR under test -- the `fast` flag sits on the call itself.
3042 %result = call fast float @llvm.log.f32(float %in)
; v_log_f32_unsafe_math_attr -- llvm.log.f32 with no fast-math flags on the
; call; the relaxation comes from the "unsafe-fp-math"="true" function
; attribute instead.
;
; The expected instruction sequences are the same short form expected for
; v_log_f32_fast: v_log_f32 followed by one multiply-add with 0x3f317218
; (~= ln(2)) and an addend of 0 or 0xc1b17218 (~= -32*ln(2)) selected by the
; `%in < 0x800000` compare.
;
; NOTE(review): as in v_log_f32_fast, the GlobalISel expectations take
; v_log_f32 of the unscaled v0 (no 2^32 pre-multiply) while still selecting
; the -32*ln(2) addend -- confirm whether that is intended.
;
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
3046 define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
3047 ; SI-SDAG-LABEL: v_log_f32_unsafe_math_attr:
3049 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3050 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3051 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3052 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3053 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3054 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3055 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
3056 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3057 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3058 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3059 ; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3060 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3062 ; SI-GISEL-LABEL: v_log_f32_unsafe_math_attr:
3063 ; SI-GISEL: ; %bb.0:
3064 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3065 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
3066 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3067 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3068 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3069 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3070 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3071 ; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3072 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3074 ; VI-SDAG-LABEL: v_log_f32_unsafe_math_attr:
3076 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3077 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3078 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3079 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3080 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3081 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3082 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
3083 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3084 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3085 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
3086 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
3087 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3089 ; VI-GISEL-LABEL: v_log_f32_unsafe_math_attr:
3090 ; VI-GISEL: ; %bb.0:
3091 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3092 ; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
3093 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3094 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3095 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3096 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3097 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
3098 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
3099 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3101 ; GFX900-SDAG-LABEL: v_log_f32_unsafe_math_attr:
3102 ; GFX900-SDAG: ; %bb.0:
3103 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3104 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
3105 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3106 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3107 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3108 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3109 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
3110 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3111 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3112 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3113 ; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3114 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3116 ; GFX900-GISEL-LABEL: v_log_f32_unsafe_math_attr:
3117 ; GFX900-GISEL: ; %bb.0:
3118 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3119 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
3120 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3121 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3122 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3123 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3124 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3125 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3126 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3128 ; GFX1100-SDAG-LABEL: v_log_f32_unsafe_math_attr:
3129 ; GFX1100-SDAG: ; %bb.0:
3130 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3131 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3132 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
3133 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
3134 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3135 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3136 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
3137 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
3138 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
3139 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
3140 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
3141 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3143 ; GFX1100-GISEL-LABEL: v_log_f32_unsafe_math_attr:
3144 ; GFX1100-GISEL: ; %bb.0:
3145 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3146 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
3147 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3148 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
3149 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
3150 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
3151 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3153 ; R600-LABEL: v_log_f32_unsafe_math_attr:
3158 ; CM-LABEL: v_log_f32_unsafe_math_attr:
; IR under test -- a plain call; only the function attribute relaxes it.
3162 %result = call float @llvm.log.f32(float %in)
3166 define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
3167 ; SI-SDAG-LABEL: v_log_f32_approx_fn_attr:
3169 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3170 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3171 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3172 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3173 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3174 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3175 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
3176 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3177 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3178 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3179 ; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3180 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3182 ; SI-GISEL-LABEL: v_log_f32_approx_fn_attr:
3183 ; SI-GISEL: ; %bb.0:
3184 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3185 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
3186 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3187 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3188 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3189 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3190 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3191 ; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3192 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3194 ; VI-SDAG-LABEL: v_log_f32_approx_fn_attr:
3196 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3197 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3198 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3199 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3200 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3201 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3202 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
3203 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3204 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3205 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
3206 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
3207 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3209 ; VI-GISEL-LABEL: v_log_f32_approx_fn_attr:
3210 ; VI-GISEL: ; %bb.0:
3211 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3212 ; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
3213 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3214 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3215 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3216 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3217 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
3218 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
3219 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3221 ; GFX900-SDAG-LABEL: v_log_f32_approx_fn_attr:
3222 ; GFX900-SDAG: ; %bb.0:
3223 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3224 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
3225 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3226 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3227 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3228 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3229 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
3230 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3231 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3232 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3233 ; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3234 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3236 ; GFX900-GISEL-LABEL: v_log_f32_approx_fn_attr:
3237 ; GFX900-GISEL: ; %bb.0:
3238 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3239 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
3240 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3241 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3242 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3243 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3244 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3245 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3246 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3248 ; GFX1100-SDAG-LABEL: v_log_f32_approx_fn_attr:
3249 ; GFX1100-SDAG: ; %bb.0:
3250 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3251 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3252 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
3253 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
3254 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3255 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3256 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
3257 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
3258 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
3259 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
3260 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
3261 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3263 ; GFX1100-GISEL-LABEL: v_log_f32_approx_fn_attr:
3264 ; GFX1100-GISEL: ; %bb.0:
3265 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3266 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
3267 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3268 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
3269 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
3270 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
3271 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3273 ; R600-LABEL: v_log_f32_approx_fn_attr:
3278 ; CM-LABEL: v_log_f32_approx_fn_attr:
3282 %result = call float @llvm.log.f32(float %in)
; v_log_f32_ninf: llvm.log.f32 called with the `ninf` fast-math flag.
; Recorded lowering on the GCN prefixes below:
;   * inputs below 0x800000 (smallest normal f32) are multiplied by 0x4f800000
;     (2^32) before v_log_f32;
;   * the v_log_f32 result is multiplied by ~ln(2) using a split constant
;     (0x3f317217 + 0x3377d1cf through FMA, or 0x3f317000 + 0x3805fdf4 through
;     mul/add in the VI checks);
;   * 0x41b17218 (32*ln(2)) is subtracted when the input was scaled.
; The compare of |v_log_f32 result| against 0x7f800000 (+inf) and the select
; that follows it are still present in these checks even with `ninf`.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
3286 define float @v_log_f32_ninf(float %in) {
3287 ; SI-SDAG-LABEL: v_log_f32_ninf:
3289 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3290 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3291 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
3292 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3293 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
3294 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
3295 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
3296 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
3297 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3298 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
3299 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
3300 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
3301 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
3302 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
3303 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
3304 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
3305 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
3306 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3307 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
3308 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3310 ; SI-GISEL-LABEL: v_log_f32_ninf:
3311 ; SI-GISEL: ; %bb.0:
3312 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3313 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3314 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
3315 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3316 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
3317 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
3318 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
3319 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
3320 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
3321 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3322 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
3323 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
3324 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3325 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
3326 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
3327 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
3328 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
3329 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3330 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
3331 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3333 ; VI-SDAG-LABEL: v_log_f32_ninf:
3335 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3336 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3337 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
3338 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3339 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
3340 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
3341 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
3342 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
3343 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3344 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
3345 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
3346 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
3347 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
3348 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
3349 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
3350 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
3351 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
3352 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
3353 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
3354 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
3355 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3356 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
3357 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3359 ; VI-GISEL-LABEL: v_log_f32_ninf:
3360 ; VI-GISEL: ; %bb.0:
3361 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3362 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3363 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
3364 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3365 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
3366 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
3367 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
3368 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3369 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
3370 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
3371 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
3372 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
3373 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
3374 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
3375 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
3376 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3377 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
3378 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
3379 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
3380 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
3381 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3382 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
3383 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3385 ; GFX900-SDAG-LABEL: v_log_f32_ninf:
3386 ; GFX900-SDAG: ; %bb.0:
3387 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3388 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
3389 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
3390 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3391 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
3392 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
3393 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
3394 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
3395 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3396 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
3397 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
3398 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
3399 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
3400 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
3401 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
3402 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
3403 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
3404 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3405 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
3406 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3408 ; GFX900-GISEL-LABEL: v_log_f32_ninf:
3409 ; GFX900-GISEL: ; %bb.0:
3410 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3411 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3412 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
3413 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3414 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
3415 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
3416 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
3417 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
3418 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
3419 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3420 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
3421 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
3422 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3423 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
3424 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
3425 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
3426 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
3427 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3428 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
3429 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3431 ; GFX1100-SDAG-LABEL: v_log_f32_ninf:
3432 ; GFX1100-SDAG: ; %bb.0:
3433 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3434 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3435 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
3436 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3437 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
3438 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
3439 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
3440 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3441 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
3442 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3443 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
3444 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
3445 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3446 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
3447 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
3448 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
3449 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
3450 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
3451 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3453 ; GFX1100-GISEL-LABEL: v_log_f32_ninf:
3454 ; GFX1100-GISEL: ; %bb.0:
3455 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3456 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3457 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
3458 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3459 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
3460 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
3461 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
3462 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3463 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
3464 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3465 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
3466 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
3467 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3468 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3469 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
3470 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
3471 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
3472 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
3473 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3475 ; R600-LABEL: v_log_f32_ninf:
3480 ; CM-LABEL: v_log_f32_ninf:
3484 %result = call ninf float @llvm.log.f32(float %in)
; v_log_f32_afn: llvm.log.f32 called with the `afn` fast-math flag.
; Recorded lowering: the v_log_f32 result is multiplied once by 0x3f317218
; (~ln(2)), as a mul+add, an FMA or a v_fmac depending on the target, and
; 0xc1b17218 (-32*ln(2)) is added when the input is below 0x800000 (smallest
; normal f32).
; The SDAG checks multiply such inputs by 0x4f800000 (2^32) before v_log_f32.
; NOTE(review): the GISEL checks apply v_log_f32 to the unscaled input while
; still selecting the -32*ln(2) offset -- confirm that this is intended.
3488 define float @v_log_f32_afn(float %in) {
3489 ; SI-SDAG-LABEL: v_log_f32_afn:
3491 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3492 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3493 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3494 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3495 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3496 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3497 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
3498 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3499 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3500 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3501 ; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3502 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3504 ; SI-GISEL-LABEL: v_log_f32_afn:
3505 ; SI-GISEL: ; %bb.0:
3506 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3507 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
3508 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3509 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3510 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3511 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3512 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3513 ; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3514 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3516 ; VI-SDAG-LABEL: v_log_f32_afn:
3518 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3519 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3520 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3521 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3522 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3523 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3524 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
3525 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3526 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3527 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
3528 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
3529 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3531 ; VI-GISEL-LABEL: v_log_f32_afn:
3532 ; VI-GISEL: ; %bb.0:
3533 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3534 ; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
3535 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3536 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3537 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3538 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3539 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
3540 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
3541 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3543 ; GFX900-SDAG-LABEL: v_log_f32_afn:
3544 ; GFX900-SDAG: ; %bb.0:
3545 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3546 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
3547 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3548 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3549 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3550 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3551 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
3552 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3553 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3554 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3555 ; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3556 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3558 ; GFX900-GISEL-LABEL: v_log_f32_afn:
3559 ; GFX900-GISEL: ; %bb.0:
3560 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3561 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
3562 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3563 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3564 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3565 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3566 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3567 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3568 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3570 ; GFX1100-SDAG-LABEL: v_log_f32_afn:
3571 ; GFX1100-SDAG: ; %bb.0:
3572 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3573 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3574 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
3575 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
3576 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3577 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3578 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
3579 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
3580 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
3581 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
3582 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
3583 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3585 ; GFX1100-GISEL-LABEL: v_log_f32_afn:
3586 ; GFX1100-GISEL: ; %bb.0:
3587 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3588 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
3589 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3590 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
3591 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
3592 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
3593 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3595 ; R600-LABEL: v_log_f32_afn:
3600 ; CM-LABEL: v_log_f32_afn:
3604 %result = call afn float @llvm.log.f32(float %in)
; v_log_f32_afn_daz: `afn` llvm.log.f32 in a function carrying attribute
; group #0.
; Recorded lowering (shared GFX689 and GFX1100 prefixes): v_log_f32 followed by
; a single multiply by 0x3f317218 (~ln(2)); no compare against 0x800000, input
; scaling or result offset appears in these checks.
; NOTE(review): #0 is defined outside this part of the file -- presumably the
; flush-denormals (DAZ) mode that the function name suggests; confirm.
3608 define float @v_log_f32_afn_daz(float %in) #0 {
3609 ; GFX689-LABEL: v_log_f32_afn_daz:
3611 ; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3612 ; GFX689-NEXT: v_log_f32_e32 v0, v0
3613 ; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
3614 ; GFX689-NEXT: s_setpc_b64 s[30:31]
3616 ; GFX1100-LABEL: v_log_f32_afn_daz:
3618 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3619 ; GFX1100-NEXT: v_log_f32_e32 v0, v0
3620 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
3621 ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
3622 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
3624 ; R600-LABEL: v_log_f32_afn_daz:
3629 ; CM-LABEL: v_log_f32_afn_daz:
3633 %result = call afn float @llvm.log.f32(float %in)
; v_log_f32_afn_dynamic: `afn` llvm.log.f32 in a function carrying attribute
; group #1.
; Recorded lowering: the compare against 0x800000 (smallest normal f32) and the
; 0xc1b17218 (-32*ln(2)) offset are kept, and the v_log_f32 result is
; multiplied once by 0x3f317218 (~ln(2)). The SDAG checks also multiply small
; inputs by 0x4f800000 (2^32) before v_log_f32; the GISEL checks do not.
; NOTE(review): #1 is defined outside this part of the file -- presumably a
; dynamic denormal mode, as the function name suggests; confirm.
3637 define float @v_log_f32_afn_dynamic(float %in) #1 {
3638 ; SI-SDAG-LABEL: v_log_f32_afn_dynamic:
3640 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3641 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3642 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3643 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3644 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3645 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3646 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
3647 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3648 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3649 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3650 ; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3651 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3653 ; SI-GISEL-LABEL: v_log_f32_afn_dynamic:
3654 ; SI-GISEL: ; %bb.0:
3655 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3656 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
3657 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3658 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3659 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3660 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3661 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3662 ; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3663 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3665 ; VI-SDAG-LABEL: v_log_f32_afn_dynamic:
3667 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3668 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3669 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3670 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3671 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3672 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3673 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
3674 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3675 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3676 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
3677 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
3678 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3680 ; VI-GISEL-LABEL: v_log_f32_afn_dynamic:
3681 ; VI-GISEL: ; %bb.0:
3682 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3683 ; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
3684 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3685 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3686 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3687 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3688 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
3689 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
3690 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3692 ; GFX900-SDAG-LABEL: v_log_f32_afn_dynamic:
3693 ; GFX900-SDAG: ; %bb.0:
3694 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3695 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
3696 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
3697 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3698 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3699 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3700 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
3701 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3702 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3703 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3704 ; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3705 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3707 ; GFX900-GISEL-LABEL: v_log_f32_afn_dynamic:
3708 ; GFX900-GISEL: ; %bb.0:
3709 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3710 ; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
3711 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3712 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3713 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
3714 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3715 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3716 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3717 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3719 ; GFX1100-SDAG-LABEL: v_log_f32_afn_dynamic:
3720 ; GFX1100-SDAG: ; %bb.0:
3721 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3722 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3723 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
3724 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
3725 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3726 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
3727 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
3728 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
3729 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
3730 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
3731 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
3732 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3734 ; GFX1100-GISEL-LABEL: v_log_f32_afn_dynamic:
3735 ; GFX1100-GISEL: ; %bb.0:
3736 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3737 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
3738 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
3739 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
3740 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
3741 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
3742 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3744 ; R600-LABEL: v_log_f32_afn_dynamic:
3749 ; CM-LABEL: v_log_f32_afn_dynamic:
3753 %result = call afn float @llvm.log.f32(float %in)
; v_fabs_log_f32_afn: `afn` llvm.log.f32 applied to llvm.fabs.f32 of the
; argument.
; In the recorded code fabs has no instruction of its own: it shows up as the
; |v0| source modifier on the compare against 0x800000 and on the scaling
; multiply (SDAG checks) or on v_log_f32_e64 (GISEL checks).
; The rest of the sequence is a single multiply by 0x3f317218 (~ln(2)) plus
; the 0xc1b17218 (-32*ln(2)) offset selected by that compare.
3757 define float @v_fabs_log_f32_afn(float %in) {
3758 ; SI-SDAG-LABEL: v_fabs_log_f32_afn:
3760 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3761 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3762 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
3763 ; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3764 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3765 ; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
3766 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
3767 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3768 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3769 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3770 ; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3771 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3773 ; SI-GISEL-LABEL: v_fabs_log_f32_afn:
3774 ; SI-GISEL: ; %bb.0:
3775 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3776 ; SI-GISEL-NEXT: v_log_f32_e64 v2, |v0|
3777 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3778 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3779 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
3780 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3781 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3782 ; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3783 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3785 ; VI-SDAG-LABEL: v_fabs_log_f32_afn:
3787 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3788 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
3789 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
3790 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3791 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3792 ; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
3793 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
3794 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3795 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3796 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
3797 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
3798 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3800 ; VI-GISEL-LABEL: v_fabs_log_f32_afn:
3801 ; VI-GISEL: ; %bb.0:
3802 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3803 ; VI-GISEL-NEXT: v_log_f32_e64 v2, |v0|
3804 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3805 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3806 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
3807 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3808 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
3809 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
3810 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3812 ; GFX900-SDAG-LABEL: v_fabs_log_f32_afn:
3813 ; GFX900-SDAG: ; %bb.0:
3814 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3815 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
3816 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
3817 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
3818 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
3819 ; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
3820 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
3821 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
3822 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
3823 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
3824 ; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
3825 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3827 ; GFX900-GISEL-LABEL: v_fabs_log_f32_afn:
3828 ; GFX900-GISEL: ; %bb.0:
3829 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3830 ; GFX900-GISEL-NEXT: v_log_f32_e64 v2, |v0|
3831 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
3832 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
3833 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
3834 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
3835 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
3836 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
3837 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3839 ; GFX1100-SDAG-LABEL: v_fabs_log_f32_afn:
3840 ; GFX1100-SDAG: ; %bb.0:
3841 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3842 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
3843 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3844 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
3845 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, s0
3846 ; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
3847 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
3848 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
3849 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
3850 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
3851 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
3852 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3854 ; GFX1100-GISEL-LABEL: v_fabs_log_f32_afn:
3855 ; GFX1100-GISEL: ; %bb.0:
3856 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3857 ; GFX1100-GISEL-NEXT: v_log_f32_e64 v1, |v0|
3858 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
3859 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
3860 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, s0
3861 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
3862 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
3863 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3865 ; R600-LABEL: v_fabs_log_f32_afn:
3870 ; CM-LABEL: v_fabs_log_f32_afn:
3874 %fabs = call float @llvm.fabs.f32(float %in)
3875 %result = call afn float @llvm.log.f32(float %fabs)
3879 define float @v_log_f32_daz(float %in) #0 {
3880 ; SI-SDAG-LABEL: v_log_f32_daz:
3882 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3883 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
3884 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
3885 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
3886 ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
3887 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3888 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
3889 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
3890 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
3891 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
3892 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3893 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
3895 ; SI-GISEL-LABEL: v_log_f32_daz:
3896 ; SI-GISEL: ; %bb.0:
3897 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3898 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
3899 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
3900 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
3901 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
3902 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
3903 ; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
3904 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
3905 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3906 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
3907 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3908 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
3910 ; VI-SDAG-LABEL: v_log_f32_daz:
3912 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3913 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
3914 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
3915 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3916 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
3917 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
3918 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
3919 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
3920 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
3921 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
3922 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
3923 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
3924 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
3925 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3926 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
3928 ; VI-GISEL-LABEL: v_log_f32_daz:
3929 ; VI-GISEL: ; %bb.0:
3930 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3931 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
3932 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
3933 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
3934 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
3935 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
3936 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
3937 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
3938 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
3939 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
3940 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
3941 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
3942 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
3943 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3944 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
3946 ; GFX900-SDAG-LABEL: v_log_f32_daz:
3947 ; GFX900-SDAG: ; %bb.0:
3948 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3949 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
3950 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
3951 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
3952 ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
3953 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3954 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
3955 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
3956 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
3957 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
3958 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3959 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3961 ; GFX900-GISEL-LABEL: v_log_f32_daz:
3962 ; GFX900-GISEL: ; %bb.0:
3963 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3964 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
3965 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
3966 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
3967 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
3968 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
3969 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
3970 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
3971 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
3972 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
3973 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
3974 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3976 ; GFX1100-SDAG-LABEL: v_log_f32_daz:
3977 ; GFX1100-SDAG: ; %bb.0:
3978 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3979 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
3980 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
3981 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3982 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
3983 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3984 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
3985 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
3986 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3987 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
3988 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3989 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3991 ; GFX1100-GISEL-LABEL: v_log_f32_daz:
3992 ; GFX1100-GISEL: ; %bb.0:
3993 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3994 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
3995 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
3996 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
3997 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
3998 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3999 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
4000 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4001 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4002 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4003 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
4004 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
4006 ; R600-LABEL: v_log_f32_daz:
4011 ; CM-LABEL: v_log_f32_daz:
4015 %result = call float @llvm.log.f32(float %in)
; Codegen test for llvm.log.f32 on a single float argument where the call
; carries the `nnan` fast-math flag and the function has no attribute group.
; In the expected code below, every amdgcn configuration compares the input
; against 0x800000 and, when the input is smaller, multiplies it by 0x4f800000
; before v_log_f32; the same condition later selects 0x41b17218 (else 0) to
; subtract from the result. In between, the v_log_f32 result is rescaled:
; tahiti, gfx900 and gfx1100 use a mul by 0x3f317217 plus FMA corrections with
; 0x3377d1cf, while tonga masks the value with 0xfffff000 and combines mul/add
; steps with 0x3f317000 and 0x3805fdf4. A |x| < 0x7f800000 compare then selects
; between the raw v_log_f32 result and the rescaled value.
; NOTE(review): read as f32 bit patterns, 0x800000 is the smallest normal,
; 0x4f800000 is 2^32, 0x41b17218 is about 32*ln(2), 0x3f317217 is about ln(2)
; and 0x7f800000 is +inf, so this looks like denormal-input scaling plus a
; log2-to-ln conversion - confirm against the lowering code.
4019 define float @v_log_f32_nnan(float %in) {
4020 ; SI-SDAG-LABEL: v_log_f32_nnan:
4022 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4023 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
4024 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4025 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4026 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4027 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4028 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
4029 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4030 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4031 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4032 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
4033 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
4034 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4035 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4036 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4037 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4038 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4039 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4040 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4041 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4043 ; SI-GISEL-LABEL: v_log_f32_nnan:
4044 ; SI-GISEL: ; %bb.0:
4045 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4046 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4047 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4048 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4049 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4050 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4051 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
4052 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4053 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
4054 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4055 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4056 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4057 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4058 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4059 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
4060 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4061 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4062 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4063 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4064 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4066 ; VI-SDAG-LABEL: v_log_f32_nnan:
4068 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4069 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
4070 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4071 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4072 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4073 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4074 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
4075 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4076 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4077 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
4078 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
4079 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
4080 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
4081 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
4082 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
4083 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4084 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4085 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4086 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4087 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4088 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4089 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4090 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4092 ; VI-GISEL-LABEL: v_log_f32_nnan:
4093 ; VI-GISEL: ; %bb.0:
4094 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4095 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4096 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4097 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4098 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4099 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4100 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
4101 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4102 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4103 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
4104 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
4105 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
4106 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
4107 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
4108 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4109 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4110 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4111 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
4112 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4113 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4114 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4115 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4116 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4118 ; GFX900-SDAG-LABEL: v_log_f32_nnan:
4119 ; GFX900-SDAG: ; %bb.0:
4120 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4121 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
4122 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4123 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4124 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4125 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4126 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
4127 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4128 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4129 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4130 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
4131 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
4132 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4133 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4134 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4135 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4136 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4137 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4138 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4139 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4141 ; GFX900-GISEL-LABEL: v_log_f32_nnan:
4142 ; GFX900-GISEL: ; %bb.0:
4143 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4144 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4145 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4146 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4147 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4148 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4149 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
4150 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4151 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
4152 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4153 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4154 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4155 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4156 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4157 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
4158 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4159 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4160 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4161 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4162 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4164 ; GFX1100-SDAG-LABEL: v_log_f32_nnan:
4165 ; GFX1100-SDAG: ; %bb.0:
4166 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4167 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
4168 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
4169 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4170 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4171 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
4172 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
4173 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4174 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
4175 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4176 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
4177 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4178 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4179 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4180 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
4181 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
4182 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
4183 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4184 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
4186 ; GFX1100-GISEL-LABEL: v_log_f32_nnan:
4187 ; GFX1100-GISEL: ; %bb.0:
4188 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4189 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
4190 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
4191 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4192 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4193 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
4194 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
4195 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4196 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
4197 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4198 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
4199 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4200 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4201 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4202 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
4203 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
4204 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
4205 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4206 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
4208 ; R600-LABEL: v_log_f32_nnan:
4213 ; CM-LABEL: v_log_f32_nnan:
4217 %result = call nnan float @llvm.log.f32(float %in)
; Codegen test for llvm.log.f32 on a single float argument where the call
; carries the `nnan` fast-math flag and the function uses attribute group #0.
; In the expected code below, v_log_f32 is applied directly to the input: no
; amdgcn configuration emits a compare against 0x800000, a multiply of the
; input before v_log_f32, or a subtract after the final select. What remains
; is the rescale of the v_log_f32 result (mul by 0x3f317217 plus FMA
; corrections with 0x3377d1cf on tahiti, gfx900 and gfx1100; the 0xfffff000
; mask with 0x3f317000 / 0x3805fdf4 mul/add steps on tonga) and the
; |x| < 0x7f800000 compare that selects between the raw and rescaled values.
; NOTE(review): attribute group #0 is defined outside this part of the file;
; given the `_daz` suffix it presumably selects a denormal-flushing f32 mode,
; which would explain the missing input scaling - confirm at its definition.
4221 define float @v_log_f32_nnan_daz(float %in) #0 {
4222 ; SI-SDAG-LABEL: v_log_f32_nnan_daz:
4224 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4225 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
4226 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4227 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
4228 ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
4229 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4230 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4231 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
4232 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4233 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
4234 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4235 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4237 ; SI-GISEL-LABEL: v_log_f32_nnan_daz:
4238 ; SI-GISEL: ; %bb.0:
4239 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4240 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
4241 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4242 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
4243 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4244 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
4245 ; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
4246 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
4247 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4248 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
4249 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4250 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4252 ; VI-SDAG-LABEL: v_log_f32_nnan_daz:
4254 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4255 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
4256 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4257 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4258 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
4259 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
4260 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
4261 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
4262 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
4263 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4264 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
4265 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4266 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
4267 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4268 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4270 ; VI-GISEL-LABEL: v_log_f32_nnan_daz:
4271 ; VI-GISEL: ; %bb.0:
4272 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4273 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
4274 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4275 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4276 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
4277 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
4278 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
4279 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
4280 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4281 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
4282 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4283 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4284 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
4285 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4286 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4288 ; GFX900-SDAG-LABEL: v_log_f32_nnan_daz:
4289 ; GFX900-SDAG: ; %bb.0:
4290 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4291 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
4292 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4293 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
4294 ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
4295 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4296 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4297 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
4298 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4299 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
4300 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4301 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4303 ; GFX900-GISEL-LABEL: v_log_f32_nnan_daz:
4304 ; GFX900-GISEL: ; %bb.0:
4305 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4306 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
4307 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4308 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
4309 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4310 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
4311 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
4312 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
4313 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4314 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
4315 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4316 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4318 ; GFX1100-SDAG-LABEL: v_log_f32_nnan_daz:
4319 ; GFX1100-SDAG: ; %bb.0:
4320 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4321 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
4322 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
4323 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4324 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
4325 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4326 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
4327 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4328 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4329 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4330 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
4331 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
4333 ; GFX1100-GISEL-LABEL: v_log_f32_nnan_daz:
4334 ; GFX1100-GISEL: ; %bb.0:
4335 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4336 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
4337 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
4338 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4339 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
4340 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4341 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
4342 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4343 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4344 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4345 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
4346 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
4348 ; R600-LABEL: v_log_f32_nnan_daz:
4353 ; CM-LABEL: v_log_f32_nnan_daz:
4357 %result = call nnan float @llvm.log.f32(float %in)
; Codegen test for llvm.log.f32 on a single float argument where the call
; carries the `nnan` fast-math flag and the function uses attribute group #1.
; In the expected code below, every amdgcn configuration keeps the input
; scaling: the input is compared against 0x800000 and, when smaller,
; multiplied by 0x4f800000 before v_log_f32, and the same condition later
; selects 0x41b17218 (else 0) to subtract from the result. The rescale of the
; v_log_f32 result (0x3f317217 / 0x3377d1cf via mul+FMA on tahiti, gfx900 and
; gfx1100; the 0xfffff000 mask with 0x3f317000 / 0x3805fdf4 on tonga) and the
; |x| < 0x7f800000 select sit in between.
; NOTE(review): attribute group #1 is defined outside this part of the file;
; given the `_dynamic` suffix it presumably selects a dynamic denormal mode,
; so denormal inputs must still be handled - confirm at its definition.
4361 define float @v_log_f32_nnan_dynamic(float %in) #1 {
4362 ; SI-SDAG-LABEL: v_log_f32_nnan_dynamic:
4364 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4365 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
4366 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4367 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4368 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4369 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4370 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
4371 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4372 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4373 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4374 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
4375 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
4376 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4377 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4378 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4379 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4380 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4381 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4382 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4383 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4385 ; SI-GISEL-LABEL: v_log_f32_nnan_dynamic:
4386 ; SI-GISEL: ; %bb.0:
4387 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4388 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4389 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4390 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4391 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4392 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4393 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
4394 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4395 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
4396 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4397 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4398 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4399 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4400 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4401 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
4402 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4403 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4404 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4405 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4406 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4408 ; VI-SDAG-LABEL: v_log_f32_nnan_dynamic:
4410 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4411 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
4412 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4413 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4414 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4415 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4416 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
4417 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4418 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4419 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
4420 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
4421 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
4422 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
4423 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
4424 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
4425 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4426 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4427 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4428 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4429 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4430 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4431 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4432 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4434 ; VI-GISEL-LABEL: v_log_f32_nnan_dynamic:
4435 ; VI-GISEL: ; %bb.0:
4436 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4437 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4438 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4439 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4440 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4441 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4442 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
4443 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4444 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4445 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
4446 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
4447 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
4448 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
4449 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
4450 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4451 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4452 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4453 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
4454 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4455 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4456 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4457 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4458 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4460 ; GFX900-SDAG-LABEL: v_log_f32_nnan_dynamic:
4461 ; GFX900-SDAG: ; %bb.0:
4462 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4463 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
4464 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4465 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4466 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4467 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4468 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
4469 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4470 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4471 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4472 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
4473 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
4474 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4475 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4476 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4477 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4478 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4479 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4480 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4481 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4483 ; GFX900-GISEL-LABEL: v_log_f32_nnan_dynamic:
4484 ; GFX900-GISEL: ; %bb.0:
4485 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4486 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4487 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4488 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4489 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4490 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4491 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
4492 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4493 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
4494 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4495 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4496 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4497 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4498 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4499 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
4500 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4501 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4502 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4503 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4504 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4506 ; GFX1100-SDAG-LABEL: v_log_f32_nnan_dynamic:
4507 ; GFX1100-SDAG: ; %bb.0:
4508 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4509 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
4510 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
4511 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4512 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4513 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
4514 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
4515 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4516 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
4517 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4518 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
4519 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4520 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4521 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4522 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
4523 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
4524 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
4525 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4526 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
4528 ; GFX1100-GISEL-LABEL: v_log_f32_nnan_dynamic:
4529 ; GFX1100-GISEL: ; %bb.0:
4530 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4531 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
4532 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
4533 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4534 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4535 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
4536 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
4537 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4538 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
4539 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4540 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
4541 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4542 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4543 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4544 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
4545 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
4546 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
4547 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4548 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
4550 ; R600-LABEL: v_log_f32_nnan_dynamic:
4555 ; CM-LABEL: v_log_f32_nnan_dynamic:
4559 %result = call nnan float @llvm.log.f32(float %in)
4563 define float @v_log_f32_ninf_daz(float %in) #0 {
4564 ; SI-SDAG-LABEL: v_log_f32_ninf_daz:
4566 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4567 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
4568 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4569 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
4570 ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
4571 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4572 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4573 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
4574 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4575 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
4576 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4577 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4579 ; SI-GISEL-LABEL: v_log_f32_ninf_daz:
4580 ; SI-GISEL: ; %bb.0:
4581 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4582 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
4583 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4584 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
4585 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4586 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
4587 ; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
4588 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
4589 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4590 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
4591 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4592 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4594 ; VI-SDAG-LABEL: v_log_f32_ninf_daz:
4596 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4597 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
4598 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4599 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4600 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
4601 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
4602 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
4603 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
4604 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
4605 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4606 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
4607 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4608 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
4609 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4610 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4612 ; VI-GISEL-LABEL: v_log_f32_ninf_daz:
4613 ; VI-GISEL: ; %bb.0:
4614 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4615 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
4616 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4617 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4618 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
4619 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
4620 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
4621 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
4622 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4623 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
4624 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4625 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4626 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
4627 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4628 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4630 ; GFX900-SDAG-LABEL: v_log_f32_ninf_daz:
4631 ; GFX900-SDAG: ; %bb.0:
4632 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4633 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
4634 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4635 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
4636 ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
4637 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4638 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4639 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
4640 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4641 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
4642 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4643 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4645 ; GFX900-GISEL-LABEL: v_log_f32_ninf_daz:
4646 ; GFX900-GISEL: ; %bb.0:
4647 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4648 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
4649 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4650 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
4651 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4652 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
4653 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
4654 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
4655 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
4656 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
4657 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
4658 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4660 ; GFX1100-SDAG-LABEL: v_log_f32_ninf_daz:
4661 ; GFX1100-SDAG: ; %bb.0:
4662 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4663 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
4664 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
4665 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4666 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
4667 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4668 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
4669 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4670 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4671 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4672 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
4673 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
4675 ; GFX1100-GISEL-LABEL: v_log_f32_ninf_daz:
4676 ; GFX1100-GISEL: ; %bb.0:
4677 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4678 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
4679 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
4680 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4681 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
4682 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4683 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
4684 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4685 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4686 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4687 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
4688 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
4690 ; R600-LABEL: v_log_f32_ninf_daz:
4695 ; CM-LABEL: v_log_f32_ninf_daz:
4699 %result = call ninf float @llvm.log.f32(float %in)
; Test case v_log_f32_ninf_dynamic -- llvm.log.f32 called with only the `ninf`
; fast-math flag, in a function that uses attribute group #1. That group is
; defined outside this chunk; going by the function name it presumably selects
; the dynamic denormal mode (TODO confirm against the attribute definitions).
;
; What the amdgcn expectations below show: the input is compared against
; 0x800000 (bit pattern of the smallest normal f32) and conditionally
; multiplied by 0x4f800000 (2^32) before v_log_f32; afterwards 0x41b17218
; (about 32*ln2) is conditionally subtracted under the same condition. The
; compare of |result| against 0x7f800000 (+inf) and the select that follows
; it are still emitted with only `ninf` set.
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing.
4703 define float @v_log_f32_ninf_dynamic(float %in) #1 {
4704 ; SI-SDAG-LABEL: v_log_f32_ninf_dynamic:
4706 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4707 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
4708 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4709 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4710 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4711 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4712 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
4713 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4714 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4715 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4716 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
4717 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
4718 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4719 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4720 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4721 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4722 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4723 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4724 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4725 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4727 ; SI-GISEL-LABEL: v_log_f32_ninf_dynamic:
4728 ; SI-GISEL: ; %bb.0:
4729 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4730 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4731 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4732 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4733 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4734 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4735 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
4736 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4737 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
4738 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4739 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4740 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4741 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4742 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4743 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
4744 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4745 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4746 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4747 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4748 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4750 ; VI-SDAG-LABEL: v_log_f32_ninf_dynamic:
4752 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4753 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
4754 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4755 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4756 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4757 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4758 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
4759 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4760 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4761 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
4762 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
4763 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
4764 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
4765 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
4766 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
4767 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4768 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4769 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4770 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4771 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4772 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4773 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4774 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4776 ; VI-GISEL-LABEL: v_log_f32_ninf_dynamic:
4777 ; VI-GISEL: ; %bb.0:
4778 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4779 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4780 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4781 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4782 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4783 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4784 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
4785 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4786 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
4787 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
4788 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
4789 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
4790 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
4791 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
4792 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4793 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4794 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4795 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
4796 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4797 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4798 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4799 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4800 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4802 ; GFX900-SDAG-LABEL: v_log_f32_ninf_dynamic:
4803 ; GFX900-SDAG: ; %bb.0:
4804 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4805 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
4806 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4807 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4808 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4809 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4810 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
4811 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4812 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4813 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4814 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
4815 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
4816 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
4817 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4818 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4819 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4820 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4821 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4822 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4823 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4825 ; GFX900-GISEL-LABEL: v_log_f32_ninf_dynamic:
4826 ; GFX900-GISEL: ; %bb.0:
4827 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4828 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4829 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4830 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4831 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4832 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4833 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
4834 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4835 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
4836 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4837 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4838 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
4839 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4840 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
4841 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
4842 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
4843 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4844 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4845 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4846 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4848 ; GFX1100-SDAG-LABEL: v_log_f32_ninf_dynamic:
4849 ; GFX1100-SDAG: ; %bb.0:
4850 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4851 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
4852 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
4853 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4854 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4855 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
4856 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
4857 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4858 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
4859 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4860 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
4861 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4862 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4863 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
4864 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
4865 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
4866 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
4867 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4868 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
4870 ; GFX1100-GISEL-LABEL: v_log_f32_ninf_dynamic:
4871 ; GFX1100-GISEL: ; %bb.0:
4872 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4873 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
4874 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
4875 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4876 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4877 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
4878 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
4879 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4880 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
4881 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4882 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
4883 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
4884 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4885 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
4886 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
4887 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
4888 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
4889 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4890 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
4892 ; R600-LABEL: v_log_f32_ninf_dynamic:
4897 ; CM-LABEL: v_log_f32_ninf_dynamic:
4901 %result = call ninf float @llvm.log.f32(float %in)
; Test case v_log_f32_nnan_ninf -- llvm.log.f32 called with the `nnan ninf`
; fast-math flags, in a function with no attribute group on its define line.
;
; What the amdgcn expectations below show: the input pre-scaling is kept
; (compare against 0x800000, conditional multiply by 0x4f800000, and the
; matching conditional subtract of 0x41b17218 after the log), but no compare
; of the result against 0x7f800000 and no select on it is emitted; the
; expanded product is used directly.
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing.
4905 define float @v_log_f32_nnan_ninf(float %in) {
4906 ; SI-SDAG-LABEL: v_log_f32_nnan_ninf:
4908 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4909 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
4910 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4911 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4912 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4913 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4914 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
4915 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
4916 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4917 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
4918 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
4919 ; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
4920 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
4921 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4922 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4923 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4924 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
4926 ; SI-GISEL-LABEL: v_log_f32_nnan_ninf:
4927 ; SI-GISEL: ; %bb.0:
4928 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4929 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4930 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4931 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4932 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4933 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4934 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
4935 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
4936 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
4937 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
4938 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
4939 ; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
4940 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
4941 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4942 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4943 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4944 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
4946 ; VI-SDAG-LABEL: v_log_f32_nnan_ninf:
4948 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4949 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
4950 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4951 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4952 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4953 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
4954 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
4955 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4956 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4957 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v0
4958 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v0
4959 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
4960 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0
4961 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
4962 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4963 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
4964 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
4965 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4966 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
4967 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
4969 ; VI-GISEL-LABEL: v_log_f32_nnan_ninf:
4970 ; VI-GISEL: ; %bb.0:
4971 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4972 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
4973 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
4974 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
4975 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
4976 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
4977 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
4978 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
4979 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4980 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0
4981 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
4982 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
4983 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0
4984 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
4985 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
4986 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
4987 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
4988 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
4989 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
4990 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
4992 ; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf:
4993 ; GFX900-SDAG: ; %bb.0:
4994 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4995 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
4996 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
4997 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
4998 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
4999 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5000 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
5001 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5002 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5003 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5004 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
5005 ; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
5006 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
5007 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
5008 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5009 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5010 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5012 ; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf:
5013 ; GFX900-GISEL: ; %bb.0:
5014 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5015 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5016 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
5017 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
5018 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
5019 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5020 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
5021 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5022 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5023 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5024 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5025 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
5026 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5027 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5028 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5029 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5030 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5032 ; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf:
5033 ; GFX1100-SDAG: ; %bb.0:
5034 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5035 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
5036 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
5037 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5038 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5039 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
5040 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
5041 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5042 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5043 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
5044 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5045 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5046 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
5047 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
5048 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5049 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
5051 ; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf:
5052 ; GFX1100-GISEL: ; %bb.0:
5053 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5054 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
5055 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
5056 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5057 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5058 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
5059 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
5060 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5061 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5062 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
5063 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5064 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5065 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
5066 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
5067 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5068 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
5070 ; R600-LABEL: v_log_f32_nnan_ninf:
5075 ; CM-LABEL: v_log_f32_nnan_ninf:
5079 %result = call nnan ninf float @llvm.log.f32(float %in)
; Test case v_log_f32_nnan_ninf_daz -- llvm.log.f32 called with the
; `nnan ninf` fast-math flags, in a function that uses attribute group #0.
; That group is defined outside this chunk; going by the function name it
; presumably treats denormals as zero (TODO confirm against the attribute
; definitions).
;
; What the amdgcn expectations below show: v_log_f32 is applied to the input
; directly, with no compare against 0x800000 and no pre-scaling, and the
; result is only multiplied out by the split ln2 constants; there is no
; compare against 0x7f800000 and no trailing subtract. For the tonga run
; lines the SelectionDAG and GlobalISel output share the single common
; check prefix, so only one block of expectations appears for that target.
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing.
5083 define float @v_log_f32_nnan_ninf_daz(float %in) #0 {
5084 ; SI-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
5086 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5087 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
5088 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5089 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5090 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5091 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
5092 ; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
5093 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
5094 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5096 ; SI-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
5097 ; SI-GISEL: ; %bb.0:
5098 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5099 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
5100 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5101 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5102 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5103 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5104 ; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
5105 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5106 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5108 ; VI-LABEL: v_log_f32_nnan_ninf_daz:
5110 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5111 ; VI-NEXT: v_log_f32_e32 v0, v0
5112 ; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5113 ; VI-NEXT: v_sub_f32_e32 v0, v0, v1
5114 ; VI-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
5115 ; VI-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v0
5116 ; VI-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0
5117 ; VI-NEXT: v_add_f32_e32 v2, v2, v3
5118 ; VI-NEXT: v_add_f32_e32 v0, v0, v2
5119 ; VI-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5120 ; VI-NEXT: v_add_f32_e32 v0, v1, v0
5121 ; VI-NEXT: s_setpc_b64 s[30:31]
5123 ; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
5124 ; GFX900-SDAG: ; %bb.0:
5125 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5126 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
5127 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5128 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5129 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5130 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
5131 ; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
5132 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
5133 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5135 ; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
5136 ; GFX900-GISEL: ; %bb.0:
5137 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5138 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
5139 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5140 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5141 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5142 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5143 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
5144 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5145 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5147 ; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
5148 ; GFX1100-SDAG: ; %bb.0:
5149 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5150 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
5151 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
5152 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5153 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5154 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
5155 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5156 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
5157 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
5158 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
5160 ; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
5161 ; GFX1100-GISEL: ; %bb.0:
5162 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5163 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
5164 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
5165 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5166 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5167 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
5168 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5169 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
5170 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
5171 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
5173 ; R600-LABEL: v_log_f32_nnan_ninf_daz:
5178 ; CM-LABEL: v_log_f32_nnan_ninf_daz:
5182 %result = call nnan ninf float @llvm.log.f32(float %in)
; Test case v_log_f32_nnan_ninf_dynamic -- llvm.log.f32 called with the
; `nnan ninf` fast-math flags, in a function that uses attribute group #1.
; That group is defined outside this chunk; going by the function name it
; presumably selects the dynamic denormal mode (TODO confirm against the
; attribute definitions).
;
; What the amdgcn expectations below show: the input pre-scaling is kept
; (compare against 0x800000, conditional multiply by 0x4f800000, and the
; matching conditional subtract of 0x41b17218 after the log), while no
; compare of the result against 0x7f800000 and no select on it is emitted.
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing.
5186 define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
5187 ; SI-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
5189 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5190 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
5191 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
5192 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
5193 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
5194 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5195 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
5196 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5197 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5198 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5199 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
5200 ; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
5201 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
5202 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
5203 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5204 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5205 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5207 ; SI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
5208 ; SI-GISEL: ; %bb.0:
5209 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5210 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5211 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
5212 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
5213 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
5214 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5215 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
5216 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5218 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5219 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5220 ; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
5221 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5222 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5223 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5224 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5225 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5227 ; VI-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
5229 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5230 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
5231 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
5232 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
5233 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
5234 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5235 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
5236 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5237 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5238 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v0
5239 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v0
5240 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
5241 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0
5242 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
5243 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5244 ; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
5245 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
5246 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5247 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5248 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5250 ; VI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
5251 ; VI-GISEL: ; %bb.0:
5252 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5253 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5254 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
5255 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
5256 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
5257 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5258 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
5259 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5260 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5261 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0
5262 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
5263 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
5264 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0
5265 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
5266 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5267 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5268 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5269 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5270 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5271 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5273 ; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
5274 ; GFX900-SDAG: ; %bb.0:
5275 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5276 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
5277 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
5278 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
5279 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
5280 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5281 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
5282 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5283 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5284 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5285 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
5286 ; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
5287 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
5288 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
5289 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5290 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5291 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5293 ; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
5294 ; GFX900-GISEL: ; %bb.0:
5295 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5296 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5297 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
5298 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
5299 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
5300 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5301 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
5302 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5303 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5304 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5305 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5306 ; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
5307 ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
5308 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5309 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5310 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5311 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5313 ; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
5314 ; GFX1100-SDAG: ; %bb.0:
5315 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5316 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
5317 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
5318 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5319 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5320 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
5321 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
5322 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5323 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5324 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
5325 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5326 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5327 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
5328 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
5329 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5330 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
5332 ; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
5333 ; GFX1100-GISEL: ; %bb.0:
5334 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5335 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
5336 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
5337 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5338 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5339 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
5340 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
5341 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5342 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5343 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
5344 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5345 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5346 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
5347 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
5348 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5349 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
5351 ; R600-LABEL: v_log_f32_nnan_ninf_dynamic:
5356 ; CM-LABEL: v_log_f32_nnan_ninf_dynamic:
5360 %result = call nnan ninf float @llvm.log.f32(float %in)
; v_log_f32_fast_daz: calls llvm.log.f32 with the `fast` flag on a float
; argument, under attribute group #0 (its definition is outside this part of
; the file; the function name suggests a denormals-are-zero mode - TODO confirm).
; The GFX689 and GFX1100 check prefixes expect only a v_log_f32 followed by a
; single multiply by 0x3f317218 (the f32 bit pattern of roughly 0.693147,
; i.e. ln 2), with no input scaling and no compensated multiply.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5364 define float @v_log_f32_fast_daz(float %in) #0 {
5365 ; GFX689-LABEL: v_log_f32_fast_daz:
5367 ; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5368 ; GFX689-NEXT: v_log_f32_e32 v0, v0
5369 ; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
5370 ; GFX689-NEXT: s_setpc_b64 s[30:31]
5372 ; GFX1100-LABEL: v_log_f32_fast_daz:
5374 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5375 ; GFX1100-NEXT: v_log_f32_e32 v0, v0
5376 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
5377 ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
5378 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
5380 ; R600-LABEL: v_log_f32_fast_daz:
5385 ; CM-LABEL: v_log_f32_fast_daz:
5389 %result = call fast float @llvm.log.f32(float %in)
; v_log_f32_dynamic_mode: calls llvm.log.f32 with no fast-math flags on a
; float argument, under attribute group #1 (its definition is outside this
; part of the file; the name suggests a dynamic denormal mode - TODO confirm).
; Every AMDGCN check sequence below expects the same overall shape:
;   1. compare the input against 0x800000 (bit pattern of 2^-126, the
;      smallest normal f32) and multiply it by 0x4f800000 (2^32) or by 1.0;
;   2. v_log_f32 on the possibly scaled value;
;   3. multiply the result by ln 2 using a two-constant split: an FMA pair
;      with 0x3f317217 and 0x3377d1cf on the SI, GFX900 and GFX1100 prefixes,
;      or a mask with 0xfffff000 plus multiplies by 0x3f317000 and
;      0x3805fdf4 on the VI prefixes;
;   4. keep the raw v_log_f32 result when its magnitude is not below
;      0x7f800000 (+infinity);
;   5. subtract 0x41b17218 (about 22.18, i.e. 32 * ln 2) when the input was
;      scaled in step 1, otherwise subtract 0.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5393 define float @v_log_f32_dynamic_mode(float %in) #1 {
5394 ; SI-SDAG-LABEL: v_log_f32_dynamic_mode:
5396 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5397 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
5398 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
5399 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
5400 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
5401 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5402 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
5403 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5404 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5405 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5406 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
5407 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
5408 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
5409 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5410 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
5411 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5412 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
5413 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5414 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5415 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5417 ; SI-GISEL-LABEL: v_log_f32_dynamic_mode:
5418 ; SI-GISEL: ; %bb.0:
5419 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5420 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5421 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
5422 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
5423 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
5424 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5425 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
5426 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5427 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5428 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5429 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5430 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5431 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5432 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5433 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5434 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5435 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5436 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5437 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5438 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5440 ; VI-SDAG-LABEL: v_log_f32_dynamic_mode:
5442 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5443 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
5444 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
5445 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
5446 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
5447 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5448 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
5449 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
5450 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5451 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
5452 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
5453 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
5454 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
5455 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
5456 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
5457 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5458 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5459 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
5460 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5461 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
5462 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5463 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5464 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5466 ; VI-GISEL-LABEL: v_log_f32_dynamic_mode:
5467 ; VI-GISEL: ; %bb.0:
5468 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5469 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5470 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
5471 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
5472 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
5473 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5474 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
5475 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5476 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5477 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
5478 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
5479 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
5480 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
5481 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
5482 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5483 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5484 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5485 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5486 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5487 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5488 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5489 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5490 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5492 ; GFX900-SDAG-LABEL: v_log_f32_dynamic_mode:
5493 ; GFX900-SDAG: ; %bb.0:
5494 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5495 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
5496 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
5497 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
5498 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
5499 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5500 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
5501 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5502 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5503 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5504 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
5505 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
5506 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
5507 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5508 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
5509 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5510 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
5511 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5512 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5513 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5515 ; GFX900-GISEL-LABEL: v_log_f32_dynamic_mode:
5516 ; GFX900-GISEL: ; %bb.0:
5517 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5518 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5519 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
5520 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
5521 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
5522 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5523 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
5524 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5525 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5526 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5527 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5528 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5529 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5530 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5531 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5532 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5533 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5534 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5535 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5536 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5538 ; GFX1100-SDAG-LABEL: v_log_f32_dynamic_mode:
5539 ; GFX1100-SDAG: ; %bb.0:
5540 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5541 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
5542 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
5543 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5544 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
5545 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
5546 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
5547 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5548 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
5549 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5550 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
5551 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5552 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5553 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5554 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
5555 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
5556 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
5557 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
5558 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
5560 ; GFX1100-GISEL-LABEL: v_log_f32_dynamic_mode:
5561 ; GFX1100-GISEL: ; %bb.0:
5562 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5563 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
5564 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
5565 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5566 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
5567 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
5568 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
5569 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5570 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
5571 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5572 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
5573 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5574 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5575 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5576 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
5577 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
5578 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
5579 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5580 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
5582 ; R600-LABEL: v_log_f32_dynamic_mode:
5587 ; CM-LABEL: v_log_f32_dynamic_mode:
5591 %result = call float @llvm.log.f32(float %in)
; v_log_f32_undef: calls llvm.log.f32 on `float undef`; the function takes no
; arguments and has no attribute group.
; The two instruction selectors are expected to differ here:
;   - the SelectionDAG prefixes expect v_log_f32 applied directly to an
;     otherwise unset SGPR (s4, or s0 on GFX1100), with no compare against
;     0x800000 and no final subtract of 0x41b17218;
;   - the GlobalISel prefixes still expect the full small-input scaling
;     sequence (compare with 0x800000, multiply by 0x4f800000, final
;     subtract of 0x41b17218 or 0) applied to that unset SGPR.
; Both still expect the ln 2 multiply and the check against 0x7f800000.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5595 define float @v_log_f32_undef() {
5596 ; SI-SDAG-LABEL: v_log_f32_undef:
5598 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5599 ; SI-SDAG-NEXT: v_log_f32_e32 v0, s4
5600 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5601 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
5602 ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
5603 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5604 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5605 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
5606 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5607 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
5608 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5609 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5611 ; SI-GISEL-LABEL: v_log_f32_undef:
5612 ; SI-GISEL: ; %bb.0:
5613 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5614 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
5615 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
5616 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
5617 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
5618 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5619 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
5620 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5621 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5622 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5623 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5624 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5625 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5626 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5627 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5628 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5629 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5630 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5631 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5632 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5634 ; VI-SDAG-LABEL: v_log_f32_undef:
5636 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5637 ; VI-SDAG-NEXT: v_log_f32_e32 v0, s4
5638 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
5639 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5640 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
5641 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
5642 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
5643 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
5644 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
5645 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5646 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
5647 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5648 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
5649 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5650 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5652 ; VI-GISEL-LABEL: v_log_f32_undef:
5653 ; VI-GISEL: ; %bb.0:
5654 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5655 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
5656 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
5657 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
5658 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
5659 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5660 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
5661 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5662 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5663 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
5664 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
5665 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
5666 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
5667 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
5668 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5669 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5670 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5671 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5672 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5673 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5674 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5675 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5676 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5678 ; GFX900-SDAG-LABEL: v_log_f32_undef:
5679 ; GFX900-SDAG: ; %bb.0:
5680 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5681 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, s4
5682 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5683 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
5684 ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
5685 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5686 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5687 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
5688 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5689 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
5690 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5691 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5693 ; GFX900-GISEL-LABEL: v_log_f32_undef:
5694 ; GFX900-GISEL: ; %bb.0:
5695 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5696 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
5697 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
5698 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
5699 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
5700 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5701 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
5702 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5703 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
5704 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5705 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
5706 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
5707 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5708 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5709 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
5710 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
5711 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
5712 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5713 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5714 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5716 ; GFX1100-SDAG-LABEL: v_log_f32_undef:
5717 ; GFX1100-SDAG: ; %bb.0:
5718 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5719 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0
5720 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
5721 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5722 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
5723 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5724 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
5725 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5726 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5727 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5728 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
5729 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
5731 ; GFX1100-GISEL-LABEL: v_log_f32_undef:
5732 ; GFX1100-GISEL: ; %bb.0:
5733 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5734 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, s0
5735 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0
5736 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5737 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, s0, v0, vcc_lo
5738 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
5739 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
5740 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5741 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
5742 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5743 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
5744 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5745 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5746 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5747 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
5748 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
5749 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
5750 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5751 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
5753 ; R600-LABEL: v_log_f32_undef:
5758 ; CM-LABEL: v_log_f32_undef:
5762 %result = call float @llvm.log.f32(float undef)
; v_log_f32_0: calls llvm.log.f32 on the constant 0.0; the function takes no
; arguments and has no attribute group.
; Every AMDGCN check sequence below still expects a v_log_f32 instruction
; with the inline constant 0 as its operand, followed by the usual ln 2
; multiply and the check against 0x7f800000, rather than a precomputed
; result. The small-input correction is expected in two different forms:
;   - the SelectionDAG prefixes expect an unconditional add of 0xc1b17218
;     (0x41b17218, about 32 * ln 2, with the sign bit set);
;   - the GlobalISel prefixes expect an explicit compare of 0 against
;     0x800000 that selects 0x41b17218 or 0, then a subtract.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5766 define float @v_log_f32_0() {
5767 ; SI-SDAG-LABEL: v_log_f32_0:
5769 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5770 ; SI-SDAG-NEXT: v_log_f32_e32 v0, 0
5771 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5772 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
5773 ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
5774 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5775 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5776 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
5777 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5778 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
5779 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5780 ; SI-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0
5781 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5783 ; SI-GISEL-LABEL: v_log_f32_0:
5784 ; SI-GISEL: ; %bb.0:
5785 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5786 ; SI-GISEL-NEXT: v_log_f32_e32 v0, 0
5787 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5788 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
5789 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5790 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
5791 ; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
5792 ; SI-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
5793 ; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
5794 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5795 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
5796 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5797 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
5798 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
5799 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
5800 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5801 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5803 ; VI-SDAG-LABEL: v_log_f32_0:
5805 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5806 ; VI-SDAG-NEXT: v_log_f32_e32 v0, 0
5807 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
5808 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5809 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
5810 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
5811 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
5812 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
5813 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
5814 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5815 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
5816 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5817 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
5818 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5819 ; VI-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0
5820 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5822 ; VI-GISEL-LABEL: v_log_f32_0:
5823 ; VI-GISEL: ; %bb.0:
5824 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5825 ; VI-GISEL-NEXT: v_log_f32_e32 v0, 0
5826 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5827 ; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
5828 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
5829 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
5830 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
5831 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
5832 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
5833 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
5834 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
5835 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
5836 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5837 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
5838 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5839 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
5840 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
5841 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
5842 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5843 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5845 ; GFX900-SDAG-LABEL: v_log_f32_0:
5846 ; GFX900-SDAG: ; %bb.0:
5847 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5848 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, 0
5849 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5850 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
5851 ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
5852 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5853 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5854 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
5855 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5856 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
5857 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5858 ; GFX900-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0
5859 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5861 ; GFX900-GISEL-LABEL: v_log_f32_0:
5862 ; GFX900-GISEL: ; %bb.0:
5863 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5864 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, 0
5865 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5866 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
5867 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
5868 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
5869 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
5870 ; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
5871 ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
5872 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5873 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
5874 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5875 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
5876 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
5877 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
5878 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5879 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5881 ; GFX1100-SDAG-LABEL: v_log_f32_0:
5882 ; GFX1100-SDAG: ; %bb.0:
5883 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5884 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0
5885 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
5886 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5887 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
5888 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5889 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
5890 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5891 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5892 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5893 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
5894 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
5895 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0
5896 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
5898 ; GFX1100-GISEL-LABEL: v_log_f32_0:
5899 ; GFX1100-GISEL: ; %bb.0:
5900 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5901 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, 0
5902 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, 0
5903 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
5904 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5905 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
5906 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5907 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
5908 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
5909 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5910 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5911 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
5912 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
5913 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
5914 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
5915 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
5917 ; R600-LABEL: v_log_f32_0:
5922 ; CM-LABEL: v_log_f32_0:
5926 %result = call float @llvm.log.f32(float 0.0)
; v_log_f32_from_fpext_f16: bitcasts the i16 argument to half, extends it to
; float with fpext, and calls llvm.log.f32 on the extended value.
; Every AMDGCN check sequence below expects v_cvt_f32_f16 followed directly
; by v_log_f32, the ln 2 multiply and the check against 0x7f800000. None of
; them contains the compare against 0x800000, the multiply by 0x4f800000 or
; the final subtract of 0x41b17218 that the plain float-argument tests in
; this file expect (presumably because a value extended from half cannot be
; an f32 denormal, so no input scaling is needed - TODO confirm against the
; lowering code).
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5930 define float @v_log_f32_from_fpext_f16(i16 %src.i) {
5931 ; SI-SDAG-LABEL: v_log_f32_from_fpext_f16:
5933 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5934 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5935 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
5936 ; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
5937 ; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
5938 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
5939 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
5940 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
5941 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
5942 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5943 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
5944 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5945 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5947 ; SI-GISEL-LABEL: v_log_f32_from_fpext_f16:
5948 ; SI-GISEL: ; %bb.0:
5949 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5950 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5951 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
5952 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
5953 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
5954 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
5955 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
5956 ; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
5957 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
5958 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5959 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
5960 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5961 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5963 ; VI-SDAG-LABEL: v_log_f32_from_fpext_f16:
5965 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5966 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
5967 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
5968 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
5969 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5970 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
5971 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
5972 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
5973 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
5974 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
5975 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
5976 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
5977 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
5978 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
5979 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
5980 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5982 ; VI-GISEL-LABEL: v_log_f32_from_fpext_f16:
5983 ; VI-GISEL: ; %bb.0:
5984 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5985 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5986 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
5987 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
5988 ; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
5989 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
5990 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
5991 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
5992 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
5993 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
5994 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
5995 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
5996 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
5997 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
5998 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5999 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6001 ; GFX900-SDAG-LABEL: v_log_f32_from_fpext_f16:
6002 ; GFX900-SDAG: ; %bb.0:
6003 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6004 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6005 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
6006 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
6007 ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
6008 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
6009 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6010 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
6011 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
6012 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6013 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
6014 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
6015 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6017 ; GFX900-GISEL-LABEL: v_log_f32_from_fpext_f16:
6018 ; GFX900-GISEL: ; %bb.0:
6019 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6020 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6021 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
6022 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
6023 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
6024 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
6025 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
6026 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
6027 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
6028 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
6029 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
6030 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
6031 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6033 ; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_f16:
6034 ; GFX1100-SDAG: ; %bb.0:
6035 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6036 ; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6037 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
6038 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
6039 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
6040 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6041 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
6042 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
6043 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6044 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
6045 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6046 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
6047 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
6048 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
6050 ; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_f16:
6051 ; GFX1100-GISEL: ; %bb.0:
6052 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6053 ; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6054 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
6055 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
6056 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
6057 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6058 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
6059 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
6060 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6061 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
6062 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
6063 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
6064 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
6065 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
6067 ; R600-LABEL: v_log_f32_from_fpext_f16:
6072 ; CM-LABEL: v_log_f32_from_fpext_f16:
6076 %src = bitcast i16 %src.i to half
6077 %fpext = fpext half %src to float
6078 %result = call float @llvm.log.f32(float %fpext)
; Codegen test for llvm.log.f32 whose operand is the fpext of an f16 fadd.
; Both i16 arguments are bitcast to half, added as half, extended to float
; and passed to the log intrinsic. The tonga, gfx900 and gfx1100 expectations
; below begin with v_add_f16 followed by v_cvt_f32_f16; the tahiti
; expectations perform the add with v_add_f32 after converting both inputs.
; Only the tahiti SelectionDAG expectations contain the 0x800000 compare and
; 0x4f800000 select in front of v_log_f32.
6082 define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
6083 ; SI-SDAG-LABEL: v_log_f32_from_fpext_math_f16:
6085 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6086 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6087 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6088 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
6089 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
6090 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
6091 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
6092 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
6093 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
6094 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6095 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
6096 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6097 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
6098 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
6099 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
6100 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
6101 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6102 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
6103 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
6104 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
6105 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
6106 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
6107 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6109 ; SI-GISEL-LABEL: v_log_f32_from_fpext_math_f16:
6110 ; SI-GISEL: ; %bb.0:
6111 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6112 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6113 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6114 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
6115 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
6116 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
6117 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6118 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
6119 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6120 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
6121 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
6122 ; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
6123 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
6124 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
6125 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
6126 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
6127 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6129 ; VI-SDAG-LABEL: v_log_f32_from_fpext_math_f16:
6131 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6132 ; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
6133 ; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6134 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
6135 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
6136 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
6137 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
6138 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
6139 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v2
6140 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
6141 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
6142 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
6143 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
6144 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6145 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
6146 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
6147 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6149 ; VI-GISEL-LABEL: v_log_f32_from_fpext_math_f16:
6150 ; VI-GISEL: ; %bb.0:
6151 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6152 ; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
6153 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6154 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
6155 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
6156 ; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
6157 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
6158 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
6159 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
6160 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
6161 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
6162 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
6163 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
6164 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
6165 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
6166 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6167 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6169 ; GFX900-SDAG-LABEL: v_log_f32_from_fpext_math_f16:
6170 ; GFX900-SDAG: ; %bb.0:
6171 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6172 ; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
6173 ; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6174 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
6175 ; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
6176 ; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
6177 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
6178 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6179 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
6180 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
6181 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6182 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
6183 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
6184 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6186 ; GFX900-GISEL-LABEL: v_log_f32_from_fpext_math_f16:
6187 ; GFX900-GISEL: ; %bb.0:
6188 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6189 ; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
6190 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6191 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
6192 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
6193 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
6194 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
6195 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
6196 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
6197 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
6198 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
6199 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
6200 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
6201 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6203 ; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_math_f16:
6204 ; GFX1100-SDAG: ; %bb.0:
6205 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6206 ; GFX1100-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
6207 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6208 ; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6209 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
6210 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
6211 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6212 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
6213 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
6214 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
6215 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
6216 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6217 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6218 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
6219 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
6221 ; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_math_f16:
6222 ; GFX1100-GISEL: ; %bb.0:
6223 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6224 ; GFX1100-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
6225 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6226 ; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6227 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
6228 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
6229 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6230 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
6231 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
6232 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
6233 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
6234 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6235 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
6236 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
6237 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
6239 ; R600-LABEL: v_log_f32_from_fpext_math_f16:
6244 ; CM-LABEL: v_log_f32_from_fpext_math_f16:
; IR under test. The add is done in half precision before the extension, so
; the value reaching the log intrinsic is an exactly representable f16.
6248 %src0 = bitcast i16 %src0.i to half
6249 %src1 = bitcast i16 %src1.i to half
6250 %fadd = fadd half %src0, %src1
6251 %fpext = fpext half %fadd to float
6252 %result = call float @llvm.log.f32(float %fpext)
; Codegen test for llvm.log.f32 whose operand is the fpext of a bfloat
; argument. The SelectionDAG expectations below all contain the 0x800000
; compare and 0x4f800000 select in front of v_log_f32 and no conversion
; instruction on the input.
; NOTE(review) the GlobalISel expectations start with v_cvt_f32_f16 on the
; bfloat input and omit the 0x800000 compare. The check lines only record
; current llc output, so confirm whether that lowering is intended.
6256 define float @v_log_f32_from_fpext_bf16(bfloat %src) {
6257 ; SI-SDAG-LABEL: v_log_f32_from_fpext_bf16:
6259 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6260 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
6261 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
6262 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
6263 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
6264 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
6265 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6266 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
6267 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6268 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
6269 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
6270 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
6271 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
6272 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6273 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
6274 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
6275 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
6276 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
6277 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
6278 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6280 ; SI-GISEL-LABEL: v_log_f32_from_fpext_bf16:
6281 ; SI-GISEL: ; %bb.0:
6282 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6283 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6284 ; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
6285 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
6286 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
6287 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
6288 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
6289 ; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
6290 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
6291 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
6292 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
6293 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
6294 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6296 ; VI-SDAG-LABEL: v_log_f32_from_fpext_bf16:
6298 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6299 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
6300 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
6301 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
6302 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
6303 ; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
6304 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
6305 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
6306 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
6307 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
6308 ; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
6309 ; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
6310 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
6311 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
6312 ; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
6313 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
6314 ; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6315 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
6316 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
6317 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
6318 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
6319 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
6320 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6322 ; VI-GISEL-LABEL: v_log_f32_from_fpext_bf16:
6323 ; VI-GISEL: ; %bb.0:
6324 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6325 ; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6326 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
6327 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
6328 ; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
6329 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
6330 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
6331 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
6332 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
6333 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
6334 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
6335 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
6336 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
6337 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
6338 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6339 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6341 ; GFX900-SDAG-LABEL: v_log_f32_from_fpext_bf16:
6342 ; GFX900-SDAG: ; %bb.0:
6343 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6344 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
6345 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
6346 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
6347 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
6348 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
6349 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
6350 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
6351 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6352 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
6353 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
6354 ; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
6355 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
6356 ; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6357 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
6358 ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
6359 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
6360 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
6361 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
6362 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6364 ; GFX900-GISEL-LABEL: v_log_f32_from_fpext_bf16:
6365 ; GFX900-GISEL: ; %bb.0:
6366 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6367 ; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6368 ; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
6369 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
6370 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
6371 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
6372 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
6373 ; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
6374 ; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
6375 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
6376 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
6377 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
6378 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6380 ; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_bf16:
6381 ; GFX1100-SDAG: ; %bb.0:
6382 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6383 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
6384 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
6385 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6386 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
6387 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
6388 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
6389 ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6390 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
6391 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
6392 ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
6393 ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
6394 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6395 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
6396 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
6397 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
6398 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
6399 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
6400 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
6402 ; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_bf16:
6403 ; GFX1100-GISEL: ; %bb.0:
6404 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6405 ; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6406 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
6407 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
6408 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
6409 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
6410 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
6411 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
6412 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6413 ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
6414 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
6415 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
6416 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
6417 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
6419 ; R600-LABEL: v_log_f32_from_fpext_bf16:
6424 ; CM-LABEL: v_log_f32_from_fpext_bf16:
; IR under test. The bfloat argument is extended directly, with no bitcast
; from an integer argument.
6428 %fpext = fpext bfloat %src to float
6429 %result = call float @llvm.log.f32(float %fpext)
; Codegen test for llvm.log.f16 on a plain half argument.
; The tonga, gfx900 and gfx1100 expectations are v_log_f16 followed by a
; v_mul_f16 with the literal 0x398c (about 0.693 when read as an f16 bit
; pattern). The tahiti expectations convert to f32, use v_log_f32 and a
; v_mul_f32 with 0x3f317218, then convert the result back to f16.
6433 define half @v_log_f16(half %in) {
6434 ; SI-SDAG-LABEL: v_log_f16:
6436 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6437 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6438 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6439 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6440 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6441 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6442 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6443 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6445 ; SI-GISEL-LABEL: v_log_f16:
6446 ; SI-GISEL: ; %bb.0:
6447 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6448 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6449 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
6450 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6451 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6452 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6454 ; VI-LABEL: v_log_f16:
6456 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6457 ; VI-NEXT: v_log_f16_e32 v0, v0
6458 ; VI-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6459 ; VI-NEXT: s_setpc_b64 s[30:31]
6461 ; GFX900-LABEL: v_log_f16:
6463 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6464 ; GFX900-NEXT: v_log_f16_e32 v0, v0
6465 ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6466 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6468 ; GFX1100-LABEL: v_log_f16:
6470 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6471 ; GFX1100-NEXT: v_log_f16_e32 v0, v0
6472 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
6473 ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6474 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
6476 ; R600-LABEL: v_log_f16:
6481 ; CM-LABEL: v_log_f16:
; IR under test, a single call with no fast-math flags.
6485 %result = call half @llvm.log.f16(half %in)
; Codegen test for llvm.log.f16 applied to the fabs of a half argument.
; In every expectation below the absolute value is written as a |v0| operand
; on an existing instruction (v_log_f16_e64 on tonga, gfx900 and gfx1100,
; v_cvt_f32_f16_e64 on tahiti); no separate instruction for fabs is expected.
6489 define half @v_log_fabs_f16(half %in) {
6490 ; SI-SDAG-LABEL: v_log_fabs_f16:
6492 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6493 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6494 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6495 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6496 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6497 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6498 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6499 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6501 ; SI-GISEL-LABEL: v_log_fabs_f16:
6502 ; SI-GISEL: ; %bb.0:
6503 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6504 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6505 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
6506 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6507 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6508 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6510 ; VI-LABEL: v_log_fabs_f16:
6512 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6513 ; VI-NEXT: v_log_f16_e64 v0, |v0|
6514 ; VI-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6515 ; VI-NEXT: s_setpc_b64 s[30:31]
6517 ; GFX900-LABEL: v_log_fabs_f16:
6519 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6520 ; GFX900-NEXT: v_log_f16_e64 v0, |v0|
6521 ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6522 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6524 ; GFX1100-LABEL: v_log_fabs_f16:
6526 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6527 ; GFX1100-NEXT: v_log_f16_e64 v0, |v0|
6528 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
6529 ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6530 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
6532 ; R600-LABEL: v_log_fabs_f16:
6537 ; CM-LABEL: v_log_fabs_f16:
; IR under test, fabs feeding the log intrinsic.
6541 %fabs = call half @llvm.fabs.f16(half %in)
6542 %result = call half @llvm.log.f16(half %fabs)
; Codegen test for llvm.log.f16 applied to the negated fabs of a half
; argument. In every expectation below the combined negate and absolute
; value is written as a -|v0| operand on an existing instruction
; (v_log_f16_e64 on tonga, gfx900 and gfx1100, v_cvt_f32_f16_e64 on tahiti).
6546 define half @v_log_fneg_fabs_f16(half %in) {
6547 ; SI-SDAG-LABEL: v_log_fneg_fabs_f16:
6549 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6550 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6551 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
6552 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6553 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6554 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6555 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6556 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6558 ; SI-GISEL-LABEL: v_log_fneg_fabs_f16:
6559 ; SI-GISEL: ; %bb.0:
6560 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6561 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
6562 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
6563 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6564 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6565 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6567 ; VI-LABEL: v_log_fneg_fabs_f16:
6569 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6570 ; VI-NEXT: v_log_f16_e64 v0, -|v0|
6571 ; VI-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6572 ; VI-NEXT: s_setpc_b64 s[30:31]
6574 ; GFX900-LABEL: v_log_fneg_fabs_f16:
6576 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6577 ; GFX900-NEXT: v_log_f16_e64 v0, -|v0|
6578 ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6579 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6581 ; GFX1100-LABEL: v_log_fneg_fabs_f16:
6583 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6584 ; GFX1100-NEXT: v_log_f16_e64 v0, -|v0|
6585 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
6586 ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6587 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
6589 ; R600-LABEL: v_log_fneg_fabs_f16:
6594 ; CM-LABEL: v_log_fneg_fabs_f16:
; IR under test, fabs then fneg feeding the log intrinsic.
6598 %fabs = call half @llvm.fabs.f16(half %in)
6599 %fneg.fabs = fneg half %fabs
6600 %result = call half @llvm.log.f16(half %fneg.fabs)
; Codegen test for llvm.log.f16 applied to the fneg of a half argument.
; The negation is expected as a -v0 operand on an existing instruction.
; The tahiti SelectionDAG expectations place it on the leading
; v_cvt_f16_f32_e64, the tahiti GlobalISel expectations on v_cvt_f32_f16_e64,
; and the tonga, gfx900 and gfx1100 expectations on v_log_f16_e64.
6604 define half @v_log_fneg_f16(half %in) {
6605 ; SI-SDAG-LABEL: v_log_fneg_f16:
6607 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6608 ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
6609 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6610 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6611 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6612 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6613 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6614 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6616 ; SI-GISEL-LABEL: v_log_fneg_f16:
6617 ; SI-GISEL: ; %bb.0:
6618 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6619 ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
6620 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
6621 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6622 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6623 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6625 ; VI-LABEL: v_log_fneg_f16:
6627 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6628 ; VI-NEXT: v_log_f16_e64 v0, -v0
6629 ; VI-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6630 ; VI-NEXT: s_setpc_b64 s[30:31]
6632 ; GFX900-LABEL: v_log_fneg_f16:
6634 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6635 ; GFX900-NEXT: v_log_f16_e64 v0, -v0
6636 ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6637 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6639 ; GFX1100-LABEL: v_log_fneg_f16:
6641 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6642 ; GFX1100-NEXT: v_log_f16_e64 v0, -v0
6643 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
6644 ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6645 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
6647 ; R600-LABEL: v_log_fneg_f16:
6652 ; CM-LABEL: v_log_fneg_f16:
; IR under test, fneg feeding the log intrinsic.
6656 %fneg = fneg half %in
6657 %result = call half @llvm.log.f16(half %fneg)
; Codegen test for llvm.log.f16 called with the fast flag on the call.
; The expected instruction sequences below are the same as those recorded
; for v_log_f16 (the call without fast-math flags) on every checked target.
6661 define half @v_log_f16_fast(half %in) {
6662 ; SI-SDAG-LABEL: v_log_f16_fast:
6664 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6665 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6666 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6667 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6668 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6669 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6670 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6671 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6673 ; SI-GISEL-LABEL: v_log_f16_fast:
6674 ; SI-GISEL: ; %bb.0:
6675 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6676 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6677 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
6678 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6679 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6680 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6682 ; VI-LABEL: v_log_f16_fast:
6684 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6685 ; VI-NEXT: v_log_f16_e32 v0, v0
6686 ; VI-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6687 ; VI-NEXT: s_setpc_b64 s[30:31]
6689 ; GFX900-LABEL: v_log_f16_fast:
6691 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6692 ; GFX900-NEXT: v_log_f16_e32 v0, v0
6693 ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6694 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6696 ; GFX1100-LABEL: v_log_f16_fast:
6698 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6699 ; GFX1100-NEXT: v_log_f16_e32 v0, v0
6700 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
6701 ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6702 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
6704 ; R600-LABEL: v_log_f16_fast:
6709 ; CM-LABEL: v_log_f16_fast:
; IR under test, the only difference from v_log_f16 is the fast flag.
6713 %result = call fast half @llvm.log.f16(half %in)
; Codegen test for llvm.log.v2f16 on a <2 x half> argument.
; Every expectation below contains one log and one multiply per element.
; On tonga and gfx900 the second element is read with an sdwa v_log_f16
; using src0_sel WORD_1; gfx1100 instead shifts it down with v_lshrrev_b32
; by 16. The two results are recombined with v_or_b32 on tonga and with
; v_pack_b32_f16 on gfx900 and gfx1100. The tahiti expectations handle the
; elements in v0 and v1 through f32 conversions and v_log_f32.
6717 define <2 x half> @v_log_v2f16(<2 x half> %in) {
6718 ; SI-SDAG-LABEL: v_log_v2f16:
6720 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6721 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6722 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6723 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6724 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6725 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6726 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
6727 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6728 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
6729 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6730 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6731 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6732 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6733 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6735 ; SI-GISEL-LABEL: v_log_v2f16:
6736 ; SI-GISEL: ; %bb.0:
6737 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6738 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6739 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
6740 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
6741 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
6742 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6743 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
6744 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6745 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6746 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6748 ; VI-SDAG-LABEL: v_log_v2f16:
6750 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6751 ; VI-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6752 ; VI-SDAG-NEXT: v_log_f16_e32 v0, v0
6753 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x398c
6754 ; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6755 ; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6756 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6757 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6759 ; VI-GISEL-LABEL: v_log_v2f16:
6760 ; VI-GISEL: ; %bb.0:
6761 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6762 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0
6763 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6764 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x398c
6765 ; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6766 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6767 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6768 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6770 ; GFX900-LABEL: v_log_v2f16:
6772 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6773 ; GFX900-NEXT: v_log_f16_e32 v1, v0
6774 ; GFX900-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6775 ; GFX900-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6776 ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6777 ; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0
6778 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6780 ; GFX1100-LABEL: v_log_v2f16:
6782 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6783 ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0
6784 ; GFX1100-NEXT: v_log_f16_e32 v0, v0
6785 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
6786 ; GFX1100-NEXT: v_log_f16_e32 v1, v1
6787 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
6788 ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6789 ; GFX1100-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6790 ; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1
6791 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
6793 ; R600-LABEL: v_log_v2f16:
6798 ; CM-LABEL: v_log_v2f16:
; IR under test, a single vector call whose result is returned unchanged.
6802 %result = call <2 x half> @llvm.log.v2f16(<2 x half> %in)
6803 ret <2 x half> %result
; v_log_fabs_v2f16: llvm.log.v2f16 applied to llvm.fabs.v2f16 of the argument.
; The expectations below are autogenerated (see the note on the first line of
; this file); regenerate them with the script instead of editing by hand.
; What the expectations show: on tahiti each half is extended to f32, goes
; through v_log_f32 and is multiplied by 0x3f317218; tonga, gfx900 and gfx1100
; use v_log_f16 and multiply by 0x398c. The SelectionDAG runs fold the fabs
; into a |...| source modifier, while the GlobalISel runs clear both sign bits
; with v_and_b32 0x7fff7fff before taking the log.
6806 define <2 x half> @v_log_fabs_v2f16(<2 x half> %in) {
6807 ; SI-SDAG-LABEL: v_log_fabs_v2f16:
6809 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6810 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6811 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6812 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
6813 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
6814 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6815 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
6816 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6817 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
6818 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6819 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6820 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6821 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
6822 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6824 ; SI-GISEL-LABEL: v_log_fabs_v2f16:
6825 ; SI-GISEL: ; %bb.0:
6826 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6827 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6828 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6829 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6830 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6831 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6832 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6833 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6834 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
6835 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
6836 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v1
6837 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6838 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
6839 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6840 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6842 ; VI-SDAG-LABEL: v_log_fabs_v2f16:
6844 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6845 ; VI-SDAG-NEXT: v_log_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6846 ; VI-SDAG-NEXT: v_log_f16_e64 v0, |v0|
6847 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x398c
6848 ; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6849 ; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6850 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6851 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6853 ; VI-GISEL-LABEL: v_log_fabs_v2f16:
6854 ; VI-GISEL: ; %bb.0:
6855 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6856 ; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6857 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0
6858 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6859 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x398c
6860 ; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6861 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6862 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6863 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6865 ; GFX900-SDAG-LABEL: v_log_fabs_v2f16:
6866 ; GFX900-SDAG: ; %bb.0:
6867 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6868 ; GFX900-SDAG-NEXT: v_log_f16_e64 v1, |v0|
6869 ; GFX900-SDAG-NEXT: v_log_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6870 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6871 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6872 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6873 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
6875 ; GFX900-GISEL-LABEL: v_log_fabs_v2f16:
6876 ; GFX900-GISEL: ; %bb.0:
6877 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6878 ; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6879 ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0
6880 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6881 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6882 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6883 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
6884 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
6886 ; GFX1100-SDAG-LABEL: v_log_fabs_v2f16:
6887 ; GFX1100-SDAG: ; %bb.0:
6888 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6889 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
6890 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0|
6891 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
6892 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, |v1|
6893 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
6894 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6895 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6896 ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
6897 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
6899 ; GFX1100-GISEL-LABEL: v_log_fabs_v2f16:
6900 ; GFX1100-GISEL: ; %bb.0:
6901 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6902 ; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
6903 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6904 ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
6905 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
6906 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1
6907 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
6908 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6909 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6910 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
6911 ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
6912 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
6914 ; R600-LABEL: v_log_fabs_v2f16:
6919 ; CM-LABEL: v_log_fabs_v2f16:
6923 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
6924 %result = call <2 x half> @llvm.log.v2f16(<2 x half> %fabs)
6925 ret <2 x half> %result
; v_log_fneg_fabs_v2f16: llvm.log.v2f16 of fneg(fabs(%in)).
; The expectations below are autogenerated; regenerate them with the script
; named on the first line of this file instead of editing by hand.
; What the expectations show: the SelectionDAG runs for tonga, gfx900 and
; gfx1100 fold the fneg+fabs into a -|...| source modifier on v_log_f16. The
; SelectionDAG run for tahiti and every GlobalISel run instead set both sign
; bits with v_or_b32 0x80008000 before taking the log.
6928 define <2 x half> @v_log_fneg_fabs_v2f16(<2 x half> %in) {
6929 ; SI-SDAG-LABEL: v_log_fneg_fabs_v2f16:
6931 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6932 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6933 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
6934 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6935 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6936 ; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6937 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
6938 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6939 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
6940 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
6941 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
6942 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
6943 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
6944 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
6945 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
6946 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
6947 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
6948 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
6950 ; SI-GISEL-LABEL: v_log_fneg_fabs_v2f16:
6951 ; SI-GISEL: ; %bb.0:
6952 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6953 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6954 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6955 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6956 ; SI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6957 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
6958 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6959 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
6960 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
6961 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
6962 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v1
6963 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
6964 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
6965 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
6966 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
6968 ; VI-SDAG-LABEL: v_log_fneg_fabs_v2f16:
6970 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6971 ; VI-SDAG-NEXT: v_log_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6972 ; VI-SDAG-NEXT: v_log_f16_e64 v0, -|v0|
6973 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x398c
6974 ; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6975 ; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6976 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6977 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
6979 ; VI-GISEL-LABEL: v_log_fneg_fabs_v2f16:
6980 ; VI-GISEL: ; %bb.0:
6981 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6982 ; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
6983 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0
6984 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6985 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x398c
6986 ; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6987 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6988 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6989 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
6991 ; GFX900-SDAG-LABEL: v_log_fneg_fabs_v2f16:
6992 ; GFX900-SDAG: ; %bb.0:
6993 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6994 ; GFX900-SDAG-NEXT: v_log_f16_e64 v1, -|v0|
6995 ; GFX900-SDAG-NEXT: v_log_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
6996 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
6997 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
6998 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
6999 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7001 ; GFX900-GISEL-LABEL: v_log_fneg_fabs_v2f16:
7002 ; GFX900-GISEL: ; %bb.0:
7003 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7004 ; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
7005 ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0
7006 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7007 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7008 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7009 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
7010 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7012 ; GFX1100-SDAG-LABEL: v_log_fneg_fabs_v2f16:
7013 ; GFX1100-SDAG: ; %bb.0:
7014 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7015 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
7016 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0|
7017 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
7018 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -|v1|
7019 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
7020 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7021 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7022 ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
7023 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
7025 ; GFX1100-GISEL-LABEL: v_log_fneg_fabs_v2f16:
7026 ; GFX1100-GISEL: ; %bb.0:
7027 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7028 ; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
7029 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
7030 ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
7031 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
7032 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1
7033 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
7034 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7035 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7036 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
7037 ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
7038 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
7040 ; R600-LABEL: v_log_fneg_fabs_v2f16:
7045 ; CM-LABEL: v_log_fneg_fabs_v2f16:
7049 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
7050 %fneg.fabs = fneg <2 x half> %fabs
7051 %result = call <2 x half> @llvm.log.v2f16(<2 x half> %fneg.fabs)
7052 ret <2 x half> %result
; v_log_fneg_v2f16: llvm.log.v2f16 of fneg(%in).
; The expectations below are autogenerated; regenerate them with the script
; named on the first line of this file instead of editing by hand.
; What the expectations show: the SelectionDAG runs for tonga, gfx900 and
; gfx1100 fold the fneg into a -v source modifier on v_log_f16. The
; SelectionDAG run for tahiti and every GlobalISel run instead flip both sign
; bits with v_xor_b32 0x80008000 before taking the log.
7055 define <2 x half> @v_log_fneg_v2f16(<2 x half> %in) {
7056 ; SI-SDAG-LABEL: v_log_fneg_v2f16:
7058 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7059 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7060 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7061 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
7062 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
7063 ; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
7064 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
7065 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
7066 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7067 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
7068 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
7069 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7070 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7071 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7072 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
7073 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
7074 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
7075 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7077 ; SI-GISEL-LABEL: v_log_fneg_v2f16:
7078 ; SI-GISEL: ; %bb.0:
7079 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7080 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
7081 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
7082 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
7083 ; SI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
7084 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
7085 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
7086 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
7087 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
7088 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
7089 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v1
7090 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
7091 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
7092 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
7093 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7095 ; VI-SDAG-LABEL: v_log_fneg_v2f16:
7097 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7098 ; VI-SDAG-NEXT: v_log_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7099 ; VI-SDAG-NEXT: v_log_f16_e64 v0, -v0
7100 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x398c
7101 ; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7102 ; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7103 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
7104 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7106 ; VI-GISEL-LABEL: v_log_fneg_v2f16:
7107 ; VI-GISEL: ; %bb.0:
7108 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7109 ; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
7110 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0
7111 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7112 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x398c
7113 ; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7114 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7115 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
7116 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7118 ; GFX900-SDAG-LABEL: v_log_fneg_v2f16:
7119 ; GFX900-SDAG: ; %bb.0:
7120 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7121 ; GFX900-SDAG-NEXT: v_log_f16_e64 v1, -v0
7122 ; GFX900-SDAG-NEXT: v_log_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7123 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7124 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7125 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
7126 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7128 ; GFX900-GISEL-LABEL: v_log_fneg_v2f16:
7129 ; GFX900-GISEL: ; %bb.0:
7130 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7131 ; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
7132 ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0
7133 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7134 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7135 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7136 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
7137 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7139 ; GFX1100-SDAG-LABEL: v_log_fneg_v2f16:
7140 ; GFX1100-SDAG: ; %bb.0:
7141 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7142 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
7143 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0
7144 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
7145 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -v1
7146 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
7147 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7148 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7149 ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
7150 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
7152 ; GFX1100-GISEL-LABEL: v_log_fneg_v2f16:
7153 ; GFX1100-GISEL: ; %bb.0:
7154 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7155 ; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
7156 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
7157 ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
7158 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
7159 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1
7160 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
7161 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7162 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7163 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
7164 ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
7165 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
7167 ; R600-LABEL: v_log_fneg_v2f16:
7172 ; CM-LABEL: v_log_fneg_v2f16:
7176 %fneg = fneg <2 x half> %in
7177 %result = call <2 x half> @llvm.log.v2f16(<2 x half> %fneg)
7178 ret <2 x half> %result
; v_log_v2f16_fast: llvm.log.v2f16 called with the `fast` flag on the call.
; The expectations below are autogenerated; regenerate them with the script
; named on the first line of this file instead of editing by hand.
; tahiti and tonga keep separate SelectionDAG and GlobalISel expectations.
; gfx900 and gfx1100 are each checked through one prefix shared by both
; selectors (presumably because both produced identical output when the
; expectations were generated -- confirm against the generator's behaviour).
7181 define <2 x half> @v_log_v2f16_fast(<2 x half> %in) {
7182 ; SI-SDAG-LABEL: v_log_v2f16_fast:
7184 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7185 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7186 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7187 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7188 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7189 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
7190 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
7191 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7192 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7193 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7194 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7195 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7196 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7197 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7199 ; SI-GISEL-LABEL: v_log_v2f16_fast:
7200 ; SI-GISEL: ; %bb.0:
7201 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7202 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
7203 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
7204 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
7205 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
7206 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7207 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7208 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
7209 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
7210 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7212 ; VI-SDAG-LABEL: v_log_v2f16_fast:
7214 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7215 ; VI-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7216 ; VI-SDAG-NEXT: v_log_f16_e32 v0, v0
7217 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x398c
7218 ; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7219 ; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7220 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
7221 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7223 ; VI-GISEL-LABEL: v_log_v2f16_fast:
7224 ; VI-GISEL: ; %bb.0:
7225 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7226 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0
7227 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7228 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x398c
7229 ; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7230 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7231 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
7232 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7234 ; GFX900-LABEL: v_log_v2f16_fast:
7236 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7237 ; GFX900-NEXT: v_log_f16_e32 v1, v0
7238 ; GFX900-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7239 ; GFX900-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7240 ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7241 ; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0
7242 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7244 ; GFX1100-LABEL: v_log_v2f16_fast:
7246 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7247 ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0
7248 ; GFX1100-NEXT: v_log_f16_e32 v0, v0
7249 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
7250 ; GFX1100-NEXT: v_log_f16_e32 v1, v1
7251 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
7252 ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7253 ; GFX1100-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7254 ; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1
7255 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
7257 ; R600-LABEL: v_log_v2f16_fast:
7262 ; CM-LABEL: v_log_v2f16_fast:
7266 %result = call fast <2 x half> @llvm.log.v2f16(<2 x half> %in)
7267 ret <2 x half> %result
; v_log_v3f16: llvm.log.v3f16 on a <3 x half> argument (odd element count).
; The expectations below are autogenerated; regenerate them with the script
; named on the first line of this file instead of editing by hand.
; What the expectations show: on tahiti the three elements are handled as
; separate f32 values in v0, v1 and v2. On tonga, gfx900 and gfx1100 two
; elements are taken from the low and high halves of v0 and the third from v1;
; only v0 is repacked at the end (v_or_b32 or v_pack_b32_f16).
7270 define <3 x half> @v_log_v3f16(<3 x half> %in) {
7271 ; SI-SDAG-LABEL: v_log_v3f16:
7273 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7274 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7275 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7276 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7277 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7278 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7279 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7280 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
7281 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
7282 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
7283 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7284 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7285 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
7286 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7287 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7288 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7289 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7290 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7291 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7292 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7294 ; SI-GISEL-LABEL: v_log_v3f16:
7295 ; SI-GISEL: ; %bb.0:
7296 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7297 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
7298 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
7299 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7300 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
7301 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
7302 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
7303 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7304 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7305 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
7306 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
7307 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
7308 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7309 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7311 ; VI-LABEL: v_log_v3f16:
7313 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7314 ; VI-NEXT: v_log_f16_e32 v2, v0
7315 ; VI-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7316 ; VI-NEXT: v_log_f16_e32 v1, v1
7317 ; VI-NEXT: v_mov_b32_e32 v3, 0x398c
7318 ; VI-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7319 ; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7320 ; VI-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7321 ; VI-NEXT: v_or_b32_e32 v0, v2, v0
7322 ; VI-NEXT: s_setpc_b64 s[30:31]
7324 ; GFX900-LABEL: v_log_v3f16:
7326 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7327 ; GFX900-NEXT: v_log_f16_e32 v2, v0
7328 ; GFX900-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7329 ; GFX900-NEXT: v_log_f16_e32 v1, v1
7330 ; GFX900-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7331 ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7332 ; GFX900-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7333 ; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0
7334 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7336 ; GFX1100-LABEL: v_log_v3f16:
7338 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7339 ; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0
7340 ; GFX1100-NEXT: v_log_f16_e32 v0, v0
7341 ; GFX1100-NEXT: v_log_f16_e32 v1, v1
7342 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
7343 ; GFX1100-NEXT: v_log_f16_e32 v2, v2
7344 ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7345 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
7346 ; GFX1100-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7347 ; GFX1100-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7348 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
7349 ; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v2
7350 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
7352 ; R600-LABEL: v_log_v3f16:
7357 ; CM-LABEL: v_log_v3f16:
7361 %result = call <3 x half> @llvm.log.v3f16(<3 x half> %in)
7362 ret <3 x half> %result
; v_log_v3f16_fast: llvm.log.v3f16 called with the `fast` flag on the call.
; The expectations below are autogenerated; regenerate them with the script
; named on the first line of this file instead of editing by hand.
; The instruction sequences expected here are the same as the ones listed for
; v_log_v3f16 (the variant without the flag) for every visible prefix.
7365 define <3 x half> @v_log_v3f16_fast(<3 x half> %in) {
7366 ; SI-SDAG-LABEL: v_log_v3f16_fast:
7368 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7369 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7370 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7371 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7372 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7373 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7374 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7375 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
7376 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
7377 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
7378 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7379 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7380 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
7381 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7382 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7383 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7384 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7385 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7386 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7387 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7389 ; SI-GISEL-LABEL: v_log_v3f16_fast:
7390 ; SI-GISEL: ; %bb.0:
7391 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7392 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
7393 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
7394 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7395 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
7396 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
7397 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
7398 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7399 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7400 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
7401 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
7402 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
7403 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7404 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7406 ; VI-LABEL: v_log_v3f16_fast:
7408 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7409 ; VI-NEXT: v_log_f16_e32 v2, v0
7410 ; VI-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7411 ; VI-NEXT: v_log_f16_e32 v1, v1
7412 ; VI-NEXT: v_mov_b32_e32 v3, 0x398c
7413 ; VI-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7414 ; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7415 ; VI-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7416 ; VI-NEXT: v_or_b32_e32 v0, v2, v0
7417 ; VI-NEXT: s_setpc_b64 s[30:31]
7419 ; GFX900-LABEL: v_log_v3f16_fast:
7421 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7422 ; GFX900-NEXT: v_log_f16_e32 v2, v0
7423 ; GFX900-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7424 ; GFX900-NEXT: v_log_f16_e32 v1, v1
7425 ; GFX900-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7426 ; GFX900-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7427 ; GFX900-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7428 ; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0
7429 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7431 ; GFX1100-LABEL: v_log_v3f16_fast:
7433 ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7434 ; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0
7435 ; GFX1100-NEXT: v_log_f16_e32 v0, v0
7436 ; GFX1100-NEXT: v_log_f16_e32 v1, v1
7437 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
7438 ; GFX1100-NEXT: v_log_f16_e32 v2, v2
7439 ; GFX1100-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7440 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff
7441 ; GFX1100-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7442 ; GFX1100-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7443 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
7444 ; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v2
7445 ; GFX1100-NEXT: s_setpc_b64 s[30:31]
7447 ; R600-LABEL: v_log_v3f16_fast:
7452 ; CM-LABEL: v_log_v3f16_fast:
7456 %result = call fast <3 x half> @llvm.log.v3f16(<3 x half> %in)
7457 ret <3 x half> %result
; v_log_v4f16: llvm.log.v4f16 on a <4 x half> argument.
; The expectations below are autogenerated; regenerate them with the script
; named on the first line of this file instead of editing by hand.
; What the expectations show: on tahiti the four elements are handled as
; separate f32 values in v0..v3. On tonga, gfx900 and gfx1100 the elements are
; taken from the low and high halves of v0 and v1, and both registers are
; repacked at the end. The SelectionDAG and GlobalISel expectations for these
; targets differ in the order in which v0 and v1 are processed.
7460 define <4 x half> @v_log_v4f16(<4 x half> %in) {
7461 ; SI-SDAG-LABEL: v_log_v4f16:
7463 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7464 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7465 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7466 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
7467 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7468 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7469 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7470 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
7471 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7472 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
7473 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
7474 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
7475 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
7476 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7477 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7478 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
7479 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
7480 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7481 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7482 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7483 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
7484 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7485 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7486 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7487 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
7488 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7490 ; SI-GISEL-LABEL: v_log_v4f16:
7491 ; SI-GISEL: ; %bb.0:
7492 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7493 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
7494 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
7495 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7496 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
7497 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
7498 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
7499 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
7500 ; SI-GISEL-NEXT: v_log_f32_e32 v3, v3
7501 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7502 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7503 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
7504 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
7505 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
7506 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
7507 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7508 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
7509 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7511 ; VI-SDAG-LABEL: v_log_v4f16:
7513 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7514 ; VI-SDAG-NEXT: v_log_f16_e32 v2, v1
7515 ; VI-SDAG-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7516 ; VI-SDAG-NEXT: v_log_f16_e32 v3, v0
7517 ; VI-SDAG-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7518 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x398c
7519 ; VI-SDAG-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7520 ; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7521 ; VI-SDAG-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7522 ; VI-SDAG-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7523 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v3, v0
7524 ; VI-SDAG-NEXT: v_or_b32_e32 v1, v2, v1
7525 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7527 ; VI-GISEL-LABEL: v_log_v4f16:
7528 ; VI-GISEL: ; %bb.0:
7529 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7530 ; VI-GISEL-NEXT: v_log_f16_e32 v2, v0
7531 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7532 ; VI-GISEL-NEXT: v_log_f16_e32 v3, v1
7533 ; VI-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7534 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x398c
7535 ; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7536 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7537 ; VI-GISEL-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7538 ; VI-GISEL-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7539 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
7540 ; VI-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
7541 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7543 ; GFX900-SDAG-LABEL: v_log_v4f16:
7544 ; GFX900-SDAG: ; %bb.0:
7545 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7546 ; GFX900-SDAG-NEXT: v_log_f16_e32 v2, v1
7547 ; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7548 ; GFX900-SDAG-NEXT: v_log_f16_e32 v3, v0
7549 ; GFX900-SDAG-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7550 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7551 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7552 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7553 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7554 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v3, v0
7555 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v1, v2, v1
7556 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7558 ; GFX900-GISEL-LABEL: v_log_v4f16:
7559 ; GFX900-GISEL: ; %bb.0:
7560 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7561 ; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0
7562 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7563 ; GFX900-GISEL-NEXT: v_log_f16_e32 v3, v1
7564 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7565 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7566 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7567 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7568 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7569 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0
7570 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v1, v3, v1
7571 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7573 ; GFX1100-SDAG-LABEL: v_log_v4f16:
7574 ; GFX1100-SDAG: ; %bb.0:
7575 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7576 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0
7577 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
7578 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1
7579 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v0, v0
7580 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
7581 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v2, v2
7582 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v3, v3
7583 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7584 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
7585 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7586 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
7587 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7588 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7589 ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
7590 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
7591 ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3
7592 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
7594 ; GFX1100-GISEL-LABEL: v_log_v4f16:
7595 ; GFX1100-GISEL: ; %bb.0:
7596 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7597 ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
7598 ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
7599 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
7600 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1
7601 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
7602 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v2, v2
7603 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v3, v3
7604 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7605 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
7606 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7607 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
7608 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7609 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7610 ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v2
7611 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
7612 ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v1, v1, v3
7613 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
7615 ; R600-LABEL: v_log_v4f16:
7620 ; CM-LABEL: v_log_v4f16:
7624 %result = call <4 x half> @llvm.log.v4f16(<4 x half> %in)
7625 ret <4 x half> %result
; Test: llvm.log on <4 x half> with fast-math flags.
; The IR calls @llvm.log.v4f16 with the `fast` flag on %in and returns the result.
; What the checks below expect, per prefix:
;   - SI-SDAG / SI-GISEL: every element goes through v_log_f32 followed by a
;     v_mul_f32 by 0x3f317218, with f16<->f32 conversions around it.
;   - VI-SDAG / VI-GISEL: v_log_f16 (plain and SDWA WORD_1 forms), then multiplies by
;     0x398c; the high halves are written with dst_sel:WORD_1 and merged via v_or_b32.
;   - GFX900 / GFX1100: v_log_f16 per element, v_mul_f16 by 0x398c, then v_pack_b32_f16.
;   - R600 / CM: only the function label is checked.
; NOTE(review): 0x3f317218 and 0x398c look like ln(2) encoded as f32 and f16
; respectively -- confirm against the log lowering before relying on this.
; The CHECK lines are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
7628 define <4 x half> @v_log_v4f16_fast(<4 x half> %in) {
7629 ; SI-SDAG-LABEL: v_log_v4f16_fast:
7631 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7632 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7633 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7634 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
7635 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7636 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7637 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7638 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
7639 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7640 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
7641 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
7642 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
7643 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
7644 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7645 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7646 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
7647 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
7648 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
7649 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
7650 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
7651 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
7652 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
7653 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
7654 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
7655 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
7656 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
7658 ; SI-GISEL-LABEL: v_log_v4f16_fast:
7659 ; SI-GISEL: ; %bb.0:
7660 ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7661 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
7662 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
7663 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
7664 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
7665 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
7666 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
7667 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
7668 ; SI-GISEL-NEXT: v_log_f32_e32 v3, v3
7669 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
7670 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
7671 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
7672 ; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
7673 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
7674 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
7675 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
7676 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
7677 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
7679 ; VI-SDAG-LABEL: v_log_v4f16_fast:
7681 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7682 ; VI-SDAG-NEXT: v_log_f16_e32 v2, v1
7683 ; VI-SDAG-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7684 ; VI-SDAG-NEXT: v_log_f16_e32 v3, v0
7685 ; VI-SDAG-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7686 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x398c
7687 ; VI-SDAG-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7688 ; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7689 ; VI-SDAG-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7690 ; VI-SDAG-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7691 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v3, v0
7692 ; VI-SDAG-NEXT: v_or_b32_e32 v1, v2, v1
7693 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
7695 ; VI-GISEL-LABEL: v_log_v4f16_fast:
7696 ; VI-GISEL: ; %bb.0:
7697 ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7698 ; VI-GISEL-NEXT: v_log_f16_e32 v2, v0
7699 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7700 ; VI-GISEL-NEXT: v_log_f16_e32 v3, v1
7701 ; VI-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7702 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x398c
7703 ; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7704 ; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7705 ; VI-GISEL-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7706 ; VI-GISEL-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
7707 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
7708 ; VI-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
7709 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
7711 ; GFX900-SDAG-LABEL: v_log_v4f16_fast:
7712 ; GFX900-SDAG: ; %bb.0:
7713 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7714 ; GFX900-SDAG-NEXT: v_log_f16_e32 v2, v1
7715 ; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7716 ; GFX900-SDAG-NEXT: v_log_f16_e32 v3, v0
7717 ; GFX900-SDAG-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7718 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7719 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7720 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7721 ; GFX900-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7722 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v3, v0
7723 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v1, v2, v1
7724 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
7726 ; GFX900-GISEL-LABEL: v_log_v4f16_fast:
7727 ; GFX900-GISEL: ; %bb.0:
7728 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7729 ; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0
7730 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7731 ; GFX900-GISEL-NEXT: v_log_f16_e32 v3, v1
7732 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
7733 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7734 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7735 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7736 ; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7737 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0
7738 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v1, v3, v1
7739 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
7741 ; GFX1100-SDAG-LABEL: v_log_v4f16_fast:
7742 ; GFX1100-SDAG: ; %bb.0:
7743 ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7744 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0
7745 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
7746 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1
7747 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v0, v0
7748 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
7749 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v2, v2
7750 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v3, v3
7751 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7752 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
7753 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7754 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
7755 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7756 ; GFX1100-SDAG-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7757 ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
7758 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
7759 ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3
7760 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
7762 ; GFX1100-GISEL-LABEL: v_log_v4f16_fast:
7763 ; GFX1100-GISEL: ; %bb.0:
7764 ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7765 ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
7766 ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
7767 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0
7768 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1
7769 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
7770 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v2, v2
7771 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v3, v3
7772 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v0, 0x398c, v0
7773 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
7774 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v1, 0x398c, v1
7775 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
7776 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v2, 0x398c, v2
7777 ; GFX1100-GISEL-NEXT: v_mul_f16_e32 v3, 0x398c, v3
7778 ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v2
7779 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
7780 ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v1, v1, v3
7781 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
7783 ; R600-LABEL: v_log_v4f16_fast:
7788 ; CM-LABEL: v_log_v4f16_fast:
7792 %result = call fast <4 x half> @llvm.log.v4f16(<4 x half> %in)
7793 ret <4 x half> %result
; Intrinsic declarations: llvm.fabs.* and llvm.log.* for float/half scalars and
; vectors. Every declaration carries attribute group #2 (defined below).
7796 declare float @llvm.fabs.f32(float) #2
7797 declare float @llvm.log.f32(float) #2
7798 declare <2 x float> @llvm.log.v2f32(<2 x float>) #2
7799 declare <3 x float> @llvm.log.v3f32(<3 x float>) #2
7800 declare <4 x float> @llvm.log.v4f32(<4 x float>) #2
7801 declare half @llvm.fabs.f16(half) #2
7802 declare half @llvm.log.f16(half) #2
7803 declare <2 x half> @llvm.log.v2f16(<2 x half>) #2
7804 declare <3 x half> @llvm.log.v3f16(<3 x half>) #2
7805 declare <4 x half> @llvm.log.v4f16(<4 x half>) #2
7806 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
; Attribute groups:
;   #0 and #1 set "denormal-fp-math-f32" to "ieee,preserve-sign" and
;   "dynamic,dynamic" respectively (their users are not visible in this part of
;   the file -- presumably test functions defined earlier; verify before renumbering).
;   #2 is the attribute set attached to the intrinsic declarations above.
7808 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
7809 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
7810 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
7811 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
7812 ; GFX689-GISEL: {{.*}}
7813 ; GFX689-SDAG: {{.*}}