1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
4 ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
5 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
6 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
7 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
8 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SDAG_GFX6 %s
9 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=SDAG_GFX7 %s
10 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=SDAG_GFX8 %s
11 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=SDAG_GFX9 %s
12 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=SDAG_GFX10PLUS,SDAG_GFX10 %s
13 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=SDAG_GFX10PLUS,SDAG_GFX11 %s
; Scalar f32 roundeven. Every check prefix below expects exactly one
; v_rndne_f32_e32 on v0 between the entry s_waitcnt and the s_setpc_b64 return.
15 define float @v_roundeven_f32(float %x) {
16 ; GFX6-LABEL: v_roundeven_f32:
18 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; GFX6-NEXT: v_rndne_f32_e32 v0, v0
20 ; GFX6-NEXT: s_setpc_b64 s[30:31]
22 ; GFX7-LABEL: v_roundeven_f32:
24 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25 ; GFX7-NEXT: v_rndne_f32_e32 v0, v0
26 ; GFX7-NEXT: s_setpc_b64 s[30:31]
28 ; GFX8-LABEL: v_roundeven_f32:
30 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX8-NEXT: v_rndne_f32_e32 v0, v0
32 ; GFX8-NEXT: s_setpc_b64 s[30:31]
34 ; GFX9-LABEL: v_roundeven_f32:
36 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX9-NEXT: v_rndne_f32_e32 v0, v0
38 ; GFX9-NEXT: s_setpc_b64 s[30:31]
40 ; GFX10PLUS-LABEL: v_roundeven_f32:
42 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
44 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
46 ; SDAG_GFX6-LABEL: v_roundeven_f32:
48 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0
50 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
52 ; SDAG_GFX7-LABEL: v_roundeven_f32:
54 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0
56 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
58 ; SDAG_GFX8-LABEL: v_roundeven_f32:
60 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0
62 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
64 ; SDAG_GFX9-LABEL: v_roundeven_f32:
66 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0
68 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
70 ; SDAG_GFX10PLUS-LABEL: v_roundeven_f32:
71 ; SDAG_GFX10PLUS: ; %bb.0:
72 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
74 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
75 %roundeven = call float @llvm.roundeven.f32(float %x)
; <2 x float> roundeven. Every check prefix below expects one v_rndne_f32_e32
; per element, applied in place to v0 and v1.
79 define <2 x float> @v_roundeven_v2f32(<2 x float> %x) {
80 ; GFX6-LABEL: v_roundeven_v2f32:
82 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GFX6-NEXT: v_rndne_f32_e32 v0, v0
84 ; GFX6-NEXT: v_rndne_f32_e32 v1, v1
85 ; GFX6-NEXT: s_setpc_b64 s[30:31]
87 ; GFX7-LABEL: v_roundeven_v2f32:
89 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; GFX7-NEXT: v_rndne_f32_e32 v0, v0
91 ; GFX7-NEXT: v_rndne_f32_e32 v1, v1
92 ; GFX7-NEXT: s_setpc_b64 s[30:31]
94 ; GFX8-LABEL: v_roundeven_v2f32:
96 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; GFX8-NEXT: v_rndne_f32_e32 v0, v0
98 ; GFX8-NEXT: v_rndne_f32_e32 v1, v1
99 ; GFX8-NEXT: s_setpc_b64 s[30:31]
101 ; GFX9-LABEL: v_roundeven_v2f32:
103 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104 ; GFX9-NEXT: v_rndne_f32_e32 v0, v0
105 ; GFX9-NEXT: v_rndne_f32_e32 v1, v1
106 ; GFX9-NEXT: s_setpc_b64 s[30:31]
108 ; GFX10PLUS-LABEL: v_roundeven_v2f32:
109 ; GFX10PLUS: ; %bb.0:
110 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
112 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
113 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
115 ; SDAG_GFX6-LABEL: v_roundeven_v2f32:
116 ; SDAG_GFX6: ; %bb.0:
117 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0
119 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1
120 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
122 ; SDAG_GFX7-LABEL: v_roundeven_v2f32:
123 ; SDAG_GFX7: ; %bb.0:
124 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0
126 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1
127 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
129 ; SDAG_GFX8-LABEL: v_roundeven_v2f32:
130 ; SDAG_GFX8: ; %bb.0:
131 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0
133 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v1, v1
134 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
136 ; SDAG_GFX9-LABEL: v_roundeven_v2f32:
137 ; SDAG_GFX9: ; %bb.0:
138 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0
140 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v1, v1
141 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
143 ; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f32:
144 ; SDAG_GFX10PLUS: ; %bb.0:
145 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
147 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
148 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
149 %roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x)
150 ret <2 x float> %roundeven
; <3 x float> roundeven. Every check prefix below expects one v_rndne_f32_e32
; per element, applied in place to v0, v1 and v2.
153 define <3 x float> @v_roundeven_v3f32(<3 x float> %x) {
154 ; GFX6-LABEL: v_roundeven_v3f32:
156 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX6-NEXT: v_rndne_f32_e32 v0, v0
158 ; GFX6-NEXT: v_rndne_f32_e32 v1, v1
159 ; GFX6-NEXT: v_rndne_f32_e32 v2, v2
160 ; GFX6-NEXT: s_setpc_b64 s[30:31]
162 ; GFX7-LABEL: v_roundeven_v3f32:
164 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165 ; GFX7-NEXT: v_rndne_f32_e32 v0, v0
166 ; GFX7-NEXT: v_rndne_f32_e32 v1, v1
167 ; GFX7-NEXT: v_rndne_f32_e32 v2, v2
168 ; GFX7-NEXT: s_setpc_b64 s[30:31]
170 ; GFX8-LABEL: v_roundeven_v3f32:
172 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173 ; GFX8-NEXT: v_rndne_f32_e32 v0, v0
174 ; GFX8-NEXT: v_rndne_f32_e32 v1, v1
175 ; GFX8-NEXT: v_rndne_f32_e32 v2, v2
176 ; GFX8-NEXT: s_setpc_b64 s[30:31]
178 ; GFX9-LABEL: v_roundeven_v3f32:
180 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181 ; GFX9-NEXT: v_rndne_f32_e32 v0, v0
182 ; GFX9-NEXT: v_rndne_f32_e32 v1, v1
183 ; GFX9-NEXT: v_rndne_f32_e32 v2, v2
184 ; GFX9-NEXT: s_setpc_b64 s[30:31]
186 ; GFX10PLUS-LABEL: v_roundeven_v3f32:
187 ; GFX10PLUS: ; %bb.0:
188 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
190 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
191 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2
192 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
194 ; SDAG_GFX6-LABEL: v_roundeven_v3f32:
195 ; SDAG_GFX6: ; %bb.0:
196 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0
198 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1
199 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v2, v2
200 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
202 ; SDAG_GFX7-LABEL: v_roundeven_v3f32:
203 ; SDAG_GFX7: ; %bb.0:
204 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0
206 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1
207 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v2, v2
208 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
210 ; SDAG_GFX8-LABEL: v_roundeven_v3f32:
211 ; SDAG_GFX8: ; %bb.0:
212 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0
214 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v1, v1
215 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v2, v2
216 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
218 ; SDAG_GFX9-LABEL: v_roundeven_v3f32:
219 ; SDAG_GFX9: ; %bb.0:
220 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0
222 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v1, v1
223 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v2, v2
224 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
226 ; SDAG_GFX10PLUS-LABEL: v_roundeven_v3f32:
227 ; SDAG_GFX10PLUS: ; %bb.0:
228 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
230 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
231 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2
232 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
233 %roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
234 ret <3 x float> %roundeven
; <4 x float> roundeven. Every check prefix below expects one v_rndne_f32_e32
; per element, applied in place to v0 through v3.
237 define <4 x float> @v_roundeven_v4f32(<4 x float> %x) {
238 ; GFX6-LABEL: v_roundeven_v4f32:
240 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241 ; GFX6-NEXT: v_rndne_f32_e32 v0, v0
242 ; GFX6-NEXT: v_rndne_f32_e32 v1, v1
243 ; GFX6-NEXT: v_rndne_f32_e32 v2, v2
244 ; GFX6-NEXT: v_rndne_f32_e32 v3, v3
245 ; GFX6-NEXT: s_setpc_b64 s[30:31]
247 ; GFX7-LABEL: v_roundeven_v4f32:
249 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250 ; GFX7-NEXT: v_rndne_f32_e32 v0, v0
251 ; GFX7-NEXT: v_rndne_f32_e32 v1, v1
252 ; GFX7-NEXT: v_rndne_f32_e32 v2, v2
253 ; GFX7-NEXT: v_rndne_f32_e32 v3, v3
254 ; GFX7-NEXT: s_setpc_b64 s[30:31]
256 ; GFX8-LABEL: v_roundeven_v4f32:
258 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259 ; GFX8-NEXT: v_rndne_f32_e32 v0, v0
260 ; GFX8-NEXT: v_rndne_f32_e32 v1, v1
261 ; GFX8-NEXT: v_rndne_f32_e32 v2, v2
262 ; GFX8-NEXT: v_rndne_f32_e32 v3, v3
263 ; GFX8-NEXT: s_setpc_b64 s[30:31]
265 ; GFX9-LABEL: v_roundeven_v4f32:
267 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268 ; GFX9-NEXT: v_rndne_f32_e32 v0, v0
269 ; GFX9-NEXT: v_rndne_f32_e32 v1, v1
270 ; GFX9-NEXT: v_rndne_f32_e32 v2, v2
271 ; GFX9-NEXT: v_rndne_f32_e32 v3, v3
272 ; GFX9-NEXT: s_setpc_b64 s[30:31]
274 ; GFX10PLUS-LABEL: v_roundeven_v4f32:
275 ; GFX10PLUS: ; %bb.0:
276 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
278 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
279 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2
280 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v3, v3
281 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
283 ; SDAG_GFX6-LABEL: v_roundeven_v4f32:
284 ; SDAG_GFX6: ; %bb.0:
285 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0
287 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1
288 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v2, v2
289 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v3, v3
290 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
292 ; SDAG_GFX7-LABEL: v_roundeven_v4f32:
293 ; SDAG_GFX7: ; %bb.0:
294 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0
296 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1
297 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v2, v2
298 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v3, v3
299 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
301 ; SDAG_GFX8-LABEL: v_roundeven_v4f32:
302 ; SDAG_GFX8: ; %bb.0:
303 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0
305 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v1, v1
306 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v2, v2
307 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v3, v3
308 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
310 ; SDAG_GFX9-LABEL: v_roundeven_v4f32:
311 ; SDAG_GFX9: ; %bb.0:
312 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0
314 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v1, v1
315 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v2, v2
316 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v3, v3
317 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
319 ; SDAG_GFX10PLUS-LABEL: v_roundeven_v4f32:
320 ; SDAG_GFX10PLUS: ; %bb.0:
321 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0
323 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1
324 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2
325 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v3, v3
326 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
327 %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x)
328 ret <4 x float> %roundeven
; Scalar f16 roundeven. The GFX6 and GFX7 checks (with and without the SDAG_
; prefix) round with v_rndne_f32_e32 together with v_cvt_f32_f16 and
; v_cvt_f16_f32 conversions, and the order of those conversions differs between
; the SDAG_ and non-SDAG_ checks. The GFX8, GFX9 and GFX10PLUS checks expect a
; single v_rndne_f16_e32 instead.
331 define half @v_roundeven_f16(half %x) {
332 ; GFX6-LABEL: v_roundeven_f16:
334 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
336 ; GFX6-NEXT: v_rndne_f32_e32 v0, v0
337 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
338 ; GFX6-NEXT: s_setpc_b64 s[30:31]
340 ; GFX7-LABEL: v_roundeven_f16:
342 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
344 ; GFX7-NEXT: v_rndne_f32_e32 v0, v0
345 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
346 ; GFX7-NEXT: s_setpc_b64 s[30:31]
348 ; GFX8-LABEL: v_roundeven_f16:
350 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GFX8-NEXT: v_rndne_f16_e32 v0, v0
352 ; GFX8-NEXT: s_setpc_b64 s[30:31]
354 ; GFX9-LABEL: v_roundeven_f16:
356 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357 ; GFX9-NEXT: v_rndne_f16_e32 v0, v0
358 ; GFX9-NEXT: s_setpc_b64 s[30:31]
360 ; GFX10PLUS-LABEL: v_roundeven_f16:
361 ; GFX10PLUS: ; %bb.0:
362 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363 ; GFX10PLUS-NEXT: v_rndne_f16_e32 v0, v0
364 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
366 ; SDAG_GFX6-LABEL: v_roundeven_f16:
367 ; SDAG_GFX6: ; %bb.0:
368 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369 ; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
370 ; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
371 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0
372 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
374 ; SDAG_GFX7-LABEL: v_roundeven_f16:
375 ; SDAG_GFX7: ; %bb.0:
376 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
378 ; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
379 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0
380 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
382 ; SDAG_GFX8-LABEL: v_roundeven_f16:
383 ; SDAG_GFX8: ; %bb.0:
384 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385 ; SDAG_GFX8-NEXT: v_rndne_f16_e32 v0, v0
386 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
388 ; SDAG_GFX9-LABEL: v_roundeven_f16:
389 ; SDAG_GFX9: ; %bb.0:
390 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391 ; SDAG_GFX9-NEXT: v_rndne_f16_e32 v0, v0
392 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
394 ; SDAG_GFX10PLUS-LABEL: v_roundeven_f16:
395 ; SDAG_GFX10PLUS: ; %bb.0:
396 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397 ; SDAG_GFX10PLUS-NEXT: v_rndne_f16_e32 v0, v0
398 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
399 %roundeven = call half @llvm.roundeven.f16(half %x)
; <2 x half> roundeven. The expected code differs per target.
;  - GFX6 and GFX7 checks (with and without SDAG_) round each element with
;    v_rndne_f32_e32 plus f16/f32 conversions.
;  - GFX8 checks use a plain v_rndne_f16_e32 and an SDWA v_rndne_f16 reading and
;    writing WORD_1, then merge the two results with v_or_b32.
;  - GFX9 and GFX10 checks use the SDWA form with a DWORD destination and merge
;    with v_pack_b32_f16.
;  - GFX11 checks have no SDWA instruction. They shift right by 16 with
;    v_lshrrev_b32, use two plain v_rndne_f16_e32 and merge with v_pack_b32_f16.
; GFX10 and GFX11 are checked under separate prefixes in this function.
403 define <2 x half> @v_roundeven_v2f16(<2 x half> %x) {
404 ; GFX6-LABEL: v_roundeven_v2f16:
406 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
407 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
408 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
409 ; GFX6-NEXT: v_rndne_f32_e32 v0, v0
410 ; GFX6-NEXT: v_rndne_f32_e32 v1, v1
411 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
412 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
413 ; GFX6-NEXT: s_setpc_b64 s[30:31]
415 ; GFX7-LABEL: v_roundeven_v2f16:
417 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
418 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
419 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
420 ; GFX7-NEXT: v_rndne_f32_e32 v0, v0
421 ; GFX7-NEXT: v_rndne_f32_e32 v1, v1
422 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
423 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
424 ; GFX7-NEXT: s_setpc_b64 s[30:31]
426 ; GFX8-LABEL: v_roundeven_v2f16:
428 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429 ; GFX8-NEXT: v_rndne_f16_e32 v1, v0
430 ; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
431 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
432 ; GFX8-NEXT: s_setpc_b64 s[30:31]
434 ; GFX9-LABEL: v_roundeven_v2f16:
436 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437 ; GFX9-NEXT: v_rndne_f16_e32 v1, v0
438 ; GFX9-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
439 ; GFX9-NEXT: v_pack_b32_f16 v0, v1, v0
440 ; GFX9-NEXT: s_setpc_b64 s[30:31]
442 ; GFX10-LABEL: v_roundeven_v2f16:
444 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445 ; GFX10-NEXT: v_rndne_f16_e32 v1, v0
446 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
447 ; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
448 ; GFX10-NEXT: s_setpc_b64 s[30:31]
450 ; GFX11-LABEL: v_roundeven_v2f16:
452 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
454 ; GFX11-NEXT: v_rndne_f16_e32 v0, v0
455 ; GFX11-NEXT: v_rndne_f16_e32 v1, v1
456 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
457 ; GFX11-NEXT: s_setpc_b64 s[30:31]
459 ; SDAG_GFX6-LABEL: v_roundeven_v2f16:
460 ; SDAG_GFX6: ; %bb.0:
461 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462 ; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
463 ; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
464 ; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
465 ; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
466 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0
467 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1
468 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
470 ; SDAG_GFX7-LABEL: v_roundeven_v2f16:
471 ; SDAG_GFX7: ; %bb.0:
472 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473 ; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
474 ; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
475 ; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
476 ; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
477 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0
478 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1
479 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
481 ; SDAG_GFX8-LABEL: v_roundeven_v2f16:
482 ; SDAG_GFX8: ; %bb.0:
483 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484 ; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
485 ; SDAG_GFX8-NEXT: v_rndne_f16_e32 v0, v0
486 ; SDAG_GFX8-NEXT: v_or_b32_e32 v0, v0, v1
487 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
489 ; SDAG_GFX9-LABEL: v_roundeven_v2f16:
490 ; SDAG_GFX9: ; %bb.0:
491 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492 ; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
493 ; SDAG_GFX9-NEXT: v_rndne_f16_e32 v0, v0
494 ; SDAG_GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
495 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
497 ; SDAG_GFX10-LABEL: v_roundeven_v2f16:
498 ; SDAG_GFX10: ; %bb.0:
499 ; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500 ; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
501 ; SDAG_GFX10-NEXT: v_rndne_f16_e32 v0, v0
502 ; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1
503 ; SDAG_GFX10-NEXT: s_setpc_b64 s[30:31]
505 ; SDAG_GFX11-LABEL: v_roundeven_v2f16:
506 ; SDAG_GFX11: ; %bb.0:
507 ; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508 ; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
509 ; SDAG_GFX11-NEXT: v_rndne_f16_e32 v0, v0
510 ; SDAG_GFX11-NEXT: v_rndne_f16_e32 v1, v1
511 ; SDAG_GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
512 ; SDAG_GFX11-NEXT: s_setpc_b64 s[30:31]
513 %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x)
514 ret <2 x half> %roundeven
; <2 x half> roundeven whose operand is first negated with fneg. The checks
; show two ways of handling the negation.
;  - All non-SDAG_ prefixes, plus SDAG_GFX6 and SDAG_GFX7, expect an explicit
;    v_xor_b32 with 0x80008000 before the rounding instructions.
;  - SDAG_GFX8 through SDAG_GFX11 expect no v_xor_b32. The v_rndne_f16
;    instructions take a negated source operand (-v0 or -v1) instead.
517 define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) {
518 ; GFX6-LABEL: v_roundeven_v2f16_fneg:
520 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
522 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
523 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
524 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
525 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
526 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
527 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v0
528 ; GFX6-NEXT: v_rndne_f32_e32 v0, v1
529 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
530 ; GFX6-NEXT: v_rndne_f32_e32 v1, v2
531 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
532 ; GFX6-NEXT: s_setpc_b64 s[30:31]
534 ; GFX7-LABEL: v_roundeven_v2f16_fneg:
536 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
538 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
539 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
540 ; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
541 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v0
542 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
543 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v0
544 ; GFX7-NEXT: v_rndne_f32_e32 v0, v1
545 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
546 ; GFX7-NEXT: v_rndne_f32_e32 v1, v2
547 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
548 ; GFX7-NEXT: s_setpc_b64 s[30:31]
550 ; GFX8-LABEL: v_roundeven_v2f16_fneg:
552 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
554 ; GFX8-NEXT: v_rndne_f16_e32 v1, v0
555 ; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
556 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
557 ; GFX8-NEXT: s_setpc_b64 s[30:31]
559 ; GFX9-LABEL: v_roundeven_v2f16_fneg:
561 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562 ; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
563 ; GFX9-NEXT: v_rndne_f16_e32 v1, v0
564 ; GFX9-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
565 ; GFX9-NEXT: v_pack_b32_f16 v0, v1, v0
566 ; GFX9-NEXT: s_setpc_b64 s[30:31]
568 ; GFX10-LABEL: v_roundeven_v2f16_fneg:
570 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571 ; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
572 ; GFX10-NEXT: v_rndne_f16_e32 v1, v0
573 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
574 ; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0
575 ; GFX10-NEXT: s_setpc_b64 s[30:31]
577 ; GFX11-LABEL: v_roundeven_v2f16_fneg:
579 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580 ; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
581 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
582 ; GFX11-NEXT: v_rndne_f16_e32 v0, v0
583 ; GFX11-NEXT: v_rndne_f16_e32 v1, v1
584 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
585 ; GFX11-NEXT: s_setpc_b64 s[30:31]
587 ; SDAG_GFX6-LABEL: v_roundeven_v2f16_fneg:
588 ; SDAG_GFX6: ; %bb.0:
589 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590 ; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
591 ; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
592 ; SDAG_GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
593 ; SDAG_GFX6-NEXT: v_or_b32_e32 v0, v0, v1
594 ; SDAG_GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
595 ; SDAG_GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
596 ; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
597 ; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
598 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0
599 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1
600 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
602 ; SDAG_GFX7-LABEL: v_roundeven_v2f16_fneg:
603 ; SDAG_GFX7: ; %bb.0:
604 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605 ; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
606 ; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
607 ; SDAG_GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
608 ; SDAG_GFX7-NEXT: v_or_b32_e32 v0, v0, v1
609 ; SDAG_GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
610 ; SDAG_GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0
611 ; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
612 ; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
613 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0
614 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1
615 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
617 ; SDAG_GFX8-LABEL: v_roundeven_v2f16_fneg:
618 ; SDAG_GFX8: ; %bb.0:
619 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620 ; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
621 ; SDAG_GFX8-NEXT: v_rndne_f16_e64 v0, -v0
622 ; SDAG_GFX8-NEXT: v_or_b32_e32 v0, v0, v1
623 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
625 ; SDAG_GFX9-LABEL: v_roundeven_v2f16_fneg:
626 ; SDAG_GFX9: ; %bb.0:
627 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628 ; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
629 ; SDAG_GFX9-NEXT: v_rndne_f16_e64 v0, -v0
630 ; SDAG_GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
631 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
633 ; SDAG_GFX10-LABEL: v_roundeven_v2f16_fneg:
634 ; SDAG_GFX10: ; %bb.0:
635 ; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
636 ; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
637 ; SDAG_GFX10-NEXT: v_rndne_f16_e64 v0, -v0
638 ; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1
639 ; SDAG_GFX10-NEXT: s_setpc_b64 s[30:31]
641 ; SDAG_GFX11-LABEL: v_roundeven_v2f16_fneg:
642 ; SDAG_GFX11: ; %bb.0:
643 ; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644 ; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
645 ; SDAG_GFX11-NEXT: v_rndne_f16_e64 v0, -v0
646 ; SDAG_GFX11-NEXT: v_rndne_f16_e64 v1, -v1
647 ; SDAG_GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
648 ; SDAG_GFX11-NEXT: s_setpc_b64 s[30:31]
649 %x.fneg = fneg <2 x half> %x
650 %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg)
651 ret <2 x half> %roundeven
; <4 x half> roundeven. The expected code differs per target.
;  - GFX6 and GFX7 checks (with and without SDAG_) round four values with
;    v_rndne_f32_e32 plus f16/f32 conversions.
;  - GFX8 checks round v0 and v1 with a plain and an SDWA v_rndne_f16 each and
;    merge every pair of results with v_or_b32.
;  - GFX9 and GFX10 checks do the same with a DWORD SDWA destination and merge
;    with v_pack_b32_f16.
;  - GFX11 checks have no SDWA instruction. They use v_lshrrev_b32 by 16, four
;    plain v_rndne_f16_e32 and two v_pack_b32_f16.
654 define <4 x half> @v_roundeven_v4f16(<4 x half> %x) {
655 ; GFX6-LABEL: v_roundeven_v4f16:
657 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
659 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
660 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
661 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
662 ; GFX6-NEXT: v_rndne_f32_e32 v0, v0
663 ; GFX6-NEXT: v_rndne_f32_e32 v1, v1
664 ; GFX6-NEXT: v_rndne_f32_e32 v2, v2
665 ; GFX6-NEXT: v_rndne_f32_e32 v3, v3
666 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
667 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
668 ; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
669 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
670 ; GFX6-NEXT: s_setpc_b64 s[30:31]
672 ; GFX7-LABEL: v_roundeven_v4f16:
674 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
675 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
676 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
677 ; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
678 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
679 ; GFX7-NEXT: v_rndne_f32_e32 v0, v0
680 ; GFX7-NEXT: v_rndne_f32_e32 v1, v1
681 ; GFX7-NEXT: v_rndne_f32_e32 v2, v2
682 ; GFX7-NEXT: v_rndne_f32_e32 v3, v3
683 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
684 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
685 ; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
686 ; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
687 ; GFX7-NEXT: s_setpc_b64 s[30:31]
689 ; GFX8-LABEL: v_roundeven_v4f16:
691 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
692 ; GFX8-NEXT: v_rndne_f16_e32 v2, v0
693 ; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
694 ; GFX8-NEXT: v_rndne_f16_e32 v3, v1
695 ; GFX8-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
696 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
697 ; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
698 ; GFX8-NEXT: s_setpc_b64 s[30:31]
700 ; GFX9-LABEL: v_roundeven_v4f16:
702 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
703 ; GFX9-NEXT: v_rndne_f16_e32 v2, v0
704 ; GFX9-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
705 ; GFX9-NEXT: v_rndne_f16_e32 v3, v1
706 ; GFX9-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
707 ; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
708 ; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
709 ; GFX9-NEXT: s_setpc_b64 s[30:31]
711 ; GFX10-LABEL: v_roundeven_v4f16:
713 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
714 ; GFX10-NEXT: v_rndne_f16_e32 v2, v0
715 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
716 ; GFX10-NEXT: v_rndne_f16_e32 v3, v1
717 ; GFX10-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
718 ; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
719 ; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
720 ; GFX10-NEXT: s_setpc_b64 s[30:31]
722 ; GFX11-LABEL: v_roundeven_v4f16:
724 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
725 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
726 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
727 ; GFX11-NEXT: v_rndne_f16_e32 v0, v0
728 ; GFX11-NEXT: v_rndne_f16_e32 v1, v1
729 ; GFX11-NEXT: v_rndne_f16_e32 v2, v2
730 ; GFX11-NEXT: v_rndne_f16_e32 v3, v3
731 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v2
732 ; GFX11-NEXT: v_pack_b32_f16 v1, v1, v3
733 ; GFX11-NEXT: s_setpc_b64 s[30:31]
735 ; SDAG_GFX6-LABEL: v_roundeven_v4f16:
736 ; SDAG_GFX6: ; %bb.0:
737 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
738 ; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
739 ; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
740 ; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
741 ; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
742 ; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
743 ; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
744 ; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
745 ; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
746 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0
747 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1
748 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v2, v2
749 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v3, v3
750 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
752 ; SDAG_GFX7-LABEL: v_roundeven_v4f16:
753 ; SDAG_GFX7: ; %bb.0:
754 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755 ; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
756 ; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
757 ; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
758 ; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
759 ; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
760 ; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
761 ; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
762 ; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
763 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0
764 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1
765 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v2, v2
766 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v3, v3
767 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
769 ; SDAG_GFX8-LABEL: v_roundeven_v4f16:
770 ; SDAG_GFX8: ; %bb.0:
771 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
772 ; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
773 ; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
774 ; SDAG_GFX8-NEXT: v_rndne_f16_e32 v1, v1
775 ; SDAG_GFX8-NEXT: v_rndne_f16_e32 v0, v0
776 ; SDAG_GFX8-NEXT: v_or_b32_e32 v0, v0, v3
777 ; SDAG_GFX8-NEXT: v_or_b32_e32 v1, v1, v2
778 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
780 ; SDAG_GFX9-LABEL: v_roundeven_v4f16:
781 ; SDAG_GFX9: ; %bb.0:
782 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
783 ; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
784 ; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
785 ; SDAG_GFX9-NEXT: v_rndne_f16_e32 v1, v1
786 ; SDAG_GFX9-NEXT: v_rndne_f16_e32 v0, v0
787 ; SDAG_GFX9-NEXT: v_pack_b32_f16 v0, v0, v3
788 ; SDAG_GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
789 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
791 ; SDAG_GFX10-LABEL: v_roundeven_v4f16:
792 ; SDAG_GFX10: ; %bb.0:
793 ; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794 ; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
795 ; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
796 ; SDAG_GFX10-NEXT: v_rndne_f16_e32 v0, v0
797 ; SDAG_GFX10-NEXT: v_rndne_f16_e32 v1, v1
798 ; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v3
799 ; SDAG_GFX10-NEXT: v_pack_b32_f16 v1, v1, v2
800 ; SDAG_GFX10-NEXT: s_setpc_b64 s[30:31]
802 ; SDAG_GFX11-LABEL: v_roundeven_v4f16:
803 ; SDAG_GFX11: ; %bb.0:
804 ; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805 ; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
806 ; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
807 ; SDAG_GFX11-NEXT: v_rndne_f16_e32 v1, v1
808 ; SDAG_GFX11-NEXT: v_rndne_f16_e32 v0, v0
809 ; SDAG_GFX11-NEXT: v_rndne_f16_e32 v2, v2
810 ; SDAG_GFX11-NEXT: v_rndne_f16_e32 v3, v3
811 ; SDAG_GFX11-NEXT: v_pack_b32_f16 v0, v0, v2
812 ; SDAG_GFX11-NEXT: v_pack_b32_f16 v1, v1, v3
813 ; SDAG_GFX11-NEXT: s_setpc_b64 s[30:31]
814 %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
815 ret <4 x half> %roundeven
; Source-modifier folding test for fabs: the IR applies llvm.fabs.f32 to the
; argument before llvm.roundeven.f32. Every check prefix below expects the fabs
; to show up as the |v0| operand modifier on one v_rndne_f32_e64 rather than as
; a separate instruction.
819 define float @v_roundeven_f32_fabs(float %x) {
820 ; GFX6-LABEL: v_roundeven_f32_fabs:
822 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823 ; GFX6-NEXT: v_rndne_f32_e64 v0, |v0|
824 ; GFX6-NEXT: s_setpc_b64 s[30:31]
826 ; GFX7-LABEL: v_roundeven_f32_fabs:
828 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
829 ; GFX7-NEXT: v_rndne_f32_e64 v0, |v0|
830 ; GFX7-NEXT: s_setpc_b64 s[30:31]
832 ; GFX8-LABEL: v_roundeven_f32_fabs:
834 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
835 ; GFX8-NEXT: v_rndne_f32_e64 v0, |v0|
836 ; GFX8-NEXT: s_setpc_b64 s[30:31]
838 ; GFX9-LABEL: v_roundeven_f32_fabs:
840 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GFX9-NEXT: v_rndne_f32_e64 v0, |v0|
842 ; GFX9-NEXT: s_setpc_b64 s[30:31]
844 ; GFX10PLUS-LABEL: v_roundeven_f32_fabs:
845 ; GFX10PLUS: ; %bb.0:
846 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847 ; GFX10PLUS-NEXT: v_rndne_f32_e64 v0, |v0|
848 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
850 ; SDAG_GFX6-LABEL: v_roundeven_f32_fabs:
851 ; SDAG_GFX6: ; %bb.0:
852 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
853 ; SDAG_GFX6-NEXT: v_rndne_f32_e64 v0, |v0|
854 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
856 ; SDAG_GFX7-LABEL: v_roundeven_f32_fabs:
857 ; SDAG_GFX7: ; %bb.0:
858 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859 ; SDAG_GFX7-NEXT: v_rndne_f32_e64 v0, |v0|
860 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
862 ; SDAG_GFX8-LABEL: v_roundeven_f32_fabs:
863 ; SDAG_GFX8: ; %bb.0:
864 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
865 ; SDAG_GFX8-NEXT: v_rndne_f32_e64 v0, |v0|
866 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
868 ; SDAG_GFX9-LABEL: v_roundeven_f32_fabs:
869 ; SDAG_GFX9: ; %bb.0:
870 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
871 ; SDAG_GFX9-NEXT: v_rndne_f32_e64 v0, |v0|
872 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
874 ; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fabs:
875 ; SDAG_GFX10PLUS: ; %bb.0:
876 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
877 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e64 v0, |v0|
878 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
879 %fabs.x = call float @llvm.fabs.f32(float %x)
880 %roundeven = call float @llvm.roundeven.f32(float %fabs.x)
; amdgpu_ps variant whose operand is passed as an inreg argument. Every check
; prefix below expects a single v_rndne_f32_e32 that reads s0 directly and
; writes v0, followed by the shader-part epilog marker (no s_waitcnt and no
; s_setpc_b64, unlike the default-calling-convention tests in this file).
884 define amdgpu_ps float @s_roundeven_f32(float inreg %x) {
885 ; GFX6-LABEL: s_roundeven_f32:
887 ; GFX6-NEXT: v_rndne_f32_e32 v0, s0
888 ; GFX6-NEXT: ; return to shader part epilog
890 ; GFX7-LABEL: s_roundeven_f32:
892 ; GFX7-NEXT: v_rndne_f32_e32 v0, s0
893 ; GFX7-NEXT: ; return to shader part epilog
895 ; GFX8-LABEL: s_roundeven_f32:
897 ; GFX8-NEXT: v_rndne_f32_e32 v0, s0
898 ; GFX8-NEXT: ; return to shader part epilog
900 ; GFX9-LABEL: s_roundeven_f32:
902 ; GFX9-NEXT: v_rndne_f32_e32 v0, s0
903 ; GFX9-NEXT: ; return to shader part epilog
905 ; GFX10PLUS-LABEL: s_roundeven_f32:
906 ; GFX10PLUS: ; %bb.0:
907 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, s0
908 ; GFX10PLUS-NEXT: ; return to shader part epilog
910 ; SDAG_GFX6-LABEL: s_roundeven_f32:
911 ; SDAG_GFX6: ; %bb.0:
912 ; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, s0
913 ; SDAG_GFX6-NEXT: ; return to shader part epilog
915 ; SDAG_GFX7-LABEL: s_roundeven_f32:
916 ; SDAG_GFX7: ; %bb.0:
917 ; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, s0
918 ; SDAG_GFX7-NEXT: ; return to shader part epilog
920 ; SDAG_GFX8-LABEL: s_roundeven_f32:
921 ; SDAG_GFX8: ; %bb.0:
922 ; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, s0
923 ; SDAG_GFX8-NEXT: ; return to shader part epilog
925 ; SDAG_GFX9-LABEL: s_roundeven_f32:
926 ; SDAG_GFX9: ; %bb.0:
927 ; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, s0
928 ; SDAG_GFX9-NEXT: ; return to shader part epilog
930 ; SDAG_GFX10PLUS-LABEL: s_roundeven_f32:
931 ; SDAG_GFX10PLUS: ; %bb.0:
932 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, s0
933 ; SDAG_GFX10PLUS-NEXT: ; return to shader part epilog
934 %roundeven = call float @llvm.roundeven.f32(float %x)
; Source-modifier folding test for fneg: the IR negates the argument before
; calling llvm.roundeven.f32. Every check prefix below expects the negation to
; be carried as the -v0 operand modifier on one v_rndne_f32_e64, with no
; separate sign-flip instruction.
938 define float @v_roundeven_f32_fneg(float %x) {
939 ; GFX6-LABEL: v_roundeven_f32_fneg:
941 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
942 ; GFX6-NEXT: v_rndne_f32_e64 v0, -v0
943 ; GFX6-NEXT: s_setpc_b64 s[30:31]
945 ; GFX7-LABEL: v_roundeven_f32_fneg:
947 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
948 ; GFX7-NEXT: v_rndne_f32_e64 v0, -v0
949 ; GFX7-NEXT: s_setpc_b64 s[30:31]
951 ; GFX8-LABEL: v_roundeven_f32_fneg:
953 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954 ; GFX8-NEXT: v_rndne_f32_e64 v0, -v0
955 ; GFX8-NEXT: s_setpc_b64 s[30:31]
957 ; GFX9-LABEL: v_roundeven_f32_fneg:
959 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
960 ; GFX9-NEXT: v_rndne_f32_e64 v0, -v0
961 ; GFX9-NEXT: s_setpc_b64 s[30:31]
963 ; GFX10PLUS-LABEL: v_roundeven_f32_fneg:
964 ; GFX10PLUS: ; %bb.0:
965 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966 ; GFX10PLUS-NEXT: v_rndne_f32_e64 v0, -v0
967 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
969 ; SDAG_GFX6-LABEL: v_roundeven_f32_fneg:
970 ; SDAG_GFX6: ; %bb.0:
971 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972 ; SDAG_GFX6-NEXT: v_rndne_f32_e64 v0, -v0
973 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
975 ; SDAG_GFX7-LABEL: v_roundeven_f32_fneg:
976 ; SDAG_GFX7: ; %bb.0:
977 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
978 ; SDAG_GFX7-NEXT: v_rndne_f32_e64 v0, -v0
979 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
981 ; SDAG_GFX8-LABEL: v_roundeven_f32_fneg:
982 ; SDAG_GFX8: ; %bb.0:
983 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984 ; SDAG_GFX8-NEXT: v_rndne_f32_e64 v0, -v0
985 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
987 ; SDAG_GFX9-LABEL: v_roundeven_f32_fneg:
988 ; SDAG_GFX9: ; %bb.0:
989 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
990 ; SDAG_GFX9-NEXT: v_rndne_f32_e64 v0, -v0
991 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
993 ; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fneg:
994 ; SDAG_GFX10PLUS: ; %bb.0:
995 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
996 ; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e64 v0, -v0
997 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
998 %neg.x = fneg float %x
999 %roundeven = call float @llvm.roundeven.f32(float %neg.x)
1000 ret float %roundeven
; Scalar f64 roundeven. The two tahiti prefixes (GlobalISel GFX6 and the
; SelectionDAG one) expect an expanded sequence instead of a native rounding
; instruction: build a constant with high word 0x43300000 (2^52 as a double)
; carrying the sign bit of x, add it to x and subtract it again, then keep the
; original x whenever |x| compares greater than 0x432fffff_ffffffff. GlobalISel
; forms the signed constant with v_and/v_or, SelectionDAG with v_bfi_b32.
; All other prefixes expect a single v_rndne_f64_e32.
1003 define double @v_roundeven_f64(double %x) {
1004 ; GFX6-LABEL: v_roundeven_f64:
1006 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1007 ; GFX6-NEXT: v_and_b32_e32 v3, 0x80000000, v1
1008 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
1009 ; GFX6-NEXT: v_or_b32_e32 v3, 0x43300000, v3
1010 ; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], v[2:3]
1011 ; GFX6-NEXT: s_mov_b32 s4, -1
1012 ; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
1013 ; GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
1014 ; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1015 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1016 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
1017 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1019 ; GFX7-LABEL: v_roundeven_f64:
1021 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1022 ; GFX7-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1023 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1025 ; GFX8-LABEL: v_roundeven_f64:
1027 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1028 ; GFX8-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1029 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1031 ; GFX9-LABEL: v_roundeven_f64:
1033 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034 ; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1035 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1037 ; GFX10PLUS-LABEL: v_roundeven_f64:
1038 ; GFX10PLUS: ; %bb.0:
1039 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1040 ; GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1041 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1043 ; SDAG_GFX6-LABEL: v_roundeven_f64:
1044 ; SDAG_GFX6: ; %bb.0:
1045 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1046 ; SDAG_GFX6-NEXT: s_brev_b32 s4, -2
1047 ; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0x43300000
1048 ; SDAG_GFX6-NEXT: v_bfi_b32 v3, s4, v2, v1
1049 ; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0
1050 ; SDAG_GFX6-NEXT: v_add_f64 v[4:5], v[0:1], v[2:3]
1051 ; SDAG_GFX6-NEXT: s_mov_b32 s4, -1
1052 ; SDAG_GFX6-NEXT: s_mov_b32 s5, 0x432fffff
1053 ; SDAG_GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
1054 ; SDAG_GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1055 ; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1056 ; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
1057 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
1059 ; SDAG_GFX7-LABEL: v_roundeven_f64:
1060 ; SDAG_GFX7: ; %bb.0:
1061 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062 ; SDAG_GFX7-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1063 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
1065 ; SDAG_GFX8-LABEL: v_roundeven_f64:
1066 ; SDAG_GFX8: ; %bb.0:
1067 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1068 ; SDAG_GFX8-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1069 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
1071 ; SDAG_GFX9-LABEL: v_roundeven_f64:
1072 ; SDAG_GFX9: ; %bb.0:
1073 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074 ; SDAG_GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1075 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
1077 ; SDAG_GFX10PLUS-LABEL: v_roundeven_f64:
1078 ; SDAG_GFX10PLUS: ; %bb.0:
1079 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1080 ; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1081 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1082 %roundeven = call double @llvm.roundeven.f64(double %x)
1083 ret double %roundeven
; f64 roundeven of a negated argument. On the two tahiti prefixes the expected
; expansion first materializes the negated high word in v6 (v_xor with
; 0x80000000), takes the sign for the 0x43300000 constant from v6, applies the
; negation as a -v[0:1] modifier on the first v_add_f64, and selects v0/v6
; (the negated input) when the magnitude compare succeeds. All other prefixes
; expect the negation folded into a single v_rndne_f64_e64 as -v[0:1].
1086 define double @v_roundeven_f64_fneg(double %x) {
1087 ; GFX6-LABEL: v_roundeven_f64_fneg:
1089 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090 ; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
1091 ; GFX6-NEXT: v_and_b32_e32 v3, 0x80000000, v6
1092 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
1093 ; GFX6-NEXT: v_or_b32_e32 v3, 0x43300000, v3
1094 ; GFX6-NEXT: v_add_f64 v[4:5], -v[0:1], v[2:3]
1095 ; GFX6-NEXT: s_mov_b32 s4, -1
1096 ; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
1097 ; GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
1098 ; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1099 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1100 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
1101 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1103 ; GFX7-LABEL: v_roundeven_f64_fneg:
1105 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106 ; GFX7-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
1107 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1109 ; GFX8-LABEL: v_roundeven_f64_fneg:
1111 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1112 ; GFX8-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
1113 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1115 ; GFX9-LABEL: v_roundeven_f64_fneg:
1117 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; GFX9-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
1119 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1121 ; GFX10PLUS-LABEL: v_roundeven_f64_fneg:
1122 ; GFX10PLUS: ; %bb.0:
1123 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124 ; GFX10PLUS-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
1125 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1127 ; SDAG_GFX6-LABEL: v_roundeven_f64_fneg:
1128 ; SDAG_GFX6: ; %bb.0:
1129 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1130 ; SDAG_GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
1131 ; SDAG_GFX6-NEXT: s_brev_b32 s4, -2
1132 ; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0x43300000
1133 ; SDAG_GFX6-NEXT: v_bfi_b32 v3, s4, v2, v6
1134 ; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0
1135 ; SDAG_GFX6-NEXT: v_add_f64 v[4:5], -v[0:1], v[2:3]
1136 ; SDAG_GFX6-NEXT: s_mov_b32 s4, -1
1137 ; SDAG_GFX6-NEXT: s_mov_b32 s5, 0x432fffff
1138 ; SDAG_GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
1139 ; SDAG_GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1140 ; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
1141 ; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
1142 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
1144 ; SDAG_GFX7-LABEL: v_roundeven_f64_fneg:
1145 ; SDAG_GFX7: ; %bb.0:
1146 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1147 ; SDAG_GFX7-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
1148 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
1150 ; SDAG_GFX8-LABEL: v_roundeven_f64_fneg:
1151 ; SDAG_GFX8: ; %bb.0:
1152 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153 ; SDAG_GFX8-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
1154 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
1156 ; SDAG_GFX9-LABEL: v_roundeven_f64_fneg:
1157 ; SDAG_GFX9: ; %bb.0:
1158 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1159 ; SDAG_GFX9-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
1160 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
1162 ; SDAG_GFX10PLUS-LABEL: v_roundeven_f64_fneg:
1163 ; SDAG_GFX10PLUS: ; %bb.0:
1164 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1165 ; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
1166 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1167 %neg.x = fneg double %x
1168 %roundeven = call double @llvm.roundeven.f64(double %neg.x)
1169 ret double %roundeven
; <2 x double> roundeven, handled one element at a time in every expected
; output. The two tahiti prefixes expect the add/subtract/compare/select
; expansion once for v[0:1] and once for v[2:3], sharing the zero low word in
; v4 and the compare constant in s[4:5]. All other prefixes expect two
; v_rndne_f64_e32 instructions, one per element.
1172 define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
1173 ; GFX6-LABEL: v_roundeven_v2f64:
1175 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1176 ; GFX6-NEXT: v_and_b32_e32 v5, 0x80000000, v1
1177 ; GFX6-NEXT: v_mov_b32_e32 v4, 0
1178 ; GFX6-NEXT: v_or_b32_e32 v5, 0x43300000, v5
1179 ; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], v[4:5]
1180 ; GFX6-NEXT: s_mov_b32 s4, -1
1181 ; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
1182 ; GFX6-NEXT: v_add_f64 v[5:6], v[6:7], -v[4:5]
1183 ; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1184 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
1185 ; GFX6-NEXT: v_and_b32_e32 v5, 0x80000000, v3
1186 ; GFX6-NEXT: v_or_b32_e32 v5, 0x43300000, v5
1187 ; GFX6-NEXT: v_add_f64 v[7:8], v[2:3], v[4:5]
1188 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
1189 ; GFX6-NEXT: v_add_f64 v[4:5], v[7:8], -v[4:5]
1190 ; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
1191 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
1192 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
1193 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1195 ; GFX7-LABEL: v_roundeven_v2f64:
1197 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1198 ; GFX7-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1199 ; GFX7-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
1200 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1202 ; GFX8-LABEL: v_roundeven_v2f64:
1204 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1205 ; GFX8-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1206 ; GFX8-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
1207 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1209 ; GFX9-LABEL: v_roundeven_v2f64:
1211 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1212 ; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1213 ; GFX9-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
1214 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1216 ; GFX10PLUS-LABEL: v_roundeven_v2f64:
1217 ; GFX10PLUS: ; %bb.0:
1218 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1219 ; GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1220 ; GFX10PLUS-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
1221 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1223 ; SDAG_GFX6-LABEL: v_roundeven_v2f64:
1224 ; SDAG_GFX6: ; %bb.0:
1225 ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1226 ; SDAG_GFX6-NEXT: s_brev_b32 s6, -2
1227 ; SDAG_GFX6-NEXT: v_mov_b32_e32 v8, 0x43300000
1228 ; SDAG_GFX6-NEXT: v_bfi_b32 v5, s6, v8, v1
1229 ; SDAG_GFX6-NEXT: v_mov_b32_e32 v4, 0
1230 ; SDAG_GFX6-NEXT: v_add_f64 v[6:7], v[0:1], v[4:5]
1231 ; SDAG_GFX6-NEXT: s_mov_b32 s4, -1
1232 ; SDAG_GFX6-NEXT: s_mov_b32 s5, 0x432fffff
1233 ; SDAG_GFX6-NEXT: v_add_f64 v[5:6], v[6:7], -v[4:5]
1234 ; SDAG_GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
1235 ; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
1236 ; SDAG_GFX6-NEXT: v_bfi_b32 v5, s6, v8, v3
1237 ; SDAG_GFX6-NEXT: v_add_f64 v[7:8], v[2:3], v[4:5]
1238 ; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
1239 ; SDAG_GFX6-NEXT: v_add_f64 v[4:5], v[7:8], -v[4:5]
1240 ; SDAG_GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
1241 ; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
1242 ; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
1243 ; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31]
1245 ; SDAG_GFX7-LABEL: v_roundeven_v2f64:
1246 ; SDAG_GFX7: ; %bb.0:
1247 ; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248 ; SDAG_GFX7-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1249 ; SDAG_GFX7-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
1250 ; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31]
1252 ; SDAG_GFX8-LABEL: v_roundeven_v2f64:
1253 ; SDAG_GFX8: ; %bb.0:
1254 ; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255 ; SDAG_GFX8-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1256 ; SDAG_GFX8-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
1257 ; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31]
1259 ; SDAG_GFX9-LABEL: v_roundeven_v2f64:
1260 ; SDAG_GFX9: ; %bb.0:
1261 ; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262 ; SDAG_GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1263 ; SDAG_GFX9-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
1264 ; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31]
1266 ; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f64:
1267 ; SDAG_GFX10PLUS: ; %bb.0:
1268 ; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1269 ; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
1270 ; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
1271 ; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1272 %roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
1273 ret <2 x double> %roundeven
; Intrinsic declarations for this test file, grouped by element type (half,
; float, double) followed by the fabs helpers. Every declaration uses
; attribute group #0, defined on the last line.
1276 declare half @llvm.roundeven.f16(half) #0
1277 declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
1278 declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0
1280 declare float @llvm.roundeven.f32(float) #0
1281 declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
1282 declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
1283 declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0
1285 declare double @llvm.roundeven.f64(double) #0
1286 declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0
1288 declare half @llvm.fabs.f16(half) #0
1289 declare float @llvm.fabs.f32(float) #0
1291 attributes #0 = { nounwind readnone speculatable willreturn }