; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
define float @v_roundeven_f32(float %x) {
; GFX6-LABEL: v_roundeven_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call float @llvm.roundeven.f32(float %x)
  ret float %roundeven
}
define <2 x float> @v_roundeven_v2f32(<2 x float> %x) {
; GFX6-LABEL: v_roundeven_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x)
  ret <2 x float> %roundeven
}
define <3 x float> @v_roundeven_v3f32(<3 x float> %x) {
; GFX6-LABEL: v_roundeven_v3f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v3f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v3f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v3f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v3f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    v_rndne_f32_e32 v2, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
  ret <3 x float> %roundeven
}
define <4 x float> @v_roundeven_v4f32(<4 x float> %x) {
; GFX6-LABEL: v_roundeven_v4f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
; GFX8-NEXT:    v_rndne_f32_e32 v3, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
; GFX9-NEXT:    v_rndne_f32_e32 v3, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v4f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    v_rndne_f32_e32 v2, v2
; GFX10-NEXT:    v_rndne_f32_e32 v3, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x)
  ret <4 x float> %roundeven
}
define half @v_roundeven_f16(half %x) {
; GFX6-LABEL: v_roundeven_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f16_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call half @llvm.roundeven.f16(half %x)
  ret half %roundeven
}
define <2 x half> @v_roundeven_v2f16(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f16_e32 v1, v0
; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x)
  ret <2 x half> %roundeven
}
define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX6-NEXT:    v_rndne_f32_e32 v0, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX7-NEXT:    v_rndne_f32_e32 v0, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v2
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f16_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX10-NEXT:    v_rndne_f16_e32 v1, v0
; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg)
  ret <2 x half> %roundeven
}
define <4 x half> @v_roundeven_v4f16(<4 x half> %x) {
; GFX6-LABEL: v_roundeven_v4f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v2, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_rndne_f16_e32 v3, v1
; GFX8-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v4, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v2, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_rndne_f16_e32 v3, v1
; GFX9-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v2, v4, v0
; GFX9-NEXT:    v_and_or_b32 v1, v3, v4, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v4f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f16_e32 v2, v0
; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_rndne_f16_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_and_or_b32 v0, v2, v4, v0
; GFX10-NEXT:    v_and_or_b32 v1, v3, v4, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
  ret <4 x half> %roundeven
}
define float @v_roundeven_f32_fabs(float %x) {
; GFX6-LABEL: v_roundeven_f32_fabs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fabs:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fabs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fabs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f32_fabs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %roundeven = call float @llvm.roundeven.f32(float %fabs.x)
  ret float %roundeven
}
define amdgpu_ps float @s_roundeven_f32(float inreg %x) {
; GFX6-LABEL: s_roundeven_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_rndne_f32_e32 v0, s0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_roundeven_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_rndne_f32_e32 v0, s0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_roundeven_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_rndne_f32_e32 v0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_roundeven_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_rndne_f32_e32 v0, s0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_roundeven_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_rndne_f32_e32 v0, s0
; GFX10-NEXT:    ; return to shader part epilog
  %roundeven = call float @llvm.roundeven.f32(float %x)
  ret float %roundeven
}
define float @v_roundeven_f32_fneg(float %x) {
; GFX6-LABEL: v_roundeven_f32_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f32_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg float %x
  %roundeven = call float @llvm.roundeven.f32(float %neg.x)
  ret float %roundeven
}
define double @v_roundeven_f64(double %x) {
; GFX6-LABEL: v_roundeven_f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v1
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call double @llvm.roundeven.f64(double %x)
  ret double %roundeven
}
define double @v_roundeven_f64_fneg(double %x) {
; GFX6-LABEL: v_roundeven_f64_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_xor_b32_e32 v6, 0x80000000, v1
; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v6
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT:    v_add_f64 v[4:5], -v[0:1], v[2:3]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f64_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %roundeven = call double @llvm.roundeven.f64(double %neg.x)
  ret double %roundeven
}
define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
; GFX6-LABEL: v_roundeven_v2f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_brev_b32 s6, 1
; GFX6-NEXT:    s_mov_b32 s7, 0x43300000
; GFX6-NEXT:    v_and_b32_e32 v5, s6, v1
; GFX6-NEXT:    v_mov_b32_e32 v4, 0
; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], v[4:5]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[5:6], v[6:7], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
; GFX6-NEXT:    v_and_b32_e32 v5, s6, v3
; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
; GFX6-NEXT:    v_add_f64 v[7:8], v[2:3], v[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
; GFX6-NEXT:    v_add_f64 v[4:5], v[7:8], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
  ret <2 x double> %roundeven
}
declare half @llvm.roundeven.f16(half) #0
declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0

declare float @llvm.roundeven.f32(float) #0
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0

declare double @llvm.roundeven.f64(double) #0
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0

declare half @llvm.fabs.f16(half) #0
declare float @llvm.fabs.f32(float) #0

attributes #0 = { nounwind readnone speculatable willreturn }