1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX900 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX906 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
6 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; Plain i16 shift-left of two i16 arguments. Every check prefix expects a
; single v_lshlrev_b16 (spelled with the _e32 suffix on the gfx8/gfx9
; prefixes) between the entry s_waitcnt and the final s_setpc_b64.
8 define i16 @shl_i16(i16 %x, i16 %y) {
11 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v1, v0
13 ; GFX8-NEXT: s_setpc_b64 s[30:31]
15 ; GFX9ALL-LABEL: shl_i16:
17 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX9ALL-NEXT: v_lshlrev_b16_e32 v0, v1, v0
19 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
21 ; GFX10-LABEL: shl_i16:
23 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0
25 ; GFX10-NEXT: s_setpc_b64 s[30:31]
27 ; GFX11-LABEL: shl_i16:
29 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX11-NEXT: v_lshlrev_b16 v0, v1, v0
31 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; Logical shift-right of %x by %y at i16. Every check prefix expects a single
; v_lshrrev_b16 and no additional masking of the result.
36 define i16 @lshr_i16(i16 %x, i16 %y) {
37 ; GFX8-LABEL: lshr_i16:
39 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, v1, v0
41 ; GFX8-NEXT: s_setpc_b64 s[30:31]
43 ; GFX9ALL-LABEL: lshr_i16:
45 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46 ; GFX9ALL-NEXT: v_lshrrev_b16_e32 v0, v1, v0
47 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
49 ; GFX10-LABEL: lshr_i16:
51 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0
53 ; GFX10-NEXT: s_setpc_b64 s[30:31]
55 ; GFX11-LABEL: lshr_i16:
57 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58 ; GFX11-NEXT: v_lshrrev_b16 v0, v1, v0
59 ; GFX11-NEXT: s_setpc_b64 s[30:31]
60 %res = lshr i16 %x, %y
; Arithmetic shift-right of %x by %y at i16. Every check prefix expects a
; single v_ashrrev_i16 and no additional masking of the result.
64 define i16 @ashr_i16(i16 %x, i16 %y) {
65 ; GFX8-LABEL: ashr_i16:
67 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, v1, v0
69 ; GFX8-NEXT: s_setpc_b64 s[30:31]
71 ; GFX9ALL-LABEL: ashr_i16:
73 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74 ; GFX9ALL-NEXT: v_ashrrev_i16_e32 v0, v1, v0
75 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
77 ; GFX10-LABEL: ashr_i16:
79 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80 ; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0
81 ; GFX10-NEXT: s_setpc_b64 s[30:31]
83 ; GFX11-LABEL: ashr_i16:
85 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX11-NEXT: v_ashrrev_i16 v0, v1, v0
87 ; GFX11-NEXT: s_setpc_b64 s[30:31]
88 %res = ashr i16 %x, %y
; i16 addition of %x and %y. The gfx8/gfx9 prefixes expect v_add_u16 while the
; gfx10/gfx11 prefixes expect v_add_nc_u16; in both cases it is the only
; instruction between the entry s_waitcnt and the final s_setpc_b64.
92 define i16 @add_u16(i16 %x, i16 %y) {
93 ; GFX8-LABEL: add_u16:
95 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96 ; GFX8-NEXT: v_add_u16_e32 v0, v0, v1
97 ; GFX8-NEXT: s_setpc_b64 s[30:31]
99 ; GFX9ALL-LABEL: add_u16:
101 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GFX9ALL-NEXT: v_add_u16_e32 v0, v0, v1
103 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
105 ; GFX10-LABEL: add_u16:
107 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108 ; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
109 ; GFX10-NEXT: s_setpc_b64 s[30:31]
111 ; GFX11-LABEL: add_u16:
113 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX11-NEXT: v_add_nc_u16 v0, v0, v1
115 ; GFX11-NEXT: s_setpc_b64 s[30:31]
116 %res = add i16 %x, %y
; i16 subtraction %x - %y. The gfx8/gfx9 prefixes expect v_sub_u16 while the
; gfx10/gfx11 prefixes expect v_sub_nc_u16; no other instruction is expected
; between the entry s_waitcnt and the final s_setpc_b64.
120 define i16 @sub_u16(i16 %x, i16 %y) {
121 ; GFX8-LABEL: sub_u16:
123 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
125 ; GFX8-NEXT: s_setpc_b64 s[30:31]
127 ; GFX9ALL-LABEL: sub_u16:
129 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX9ALL-NEXT: v_sub_u16_e32 v0, v0, v1
131 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
133 ; GFX10-LABEL: sub_u16:
135 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136 ; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1
137 ; GFX10-NEXT: s_setpc_b64 s[30:31]
139 ; GFX11-LABEL: sub_u16:
141 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX11-NEXT: v_sub_nc_u16 v0, v0, v1
143 ; GFX11-NEXT: s_setpc_b64 s[30:31]
144 %res = sub i16 %x, %y
; i16 multiplication of %x and %y. Every check prefix expects a single
; v_mul_lo_u16 and no additional masking of the result.
148 define i16 @mul_lo_u16(i16 %x, i16 %y) {
149 ; GFX8-LABEL: mul_lo_u16:
151 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v1
153 ; GFX8-NEXT: s_setpc_b64 s[30:31]
155 ; GFX9ALL-LABEL: mul_lo_u16:
157 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158 ; GFX9ALL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
159 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
161 ; GFX10-LABEL: mul_lo_u16:
163 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
165 ; GFX10-NEXT: s_setpc_b64 s[30:31]
167 ; GFX11-LABEL: mul_lo_u16:
169 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170 ; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1
171 ; GFX11-NEXT: s_setpc_b64 s[30:31]
172 %res = mul i16 %x, %y
; Unsigned i16 minimum written as icmp ule + select. Every check prefix
; expects the compare/select pair to be folded into a single v_min_u16.
176 define i16 @min_u16(i16 %x, i16 %y) {
177 ; GFX8-LABEL: min_u16:
179 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX8-NEXT: v_min_u16_e32 v0, v0, v1
181 ; GFX8-NEXT: s_setpc_b64 s[30:31]
183 ; GFX9ALL-LABEL: min_u16:
185 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186 ; GFX9ALL-NEXT: v_min_u16_e32 v0, v0, v1
187 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
189 ; GFX10-LABEL: min_u16:
191 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192 ; GFX10-NEXT: v_min_u16 v0, v0, v1
193 ; GFX10-NEXT: s_setpc_b64 s[30:31]
195 ; GFX11-LABEL: min_u16:
197 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198 ; GFX11-NEXT: v_min_u16 v0, v0, v1
199 ; GFX11-NEXT: s_setpc_b64 s[30:31]
200 %cmp = icmp ule i16 %x, %y
201 %res = select i1 %cmp, i16 %x, i16 %y
; Signed i16 minimum written as icmp sle + select. Every check prefix expects
; the compare/select pair to be folded into a single v_min_i16.
205 define i16 @min_i16(i16 %x, i16 %y) {
206 ; GFX8-LABEL: min_i16:
208 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209 ; GFX8-NEXT: v_min_i16_e32 v0, v0, v1
210 ; GFX8-NEXT: s_setpc_b64 s[30:31]
212 ; GFX9ALL-LABEL: min_i16:
214 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215 ; GFX9ALL-NEXT: v_min_i16_e32 v0, v0, v1
216 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
218 ; GFX10-LABEL: min_i16:
220 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221 ; GFX10-NEXT: v_min_i16 v0, v0, v1
222 ; GFX10-NEXT: s_setpc_b64 s[30:31]
224 ; GFX11-LABEL: min_i16:
226 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
227 ; GFX11-NEXT: v_min_i16 v0, v0, v1
228 ; GFX11-NEXT: s_setpc_b64 s[30:31]
229 %cmp = icmp sle i16 %x, %y
230 %res = select i1 %cmp, i16 %x, i16 %y
; Unsigned i16 maximum written as icmp uge + select. Every check prefix
; expects the compare/select pair to be folded into a single v_max_u16.
234 define i16 @max_u16(i16 %x, i16 %y) {
235 ; GFX8-LABEL: max_u16:
237 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GFX8-NEXT: v_max_u16_e32 v0, v0, v1
239 ; GFX8-NEXT: s_setpc_b64 s[30:31]
241 ; GFX9ALL-LABEL: max_u16:
243 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244 ; GFX9ALL-NEXT: v_max_u16_e32 v0, v0, v1
245 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
247 ; GFX10-LABEL: max_u16:
249 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250 ; GFX10-NEXT: v_max_u16 v0, v0, v1
251 ; GFX10-NEXT: s_setpc_b64 s[30:31]
253 ; GFX11-LABEL: max_u16:
255 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256 ; GFX11-NEXT: v_max_u16 v0, v0, v1
257 ; GFX11-NEXT: s_setpc_b64 s[30:31]
258 %cmp = icmp uge i16 %x, %y
259 %res = select i1 %cmp, i16 %x, i16 %y
; Signed i16 maximum written as icmp sge + select. Every check prefix expects
; the compare/select pair to be folded into a single v_max_i16.
263 define i16 @max_i16(i16 %x, i16 %y) {
264 ; GFX8-LABEL: max_i16:
266 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v1
268 ; GFX8-NEXT: s_setpc_b64 s[30:31]
270 ; GFX9ALL-LABEL: max_i16:
272 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX9ALL-NEXT: v_max_i16_e32 v0, v0, v1
274 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
276 ; GFX10-LABEL: max_i16:
278 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279 ; GFX10-NEXT: v_max_i16 v0, v0, v1
280 ; GFX10-NEXT: s_setpc_b64 s[30:31]
282 ; GFX11-LABEL: max_i16:
284 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285 ; GFX11-NEXT: v_max_i16 v0, v0, v1
286 ; GFX11-NEXT: s_setpc_b64 s[30:31]
287 %cmp = icmp sge i16 %x, %y
288 %res = select i1 %cmp, i16 %x, i16 %y
; i16 shift-left whose result is zero-extended to i32. The gfx8/gfx9 prefixes
; expect only the 16-bit shift; the gfx10/gfx11 prefixes expect an extra
; v_and_b32 with 0xffff after it.
; NOTE(review): presumably only the older targets' 16-bit ops clear bits
; 31:16 of the destination -- confirm against the ISA documentation.
292 define i32 @shl_i16_zext_i32(i16 %x, i16 %y) {
293 ; GFX8-LABEL: shl_i16_zext_i32:
295 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v1, v0
297 ; GFX8-NEXT: s_setpc_b64 s[30:31]
299 ; GFX9ALL-LABEL: shl_i16_zext_i32:
301 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302 ; GFX9ALL-NEXT: v_lshlrev_b16_e32 v0, v1, v0
303 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
305 ; GFX10-LABEL: shl_i16_zext_i32:
307 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0
309 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
310 ; GFX10-NEXT: s_setpc_b64 s[30:31]
312 ; GFX11-LABEL: shl_i16_zext_i32:
314 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315 ; GFX11-NEXT: v_lshlrev_b16 v0, v1, v0
316 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
317 ; GFX11-NEXT: s_setpc_b64 s[30:31]
318 %res = shl i16 %x, %y
319 %zext = zext i16 %res to i32
; i16 logical shift-right whose result is zero-extended to i32. The gfx8/gfx9
; prefixes expect only v_lshrrev_b16; the gfx10/gfx11 prefixes expect it to be
; followed by a v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
323 define i32 @lshr_i16_zext_i32(i16 %x, i16 %y) {
324 ; GFX8-LABEL: lshr_i16_zext_i32:
326 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, v1, v0
328 ; GFX8-NEXT: s_setpc_b64 s[30:31]
330 ; GFX9ALL-LABEL: lshr_i16_zext_i32:
332 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333 ; GFX9ALL-NEXT: v_lshrrev_b16_e32 v0, v1, v0
334 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
336 ; GFX10-LABEL: lshr_i16_zext_i32:
338 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0
340 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
341 ; GFX10-NEXT: s_setpc_b64 s[30:31]
343 ; GFX11-LABEL: lshr_i16_zext_i32:
345 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346 ; GFX11-NEXT: v_lshrrev_b16 v0, v1, v0
347 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
348 ; GFX11-NEXT: s_setpc_b64 s[30:31]
349 %res = lshr i16 %x, %y
350 %zext = zext i16 %res to i32
; i16 arithmetic shift-right whose result is zero-extended to i32. The
; gfx8/gfx9 prefixes expect only v_ashrrev_i16; the gfx10/gfx11 prefixes
; expect it to be followed by a v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
354 define i32 @ashr_i16_zext_i32(i16 %x, i16 %y) {
355 ; GFX8-LABEL: ashr_i16_zext_i32:
357 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, v1, v0
359 ; GFX8-NEXT: s_setpc_b64 s[30:31]
361 ; GFX9ALL-LABEL: ashr_i16_zext_i32:
363 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GFX9ALL-NEXT: v_ashrrev_i16_e32 v0, v1, v0
365 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
367 ; GFX10-LABEL: ashr_i16_zext_i32:
369 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370 ; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0
371 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
372 ; GFX10-NEXT: s_setpc_b64 s[30:31]
374 ; GFX11-LABEL: ashr_i16_zext_i32:
376 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; GFX11-NEXT: v_ashrrev_i16 v0, v1, v0
378 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
379 ; GFX11-NEXT: s_setpc_b64 s[30:31]
380 %res = ashr i16 %x, %y
381 %zext = zext i16 %res to i32
; i16 add whose result is zero-extended to i32. The gfx8/gfx9 prefixes expect
; only v_add_u16; the gfx10/gfx11 prefixes expect v_add_nc_u16 followed by a
; v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
385 define i32 @add_u16_zext_i32(i16 %x, i16 %y) {
386 ; GFX8-LABEL: add_u16_zext_i32:
388 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389 ; GFX8-NEXT: v_add_u16_e32 v0, v0, v1
390 ; GFX8-NEXT: s_setpc_b64 s[30:31]
392 ; GFX9ALL-LABEL: add_u16_zext_i32:
394 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395 ; GFX9ALL-NEXT: v_add_u16_e32 v0, v0, v1
396 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
398 ; GFX10-LABEL: add_u16_zext_i32:
400 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
401 ; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
402 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
403 ; GFX10-NEXT: s_setpc_b64 s[30:31]
405 ; GFX11-LABEL: add_u16_zext_i32:
407 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408 ; GFX11-NEXT: v_add_nc_u16 v0, v0, v1
409 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
410 ; GFX11-NEXT: s_setpc_b64 s[30:31]
411 %res = add i16 %x, %y
412 %zext = zext i16 %res to i32
; i16 sub whose result is zero-extended to i32. The gfx8/gfx9 prefixes expect
; only v_sub_u16; the gfx10/gfx11 prefixes expect v_sub_nc_u16 followed by a
; v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
416 define i32 @sub_u16_zext_i32(i16 %x, i16 %y) {
417 ; GFX8-LABEL: sub_u16_zext_i32:
419 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420 ; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
421 ; GFX8-NEXT: s_setpc_b64 s[30:31]
423 ; GFX9ALL-LABEL: sub_u16_zext_i32:
425 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
426 ; GFX9ALL-NEXT: v_sub_u16_e32 v0, v0, v1
427 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
429 ; GFX10-LABEL: sub_u16_zext_i32:
431 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432 ; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1
433 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
434 ; GFX10-NEXT: s_setpc_b64 s[30:31]
436 ; GFX11-LABEL: sub_u16_zext_i32:
438 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439 ; GFX11-NEXT: v_sub_nc_u16 v0, v0, v1
440 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
441 ; GFX11-NEXT: s_setpc_b64 s[30:31]
442 %res = sub i16 %x, %y
443 %zext = zext i16 %res to i32
; i16 mul whose result is zero-extended to i32. The gfx8/gfx9 prefixes expect
; only v_mul_lo_u16; the gfx10/gfx11 prefixes expect it to be followed by a
; v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
447 define i32 @mul_lo_u16_zext_i32(i16 %x, i16 %y) {
448 ; GFX8-LABEL: mul_lo_u16_zext_i32:
450 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
451 ; GFX8-NEXT: v_mul_lo_u16_e32 v0, v0, v1
452 ; GFX8-NEXT: s_setpc_b64 s[30:31]
454 ; GFX9ALL-LABEL: mul_lo_u16_zext_i32:
456 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
457 ; GFX9ALL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
458 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
460 ; GFX10-LABEL: mul_lo_u16_zext_i32:
462 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
464 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
465 ; GFX10-NEXT: s_setpc_b64 s[30:31]
467 ; GFX11-LABEL: mul_lo_u16_zext_i32:
469 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
470 ; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1
471 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
472 ; GFX11-NEXT: s_setpc_b64 s[30:31]
473 %res = mul i16 %x, %y
474 %zext = zext i16 %res to i32
; Unsigned i16 minimum (icmp ule + select) zero-extended to i32. The
; gfx8/gfx9 prefixes expect only v_min_u16; the gfx10/gfx11 prefixes expect
; it to be followed by a v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
478 define i32 @min_u16_zext_i32(i16 %x, i16 %y) {
479 ; GFX8-LABEL: min_u16_zext_i32:
481 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482 ; GFX8-NEXT: v_min_u16_e32 v0, v0, v1
483 ; GFX8-NEXT: s_setpc_b64 s[30:31]
485 ; GFX9ALL-LABEL: min_u16_zext_i32:
487 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488 ; GFX9ALL-NEXT: v_min_u16_e32 v0, v0, v1
489 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
491 ; GFX10-LABEL: min_u16_zext_i32:
493 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494 ; GFX10-NEXT: v_min_u16 v0, v0, v1
495 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
496 ; GFX10-NEXT: s_setpc_b64 s[30:31]
498 ; GFX11-LABEL: min_u16_zext_i32:
500 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501 ; GFX11-NEXT: v_min_u16 v0, v0, v1
502 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
503 ; GFX11-NEXT: s_setpc_b64 s[30:31]
504 %cmp = icmp ule i16 %x, %y
505 %res = select i1 %cmp, i16 %x, i16 %y
506 %zext = zext i16 %res to i32
; Signed i16 minimum (icmp sle + select) zero-extended to i32. The gfx8/gfx9
; prefixes expect only v_min_i16; the gfx10/gfx11 prefixes expect it to be
; followed by a v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
510 define i32 @min_i16_zext_i32(i16 %x, i16 %y) {
511 ; GFX8-LABEL: min_i16_zext_i32:
513 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
514 ; GFX8-NEXT: v_min_i16_e32 v0, v0, v1
515 ; GFX8-NEXT: s_setpc_b64 s[30:31]
517 ; GFX9ALL-LABEL: min_i16_zext_i32:
519 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
520 ; GFX9ALL-NEXT: v_min_i16_e32 v0, v0, v1
521 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
523 ; GFX10-LABEL: min_i16_zext_i32:
525 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526 ; GFX10-NEXT: v_min_i16 v0, v0, v1
527 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
528 ; GFX10-NEXT: s_setpc_b64 s[30:31]
530 ; GFX11-LABEL: min_i16_zext_i32:
532 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533 ; GFX11-NEXT: v_min_i16 v0, v0, v1
534 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
535 ; GFX11-NEXT: s_setpc_b64 s[30:31]
536 %cmp = icmp sle i16 %x, %y
537 %res = select i1 %cmp, i16 %x, i16 %y
538 %zext = zext i16 %res to i32
; Unsigned i16 maximum (icmp uge + select) zero-extended to i32. The
; gfx8/gfx9 prefixes expect only v_max_u16; the gfx10/gfx11 prefixes expect
; it to be followed by a v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
542 define i32 @max_u16_zext_i32(i16 %x, i16 %y) {
543 ; GFX8-LABEL: max_u16_zext_i32:
545 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546 ; GFX8-NEXT: v_max_u16_e32 v0, v0, v1
547 ; GFX8-NEXT: s_setpc_b64 s[30:31]
549 ; GFX9ALL-LABEL: max_u16_zext_i32:
551 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552 ; GFX9ALL-NEXT: v_max_u16_e32 v0, v0, v1
553 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
555 ; GFX10-LABEL: max_u16_zext_i32:
557 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558 ; GFX10-NEXT: v_max_u16 v0, v0, v1
559 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
560 ; GFX10-NEXT: s_setpc_b64 s[30:31]
562 ; GFX11-LABEL: max_u16_zext_i32:
564 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565 ; GFX11-NEXT: v_max_u16 v0, v0, v1
566 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
567 ; GFX11-NEXT: s_setpc_b64 s[30:31]
568 %cmp = icmp uge i16 %x, %y
569 %res = select i1 %cmp, i16 %x, i16 %y
570 %zext = zext i16 %res to i32
; Signed i16 maximum (icmp sge + select) zero-extended to i32. The gfx8/gfx9
; prefixes expect only v_max_i16; the gfx10/gfx11 prefixes expect it to be
; followed by a v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
574 define i32 @max_i16_zext_i32(i16 %x, i16 %y) {
575 ; GFX8-LABEL: max_i16_zext_i32:
577 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
578 ; GFX8-NEXT: v_max_i16_e32 v0, v0, v1
579 ; GFX8-NEXT: s_setpc_b64 s[30:31]
581 ; GFX9ALL-LABEL: max_i16_zext_i32:
583 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584 ; GFX9ALL-NEXT: v_max_i16_e32 v0, v0, v1
585 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
587 ; GFX10-LABEL: max_i16_zext_i32:
589 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590 ; GFX10-NEXT: v_max_i16 v0, v0, v1
591 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
592 ; GFX10-NEXT: s_setpc_b64 s[30:31]
594 ; GFX11-LABEL: max_i16_zext_i32:
596 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597 ; GFX11-NEXT: v_max_i16 v0, v0, v1
598 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
599 ; GFX11-NEXT: s_setpc_b64 s[30:31]
600 %cmp = icmp sge i16 %x, %y
601 %res = select i1 %cmp, i16 %x, i16 %y
602 %zext = zext i16 %res to i32
; half fadd, bitcast to i16 and zero-extended to i32. The gfx8/gfx9 prefixes
; expect only v_add_f16; the gfx10/gfx11 prefixes expect it to be followed by
; a v_and_b32 with 0xffff.
; NOTE(review): presumably only the older targets clear bits 31:16 -- confirm.
606 define i32 @zext_fadd_f16(half %x, half %y) {
607 ; GFX8-LABEL: zext_fadd_f16:
609 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610 ; GFX8-NEXT: v_add_f16_e32 v0, v0, v1
611 ; GFX8-NEXT: s_setpc_b64 s[30:31]
613 ; GFX9ALL-LABEL: zext_fadd_f16:
615 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
616 ; GFX9ALL-NEXT: v_add_f16_e32 v0, v0, v1
617 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
619 ; GFX10-LABEL: zext_fadd_f16:
621 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
622 ; GFX10-NEXT: v_add_f16_e32 v0, v0, v1
623 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
624 ; GFX10-NEXT: s_setpc_b64 s[30:31]
626 ; GFX11-LABEL: zext_fadd_f16:
628 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629 ; GFX11-NEXT: v_add_f16_e32 v0, v0, v1
630 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
631 ; GFX11-NEXT: s_setpc_b64 s[30:31]
632 %add = fadd half %x, %y
633 %cast = bitcast half %add to i16
634 %zext = zext i16 %cast to i32
; llvm.fma.f16 result, bitcast to i16 and zero-extended to i32. Only the
; gfx803 checks omit the mask: gfx900/gfx906 expect v_fma_f16 followed by a
; v_and_b32 with 0xffff, and gfx10/gfx11 expect v_fmac_f16 accumulating into
; v2 with the mask then moving the result from v2 into v0.
638 define i32 @zext_fma_f16(half %x, half %y, half %z) {
639 ; GFX8-LABEL: zext_fma_f16:
641 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642 ; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
643 ; GFX8-NEXT: s_setpc_b64 s[30:31]
645 ; GFX9ALL-LABEL: zext_fma_f16:
647 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648 ; GFX9ALL-NEXT: v_fma_f16 v0, v0, v1, v2
649 ; GFX9ALL-NEXT: v_and_b32_e32 v0, 0xffff, v0
650 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
652 ; GFX10-LABEL: zext_fma_f16:
654 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655 ; GFX10-NEXT: v_fmac_f16_e32 v2, v0, v1
656 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v2
657 ; GFX10-NEXT: s_setpc_b64 s[30:31]
659 ; GFX11-LABEL: zext_fma_f16:
661 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
662 ; GFX11-NEXT: v_fmac_f16_e32 v2, v0, v1
663 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v2
664 ; GFX11-NEXT: s_setpc_b64 s[30:31]
665 %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
666 %cast = bitcast half %fma to i16
667 %zext = zext i16 %cast to i32
; llvm.amdgcn.div.fixup.f16 result, bitcast to i16 and zero-extended to i32.
; Only the gfx803 checks omit the mask; the gfx9, gfx10 and gfx11 prefixes all
; expect v_div_fixup_f16 followed by a v_and_b32 with 0xffff.
671 define i32 @zext_div_fixup_f16(half %x, half %y, half %z) {
672 ; GFX8-LABEL: zext_div_fixup_f16:
674 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
675 ; GFX8-NEXT: v_div_fixup_f16 v0, v0, v1, v2
676 ; GFX8-NEXT: s_setpc_b64 s[30:31]
678 ; GFX9ALL-LABEL: zext_div_fixup_f16:
680 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
681 ; GFX9ALL-NEXT: v_div_fixup_f16 v0, v0, v1, v2
682 ; GFX9ALL-NEXT: v_and_b32_e32 v0, 0xffff, v0
683 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
685 ; GFX10-LABEL: zext_div_fixup_f16:
687 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688 ; GFX10-NEXT: v_div_fixup_f16 v0, v0, v1, v2
689 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
690 ; GFX10-NEXT: s_setpc_b64 s[30:31]
692 ; GFX11-LABEL: zext_div_fixup_f16:
694 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
695 ; GFX11-NEXT: v_div_fixup_f16 v0, v0, v1, v2
696 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
697 ; GFX11-NEXT: s_setpc_b64 s[30:31]
698 %div.fixup = call half @llvm.amdgcn.div.fixup.f16(half %x, half %y, half %z)
699 %cast = bitcast half %div.fixup to i16
700 %zext = zext i16 %cast to i32
704 ; We technically could eliminate the and on gfx9 here but we don't try
705 ; to inspect the source of the fptrunc. We're only worried about cases
706 ; that lower to v_fma_mix* instructions.
; float-to-half fptrunc, bitcast to i16 and zero-extended to i32. The
; gfx8/gfx9 prefixes expect only v_cvt_f16_f32; the gfx10/gfx11 prefixes
; expect it to be followed by a v_and_b32 with 0xffff.
707 define i32 @zext_fptrunc_f16(float %x) {
708 ; GFX8-LABEL: zext_fptrunc_f16:
710 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
711 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
712 ; GFX8-NEXT: s_setpc_b64 s[30:31]
714 ; GFX9ALL-LABEL: zext_fptrunc_f16:
716 ; GFX9ALL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717 ; GFX9ALL-NEXT: v_cvt_f16_f32_e32 v0, v0
718 ; GFX9ALL-NEXT: s_setpc_b64 s[30:31]
720 ; GFX10-LABEL: zext_fptrunc_f16:
722 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
724 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
725 ; GFX10-NEXT: s_setpc_b64 s[30:31]
727 ; GFX11-LABEL: zext_fptrunc_f16:
729 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
731 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
732 ; GFX11-NEXT: s_setpc_b64 s[30:31]
733 %fptrunc = fptrunc float %x to half
734 %cast = bitcast half %fptrunc to i16
735 %zext = zext i16 %cast to i32
; f32 llvm.fma truncated to half, bitcast to i16 and zero-extended to i32.
; This is the one test where gfx900 and gfx906 use separate check prefixes:
; gfx803 and gfx900 expect v_fma_f32 + v_cvt_f16_f32 with no mask, while
; gfx906, gfx10 and gfx11 expect v_fma_mixlo_f16 followed by a v_and_b32
; with 0xffff.
739 define i32 @zext_fptrunc_fma_f16(float %x, float %y, float %z) {
740 ; GFX8-LABEL: zext_fptrunc_fma_f16:
742 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743 ; GFX8-NEXT: v_fma_f32 v0, v0, v1, v2
744 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
745 ; GFX8-NEXT: s_setpc_b64 s[30:31]
747 ; GFX900-LABEL: zext_fptrunc_fma_f16:
749 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750 ; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2
751 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
752 ; GFX900-NEXT: s_setpc_b64 s[30:31]
754 ; GFX906-LABEL: zext_fptrunc_fma_f16:
756 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
757 ; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
758 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
759 ; GFX906-NEXT: s_setpc_b64 s[30:31]
761 ; GFX10-LABEL: zext_fptrunc_fma_f16:
763 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
764 ; GFX10-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
765 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
766 ; GFX10-NEXT: s_setpc_b64 s[30:31]
768 ; GFX11-LABEL: zext_fptrunc_fma_f16:
770 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771 ; GFX11-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2
772 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
773 ; GFX11-NEXT: s_setpc_b64 s[30:31]
774 %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
775 %fptrunc = fptrunc float %fma to half
776 %cast = bitcast half %fptrunc to i16
777 %zext = zext i16 %cast to i32
; Declarations of the intrinsics called by zext_div_fixup_f16, zext_fma_f16
; and zext_fptrunc_fma_f16 respectively.
781 declare half @llvm.amdgcn.div.fixup.f16(half, half, half)
782 declare half @llvm.fma.f16(half, half, half)
783 declare float @llvm.fma.f32(float, float, float)