1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
; Places %arg0 in element 1 of a <2 x i16> whose element 0 stays undef, then
; passes the vector to side-effecting inline asm through a "v" operand.
; On both the gfx900 and fiji runs the checks expect one 16-bit left shift of
; v0 ahead of the asm block.
5 define void @undef_lo_v2i16(i16 %arg0) {
6 ; GFX9-LABEL: undef_lo_v2i16:
8 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
10 ; GFX9-NEXT: ;;#ASMSTART
12 ; GFX9-NEXT: ;;#ASMEND
13 ; GFX9-NEXT: s_setpc_b64 s[30:31]
15 ; GFX8-LABEL: undef_lo_v2i16:
17 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
19 ; GFX8-NEXT: ;;#ASMSTART
21 ; GFX8-NEXT: ;;#ASMEND
22 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Only index 1 is written, so index 0 of the vector remains undef.
23 %undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
24 call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.lo);
; Half-precision variant: %arg0 goes into element 1 of a <2 x half> whose
; element 0 stays undef, and the vector is consumed by side-effecting inline
; asm. The checks for both targets expect one 16-bit left shift of v0 before
; the asm block.
28 define void @undef_lo_v2f16(half %arg0) {
29 ; GFX9-LABEL: undef_lo_v2f16:
31 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
33 ; GFX9-NEXT: ;;#ASMSTART
35 ; GFX9-NEXT: ;;#ASMEND
36 ; GFX9-NEXT: s_setpc_b64 s[30:31]
38 ; GFX8-LABEL: undef_lo_v2f16:
40 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
42 ; GFX8-NEXT: ;;#ASMSTART
44 ; GFX8-NEXT: ;;#ASMEND
45 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Only index 1 is written, so index 0 of the vector remains undef.
46 %undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
47 call void asm sideeffect "; use $0", "v"(<2 x half> %undef.lo);
; Builds a <2 x half> with %arg0 in element 1 and element 0 undef, adds the
; splat constant 1.0 to it, and passes the sum to side-effecting inline asm.
; Expected code differs per target:
;   - gfx900: shift v0 left by 16, then one packed v_pk_add_f16 with 1.0.
;   - fiji:   v_add_f16_sdwa writing WORD_1 with 0x3c00 as the second source,
;             followed by an OR of 0x7e00 into the result.
; NOTE(review): 0x3c00 looks like the half encoding of 1.0 and 0x7e00 like a
; half NaN filling the undef lane -- confirm against the backend.
51 define void @undef_lo_op_v2f16(half %arg0) {
52 ; GFX9-LABEL: undef_lo_op_v2f16:
54 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
56 ; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
57 ; GFX9-NEXT: ;;#ASMSTART
59 ; GFX9-NEXT: ;;#ASMEND
60 ; GFX9-NEXT: s_setpc_b64 s[30:31]
62 ; GFX8-LABEL: undef_lo_op_v2f16:
64 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x3c00
66 ; GFX8-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
67 ; GFX8-NEXT: v_or_b32_e32 v0, 0x7e00, v0
68 ; GFX8-NEXT: ;;#ASMSTART
70 ; GFX8-NEXT: ;;#ASMEND
71 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Index 0 stays undef; the fadd below operates on both lanes of the vector.
72 %undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
73 %op = fadd <2 x half> %undef.lo, <half 1.0, half 1.0>
74 call void asm sideeffect "; use $0", "v"(<2 x half> %op);
; Integer counterpart of the partially-undef add: %arg0 sits in element 1 of
; a <2 x i16> (element 0 undef), the splat constant 99 is added, and the sum
; is passed to side-effecting inline asm.
; Expected code differs per target:
;   - gfx900: shift v0 left by 16, load 0x63 into s4, one v_pk_add_u16.
;   - fiji:   load 0x63 into v1, one v_add_u16_sdwa writing WORD_1.
78 define void @undef_lo_op_v2i16(i16 %arg0) {
79 ; GFX9-LABEL: undef_lo_op_v2i16:
81 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
83 ; GFX9-NEXT: s_movk_i32 s4, 0x63
84 ; GFX9-NEXT: v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
85 ; GFX9-NEXT: ;;#ASMSTART
87 ; GFX9-NEXT: ;;#ASMEND
88 ; GFX9-NEXT: s_setpc_b64 s[30:31]
90 ; GFX8-LABEL: undef_lo_op_v2i16:
92 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x63
94 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
95 ; GFX8-NEXT: ;;#ASMSTART
97 ; GFX8-NEXT: ;;#ASMEND
98 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Index 0 stays undef; 99 (0x63 in the checks) is added to both lanes.
99 %undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
100 %op = add <2 x i16> %undef.lo, <i16 99, i16 99>
101 call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
; Four-element case: %arg0 is inserted at index 1 of a <4 x i16> and the
; other three elements stay undef. The vector is passed to side-effecting
; inline asm, which the checks show as the register pair v[0:1]. Both targets
; are expected to emit only one 16-bit left shift of v0 before the asm block.
105 define void @undef_lo3_v4i16(i16 %arg0) {
106 ; GFX9-LABEL: undef_lo3_v4i16:
108 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
110 ; GFX9-NEXT: ;;#ASMSTART
111 ; GFX9-NEXT: ; use v[0:1]
112 ; GFX9-NEXT: ;;#ASMEND
113 ; GFX9-NEXT: s_setpc_b64 s[30:31]
115 ; GFX8-LABEL: undef_lo3_v4i16:
117 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
119 ; GFX8-NEXT: ;;#ASMSTART
120 ; GFX8-NEXT: ; use v[0:1]
121 ; GFX8-NEXT: ;;#ASMEND
122 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Only index 1 is written; indices 0, 2 and 3 remain undef.
123 %undef.lo = insertelement <4 x i16> undef, i16 %arg0, i32 1
124 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
; Half-precision four-element case: %arg0 is inserted at index 1 of a
; <4 x half> and the other three elements stay undef. The vector is passed to
; side-effecting inline asm, shown in the checks as v[0:1]. Both targets are
; expected to emit only one 16-bit left shift of v0 before the asm block.
128 define void @undef_lo3_v4f16(half %arg0) {
129 ; GFX9-LABEL: undef_lo3_v4f16:
131 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
133 ; GFX9-NEXT: ;;#ASMSTART
134 ; GFX9-NEXT: ; use v[0:1]
135 ; GFX9-NEXT: ;;#ASMEND
136 ; GFX9-NEXT: s_setpc_b64 s[30:31]
138 ; GFX8-LABEL: undef_lo3_v4f16:
140 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
142 ; GFX8-NEXT: ;;#ASMSTART
143 ; GFX8-NEXT: ; use v[0:1]
144 ; GFX8-NEXT: ;;#ASMEND
145 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Only index 1 is written; indices 0, 2 and 3 remain undef.
146 %undef.lo = insertelement <4 x half> undef, half %arg0, i32 1
147 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
; Widens a <2 x i16> argument to <4 x i16> with shuffle mask <1, 1, 2, 3>:
; result elements 0 and 1 both take %arg0[1], while elements 2 and 3 select
; from the undef second operand. The result is passed to side-effecting
; inline asm, shown in the checks as v[0:1].
; Expected code differs per target:
;   - gfx900: v1 = v0 >> 16, then v_and_or_b32 merges v0 masked by 0xffff0000
;             with v1.
;   - fiji:   v0 = v0 >> 16, v1 = v0 << 16, then v0 = v0 | v1.
151 define void @undef_lo2_v4i16(<2 x i16> %arg0) {
152 ; GFX9-LABEL: undef_lo2_v4i16:
154 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
156 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff0000
157 ; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
158 ; GFX9-NEXT: ;;#ASMSTART
159 ; GFX9-NEXT: ; use v[0:1]
160 ; GFX9-NEXT: ;;#ASMEND
161 ; GFX9-NEXT: s_setpc_b64 s[30:31]
163 ; GFX8-LABEL: undef_lo2_v4i16:
165 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
167 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
168 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
169 ; GFX8-NEXT: ;;#ASMSTART
170 ; GFX8-NEXT: ; use v[0:1]
171 ; GFX8-NEXT: ;;#ASMEND
172 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Mask indices 0-1 address %arg0 and 2-3 address the undef operand.
173 %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
174 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
; Widens a <2 x half> argument to <4 x half> with shuffle mask <1, 1, 2, 3>:
; result elements 0 and 1 both take %arg0[1], while elements 2 and 3 select
; from the undef second operand. The result is passed to side-effecting
; inline asm, shown in the checks as v[0:1].
; Expected code differs per target:
;   - gfx900: v0 = v0 >> 16, v1 = v0 & 0xffff, then v_lshl_or_b32 combines
;             v0 shifted left by 16 with v1.
;   - fiji:   v0 = v0 >> 16, v1 = v0 << 16, then v0 = v0 | v1.
178 define void @undef_lo2_v4f16(<2 x half> %arg0) {
179 ; GFX9-LABEL: undef_lo2_v4f16:
181 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
183 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0
184 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
185 ; GFX9-NEXT: ;;#ASMSTART
186 ; GFX9-NEXT: ; use v[0:1]
187 ; GFX9-NEXT: ;;#ASMEND
188 ; GFX9-NEXT: s_setpc_b64 s[30:31]
190 ; GFX8-LABEL: undef_lo2_v4f16:
192 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
194 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
195 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
196 ; GFX8-NEXT: ;;#ASMSTART
197 ; GFX8-NEXT: ; use v[0:1]
198 ; GFX8-NEXT: ;;#ASMEND
199 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Mask indices 0-1 address %arg0 and 2-3 address the undef operand.
200 %undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
201 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
; Places %arg0 in element 0 of a <2 x i16> whose element 1 stays undef, then
; passes the vector to side-effecting inline asm. On both targets the checks
; expect no instruction between the entry s_waitcnt and the asm block: v0 is
; used as it arrived.
205 define void @undef_hi_v2i16(i16 %arg0) {
206 ; GFX9-LABEL: undef_hi_v2i16:
208 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209 ; GFX9-NEXT: ;;#ASMSTART
210 ; GFX9-NEXT: ; use v0
211 ; GFX9-NEXT: ;;#ASMEND
212 ; GFX9-NEXT: s_setpc_b64 s[30:31]
214 ; GFX8-LABEL: undef_hi_v2i16:
216 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX8-NEXT: ;;#ASMSTART
218 ; GFX8-NEXT: ; use v0
219 ; GFX8-NEXT: ;;#ASMEND
220 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Only index 0 is written, so index 1 of the vector remains undef.
221 %undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
222 call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.hi);
; Half-precision variant: %arg0 goes into element 0 of a <2 x half> whose
; element 1 stays undef, and the vector is consumed by side-effecting inline
; asm. On both targets the checks expect no instruction between the entry
; s_waitcnt and the asm block.
226 define void @undef_hi_v2f16(half %arg0) {
227 ; GFX9-LABEL: undef_hi_v2f16:
229 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; GFX9-NEXT: ;;#ASMSTART
231 ; GFX9-NEXT: ; use v0
232 ; GFX9-NEXT: ;;#ASMEND
233 ; GFX9-NEXT: s_setpc_b64 s[30:31]
235 ; GFX8-LABEL: undef_hi_v2f16:
237 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GFX8-NEXT: ;;#ASMSTART
239 ; GFX8-NEXT: ; use v0
240 ; GFX8-NEXT: ;;#ASMEND
241 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Only index 0 is written, so index 1 of the vector remains undef.
242 %undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
243 call void asm sideeffect "; use $0", "v"(<2 x half> %undef.hi);
; Builds a <2 x half> with %arg0 in element 0 and element 1 undef, adds the
; splat constant 1.0 to it, and passes the sum to side-effecting inline asm.
; Expected code differs per target:
;   - gfx900: a single packed v_pk_add_f16 with 1.0, no preparatory shift.
;   - fiji:   a scalar v_add_f16_e32 with 1.0, followed by an OR of
;             0x7e000000 into the result.
; NOTE(review): 0x7e000000 looks like a half NaN placed in the upper 16 bits
; for the undef lane -- confirm against the backend.
247 define void @undef_hi_op_v2f16(half %arg0) {
248 ; GFX9-LABEL: undef_hi_op_v2f16:
250 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251 ; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
252 ; GFX9-NEXT: ;;#ASMSTART
253 ; GFX9-NEXT: ; use v0
254 ; GFX9-NEXT: ;;#ASMEND
255 ; GFX9-NEXT: s_setpc_b64 s[30:31]
257 ; GFX8-LABEL: undef_hi_op_v2f16:
259 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
260 ; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
261 ; GFX8-NEXT: v_or_b32_e32 v0, 0x7e000000, v0
262 ; GFX8-NEXT: ;;#ASMSTART
263 ; GFX8-NEXT: ; use v0
264 ; GFX8-NEXT: ;;#ASMEND
265 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Index 1 stays undef; the fadd below operates on both lanes of the vector.
266 %undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
267 %op = fadd <2 x half> %undef.hi, <half 1.0, half 1.0>
268 call void asm sideeffect "; use $0", "v"(<2 x half> %op);
; Integer counterpart: %arg0 sits in element 0 of a <2 x i16> (element 1
; undef), the splat constant 99 is added, and the sum is passed to
; side-effecting inline asm.
; Expected code differs per target:
;   - gfx900: load 0x63 into s4, then one v_pk_add_u16.
;   - fiji:   a single v_add_u16_e32 with the literal 0x63.
272 define void @undef_hi_op_v2i16(i16 %arg0) {
273 ; GFX9-LABEL: undef_hi_op_v2i16:
275 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276 ; GFX9-NEXT: s_movk_i32 s4, 0x63
277 ; GFX9-NEXT: v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
278 ; GFX9-NEXT: ;;#ASMSTART
279 ; GFX9-NEXT: ; use v0
280 ; GFX9-NEXT: ;;#ASMEND
281 ; GFX9-NEXT: s_setpc_b64 s[30:31]
283 ; GFX8-LABEL: undef_hi_op_v2i16:
285 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286 ; GFX8-NEXT: v_add_u16_e32 v0, 0x63, v0
287 ; GFX8-NEXT: ;;#ASMSTART
288 ; GFX8-NEXT: ; use v0
289 ; GFX8-NEXT: ;;#ASMEND
290 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Index 1 stays undef; 99 (0x63 in the checks) is added to both lanes.
291 %undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
292 %op = add <2 x i16> %undef.hi, <i16 99, i16 99>
293 call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
; Four-element case: %arg0 is inserted at index 0 of a <4 x i16> and the
; other three elements stay undef. The vector is passed to side-effecting
; inline asm, shown in the checks as v[0:1]. On both targets the checks
; expect no instruction between the entry s_waitcnt and the asm block.
297 define void @undef_hi3_v4i16(i16 %arg0) {
298 ; GFX9-LABEL: undef_hi3_v4i16:
300 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301 ; GFX9-NEXT: ;;#ASMSTART
302 ; GFX9-NEXT: ; use v[0:1]
303 ; GFX9-NEXT: ;;#ASMEND
304 ; GFX9-NEXT: s_setpc_b64 s[30:31]
306 ; GFX8-LABEL: undef_hi3_v4i16:
308 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309 ; GFX8-NEXT: ;;#ASMSTART
310 ; GFX8-NEXT: ; use v[0:1]
311 ; GFX8-NEXT: ;;#ASMEND
312 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Only index 0 is written; indices 1, 2 and 3 remain undef.
313 %undef.hi = insertelement <4 x i16> undef, i16 %arg0, i32 0
314 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
; Half-precision four-element case: %arg0 is inserted at index 0 of a
; <4 x half> and the other three elements stay undef. The vector is passed to
; side-effecting inline asm, shown in the checks as v[0:1]. On both targets
; the checks expect no instruction between the entry s_waitcnt and the asm
; block.
318 define void @undef_hi3_v4f16(half %arg0) {
319 ; GFX9-LABEL: undef_hi3_v4f16:
321 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX9-NEXT: ;;#ASMSTART
323 ; GFX9-NEXT: ; use v[0:1]
324 ; GFX9-NEXT: ;;#ASMEND
325 ; GFX9-NEXT: s_setpc_b64 s[30:31]
327 ; GFX8-LABEL: undef_hi3_v4f16:
329 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330 ; GFX8-NEXT: ;;#ASMSTART
331 ; GFX8-NEXT: ; use v[0:1]
332 ; GFX8-NEXT: ;;#ASMEND
333 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Only index 0 is written; indices 1, 2 and 3 remain undef.
334 %undef.hi = insertelement <4 x half> undef, half %arg0, i32 0
335 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
; Widens a <2 x i16> argument to <4 x i16> with the identity shuffle mask
; <0, 1, 2, 3>: elements 0 and 1 come from %arg0 in order, elements 2 and 3
; select from the undef second operand. The result is passed to
; side-effecting inline asm, shown in the checks as v[0:1]. On both targets
; the checks expect no instruction between the entry s_waitcnt and the asm
; block.
339 define void @undef_hi2_v4i16(<2 x i16> %arg0) {
340 ; GFX9-LABEL: undef_hi2_v4i16:
342 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX9-NEXT: ;;#ASMSTART
344 ; GFX9-NEXT: ; use v[0:1]
345 ; GFX9-NEXT: ;;#ASMEND
346 ; GFX9-NEXT: s_setpc_b64 s[30:31]
348 ; GFX8-LABEL: undef_hi2_v4i16:
350 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GFX8-NEXT: ;;#ASMSTART
352 ; GFX8-NEXT: ; use v[0:1]
353 ; GFX8-NEXT: ;;#ASMEND
354 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Mask indices 0-1 address %arg0 and 2-3 address the undef operand.
355 %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
356 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
; Widens a <2 x half> argument to <4 x half> with the identity shuffle mask
; <0, 1, 2, 3>: elements 0 and 1 come from %arg0 in order, elements 2 and 3
; select from the undef second operand. The result is passed to
; side-effecting inline asm, shown in the checks as v[0:1]. On both targets
; the checks expect no instruction between the entry s_waitcnt and the asm
; block.
360 define void @undef_hi2_v4f16(<2 x half> %arg0) {
361 ; GFX9-LABEL: undef_hi2_v4f16:
363 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GFX9-NEXT: ;;#ASMSTART
365 ; GFX9-NEXT: ; use v[0:1]
366 ; GFX9-NEXT: ;;#ASMEND
367 ; GFX9-NEXT: s_setpc_b64 s[30:31]
369 ; GFX8-LABEL: undef_hi2_v4f16:
371 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372 ; GFX8-NEXT: ;;#ASMSTART
373 ; GFX8-NEXT: ; use v[0:1]
374 ; GFX8-NEXT: ;;#ASMEND
375 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Mask indices 0-1 address %arg0 and 2-3 address the undef operand.
376 %undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
377 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);