1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
4 ; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
5 ; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; Scalar i32 and-not: (src0 & ~src1) should select a single scalar and-not
; instruction (named s_and_not1_b32 on gfx11, s_andn2_b32 on older targets).
7 define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
8 ; GCN-LABEL: s_andn2_i32:
10 ; GCN-NEXT: s_andn2_b32 s0, s2, s3
11 ; GCN-NEXT: ; return to shader part epilog
13 ; GFX10-LABEL: s_andn2_i32:
15 ; GFX10-NEXT: s_andn2_b32 s0, s2, s3
16 ; GFX10-NEXT: ; return to shader part epilog
18 ; GFX11-LABEL: s_andn2_i32:
20 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
21 ; GFX11-NEXT: ; return to shader part epilog
22 %not.src1 = xor i32 %src1, -1
23 %and = and i32 %src0, %not.src1
; Commuted form (~src1 & src0): must fold to the same single and-not as the
; canonical operand order above.
27 define amdgpu_ps i32 @s_andn2_i32_commute(i32 inreg %src0, i32 inreg %src1) {
28 ; GCN-LABEL: s_andn2_i32_commute:
30 ; GCN-NEXT: s_andn2_b32 s0, s2, s3
31 ; GCN-NEXT: ; return to shader part epilog
33 ; GFX10-LABEL: s_andn2_i32_commute:
35 ; GFX10-NEXT: s_andn2_b32 s0, s2, s3
36 ; GFX10-NEXT: ; return to shader part epilog
38 ; GFX11-LABEL: s_andn2_i32_commute:
40 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
41 ; GFX11-NEXT: ; return to shader part epilog
42 %not.src1 = xor i32 %src1, -1
43 %and = and i32 %not.src1, %src0
; ~src1 is also returned on its own, so a standalone s_not_b32 must be kept
; alongside the folded and-not.
47 define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_use(i32 inreg %src0, i32 inreg %src1) {
48 ; GCN-LABEL: s_andn2_i32_multi_use:
50 ; GCN-NEXT: s_not_b32 s1, s3
51 ; GCN-NEXT: s_andn2_b32 s0, s2, s3
52 ; GCN-NEXT: ; return to shader part epilog
54 ; GFX10-LABEL: s_andn2_i32_multi_use:
56 ; GFX10-NEXT: s_andn2_b32 s0, s2, s3
57 ; GFX10-NEXT: s_not_b32 s1, s3
58 ; GFX10-NEXT: ; return to shader part epilog
60 ; GFX11-LABEL: s_andn2_i32_multi_use:
62 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
63 ; GFX11-NEXT: s_not_b32 s1, s3
64 ; GFX11-NEXT: ; return to shader part epilog
65 %not.src1 = xor i32 %src1, -1
66 %and = and i32 %src0, %not.src1
67 %insert.0 = insertvalue { i32, i32 } undef, i32 %and, 0
68 %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %not.src1, 1
69 ret { i32, i32 } %insert.1
; ~src2 feeds two separate ands; both uses fold into and-not instructions and
; the xor itself is never materialized.
72 define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) {
73 ; GCN-LABEL: s_andn2_i32_multi_foldable_use:
75 ; GCN-NEXT: s_andn2_b32 s0, s2, s4
76 ; GCN-NEXT: s_andn2_b32 s1, s3, s4
77 ; GCN-NEXT: ; return to shader part epilog
79 ; GFX10-LABEL: s_andn2_i32_multi_foldable_use:
81 ; GFX10-NEXT: s_andn2_b32 s0, s2, s4
82 ; GFX10-NEXT: s_andn2_b32 s1, s3, s4
83 ; GFX10-NEXT: ; return to shader part epilog
85 ; GFX11-LABEL: s_andn2_i32_multi_foldable_use:
87 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s4
88 ; GFX11-NEXT: s_and_not1_b32 s1, s3, s4
89 ; GFX11-NEXT: ; return to shader part epilog
90 %not.src2 = xor i32 %src2, -1
91 %and0 = and i32 %src0, %not.src2
92 %and1 = and i32 %src1, %not.src2
93 %insert.0 = insertvalue { i32, i32 } undef, i32 %and0, 0
94 %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %and1, 1
95 ret { i32, i32 } %insert.1
; VALU (divergent) version: there is no vector and-not, so a v_not_b32 /
; v_and_b32 pair is expected on all targets.
98 define i32 @v_andn2_i32(i32 %src0, i32 %src1) {
99 ; GCN-LABEL: v_andn2_i32:
101 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GCN-NEXT: v_not_b32_e32 v1, v1
103 ; GCN-NEXT: v_and_b32_e32 v0, v0, v1
104 ; GCN-NEXT: s_setpc_b64 s[30:31]
106 ; GFX10PLUS-LABEL: v_andn2_i32:
107 ; GFX10PLUS: ; %bb.0:
108 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
110 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
111 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
112 %not.src1 = xor i32 %src1, -1
113 %and = and i32 %src0, %not.src1
; Uniform src0, divergent src1: the not must be done on the VALU since its
; operand lives in a VGPR; the SGPR operand folds into the v_and.
117 define amdgpu_ps float @v_andn2_i32_sv(i32 inreg %src0, i32 %src1) {
118 ; GCN-LABEL: v_andn2_i32_sv:
120 ; GCN-NEXT: v_not_b32_e32 v0, v0
121 ; GCN-NEXT: v_and_b32_e32 v0, s2, v0
122 ; GCN-NEXT: ; return to shader part epilog
124 ; GFX10PLUS-LABEL: v_andn2_i32_sv:
125 ; GFX10PLUS: ; %bb.0:
126 ; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
127 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
128 ; GFX10PLUS-NEXT: ; return to shader part epilog
129 %not.src1 = xor i32 %src1, -1
130 %and = and i32 %src0, %not.src1
131 %cast = bitcast i32 %and to float
; Divergent src0, uniform src1: the not of the uniform operand can be done on
; the SALU, leaving a single VALU and.
135 define amdgpu_ps float @v_andn2_i32_vs(i32 %src0, i32 inreg %src1) {
136 ; GCN-LABEL: v_andn2_i32_vs:
138 ; GCN-NEXT: s_not_b32 s0, s2
139 ; GCN-NEXT: v_and_b32_e32 v0, s0, v0
140 ; GCN-NEXT: ; return to shader part epilog
142 ; GFX10PLUS-LABEL: v_andn2_i32_vs:
143 ; GFX10PLUS: ; %bb.0:
144 ; GFX10PLUS-NEXT: s_not_b32 s0, s2
145 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, s0, v0
146 ; GFX10PLUS-NEXT: ; return to shader part epilog
147 %not.src1 = xor i32 %src1, -1
148 %and = and i32 %src0, %not.src1
149 %cast = bitcast i32 %and to float
; Scalar i64 and-not: folds to a single 64-bit scalar and-not on a register
; pair.
153 define amdgpu_ps i64 @s_andn2_i64(i64 inreg %src0, i64 inreg %src1) {
154 ; GCN-LABEL: s_andn2_i64:
156 ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
157 ; GCN-NEXT: ; return to shader part epilog
159 ; GFX10-LABEL: s_andn2_i64:
161 ; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
162 ; GFX10-NEXT: ; return to shader part epilog
164 ; GFX11-LABEL: s_andn2_i64:
166 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
167 ; GFX11-NEXT: ; return to shader part epilog
168 %not.src1 = xor i64 %src1, -1
169 %and = and i64 %src0, %not.src1
; Commuted i64 form (~src1 & src0): same single 64-bit and-not expected.
173 define amdgpu_ps i64 @s_andn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
174 ; GCN-LABEL: s_andn2_i64_commute:
176 ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
177 ; GCN-NEXT: ; return to shader part epilog
179 ; GFX10-LABEL: s_andn2_i64_commute:
181 ; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
182 ; GFX10-NEXT: ; return to shader part epilog
184 ; GFX11-LABEL: s_andn2_i64_commute:
186 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
187 ; GFX11-NEXT: ; return to shader part epilog
188 %not.src1 = xor i64 %src1, -1
189 %and = and i64 %not.src1, %src0
; ~src2 has two foldable i64 uses; both become 64-bit and-not and no
; standalone not is emitted.
193 define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) {
194 ; GCN-LABEL: s_andn2_i64_multi_foldable_use:
196 ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[6:7]
197 ; GCN-NEXT: s_andn2_b64 s[2:3], s[4:5], s[6:7]
198 ; GCN-NEXT: ; return to shader part epilog
200 ; GFX10-LABEL: s_andn2_i64_multi_foldable_use:
202 ; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[6:7]
203 ; GFX10-NEXT: s_andn2_b64 s[2:3], s[4:5], s[6:7]
204 ; GFX10-NEXT: ; return to shader part epilog
206 ; GFX11-LABEL: s_andn2_i64_multi_foldable_use:
208 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[6:7]
209 ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[4:5], s[6:7]
210 ; GFX11-NEXT: ; return to shader part epilog
211 %not.src2 = xor i64 %src2, -1
212 %and0 = and i64 %src0, %not.src2
213 %and1 = and i64 %src1, %not.src2
214 %insert.0 = insertvalue { i64, i64 } undef, i64 %and0, 0
215 %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %and1, 1
216 ret { i64, i64 } %insert.1
; ~src1 is returned as the second struct element, so an s_not_b64 survives;
; the older target additionally shuffles the pair into the ABI return regs.
219 define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) {
220 ; GCN-LABEL: s_andn2_i64_multi_use:
222 ; GCN-NEXT: s_not_b64 s[6:7], s[4:5]
223 ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
224 ; GCN-NEXT: s_mov_b32 s2, s6
225 ; GCN-NEXT: s_mov_b32 s3, s7
226 ; GCN-NEXT: ; return to shader part epilog
228 ; GFX10-LABEL: s_andn2_i64_multi_use:
230 ; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
231 ; GFX10-NEXT: s_not_b64 s[2:3], s[4:5]
232 ; GFX10-NEXT: ; return to shader part epilog
234 ; GFX11-LABEL: s_andn2_i64_multi_use:
236 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
237 ; GFX11-NEXT: s_not_b64 s[2:3], s[4:5]
238 ; GFX11-NEXT: ; return to shader part epilog
239 %not.src1 = xor i64 %src1, -1
240 %and = and i64 %src0, %not.src1
241 %insert.0 = insertvalue { i64, i64 } undef, i64 %and, 0
242 %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %not.src1, 1
243 ret { i64, i64 } %insert.1
; Divergent i64: legalized to two 32-bit halves, each getting its own
; v_not / v_and pair.
246 define i64 @v_andn2_i64(i64 %src0, i64 %src1) {
247 ; GCN-LABEL: v_andn2_i64:
249 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250 ; GCN-NEXT: v_not_b32_e32 v2, v2
251 ; GCN-NEXT: v_not_b32_e32 v3, v3
252 ; GCN-NEXT: v_and_b32_e32 v0, v0, v2
253 ; GCN-NEXT: v_and_b32_e32 v1, v1, v3
254 ; GCN-NEXT: s_setpc_b64 s[30:31]
256 ; GFX10PLUS-LABEL: v_andn2_i64:
257 ; GFX10PLUS: ; %bb.0:
258 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259 ; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2
260 ; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3
261 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
262 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
263 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
264 %not.src1 = xor i64 %src1, -1
265 %and = and i64 %src0, %not.src1
; Uniform src0 / divergent src1 i64: per-half VALU not, then per-half and with
; the SGPR halves.
269 define amdgpu_ps <2 x float> @v_andn2_i64_sv(i64 inreg %src0, i64 %src1) {
270 ; GCN-LABEL: v_andn2_i64_sv:
272 ; GCN-NEXT: v_not_b32_e32 v0, v0
273 ; GCN-NEXT: v_not_b32_e32 v1, v1
274 ; GCN-NEXT: v_and_b32_e32 v0, s2, v0
275 ; GCN-NEXT: v_and_b32_e32 v1, s3, v1
276 ; GCN-NEXT: ; return to shader part epilog
278 ; GFX10PLUS-LABEL: v_andn2_i64_sv:
279 ; GFX10PLUS: ; %bb.0:
280 ; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
281 ; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
282 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
283 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, s3, v1
284 ; GFX10PLUS-NEXT: ; return to shader part epilog
285 %not.src1 = xor i64 %src1, -1
286 %and = and i64 %src0, %not.src1
287 %cast = bitcast i64 %and to <2 x float>
288 ret <2 x float> %cast
; Divergent src0 / uniform src1 i64: one scalar s_not_b64 on the uniform pair,
; then per-half VALU ands.
291 define amdgpu_ps <2 x float> @v_andn2_i64_vs(i64 %src0, i64 inreg %src1) {
292 ; GCN-LABEL: v_andn2_i64_vs:
294 ; GCN-NEXT: s_not_b64 s[0:1], s[2:3]
295 ; GCN-NEXT: v_and_b32_e32 v0, s0, v0
296 ; GCN-NEXT: v_and_b32_e32 v1, s1, v1
297 ; GCN-NEXT: ; return to shader part epilog
299 ; GFX10PLUS-LABEL: v_andn2_i64_vs:
300 ; GFX10PLUS: ; %bb.0:
301 ; GFX10PLUS-NEXT: s_not_b64 s[0:1], s[2:3]
302 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, s0, v0
303 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, s1, v1
304 ; GFX10PLUS-NEXT: ; return to shader part epilog
305 %not.src1 = xor i64 %src1, -1
306 %and = and i64 %src0, %not.src1
307 %cast = bitcast i64 %and to <2 x float>
308 ret <2 x float> %cast
; Uniform <2 x i32>: the two lanes occupy one SGPR pair, so the whole vector
; folds into a single 64-bit scalar and-not.
311 define amdgpu_ps <2 x i32> @s_andn2_v2i32(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
312 ; GCN-LABEL: s_andn2_v2i32:
314 ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
315 ; GCN-NEXT: ; return to shader part epilog
317 ; GFX10-LABEL: s_andn2_v2i32:
319 ; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
320 ; GFX10-NEXT: ; return to shader part epilog
322 ; GFX11-LABEL: s_andn2_v2i32:
324 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
325 ; GFX11-NEXT: ; return to shader part epilog
326 %not.src1 = xor <2 x i32> %src1, <i32 -1, i32 -1>
327 %and = and <2 x i32> %src0, %not.src1
; Commuted <2 x i32> form: identical single 64-bit and-not expected.
331 define amdgpu_ps <2 x i32> @s_andn2_v2i32_commute(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
332 ; GCN-LABEL: s_andn2_v2i32_commute:
334 ; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
335 ; GCN-NEXT: ; return to shader part epilog
337 ; GFX10-LABEL: s_andn2_v2i32_commute:
339 ; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
340 ; GFX10-NEXT: ; return to shader part epilog
342 ; GFX11-LABEL: s_andn2_v2i32_commute:
344 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
345 ; GFX11-NEXT: ; return to shader part epilog
346 %not.src1 = xor <2 x i32> %src1, <i32 -1, i32 -1>
347 %and = and <2 x i32> %not.src1, %src0
; Scalar i16: no 16-bit scalar ALU, so the op is performed with the 32-bit
; scalar and-not (high bits are don't-care for the i16 return).
351 define amdgpu_ps i16 @s_andn2_i16(i16 inreg %src0, i16 inreg %src1) {
352 ; GCN-LABEL: s_andn2_i16:
354 ; GCN-NEXT: s_andn2_b32 s0, s2, s3
355 ; GCN-NEXT: ; return to shader part epilog
357 ; GFX10-LABEL: s_andn2_i16:
359 ; GFX10-NEXT: s_andn2_b32 s0, s2, s3
360 ; GFX10-NEXT: ; return to shader part epilog
362 ; GFX11-LABEL: s_andn2_i16:
364 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
365 ; GFX11-NEXT: ; return to shader part epilog
366 %not.src1 = xor i16 %src1, -1
367 %and = and i16 %src0, %not.src1
; Commuted i16 form: same 32-bit scalar and-not as the canonical order.
371 define amdgpu_ps i16 @s_andn2_i16_commute(i16 inreg %src0, i16 inreg %src1) {
372 ; GCN-LABEL: s_andn2_i16_commute:
374 ; GCN-NEXT: s_andn2_b32 s0, s2, s3
375 ; GCN-NEXT: ; return to shader part epilog
377 ; GFX10-LABEL: s_andn2_i16_commute:
379 ; GFX10-NEXT: s_andn2_b32 s0, s2, s3
380 ; GFX10-NEXT: ; return to shader part epilog
382 ; GFX11-LABEL: s_andn2_i16_commute:
384 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
385 ; GFX11-NEXT: ; return to shader part epilog
386 %not.src1 = xor i16 %src1, -1
387 %and = and i16 %not.src1, %src0
; ~src1 is also returned, so the 16-bit not is kept as an explicit
; s_xor_b32 with -1 beside the folded and-not.
391 define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_use(i16 inreg %src0, i16 inreg %src1) {
392 ; GCN-LABEL: s_andn2_i16_multi_use:
394 ; GCN-NEXT: s_xor_b32 s1, s3, -1
395 ; GCN-NEXT: s_andn2_b32 s0, s2, s3
396 ; GCN-NEXT: ; return to shader part epilog
398 ; GFX10-LABEL: s_andn2_i16_multi_use:
400 ; GFX10-NEXT: s_andn2_b32 s0, s2, s3
401 ; GFX10-NEXT: s_xor_b32 s1, s3, -1
402 ; GFX10-NEXT: ; return to shader part epilog
404 ; GFX11-LABEL: s_andn2_i16_multi_use:
406 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
407 ; GFX11-NEXT: s_xor_b32 s1, s3, -1
408 ; GFX11-NEXT: ; return to shader part epilog
409 %not.src1 = xor i16 %src1, -1
410 %and = and i16 %src0, %not.src1
411 %insert.0 = insertvalue { i16, i16 } undef, i16 %and, 0
412 %insert.1 = insertvalue { i16, i16 } %insert.0, i16 %not.src1, 1
413 ret { i16, i16 } %insert.1
; Two foldable i16 uses of ~src2: both become 32-bit scalar and-not, no
; standalone xor remains.
416 define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_foldable_use(i16 inreg %src0, i16 inreg %src1, i16 inreg %src2) {
417 ; GCN-LABEL: s_andn2_i16_multi_foldable_use:
419 ; GCN-NEXT: s_andn2_b32 s0, s2, s4
420 ; GCN-NEXT: s_andn2_b32 s1, s3, s4
421 ; GCN-NEXT: ; return to shader part epilog
423 ; GFX10-LABEL: s_andn2_i16_multi_foldable_use:
425 ; GFX10-NEXT: s_andn2_b32 s0, s2, s4
426 ; GFX10-NEXT: s_andn2_b32 s1, s3, s4
427 ; GFX10-NEXT: ; return to shader part epilog
429 ; GFX11-LABEL: s_andn2_i16_multi_foldable_use:
431 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s4
432 ; GFX11-NEXT: s_and_not1_b32 s1, s3, s4
433 ; GFX11-NEXT: ; return to shader part epilog
434 %not.src2 = xor i16 %src2, -1
435 %and0 = and i16 %src0, %not.src2
436 %and1 = and i16 %src1, %not.src2
437 %insert.0 = insertvalue { i16, i16 } undef, i16 %and0, 0
438 %insert.1 = insertvalue { i16, i16 } %insert.0, i16 %and1, 1
439 ret { i16, i16 } %insert.1
; Divergent i16: lowered as a 32-bit VALU xor with -1 followed by and; upper
; bits are irrelevant to the i16 result.
442 define i16 @v_andn2_i16(i16 %src0, i16 %src1) {
443 ; GCN-LABEL: v_andn2_i16:
445 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446 ; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
447 ; GCN-NEXT: v_and_b32_e32 v0, v0, v1
448 ; GCN-NEXT: s_setpc_b64 s[30:31]
450 ; GFX10PLUS-LABEL: v_andn2_i16:
451 ; GFX10PLUS: ; %bb.0:
452 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453 ; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
454 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
455 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
456 %not.src1 = xor i16 %src1, -1
457 %and = and i16 %src0, %not.src1
; Uniform src0 / divergent src1 i16: the result is zero-extended to i32, hence
; the trailing 0xffff mask on the VALU result.
461 define amdgpu_ps float @v_andn2_i16_sv(i16 inreg %src0, i16 %src1) {
462 ; GCN-LABEL: v_andn2_i16_sv:
464 ; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
465 ; GCN-NEXT: v_and_b32_e32 v0, s2, v0
466 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
467 ; GCN-NEXT: ; return to shader part epilog
469 ; GFX10PLUS-LABEL: v_andn2_i16_sv:
470 ; GFX10PLUS: ; %bb.0:
471 ; GFX10PLUS-NEXT: v_xor_b32_e32 v0, -1, v0
472 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
473 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
474 ; GFX10PLUS-NEXT: ; return to shader part epilog
475 %not.src1 = xor i16 %src1, -1
476 %and = and i16 %src0, %not.src1
477 %zext = zext i16 %and to i32
478 %cast.zext = bitcast i32 %zext to float
; Divergent src0 / uniform src1 i16: the not happens on the SALU; the zext of
; the result again forces the 0xffff mask.
482 define amdgpu_ps float @v_andn2_i16_vs(i16 %src0, i16 inreg %src1) {
483 ; GCN-LABEL: v_andn2_i16_vs:
485 ; GCN-NEXT: s_xor_b32 s0, s2, -1
486 ; GCN-NEXT: v_and_b32_e32 v0, s0, v0
487 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
488 ; GCN-NEXT: ; return to shader part epilog
490 ; GFX10PLUS-LABEL: v_andn2_i16_vs:
491 ; GFX10PLUS: ; %bb.0:
492 ; GFX10PLUS-NEXT: s_xor_b32 s0, s2, -1
493 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, s0, v0
494 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
495 ; GFX10PLUS-NEXT: ; return to shader part epilog
496 %not.src1 = xor i16 %src1, -1
497 %and = and i16 %src0, %not.src1
498 %zext = zext i16 %and to i32
499 %cast.zext = bitcast i32 %zext to float
; Uniform <2 x i16>: packed targets (gfx9+) fold into one 32-bit and-not;
; gfx6 first repacks each vector from its two 16-bit halves.
503 define amdgpu_ps i32 @s_andn2_v2i16(<2 x i16> inreg %src0, <2 x i16> inreg %src1) {
504 ; GFX6-LABEL: s_andn2_v2i16:
506 ; GFX6-NEXT: s_lshl_b32 s0, s3, 16
507 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
508 ; GFX6-NEXT: s_or_b32 s0, s0, s1
509 ; GFX6-NEXT: s_lshl_b32 s1, s5, 16
510 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
511 ; GFX6-NEXT: s_or_b32 s1, s1, s2
512 ; GFX6-NEXT: s_xor_b32 s1, s1, -1
513 ; GFX6-NEXT: s_and_b32 s0, s0, s1
514 ; GFX6-NEXT: ; return to shader part epilog
516 ; GFX9-LABEL: s_andn2_v2i16:
518 ; GFX9-NEXT: s_andn2_b32 s0, s2, s3
519 ; GFX9-NEXT: ; return to shader part epilog
521 ; GFX10-LABEL: s_andn2_v2i16:
523 ; GFX10-NEXT: s_andn2_b32 s0, s2, s3
524 ; GFX10-NEXT: ; return to shader part epilog
526 ; GFX11-LABEL: s_andn2_v2i16:
528 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
529 ; GFX11-NEXT: ; return to shader part epilog
530 %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
531 %and = and <2 x i16> %src0, %not.src1
532 %cast = bitcast <2 x i16> %and to i32
; Commuted <2 x i16> form: same folding; only the gfx6 final and swaps its
; operand order.
536 define amdgpu_ps i32 @s_andn2_v2i16_commute(<2 x i16> inreg %src0, <2 x i16> inreg %src1) {
537 ; GFX6-LABEL: s_andn2_v2i16_commute:
539 ; GFX6-NEXT: s_lshl_b32 s0, s3, 16
540 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
541 ; GFX6-NEXT: s_or_b32 s0, s0, s1
542 ; GFX6-NEXT: s_lshl_b32 s1, s5, 16
543 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
544 ; GFX6-NEXT: s_or_b32 s1, s1, s2
545 ; GFX6-NEXT: s_xor_b32 s1, s1, -1
546 ; GFX6-NEXT: s_and_b32 s0, s1, s0
547 ; GFX6-NEXT: ; return to shader part epilog
549 ; GFX9-LABEL: s_andn2_v2i16_commute:
551 ; GFX9-NEXT: s_andn2_b32 s0, s2, s3
552 ; GFX9-NEXT: ; return to shader part epilog
554 ; GFX10-LABEL: s_andn2_v2i16_commute:
556 ; GFX10-NEXT: s_andn2_b32 s0, s2, s3
557 ; GFX10-NEXT: ; return to shader part epilog
559 ; GFX11-LABEL: s_andn2_v2i16_commute:
561 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
562 ; GFX11-NEXT: ; return to shader part epilog
563 %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
564 %and = and <2 x i16> %not.src1, %src0
565 %cast = bitcast <2 x i16> %and to i32
; ~src1 escapes as the second struct element: gfx9+ keep an s_xor_b32 with -1
; next to the folded and-not; gfx6 repacks as usual.
569 define amdgpu_ps { i32, i32 } @s_andn2_v2i16_multi_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1) {
570 ; GFX6-LABEL: s_andn2_v2i16_multi_use:
572 ; GFX6-NEXT: s_lshl_b32 s0, s3, 16
573 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
574 ; GFX6-NEXT: s_or_b32 s0, s0, s1
575 ; GFX6-NEXT: s_lshl_b32 s1, s5, 16
576 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
577 ; GFX6-NEXT: s_or_b32 s1, s1, s2
578 ; GFX6-NEXT: s_xor_b32 s1, s1, -1
579 ; GFX6-NEXT: s_and_b32 s0, s0, s1
580 ; GFX6-NEXT: ; return to shader part epilog
582 ; GFX9-LABEL: s_andn2_v2i16_multi_use:
584 ; GFX9-NEXT: s_xor_b32 s1, s3, -1
585 ; GFX9-NEXT: s_andn2_b32 s0, s2, s3
586 ; GFX9-NEXT: ; return to shader part epilog
588 ; GFX10-LABEL: s_andn2_v2i16_multi_use:
590 ; GFX10-NEXT: s_andn2_b32 s0, s2, s3
591 ; GFX10-NEXT: s_xor_b32 s1, s3, -1
592 ; GFX10-NEXT: ; return to shader part epilog
594 ; GFX11-LABEL: s_andn2_v2i16_multi_use:
596 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
597 ; GFX11-NEXT: s_xor_b32 s1, s3, -1
598 ; GFX11-NEXT: ; return to shader part epilog
599 %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
600 %and = and <2 x i16> %src0, %not.src1
602 %cast.0 = bitcast <2 x i16> %and to i32
603 %cast.1 = bitcast <2 x i16> %not.src1 to i32
604 %insert.0 = insertvalue { i32, i32 } undef, i32 %cast.0, 0
605 %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %cast.1, 1
606 ret { i32, i32 } %insert.1
; Two foldable <2 x i16> uses of ~src2: both fold to and-not on packed
; targets; gfx6 shares one xor between the two ands after repacking.
609 define amdgpu_ps { i32, i32 } @s_andn2_v2i16_multi_foldable_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1, <2 x i16> inreg %src2) {
610 ; GFX6-LABEL: s_andn2_v2i16_multi_foldable_use:
612 ; GFX6-NEXT: s_lshl_b32 s0, s3, 16
613 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
614 ; GFX6-NEXT: s_or_b32 s0, s0, s1
615 ; GFX6-NEXT: s_lshl_b32 s1, s5, 16
616 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
617 ; GFX6-NEXT: s_or_b32 s1, s1, s2
618 ; GFX6-NEXT: s_lshl_b32 s2, s7, 16
619 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
620 ; GFX6-NEXT: s_or_b32 s2, s2, s3
621 ; GFX6-NEXT: s_xor_b32 s2, s2, -1
622 ; GFX6-NEXT: s_and_b32 s0, s0, s2
623 ; GFX6-NEXT: s_and_b32 s1, s1, s2
624 ; GFX6-NEXT: ; return to shader part epilog
626 ; GFX9-LABEL: s_andn2_v2i16_multi_foldable_use:
628 ; GFX9-NEXT: s_andn2_b32 s0, s2, s4
629 ; GFX9-NEXT: s_andn2_b32 s1, s3, s4
630 ; GFX9-NEXT: ; return to shader part epilog
632 ; GFX10-LABEL: s_andn2_v2i16_multi_foldable_use:
634 ; GFX10-NEXT: s_andn2_b32 s0, s2, s4
635 ; GFX10-NEXT: s_andn2_b32 s1, s3, s4
636 ; GFX10-NEXT: ; return to shader part epilog
638 ; GFX11-LABEL: s_andn2_v2i16_multi_foldable_use:
640 ; GFX11-NEXT: s_and_not1_b32 s0, s2, s4
641 ; GFX11-NEXT: s_and_not1_b32 s1, s3, s4
642 ; GFX11-NEXT: ; return to shader part epilog
643 %not.src2 = xor <2 x i16> %src2, <i16 -1, i16 -1>
644 %and0 = and <2 x i16> %src0, %not.src2
645 %and1 = and <2 x i16> %src1, %not.src2
647 %cast.0 = bitcast <2 x i16> %and0 to i32
648 %cast.1 = bitcast <2 x i16> %and1 to i32
649 %insert.0 = insertvalue { i32, i32 } undef, i32 %cast.0, 0
650 %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %cast.1, 1
651 ret { i32, i32 } %insert.1
; Divergent <2 x i16>: packed targets use one 32-bit xor/and pair; gfx6
; rebuilds the packed value from per-lane VGPRs first.
654 define <2 x i16> @v_andn2_v2i16(<2 x i16> %src0, <2 x i16> %src1) {
655 ; GFX6-LABEL: v_andn2_v2i16:
657 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
659 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
660 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
661 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
662 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
663 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
664 ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
665 ; GFX6-NEXT: v_and_b32_e32 v0, v0, v1
666 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
667 ; GFX6-NEXT: s_setpc_b64 s[30:31]
669 ; GFX9-LABEL: v_andn2_v2i16:
671 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672 ; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
673 ; GFX9-NEXT: v_and_b32_e32 v0, v0, v1
674 ; GFX9-NEXT: s_setpc_b64 s[30:31]
676 ; GFX10PLUS-LABEL: v_andn2_v2i16:
677 ; GFX10PLUS: ; %bb.0:
678 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
679 ; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
680 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
681 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
682 %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
683 %and = and <2 x i16> %src0, %not.src1
; Uniform <3 x i16> returned as i48: the odd element count is legalized
; through 64-bit scalar pieces, followed by repacking/masking of the result.
688 define amdgpu_ps i48 @s_andn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
689 ; GFX6-LABEL: s_andn2_v3i16:
691 ; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
692 ; GFX6-NEXT: s_mov_b32 s0, -1
693 ; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
694 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
695 ; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
696 ; GFX6-NEXT: s_mov_b32 s1, 0xffff
697 ; GFX6-NEXT: s_or_b32 s6, s5, s6
698 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
699 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
700 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
701 ; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
702 ; GFX6-NEXT: s_or_b32 s2, s2, s3
703 ; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
704 ; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
705 ; GFX6-NEXT: s_lshr_b32 s2, s0, 16
706 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
707 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
708 ; GFX6-NEXT: s_or_b32 s0, s0, s2
709 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
710 ; GFX6-NEXT: ; return to shader part epilog
712 ; GFX9-LABEL: s_andn2_v3i16:
714 ; GFX9-NEXT: s_mov_b64 s[0:1], -1
715 ; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
716 ; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
717 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
718 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
719 ; GFX9-NEXT: s_lshl_b32 s2, s2, 16
720 ; GFX9-NEXT: s_or_b32 s0, s0, s2
721 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
722 ; GFX9-NEXT: ; return to shader part epilog
724 ; GFX10PLUS-LABEL: s_andn2_v3i16:
725 ; GFX10PLUS: ; %bb.0:
726 ; GFX10PLUS-NEXT: s_mov_b64 s[0:1], -1
727 ; GFX10PLUS-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
728 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
729 ; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
730 ; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
731 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 16
732 ; GFX10PLUS-NEXT: s_and_b32 s1, s1, 0xffff
733 ; GFX10PLUS-NEXT: s_or_b32 s0, s0, s2
734 ; GFX10PLUS-NEXT: ; return to shader part epilog
735 %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -1>
736 %and = and <3 x i16> %src0, %not.src1
737 %cast = bitcast <3 x i16> %and to i48
; Commuted <3 x i16> form: identical legalization; only the final 64-bit and
; has swapped operands.
741 define amdgpu_ps i48 @s_andn2_v3i16_commute(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
742 ; GFX6-LABEL: s_andn2_v3i16_commute:
744 ; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
745 ; GFX6-NEXT: s_mov_b32 s0, -1
746 ; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
747 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
748 ; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
749 ; GFX6-NEXT: s_mov_b32 s1, 0xffff
750 ; GFX6-NEXT: s_or_b32 s6, s5, s6
751 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
752 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
753 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
754 ; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
755 ; GFX6-NEXT: s_or_b32 s2, s2, s3
756 ; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
757 ; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
758 ; GFX6-NEXT: s_lshr_b32 s2, s0, 16
759 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
760 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
761 ; GFX6-NEXT: s_or_b32 s0, s0, s2
762 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
763 ; GFX6-NEXT: ; return to shader part epilog
765 ; GFX9-LABEL: s_andn2_v3i16_commute:
767 ; GFX9-NEXT: s_mov_b64 s[0:1], -1
768 ; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
769 ; GFX9-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
770 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
771 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
772 ; GFX9-NEXT: s_lshl_b32 s2, s2, 16
773 ; GFX9-NEXT: s_or_b32 s0, s0, s2
774 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
775 ; GFX9-NEXT: ; return to shader part epilog
777 ; GFX10PLUS-LABEL: s_andn2_v3i16_commute:
778 ; GFX10PLUS: ; %bb.0:
779 ; GFX10PLUS-NEXT: s_mov_b64 s[0:1], -1
780 ; GFX10PLUS-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
781 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
782 ; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
783 ; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
784 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 16
785 ; GFX10PLUS-NEXT: s_and_b32 s1, s1, 0xffff
786 ; GFX10PLUS-NEXT: s_or_b32 s0, s0, s2
787 ; GFX10PLUS-NEXT: ; return to shader part epilog
788 %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -1>
789 %and = and <3 x i16> %not.src1, %src0
790 %cast = bitcast <3 x i16> %and to i48
; ~src1 is also returned as a second i48, so the 64-bit xor result must be
; kept live and repacked separately from the and result.
794 define amdgpu_ps { i48, i48 } @s_andn2_v3i16_multi_use(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
795 ; GFX6-LABEL: s_andn2_v3i16_multi_use:
797 ; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
798 ; GFX6-NEXT: s_mov_b32 s0, -1
799 ; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
800 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
801 ; GFX6-NEXT: s_mov_b32 s1, 0xffff
802 ; GFX6-NEXT: s_or_b32 s6, s5, s6
803 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
804 ; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
805 ; GFX6-NEXT: s_and_b32 s1, s3, 0xffff
806 ; GFX6-NEXT: s_and_b32 s0, s2, 0xffff
807 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
808 ; GFX6-NEXT: s_or_b32 s0, s0, s1
809 ; GFX6-NEXT: s_and_b32 s1, s4, 0xffff
810 ; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
811 ; GFX6-NEXT: s_lshr_b32 s2, s0, 16
812 ; GFX6-NEXT: s_lshr_b32 s5, s6, 16
813 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
814 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
815 ; GFX6-NEXT: s_or_b32 s0, s0, s2
816 ; GFX6-NEXT: s_and_b32 s2, s6, 0xffff
817 ; GFX6-NEXT: s_lshl_b32 s3, s5, 16
818 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
819 ; GFX6-NEXT: s_or_b32 s2, s2, s3
820 ; GFX6-NEXT: s_and_b32 s3, s7, 0xffff
821 ; GFX6-NEXT: ; return to shader part epilog
823 ; GFX9-LABEL: s_andn2_v3i16_multi_use:
825 ; GFX9-NEXT: s_mov_b64 s[0:1], -1
826 ; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1]
827 ; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
828 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
829 ; GFX9-NEXT: s_lshr_b32 s6, s4, 16
830 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
831 ; GFX9-NEXT: s_lshl_b32 s2, s2, 16
832 ; GFX9-NEXT: s_or_b32 s0, s0, s2
833 ; GFX9-NEXT: s_and_b32 s2, s4, 0xffff
834 ; GFX9-NEXT: s_lshl_b32 s3, s6, 16
835 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
836 ; GFX9-NEXT: s_or_b32 s2, s2, s3
837 ; GFX9-NEXT: s_and_b32 s3, s5, 0xffff
838 ; GFX9-NEXT: ; return to shader part epilog
840 ; GFX10PLUS-LABEL: s_andn2_v3i16_multi_use:
841 ; GFX10PLUS: ; %bb.0:
842 ; GFX10PLUS-NEXT: s_mov_b64 s[0:1], -1
843 ; GFX10PLUS-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1]
844 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
845 ; GFX10PLUS-NEXT: s_lshr_b32 s3, s4, 16
846 ; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
847 ; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
848 ; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 16
849 ; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, 16
850 ; GFX10PLUS-NEXT: s_or_b32 s0, s0, s2
851 ; GFX10PLUS-NEXT: s_and_b32 s2, s4, 0xffff
852 ; GFX10PLUS-NEXT: s_and_b32 s1, s1, 0xffff
853 ; GFX10PLUS-NEXT: s_or_b32 s2, s2, s3
854 ; GFX10PLUS-NEXT: s_and_b32 s3, s5, 0xffff
855 ; GFX10PLUS-NEXT: ; return to shader part epilog
856 %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -1>
857 %and = and <3 x i16> %src0, %not.src1
858 %cast.0 = bitcast <3 x i16> %and to i48
859 %cast.1 = bitcast <3 x i16> %not.src1 to i48
860 %insert.0 = insertvalue { i48, i48 } undef, i48 %cast.0, 0
861 %insert.1 = insertvalue { i48, i48 } %insert.0, i48 %cast.1, 1
862 ret { i48, i48 } %insert.1
; Divergent <3 x i16> with non-splat mask <-1,-1,-11>: the third lane xors
; with 0xfff5 (the 16-bit encoding of -11) instead of being a plain not.
865 define <3 x i16> @v_andn2_v3i16(<3 x i16> %src0, <3 x i16> %src1) {
866 ; GFX6-LABEL: v_andn2_v3i16:
868 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
869 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
870 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
871 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
872 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
873 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
874 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
875 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
876 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5
877 ; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3
878 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
879 ; GFX6-NEXT: v_xor_b32_e32 v4, 0xfff5, v4
880 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
881 ; GFX6-NEXT: v_and_b32_e32 v0, v0, v3
882 ; GFX6-NEXT: v_and_b32_e32 v2, v1, v4
883 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
884 ; GFX6-NEXT: s_setpc_b64 s[30:31]
886 ; GFX9-LABEL: v_andn2_v3i16:
888 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
890 ; GFX9-NEXT: v_xor_b32_e32 v3, -11, v3
891 ; GFX9-NEXT: v_and_b32_e32 v0, v0, v2
892 ; GFX9-NEXT: v_and_b32_e32 v1, v1, v3
893 ; GFX9-NEXT: s_setpc_b64 s[30:31]
895 ; GFX10PLUS-LABEL: v_andn2_v3i16:
896 ; GFX10PLUS: ; %bb.0:
897 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
898 ; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
899 ; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -11, v3
900 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
901 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
902 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
903 %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -11>
904 %and = and <3 x i16> %src0, %not.src1
; Uniform <4 x i16> as i64: lowered as a 64-bit xor with an all-ones pair
; followed by a 64-bit and; gfx6 first repacks the four 16-bit lanes.
908 define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1) {
909 ; GFX6-LABEL: s_andn2_v4i16:
911 ; GFX6-NEXT: s_lshl_b32 s0, s3, 16
912 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
913 ; GFX6-NEXT: s_or_b32 s0, s0, s1
914 ; GFX6-NEXT: s_lshl_b32 s1, s5, 16
915 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
916 ; GFX6-NEXT: s_or_b32 s1, s1, s2
917 ; GFX6-NEXT: s_lshl_b32 s2, s7, 16
918 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
919 ; GFX6-NEXT: s_or_b32 s2, s2, s3
920 ; GFX6-NEXT: s_lshl_b32 s3, s9, 16
921 ; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
922 ; GFX6-NEXT: s_or_b32 s3, s3, s4
923 ; GFX6-NEXT: s_mov_b32 s4, -1
924 ; GFX6-NEXT: s_mov_b32 s5, s4
925 ; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
926 ; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
927 ; GFX6-NEXT: ; return to shader part epilog
929 ; GFX9-LABEL: s_andn2_v4i16:
931 ; GFX9-NEXT: s_mov_b32 s0, -1
932 ; GFX9-NEXT: s_mov_b32 s1, s0
933 ; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
934 ; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
935 ; GFX9-NEXT: ; return to shader part epilog
937 ; GFX10PLUS-LABEL: s_andn2_v4i16:
938 ; GFX10PLUS: ; %bb.0:
939 ; GFX10PLUS-NEXT: s_mov_b32 s0, -1
940 ; GFX10PLUS-NEXT: s_mov_b32 s1, s0
941 ; GFX10PLUS-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
942 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
943 ; GFX10PLUS-NEXT: ; return to shader part epilog
944 %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1>
945 %and = and <4 x i16> %src0, %not.src1
946 %cast = bitcast <4 x i16> %and to i64
; Same as s_andn2_v4i16 but with the and operands commuted in the IR
; (the inverted value is the first operand of the and). The generated
; code differs only in the operand order of the final 64-bit s_and.
950 define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inreg %src1) {
951 ; GFX6-LABEL: s_andn2_v4i16_commute:
953 ; GFX6-NEXT: s_lshl_b32 s0, s3, 16
954 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
955 ; GFX6-NEXT: s_or_b32 s0, s0, s1
956 ; GFX6-NEXT: s_lshl_b32 s1, s5, 16
957 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
958 ; GFX6-NEXT: s_or_b32 s1, s1, s2
959 ; GFX6-NEXT: s_lshl_b32 s2, s7, 16
960 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
961 ; GFX6-NEXT: s_or_b32 s2, s2, s3
962 ; GFX6-NEXT: s_lshl_b32 s3, s9, 16
963 ; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
964 ; GFX6-NEXT: s_or_b32 s3, s3, s4
965 ; GFX6-NEXT: s_mov_b32 s4, -1
966 ; GFX6-NEXT: s_mov_b32 s5, s4
967 ; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
968 ; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
969 ; GFX6-NEXT: ; return to shader part epilog
971 ; GFX9-LABEL: s_andn2_v4i16_commute:
973 ; GFX9-NEXT: s_mov_b32 s0, -1
974 ; GFX9-NEXT: s_mov_b32 s1, s0
975 ; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
976 ; GFX9-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
977 ; GFX9-NEXT: ; return to shader part epilog
979 ; GFX10PLUS-LABEL: s_andn2_v4i16_commute:
980 ; GFX10PLUS: ; %bb.0:
981 ; GFX10PLUS-NEXT: s_mov_b32 s0, -1
982 ; GFX10PLUS-NEXT: s_mov_b32 s1, s0
983 ; GFX10PLUS-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
984 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
985 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: and (xor %src1, all-ones), %src0 — commuted operand order.
986 %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1>
987 %and = and <4 x i16> %not.src1, %src0
988 %cast = bitcast <4 x i16> %and to i64
; Multi-use variant: the inverted value (%not.src1) is returned as the
; second struct member in addition to feeding the and, so the xor must be
; kept live; GFX9 and GFX10 onward copy the xor result out to the second
; return register pair after the and.
993 define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4 x i16> inreg %src1) {
994 ; GFX6-LABEL: s_andn2_v4i16_multi_use:
995 ; GFX6-NEXT: s_lshl_b32 s0, s3, 16
996 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
997 ; GFX6-NEXT: s_or_b32 s0, s0, s1
998 ; GFX6-NEXT: s_lshl_b32 s1, s5, 16
999 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
1000 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1001 ; GFX6-NEXT: s_lshl_b32 s2, s7, 16
1002 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
1003 ; GFX6-NEXT: s_or_b32 s2, s2, s3
1004 ; GFX6-NEXT: s_lshl_b32 s3, s9, 16
1005 ; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
1006 ; GFX6-NEXT: s_or_b32 s3, s3, s4
1007 ; GFX6-NEXT: s_mov_b32 s4, -1
1008 ; GFX6-NEXT: s_mov_b32 s5, s4
1009 ; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1010 ; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1011 ; GFX6-NEXT: ; return to shader part epilog
1013 ; GFX9-LABEL: s_andn2_v4i16_multi_use:
1015 ; GFX9-NEXT: s_mov_b32 s0, -1
1016 ; GFX9-NEXT: s_mov_b32 s1, s0
1017 ; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1]
1018 ; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
1019 ; GFX9-NEXT: s_mov_b32 s2, s4
1020 ; GFX9-NEXT: s_mov_b32 s3, s5
1021 ; GFX9-NEXT: ; return to shader part epilog
1023 ; GFX10PLUS-LABEL: s_andn2_v4i16_multi_use:
1024 ; GFX10PLUS: ; %bb.0:
1025 ; GFX10PLUS-NEXT: s_mov_b32 s0, -1
1026 ; GFX10PLUS-NEXT: s_mov_b32 s1, s0
1027 ; GFX10PLUS-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1]
1028 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
1029 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
1030 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
1031 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: both the and result and the not itself are returned.
1032 %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1>
1033 %and = and <4 x i16> %src0, %not.src1
1035 %cast.0 = bitcast <4 x i16> %and to i64
1036 %cast.1 = bitcast <4 x i16> %not.src1 to i64
1037 %insert.0 = insertvalue { i64, i64 } undef, i64 %cast.0, 0
1038 %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %cast.1, 1
1039 ret { i64, i64 } %insert.1
; Multi-foldable-use variant: one inverted value (not of %src2) feeds two
; separate ands. The xor is emitted once and its 64-bit result pair is
; reused by both s_and instructions.
1042 define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg %src0, <4 x i16> inreg %src1, <4 x i16> inreg %src2) {
1043 ; GFX6-LABEL: s_andn2_v4i16_multi_foldable_use:
1045 ; GFX6-NEXT: s_lshl_b32 s0, s3, 16
1046 ; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
1047 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1048 ; GFX6-NEXT: s_lshl_b32 s1, s5, 16
1049 ; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
1050 ; GFX6-NEXT: s_or_b32 s1, s1, s2
1051 ; GFX6-NEXT: s_lshl_b32 s2, s7, 16
1052 ; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
1053 ; GFX6-NEXT: s_or_b32 s2, s2, s3
1054 ; GFX6-NEXT: s_lshl_b32 s3, s9, 16
1055 ; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
1056 ; GFX6-NEXT: s_or_b32 s3, s3, s4
1057 ; GFX6-NEXT: s_lshl_b32 s4, s11, 16
1058 ; GFX6-NEXT: s_and_b32 s5, s10, 0xffff
1059 ; GFX6-NEXT: s_or_b32 s4, s4, s5
1060 ; GFX6-NEXT: s_lshl_b32 s5, s13, 16
1061 ; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
1062 ; GFX6-NEXT: s_or_b32 s5, s5, s6
1063 ; GFX6-NEXT: s_mov_b32 s6, -1
1064 ; GFX6-NEXT: s_mov_b32 s7, s6
1065 ; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
1066 ; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
1067 ; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
1068 ; GFX6-NEXT: ; return to shader part epilog
1070 ; GFX9-LABEL: s_andn2_v4i16_multi_foldable_use:
1072 ; GFX9-NEXT: s_mov_b32 s0, -1
1073 ; GFX9-NEXT: s_mov_b32 s1, s0
1074 ; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
1075 ; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
1076 ; GFX9-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
1077 ; GFX9-NEXT: ; return to shader part epilog
1079 ; GFX10PLUS-LABEL: s_andn2_v4i16_multi_foldable_use:
1080 ; GFX10PLUS: ; %bb.0:
1081 ; GFX10PLUS-NEXT: s_mov_b32 s0, -1
1082 ; GFX10PLUS-NEXT: s_mov_b32 s1, s0
1083 ; GFX10PLUS-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
1084 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
1085 ; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
1086 ; GFX10PLUS-NEXT: ; return to shader part epilog
; IR under test: a single not of %src2 consumed by two independent ands.
1087 %not.src2 = xor <4 x i16> %src2, <i16 -1, i16 -1, i16 -1, i16 -1>
1088 %and0 = and <4 x i16> %src0, %not.src2
1089 %and1 = and <4 x i16> %src1, %not.src2
1091 %cast.0 = bitcast <4 x i16> %and0 to i64
1092 %cast.1 = bitcast <4 x i16> %and1 to i64
1093 %insert.0 = insertvalue { i64, i64 } undef, i64 %cast.0, 0
1094 %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %cast.1, 1
1095 ret { i64, i64 } %insert.1
1098 define <4 x i16> @v_andn2_v4i16(<4 x i16> %src0, <4 x i16> %src1) {
1099 ; GFX6-LABEL: v_andn2_v4i16:
1101 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1102 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1103 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
1104 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
1105 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
1106 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
1107 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1108 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5
1109 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
1110 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1111 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7
1112 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
1113 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
1114 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
1115 ; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3
1116 ; GFX6-NEXT: v_and_b32_e32 v0, v0, v2
1117 ; GFX6-NEXT: v_and_b32_e32 v2, v1, v3
1118 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1119 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
1120 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1122 ; GFX9-LABEL: v_andn2_v4i16:
1124 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1125 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
1126 ; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3
1127 ; GFX9-NEXT: v_and_b32_e32 v0, v0, v2
1128 ; GFX9-NEXT: v_and_b32_e32 v1, v1, v3
1129 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1131 ; GFX10PLUS-LABEL: v_andn2_v4i16:
1132 ; GFX10PLUS: ; %bb.0:
1133 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1134 ; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
1135 ; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -1, v3
1136 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
1137 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
1138 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1139 %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1>
1140 %and = and <4 x i16> %src0, %not.src1