1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
5 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906 %s
6 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
7 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; Scalar i32 xnor whose inverted result is the only value produced here.
; Both check prefixes expect the two inreg operands (s0, s1) to be combined
; by a single s_xnor_b32.
; Review note - the definition of %xor is not visible in this excerpt;
; presumably it is the xor of %a and %b. TODO confirm.
9 define amdgpu_ps i32 @scalar_xnor_i32_one_use(i32 inreg %a, i32 inreg %b) {
10 ; GCN-LABEL: scalar_xnor_i32_one_use:
11 ; GCN: ; %bb.0: ; %entry
12 ; GCN-NEXT: s_xnor_b32 s0, s0, s1
13 ; GCN-NEXT: ; return to shader part epilog
15 ; GFX10-LABEL: scalar_xnor_i32_one_use:
16 ; GFX10: ; %bb.0: ; %entry
17 ; GFX10-NEXT: s_xnor_b32 s0, s0, s1
18 ; GFX10-NEXT: ; return to shader part epilog
; xor with -1 flips every bit of %xor.
21 %r0.val = xor i32 %xor, -1
25 ; FIXME: fails to match s_xnor_b32 for packed <2 x i16> operands.
; None of the check blocks below contains s_xnor_b32: gfx900, gfx906 and the
; GFX10 prefix expect s_xor_b32 followed by s_xor_b32 with -1, while gfx7 and
; gfx8 additionally pack/unpack the 16-bit halves with shift/and/or.
26 define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) {
27 ; GFX7-LABEL: scalar_xnor_v2i16_one_use:
28 ; GFX7: ; %bb.0: ; %entry
29 ; GFX7-NEXT: s_lshl_b32 s1, s1, 16
30 ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
31 ; GFX7-NEXT: s_or_b32 s0, s1, s0
32 ; GFX7-NEXT: s_lshl_b32 s1, s3, 16
33 ; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
34 ; GFX7-NEXT: s_or_b32 s1, s1, s2
35 ; GFX7-NEXT: s_xor_b32 s0, s0, s1
36 ; GFX7-NEXT: s_xor_b32 s0, s0, -1
37 ; GFX7-NEXT: ; return to shader part epilog
39 ; GFX8-LABEL: scalar_xnor_v2i16_one_use:
40 ; GFX8: ; %bb.0: ; %entry
41 ; GFX8-NEXT: s_mov_b32 s2, 0xffff
42 ; GFX8-NEXT: s_xor_b32 s0, s0, s1
43 ; GFX8-NEXT: s_mov_b32 s3, s2
44 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
45 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
46 ; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
47 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
48 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
49 ; GFX8-NEXT: s_or_b32 s0, s1, s0
50 ; GFX8-NEXT: ; return to shader part epilog
52 ; GFX900-LABEL: scalar_xnor_v2i16_one_use:
53 ; GFX900: ; %bb.0: ; %entry
54 ; GFX900-NEXT: s_xor_b32 s0, s0, s1
55 ; GFX900-NEXT: s_xor_b32 s0, s0, -1
56 ; GFX900-NEXT: ; return to shader part epilog
58 ; GFX906-LABEL: scalar_xnor_v2i16_one_use:
59 ; GFX906: ; %bb.0: ; %entry
60 ; GFX906-NEXT: s_xor_b32 s0, s0, s1
61 ; GFX906-NEXT: s_xor_b32 s0, s0, -1
62 ; GFX906-NEXT: ; return to shader part epilog
64 ; GFX10-LABEL: scalar_xnor_v2i16_one_use:
65 ; GFX10: ; %bb.0: ; %entry
66 ; GFX10-NEXT: s_xor_b32 s0, s0, s1
67 ; GFX10-NEXT: s_xor_b32 s0, s0, -1
68 ; GFX10-NEXT: ; return to shader part epilog
; Element-wise xor, then invert both lanes with an all-ones vector, and
; reinterpret the <2 x i16> result as a single i32.
70 %xor = xor <2 x i16> %a, %b
71 %r0.val = xor <2 x i16> %xor, <i16 -1, i16 -1>
72 %cast = bitcast <2 x i16> %r0.val to i32
; Scalar i32 case where %xor has two users (the inversion and an add).
; The checks expect a separate s_xor_b32 and s_not_b32 rather than a fused
; s_xnor_b32, plus an s_add_i32 that reuses the xor result.
; Review note - the definition of %xor is not visible in this excerpt;
; presumably it is the xor of %a and %b. TODO confirm.
76 define amdgpu_ps <2 x i32> @scalar_xnor_i32_mul_use(i32 inreg %a, i32 inreg %b) {
77 ; GCN-LABEL: scalar_xnor_i32_mul_use:
78 ; GCN: ; %bb.0: ; %entry
79 ; GCN-NEXT: s_xor_b32 s1, s0, s1
80 ; GCN-NEXT: s_not_b32 s2, s1
81 ; GCN-NEXT: s_add_i32 s1, s1, s0
82 ; GCN-NEXT: s_mov_b32 s0, s2
83 ; GCN-NEXT: ; return to shader part epilog
85 ; GFX10-LABEL: scalar_xnor_i32_mul_use:
86 ; GFX10: ; %bb.0: ; %entry
87 ; GFX10-NEXT: s_xor_b32 s1, s0, s1
88 ; GFX10-NEXT: s_not_b32 s2, s1
89 ; GFX10-NEXT: s_add_i32 s1, s1, s0
90 ; GFX10-NEXT: s_mov_b32 s0, s2
91 ; GFX10-NEXT: ; return to shader part epilog
; First user of %xor: bitwise inversion.
94 %r0.val = xor i32 %xor, -1
; Second user of %xor: keeps the plain xor value live.
95 %r1.val = add i32 %xor, %a
; Pack both results into a <2 x i32> (element 0 = inverted, element 1 = sum).
96 %ins0 = insertelement <2 x i32> undef, i32 %r0.val, i32 0
97 %ins1 = insertelement <2 x i32> %ins0, i32 %r1.val, i32 1
; Scalar i64 xnor with a single use of the xor result: both check prefixes
; expect one s_xnor_b64 on the register pairs s[0:1] and s[2:3].
101 define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) {
102 ; GCN-LABEL: scalar_xnor_i64_one_use:
104 ; GCN-NEXT: s_xnor_b64 s[0:1], s[0:1], s[2:3]
105 ; GCN-NEXT: ; return to shader part epilog
107 ; GFX10-LABEL: scalar_xnor_i64_one_use:
109 ; GFX10-NEXT: s_xnor_b64 s[0:1], s[0:1], s[2:3]
110 ; GFX10-NEXT: ; return to shader part epilog
; xor of the inputs, then xor with -1 to invert all 64 bits.
111 %xor = xor i64 %a, %b
112 %r0.val = xor i64 %xor, -1
116 ; FIXME: fails to match s_xnor_b64 for packed <4 x i16> operands.
; None of the check blocks below contains s_xnor_b64: gfx900, gfx906 and the
; GFX10 prefix expect two s_xor_b64 (the second against an all-ones register
; pair), while gfx7 and gfx8 additionally pack/unpack the 16-bit halves.
117 define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) {
118 ; GFX7-LABEL: scalar_xnor_v4i16_one_use:
120 ; GFX7-NEXT: s_lshl_b32 s1, s1, 16
121 ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
122 ; GFX7-NEXT: s_or_b32 s0, s1, s0
123 ; GFX7-NEXT: s_lshl_b32 s1, s3, 16
124 ; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
125 ; GFX7-NEXT: s_or_b32 s1, s1, s2
126 ; GFX7-NEXT: s_lshl_b32 s2, s5, 16
127 ; GFX7-NEXT: s_and_b32 s3, s4, 0xffff
128 ; GFX7-NEXT: s_or_b32 s2, s2, s3
129 ; GFX7-NEXT: s_lshl_b32 s3, s7, 16
130 ; GFX7-NEXT: s_and_b32 s4, s6, 0xffff
131 ; GFX7-NEXT: s_or_b32 s3, s3, s4
132 ; GFX7-NEXT: s_mov_b32 s4, -1
133 ; GFX7-NEXT: s_mov_b32 s5, s4
134 ; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
135 ; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
136 ; GFX7-NEXT: ; return to shader part epilog
138 ; GFX8-LABEL: scalar_xnor_v4i16_one_use:
140 ; GFX8-NEXT: s_mov_b32 s4, 0xffff
141 ; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
142 ; GFX8-NEXT: s_mov_b32 s5, s4
143 ; GFX8-NEXT: s_lshr_b32 s3, s0, 16
144 ; GFX8-NEXT: s_and_b32 s2, s0, 0xffff
145 ; GFX8-NEXT: s_lshr_b32 s7, s1, 16
146 ; GFX8-NEXT: s_and_b32 s6, s1, 0xffff
147 ; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[4:5]
148 ; GFX8-NEXT: s_xor_b64 s[2:3], s[6:7], s[4:5]
149 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
150 ; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
151 ; GFX8-NEXT: s_or_b32 s0, s1, s0
152 ; GFX8-NEXT: s_lshl_b32 s1, s3, 16
153 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
154 ; GFX8-NEXT: s_or_b32 s1, s1, s2
155 ; GFX8-NEXT: ; return to shader part epilog
157 ; GFX900-LABEL: scalar_xnor_v4i16_one_use:
159 ; GFX900-NEXT: s_mov_b32 s4, -1
160 ; GFX900-NEXT: s_mov_b32 s5, s4
161 ; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
162 ; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
163 ; GFX900-NEXT: ; return to shader part epilog
165 ; GFX906-LABEL: scalar_xnor_v4i16_one_use:
167 ; GFX906-NEXT: s_mov_b32 s4, -1
168 ; GFX906-NEXT: s_mov_b32 s5, s4
169 ; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
170 ; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
171 ; GFX906-NEXT: ; return to shader part epilog
173 ; GFX10-LABEL: scalar_xnor_v4i16_one_use:
175 ; GFX10-NEXT: s_mov_b32 s4, -1
176 ; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
177 ; GFX10-NEXT: s_mov_b32 s5, s4
178 ; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
179 ; GFX10-NEXT: ; return to shader part epilog
; Element-wise xor, then invert all four lanes with an all-ones vector, and
; reinterpret the <4 x i16> result as a single i64.
180 %xor = xor <4 x i16> %a, %b
181 %ret = xor <4 x i16> %xor, <i16 -1, i16 -1, i16 -1, i16 -1>
182 %cast = bitcast <4 x i16> %ret to i64
; Scalar i64 case where %xor has two users (the inversion and an add).
; The checks expect a separate s_xor_b64 and s_not_b64 rather than a fused
; s_xnor_b64; the 64-bit add is expected as an s_add_u32 / s_addc_u32 pair.
186 define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) {
187 ; GCN-LABEL: scalar_xnor_i64_mul_use:
189 ; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
190 ; GCN-NEXT: s_not_b64 s[4:5], s[2:3]
191 ; GCN-NEXT: s_add_u32 s2, s2, s0
192 ; GCN-NEXT: s_addc_u32 s3, s3, s1
193 ; GCN-NEXT: s_mov_b32 s0, s4
194 ; GCN-NEXT: s_mov_b32 s1, s5
195 ; GCN-NEXT: ; return to shader part epilog
197 ; GFX10-LABEL: scalar_xnor_i64_mul_use:
199 ; GFX10-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
200 ; GFX10-NEXT: s_not_b64 s[4:5], s[2:3]
201 ; GFX10-NEXT: s_add_u32 s2, s2, s0
202 ; GFX10-NEXT: s_addc_u32 s3, s3, s1
203 ; GFX10-NEXT: s_mov_b32 s0, s4
204 ; GFX10-NEXT: s_mov_b32 s1, s5
205 ; GFX10-NEXT: ; return to shader part epilog
206 %xor = xor i64 %a, %b
; First user of %xor: bitwise inversion.
207 %r0.val = xor i64 %xor, -1
; Second user of %xor: keeps the plain xor value live.
208 %r1.val = add i64 %xor, %a
; Pack both results into a <2 x i64> (element 0 = inverted, element 1 = sum).
209 %ins0 = insertelement <2 x i64> undef, i64 %r0.val, i32 0
210 %ins1 = insertelement <2 x i64> %ins0, i64 %r1.val, i32 1
; i32 xnor in a regular (non-shader) function whose operands are in v0/v1
; per the checks. Both check prefixes expect v_xor_b32 followed by v_not_b32;
; no fused vector xnor instruction appears in the expected output.
214 define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
215 ; GCN-LABEL: vector_xnor_i32_one_use:
216 ; GCN: ; %bb.0: ; %entry
217 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218 ; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
219 ; GCN-NEXT: v_not_b32_e32 v0, v0
220 ; GCN-NEXT: s_setpc_b64 s[30:31]
222 ; GFX10-LABEL: vector_xnor_i32_one_use:
223 ; GFX10: ; %bb.0: ; %entry
224 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225 ; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
226 ; GFX10-NEXT: v_not_b32_e32 v0, v0
227 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; xor of the inputs, then xor with -1 to invert every bit.
229 %xor = xor i32 %a, %b
230 %r = xor i32 %xor, -1
; i64 xnor in a regular (non-shader) function. The checks expect the 64-bit
; operation to be split into two 32-bit halves: v_xor_b32 on (v0, v2) and
; (v1, v3), followed by a v_not_b32 on each half.
234 define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
235 ; GCN-LABEL: vector_xnor_i64_one_use:
236 ; GCN: ; %bb.0: ; %entry
237 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GCN-NEXT: v_xor_b32_e32 v0, v0, v2
239 ; GCN-NEXT: v_xor_b32_e32 v1, v1, v3
240 ; GCN-NEXT: v_not_b32_e32 v0, v0
241 ; GCN-NEXT: v_not_b32_e32 v1, v1
242 ; GCN-NEXT: s_setpc_b64 s[30:31]
244 ; GFX10-LABEL: vector_xnor_i64_one_use:
245 ; GFX10: ; %bb.0: ; %entry
246 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
247 ; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
248 ; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
249 ; GFX10-NEXT: v_not_b32_e32 v0, v0
250 ; GFX10-NEXT: v_not_b32_e32 v1, v1
251 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; xor of the inputs, then xor with -1 to invert all 64 bits.
253 %xor = xor i64 %a, %b
254 %r = xor i64 %xor, -1
; Mixed-operand i32 xnor: %s is inreg (s0 in the checks) and %v is not (v0 in
; the checks), with the inreg value as the first xor operand. Both check
; prefixes expect v_xor_b32 with s0 as a source, followed by v_not_b32.
258 define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
259 ; GCN-LABEL: xnor_s_v_i32_one_use:
261 ; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
262 ; GCN-NEXT: v_not_b32_e32 v0, v0
263 ; GCN-NEXT: ; return to shader part epilog
265 ; GFX10-LABEL: xnor_s_v_i32_one_use:
267 ; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
268 ; GFX10-NEXT: v_not_b32_e32 v0, v0
269 ; GFX10-NEXT: ; return to shader part epilog
270 %xor = xor i32 %s, %v
271 %d = xor i32 %xor, -1
; Reinterpret the i32 bits as float to match the declared return type.
272 %cast = bitcast i32 %d to float
; Same as the s/v variant but with the xor operands commuted in the IR
; (%v first, %s second). The expected instructions are identical: v_xor_b32
; with s0 as a source, followed by v_not_b32.
276 define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
277 ; GCN-LABEL: xnor_v_s_i32_one_use:
279 ; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
280 ; GCN-NEXT: v_not_b32_e32 v0, v0
281 ; GCN-NEXT: ; return to shader part epilog
283 ; GFX10-LABEL: xnor_v_s_i32_one_use:
285 ; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
286 ; GFX10-NEXT: v_not_b32_e32 v0, v0
287 ; GFX10-NEXT: ; return to shader part epilog
288 %xor = xor i32 %v, %s
289 %d = xor i32 %xor, -1
; Reinterpret the i32 bits as float to match the declared return type.
290 %cast = bitcast i32 %d to float
; Mixed-operand i64 xnor: %a is inreg (s[0:1] in the checks) and the second
; operand is the non-inreg %b64 shifted left by 29. Every check block expects
; a 64-bit vector shift (v_lshl_b64 on gfx7, v_lshlrev_b64 elsewhere), then a
; v_xor_b32 and a v_not_b32 per 32-bit half.
294 define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) {
295 ; GFX7-LABEL: xnor_i64_s_v_one_use:
296 ; GFX7: ; %bb.0: ; %entry
297 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 29
298 ; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
299 ; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1
300 ; GFX7-NEXT: v_not_b32_e32 v0, v0
301 ; GFX7-NEXT: v_not_b32_e32 v1, v1
302 ; GFX7-NEXT: ; return to shader part epilog
304 ; GFX8-LABEL: xnor_i64_s_v_one_use:
305 ; GFX8: ; %bb.0: ; %entry
306 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
307 ; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
308 ; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1
309 ; GFX8-NEXT: v_not_b32_e32 v0, v0
310 ; GFX8-NEXT: v_not_b32_e32 v1, v1
311 ; GFX8-NEXT: ; return to shader part epilog
313 ; GFX900-LABEL: xnor_i64_s_v_one_use:
314 ; GFX900: ; %bb.0: ; %entry
315 ; GFX900-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
316 ; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
317 ; GFX900-NEXT: v_xor_b32_e32 v1, s1, v1
318 ; GFX900-NEXT: v_not_b32_e32 v0, v0
319 ; GFX900-NEXT: v_not_b32_e32 v1, v1
320 ; GFX900-NEXT: ; return to shader part epilog
322 ; GFX906-LABEL: xnor_i64_s_v_one_use:
323 ; GFX906: ; %bb.0: ; %entry
324 ; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
325 ; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
326 ; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
327 ; GFX906-NEXT: v_not_b32_e32 v0, v0
328 ; GFX906-NEXT: v_not_b32_e32 v1, v1
329 ; GFX906-NEXT: ; return to shader part epilog
331 ; GFX10-LABEL: xnor_i64_s_v_one_use:
332 ; GFX10: ; %bb.0: ; %entry
333 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
334 ; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
335 ; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
336 ; GFX10-NEXT: v_not_b32_e32 v0, v0
337 ; GFX10-NEXT: v_not_b32_e32 v1, v1
338 ; GFX10-NEXT: ; return to shader part epilog
; Shift the non-inreg input before it feeds the xor.
340 %b = shl i64 %b64, 29
; Inreg operand first, shifted operand second.
341 %xor = xor i64 %a, %b
342 %r0.val = xor i64 %xor, -1
; Reinterpret the 64 result bits as two floats for the return value.
343 %cast = bitcast i64 %r0.val to <2 x float>
344 ret <2 x float> %cast
; Same as the i64 s/v variant but with the xor operands commuted in the IR
; (shifted %b first, inreg %a second). The expected instructions are the same:
; a 64-bit vector shift (v_lshl_b64 on gfx7, v_lshlrev_b64 elsewhere), then a
; v_xor_b32 and a v_not_b32 per 32-bit half.
347 define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
348 ; GFX7-LABEL: xnor_i64_v_s_one_use:
350 ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 29
351 ; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
352 ; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1
353 ; GFX7-NEXT: v_not_b32_e32 v0, v0
354 ; GFX7-NEXT: v_not_b32_e32 v1, v1
355 ; GFX7-NEXT: ; return to shader part epilog
357 ; GFX8-LABEL: xnor_i64_v_s_one_use:
359 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
360 ; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
361 ; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1
362 ; GFX8-NEXT: v_not_b32_e32 v0, v0
363 ; GFX8-NEXT: v_not_b32_e32 v1, v1
364 ; GFX8-NEXT: ; return to shader part epilog
366 ; GFX900-LABEL: xnor_i64_v_s_one_use:
368 ; GFX900-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
369 ; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
370 ; GFX900-NEXT: v_xor_b32_e32 v1, s1, v1
371 ; GFX900-NEXT: v_not_b32_e32 v0, v0
372 ; GFX900-NEXT: v_not_b32_e32 v1, v1
373 ; GFX900-NEXT: ; return to shader part epilog
375 ; GFX906-LABEL: xnor_i64_v_s_one_use:
377 ; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
378 ; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
379 ; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
380 ; GFX906-NEXT: v_not_b32_e32 v0, v0
381 ; GFX906-NEXT: v_not_b32_e32 v1, v1
382 ; GFX906-NEXT: ; return to shader part epilog
384 ; GFX10-LABEL: xnor_i64_v_s_one_use:
386 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
387 ; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
388 ; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
389 ; GFX10-NEXT: v_not_b32_e32 v0, v0
390 ; GFX10-NEXT: v_not_b32_e32 v1, v1
391 ; GFX10-NEXT: ; return to shader part epilog
; Shift the non-inreg input before it feeds the xor.
392 %b = shl i64 %b64, 29
; Shifted operand first, inreg operand second.
393 %xor = xor i64 %b, %a
394 %r0.val = xor i64 %xor, -1
; Reinterpret the 64 result bits as two floats for the return value.
395 %cast = bitcast i64 %r0.val to <2 x float>
396 ret <2 x float> %cast
; i32 xor where, per the expected output, the first operand (v0) is inverted
; before the xor. Expected selection differs by target:
;   - gfx7, gfx8, gfx900: v_not_b32 on v0, then v_xor_b32
;   - gfx906:             a single v_xnor_b32
;   - the GFX10 prefix:   a single v_xor3_b32 with -1 as the middle operand
; Review note - the IR body of this function is not visible in this excerpt;
; the name suggests xor(not(%a), %b). TODO confirm.
399 define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
400 ; GFX7-LABEL: vector_xor_na_b_i32_one_use:
401 ; GFX7: ; %bb.0: ; %entry
402 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403 ; GFX7-NEXT: v_not_b32_e32 v0, v0
404 ; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
405 ; GFX7-NEXT: s_setpc_b64 s[30:31]
407 ; GFX8-LABEL: vector_xor_na_b_i32_one_use:
408 ; GFX8: ; %bb.0: ; %entry
409 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410 ; GFX8-NEXT: v_not_b32_e32 v0, v0
411 ; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
412 ; GFX8-NEXT: s_setpc_b64 s[30:31]
414 ; GFX900-LABEL: vector_xor_na_b_i32_one_use:
415 ; GFX900: ; %bb.0: ; %entry
416 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; GFX900-NEXT: v_not_b32_e32 v0, v0
418 ; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
419 ; GFX900-NEXT: s_setpc_b64 s[30:31]
421 ; GFX906-LABEL: vector_xor_na_b_i32_one_use:
422 ; GFX906: ; %bb.0: ; %entry
423 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424 ; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
425 ; GFX906-NEXT: s_setpc_b64 s[30:31]
427 ; GFX10-LABEL: vector_xor_na_b_i32_one_use:
428 ; GFX10: ; %bb.0: ; %entry
429 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430 ; GFX10-NEXT: v_xor3_b32 v0, v0, -1, v1
431 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; i32 xor where, per the expected output, the second operand (v1) is inverted
; before the xor. Expected selection differs by target:
;   - gfx7, gfx8, gfx900: v_not_b32 on v1, then v_xor_b32
;   - gfx906:             a single v_xnor_b32 (sources listed as v1, v0)
;   - the GFX10 prefix:   a single v_xor3_b32 with -1 as the middle operand
; Review note - the IR body of this function is not visible in this excerpt;
; the name suggests xor(%a, not(%b)). TODO confirm.
438 define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
439 ; GFX7-LABEL: vector_xor_a_nb_i32_one_use:
440 ; GFX7: ; %bb.0: ; %entry
441 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442 ; GFX7-NEXT: v_not_b32_e32 v1, v1
443 ; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
444 ; GFX7-NEXT: s_setpc_b64 s[30:31]
446 ; GFX8-LABEL: vector_xor_a_nb_i32_one_use:
447 ; GFX8: ; %bb.0: ; %entry
448 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449 ; GFX8-NEXT: v_not_b32_e32 v1, v1
450 ; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
451 ; GFX8-NEXT: s_setpc_b64 s[30:31]
453 ; GFX900-LABEL: vector_xor_a_nb_i32_one_use:
454 ; GFX900: ; %bb.0: ; %entry
455 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456 ; GFX900-NEXT: v_not_b32_e32 v1, v1
457 ; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
458 ; GFX900-NEXT: s_setpc_b64 s[30:31]
460 ; GFX906-LABEL: vector_xor_a_nb_i32_one_use:
461 ; GFX906: ; %bb.0: ; %entry
462 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463 ; GFX906-NEXT: v_xnor_b32_e32 v0, v1, v0
464 ; GFX906-NEXT: s_setpc_b64 s[30:31]
466 ; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
467 ; GFX10: ; %bb.0: ; %entry
468 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469 ; GFX10-NEXT: v_xor3_b32 v0, v1, -1, v0
470 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; Scalar i64 xor of %a with %nb. Both check prefixes expect s_not_b64 on
; s[2:3] followed by s_xor_b64; the pair is not folded into s_xnor_b64.
; Review note - the definition of %nb is not visible in this excerpt;
; presumably it is %b xor -1. TODO confirm.
477 define amdgpu_ps <2 x i32> @scalar_xor_a_nb_i64_one_use(i64 inreg %a, i64 inreg %b) {
478 ; GCN-LABEL: scalar_xor_a_nb_i64_one_use:
479 ; GCN: ; %bb.0: ; %entry
480 ; GCN-NEXT: s_not_b64 s[2:3], s[2:3]
481 ; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
482 ; GCN-NEXT: ; return to shader part epilog
484 ; GFX10-LABEL: scalar_xor_a_nb_i64_one_use:
485 ; GFX10: ; %bb.0: ; %entry
486 ; GFX10-NEXT: s_not_b64 s[2:3], s[2:3]
487 ; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
488 ; GFX10-NEXT: ; return to shader part epilog
491 %r0.val = xor i64 %a, %nb
; Reinterpret the 64 result bits as two i32 values for the return type.
492 %cast = bitcast i64 %r0.val to <2 x i32>
; Scalar i64 xor of %na with %b. Both check prefixes expect s_not_b64 on
; s[0:1] followed by s_xor_b64; the pair is not folded into s_xnor_b64.
; Review note - the definition of %na is not visible in this excerpt;
; presumably it is %a xor -1. TODO confirm.
496 define amdgpu_ps <2 x i32> @scalar_xor_na_b_i64_one_use(i64 inreg %a, i64 inreg %b) {
497 ; GCN-LABEL: scalar_xor_na_b_i64_one_use:
498 ; GCN: ; %bb.0: ; %entry
499 ; GCN-NEXT: s_not_b64 s[0:1], s[0:1]
500 ; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
501 ; GCN-NEXT: ; return to shader part epilog
503 ; GFX10-LABEL: scalar_xor_na_b_i64_one_use:
504 ; GFX10: ; %bb.0: ; %entry
505 ; GFX10-NEXT: s_not_b64 s[0:1], s[0:1]
506 ; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
507 ; GFX10-NEXT: ; return to shader part epilog
510 %r0.val = xor i64 %na, %b
; Reinterpret the 64 result bits as two i32 values for the return type.
511 %cast = bitcast i64 %r0.val to <2 x i32>