1 # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking -o - %s | FileCheck %s
; Stub IR bodies for the MIR functions tested below. Only the CFG shape
; (main_body -> if -> end, a simple divergent-if diamond) matters here;
; the code under test lives in the MIR `body:` sections that follow.
4 define amdgpu_kernel void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) {
6 br i1 undef, label %if, label %end
8 if: ; preds = %main_body
9 %v.if = load volatile i32, i32 addrspace(1)* undef
12 end: ; preds = %if, %main_body
16 define amdgpu_kernel void @optimize_if_and_saveexec(i32 %z, i32 %v) {
18 br i1 undef, label %if, label %end
27 define amdgpu_kernel void @optimize_if_or_saveexec(i32 %z, i32 %v) {
29 br i1 undef, label %if, label %end
38 define amdgpu_kernel void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) {
40 br i1 undef, label %if, label %end
42 if: ; preds = %main_body
45 end: ; preds = %if, %main_body
49 define amdgpu_kernel void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) {
51 br i1 undef, label %if, label %end
60 define amdgpu_kernel void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) {
62 br i1 undef, label %if, label %end
71 define amdgpu_kernel void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) {
73 br i1 undef, label %if, label %end
82 define amdgpu_kernel void @optimize_if_unknown_saveexec(i32 %z, i32 %v) {
84 br i1 undef, label %if, label %end
93 define amdgpu_kernel void @optimize_if_andn2_saveexec(i32 %z, i32 %v) {
95 br i1 undef, label %if, label %end
104 define amdgpu_kernel void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) {
106 br i1 undef, label %if, label %end
115 define amdgpu_kernel void @if_and_xor_read_exec_copy_subreg() {
117 br i1 undef, label %if, label %end
119 if: ; preds = %main_body
122 end: ; preds = %if, %main_body
# Fully foldable pattern: COPY of exec, S_AND_B64 of that copy with the
# condition, S_XOR_B64 computing the restore mask, and a terminator move of
# the AND result into exec. The pass is expected to collapse the
# copy/AND/terminator into a single S_AND_SAVEEXEC_B64 and rewrite the XOR
# to read $exec directly.
128 # CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
129 # CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
130 # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
131 # CHECK-NEXT: SI_MASK_BRANCH
133 name: optimize_if_and_saveexec_xor
140 $sgpr0_sgpr1 = COPY $exec
141 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
142 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
143 $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
144 $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
145 $exec = S_MOV_B64_term killed $sgpr2_sgpr3
146 SI_MASK_BRANCH %bb.2, implicit $exec
150 liveins: $sgpr0_sgpr1
152 $sgpr7 = S_MOV_B32 61440
153 $sgpr6 = S_MOV_B32 -1
154 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
157 liveins: $vgpr0, $sgpr0_sgpr1
159 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
160 $sgpr3 = S_MOV_B32 61440
161 $sgpr2 = S_MOV_B32 -1
162 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# Same pattern as above but without the XOR: the COPY of exec, S_AND_B64,
# and terminator move fold directly into a single S_AND_SAVEEXEC_B64.
167 # CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
168 # CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
169 # CHECK-NEXT: SI_MASK_BRANCH
171 name: optimize_if_and_saveexec
178 $sgpr0_sgpr1 = COPY $exec
179 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
180 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
181 $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
182 $exec = S_MOV_B64_term killed $sgpr2_sgpr3
183 SI_MASK_BRANCH %bb.2, implicit $exec
187 liveins: $sgpr0_sgpr1
189 $sgpr7 = S_MOV_B32 61440
190 $sgpr6 = S_MOV_B32 -1
191 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
194 liveins: $vgpr0, $sgpr0_sgpr1
196 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
197 $sgpr3 = S_MOV_B32 61440
198 $sgpr2 = S_MOV_B32 -1
199 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# OR variant of the saveexec pattern: COPY of exec + S_OR_B64 + terminator
# move fold into a single S_OR_SAVEEXEC_B64.
204 # CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
205 # CHECK: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
206 # CHECK-NEXT: SI_MASK_BRANCH
208 name: optimize_if_or_saveexec
215 $sgpr0_sgpr1 = COPY $exec
216 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
217 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
218 $sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
219 $exec = S_MOV_B64_term killed $sgpr2_sgpr3
220 SI_MASK_BRANCH %bb.2, implicit $exec
224 liveins: $sgpr0_sgpr1
226 $sgpr7 = S_MOV_B32 61440
227 $sgpr6 = S_MOV_B32 -1
228 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
231 liveins: $vgpr0, $sgpr0_sgpr1
233 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
234 $sgpr3 = S_MOV_B32 61440
235 $sgpr2 = S_MOV_B32 -1
236 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# Negative test: a buffer store (which reads $exec) sits between the
# S_AND_B64 and the terminator copy to exec, so the saveexec fold must not
# fire. Only the S_MOV_B64_term terminator is lowered to a plain COPY.
241 # CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
242 # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
243 # CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
244 # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
245 # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
246 # CHECK-NEXT: SI_MASK_BRANCH
247 name: optimize_if_and_saveexec_xor_valu_middle
254 $sgpr0_sgpr1 = COPY $exec
255 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
256 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
257 $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
258 BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
259 $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
260 $exec = S_MOV_B64_term killed $sgpr2_sgpr3
261 SI_MASK_BRANCH %bb.2, implicit $exec
265 liveins: $sgpr0_sgpr1
267 $sgpr7 = S_MOV_B32 61440
268 $sgpr6 = S_MOV_B32 -1
269 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
272 liveins: $vgpr0, $sgpr0_sgpr1
274 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
275 $sgpr3 = S_MOV_B32 61440
276 $sgpr2 = S_MOV_B32 -1
277 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# Negative test: the AND writes its result back into $sgpr0_sgpr1 (the exec
# copy itself) and the XOR reads an unrelated undef register pair, so the
# saveexec pattern does not match; only the terminator becomes a COPY.
282 # CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}}
283 # CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
284 # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
285 # CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
286 # CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
287 name: optimize_if_and_saveexec_xor_wrong_reg
294 $sgpr6 = S_MOV_B32 -1
295 $sgpr7 = S_MOV_B32 61440
296 $sgpr0_sgpr1 = COPY $exec
297 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
298 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
299 $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
300 $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
301 $exec = S_MOV_B64_term $sgpr0_sgpr1
302 SI_MASK_BRANCH %bb.2, implicit $exec
306 liveins: $sgpr0_sgpr1 , $sgpr4_sgpr5_sgpr6_sgpr7
307 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
310 liveins: $vgpr0, $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7
312 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
313 $sgpr3 = S_MOV_B32 61440
314 $sgpr2 = S_MOV_B32 -1
315 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# Negative test: $sgpr2_sgpr3 is modified (S_OR_B64 with 1) between the AND
# and the copy to exec, so the value moved into exec is no longer
# exec & condition; the saveexec fold must not fire and the terminator is
# only lowered to a COPY.
320 # CHECK-LABEL: name: optimize_if_and_saveexec_xor_modify_copy_to_exec{{$}}
321 # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
322 # CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
323 # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
324 # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
325 # CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
327 name: optimize_if_and_saveexec_xor_modify_copy_to_exec
334 $sgpr0_sgpr1 = COPY $exec
335 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
336 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
337 $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
338 $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
339 $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
340 $exec = S_MOV_B64_term killed $sgpr2_sgpr3
341 SI_MASK_BRANCH %bb.2, implicit $exec
345 liveins: $sgpr0_sgpr1
347 $sgpr7 = S_MOV_B32 61440
348 $sgpr6 = S_MOV_B32 -1
349 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
352 liveins: $vgpr0, $sgpr0_sgpr1
354 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
357 $sgpr2 = S_MOV_B32 -1
358 $sgpr3 = S_MOV_B32 61440
359 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# Negative test: $sgpr2_sgpr3 is live past the terminator (S_SLEEP in the
# next block uses it implicitly), so the mask register cannot be absorbed
# into a saveexec; the copy to exec is kept and, per the CHECK line, carries
# no kill flag.
364 # CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}}
365 # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
366 # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
367 # CHECK-NEXT: $exec = COPY $sgpr2_sgpr3
368 # CHECK-NEXT: SI_MASK_BRANCH
369 name: optimize_if_and_saveexec_xor_live_out_setexec
376 $sgpr0_sgpr1 = COPY $exec
377 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
378 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
379 $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
380 $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
381 $exec = S_MOV_B64_term $sgpr2_sgpr3
382 SI_MASK_BRANCH %bb.2, implicit $exec
386 liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
387 S_SLEEP 0, implicit $sgpr2_sgpr3
388 $sgpr7 = S_MOV_B32 61440
389 $sgpr6 = S_MOV_B32 -1
390 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
393 liveins: $vgpr0, $sgpr0_sgpr1
395 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
396 $sgpr3 = S_MOV_B32 61440
397 $sgpr2 = S_MOV_B32 -1
398 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# Negative test: S_LSHR_B64 has no fused *_SAVEEXEC form, so the exec copy
# and the shift are left alone; only the S_MOV_B64_term terminator is
# lowered to a plain COPY.
403 # CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}}
404 # CHECK: $sgpr0_sgpr1 = COPY $exec
405 # CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
406 # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
407 # CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
409 name: optimize_if_unknown_saveexec
416 $sgpr0_sgpr1 = COPY $exec
417 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
418 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
419 $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
420 $exec = S_MOV_B64_term killed $sgpr2_sgpr3
421 SI_MASK_BRANCH %bb.2, implicit $exec
425 liveins: $sgpr0_sgpr1
427 $sgpr7 = S_MOV_B32 61440
428 $sgpr6 = S_MOV_B32 -1
429 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
432 liveins: $vgpr0, $sgpr0_sgpr1
434 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
435 $sgpr3 = S_MOV_B32 61440
436 $sgpr2 = S_MOV_B32 -1
437 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# ANDN2 variant: COPY of exec + S_ANDN2_B64 (exec copy as first operand) +
# terminator move fold into a single S_ANDN2_SAVEEXEC_B64.
442 # CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
443 # CHECK: $sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
444 # CHECK-NEXT: SI_MASK_BRANCH
446 name: optimize_if_andn2_saveexec
453 $sgpr0_sgpr1 = COPY $exec
454 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
455 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
456 $sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
457 $exec = S_MOV_B64_term killed $sgpr2_sgpr3
458 SI_MASK_BRANCH %bb.2, implicit $exec
462 liveins: $sgpr0_sgpr1
464 $sgpr7 = S_MOV_B32 61440
465 $sgpr6 = S_MOV_B32 -1
466 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
469 liveins: $vgpr0, $sgpr0_sgpr1
471 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
472 $sgpr3 = S_MOV_B32 61440
473 $sgpr2 = S_MOV_B32 -1
474 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# Negative test: the ANDN2 operands are swapped (condition first, exec copy
# second). S_ANDN2_B64 is not commutative, so no saveexec form applies and
# only the terminator is lowered to a COPY.
479 # CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
480 # CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
481 # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
482 # CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
483 name: optimize_if_andn2_saveexec_no_commute
490 $sgpr0_sgpr1 = COPY $exec
491 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
492 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
493 $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
494 $exec = S_MOV_B64_term killed $sgpr2_sgpr3
495 SI_MASK_BRANCH %bb.2, implicit $exec
499 liveins: $sgpr0_sgpr1
501 $sgpr7 = S_MOV_B32 61440
502 $sgpr6 = S_MOV_B32 -1
503 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
506 liveins: $vgpr0, $sgpr0_sgpr1
508 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
509 $sgpr3 = S_MOV_B32 61440
510 $sgpr2 = S_MOV_B32 -1
511 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
516 # A read from exec copy subreg prevents optimization
# ($sgpr4 = S_MOV_B32 $sgpr1 reads the low half of the $sgpr0_sgpr1 exec
# copy, so the COPY from exec must be preserved — per the CHECK lines both
# the COPY and the subreg read survive the pass.)
517 # CHECK-LABEL: name: if_and_xor_read_exec_copy_subreg{{$}}
518 # CHECK: $sgpr0_sgpr1 = COPY $exec
519 # CHECK-NEXT: $sgpr4 = S_MOV_B32 $sgpr1
520 name: if_and_xor_read_exec_copy_subreg
527 $sgpr0_sgpr1 = COPY $exec
528 $sgpr4 = S_MOV_B32 $sgpr1
529 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
530 $vgpr0 = V_MOV_B32_e32 4, implicit $exec
531 $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
532 $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
533 $exec = S_MOV_B64_term killed $sgpr2_sgpr3
534 SI_MASK_BRANCH %bb.2, implicit $exec
538 liveins: $sgpr0_sgpr1
540 $sgpr7 = S_MOV_B32 61440
541 $sgpr6 = S_MOV_B32 -1
542 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
545 liveins: $vgpr0, $sgpr0_sgpr1
547 $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
548 $sgpr3 = S_MOV_B32 61440
549 $sgpr2 = S_MOV_B32 -1
550 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec