1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -O0 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -stop-after=regallocfast < %s | FileCheck -check-prefixes=GCN %s
4 ; Verify that we consider the xor at the end of the waterfall loop emitted for
5 ; divergent indirect addressing as a terminator.
7 declare i32 @llvm.amdgcn.workitem.id.x() #1
9 ; There should be no spill code inserted between the xor (S_XOR_B64_term) and the real terminator (S_CBRANCH_EXECNZ).
; Kernel under test: reads the workitem id, adds 1, uses the result as a
; dynamic index into a constant <16 x i32> vector, and stores the selected
; element to %out.
;
; The assertions below are matched against the MIR that llc emits when stopped
; after regallocfast (see the RUN line). They pin, in order:
;   * the entry block, which saves $exec to %stack.0;
;   * the loop block (successors %bb.1 and %bb.3), where the SI_SPILL_S64_SAVE
;     to %stack.2 appears before S_XOR_B64_term and S_CBRANCH_EXECNZ follows
;     the xor directly;
;   * the restore of $exec from %stack.0 after the loop;
;   * the final BUFFER_STORE_DWORD_OFFSET of the value read in the loop.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with the script rather than editing them by hand.
10 define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
11 ; GCN-LABEL: name: extract_w_offset_vgpr
13 ; GCN-NEXT: successors: %bb.1(0x80000000)
14 ; GCN-NEXT: liveins: $vgpr0, $sgpr2_sgpr3
16 ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0
17 ; GCN-NEXT: early-clobber renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM_ec killed renamable $sgpr2_sgpr3, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
18 ; GCN-NEXT: renamable $sgpr6 = COPY renamable $sgpr1
19 ; GCN-NEXT: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
20 ; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 61440
21 ; GCN-NEXT: renamable $sgpr5 = S_MOV_B32 -1
22 ; GCN-NEXT: undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
23 ; GCN-NEXT: renamable $sgpr1 = COPY killed renamable $sgpr6
24 ; GCN-NEXT: renamable $sgpr2 = COPY killed renamable $sgpr5
25 ; GCN-NEXT: renamable $sgpr3 = COPY killed renamable $sgpr4
26 ; GCN-NEXT: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.1, align 4, addrspace 5)
27 ; GCN-NEXT: renamable $sgpr0 = S_MOV_B32 16
28 ; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 15
29 ; GCN-NEXT: renamable $sgpr2 = S_MOV_B32 14
30 ; GCN-NEXT: renamable $sgpr3 = S_MOV_B32 13
31 ; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 12
32 ; GCN-NEXT: renamable $sgpr5 = S_MOV_B32 11
33 ; GCN-NEXT: renamable $sgpr6 = S_MOV_B32 10
34 ; GCN-NEXT: renamable $sgpr7 = S_MOV_B32 9
35 ; GCN-NEXT: renamable $sgpr8 = S_MOV_B32 8
36 ; GCN-NEXT: renamable $sgpr9 = S_MOV_B32 7
37 ; GCN-NEXT: renamable $sgpr10 = S_MOV_B32 6
38 ; GCN-NEXT: renamable $sgpr11 = S_MOV_B32 5
39 ; GCN-NEXT: renamable $sgpr12 = S_MOV_B32 3
40 ; GCN-NEXT: renamable $sgpr13 = S_MOV_B32 2
41 ; GCN-NEXT: renamable $sgpr14 = S_MOV_B32 1
42 ; GCN-NEXT: renamable $sgpr15 = S_MOV_B32 0
43 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr15
44 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr14
45 ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr13
46 ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr12
47 ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr11
48 ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr10
49 ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr9
50 ; GCN-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr8
51 ; GCN-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr7
52 ; GCN-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr6
53 ; GCN-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr5
54 ; GCN-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr4
55 ; GCN-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr3
56 ; GCN-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr2
57 ; GCN-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr1
58 ; GCN-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr0
59 ; GCN-NEXT: undef [[COPY17:%[0-9]+]].sub0:vreg_512 = COPY [[COPY1]]
60 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub1:vreg_512 = COPY [[COPY2]]
61 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub2:vreg_512 = COPY [[COPY3]]
62 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub3:vreg_512 = COPY [[COPY4]]
63 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub4:vreg_512 = COPY [[COPY5]]
64 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub5:vreg_512 = COPY [[COPY6]]
65 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub6:vreg_512 = COPY [[COPY7]]
66 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub7:vreg_512 = COPY [[COPY8]]
67 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub8:vreg_512 = COPY [[COPY9]]
68 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub9:vreg_512 = COPY [[COPY10]]
69 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub10:vreg_512 = COPY [[COPY11]]
70 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub11:vreg_512 = COPY [[COPY12]]
71 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub12:vreg_512 = COPY [[COPY13]]
72 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub13:vreg_512 = COPY [[COPY14]]
73 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub14:vreg_512 = COPY [[COPY15]]
74 ; GCN-NEXT: [[COPY17:%[0-9]+]].sub15:vreg_512 = COPY [[COPY16]]
75 ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
76 ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
77 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
78 ; GCN-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
81 ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
83 ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
84 ; GCN-NEXT: dead [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
85 ; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
86 ; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, [[COPY]](s32), implicit $exec
87 ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec
88 ; GCN-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 [[COPY17]], killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec
89 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]]
90 ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1
91 ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5)
92 ; GCN-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
93 ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
96 ; GCN-NEXT: successors: %bb.2(0x80000000)
98 ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
99 ; GCN-NEXT: $exec = S_MOV_B64 renamable $sgpr0_sgpr1
102 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.1, align 4, addrspace 5)
103 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]], killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1)
104 ; GCN-NEXT: S_ENDPGM 0
; IR body: index = workitem id + 1; the extractelement index is therefore not a
; compile-time constant.
; NOTE(review): the constant vector has no element with value 4 (..., 3, 5, ...).
; The S_MOV_B32 immediates in the checks above mirror this, so keep the two in
; sync if the vector is ever changed.
106 %id = call i32 @llvm.amdgcn.workitem.id.x() #1
107 %index = add i32 %id, 1
108 %value = extractelement <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 %index
109 store i32 %value, ptr addrspace(1) %out
113 !llvm.module.flags = !{!0}
114 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}