1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -O0 -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -stop-after=regallocfast < %s | FileCheck -check-prefixes=GCN %s
4 ; Verify that we consider the xor at the end of the waterfall loop emitted for
5 ; divergent indirect addressing as a terminator.
; Intrinsic called by @extract_w_offset_vgpr below: its result, plus 1, is used
; as the extractelement index. Presumably this is what makes the index
; divergent (per-lane) and triggers the waterfall loop -- confirm.
7 declare i32 @llvm.amdgcn.workitem.id.x() #1
9 ; There should be no spill code inserted between the xor (S_XOR_B64_term) and the real terminator (S_CBRANCH_EXECNZ).
10 define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
11 ; GCN-LABEL: name: extract_w_offset_vgpr
13 ; GCN: successors: %bb.1(0x80000000)
14 ; GCN: liveins: $vgpr0, $sgpr0_sgpr1
15 ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
16 ; GCN: renamable $sgpr2 = COPY renamable $sgpr1
17 ; GCN: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
18 ; GCN: renamable $sgpr1 = S_MOV_B32 61440
19 ; GCN: renamable $sgpr4 = S_MOV_B32 -1
20 ; GCN: undef renamable $sgpr8 = COPY killed renamable $sgpr0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
21 ; GCN: renamable $sgpr9 = COPY killed renamable $sgpr2
22 ; GCN: renamable $sgpr10 = COPY killed renamable $sgpr4
23 ; GCN: renamable $sgpr11 = COPY killed renamable $sgpr1
24 ; GCN: renamable $sgpr0 = S_MOV_B32 16
25 ; GCN: renamable $sgpr1 = S_MOV_B32 15
26 ; GCN: renamable $sgpr2 = S_MOV_B32 14
27 ; GCN: renamable $sgpr4 = S_MOV_B32 13
28 ; GCN: renamable $sgpr5 = S_MOV_B32 12
29 ; GCN: renamable $sgpr6 = S_MOV_B32 11
30 ; GCN: renamable $sgpr7 = S_MOV_B32 10
31 ; GCN: renamable $sgpr12 = S_MOV_B32 9
32 ; GCN: renamable $sgpr13 = S_MOV_B32 8
33 ; GCN: renamable $sgpr14 = S_MOV_B32 7
34 ; GCN: renamable $sgpr15 = S_MOV_B32 6
35 ; GCN: renamable $sgpr16 = S_MOV_B32 5
36 ; GCN: renamable $sgpr17 = S_MOV_B32 3
37 ; GCN: renamable $sgpr18 = S_MOV_B32 2
38 ; GCN: renamable $sgpr19 = S_MOV_B32 1
39 ; GCN: renamable $sgpr20 = S_MOV_B32 0
40 ; GCN: renamable $vgpr1 = COPY killed renamable $sgpr20
41 ; GCN: renamable $vgpr2 = COPY killed renamable $sgpr19
42 ; GCN: renamable $vgpr3 = COPY killed renamable $sgpr18
43 ; GCN: renamable $vgpr4 = COPY killed renamable $sgpr17
44 ; GCN: renamable $vgpr5 = COPY killed renamable $sgpr16
45 ; GCN: renamable $vgpr6 = COPY killed renamable $sgpr15
46 ; GCN: renamable $vgpr7 = COPY killed renamable $sgpr14
47 ; GCN: renamable $vgpr8 = COPY killed renamable $sgpr13
48 ; GCN: renamable $vgpr9 = COPY killed renamable $sgpr12
49 ; GCN: renamable $vgpr10 = COPY killed renamable $sgpr7
50 ; GCN: renamable $vgpr11 = COPY killed renamable $sgpr6
51 ; GCN: renamable $vgpr12 = COPY killed renamable $sgpr5
52 ; GCN: renamable $vgpr13 = COPY killed renamable $sgpr4
53 ; GCN: renamable $vgpr14 = COPY killed renamable $sgpr2
54 ; GCN: renamable $vgpr15 = COPY killed renamable $sgpr1
55 ; GCN: renamable $vgpr16 = COPY killed renamable $sgpr0
56 ; GCN: undef renamable $vgpr17 = COPY killed renamable $vgpr1, implicit-def $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32
57 ; GCN: renamable $vgpr18 = COPY killed renamable $vgpr2
58 ; GCN: renamable $vgpr19 = COPY killed renamable $vgpr3
59 ; GCN: renamable $vgpr20 = COPY killed renamable $vgpr4
60 ; GCN: renamable $vgpr21 = COPY killed renamable $vgpr5
61 ; GCN: renamable $vgpr22 = COPY killed renamable $vgpr6
62 ; GCN: renamable $vgpr23 = COPY killed renamable $vgpr7
63 ; GCN: renamable $vgpr24 = COPY killed renamable $vgpr8
64 ; GCN: renamable $vgpr25 = COPY killed renamable $vgpr9
65 ; GCN: renamable $vgpr26 = COPY killed renamable $vgpr10
66 ; GCN: renamable $vgpr27 = COPY killed renamable $vgpr11
67 ; GCN: renamable $vgpr28 = COPY killed renamable $vgpr12
68 ; GCN: renamable $vgpr29 = COPY killed renamable $vgpr13
69 ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14
70 ; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15
71 ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16
72 ; GCN: renamable $sgpr22_sgpr23 = S_MOV_B64 $exec
73 ; GCN: renamable $vgpr1 = IMPLICIT_DEF
74 ; GCN: renamable $sgpr24_sgpr25 = IMPLICIT_DEF
75 ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
76 ; GCN: SI_SPILL_S128_SAVE killed $sgpr8_sgpr9_sgpr10_sgpr11, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 16 into %stack.1, align 4, addrspace 5)
77 ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5)
78 ; GCN: SI_SPILL_S64_SAVE killed $sgpr22_sgpr23, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.3, align 4, addrspace 5)
79 ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
80 ; GCN: SI_SPILL_S64_SAVE killed $sgpr24_sgpr25, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5)
82 ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
83 ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.5, align 4, addrspace 5)
84 ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
85 ; GCN: $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
86 ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
87 ; GCN: renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec
88 ; GCN: renamable $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
89 ; GCN: S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit undef $m0
90 ; GCN: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = SI_SPILL_V512_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 64 from %stack.2, align 4, addrspace 5)
91 ; GCN: renamable $vgpr18 = V_MOV_B32_e32 undef $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0
92 ; GCN: S_SET_GPR_IDX_OFF
93 ; GCN: renamable $vgpr19 = COPY renamable $vgpr18
94 ; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5
95 ; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5)
96 ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.6, align 4, addrspace 5)
97 ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
98 ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
99 ; GCN: SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
100 ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
101 ; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec
103 ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.3, align 4, addrspace 5)
104 ; GCN: $exec = S_MOV_B64 renamable $sgpr0_sgpr1
105 ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
106 ; GCN: $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5)
107 ; GCN: BUFFER_STORE_DWORD_OFFSET renamable $vgpr0, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
110 %id = call i32 @llvm.amdgcn.workitem.id.x() #1
111 %index = add i32 %id, 1
112 %value = extractelement <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 %index
113 store i32 %value, i32 addrspace(1)* %out