1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s
3 ; Test that the VGPR spiller correctly switches to SGPR offsets when the
4 ; instruction offset field would overflow, and that it accounts for memory swizzling.
7 ; CHECK-LABEL: test_inst_offset_kernel
; Kernel case, single-dword spill: the FileCheck directive below expects the
; spill store to carry its offset in the immediate field (offset:4092).
8 define amdgpu_kernel void @test_inst_offset_kernel() {
10 ; Occupy 4092 bytes of scratch, so the offset of the spill of %a just fits in
11 ; the instruction offset field.
; NOTE(review): the alloca below is 4088 bytes while the expected spill offset
; is 4092; the remaining 4 bytes are presumably taken by another stack object
; -- TODO confirm and update the byte count above if it is stale.
12 %alloca = alloca i8, i32 4088, align 4, addrspace(5)
13 %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
15 %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
16 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
17 %a = load volatile i32, i32 addrspace(5)* %aptr
; Clobber v0-v7 so that %a cannot stay in any of those registers across this
; point; combined with -stress-regalloc=8 on the llc command line this is
; presumably what forces %a to be spilled.
20 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
; Use %a after the clobber so it is live across it; the volatile store also
; keeps the alloca in use.
22 %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
23 store volatile i32 %a, i32 addrspace(5)* %outptr
28 ; CHECK-LABEL: test_sgpr_offset_kernel
; Kernel case, single-dword spill: the FileCheck directives below expect the
; spill store to carry no immediate offset; instead s7 is incremented before
; the store and decremented by the same constant afterwards.
29 define amdgpu_kernel void @test_sgpr_offset_kernel() {
31 ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
32 ; fit in the instruction, and has to live in the SGPR offset.
; NOTE(review): the alloca below is 4092 bytes while the expected spill offset
; is 4096; the remaining 4 bytes are presumably taken by another stack object
; -- TODO confirm and update the byte count above if it is stale.
33 %alloca = alloca i8, i32 4092, align 4, addrspace(5)
34 %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
36 %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
37 ; 0x40000 / 64 = 4096 (for wave64), i.e. the constant added to s7 is the
; 4096-byte spill offset multiplied by the 64 lanes of a wave.
38 ; CHECK: s_add_u32 s7, s7, 0x40000
39 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
40 ; CHECK: s_sub_u32 s7, s7, 0x40000
41 %a = load volatile i32, i32 addrspace(5)* %aptr
; Clobber v0-v7 so that %a cannot stay in any of those registers across this
; point; combined with -stress-regalloc=8 on the llc command line this is
; presumably what forces %a to be spilled.
44 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
; Use %a after the clobber so it is live across it; the volatile store also
; keeps the alloca in use.
46 %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
47 store volatile i32 %a, i32 addrspace(5)* %outptr
52 ; CHECK-LABEL: test_sgpr_offset_subregs_kernel
; Kernel case, two-dword (<2 x i32>) spill: the FileCheck directives below
; expect both sub-register stores to carry immediate offsets (4088 and 4092).
; NOTE(review): despite "sgpr_offset" in the name, this test expects immediate
; instruction offsets; the name looks swapped with
; test_inst_offset_subregs_kernel.
53 define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() {
55 ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
56 ; still fits below offset 4096 (4088 + 8 - 4 = 4092), and can be placed in
57 ; the instruction offset field.
; NOTE(review): the alloca below is 4084 bytes while the first expected spill
; offset is 4088; the remaining 4 bytes are presumably taken by another stack
; object -- TODO confirm and update the byte count above if it is stale.
58 %alloca = alloca i8, i32 4084, align 4, addrspace(5)
; Two views of the same buffer: i32 elements for %b, <2 x i32> elements for %a.
59 %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
60 %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
62 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
63 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
64 %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
65 %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
; Clobber v0-v7 so that %a cannot stay in any of those registers across this
; point; combined with -stress-regalloc=8 on the llc command line this is
; presumably what forces %a to be spilled.
68 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
70 ; Ensure the alloca sticks around.
71 %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
72 %b = load volatile i32, i32 addrspace(5)* %bptr
74 ; Ensure the spill is of the full super-reg: the whole <2 x i32> value %a is
; consumed as a single register operand after the clobber.
75 call void asm sideeffect "; $0", "r"(<2 x i32> %a)
80 ; CHECK-LABEL: test_inst_offset_subregs_kernel
; Kernel case, two-dword (<2 x i32>) spill: the FileCheck directives below
; expect s7 to be incremented once, both sub-register stores to be addressed
; relative to it (no offset, then offset:4), and s7 to be restored afterwards.
; NOTE(review): despite "inst_offset" in the name, this test expects the SGPR
; offset path; the name looks swapped with test_sgpr_offset_subregs_kernel.
81 define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
83 ; Occupy 4092 bytes of scratch, so that the spill of the last subreg of %a
84 ; does not fit below offset 4096 (4092 + 8 - 4 = 4096), and has to live
; in the SGPR offset.
; NOTE(review): the alloca below is 4088 bytes while the expected base offset
; is 4092; the remaining 4 bytes are presumably taken by another stack object
; -- TODO confirm and update the byte count above if it is stale.
86 %alloca = alloca i8, i32 4088, align 4, addrspace(5)
; Two views of the same buffer: i32 elements for %b, <2 x i32> elements for %a.
87 %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
88 %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
90 ; 0x3ff00 / 64 = 4092 (for wave64), i.e. the constant added to s7 is the
; 4092-byte offset of the first subreg multiplied by the 64 lanes of a wave.
91 ; CHECK: s_add_u32 s7, s7, 0x3ff00
92 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
93 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 offset:4 ; 4-byte Folded Spill
94 ; CHECK: s_sub_u32 s7, s7, 0x3ff00
95 %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
96 %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
; Clobber v0-v7 so that %a cannot stay in any of those registers across this
; point; combined with -stress-regalloc=8 on the llc command line this is
; presumably what forces %a to be spilled.
99 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
101 ; Ensure the alloca sticks around.
102 %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
103 %b = load volatile i32, i32 addrspace(5)* %bptr
105 ; Ensure the spill is of the full super-reg: the whole <2 x i32> value %a is
; consumed as a single register operand after the clobber.
106 call void asm sideeffect "; $0", "r"(<2 x i32> %a)
111 ; CHECK-LABEL: test_inst_offset_function
; Non-kernel function case, single-dword spill: the FileCheck directive below
; expects the spill store to carry its offset in the immediate field
; (offset:4092).
112 define void @test_inst_offset_function() {
114 ; Occupy 4092 bytes of scratch, so the offset of the spill of %a just fits in
115 ; the instruction offset field.
; NOTE(review): the alloca below is 4088 bytes while the expected spill offset
; is 4092; the remaining 4 bytes are presumably taken by another stack object
; -- TODO confirm and update the byte count above if it is stale.
116 %alloca = alloca i8, i32 4088, align 4, addrspace(5)
117 %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
119 %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
120 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
121 %a = load volatile i32, i32 addrspace(5)* %aptr
; Clobber v0-v7 so that %a cannot stay in any of those registers across this
; point; combined with -stress-regalloc=8 on the llc command line this is
; presumably what forces %a to be spilled.
124 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
; Use %a after the clobber so it is live across it; the volatile store also
; keeps the alloca in use.
126 %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
127 store volatile i32 %a, i32 addrspace(5)* %outptr
132 ; CHECK-LABEL: test_sgpr_offset_function
; Non-kernel function case, single-dword spill: the FileCheck directives below
; expect the spill store to carry no immediate offset; instead s5 is
; incremented before the store and decremented by the same constant afterwards.
133 define void @test_sgpr_offset_function() {
135 ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
136 ; fit in the instruction, and has to live in the SGPR offset.
; NOTE(review): the alloca below is 4092 bytes while the expected spill offset
; is 4096; the remaining 4 bytes are presumably taken by another stack object
; -- TODO confirm and update the byte count above if it is stale.
137 %alloca = alloca i8, i32 4092, align 4, addrspace(5)
138 %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
140 %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
141 ; 0x40000 / 64 = 4096 (for wave64), i.e. the constant added to s5 is the
; 4096-byte spill offset multiplied by the 64 lanes of a wave.
142 ; CHECK: s_add_u32 s5, s5, 0x40000
143 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
144 ; CHECK: s_sub_u32 s5, s5, 0x40000
145 %a = load volatile i32, i32 addrspace(5)* %aptr
; Clobber v0-v7 so that %a cannot stay in any of those registers across this
; point; combined with -stress-regalloc=8 on the llc command line this is
; presumably what forces %a to be spilled.
148 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
; Use %a after the clobber so it is live across it; the volatile store also
; keeps the alloca in use.
150 %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
151 store volatile i32 %a, i32 addrspace(5)* %outptr
156 ; CHECK-LABEL: test_sgpr_offset_subregs_function
; Non-kernel function case, two-dword (<2 x i32>) spill: the FileCheck
; directives below expect both sub-register stores to carry immediate offsets
; (4088 and 4092).
; NOTE(review): despite "sgpr_offset" in the name, this test expects immediate
; instruction offsets; the name looks swapped with
; test_inst_offset_subregs_function.
157 define void @test_sgpr_offset_subregs_function() {
159 ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
160 ; still fits below offset 4096 (4088 + 8 - 4 = 4092), and can be placed in
161 ; the instruction offset field.
; NOTE(review): the alloca below is 4084 bytes while the first expected spill
; offset is 4088; the remaining 4 bytes are presumably taken by another stack
; object -- TODO confirm and update the byte count above if it is stale.
162 %alloca = alloca i8, i32 4084, align 4, addrspace(5)
; Two views of the same buffer: i32 elements for %b, <2 x i32> elements for %a.
163 %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
164 %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
166 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
167 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
168 %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
169 %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
; Clobber v0-v7 so that %a cannot stay in any of those registers across this
; point; combined with -stress-regalloc=8 on the llc command line this is
; presumably what forces %a to be spilled.
172 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
174 ; Ensure the alloca sticks around.
175 %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
176 %b = load volatile i32, i32 addrspace(5)* %bptr
178 ; Ensure the spill is of the full super-reg: the whole <2 x i32> value %a is
; consumed as a single register operand after the clobber.
179 call void asm sideeffect "; $0", "r"(<2 x i32> %a)
184 ; CHECK-LABEL: test_inst_offset_subregs_function
; Non-kernel function case, two-dword (<2 x i32>) spill: the FileCheck
; directives below expect s5 to be incremented once, both sub-register stores
; to be addressed relative to it (no offset, then offset:4), and s5 to be
; restored afterwards.
; NOTE(review): despite "inst_offset" in the name, this test expects the SGPR
; offset path; the name looks swapped with test_sgpr_offset_subregs_function.
185 define void @test_inst_offset_subregs_function() {
187 ; Occupy 4092 bytes of scratch, so that the spill of the last subreg of %a
188 ; does not fit below offset 4096 (4092 + 8 - 4 = 4096), and has to live
189 ; in the SGPR offset.
; NOTE(review): the alloca below is 4088 bytes while the expected base offset
; is 4092; the remaining 4 bytes are presumably taken by another stack object
; -- TODO confirm and update the byte count above if it is stale.
190 %alloca = alloca i8, i32 4088, align 4, addrspace(5)
; Two views of the same buffer: i32 elements for %b, <2 x i32> elements for %a.
191 %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
192 %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
194 ; 0x3ff00 / 64 = 4092 (for wave64), i.e. the constant added to s5 is the
; 4092-byte offset of the first subreg multiplied by the 64 lanes of a wave.
195 ; CHECK: s_add_u32 s5, s5, 0x3ff00
196 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
197 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 offset:4 ; 4-byte Folded Spill
198 ; CHECK: s_sub_u32 s5, s5, 0x3ff00
199 %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
200 %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
; Clobber v0-v7 so that %a cannot stay in any of those registers across this
; point; combined with -stress-regalloc=8 on the llc command line this is
; presumably what forces %a to be spilled.
203 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
205 ; Ensure the alloca sticks around.
206 %bptr = getelementptr i32, i32 addrspace(5)* %bufv1, i32 1
207 %b = load volatile i32, i32 addrspace(5)* %bptr
209 ; Ensure the spill is of the full super-reg: the whole <2 x i32> value %a is
; consumed as a single register operand after the clobber.
210 call void asm sideeffect "; $0", "r"(<2 x i32> %a)