1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,MUBUF
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 -amdgpu-enable-flat-scratch < %s | FileCheck %s -check-prefixes=GCN,FLATSCR
; Spill of a <2 x i32> value that must stay live across an inline asm
; clobbering v0-v7. The MUBUF run expects one dword store/load per element,
; the flat-scratch run expects a single dwordx2 store/load. The
; ASMSTART/ASMEND pair pins the spill checks before the clobber and the
; reload checks after it.
; GCN-LABEL: spill_v2i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:16 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v{{.*}} offset:16 ; 8-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx2 v{{.*}} offset:16 ; 8-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v2i32() {
  ; Two-element array of vectors; element 1 is both the source and the sink.
  %alloca = alloca <2 x i32>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %alloca, i32 1
  %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr

  ; Clobber v0-v7 so that %a cannot stay in registers across this point.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Using %a after the clobber keeps it live across the asm.
  %outptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %alloca, i32 1
  store volatile <2 x i32> %a, <2 x i32> addrspace(5)* %outptr

  ret void
}
; Floating-point counterpart of the <2 x i32> spill test: a <2 x float> value
; is kept live across an inline asm clobbering v0-v7. The MUBUF run expects
; one dword store/load per element, the flat-scratch run a single dwordx2
; store/load. The ASMSTART/ASMEND pair pins the spill checks before the
; clobber and the reload checks after it.
; GCN-LABEL: spill_v2f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:16 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v{{.*}} offset:16 ; 8-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx2 v{{.*}} offset:16 ; 8-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v2f32() {
  ; Float element type so this test actually covers the f32 vector named in
  ; the function; size and layout match the i32 variant.
  %alloca = alloca <2 x float>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <2 x float>, <2 x float> addrspace(5)* %alloca, i32 1
  %a = load volatile <2 x float>, <2 x float> addrspace(5)* %aptr

  ; Clobber v0-v7 so that %a cannot stay in registers across this point.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Using %a after the clobber keeps it live across the asm.
  %outptr = getelementptr <2 x float>, <2 x float> addrspace(5)* %alloca, i32 1
  store volatile <2 x float> %a, <2 x float> addrspace(5)* %outptr

  ret void
}
; Spill of a <3 x i32> value that must stay live across an inline asm
; clobbering v0-v7. The MUBUF run expects three dword stores/loads, the
; flat-scratch run a single dwordx3 store/load. The ASMSTART/ASMEND pair pins
; the spill checks before the clobber and the reload checks after it.
; GCN-LABEL: spill_v3i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx3 off, v{{.*}} offset:32 ; 12-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx3 v{{.*}} offset:32 ; 12-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v3i32() {
  ; Two-element array of vectors; element 1 is both the source and the sink.
  %alloca = alloca <3 x i32>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <3 x i32>, <3 x i32> addrspace(5)* %alloca, i32 1
  %a = load volatile <3 x i32>, <3 x i32> addrspace(5)* %aptr

  ; Clobber v0-v7 so that %a cannot stay in registers across this point.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Using %a after the clobber keeps it live across the asm.
  %outptr = getelementptr <3 x i32>, <3 x i32> addrspace(5)* %alloca, i32 1
  store volatile <3 x i32> %a, <3 x i32> addrspace(5)* %outptr

  ret void
}
; Floating-point counterpart of the <3 x i32> spill test: a <3 x float> value
; is kept live across an inline asm clobbering v0-v7. The MUBUF run expects
; three dword stores/loads, the flat-scratch run a single dwordx3 store/load.
; The ASMSTART/ASMEND pair pins the spill checks before the clobber and the
; reload checks after it.
; GCN-LABEL: spill_v3f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx3 off, v{{.*}} offset:32 ; 12-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx3 v{{.*}} offset:32 ; 12-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v3f32() {
  ; Float element type so this test actually covers the f32 vector named in
  ; the function; size and layout match the i32 variant.
  %alloca = alloca <3 x float>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <3 x float>, <3 x float> addrspace(5)* %alloca, i32 1
  %a = load volatile <3 x float>, <3 x float> addrspace(5)* %aptr

  ; Clobber v0-v7 so that %a cannot stay in registers across this point.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Using %a after the clobber keeps it live across the asm.
  %outptr = getelementptr <3 x float>, <3 x float> addrspace(5)* %alloca, i32 1
  store volatile <3 x float> %a, <3 x float> addrspace(5)* %outptr

  ret void
}
; Spill of a <4 x i32> value that must stay live across an inline asm
; clobbering v0-v7. The MUBUF run expects four dword stores/loads, the
; flat-scratch run a single dwordx4 store/load. The empty inline asm prints
; ASMSTART immediately followed by ASMEND, so the pair is matched together to
; separate the spill checks from the reload checks.
; GCN-LABEL: spill_v4i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx4 off, v{{.*}} offset:32 ; 16-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx4 v{{.*}} offset:32 ; 16-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v4i32() {
  ; Two-element array of vectors; element 1 is both the source and the sink.
  %alloca = alloca <4 x i32>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <4 x i32>, <4 x i32> addrspace(5)* %alloca, i32 1
  %a = load volatile <4 x i32>, <4 x i32> addrspace(5)* %aptr

  ; Clobber v0-v7 so that %a cannot stay in registers across this point.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Using %a after the clobber keeps it live across the asm.
  %outptr = getelementptr <4 x i32>, <4 x i32> addrspace(5)* %alloca, i32 1
  store volatile <4 x i32> %a, <4 x i32> addrspace(5)* %outptr

  ret void
}
; Floating-point counterpart of the <4 x i32> spill test: a <4 x float> value
; is kept live across an inline asm clobbering v0-v7. The MUBUF run expects
; four dword stores/loads, the flat-scratch run a single dwordx4 store/load.
; The empty inline asm prints ASMSTART immediately followed by ASMEND, so the
; pair is matched together to separate the spill checks from the reload checks.
; GCN-LABEL: spill_v4f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx4 off, v{{.*}} offset:32 ; 16-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx4 v{{.*}} offset:32 ; 16-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword

define void @spill_v4f32() {
  ; Float element type so this test actually covers the f32 vector named in
  ; the function; size and layout match the i32 variant.
  %alloca = alloca <4 x float>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <4 x float>, <4 x float> addrspace(5)* %alloca, i32 1
  %a = load volatile <4 x float>, <4 x float> addrspace(5)* %aptr

  ; Clobber v0-v7 so that %a cannot stay in registers across this point.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Using %a after the clobber keeps it live across the asm.
  %outptr = getelementptr <4 x float>, <4 x float> addrspace(5)* %alloca, i32 1
  store volatile <4 x float> %a, <4 x float> addrspace(5)* %outptr

  ret void
}
; Spill of a <5 x i32> value that must stay live across an inline asm
; clobbering v0-v7. The MUBUF run expects five dword stores/loads (offsets
; 64 through 80); the flat-scratch run expects a dwordx4 access plus a
; trailing dword at offset 80. The empty inline asm prints ASMSTART
; immediately followed by ASMEND, so the pair is matched together to separate
; the spill checks from the reload checks.
; GCN-LABEL: spill_v5i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:64 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:68 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:72 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:76 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dwordx4 off, v{{.*}} offset:64 ; 16-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dwordx4 v{{.*}} offset:64 ; 16-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v5i32() {
  ; Two-element array of vectors; element 1 is both the source and the sink.
  %alloca = alloca <5 x i32>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <5 x i32>, <5 x i32> addrspace(5)* %alloca, i32 1
  %a = load volatile <5 x i32>, <5 x i32> addrspace(5)* %aptr

  ; Clobber v0-v7 so that %a cannot stay in registers across this point.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Using %a after the clobber keeps it live across the asm.
  %outptr = getelementptr <5 x i32>, <5 x i32> addrspace(5)* %alloca, i32 1
  store volatile <5 x i32> %a, <5 x i32> addrspace(5)* %outptr

  ret void
}
; Floating-point counterpart of the <5 x i32> spill test: a <5 x float> value
; is kept live across an inline asm clobbering v0-v7. The MUBUF run expects
; five dword stores/loads (offsets 64 through 80); the flat-scratch run
; expects a dwordx4 access plus a trailing dword at offset 80. The empty
; inline asm prints ASMSTART immediately followed by ASMEND, so the pair is
; matched together to separate the spill checks from the reload checks.
; GCN-LABEL: spill_v5f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:64 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:68 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:72 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:76 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dwordx4 off, v{{.*}} offset:64 ; 16-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dwordx4 v{{.*}} offset:64 ; 16-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v5f32() {
  ; Float element type so this test actually covers the f32 vector named in
  ; the function; size and layout match the i32 variant.
  %alloca = alloca <5 x float>, i32 2, align 4, addrspace(5)

  %aptr = getelementptr <5 x float>, <5 x float> addrspace(5)* %alloca, i32 1
  %a = load volatile <5 x float>, <5 x float> addrspace(5)* %aptr

  ; Clobber v0-v7 so that %a cannot stay in registers across this point.
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  ; Using %a after the clobber keeps it live across the asm.
  %outptr = getelementptr <5 x float>, <5 x float> addrspace(5)* %alloca, i32 1
  store volatile <5 x float> %a, <5 x float> addrspace(5)* %outptr

  ret void
}