1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
6 ; FIXME: Merge with regbankselect, which mostly overlaps when all types are supported.
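; Exercises instruction selection for @llvm.amdgcn.s.buffer.load (scalar and vector variants) with SGPR and immediate offsets on GFX6-GFX8.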
9 define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
10 ; GFX6-LABEL: name: s_buffer_load_i32
11 ; GFX6: bb.1 (%ir-block.0):
12 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
14 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
15 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
16 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
17 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
18 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
19 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
20 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
21 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
22 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
23 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
24 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
26 ; GFX7-LABEL: name: s_buffer_load_i32
27 ; GFX7: bb.1 (%ir-block.0):
28 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
30 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
31 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
32 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
33 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
34 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
35 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
36 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
37 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
38 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
39 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
40 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
42 ; GFX8-LABEL: name: s_buffer_load_i32
43 ; GFX8: bb.1 (%ir-block.0):
44 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
46 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
47 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
48 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
49 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
50 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
51 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
52 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
53 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
54 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
55 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
56 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
57 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
58 ret i32 %val
59 }
61 define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
62 ; GFX6-LABEL: name: s_buffer_load_i32_glc
63 ; GFX6: bb.1 (%ir-block.0):
64 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
66 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
67 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
68 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
69 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
70 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
71 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
72 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
73 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
74 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
75 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
76 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
78 ; GFX7-LABEL: name: s_buffer_load_i32_glc
79 ; GFX7: bb.1 (%ir-block.0):
80 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
82 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
83 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
84 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
85 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
86 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
87 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
88 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
89 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
90 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
91 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
92 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
94 ; GFX8-LABEL: name: s_buffer_load_i32_glc
95 ; GFX8: bb.1 (%ir-block.0):
96 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
98 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
99 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
100 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
101 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
102 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
103 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
104 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
105 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
106 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
107 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
108 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
109 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1)
110 ret i32 %val
111 }
113 define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
114 ; GFX6-LABEL: name: s_buffer_load_v2i32
115 ; GFX6: bb.1 (%ir-block.0):
116 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
118 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
119 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
120 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
121 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
122 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
123 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
124 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
125 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
126 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
127 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
128 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
129 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
130 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
131 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
132 ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
133 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
135 ; GFX7-LABEL: name: s_buffer_load_v2i32
136 ; GFX7: bb.1 (%ir-block.0):
137 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
139 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
140 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
141 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
142 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
143 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
144 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
145 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
146 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
147 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
148 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
149 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
150 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
151 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
152 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
153 ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
154 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
156 ; GFX8-LABEL: name: s_buffer_load_v2i32
157 ; GFX8: bb.1 (%ir-block.0):
158 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
160 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
161 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
162 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
163 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
164 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
165 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
166 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
167 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
168 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
169 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
170 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
171 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
172 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
173 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
174 ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
175 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
176 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
177 ret <2 x i32> %val
178 }
180 define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
181 ; GFX6-LABEL: name: s_buffer_load_v3i32
182 ; GFX6: bb.1 (%ir-block.0):
183 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
185 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
186 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
187 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
188 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
189 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
190 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
191 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
192 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0
193 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1
194 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2
195 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3
196 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
197 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
198 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
199 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
200 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
201 ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
202 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
203 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
204 ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
205 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
207 ; GFX7-LABEL: name: s_buffer_load_v3i32
208 ; GFX7: bb.1 (%ir-block.0):
209 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
211 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
212 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
213 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
214 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
215 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
216 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
217 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
218 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0
219 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1
220 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2
221 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3
222 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
223 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
224 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
225 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
226 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
227 ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
228 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
229 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
230 ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
231 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
233 ; GFX8-LABEL: name: s_buffer_load_v3i32
234 ; GFX8: bb.1 (%ir-block.0):
235 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
237 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
238 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
239 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
240 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
241 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
242 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
243 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
244 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0
245 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1
246 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2
247 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3
248 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
249 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
250 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
251 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
252 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
253 ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
254 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
255 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
256 ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
257 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
258 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
259 ret <3 x i32> %val
260 }
262 define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
263 ; GFX6-LABEL: name: s_buffer_load_v8i32
264 ; GFX6: bb.1 (%ir-block.0):
265 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
267 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
268 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
269 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
270 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
271 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
272 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
273 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
274 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
275 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
276 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
277 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
278 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
279 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
280 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
281 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
282 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
283 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
284 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
285 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
286 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
287 ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
288 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
289 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
290 ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
291 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
292 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
293 ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
294 ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
295 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
296 ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
297 ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
298 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
299 ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
300 ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
301 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
302 ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
303 ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
304 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
305 ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
306 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
308 ; GFX7-LABEL: name: s_buffer_load_v8i32
309 ; GFX7: bb.1 (%ir-block.0):
310 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
312 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
313 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
314 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
315 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
316 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
317 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
318 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
319 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
320 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
321 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
322 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
323 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
324 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
325 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
326 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
327 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
328 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
329 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
330 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
331 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
332 ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
333 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
334 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
335 ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
336 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
337 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
338 ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
339 ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
340 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
341 ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
342 ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
343 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
344 ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
345 ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
346 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
347 ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
348 ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
349 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
350 ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
351 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
353 ; GFX8-LABEL: name: s_buffer_load_v8i32
354 ; GFX8: bb.1 (%ir-block.0):
355 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
357 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
358 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
359 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
360 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
361 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
362 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
363 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
364 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
365 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
366 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
367 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
368 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
369 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
370 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
371 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
372 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
373 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
374 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
375 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
376 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
377 ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
378 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
379 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
380 ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
381 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
382 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
383 ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
384 ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
385 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
386 ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
387 ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
388 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
389 ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
390 ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
391 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
392 ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
393 ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
394 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
395 ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
396 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
397 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
398 ret <8 x i32> %val
399 }
401 define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
402 ; GFX6-LABEL: name: s_buffer_load_v16i32
403 ; GFX6: bb.1 (%ir-block.0):
404 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
406 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
407 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
408 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
409 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
410 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
411 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
412 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
413 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
414 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
415 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
416 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
417 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
418 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
419 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
420 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
421 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
422 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
423 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
424 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
425 ; GFX6-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
426 ; GFX6-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
427 ; GFX6-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
428 ; GFX6-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
429 ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
430 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
431 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
432 ; GFX6-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
433 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
434 ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
435 ; GFX6-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
436 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
437 ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
438 ; GFX6-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
439 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
440 ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
441 ; GFX6-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
442 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
443 ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
444 ; GFX6-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
445 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
446 ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
447 ; GFX6-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
448 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
449 ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
450 ; GFX6-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
451 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
452 ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
453 ; GFX6-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
454 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
455 ; GFX6-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
456 ; GFX6-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
457 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
458 ; GFX6-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
459 ; GFX6-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
460 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
461 ; GFX6-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
462 ; GFX6-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
463 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
464 ; GFX6-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
465 ; GFX6-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
466 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
467 ; GFX6-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
468 ; GFX6-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
469 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
470 ; GFX6-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
471 ; GFX6-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
472 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
473 ; GFX6-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
474 ; GFX6-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
475 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
476 ; GFX6-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
477 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
479 ; GFX7-LABEL: name: s_buffer_load_v16i32
480 ; GFX7: bb.1 (%ir-block.0):
481 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
483 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
484 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
485 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
486 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
487 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
488 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
489 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
490 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
491 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
492 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
493 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
494 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
495 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
496 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
497 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
498 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
499 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
500 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
501 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
502 ; GFX7-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
503 ; GFX7-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
504 ; GFX7-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
505 ; GFX7-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
506 ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
507 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
508 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
509 ; GFX7-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
510 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
511 ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
512 ; GFX7-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
513 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
514 ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
515 ; GFX7-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
516 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
517 ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
518 ; GFX7-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
519 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
520 ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
521 ; GFX7-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
522 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
523 ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
524 ; GFX7-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
525 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
526 ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
527 ; GFX7-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
528 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
529 ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
530 ; GFX7-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
531 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
532 ; GFX7-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
533 ; GFX7-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
534 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
535 ; GFX7-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
536 ; GFX7-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
537 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
538 ; GFX7-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
539 ; GFX7-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
540 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
541 ; GFX7-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
542 ; GFX7-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
543 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
544 ; GFX7-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
545 ; GFX7-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
546 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
547 ; GFX7-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
548 ; GFX7-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
549 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
550 ; GFX7-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
551 ; GFX7-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
552 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
553 ; GFX7-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
554 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
556 ; GFX8-LABEL: name: s_buffer_load_v16i32
557 ; GFX8: bb.1 (%ir-block.0):
558 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
560 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
561 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
562 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
563 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
564 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
565 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
566 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
567 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
568 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
569 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
570 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
571 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
572 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
573 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
574 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
575 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
576 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
577 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
578 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
579 ; GFX8-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
580 ; GFX8-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
581 ; GFX8-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
582 ; GFX8-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
583 ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
584 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
585 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
586 ; GFX8-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
587 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
588 ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
589 ; GFX8-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
590 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
591 ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
592 ; GFX8-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
593 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
594 ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
595 ; GFX8-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
596 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
597 ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
598 ; GFX8-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
599 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
600 ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
601 ; GFX8-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
602 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
603 ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
604 ; GFX8-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
605 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
606 ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
607 ; GFX8-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
608 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
609 ; GFX8-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
610 ; GFX8-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
611 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
612 ; GFX8-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
613 ; GFX8-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
614 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
615 ; GFX8-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
616 ; GFX8-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
617 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
618 ; GFX8-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
619 ; GFX8-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
620 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
621 ; GFX8-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
622 ; GFX8-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
623 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
624 ; GFX8-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
625 ; GFX8-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
626 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
627 ; GFX8-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
628 ; GFX8-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
629 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
630 ; GFX8-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
631 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
632 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
633 ret <16 x i32> %val
634 }
636 define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
637 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1
638 ; GFX6: bb.1 (%ir-block.0):
639 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
641 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
642 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
643 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
644 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
645 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
646 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
647 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
648 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
649 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
650 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
651 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
653 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1
654 ; GFX7: bb.1 (%ir-block.0):
655 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
657 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
658 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
659 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
660 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
661 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
662 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
663 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
664 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
665 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
666 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
667 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
669 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1
670 ; GFX8: bb.1 (%ir-block.0):
671 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
673 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
674 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
675 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
676 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
677 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
678 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32))
679 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
680 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
681 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
682 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
683 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0)
684 ret i32 %val
685 }
687 define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
688 ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4
689 ; GFX6: bb.1 (%ir-block.0):
690 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
692 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
693 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
694 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
695 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
696 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
697 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32))
698 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
699 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
700 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
701 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
703 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4
704 ; GFX7: bb.1 (%ir-block.0):
705 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
707 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
708 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
709 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
710 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
711 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
712 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32))
713 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
714 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
715 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
716 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
718 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4
719 ; GFX8: bb.1 (%ir-block.0):
720 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
722 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
723 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
724 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
725 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
726 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
727 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32))
728 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
729 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
730 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
731 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
732 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1)
733 ret i32 %val
734 }
736 define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
737 ; GFX6-LABEL: name: s_buffer_load_i32_offset_255
738 ; GFX6: bb.1 (%ir-block.0):
739 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
741 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
742 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
743 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
744 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
745 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
746 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
747 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
748 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
749 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
750 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
751 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
753 ; GFX7-LABEL: name: s_buffer_load_i32_offset_255
754 ; GFX7: bb.1 (%ir-block.0):
755 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
757 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
758 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
759 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
760 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
761 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
762 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
763 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
764 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
765 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
766 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
767 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
769 ; GFX8-LABEL: name: s_buffer_load_i32_offset_255
770 ; GFX8: bb.1 (%ir-block.0):
771 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
773 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
774 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
775 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
776 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
777 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
778 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
779 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
780 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
781 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
782 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
783 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0)
784 ret i32 %val
785 }
787 define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
788 ; GFX6-LABEL: name: s_buffer_load_i32_offset_256
789 ; GFX6: bb.1 (%ir-block.0):
790 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
792 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
793 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
794 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
795 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
796 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
797 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32))
798 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
799 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
800 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
801 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
803 ; GFX7-LABEL: name: s_buffer_load_i32_offset_256
804 ; GFX7: bb.1 (%ir-block.0):
805 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
807 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
808 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
809 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
810 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
811 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
812 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32))
813 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
814 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
815 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
816 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
818 ; GFX8-LABEL: name: s_buffer_load_i32_offset_256
819 ; GFX8: bb.1 (%ir-block.0):
820 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
822 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
823 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
824 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
825 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
826 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
827 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32))
828 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
829 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
830 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
831 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
832 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0)
ret i32 %val
}
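; 1020 is the last offset that still fits the GFX6 dword immediate (255 dwords).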
836 define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
837 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020
838 ; GFX6: bb.1 (%ir-block.0):
839 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
841 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
842 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
843 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
844 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
845 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
846 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
847 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
848 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
849 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
850 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
852 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020
853 ; GFX7: bb.1 (%ir-block.0):
854 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
856 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
857 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
858 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
859 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
860 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
861 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
862 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
863 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
864 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
865 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
867 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020
868 ; GFX8: bb.1 (%ir-block.0):
869 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
871 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
872 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
873 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
874 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
875 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
876 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32))
877 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
878 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
879 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
880 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
881 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0)
ret i32 %val
}
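; 1023 is not dword-aligned, so GFX6/GFX7 fall back to an SGPR offset; the GFX8 byte immediate still covers it.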
885 define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
886 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023
887 ; GFX6: bb.1 (%ir-block.0):
888 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
890 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
891 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
892 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
893 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
894 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
895 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
896 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
897 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
898 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
899 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
900 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
902 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023
903 ; GFX7: bb.1 (%ir-block.0):
904 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
906 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
907 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
908 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
909 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
910 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
911 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
912 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
913 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
914 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
915 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
916 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
918 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023
919 ; GFX8: bb.1 (%ir-block.0):
920 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
922 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
923 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
924 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
925 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
926 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
927 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32))
928 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
929 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
930 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
931 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
932 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0)
ret i32 %val
}
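; 1024 overflows the 8-bit dword immediate on GFX6, but GFX7 can use the wider S_BUFFER_LOAD_DWORD_IMM_ci literal (256 dwords).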
936 define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
937 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024
938 ; GFX6: bb.1 (%ir-block.0):
939 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
941 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
942 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
943 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
944 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
945 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
946 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
947 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
948 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
949 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
950 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
951 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
953 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024
954 ; GFX7: bb.1 (%ir-block.0):
955 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
957 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
958 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
959 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
960 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
961 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
962 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32))
963 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
964 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
965 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
966 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
968 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024
969 ; GFX8: bb.1 (%ir-block.0):
970 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
972 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
973 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
974 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
975 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
976 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
977 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32))
978 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
979 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
980 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
981 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
982 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0)
ret i32 %val
}
986 define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
987 ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025
988 ; GFX6: bb.1 (%ir-block.0):
989 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
991 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
992 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
993 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
994 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
995 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
996 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
997 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
998 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
999 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1000 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1001 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1003 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025
1004 ; GFX7: bb.1 (%ir-block.0):
1005 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1007 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1008 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1009 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1010 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1011 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1012 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
1013 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1014 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1015 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1016 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1017 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1019 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025
1020 ; GFX8: bb.1 (%ir-block.0):
1021 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1023 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1024 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1025 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1026 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1027 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1028 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32))
1029 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
1030 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1031 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1032 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1033 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0)
ret i32 %val
}
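; Negative offsets do not fit the unsigned immediate fields on GFX6/GFX8 and go through an SGPR; GFX7 reinterprets dword-aligned negative values as large unsigned dword offsets via the _IMM_ci form below.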
1037 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
1038 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1
1039 ; GFX6: bb.1 (%ir-block.0):
1040 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1042 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1043 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1044 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1045 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1046 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1047 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
1048 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1049 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1050 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1051 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1052 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1054 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1
1055 ; GFX7: bb.1 (%ir-block.0):
1056 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1058 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1059 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1060 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1061 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1062 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1063 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
1064 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1065 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1066 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1067 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1068 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1070 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1
1071 ; GFX8: bb.1 (%ir-block.0):
1072 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1074 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1075 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1076 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1077 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1078 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1079 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
1080 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1081 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1082 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1083 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1084 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1085 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
ret i32 %load
}
1089 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
1090 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4
1091 ; GFX6: bb.1 (%ir-block.0):
1092 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1094 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1095 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1096 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1097 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1098 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1099 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
1100 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1101 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1102 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1103 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1104 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1106 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4
1107 ; GFX7: bb.1 (%ir-block.0):
1108 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1110 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1111 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1112 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1113 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1114 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1115 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32))
1116 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1117 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1118 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1119 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1121 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4
1122 ; GFX8: bb.1 (%ir-block.0):
1123 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1125 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1126 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1127 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1128 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1129 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1130 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
1131 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1132 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1133 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1134 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1135 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1136 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
ret i32 %load
}
1140 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
1141 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8
1142 ; GFX6: bb.1 (%ir-block.0):
1143 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1145 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1146 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1147 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1148 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1149 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1150 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
1151 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1152 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1153 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1154 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1155 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1157 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8
1158 ; GFX7: bb.1 (%ir-block.0):
1159 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1161 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1162 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1163 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1164 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1165 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1166 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32))
1167 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1168 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1169 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1170 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1172 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8
1173 ; GFX8: bb.1 (%ir-block.0):
1174 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1176 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1177 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1178 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1179 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1180 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1181 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
1182 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1183 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1184 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1185 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1186 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1187 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
ret i32 %load
}
1191 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
1192 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31
1193 ; GFX6: bb.1 (%ir-block.0):
1194 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1196 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1197 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1198 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1199 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1200 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1201 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
1202 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1203 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1204 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1205 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1206 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1208 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31
1209 ; GFX7: bb.1 (%ir-block.0):
1210 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1212 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1213 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1214 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1215 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1216 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1217 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32))
1218 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1219 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1220 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1221 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1223 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31
1224 ; GFX8: bb.1 (%ir-block.0):
1225 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1227 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1228 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1229 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1230 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1231 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1232 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
1233 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1234 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1235 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1236 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1237 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1238 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
ret i32 %load
}
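; A cache policy of 1 sets glc, which appears as the trailing '1' operand on the selected loads.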
1242 define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) {
1243 ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30
1244 ; GFX6: bb.1 (%ir-block.0):
1245 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1247 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1248 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1249 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1250 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1251 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1252 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
1253 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32))
1254 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1255 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1256 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1257 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1259 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30
1260 ; GFX7: bb.1 (%ir-block.0):
1261 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1263 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1264 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1265 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1266 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1267 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1268 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32))
1269 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1270 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1271 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1272 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1274 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30
1275 ; GFX8: bb.1 (%ir-block.0):
1276 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1278 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1279 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1280 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1281 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1282 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1283 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
1284 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32))
1285 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1286 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1287 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1288 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1289 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1)
ret i32 %load
}
1293 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
1294 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29
1295 ; GFX6: bb.1 (%ir-block.0):
1296 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1298 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1299 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1300 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1301 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1302 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1303 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
1304 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1305 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1306 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1307 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1308 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1310 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29
1311 ; GFX7: bb.1 (%ir-block.0):
1312 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1314 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1315 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1316 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1317 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1318 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1319 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32))
1320 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1321 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1322 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1323 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1325 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29
1326 ; GFX8: bb.1 (%ir-block.0):
1327 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1329 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1330 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1331 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1332 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1333 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1334 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
1335 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1336 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1337 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1338 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1339 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1340 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
ret i32 %load
}
1344 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
1345 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21
1346 ; GFX6: bb.1 (%ir-block.0):
1347 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1349 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1350 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1351 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1352 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1353 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1354 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
1355 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1356 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1357 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1358 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1359 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1361 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21
1362 ; GFX7: bb.1 (%ir-block.0):
1363 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1365 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1366 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1367 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1368 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1369 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1370 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
1371 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1372 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1373 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1374 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1376 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21
1377 ; GFX8: bb.1 (%ir-block.0):
1378 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1380 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1381 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1382 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1383 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1384 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1385 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
1386 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1387 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1388 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1389 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1390 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1391 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
ret i32 %load
}
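; bit20 (1048576) no longer fits the 20-bit GFX8 byte immediate, so GFX8 also needs an SGPR here; bit19 below still selects the immediate form.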
1395 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
1396 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20
1397 ; GFX6: bb.1 (%ir-block.0):
1398 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1400 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1401 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1402 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1403 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1404 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1405 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
1406 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1407 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1408 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1409 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1410 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1412 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20
1413 ; GFX7: bb.1 (%ir-block.0):
1414 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1416 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1417 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1418 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1419 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1420 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1421 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32))
1422 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1423 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1424 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1425 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1427 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20
1428 ; GFX8: bb.1 (%ir-block.0):
1429 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1431 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1432 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1433 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1434 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1435 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1436 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
1437 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1438 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1439 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1440 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1441 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1442 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
ret i32 %load
}
1446 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) {
1447 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20
1448 ; GFX6: bb.1 (%ir-block.0):
1449 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1451 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1452 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1453 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1454 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1455 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1456 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
1457 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1458 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1459 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1460 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1461 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1463 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20
1464 ; GFX7: bb.1 (%ir-block.0):
1465 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1467 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1468 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1469 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1470 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1471 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1472 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32))
1473 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1474 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1475 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1476 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1478 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20
1479 ; GFX8: bb.1 (%ir-block.0):
1480 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1482 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1483 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1484 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1485 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1486 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1487 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
1488 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1489 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1490 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1491 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1492 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1493 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
ret i32 %load
}
1497 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
1498 ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19
1499 ; GFX6: bb.1 (%ir-block.0):
1500 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1502 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1503 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1504 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1505 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1506 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1507 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288
1508 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1509 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1510 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1511 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1512 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1514 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19
1515 ; GFX7: bb.1 (%ir-block.0):
1516 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1518 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1519 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1520 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1521 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1522 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1523 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32))
1524 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1525 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1526 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1527 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1529 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19
1530 ; GFX8: bb.1 (%ir-block.0):
1531 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1533 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1534 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1535 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1536 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1537 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1538 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
1539 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
1540 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1541 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1542 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1543 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
ret i32 %load
}
1547 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) {
1548 ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19
1549 ; GFX6: bb.1 (%ir-block.0):
1550 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1552 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1553 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1554 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1555 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1556 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1557 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
1558 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1559 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1560 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1561 ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1562 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1564 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19
1565 ; GFX7: bb.1 (%ir-block.0):
1566 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1568 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1569 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1570 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1571 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1572 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1573 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32))
1574 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1575 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1576 ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1577 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1579 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19
1580 ; GFX8: bb.1 (%ir-block.0):
1581 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1583 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1584 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1585 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1586 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1587 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1588 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
1589 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1590 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1591 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1592 ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1593 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
1594 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
ret i32 %load
}
1598 ; Check cases that need to be converted to MUBUF due to the offset being a VGPR.
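; S_BUFFER_LOAD only takes a uniform (SGPR or immediate) soffset, so a divergent VGPR offset is selected as a BUFFER_LOAD_*_OFFEN with a zero SGPR base.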
1599 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1600 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset
1601 ; GFX6: bb.1 (%ir-block.0):
1602 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1604 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1605 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1606 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1607 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1608 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1609 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1610 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1611 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
1612 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1613 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1615 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset
1616 ; GFX7: bb.1 (%ir-block.0):
1617 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1619 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1620 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1621 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1622 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1623 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1624 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1625 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1626 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
1627 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1628 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1630 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset
1631 ; GFX8: bb.1 (%ir-block.0):
1632 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1634 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1635 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1636 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1637 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1638 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1639 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1640 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1641 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
1642 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1643 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1644 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
}
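; Wider results use the corresponding BUFFER_LOAD_DWORDXn variants and return each component in its own VGPR.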
1648 define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1649 ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset
1650 ; GFX6: bb.1 (%ir-block.0):
1651 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1653 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1654 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1655 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1656 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1657 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1658 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1659 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1660 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
1661 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
1662 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
1663 ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
1664 ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
1665 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1667 ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset
1668 ; GFX7: bb.1 (%ir-block.0):
1669 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1671 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1672 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1673 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1674 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1675 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1676 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1677 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1678 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
1679 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
1680 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
1681 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
1682 ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
1683 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1685 ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset
1686 ; GFX8: bb.1 (%ir-block.0):
1687 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1689 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1690 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1691 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1692 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1693 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1694 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1695 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1696 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
1697 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
1698 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
1699 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
1700 ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
1701 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1702 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1703 ret <2 x float> %val
}
1706 define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1707 ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset
1708 ; GFX6: bb.1 (%ir-block.0):
1709 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1711 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1712 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1713 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1714 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1715 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1716 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1717 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1718 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1719 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1720 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1721 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1722 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1723 ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
1724 ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
1725 ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
1726 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
1728 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset
1729 ; GFX7: bb.1 (%ir-block.0):
1730 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1732 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1733 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1734 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1735 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1736 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1737 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1738 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1739 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1740 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1741 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1742 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1743 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1744 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
1745 ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
1746 ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
1747 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
1749 ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset
1750 ; GFX8: bb.1 (%ir-block.0):
1751 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1753 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1754 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1755 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1756 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1757 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1758 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1759 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1760 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1761 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1762 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1763 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1764 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1765 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
1766 ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
1767 ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
1768 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
1769 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1770 ret <3 x float> %val
}
1773 define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1774 ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset
1775 ; GFX6: bb.1 (%ir-block.0):
1776 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1778 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1779 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1780 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1781 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1782 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1783 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1784 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1785 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1786 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1787 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1788 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1789 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1790 ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
1791 ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
1792 ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
1793 ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
1794 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1796 ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset
1797 ; GFX7: bb.1 (%ir-block.0):
1798 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1800 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1801 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1802 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1803 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1804 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1805 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1806 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1807 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1808 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1809 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1810 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1811 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1812 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
1813 ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
1814 ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
1815 ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
1816 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1818 ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset
1819 ; GFX8: bb.1 (%ir-block.0):
1820 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1822 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1823 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1824 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1825 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1826 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1827 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1828 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1829 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1830 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1831 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1832 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1833 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1834 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
1835 ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
1836 ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
1837 ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
1838 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1839 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1840 ret <4 x float> %val
}
1843 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1844 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1845 ; GFX6: bb.1 (%ir-block.0):
1846 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1848 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1849 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1850 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1851 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1852 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1853 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1854 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1855 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1856 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1857 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1858 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1859 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1860 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1861 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1862 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1863 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1864 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1865 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1866 ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
1867 ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
1868 ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
1869 ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
1870 ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
1871 ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
1872 ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
1873 ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
1874 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1876 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1877 ; GFX7: bb.1 (%ir-block.0):
1878 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1880 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1881 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1882 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1883 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1884 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1885 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1886 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1887 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1888 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1889 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1890 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1891 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1892 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1893 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1894 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1895 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1896 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1897 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1898 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
1899 ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
1900 ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
1901 ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
1902 ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
1903 ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
1904 ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
1905 ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
1906 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1908 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1909 ; GFX8: bb.1 (%ir-block.0):
1910 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1912 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1913 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1914 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1915 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1916 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1917 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1918 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1919 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1920 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1921 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1922 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1923 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1924 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1925 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1926 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1927 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1928 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1929 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1930 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
1931 ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
1932 ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
1933 ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
1934 ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
1935 ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
1936 ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
1937 ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
1938 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1939 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1940 ret <8 x float> %val
}
1943 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1944 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset
1945 ; GFX6: bb.1 (%ir-block.0):
1946 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1948 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1949 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1950 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1951 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1952 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1953 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1954 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1955 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1956 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1957 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
1958 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
1959 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
1960 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1961 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1962 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1963 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1964 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1965 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1966 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1967 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1968 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
1969 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
1970 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
1971 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
1972 ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
1973 ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
1974 ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
1975 ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
1976 ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
1977 ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
1978 ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
1979 ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
1980 ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
1981 ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
1982 ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
1983 ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
1984 ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]]
1985 ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]]
1986 ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]]
1987 ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]]
1988 ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]]
1989 ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]]
1990 ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]]
1991 ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]]
1992 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
1994 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset
1995 ; GFX7: bb.1 (%ir-block.0):
1996 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1998 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1999 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2000 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2001 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2002 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2003 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2004 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2005 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2006 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2007 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2008 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2009 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2010 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2011 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2012 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2013 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2014 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2015 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2016 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2017 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2018 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2019 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2020 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2021 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2022 ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2023 ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2024 ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2025 ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2026 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
2027 ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
2028 ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
2029 ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
2030 ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
2031 ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
2032 ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
2033 ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
2034 ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]]
2035 ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]]
2036 ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]]
2037 ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]]
2038 ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]]
2039 ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]]
2040 ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]]
2041 ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]]
2042 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2044 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset
2045 ; GFX8: bb.1 (%ir-block.0):
2046 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2048 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2049 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2050 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2051 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2052 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2053 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2054 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2055 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2056 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2057 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2058 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2059 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2060 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2061 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2062 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2063 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2064 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2065 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2066 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2067 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2068 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2069 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2070 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2071 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2072 ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2073 ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2074 ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2075 ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2076 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
2077 ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
2078 ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
2079 ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
2080 ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
2081 ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
2082 ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
2083 ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
2084 ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]]
2085 ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]]
2086 ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]]
2087 ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]]
2088 ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]]
2089 ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]]
2090 ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]]
2091 ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]]
2092 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2093 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2094 ret <16 x float> %val
}
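;; The next three tests step across the boundary of the MUBUF immediate offset
;; field (12-bit unsigned on these targets, so at most 4095): adds of 4092 and
;; 4095 fold entirely into the instruction's immediate offset, while 4096 no
;; longer fits and has to be materialized through the soffset operand (see the
;; checks that follow).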
2097 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2098 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
2099 ; GFX6: bb.1 (%ir-block.0):
2100 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2102 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2103 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2104 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2105 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2106 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2107 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2108 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2109 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2110 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2111 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2113 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
2114 ; GFX7: bb.1 (%ir-block.0):
2115 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2117 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2118 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2119 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2120 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2121 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2122 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2123 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2124 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2125 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2126 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2128 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
2129 ; GFX8: bb.1 (%ir-block.0):
2130 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2132 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2133 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2134 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2135 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2136 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2137 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2138 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2139 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2140 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2141 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2142 %soffset = add i32 %soffset.base, 4092
2143 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}
2147 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2148 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
2149 ; GFX6: bb.1 (%ir-block.0):
2150 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2152 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2153 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2154 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2155 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2156 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2157 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2158 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2159 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2160 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2161 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2163 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
2164 ; GFX7: bb.1 (%ir-block.0):
2165 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2167 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2168 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2169 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2170 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2171 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2172 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2173 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2174 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2175 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2176 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2178 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
2179 ; GFX8: bb.1 (%ir-block.0):
2180 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2182 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2183 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2184 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2185 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2186 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2187 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2188 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2189 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2190 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2191 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2192 %soffset = add i32 %soffset.base, 4095
2193 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}
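;; For an add of 4096 the constant no longer fits in the immediate offset field.
;; The checks below show GFX6/GFX7 moving the full 4096 into the SGPR soffset
;; operand (immediate 0), while GFX8 selects soffset = 1 with an immediate of
;; 4095; the particular split chosen appears to be a target-specific selection
;; decision rather than anything required by the intrinsic itself.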
2197 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2198 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
2199 ; GFX6: bb.1 (%ir-block.0):
2200 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2202 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2203 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2204 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2205 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2206 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2207 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2208 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
2209 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2210 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2211 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2213 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
2214 ; GFX7: bb.1 (%ir-block.0):
2215 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2217 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2218 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2219 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2220 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2221 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2222 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2223 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
2224 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2225 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2226 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2228 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
2229 ; GFX8: bb.1 (%ir-block.0):
2230 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2232 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2233 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2234 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2235 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2236 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2237 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2238 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
2239 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2240 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2241 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2242 %soffset = add i32 %soffset.base, 4096
2243 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
  ret float %val
}
2247 ; Make sure the base offset is added to each split load.
2248 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2249 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
2250 ; GFX6: bb.1 (%ir-block.0):
2251 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2253 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2254 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2255 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2256 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2257 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2258 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2259 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2260 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2261 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2262 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2263 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2264 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2265 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2266 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2267 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2268 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2269 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2270 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2271 ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
2272 ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
2273 ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
2274 ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
2275 ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
2276 ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
2277 ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
2278 ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
2279 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2281 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
2282 ; GFX7: bb.1 (%ir-block.0):
2283 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2285 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2286 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2287 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2288 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2289 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2290 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2291 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2292 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2293 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2294 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2295 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2296 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2297 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2298 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2299 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2300 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2301 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2302 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2303 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
2304 ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
2305 ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
2306 ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
2307 ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
2308 ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
2309 ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
2310 ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
2311 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2313 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
2314 ; GFX8: bb.1 (%ir-block.0):
2315 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2317 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2318 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2319 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2320 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2321 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2322 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2323 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2324 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2325 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2326 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2327 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2328 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2329 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2330 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2331 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2332 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2333 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2334 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2335 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
2336 ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
2337 ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
2338 ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
2339 ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
2340 ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
2341 ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
2342 ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
2343 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2344 %soffset = add i32 %soffset.base, 4064
2345 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2346 ret <8 x float> %val
}
2349 ; Make sure the maximum offset isn't exceeded when splitting this load.
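;; The checks below show GFX6/GFX7 keeping the whole 4068 base in soffset (with
;; immediates 0 and 16 for the two halves), while GFX8 keeps soffset = 4 and uses
;; immediates 4064 and 4080; either way each split piece's immediate stays within
;; the encodable range.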
2350 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2351 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
2352 ; GFX6: bb.1 (%ir-block.0):
2353 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2355 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2356 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2357 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2358 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2359 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2360 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2361 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
2362 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2363 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2364 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2365 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2366 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2367 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2368 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2369 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2370 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2371 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2372 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2373 ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
2374 ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
2375 ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
2376 ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
2377 ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
2378 ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
2379 ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
2380 ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
2381 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2383 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
2384 ; GFX7: bb.1 (%ir-block.0):
2385 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2387 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2388 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2389 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2390 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2391 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2392 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2393 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
2394 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2395 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2396 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2397 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2398 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2399 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2400 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2401 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2402 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2403 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2404 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2405 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
2406 ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
2407 ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
2408 ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
2409 ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
2410 ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
2411 ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
2412 ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
2413 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2415 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
2416 ; GFX8: bb.1 (%ir-block.0):
2417 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2419 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2420 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2421 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2422 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2423 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2424 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2425 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
2426 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2427 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2428 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2429 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2430 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2431 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2432 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2433 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2434 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2435 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2436 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2437 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
2438 ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
2439 ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
2440 ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
2441 ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
2442 ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
2443 ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
2444 ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
2445 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2446 %soffset = add i32 %soffset.base, 4068
2447 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2448 ret <8 x float> %val
2449 }
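; The 512-bit result is wider than a single MUBUF load, so it is selected as four
; BUFFER_LOAD_DWORDX4_OFFEN instructions, with the 4032 base folded into the
; 4032/4048/4064/4080 immediate offsets on all targets.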
2451 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2452 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
2453 ; GFX6: bb.1 (%ir-block.0):
2454 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2456 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2457 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2458 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2459 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2460 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2461 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2462 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2463 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2464 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2465 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2466 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2467 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2468 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2469 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2470 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2471 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2472 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2473 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2474 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2475 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2476 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2477 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2478 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2479 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2480 ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2481 ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2482 ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2483 ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2484 ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
2485 ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
2486 ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
2487 ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
2488 ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
2489 ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
2490 ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
2491 ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
2492 ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]]
2493 ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]]
2494 ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]]
2495 ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]]
2496 ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]]
2497 ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]]
2498 ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]]
2499 ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]]
2500 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2502 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
2503 ; GFX7: bb.1 (%ir-block.0):
2504 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2506 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2507 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2508 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2509 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2510 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2511 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2512 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2513 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2514 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2515 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2516 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2517 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2518 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2519 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2520 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2521 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2522 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2523 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2524 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2525 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2526 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2527 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2528 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2529 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2530 ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2531 ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2532 ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2533 ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2534 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
2535 ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
2536 ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
2537 ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
2538 ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
2539 ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
2540 ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
2541 ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
2542 ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]]
2543 ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]]
2544 ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]]
2545 ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]]
2546 ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]]
2547 ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]]
2548 ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]]
2549 ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]]
2550 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2552 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
2553 ; GFX8: bb.1 (%ir-block.0):
2554 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2556 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2557 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2558 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2559 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2560 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2561 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2562 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2563 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2564 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2565 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2566 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2567 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2568 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2569 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2570 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2571 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2572 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2573 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2574 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2575 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2576 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2577 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2578 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2579 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2580 ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2581 ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2582 ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2583 ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2584 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
2585 ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
2586 ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
2587 ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
2588 ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
2589 ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
2590 ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
2591 ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
2592 ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]]
2593 ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]]
2594 ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]]
2595 ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]]
2596 ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]]
2597 ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]]
2598 ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]]
2599 ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]]
2600 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2601 %soffset = add i32 %soffset.base, 4032
2602 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2603 ret <16 x float> %val
2604 }
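; For the 4036 base, GFX6 and GFX7 keep the whole value in soffset (S_MOV_B32 4036)
; and use 0/16/32/48 immediates, while GFX8 moves only 4 into soffset and keeps the
; 4032-4080 immediates.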
2606 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2607 ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2608 ; GFX6: bb.1 (%ir-block.0):
2609 ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2611 ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2612 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2613 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2614 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2615 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2616 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2617 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
2618 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2619 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2620 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2621 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2622 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2623 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2624 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2625 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2626 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2627 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2628 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2629 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2630 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2631 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2632 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2633 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2634 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2635 ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2636 ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2637 ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2638 ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2639 ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
2640 ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
2641 ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
2642 ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
2643 ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
2644 ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
2645 ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
2646 ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
2647 ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]]
2648 ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]]
2649 ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]]
2650 ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]]
2651 ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]]
2652 ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]]
2653 ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]]
2654 ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]]
2655 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2657 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2658 ; GFX7: bb.1 (%ir-block.0):
2659 ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2661 ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2662 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2663 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2664 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2665 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2666 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2667 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
2668 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2669 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2670 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2671 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2672 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2673 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2674 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2675 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2676 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2677 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2678 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2679 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2680 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2681 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2682 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2683 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2684 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2685 ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2686 ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2687 ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2688 ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2689 ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
2690 ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
2691 ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
2692 ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
2693 ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
2694 ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
2695 ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
2696 ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
2697 ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]]
2698 ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]]
2699 ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]]
2700 ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]]
2701 ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]]
2702 ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]]
2703 ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]]
2704 ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]]
2705 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2707 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2708 ; GFX8: bb.1 (%ir-block.0):
2709 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2711 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2712 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2713 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2714 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2715 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2716 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2717 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
2718 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2719 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2720 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2721 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2722 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2723 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2724 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2725 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2726 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2727 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2728 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2729 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2730 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2731 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2732 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2733 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2734 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2735 ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2736 ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2737 ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2738 ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2739 ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
2740 ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
2741 ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
2742 ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
2743 ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
2744 ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
2745 ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
2746 ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
2747 ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]]
2748 ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]]
2749 ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]]
2750 ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]]
2751 ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]]
2752 ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]]
2753 ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]]
2754 ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]]
2755 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2756 %soffset = add i32 %soffset.base, 4036
2757 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2758 ret <16 x float> %val
2759 }
2761 ; Waterfall loop is required because the resource descriptor is in VGPRs
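; The loop readfirstlanes each dword of the descriptor into SGPRs, compares the two
; 64-bit halves of the SGPR copy against the VGPR descriptor with V_CMP_EQ_U64,
; narrows exec to the matching lanes with S_AND_SAVEEXEC_B64, performs the load, and
; repeats via SI_WATERFALL_LOOP until every lane has been handled.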
2762 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) {
2763 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2764 ; GFX6: bb.1 (%ir-block.0):
2765 ; GFX6-NEXT: successors: %bb.2(0x80000000)
2766 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2768 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2769 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2770 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2771 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2772 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2773 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2774 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2775 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2776 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2779 ; GFX6-NEXT: successors: %bb.3(0x80000000)
2781 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2782 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2783 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2784 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2785 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2786 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2787 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2788 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2789 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2790 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2791 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
2792 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2793 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2796 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
2798 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2799 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2800 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
2803 ; GFX6-NEXT: successors: %bb.5(0x80000000)
2805 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
2808 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2809 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2811 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2812 ; GFX7: bb.1 (%ir-block.0):
2813 ; GFX7-NEXT: successors: %bb.2(0x80000000)
2814 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2816 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2817 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2818 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2819 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2820 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2821 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2822 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2823 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2824 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2827 ; GFX7-NEXT: successors: %bb.3(0x80000000)
2829 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2830 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2831 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2832 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2833 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2834 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2835 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2836 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2837 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2838 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2839 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
2840 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2841 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2844 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
2846 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2847 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2848 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
2851 ; GFX7-NEXT: successors: %bb.5(0x80000000)
2853 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
2856 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2857 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2859 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2860 ; GFX8: bb.1 (%ir-block.0):
2861 ; GFX8-NEXT: successors: %bb.2(0x80000000)
2862 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2864 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2865 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2866 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2867 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2868 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2869 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2870 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2871 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2872 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2875 ; GFX8-NEXT: successors: %bb.3(0x80000000)
2877 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2878 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2879 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2880 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2881 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2882 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2883 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2884 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2885 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2886 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2887 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
2888 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2889 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2892 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
2894 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2895 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2896 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
2899 ; GFX8-NEXT: successors: %bb.5(0x80000000)
2901 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
2904 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2905 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2906 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2907 ret float %val
2908 }
2910 ; Use the offset inside the waterfall loop
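; The 4092 offset still fits in the MUBUF immediate offset field (max 4095), so it is
; folded into BUFFER_LOAD_DWORD_OFFSET inside the loop instead of being added to the
; scalar offset up front.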
2911 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) {
2912 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
2913 ; GFX6: bb.1 (%ir-block.0):
2914 ; GFX6-NEXT: successors: %bb.2(0x80000000)
2915 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2917 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2918 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2919 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2920 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2921 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2922 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2923 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2926 ; GFX6-NEXT: successors: %bb.3(0x80000000)
2928 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2929 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2930 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2931 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2932 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2933 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2934 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2935 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2936 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2937 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
2938 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2939 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2940 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2943 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
2945 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2946 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2947 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
2950 ; GFX6-NEXT: successors: %bb.5(0x80000000)
2952 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
2955 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
2956 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
2958 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
2959 ; GFX7: bb.1 (%ir-block.0):
2960 ; GFX7-NEXT: successors: %bb.2(0x80000000)
2961 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2963 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2964 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2965 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2966 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2967 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2968 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2969 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2972 ; GFX7-NEXT: successors: %bb.3(0x80000000)
2974 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2975 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2976 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2977 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2978 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2979 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2980 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2981 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2982 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2983 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
2984 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2985 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2986 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2989 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
2991 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2992 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2993 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
2996 ; GFX7-NEXT: successors: %bb.5(0x80000000)
2998 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3001 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3002 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3004 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
3005 ; GFX8: bb.1 (%ir-block.0):
3006 ; GFX8-NEXT: successors: %bb.2(0x80000000)
3007 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3009 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3010 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3011 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3012 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3013 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3014 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3015 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3018 ; GFX8-NEXT: successors: %bb.3(0x80000000)
3020 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3021 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3022 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3023 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3024 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3025 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3026 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3027 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3028 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3029 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3030 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3031 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3032 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3035 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3037 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3038 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3039 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3042 ; GFX8-NEXT: successors: %bb.5(0x80000000)
3044 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3047 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3048 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3049 %soffset = add i32 %soffset.base, 4092
3050 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3051 ret float %val
3052 }
3054 ; The scalar offset exceeds the MUBUF immediate limit, so keep the add out of the loop
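; 4096 does not fit in the immediate offset field, so S_ADD_I32 computes the sum once
; before the loop and the result is used as the VGPR offset operand of
; BUFFER_LOAD_DWORD_OFFEN, with a zero soffset.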
3055 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
3056 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
3057 ; GFX6: bb.1 (%ir-block.0):
3058 ; GFX6-NEXT: successors: %bb.2(0x80000000)
3059 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3061 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3062 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3063 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3064 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3065 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3066 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3067 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3068 ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3069 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3070 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3071 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3074 ; GFX6-NEXT: successors: %bb.3(0x80000000)
3076 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3077 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3078 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3079 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3080 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3081 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3082 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3083 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3084 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3085 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3086 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3087 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3088 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3091 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3093 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3094 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3095 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3098 ; GFX6-NEXT: successors: %bb.5(0x80000000)
3100 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3103 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3104 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3106 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
3107 ; GFX7: bb.1 (%ir-block.0):
3108 ; GFX7-NEXT: successors: %bb.2(0x80000000)
3109 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3111 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3112 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3113 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3114 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3115 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3116 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3117 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3118 ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3119 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3120 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3121 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3124 ; GFX7-NEXT: successors: %bb.3(0x80000000)
3126 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3127 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3128 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3129 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3130 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3131 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3132 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3133 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3134 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3135 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3136 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3137 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3138 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3141 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3143 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3144 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3145 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3148 ; GFX7-NEXT: successors: %bb.5(0x80000000)
3150 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3153 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3154 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3156 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
3157 ; GFX8: bb.1 (%ir-block.0):
3158 ; GFX8-NEXT: successors: %bb.2(0x80000000)
3159 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3161 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3162 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3163 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3164 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3165 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3166 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3167 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3168 ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3169 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3170 ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3171 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3174 ; GFX8-NEXT: successors: %bb.3(0x80000000)
3176 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3177 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3178 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3179 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3180 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3181 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3182 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3183 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3184 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3185 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3186 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3187 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3188 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3191 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3193 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3194 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3195 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3198 ; GFX8-NEXT: successors: %bb.5(0x80000000)
3200 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3203 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3204 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3205 %soffset = add i32 %soffset.base, 4096
3206 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3207 ret float %val
3208 }
3210 ; Waterfall loop, but constant offset
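; With no scalar offset input, the constant 4095 is used directly as the immediate
; offset of BUFFER_LOAD_DWORD_OFFSET and soffset is a zero SGPR; the resource is still
; in VGPRs, so the waterfall loop is kept.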
3211 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) {
3212 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
3213 ; GFX6: bb.1 (%ir-block.0):
3214 ; GFX6-NEXT: successors: %bb.2(0x80000000)
3215 ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3217 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3218 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3219 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3220 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3221 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3222 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3223 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3226 ; GFX6-NEXT: successors: %bb.3(0x80000000)
3228 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3229 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3230 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3231 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3232 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3233 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3234 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3235 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3236 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3237 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
3238 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3239 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3240 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3243 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3245 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
3246 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3247 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3250 ; GFX6-NEXT: successors: %bb.5(0x80000000)
3252 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3255 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3256 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3258 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
3259 ; GFX7: bb.1 (%ir-block.0):
3260 ; GFX7-NEXT: successors: %bb.2(0x80000000)
3261 ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3263 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3264 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3265 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3266 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3267 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3268 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3269 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3272 ; GFX7-NEXT: successors: %bb.3(0x80000000)
3274 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3275 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3276 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3277 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3278 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3279 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3280 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3281 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3282 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3283 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
3284 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3285 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3286 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3289 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3291 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
3292 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3293 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3296 ; GFX7-NEXT: successors: %bb.5(0x80000000)
3298 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3301 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3302 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3304 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
3305 ; GFX8: bb.1 (%ir-block.0):
3306 ; GFX8-NEXT: successors: %bb.2(0x80000000)
3307 ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3309 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3310 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3311 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3312 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3313 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3314 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3315 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3318 ; GFX8-NEXT: successors: %bb.3(0x80000000)
3320 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3321 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3322 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3323 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3324 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3325 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3326 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3327 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3328 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3329 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
3330 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3331 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3332 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3335 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3337 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
3338 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3339 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3342 ; GFX8-NEXT: successors: %bb.5(0x80000000)
3344 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3347 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3348 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3349 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0)
3350 ret float %val
3353 ; Waterfall loop, but constant offset
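; 4096 no longer fits in the 12-bit immediate. The checks below show GFX6/GFX7
; materializing it in a VGPR and using the OFFEN form, while GFX8 folds it as
; soffset 1 plus immediate 4095.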
3354 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) {
3355 ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
3356 ; GFX6: bb.1 (%ir-block.0):
3357 ; GFX6-NEXT: successors: %bb.2(0x80000000)
3358 ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3360 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3361 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3362 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3363 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3364 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3365 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3366 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
3367 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3368 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3371 ; GFX6-NEXT: successors: %bb.3(0x80000000)
3373 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3374 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3375 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3376 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3377 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3378 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3379 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3380 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3381 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3382 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3383 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3384 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3385 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3388 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3390 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3391 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3392 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3395 ; GFX6-NEXT: successors: %bb.5(0x80000000)
3397 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3400 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3401 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3403 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
3404 ; GFX7: bb.1 (%ir-block.0):
3405 ; GFX7-NEXT: successors: %bb.2(0x80000000)
3406 ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3408 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3409 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3410 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3411 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3412 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3413 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3414 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
3415 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3416 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3419 ; GFX7-NEXT: successors: %bb.3(0x80000000)
3421 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3422 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3423 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3424 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3425 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3426 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3427 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3428 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3429 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3430 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3431 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3432 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3433 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3436 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3438 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3439 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3440 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3443 ; GFX7-NEXT: successors: %bb.5(0x80000000)
3445 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3448 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3449 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3451 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
3452 ; GFX8: bb.1 (%ir-block.0):
3453 ; GFX8-NEXT: successors: %bb.2(0x80000000)
3454 ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3456 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3457 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3458 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3459 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3460 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3461 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
3462 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3465 ; GFX8-NEXT: successors: %bb.3(0x80000000)
3467 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3468 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3469 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3470 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3471 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3472 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3473 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3474 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3475 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3476 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
3477 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3478 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3479 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3482 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3484 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096)
3485 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3486 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3489 ; GFX8-NEXT: successors: %bb.5(0x80000000)
3491 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3494 ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3495 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
3496 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0)
3497 ret float %val
3500 ; Need a waterfall loop, but the offset is scalar.
3501 ; Make sure the base offset is added to each split load.
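; The v8f32 result is wider than a single MUBUF load, so the checks expect two
; x4-dword loads; with a base of 4064 the two halves use immediates 4064 and
; 4080, which still fit in the immediate field.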
3502 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) {
3503 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
3504 ; GFX6: bb.1 (%ir-block.0):
3505 ; GFX6-NEXT: successors: %bb.2(0x80000000)
3506 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3508 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3509 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3510 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3511 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3512 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3513 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3514 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3517 ; GFX6-NEXT: successors: %bb.3(0x80000000)
3519 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3520 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3521 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3522 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3523 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3524 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3525 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3526 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3527 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3528 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3529 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3530 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3531 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3534 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3536 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3537 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3538 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3539 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3542 ; GFX6-NEXT: successors: %bb.5(0x80000000)
3544 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3547 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
3548 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3549 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3550 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3551 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3552 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3553 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3554 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3555 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3556 ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]]
3557 ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]]
3558 ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]]
3559 ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]]
3560 ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]]
3561 ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]]
3562 ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]]
3563 ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]]
3564 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3566 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
3567 ; GFX7: bb.1 (%ir-block.0):
3568 ; GFX7-NEXT: successors: %bb.2(0x80000000)
3569 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3571 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3572 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3573 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3574 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3575 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3576 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3577 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3580 ; GFX7-NEXT: successors: %bb.3(0x80000000)
3582 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3583 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3584 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3585 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3586 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3587 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3588 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3589 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3590 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3591 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3592 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3593 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3594 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3597 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3599 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3600 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3601 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3602 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3605 ; GFX7-NEXT: successors: %bb.5(0x80000000)
3607 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3610 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
3611 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3612 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3613 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3614 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3615 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3616 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3617 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3618 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3619 ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]]
3620 ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]]
3621 ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]]
3622 ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]]
3623 ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]]
3624 ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]]
3625 ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]]
3626 ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]]
3627 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3629 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
3630 ; GFX8: bb.1 (%ir-block.0):
3631 ; GFX8-NEXT: successors: %bb.2(0x80000000)
3632 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3634 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3635 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3636 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3637 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3638 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3639 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3640 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3643 ; GFX8-NEXT: successors: %bb.3(0x80000000)
3645 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3646 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3647 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3648 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3649 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3650 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3651 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3652 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3653 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3654 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3655 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3656 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3657 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3660 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3662 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3663 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3664 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3665 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3668 ; GFX8-NEXT: successors: %bb.5(0x80000000)
3670 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3673 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
3674 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3675 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3676 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3677 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3678 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3679 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3680 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3681 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3682 ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]]
3683 ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]]
3684 ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]]
3685 ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]]
3686 ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]]
3687 ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]]
3688 ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]]
3689 ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]]
3690 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3691 %soffset = add i32 %soffset.base, 4064
3692 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3693 ret <8 x float> %val
3696 ; Need a waterfall loop, but the offset is scalar.
3698 ; Make sure the maximum offset isn't exceeded when splitting this load.
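; With a base of 4068 the end of the 32-byte access would fall past the limit
; the splitting code allows for the immediate, so the add is kept as S_ADD_I32
; and the split loads use the OFFEN form with immediates 0 and 16.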
3698 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) {
3699 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
3700 ; GFX6: bb.1 (%ir-block.0):
3701 ; GFX6-NEXT: successors: %bb.2(0x80000000)
3702 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3704 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3705 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3706 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3707 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3708 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3709 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3710 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
3711 ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3712 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3713 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3714 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3717 ; GFX6-NEXT: successors: %bb.3(0x80000000)
3719 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3720 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3721 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3722 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3723 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3724 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3725 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3726 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3727 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3728 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3729 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3730 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3731 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3734 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3736 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3737 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3738 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3739 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3742 ; GFX6-NEXT: successors: %bb.5(0x80000000)
3744 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3747 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3748 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3749 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3750 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3751 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3752 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3753 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3754 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3755 ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3756 ; GFX6-NEXT: $vgpr0 = COPY [[COPY10]]
3757 ; GFX6-NEXT: $vgpr1 = COPY [[COPY11]]
3758 ; GFX6-NEXT: $vgpr2 = COPY [[COPY12]]
3759 ; GFX6-NEXT: $vgpr3 = COPY [[COPY13]]
3760 ; GFX6-NEXT: $vgpr4 = COPY [[COPY14]]
3761 ; GFX6-NEXT: $vgpr5 = COPY [[COPY15]]
3762 ; GFX6-NEXT: $vgpr6 = COPY [[COPY16]]
3763 ; GFX6-NEXT: $vgpr7 = COPY [[COPY17]]
3764 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3766 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
3767 ; GFX7: bb.1 (%ir-block.0):
3768 ; GFX7-NEXT: successors: %bb.2(0x80000000)
3769 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3771 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3772 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3773 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3774 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3775 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3776 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3777 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
3778 ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3779 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3780 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3781 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3784 ; GFX7-NEXT: successors: %bb.3(0x80000000)
3786 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3787 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3788 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3789 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3790 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3791 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3792 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3793 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3794 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3795 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3796 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3797 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3798 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3801 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3803 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3804 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3805 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3806 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3809 ; GFX7-NEXT: successors: %bb.5(0x80000000)
3811 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3814 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3815 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3816 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3817 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3818 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3819 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3820 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3821 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3822 ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3823 ; GFX7-NEXT: $vgpr0 = COPY [[COPY10]]
3824 ; GFX7-NEXT: $vgpr1 = COPY [[COPY11]]
3825 ; GFX7-NEXT: $vgpr2 = COPY [[COPY12]]
3826 ; GFX7-NEXT: $vgpr3 = COPY [[COPY13]]
3827 ; GFX7-NEXT: $vgpr4 = COPY [[COPY14]]
3828 ; GFX7-NEXT: $vgpr5 = COPY [[COPY15]]
3829 ; GFX7-NEXT: $vgpr6 = COPY [[COPY16]]
3830 ; GFX7-NEXT: $vgpr7 = COPY [[COPY17]]
3831 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3833 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
3834 ; GFX8: bb.1 (%ir-block.0):
3835 ; GFX8-NEXT: successors: %bb.2(0x80000000)
3836 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3838 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3839 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3840 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3841 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3842 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3843 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3844 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
3845 ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3846 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3847 ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3848 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3851 ; GFX8-NEXT: successors: %bb.3(0x80000000)
3853 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3854 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3855 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3856 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3857 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3858 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3859 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3860 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3861 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3862 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3863 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3864 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3865 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3868 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3870 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3871 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3872 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3873 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3876 ; GFX8-NEXT: successors: %bb.5(0x80000000)
3878 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3881 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3882 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3883 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3884 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3885 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3886 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3887 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3888 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3889 ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3890 ; GFX8-NEXT: $vgpr0 = COPY [[COPY10]]
3891 ; GFX8-NEXT: $vgpr1 = COPY [[COPY11]]
3892 ; GFX8-NEXT: $vgpr2 = COPY [[COPY12]]
3893 ; GFX8-NEXT: $vgpr3 = COPY [[COPY13]]
3894 ; GFX8-NEXT: $vgpr4 = COPY [[COPY14]]
3895 ; GFX8-NEXT: $vgpr5 = COPY [[COPY15]]
3896 ; GFX8-NEXT: $vgpr6 = COPY [[COPY16]]
3897 ; GFX8-NEXT: $vgpr7 = COPY [[COPY17]]
3898 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3899 %soffset = add i32 %soffset.base, 4068
3900 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3901 ret <8 x float> %val
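; Same situation with a base of 4096: it cannot be folded into the immediate,
; so the checks expect S_ADD_I32 followed by OFFEN loads at immediates 0 and 16.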
3904 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
3905 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
3906 ; GFX6: bb.1 (%ir-block.0):
3907 ; GFX6-NEXT: successors: %bb.2(0x80000000)
3908 ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3910 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3911 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3912 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3913 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3914 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3915 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3916 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3917 ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3918 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3919 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3920 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3923 ; GFX6-NEXT: successors: %bb.3(0x80000000)
3925 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3926 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3927 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3928 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3929 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3930 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3931 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3932 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3933 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3934 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3935 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3936 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3937 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3940 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
3942 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3943 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3944 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3945 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
3948 ; GFX6-NEXT: successors: %bb.5(0x80000000)
3950 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
3953 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3954 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3955 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3956 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3957 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3958 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3959 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3960 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3961 ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3962 ; GFX6-NEXT: $vgpr0 = COPY [[COPY10]]
3963 ; GFX6-NEXT: $vgpr1 = COPY [[COPY11]]
3964 ; GFX6-NEXT: $vgpr2 = COPY [[COPY12]]
3965 ; GFX6-NEXT: $vgpr3 = COPY [[COPY13]]
3966 ; GFX6-NEXT: $vgpr4 = COPY [[COPY14]]
3967 ; GFX6-NEXT: $vgpr5 = COPY [[COPY15]]
3968 ; GFX6-NEXT: $vgpr6 = COPY [[COPY16]]
3969 ; GFX6-NEXT: $vgpr7 = COPY [[COPY17]]
3970 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3972 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
3973 ; GFX7: bb.1 (%ir-block.0):
3974 ; GFX7-NEXT: successors: %bb.2(0x80000000)
3975 ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3977 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3978 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3979 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3980 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3981 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3982 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3983 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3984 ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3985 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3986 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3987 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3990 ; GFX7-NEXT: successors: %bb.3(0x80000000)
3992 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3993 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3994 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3995 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3996 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3997 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3998 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3999 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4000 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4001 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4002 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
4003 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4004 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4007 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4009 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4010 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4011 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4012 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4015 ; GFX7-NEXT: successors: %bb.5(0x80000000)
4017 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4020 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4021 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4022 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4023 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4024 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4025 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4026 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4027 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4028 ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4029 ; GFX7-NEXT: $vgpr0 = COPY [[COPY10]]
4030 ; GFX7-NEXT: $vgpr1 = COPY [[COPY11]]
4031 ; GFX7-NEXT: $vgpr2 = COPY [[COPY12]]
4032 ; GFX7-NEXT: $vgpr3 = COPY [[COPY13]]
4033 ; GFX7-NEXT: $vgpr4 = COPY [[COPY14]]
4034 ; GFX7-NEXT: $vgpr5 = COPY [[COPY15]]
4035 ; GFX7-NEXT: $vgpr6 = COPY [[COPY16]]
4036 ; GFX7-NEXT: $vgpr7 = COPY [[COPY17]]
4037 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4039 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
4040 ; GFX8: bb.1 (%ir-block.0):
4041 ; GFX8-NEXT: successors: %bb.2(0x80000000)
4042 ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
4044 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4045 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4046 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4047 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4048 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4049 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
4050 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
4051 ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
4052 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
4053 ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4054 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4057 ; GFX8-NEXT: successors: %bb.3(0x80000000)
4059 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4060 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4061 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4062 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4063 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4064 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4065 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4066 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4067 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4068 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4069 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
4070 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4071 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4074 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4076 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4077 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4078 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4079 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4082 ; GFX8-NEXT: successors: %bb.5(0x80000000)
4084 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4087 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4088 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4089 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4090 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4091 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4092 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4093 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4094 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4095 ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4096 ; GFX8-NEXT: $vgpr0 = COPY [[COPY10]]
4097 ; GFX8-NEXT: $vgpr1 = COPY [[COPY11]]
4098 ; GFX8-NEXT: $vgpr2 = COPY [[COPY12]]
4099 ; GFX8-NEXT: $vgpr3 = COPY [[COPY13]]
4100 ; GFX8-NEXT: $vgpr4 = COPY [[COPY14]]
4101 ; GFX8-NEXT: $vgpr5 = COPY [[COPY15]]
4102 ; GFX8-NEXT: $vgpr6 = COPY [[COPY16]]
4103 ; GFX8-NEXT: $vgpr7 = COPY [[COPY17]]
4104 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4105 %soffset = add i32 %soffset.base, 4096
4106 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4107 ret <8 x float> %val
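; Waterfall loop over the VGPR rsrc with a 5000 offset add: GFX6/7 keep the full constant in soffset, while the GFX8 checks below use a 4064 soffset base plus 936/952 immediate offsets.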
4110 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) {
4111 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
4112 ; GFX6: bb.1 (%ir-block.0):
4113 ; GFX6-NEXT: successors: %bb.2(0x80000000)
4114 ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4116 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4117 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4118 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4119 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4120 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4121 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4122 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
4123 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4126 ; GFX6-NEXT: successors: %bb.3(0x80000000)
4128 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4129 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4130 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4131 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4132 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4133 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4134 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4135 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4136 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4137 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4138 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4139 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4140 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4143 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4145 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4146 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4147 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4148 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4151 ; GFX6-NEXT: successors: %bb.5(0x80000000)
4153 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4156 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4157 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4158 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4159 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4160 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4161 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4162 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4163 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4164 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4165 ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]]
4166 ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]]
4167 ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]]
4168 ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]]
4169 ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]]
4170 ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]]
4171 ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]]
4172 ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]]
4173 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4175 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
4176 ; GFX7: bb.1 (%ir-block.0):
4177 ; GFX7-NEXT: successors: %bb.2(0x80000000)
4178 ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4180 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4181 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4182 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4183 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4184 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4185 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4186 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
4187 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4190 ; GFX7-NEXT: successors: %bb.3(0x80000000)
4192 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4193 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4194 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4195 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4196 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4197 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4198 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4199 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4200 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4201 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4202 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4203 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4204 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4207 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4209 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4210 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4211 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4212 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4215 ; GFX7-NEXT: successors: %bb.5(0x80000000)
4217 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4220 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4221 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4222 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4223 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4224 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4225 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4226 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4227 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4228 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4229 ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]]
4230 ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]]
4231 ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]]
4232 ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]]
4233 ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]]
4234 ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]]
4235 ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]]
4236 ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]]
4237 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4239 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
4240 ; GFX8: bb.1 (%ir-block.0):
4241 ; GFX8-NEXT: successors: %bb.2(0x80000000)
4242 ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4244 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4245 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4246 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4247 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4248 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4249 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4250 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064
4251 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4254 ; GFX8-NEXT: successors: %bb.3(0x80000000)
4256 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4257 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4258 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4259 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4260 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4261 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4262 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4263 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4264 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4265 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4266 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4267 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4268 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4271 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4273 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4274 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4275 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4276 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4279 ; GFX8-NEXT: successors: %bb.5(0x80000000)
4281 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4284 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4285 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4286 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4287 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4288 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4289 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4290 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4291 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4292 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4293 ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]]
4294 ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]]
4295 ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]]
4296 ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]]
4297 ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]]
4298 ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]]
4299 ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]]
4300 ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]]
4301 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4302 %soffset = add i32 %offset.base, 5000
4303 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4304 ret <8 x float> %val
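; Waterfall loop with a 4076 offset add: GFX6/7 put 4076 directly in soffset; the GFX8 checks use a soffset base of 12 with 4064/4080 immediate offsets.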
4307 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) {
4308 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
4309 ; GFX6: bb.1 (%ir-block.0):
4310 ; GFX6-NEXT: successors: %bb.2(0x80000000)
4311 ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4313 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4314 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4315 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4316 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4317 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4318 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4319 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
4320 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4323 ; GFX6-NEXT: successors: %bb.3(0x80000000)
4325 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4326 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4327 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4328 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4329 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4330 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4331 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4332 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4333 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4334 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4335 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4336 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4337 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4340 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4342 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4343 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4344 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4345 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4348 ; GFX6-NEXT: successors: %bb.5(0x80000000)
4350 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4353 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4354 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4355 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4356 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4357 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4358 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4359 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4360 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4361 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4362 ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]]
4363 ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]]
4364 ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]]
4365 ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]]
4366 ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]]
4367 ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]]
4368 ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]]
4369 ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]]
4370 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4372 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
4373 ; GFX7: bb.1 (%ir-block.0):
4374 ; GFX7-NEXT: successors: %bb.2(0x80000000)
4375 ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4377 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4378 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4379 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4380 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4381 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4382 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4383 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
4384 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4387 ; GFX7-NEXT: successors: %bb.3(0x80000000)
4389 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4390 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4391 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4392 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4393 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4394 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4395 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4396 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4397 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4398 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4399 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4400 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4401 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4404 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4406 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4407 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4408 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4409 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4412 ; GFX7-NEXT: successors: %bb.5(0x80000000)
4414 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4417 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4418 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4419 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4420 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4421 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4422 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4423 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4424 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4425 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4426 ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]]
4427 ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]]
4428 ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]]
4429 ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]]
4430 ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]]
4431 ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]]
4432 ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]]
4433 ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]]
4434 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4436 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
4437 ; GFX8: bb.1 (%ir-block.0):
4438 ; GFX8-NEXT: successors: %bb.2(0x80000000)
4439 ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4441 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4442 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4443 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4444 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4445 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4446 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4447 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12
4448 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4451 ; GFX8-NEXT: successors: %bb.3(0x80000000)
4453 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4454 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4455 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4456 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4457 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4458 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4459 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4460 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4461 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4462 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4463 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4464 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4465 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4468 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4470 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4471 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4472 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4473 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4476 ; GFX8-NEXT: successors: %bb.5(0x80000000)
4478 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4481 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4482 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4483 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4484 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4485 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4486 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4487 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4488 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4489 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4490 ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]]
4491 ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]]
4492 ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]]
4493 ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]]
4494 ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]]
4495 ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]]
4496 ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]]
4497 ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]]
4498 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4499 %soffset = add i32 %offset.base, 4076
4500 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4501 ret <8 x float> %val
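; Waterfall loop with a 4080 offset add: GFX6/7 put 4080 in soffset; the GFX8 checks use a soffset base of 16 with 4064/4080 immediate offsets.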
4504 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) {
4505 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
4506 ; GFX6: bb.1 (%ir-block.0):
4507 ; GFX6-NEXT: successors: %bb.2(0x80000000)
4508 ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4510 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4511 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4512 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4513 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4514 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4515 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4516 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
4517 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4520 ; GFX6-NEXT: successors: %bb.3(0x80000000)
4522 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4523 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4524 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4525 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4526 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4527 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4528 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4529 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4530 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4531 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4532 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4533 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4534 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4537 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4539 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4540 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4541 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4542 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4545 ; GFX6-NEXT: successors: %bb.5(0x80000000)
4547 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4550 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4551 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4552 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4553 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4554 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4555 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4556 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4557 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4558 ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4559 ; GFX6-NEXT: $vgpr0 = COPY [[COPY9]]
4560 ; GFX6-NEXT: $vgpr1 = COPY [[COPY10]]
4561 ; GFX6-NEXT: $vgpr2 = COPY [[COPY11]]
4562 ; GFX6-NEXT: $vgpr3 = COPY [[COPY12]]
4563 ; GFX6-NEXT: $vgpr4 = COPY [[COPY13]]
4564 ; GFX6-NEXT: $vgpr5 = COPY [[COPY14]]
4565 ; GFX6-NEXT: $vgpr6 = COPY [[COPY15]]
4566 ; GFX6-NEXT: $vgpr7 = COPY [[COPY16]]
4567 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4569 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
4570 ; GFX7: bb.1 (%ir-block.0):
4571 ; GFX7-NEXT: successors: %bb.2(0x80000000)
4572 ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4574 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4575 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4576 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4577 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4578 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4579 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4580 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
4581 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4584 ; GFX7-NEXT: successors: %bb.3(0x80000000)
4586 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4587 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4588 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4589 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4590 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4591 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4592 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4593 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4594 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4595 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4596 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4597 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4598 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4601 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4603 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4604 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4605 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4606 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4609 ; GFX7-NEXT: successors: %bb.5(0x80000000)
4611 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4614 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4615 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4616 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4617 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4618 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4619 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4620 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4621 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4622 ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4623 ; GFX7-NEXT: $vgpr0 = COPY [[COPY9]]
4624 ; GFX7-NEXT: $vgpr1 = COPY [[COPY10]]
4625 ; GFX7-NEXT: $vgpr2 = COPY [[COPY11]]
4626 ; GFX7-NEXT: $vgpr3 = COPY [[COPY12]]
4627 ; GFX7-NEXT: $vgpr4 = COPY [[COPY13]]
4628 ; GFX7-NEXT: $vgpr5 = COPY [[COPY14]]
4629 ; GFX7-NEXT: $vgpr6 = COPY [[COPY15]]
4630 ; GFX7-NEXT: $vgpr7 = COPY [[COPY16]]
4631 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4633 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
4634 ; GFX8: bb.1 (%ir-block.0):
4635 ; GFX8-NEXT: successors: %bb.2(0x80000000)
4636 ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4638 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4639 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4640 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4641 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4642 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4643 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4644 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
4645 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4648 ; GFX8-NEXT: successors: %bb.3(0x80000000)
4650 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4651 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4652 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4653 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4654 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4655 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4656 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4657 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4658 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4659 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4660 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4661 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4662 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4665 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4667 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4668 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4669 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4670 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4673 ; GFX8-NEXT: successors: %bb.5(0x80000000)
4675 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4678 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4679 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4680 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4681 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4682 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4683 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4684 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4685 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4686 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4687 ; GFX8-NEXT: $vgpr0 = COPY [[COPY9]]
4688 ; GFX8-NEXT: $vgpr1 = COPY [[COPY10]]
4689 ; GFX8-NEXT: $vgpr2 = COPY [[COPY11]]
4690 ; GFX8-NEXT: $vgpr3 = COPY [[COPY12]]
4691 ; GFX8-NEXT: $vgpr4 = COPY [[COPY13]]
4692 ; GFX8-NEXT: $vgpr5 = COPY [[COPY14]]
4693 ; GFX8-NEXT: $vgpr6 = COPY [[COPY15]]
4694 ; GFX8-NEXT: $vgpr7 = COPY [[COPY16]]
4695 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4696 %soffset = add i32 %offset.base, 4080
4697 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4698 ret <8 x float> %val
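; Constant 4064 offset with the rsrc in VGPRs: a waterfall loop is still needed, and all targets use a zero soffset with 4064/4080 immediate offsets.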
4701 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) {
4702 ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
4703 ; GFX6: bb.1 (%ir-block.0):
4704 ; GFX6-NEXT: successors: %bb.2(0x80000000)
4705 ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
4707 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4708 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4709 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4710 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4711 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4712 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4713 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4716 ; GFX6-NEXT: successors: %bb.3(0x80000000)
4718 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4719 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4720 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4721 ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4722 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4723 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4724 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4725 ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4726 ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4727 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
4728 ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4729 ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4730 ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4733 ; GFX6-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4735 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4736 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4737 ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4738 ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4741 ; GFX6-NEXT: successors: %bb.5(0x80000000)
4743 ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4746 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
4747 ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4748 ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4749 ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4750 ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4751 ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4752 ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4753 ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4754 ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4755 ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]]
4756 ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]]
4757 ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]]
4758 ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]]
4759 ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]]
4760 ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]]
4761 ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]]
4762 ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]]
4763 ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4765 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
4766 ; GFX7: bb.1 (%ir-block.0):
4767 ; GFX7-NEXT: successors: %bb.2(0x80000000)
4768 ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
4770 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4771 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4772 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4773 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4774 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4775 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4776 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4779 ; GFX7-NEXT: successors: %bb.3(0x80000000)
4781 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4782 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4783 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4784 ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4785 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4786 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4787 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4788 ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4789 ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4790 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
4791 ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4792 ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4793 ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4796 ; GFX7-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4798 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4799 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4800 ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4801 ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4804 ; GFX7-NEXT: successors: %bb.5(0x80000000)
4806 ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4809 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
4810 ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4811 ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4812 ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4813 ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4814 ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4815 ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4816 ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4817 ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4818 ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]]
4819 ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]]
4820 ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]]
4821 ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]]
4822 ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]]
4823 ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]]
4824 ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]]
4825 ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]]
4826 ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4828 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
4829 ; GFX8: bb.1 (%ir-block.0):
4830 ; GFX8-NEXT: successors: %bb.2(0x80000000)
4831 ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
4833 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4834 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4835 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4836 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4837 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4838 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4839 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4842 ; GFX8-NEXT: successors: %bb.3(0x80000000)
4844 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4845 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4846 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4847 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4848 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4849 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4850 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4851 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4852 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4853 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
4854 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4855 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4856 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4859 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
4861 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4862 ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4863 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4864 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
4867 ; GFX8-NEXT: successors: %bb.5(0x80000000)
4869 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
4872 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
4873 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4874 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4875 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4876 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4877 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4878 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4879 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4880 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4881 ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]]
4882 ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]]
4883 ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]]
4884 ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]]
4885 ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]]
4886 ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]]
4887 ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]]
4888 ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]]
4889 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4890 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0)
4891 ret <8 x float> %val
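; Offset is a VGPR + SGPR add: the VGPR part becomes the voffset of BUFFER_LOAD_DWORD_OFFEN and the SGPR part goes in soffset.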
define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
; GFX6: bb.1 (%ir-block.0):
; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
; GFX7: bb.1 (%ir-block.0):
; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
; GFX8: bb.1 (%ir-block.0):
; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset = add i32 %offset.v, %offset.s
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
ret float %val
}

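; Same as above with the add operands commuted; the selected code is identical.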
define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
; GFX6: bb.1 (%ir-block.0):
; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
; GFX7: bb.1 (%ir-block.0):
; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
; GFX8: bb.1 (%ir-block.0):
; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset = add i32 %offset.s, %offset.v
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
ret float %val
}

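; (VGPR + SGPR) + 1024: the constant is folded into the instruction's immediate offset, but the SGPR addend is copied to a VGPR and added with V_ADD_CO_U32 rather than being used as soffset.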
define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
; GFX6: bb.1 (%ir-block.0):
; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
; GFX7: bb.1 (%ir-block.0):
; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
; GFX8: bb.1 (%ir-block.0):
; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.v, %offset.s
%offset = add i32 %offset.base, 1024
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
ret float %val
}

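; Same as the previous case with the inner add commuted; only the V_ADD_CO_U32 operand order differs.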
define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
; GFX6: bb.1 (%ir-block.0):
; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
; GFX7: bb.1 (%ir-block.0):
; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
; GFX8: bb.1 (%ir-block.0):
; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.s, %offset.v
%offset = add i32 %offset.base, 1024
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
ret float %val
}

; TODO: Ideally this would be reassociated to fold.
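; (In the code selected below, the 1024 still folds into the immediate offset, but %offset.s is first copied to a VGPR for the V_ADD_CO_U32 instead of being used as soffset.)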
define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
; GFX6: bb.1 (%ir-block.0):
; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
; GFX7: bb.1 (%ir-block.0):
; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
; GFX8: bb.1 (%ir-block.0):
; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.s, 1024
%offset = add i32 %offset.base, %offset.v
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
ret float %val
}

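; (VGPR + 1024) + SGPR: selected the same way as the preceding case apart from the V_ADD_CO_U32 operand order.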
define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
; GFX6: bb.1 (%ir-block.0):
; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
; GFX7: bb.1 (%ir-block.0):
; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
; GFX8: bb.1 (%ir-block.0):
; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.v, 1024
%offset = add i32 %offset.base, %offset.s
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
ret float %val
}

declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg)
declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg)
declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg)

declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg)
declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg)
declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg)
declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg)
declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg)

declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg)
declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg)
declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg)

declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg)
declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg)

declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg)
declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg)

declare <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32>, i32, i32 immarg)
declare <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32>, i32, i32 immarg)